From 18d4625b5d8db86767b10cf34cbf8260b313eb00 Mon Sep 17 00:00:00 2001
From: Tommaso Teofili <tommaso.teofili@elastic.co>
Date: Fri, 19 Jun 2026 09:08:20 +0200
Subject: [PATCH] Lower FST heap utilization for Nori/Korean analyzers

---
 .../lucene/analysis/morph/MorphFSTLoader.java | 137 ++++++++++++++++
 .../analysis/morph/TestMorphFSTLoader.java    |  97 +++++++++++
 .../analysis/ja/dict/TokenInfoDictionary.java |  42 +++--
 .../analysis/ja/dict/UserDictionary.java      |   4 +-
 .../analysis/ko/KoreanTokenizerFactory.java   |   9 +-
 .../analysis/ko/dict/TokenInfoDictionary.java |  49 ++++--
 .../lucene/analysis/ko/dict/TokenInfoFST.java |  14 +-
 .../analysis/ko/dict/UserDictionary.java      |  16 +-
 .../dict/TestKoreanDictionaryHeapUsage.java   |  86 ++++++++++
 .../dict/TestKoreanDictionarySmallHeap.java   |  53 ++++++
 lucene/benchmark-jmh/build.gradle             |   1 +
 .../benchmark-jmh/src/java/module-info.java   |   1 +
 .../jmh/KoreanTokenizerBenchmark.java         |  97 +++++++++++
 .../lucene/util/fst/DirectBufferFSTStore.java | 116 +++++++++++++
 .../java/org/apache/lucene/util/fst/FST.java  |   7 +
 .../util/fst/ReverseDirectBufferReader.java   |  56 +++++++
 .../util/fst/TestDirectBufferFSTStore.java    | 155 ++++++++++++++++++
 17 files changed, 901 insertions(+), 39 deletions(-)
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphFSTLoader.java
 create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/morph/TestMorphFSTLoader.java
 create mode 100644 lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionaryHeapUsage.java
 create mode 100644 lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionarySmallHeap.java
 create mode 100644 lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/KoreanTokenizerBenchmark.java
 create mode 100644 lucene/core/src/java/org/apache/lucene/util/fst/DirectBufferFSTStore.java
 create mode 100644 lucene/core/src/java/org/apache/lucene/util/fst/ReverseDirectBufferReader.java
 create mode 100644 lucene/core/src/test/org/apache/lucene/util/fst/TestDirectBufferFSTStore.java

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphFSTLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphFSTLoader.java
new file mode 100644
index 000000000000..a67ae6170122
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphFSTLoader.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morph;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.fst.DirectBufferFSTStore;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTReader;
+import org.apache.lucene.util.fst.OffHeapFSTStore;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+
+/**
+ * Loads morphological dictionary {@link FST}s with storage appropriate to the byte source.
+ *
+ * <ul>
+ *   <li>{@link #loadFromPath(Path)} — zero-copy mmap via {@link OffHeapFSTStore}
+ *   <li>{@link #loadFromStream(InputStream)} — one copy into a direct buffer for classpath/JAR
+ *       streams
+ *   <li>{@link #loadFromUrl(URL)} — mmap for {@code file:} URLs, otherwise stream copy
+ *   <li>{@link #loadCompiled(FST.FSTMetadata, FSTReader)} — relocates in-memory compiler output
+ *       off-heap
+ * </ul>
+ */
+public final class MorphFSTLoader {
+
+  private static final PositiveIntOutputs OUTPUTS = PositiveIntOutputs.getSingleton();
+
+  private MorphFSTLoader() {}
+
+  /**
+   * An FST plus an optional resource that must remain open for mmap-backed FSTs. Callers loading
+   * from a {@link Path} must retain this holder for the lifetime of the returned {@link FST}.
+   */
+  public static final class LoadedFST implements Closeable {
+    private final FST<Long> fst;
+    private final Closeable resource;
+
+    LoadedFST(FST<Long> fst, Closeable resource) {
+      this.fst = fst;
+      this.resource = resource;
+    }
+
+    public FST<Long> fst() {
+      return fst;
+    }
+
+    /** Returns the mmap resource, or {@code null} when the FST does not require one. */
+    public Closeable resource() {
+      return resource;
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (resource != null) {
+        resource.close();
+      }
+    }
+  }
+
+  /**
+   * Load an FST from a file using mmap ({@link OffHeapFSTStore}). The returned {@link LoadedFST}
+   * must be kept alive while the FST is in use.
+   */
+  public static LoadedFST loadFromPath(Path fstFile) throws IOException {
+    Directory dir = new MMapDirectory(fstFile.getParent());
+    IndexInput in = dir.openInput(fstFile.getFileName().toString(), IOContext.READONCE);
+    FST.FSTMetadata<Long> metadata = FST.readMetadata(in, OUTPUTS);
+    OffHeapFSTStore store = new OffHeapFSTStore(in, in.getFilePointer(), metadata);
+    FST<Long> fst = FST.fromFSTReader(metadata, store);
+    Closeable resource = () -> IOUtils.close(in, dir);
+    return new LoadedFST(fst, resource);
+  }
+
+  /**
+   * Load an FST from a stream by copying bytes into a direct buffer ({@link
+   * DirectBufferFSTStore}).
+   */
+  public static FST<Long> loadFromStream(InputStream in) throws IOException {
+    DataInput dataIn = new InputStreamDataInput(new BufferedInputStream(in));
+    FST.FSTMetadata<Long> metadata = FST.readMetadata(dataIn, OUTPUTS);
+    return FST.fromFSTReader(
+        metadata, new DirectBufferFSTStore(dataIn, metadata.getNumBytes()));
+  }
+
+  /**
+   * Load an FST from a URL. {@code file:} URLs are mmap'd; other schemes are read as streams.
+   */
+  public static LoadedFST loadFromUrl(URL fstUrl) throws IOException {
+    if ("file".equalsIgnoreCase(fstUrl.getProtocol())) {
+      try {
+        URI uri = fstUrl.toURI();
+        return loadFromPath(Paths.get(uri));
+      } catch (URISyntaxException e) {
+        throw new IOException("Bad file URL: " + fstUrl, e);
+      }
+    }
+    try (InputStream is = new BufferedInputStream(fstUrl.openStream())) {
+      return new LoadedFST(loadFromStream(is), null);
+    }
+  }
+
+  /** Relocate a freshly compiled on-heap FST into a direct buffer. */
+  public static FST<Long> loadCompiled(
+      FST.FSTMetadata<Long> metadata, FSTReader compilerReader) throws IOException {
+    return FST.fromFSTReader(
+        metadata, new DirectBufferFSTStore(compilerReader, metadata.getNumBytes()));
+  }
+}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/morph/TestMorphFSTLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/morph/TestMorphFSTLoader.java
new file mode 100644
index 000000000000..4f8c90d83835
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/morph/TestMorphFSTLoader.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morph;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTCompiler;
+import org.apache.lucene.util.fst.IntsRefFSTEnum;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+
+public class TestMorphFSTLoader extends LuceneTestCase {
+
+  private static final long MAX_SHALLOW_FST_RAM = 10_000L;
+
+  public void testLoadFromPathMmapsWithoutHeapCopy() throws Exception {
+    Path fstFile = writeSampleFstToPath();
+    try (MorphFSTLoader.LoadedFST loaded = MorphFSTLoader.loadFromPath(fstFile)) {
+      assertNotNull(loaded.resource());
+      assertTrue(loaded.fst().ramBytesUsed() < MAX_SHALLOW_FST_RAM);
+      assertEnumEquals(buildSampleFst(), loaded.fst());
+    }
+  }
+
+  public void testLoadFromStreamCopiesOffHeap() throws Exception {
+    FST<Long> expected = buildSampleFst();
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    expected.save(new OutputStreamDataOutput(bos), new OutputStreamDataOutput(bos));
+    FST<Long> loaded =
+        MorphFSTLoader.loadFromStream(new java.io.ByteArrayInputStream(bos.toByteArray()));
+    assertTrue(loaded.ramBytesUsed() < MAX_SHALLOW_FST_RAM);
+    assertEnumEquals(expected, loaded);
+  }
+
+  public void testLoadCompiledCopiesOffHeap() throws Exception {
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
+    FSTCompiler<Long> compiler =
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
+    IntsRefBuilder scratch = new IntsRefBuilder();
+    scratch.append(42);
+    compiler.add(scratch.get(), 7L);
+    FST.FSTMetadata<Long> metadata = compiler.compile();
+    FST<Long> loaded = MorphFSTLoader.loadCompiled(metadata, compiler.getFSTReader());
+    assertTrue(loaded.ramBytesUsed() < MAX_SHALLOW_FST_RAM);
+  }
+
+  private static Path writeSampleFstToPath() throws IOException {
+    FST<Long> fst = buildSampleFst();
+    Path path = createTempFile("morph-fst", ".dat");
+    fst.save(path);
+    return path;
+  }
+
+  private static FST<Long> buildSampleFst() throws IOException {
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
+    FSTCompiler<Long> compiler =
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
+    IntsRefBuilder scratch = new IntsRefBuilder();
+    for (int i = 0; i < 64; i++) {
+      scratch.clear();
+      scratch.append(i);
+      compiler.add(scratch.get(), (long) i);
+    }
+    return FST.fromFSTReader(compiler.compile(), compiler.getFSTReader());
+  }
+
+  private static void assertEnumEquals(FST<Long> expected, FST<Long> actual) throws IOException {
+    IntsRefFSTEnum<Long> expectedEnum = new IntsRefFSTEnum<>(expected);
+    IntsRefFSTEnum<Long> actualEnum = new IntsRefFSTEnum<>(actual);
+    IntsRefFSTEnum.InputOutput<Long> e;
+    while ((e = expectedEnum.next()) != null) {
+      IntsRefFSTEnum.InputOutput<Long> a = actualEnum.next();
+      assertNotNull(a);
+      assertEquals(e.input, a.input);
+      assertEquals(e.output, a.output);
+    }
+    assertNull(actualEnum.next());
+  }
+}
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
index ac2520f5b62c..045e07b78e3c 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
@@ -16,8 +16,6 @@
  */
 package org.apache.lucene.analysis.ja.dict;
 
-import static org.apache.lucene.util.fst.FST.readMetadata;
-
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -25,12 +23,9 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import org.apache.lucene.analysis.morph.BinaryDictionary;
-import org.apache.lucene.store.DataInput;
-import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.analysis.morph.MorphFSTLoader;
 import org.apache.lucene.util.IOSupplier;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PositiveIntOutputs;
 
 /**
  * Binary dictionary implementation for a known-word dictionary model: Words are encoded into an FST
@@ -42,6 +37,9 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
 
   private final TokenInfoFST fst;
   private final TokenInfoMorphData morphAtts;
+  /** Keeps mmap resources alive for {@link MorphFSTLoader#loadFromPath}; otherwise null. */
+  @SuppressWarnings("unused")
+  private final MorphFSTLoader.LoadedFST fstHolder;
 
   /**
    * Create a {@link TokenInfoDictionary} from an external resource path.
@@ -54,11 +52,17 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
    */
   public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile, Path fstFile)
       throws IOException {
-    this(
+    super(
         () -> Files.newInputStream(targetMapFile),
-        () -> Files.newInputStream(posDictFile),
         () -> Files.newInputStream(dictFile),
-        () -> Files.newInputStream(fstFile));
+        DictionaryConstants.TARGETMAP_HEADER,
+        DictionaryConstants.DICT_HEADER,
+        DictionaryConstants.VERSION);
+    this.morphAtts =
+        new TokenInfoMorphData(buffer, () -> Files.newInputStream(posDictFile));
+    MorphFSTLoader.LoadedFST loaded = MorphFSTLoader.loadFromPath(fstFile);
+    this.fstHolder = loaded;
+    this.fst = new TokenInfoFST(loaded.fst(), true);
   }
 
   /**
@@ -73,11 +77,16 @@ public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile,
    */
   public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
       throws IOException {
-    this(
+    super(
         () -> targetMapUrl.openStream(),
-        () -> posDictUrl.openStream(),
         () -> dictUrl.openStream(),
-        () -> fstUrl.openStream());
+        DictionaryConstants.TARGETMAP_HEADER,
+        DictionaryConstants.DICT_HEADER,
+        DictionaryConstants.VERSION);
+    this.morphAtts = new TokenInfoMorphData(buffer, () -> posDictUrl.openStream());
+    MorphFSTLoader.LoadedFST loaded = MorphFSTLoader.loadFromUrl(fstUrl);
+    this.fstHolder = loaded.resource() != null ? loaded : null;
+    this.fst = new TokenInfoFST(loaded.fst(), true);
   }
 
   private TokenInfoDictionary() throws IOException {
@@ -101,14 +110,11 @@ private TokenInfoDictionary(
         DictionaryConstants.DICT_HEADER,
         DictionaryConstants.VERSION);
     this.morphAtts = new TokenInfoMorphData(buffer, posResource);
-
-    FST<Long> fst;
     try (InputStream is = new BufferedInputStream(fstResource.get())) {
-      DataInput in = new InputStreamDataInput(is);
-      fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
+      // TODO: some way to configure?
+      this.fst = new TokenInfoFST(MorphFSTLoader.loadFromStream(is), true);
     }
-    // TODO: some way to configure?
-    this.fst = new TokenInfoFST(fst, true);
+    this.fstHolder = null;
   }
 
   static InputStream getClassResource(String suffix) throws IOException {
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
index 9a8ee3a8f8ce..5cf8b8f6a22c 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.morph.Dictionary;
+import org.apache.lucene.analysis.morph.MorphFSTLoader;
 import org.apache.lucene.analysis.util.CSVUtil;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.FST;
@@ -141,9 +142,10 @@ private UserDictionary(List<String[]> featureEntries) throws IOException {
       segmentations.add(wordIdAndLength);
       ord++;
     }
+    FST.FSTMetadata<Long> metadata = fstCompiler.compile();
     this.fst =
         new TokenInfoFST(
-            FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false);
+            MorphFSTLoader.loadCompiled(metadata, fstCompiler.getFSTReader()), false);
     this.morphAtts = new UserMorphData(data.toArray(String[]::new));
     this.segmentations = segmentations.toArray(int[][]::new);
   }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
index 399012fb52b8..e2e78fe88b1f 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
@@ -59,6 +59,10 @@
  *       {@link DecompoundMode}
  *   <li>outputUnknownUnigrams: If true outputs unigrams for unknown words.
  *   <li>discardPunctuation: true if punctuation tokens should be dropped from the output.
+ *   <li>cacheHangulRootArcs: if true caches Hangul syllable root arcs in user dictionary FST for
+ *       faster tokenization at the cost of additional heap. Default is true. The system dictionary
+ *       honors system property {@link
+ *       org.apache.lucene.analysis.ko.dict.TokenInfoDictionary#CACHE_HANGUL_ROOT_ARCS_PROPERTY}.
  * </ul>
  *
  * @lucene.experimental
@@ -75,6 +79,7 @@ public class KoreanTokenizerFactory extends TokenizerFactory implements Resource
   private static final String DECOMPOUND_MODE = "decompoundMode";
   private static final String OUTPUT_UNKNOWN_UNIGRAMS = "outputUnknownUnigrams";
   private static final String DISCARD_PUNCTUATION = "discardPunctuation";
+  private static final String CACHE_HANGUL_ROOT_ARCS = "cacheHangulRootArcs";
 
   private final String userDictionaryPath;
   private final String userDictionaryEncoding;
@@ -83,6 +88,7 @@ public class KoreanTokenizerFactory extends TokenizerFactory implements Resource
   private final KoreanTokenizer.DecompoundMode mode;
   private final boolean outputUnknownUnigrams;
   private final boolean discardPunctuation;
+  private final boolean cacheHangulRootArcs;
 
   /** Creates a new KoreanTokenizerFactory */
   public KoreanTokenizerFactory(Map<String, String> args) {
@@ -95,6 +101,7 @@ public KoreanTokenizerFactory(Map<String, String> args) {
                 .toUpperCase(Locale.ROOT));
     outputUnknownUnigrams = getBoolean(args, OUTPUT_UNKNOWN_UNIGRAMS, false);
     discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
+    cacheHangulRootArcs = getBoolean(args, CACHE_HANGUL_ROOT_ARCS, true);
 
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -120,7 +127,7 @@ public void inform(ResourceLoader loader) throws IOException {
                 .onMalformedInput(CodingErrorAction.REPORT)
                 .onUnmappableCharacter(CodingErrorAction.REPORT);
         Reader reader = new InputStreamReader(stream, decoder);
-        userDictionary = UserDictionary.open(reader);
+        userDictionary = UserDictionary.open(reader, cacheHangulRootArcs);
       }
     } else {
       userDictionary = null;
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
index 679fb8739882..c9e34b1bd39e 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
@@ -16,8 +16,6 @@
  */
 package org.apache.lucene.analysis.ko.dict;
 
-import static org.apache.lucene.util.fst.FST.readMetadata;
-
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -25,12 +23,10 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import org.apache.lucene.analysis.morph.BinaryDictionary;
-import org.apache.lucene.store.DataInput;
-import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.analysis.morph.MorphFSTLoader;
 import org.apache.lucene.util.IOSupplier;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PositiveIntOutputs;
 
 /**
  * Binary dictionary implementation for a known-word dictionary model: Words are encoded into an FST
@@ -42,6 +38,9 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
 
   private final TokenInfoFST fst;
   private final TokenInfoMorphData morphAtts;
+  /** Keeps mmap resources alive for {@link MorphFSTLoader#loadFromPath}; otherwise null. */
+  @SuppressWarnings("unused")
+  private final MorphFSTLoader.LoadedFST fstHolder;
 
   private TokenInfoDictionary() throws IOException {
     this(
@@ -62,11 +61,17 @@ private TokenInfoDictionary() throws IOException {
    */
   public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile, Path fstFile)
       throws IOException {
-    this(
+    super(
         () -> Files.newInputStream(targetMapFile),
-        () -> Files.newInputStream(posDictFile),
         () -> Files.newInputStream(dictFile),
-        () -> Files.newInputStream(fstFile));
+        DictionaryConstants.TARGETMAP_HEADER,
+        DictionaryConstants.DICT_HEADER,
+        DictionaryConstants.VERSION);
+    this.morphAtts =
+        new TokenInfoMorphData(buffer, () -> Files.newInputStream(posDictFile));
+    MorphFSTLoader.LoadedFST loaded = MorphFSTLoader.loadFromPath(fstFile);
+    this.fstHolder = loaded;
+    this.fst = new TokenInfoFST(loaded.fst(), cacheHangulRootArcs());
   }
 
   /**
@@ -81,11 +86,16 @@ public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile,
    */
   public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
       throws IOException {
-    this(
+    super(
         () -> targetMapUrl.openStream(),
-        () -> posDictUrl.openStream(),
         () -> dictUrl.openStream(),
-        () -> fstUrl.openStream());
+        DictionaryConstants.TARGETMAP_HEADER,
+        DictionaryConstants.DICT_HEADER,
+        DictionaryConstants.VERSION);
+    this.morphAtts = new TokenInfoMorphData(buffer, () -> posDictUrl.openStream());
+    MorphFSTLoader.LoadedFST loaded = MorphFSTLoader.loadFromUrl(fstUrl);
+    this.fstHolder = loaded.resource() != null ? loaded : null;
+    this.fst = new TokenInfoFST(loaded.fst(), cacheHangulRootArcs());
   }
 
   private TokenInfoDictionary(
@@ -101,12 +111,10 @@ private TokenInfoDictionary(
         DictionaryConstants.DICT_HEADER,
         DictionaryConstants.VERSION);
     this.morphAtts = new TokenInfoMorphData(buffer, posResource);
-    FST<Long> fst;
     try (InputStream is = new BufferedInputStream(fstResource.get())) {
-      DataInput in = new InputStreamDataInput(is);
-      fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
+      this.fst = new TokenInfoFST(MorphFSTLoader.loadFromStream(is), cacheHangulRootArcs());
     }
-    this.fst = new TokenInfoFST(fst);
+    this.fstHolder = null;
   }
 
   static InputStream getClassResource(String suffix) throws IOException {
@@ -119,6 +127,17 @@ public TokenInfoFST getFST() {
     return fst;
   }
 
+  /**
+   * Whether to cache Hangul syllable root arcs for the system dictionary. Controlled by system
+   * property {@value #CACHE_HANGUL_ROOT_ARCS_PROPERTY}, default {@code true}.
+   */
+  static final String CACHE_HANGUL_ROOT_ARCS_PROPERTY =
+      "org.apache.lucene.analysis.ko.cacheHangulRootArcs";
+
+  private static boolean cacheHangulRootArcs() {
+    return Boolean.parseBoolean(System.getProperty(CACHE_HANGUL_ROOT_ARCS_PROPERTY, "true"));
+  }
+
   public static TokenInfoDictionary getInstance() {
     return SingletonHolder.INSTANCE;
   }
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
index fa591eb00439..c318321ec6bb 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
@@ -19,11 +19,21 @@
 import java.io.IOException;
 import org.apache.lucene.util.fst.FST;
 
-/** Thin wrapper around an FST with root-arc caching for Hangul syllables (11,172 arcs). */
+/**
+ * Thin wrapper around an FST with root-arc caching for Hangul syllables (11,172 arcs).
+ *
+ * <p>When {@code cacheHangulSyllables} is {@code true}, root arcs for all Hangul syllables
+ * ({@code 0xAC00}–{@code 0xD7A3}) are cached for faster lookup at the cost of additional heap. When
+ * {@code false}, only a single unused root arc is cached.
+ */
 public final class TokenInfoFST extends org.apache.lucene.analysis.morph.TokenInfoFST {
 
   public TokenInfoFST(FST<Long> fst) throws IOException {
-    super(fst, 0xD7A3, 0xAC00);
+    this(fst, true);
+  }
+
+  public TokenInfoFST(FST<Long> fst, boolean cacheHangulSyllables) throws IOException {
+    super(fst, cacheHangulSyllables ? 0xD7A3 : 0, cacheHangulSyllables ? 0xAC00 : 0);
   }
 
   /**
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
index f2e54ad5ecfb..1810c537aed5 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
@@ -23,6 +23,7 @@
 import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.analysis.morph.Dictionary;
+import org.apache.lucene.analysis.morph.MorphFSTLoader;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.FST;
@@ -47,6 +48,10 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
   private UserMorphData morphAtts;
 
   public static UserDictionary open(Reader reader) throws IOException {
+    return open(reader, true);
+  }
+
+  public static UserDictionary open(Reader reader, boolean cacheHangulRootArcs) throws IOException {
 
     BufferedReader br = new BufferedReader(reader);
     String line;
@@ -67,11 +72,15 @@ public static UserDictionary open(Reader reader) throws IOException {
     if (entries.isEmpty()) {
       return null;
     } else {
-      return new UserDictionary(entries);
+      return new UserDictionary(entries, cacheHangulRootArcs);
     }
   }
 
   private UserDictionary(List<String> entries) throws IOException {
+    this(entries, true);
+  }
+
+  private UserDictionary(List<String> entries, boolean cacheHangulRootArcs) throws IOException {
     final CharacterDefinition charDef = CharacterDefinition.getInstance();
     entries.sort(Comparator.comparing(e -> e.split("\\s+")[0]));
 
@@ -135,8 +144,11 @@ private UserDictionary(List<String> entries) throws IOException {
     if (entryIndex < rightIds.length) {
       rightIds = ArrayUtil.copyOfSubArray(rightIds, 0, entryIndex);
     }
+    FST.FSTMetadata<Long> metadata = fstCompiler.compile();
     this.fst =
-        new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
+        new TokenInfoFST(
+            MorphFSTLoader.loadCompiled(metadata, fstCompiler.getFSTReader()),
+            cacheHangulRootArcs);
     int[][] segmentations = _segmentations.toArray(int[][]::new);
     this.morphAtts = new UserMorphData(segmentations, rightIds);
   }
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionaryHeapUsage.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionaryHeapUsage.java
new file mode 100644
index 000000000000..add499777641
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionaryHeapUsage.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ko.KoreanAnalyzer;
+import org.apache.lucene.analysis.ko.KoreanTokenizer;
+import org.apache.lucene.analysis.ko.TestKoreanTokenizer;
+import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.tests.util.RamUsageTester;
+import org.apache.lucene.util.fst.FST;
+
+public class TestKoreanDictionaryHeapUsage extends BaseTokenStreamTestCase {
+
+  private static final int TOKENIZER_COUNT = 50;
+  private static final long MAX_FST_RAM_BYTES = 10_000L;
+
+  public void testSystemDictionaryFstIsOffHeap() throws IOException {
+    FST<Long> fst = TokenInfoDictionary.getInstance().getFST().getInternalFST();
+    assertFstUsesDirectBuffer(fst);
+  }
+
+  public void testUserDictionaryFstIsOffHeap() throws IOException {
+    UserDictionary userDictionary = TestKoreanTokenizer.readDict();
+    FST<Long> fst = userDictionary.getFST().getInternalFST();
+    assertFstUsesDirectBuffer(fst);
+  }
+
+  public void testManyTokenizersDoNotDuplicateFstBytes() throws IOException {
+    UserDictionary userDictionary = TestKoreanTokenizer.readDict();
+    List<KoreanTokenizer> tokenizers = new ArrayList<>(TOKENIZER_COUNT);
+    for (int i = 0; i < TOKENIZER_COUNT; i++) {
+      tokenizers.add(
+          new KoreanTokenizer(
+              newAttributeFactory(), userDictionary, KoreanTokenizer.DecompoundMode.NONE, false));
+    }
+    long tokenizerHeap = RamUsageTester.ramUsed(tokenizers);
+    assertTrue(
+        "expected small per-tokenizer heap footprint, got " + RamUsageTester.humanSizeOf(tokenizers),
+        tokenizerHeap < 5_000_000L);
+    assertFstUsesDirectBuffer(TokenInfoDictionary.getInstance().getFST().getInternalFST());
+  }
+
+  public void testReducedRootCacheUsesLessHeap() throws Exception {
+    FST<Long> fst = TokenInfoDictionary.getInstance().getFST().getInternalFST();
+    TokenInfoFST cached = new TokenInfoFST(fst, true);
+    TokenInfoFST uncached = new TokenInfoFST(fst, false);
+    long cachedHeap = RamUsageTester.ramUsed(cached);
+    long uncachedHeap = RamUsageTester.ramUsed(uncached);
+    assertTrue(cachedHeap > uncachedHeap);
+    assertTrue(cachedHeap - uncachedHeap >= 500_000L);
+  }
+
+  public void testSegmentationParityWithFullAnalyzer() throws Exception {
+    try (Analyzer analyzer = new KoreanAnalyzer()) {
+      assertAnalyzesTo(
+          analyzer,
+          "한국은 대단한 나라입니다.",
+          new String[] {"한국", "대단", "나라", "이"},
+          new int[] {0, 4, 8, 10},
+          new int[] {2, 6, 10, 13},
+          new int[] {1, 2, 3, 1});
+    }
+  }
+
+  private static void assertFstUsesDirectBuffer(FST<Long> fst) {
+    assertTrue(fst.ramBytesUsed() < MAX_FST_RAM_BYTES);
+  }
+}
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionarySmallHeap.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionarySmallHeap.java
new file mode 100644
index 000000000000..5b946ad12d7f
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestKoreanDictionarySmallHeap.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.IOException;
+import java.io.StringReader;
+import org.apache.lucene.analysis.ko.KoreanAnalyzer;
+import org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilter;
+import org.apache.lucene.analysis.ko.KoreanTokenizer;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.junit.Ignore;
+
+/**
+ * Manual smoke test for Nori dictionary loading under a very small heap.
+ *
+ * <p>Run with:
+ *
+ * <pre>
+ * ./gradlew :lucene:analysis:nori:test --tests TestKoreanDictionarySmallHeap \
+ *   -Dtests.jvm.args=-Xmx32m
+ * </pre>
+ */
+@Ignore("Manual small-heap validation; enable locally when verifying off-heap FST loading")
+public class TestKoreanDictionarySmallHeap extends LuceneTestCase {
+
+  public void testLoadDictionaryAndTokenizeOnSmallHeap() throws IOException {
+    TokenInfoDictionary dictionary = TokenInfoDictionary.getInstance();
+    assertNotNull(dictionary.getFST());
+
+    try (KoreanAnalyzer analyzer =
+        new KoreanAnalyzer(
+            null,
+            KoreanTokenizer.DecompoundMode.NONE,
+            KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS,
+            false)) {
+      assertNotNull(analyzer.tokenStream("field", "한국어 형태소 분석"));
+    }
+  }
+}
diff --git a/lucene/benchmark-jmh/build.gradle b/lucene/benchmark-jmh/build.gradle
index 6d64bb7a4f72..d1542cf6a7fe 100644
--- a/lucene/benchmark-jmh/build.gradle
+++ b/lucene/benchmark-jmh/build.gradle
@@ -19,6 +19,7 @@ description = 'Lucene JMH micro-benchmarking module'
 
 dependencies {
   moduleImplementation project(':lucene:core')
+  moduleImplementation project(':lucene:analysis:nori')
   moduleImplementation project(':lucene:expressions')
   moduleImplementation project(':lucene:join')
   moduleImplementation project(':lucene:sandbox')
diff --git a/lucene/benchmark-jmh/src/java/module-info.java b/lucene/benchmark-jmh/src/java/module-info.java
index 8090c7554739..b9b16b76debf 100644
--- a/lucene/benchmark-jmh/src/java/module-info.java
+++ b/lucene/benchmark-jmh/src/java/module-info.java
@@ -23,6 +23,7 @@
   requires jmh.core;
   requires jdk.unsupported;
   requires org.apache.lucene.core;
+  requires org.apache.lucene.analysis.nori;
   requires org.apache.lucene.expressions;
   requires org.apache.lucene.join;
   requires org.apache.lucene.sandbox;
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/KoreanTokenizerBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/KoreanTokenizerBenchmark.java
new file mode 100644
index 000000000000..c2f6915069be
--- /dev/null
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/KoreanTokenizerBenchmark.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.concurrent.TimeUnit;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ko.KoreanAnalyzer;
+import org.apache.lucene.analysis.ko.KoreanTokenizer;
+import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Fork(value = 1, warmups = 1)
+@Warmup(iterations = 2, time = 1)
+@Measurement(iterations = 3, time = 2)
+public class KoreanTokenizerBenchmark {
+
+  @Param({"SHORT", "LONG"})
+  public String textSize;
+
+  @Param({"false", "true"})
+  public boolean userDictionary;
+
+  private Analyzer analyzer;
+  private String text;
+
+  @Setup(Level.Trial)
+  public void setup() throws IOException {
+    UserDictionary userDict = userDictionary ? openUserDictionary() : null;
+    analyzer = new KoreanAnalyzer(userDict, KoreanTokenizer.DecompoundMode.DISCARD, null, false);
+    text =
+        switch (textSize) {
+          case "SHORT" -> "한국은 대단한 나라입니다.";
+          case "LONG" ->
+              """
+              서울특별시는 대한민국의 수도이자 최대 도시이다. 한강을 중심으로 발전해 왔으며 \
+              정치, 경제, 사회, 문화의 중심지 역할을 한다. 많은 기업과 연구 기관이 \
+              집중되어 있어 정보 기술과 금융 산업이 발달했다.""";
+          default -> throw new IllegalStateException("unknown textSize: " + textSize);
+        };
+  }
+
+  @TearDown(Level.Trial)
+  public void tearDown() throws IOException {
+    if (analyzer != null) {
+      analyzer.close();
+    }
+  }
+
+  @Benchmark
+  public int tokenizeDocument() throws IOException {
+    int tokenCount = 0;
+    try (TokenStream stream = analyzer.tokenStream("field", text)) {
+      stream.reset();
+      while (stream.incrementToken()) {
+        tokenCount++;
+      }
+      stream.end();
+    }
+    return tokenCount;
+  }
+
+  private static UserDictionary openUserDictionary() throws IOException {
+    return UserDictionary.open(new StringReader("세종시\n세종 시"));
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/DirectBufferFSTStore.java b/lucene/core/src/java/org/apache/lucene/util/fst/DirectBufferFSTStore.java
new file mode 100644
index 000000000000..7afbca79b769
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/DirectBufferFSTStore.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.fst;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Provides off-heap storage of finite state machine (FST) bytes in a direct {@link ByteBuffer}.
+ *
+ * <p>Unlike {@link OnHeapFSTStore}, FST bytes are not counted in JVM heap by {@link
+ * #ramBytesUsed()}. Morphological analysis dictionaries use this store to reduce on-heap pressure.
+ *
+ * @lucene.experimental
+ */
+public final class DirectBufferFSTStore implements FSTReader {
+
+  private static final long BASE_RAM_BYTES_USED =
+      RamUsageEstimator.shallowSizeOfInstance(DirectBufferFSTStore.class);
+
+  private final ByteBuffer buffer;
+
+  /**
+   * Read FST bytes from {@code in} into a direct buffer.
+   *
+   * @param in input positioned at the first FST byte
+   * @param numBytes number of FST bytes to read
+   */
+  public DirectBufferFSTStore(DataInput in, long numBytes) throws IOException {
+    this.buffer = readDirectBuffer(in, numBytes);
+  }
+
+  /**
+   * Copy FST bytes from an existing {@link FSTReader} into a direct buffer.
+   *
+   * @param source reader containing FST bytes
+   * @param numBytes number of FST bytes to copy
+   */
+  public DirectBufferFSTStore(FSTReader source, long numBytes) throws IOException {
+    if (numBytes > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("FST too large for direct buffer: " + numBytes);
+    }
+    ByteBuffer tmp = ByteBuffer.allocateDirect((int) numBytes);
+    source.writeTo(
+        new DataOutput() {
+          @Override
+          public void writeByte(byte b) {
+            tmp.put(b);
+          }
+
+          @Override
+          public void writeBytes(byte[] b, int offset, int length) {
+            tmp.put(b, offset, length);
+          }
+        });
+    tmp.flip();
+    this.buffer = tmp.asReadOnlyBuffer();
+  }
+
+  private static ByteBuffer readDirectBuffer(DataInput in, long numBytes) throws IOException {
+    if (numBytes > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("FST too large for direct buffer: " + numBytes);
+    }
+    int size = (int) numBytes;
+    ByteBuffer tmp = ByteBuffer.allocateDirect(size);
+    byte[] scratch = new byte[8192];
+    int remaining = size;
+    while (remaining > 0) {
+      int toRead = Math.min(scratch.length, remaining);
+      in.readBytes(scratch, 0, toRead);
+      tmp.put(scratch, 0, toRead);
+      remaining -= toRead;
+    }
+    tmp.flip();
+    return tmp.asReadOnlyBuffer();
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return BASE_RAM_BYTES_USED;
+  }
+
+  @Override
+  public FST.BytesReader getReverseBytesReader() {
+    return new ReverseDirectBufferReader(buffer.duplicate());
+  }
+
+  @Override
+  public void writeTo(DataOutput out) throws IOException {
+    ByteBuffer dup = buffer.duplicate();
+    dup.rewind();
+    byte[] scratch = new byte[8192];
+    while (dup.hasRemaining()) {
+      int len = Math.min(scratch.length, dup.remaining());
+      dup.get(scratch, 0, len);
+      out.writeBytes(scratch, 0, len);
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
index 30fd39805daf..96a5e941e58c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
@@ -420,6 +420,13 @@ public FST(FSTMetadata<T> metadata, DataInput in) throws IOException {
     this(metadata, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS, in, metadata.numBytes));
   }
 
+  /**
+   * Load a previously saved FST with a DataInput for metadata using a {@link DirectBufferFSTStore}.
+   */
+  public static <T> FST<T> loadDirect(FSTMetadata<T> metadata, DataInput in) throws IOException {
+    return fromFSTReader(metadata, new DirectBufferFSTStore(in, metadata.numBytes));
+  }
+
   /** Create the FST with a metadata object and a FSTReader. */
   FST(FSTMetadata<T> metadata, FSTReader fstReader) {
     assert fstReader != null;
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ReverseDirectBufferReader.java b/lucene/core/src/java/org/apache/lucene/util/fst/ReverseDirectBufferReader.java
new file mode 100644
index 000000000000..e5124bbd8ffa
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/ReverseDirectBufferReader.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.fst;
+
+import java.nio.ByteBuffer;
+
+/** Reads in reverse from a direct {@link ByteBuffer}. */
+final class ReverseDirectBufferReader extends FST.BytesReader {
+  private final ByteBuffer buffer;
+  private int pos;
+
+  ReverseDirectBufferReader(ByteBuffer buffer) {
+    this.buffer = buffer;
+  }
+
+  @Override
+  public byte readByte() {
+    return buffer.get(pos--);
+  }
+
+  @Override
+  public void readBytes(byte[] b, int offset, int len) {
+    for (int i = 0; i < len; i++) {
+      b[offset + i] = buffer.get(pos--);
+    }
+  }
+
+  @Override
+  public void skipBytes(long count) {
+    pos -= count;
+  }
+
+  @Override
+  public long getPosition() {
+    return pos;
+  }
+
+  @Override
+  public void setPosition(long pos) {
+    this.pos = (int) pos;
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestDirectBufferFSTStore.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestDirectBufferFSTStore.java
new file mode 100644
index 000000000000..4dafd99bc6ca
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestDirectBufferFSTStore.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.fst;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.RamUsageTester;
+import org.apache.lucene.util.IntsRefBuilder;
+
+public class TestDirectBufferFSTStore extends LuceneTestCase {
+
+  private static final int MAX_BLOCK_BITS = 30;
+
+  public void testEquivalenceWithOnHeapStore() throws IOException {
+    SavedFST saved = buildSavedFst();
+    FST<Long> onHeapFst = loadOnHeap(saved);
+    FST<Long> directFst = loadDirect(saved);
+
+    IntsRefFSTEnum<Long> onHeapEnum = new IntsRefFSTEnum<>(onHeapFst);
+    IntsRefFSTEnum<Long> directEnum = new IntsRefFSTEnum<>(directFst);
+    IntsRefFSTEnum.InputOutput<Long> onHeapEntry;
+    IntsRefFSTEnum.InputOutput<Long> directEntry;
+    while ((onHeapEntry = onHeapEnum.next()) != null) {
+      directEntry = directEnum.next();
+      assertNotNull(directEntry);
+      assertEquals(onHeapEntry.input, directEntry.input);
+      assertEquals(onHeapEntry.output, directEntry.output);
+    }
+    assertNull(directEnum.next());
+  }
+
+  public void testRamBytesUsed() throws IOException {
+    SavedFST saved = buildSavedFst();
+    DirectBufferFSTStore directStore =
+        new DirectBufferFSTStore(saved.dataInput(), saved.metadata.getNumBytes());
+    OnHeapFSTStore onHeapStore =
+        new OnHeapFSTStore(MAX_BLOCK_BITS, saved.dataInput(), saved.metadata.getNumBytes());
+
+    assertTrue(directStore.ramBytesUsed() < 1_000);
+    assertTrue(onHeapStore.ramBytesUsed() > directStore.ramBytesUsed() + 100);
+    assertTrue(RamUsageTester.ramUsed(directStore) < RamUsageTester.ramUsed(onHeapStore));
+    assertNoLargeByteArray(directStore);
+  }
+
+  public void testBytesReader() throws IOException {
+    SavedFST saved = buildSavedFst();
+    FST<Long> fst = loadDirect(saved);
+    FST.BytesReader reader = fst.getBytesReader();
+    byte[] data = saved.dataBytes;
+    reader.setPosition(data.length - 1);
+    assertEquals(data[data.length - 1], reader.readByte());
+    byte[] scratch = new byte[4];
+    reader.setPosition(3);
+    reader.readBytes(scratch, 0, 4);
+    for (int i = 0; i < 4; i++) {
+      assertEquals(data[3 - i], scratch[i]);
+    }
+  }
+
+  public void testCopyFromFSTReader() throws IOException {
+    SavedFST saved = buildSavedFst();
+    OnHeapFSTStore source =
+        new OnHeapFSTStore(MAX_BLOCK_BITS, saved.dataInput(), saved.metadata.getNumBytes());
+    DirectBufferFSTStore copied = new DirectBufferFSTStore(source, saved.metadata.getNumBytes());
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream(saved.dataBytes.length);
+    copied.writeTo(new OutputStreamDataOutput(out));
+    assertArrayEquals(saved.dataBytes, out.toByteArray());
+    assertNoLargeByteArray(copied);
+  }
+
+  private static FST<Long> loadOnHeap(SavedFST saved) throws IOException {
+    return new FST<>(saved.metadata, saved.dataInput());
+  }
+
+  private static FST<Long> loadDirect(SavedFST saved) throws IOException {
+    return FST.loadDirect(saved.metadata, saved.dataInput());
+  }
+
+  private static SavedFST buildSavedFst() throws IOException {
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
+    FSTCompiler<Long> compiler =
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
+    IntsRefBuilder scratch = new IntsRefBuilder();
+    for (int i = 0; i < 256; i++) {
+      scratch.clear();
+      scratch.append(i);
+      compiler.add(scratch.get(), (long) i);
+    }
+    FST.FSTMetadata<Long> metadata = compiler.compile();
+    FST<Long> fst = FST.fromFSTReader(metadata, compiler.getFSTReader());
+
+    ByteArrayOutputStream metaBytes = new ByteArrayOutputStream();
+    ByteArrayOutputStream dataBytes = new ByteArrayOutputStream();
+    fst.save(new OutputStreamDataOutput(metaBytes), new OutputStreamDataOutput(dataBytes));
+    return new SavedFST(
+        metaBytes.toByteArray(), dataBytes.toByteArray(), outputs);
+  }
+
+  private static void assertNoLargeByteArray(Object root) {
+    List<byte[]> largeArrays = new ArrayList<>();
+    RamUsageTester.ramUsed(
+        root,
+        new RamUsageTester.Accumulator() {
+          @Override
+          public long accumulateArray(
+              Object array, long shallowSize, List<Object> values, Collection<Object> queue) {
+            if (array instanceof byte[] bytes && bytes.length > 1_000_000) {
+              largeArrays.add(bytes);
+            }
+            return super.accumulateArray(array, shallowSize, values, queue);
+          }
+        });
+    assertEquals(0, largeArrays.size());
+  }
+
+  private static final class SavedFST {
+    final byte[] metaBytes;
+    final byte[] dataBytes;
+    final FST.FSTMetadata<Long> metadata;
+
+    SavedFST(byte[] metaBytes, byte[] dataBytes, PositiveIntOutputs outputs)
+        throws IOException {
+      this.metaBytes = metaBytes;
+      this.dataBytes = dataBytes;
+      this.metadata = FST.readMetadata(new ByteArrayDataInput(metaBytes), outputs);
+    }
+
+    DataInput dataInput() {
+      return new ByteArrayDataInput(dataBytes);
+    }
+  }
+}