From 5aa8ec264c8f7ddc372aab80a63dbfab43fc6dfb Mon Sep 17 00:00:00 2001 From: Ashwin Krishna Kumar Date: Tue, 17 Feb 2026 20:44:41 +0530 Subject: [PATCH] Fix automatic ordinal renumbering for >350M nodes --- .../graph/disk/AbstractGraphIndexWriter.java | 33 ++++-- .../jvector/graph/disk/OnDiskGraphIndex.java | 14 ++- .../jvector/graph/disk/OrdinalMapper.java | 104 ++++++++++++++++++ .../graph/disk/TestOnDiskGraphIndex.java | 10 +- 4 files changed, 143 insertions(+), 18 deletions(-) diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java index a5ff739f3..89965fc8e 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/AbstractGraphIndexWriter.java @@ -18,6 +18,7 @@ import io.github.jbellis.jvector.disk.IndexWriter; import io.github.jbellis.jvector.graph.ImmutableGraphIndex; +import io.github.jbellis.jvector.graph.disk.OrdinalMapper.MutableOrdinalMapper; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; @@ -137,25 +138,33 @@ boolean isSeparated(Feature feature) { /** * Computes sequential renumbering for graph ordinals. + * It is NOT safe to call this while the graph is being modified. * @param graph the graph index to renumber * @return a Map of old to new graph ordinals where the new ordinals are sequential starting at 0, * while preserving the original relative ordering in `graph`. That is, for all node ids i and j, * if i < j in `graph` then map[i] < map[j] in the returned map. "Holes" left by * deleted nodes are filled in by shifting down the new ordinals. 
*/ - public static Map sequentialRenumbering(ImmutableGraphIndex graph) { - try (var view = graph.getView()) { - Int2IntHashMap oldToNewMap = new Int2IntHashMap(-1); - int nextOrdinal = 0; - for (int i = 0; i < view.getIdUpperBound(); i++) { - if (graph.containsNode(i)) { - oldToNewMap.put(i, nextOrdinal++); - } + static OrdinalMapper sequentialRenumbering(ImmutableGraphIndex graph) { + + var oldOrdinalUpperBound = graph.getIdUpperBound(); + var newOrdinalUpperBound = graph.size(0); + + MutableOrdinalMapper mapper; + // if the graph is sufficiently sparse, use a sparse mapper + if ((oldOrdinalUpperBound / 8) > newOrdinalUpperBound) { + mapper = new OrdinalMapper.SparseOrdinalMapper(); + } else { + mapper = new OrdinalMapper.DenseOrdinalMapper(oldOrdinalUpperBound, newOrdinalUpperBound); + } + + int nextOrdinal = 0; + for (int i = 0; i < oldOrdinalUpperBound; i++) { + if (graph.containsNode(i)) { + mapper.set(i, nextOrdinal++); } - return oldToNewMap; - } catch (Exception e) { - throw new RuntimeException(e); } + return mapper; } /** @@ -397,7 +406,7 @@ public K build() throws IOException { } if (ordinalMapper == null) { - ordinalMapper = new OrdinalMapper.MapMapper(sequentialRenumbering(graphIndex)); + ordinalMapper = sequentialRenumbering(graphIndex); } return reallyBuild(dimension); } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java index 9ab122392..9bfc889f1 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java @@ -713,7 +713,19 @@ public static void write(ImmutableGraphIndex graph, Path path) throws IOException { - try (var writer = new OnDiskGraphIndexWriter.Builder(graph, path).withMap(oldToNewOrdinals) + write(graph, vectors, new OrdinalMapper.MapMapper(oldToNewOrdinals), path); + } + + 
/** Convenience function for writing a vanilla DiskANN-style index with no extra Features. */ + @VisibleForTesting + static void write(ImmutableGraphIndex graph, + RandomAccessVectorValues vectors, + OrdinalMapper ordinalMapper, + Path path) + throws IOException + { + try (var writer = new OnDiskGraphIndexWriter.Builder(graph, path) + .withMapper(ordinalMapper) .with(new InlineVectors(vectors.dimension())) .build()) { diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java index 526241eff..c044186a6 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java @@ -18,6 +18,7 @@ import org.agrona.collections.Int2IntHashMap; +import java.util.Arrays; import java.util.Map; /** @@ -106,4 +107,107 @@ public int newToOld(int newOrdinal) { return newToOld.get(newOrdinal); } } + + interface MutableOrdinalMapper extends OrdinalMapper { + /** + * Adds a mapping between oldOrdinal <-> newOrdinal + * neither oldOrdinal nor newOrdinal should be {@link OrdinalMapper#OMITTED} + */ + void set(int oldOrdinal, int newOrdinal); + } + + /** + * An OrdinalMapper implementation that works best when the ordinal mappings + * are sparse. + * Not thread safe for concurrent reads and writes. 
+ */ + class SparseOrdinalMapper implements MutableOrdinalMapper { + + private final Int2IntHashMap oldToNew; + private final Int2IntHashMap newToOld; + private int maxOrdinal; + + /** + * Creates an empty ordinal mapper + */ + SparseOrdinalMapper() { + oldToNew = new Int2IntHashMap(OMITTED); + newToOld = new Int2IntHashMap(OMITTED); + maxOrdinal = Integer.MIN_VALUE; + } + + @Override + public void set(int oldOrdinal, int newOrdinal) { + oldToNew.put(oldOrdinal, newOrdinal); + newToOld.put(newOrdinal, oldOrdinal); + if (newOrdinal > maxOrdinal) { + maxOrdinal = newOrdinal; + } + } + + @Override + public int maxOrdinal() { + return maxOrdinal; + } + + @Override + public int oldToNew(int oldOrdinal) { + return oldToNew.get(oldOrdinal); + } + + @Override + public int newToOld(int newOrdinal) { + return newToOld.get(newOrdinal); + } + } + + /** + * An OrdinalMapper implementation that works best when ordinal mappings + * are dense. + * Not thread safe for concurrent reads and writes. + */ + class DenseOrdinalMapper implements MutableOrdinalMapper { + + private final int[] oldToNew; + private final int[] newToOld; + private int maxOrdinal; + + /** + * Preallocates space based on the provided inputs. + * The bounds do not need to be precise and will not affect the + * return value of {@link #maxOrdinal()}. + * This mapper cannot be resized. 
+ */ + DenseOrdinalMapper(int oldOrdinalUpperBound, int newOrdinalUpperBound) { + oldToNew = new int[oldOrdinalUpperBound]; + newToOld = new int[oldOrdinalUpperBound]; + Arrays.fill(oldToNew, OMITTED); + Arrays.fill(newToOld, OMITTED); + maxOrdinal = Integer.MIN_VALUE; + } + + @Override + public void set(int oldOrdinal, int newOrdinal) { + oldToNew[oldOrdinal] = newOrdinal; + newToOld[newOrdinal] = oldOrdinal; + if (newOrdinal > maxOrdinal) { + maxOrdinal = newOrdinal; + } + } + + @Override + public int maxOrdinal() { + return maxOrdinal; + } + + @Override + public int oldToNew(int oldOrdinal) { + return oldToNew[oldOrdinal]; + } + + @Override + public int newToOld(int newOrdinal) { + return newToOld[newOrdinal]; + } + } } diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java index 29a8dca29..84ae99f52 100644 --- a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java @@ -119,14 +119,14 @@ public void testRenumberingOnDelete(boolean addHierarchy) throws IOException { assertTrue(getNeighborNodes(originalView, 0, 2).contains(1)); // create renumbering map - Map oldToNewMap = OnDiskGraphIndexWriter.sequentialRenumbering(original); - assertEquals(2, oldToNewMap.size()); - assertEquals(0, (int) oldToNewMap.get(1)); - assertEquals(1, (int) oldToNewMap.get(2)); + OrdinalMapper ordinalMapper = OnDiskGraphIndexWriter.sequentialRenumbering(original); + assertEquals(2, ordinalMapper.maxOrdinal() + 1); // number of elements = max + 1 + assertEquals(0, ordinalMapper.oldToNew(1)); + assertEquals(1, ordinalMapper.oldToNew(2)); // write the graph var outputPath = testDirectory.resolve("renumbered_graph"); - OnDiskGraphIndex.write(original, ravv, oldToNewMap, outputPath); + OnDiskGraphIndex.write(original, 
ravv, ordinalMapper, outputPath); // check that written graph ordinals match the new ones try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath()); var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);