From 7135b9752c0ed03fa2eb054a1eee61326baa9f1d Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 17 Feb 2026 14:56:10 -0500 Subject: [PATCH 1/2] monitor both test and cache directories --- .../github/jbellis/jvector/example/Grid.java | 14 +- .../diagnostics/BenchmarkDiagnostics.java | 75 +++-- .../diagnostics/DiskUsageMonitor.java | 316 ++++++++++++++---- 3 files changed, 317 insertions(+), 88 deletions(-) diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java index 1e24f7fd8..e18880399 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java @@ -29,6 +29,7 @@ import io.github.jbellis.jvector.example.benchmarks.diagnostics.BenchmarkDiagnostics; import io.github.jbellis.jvector.example.benchmarks.diagnostics.DiagnosticLevel; import io.github.jbellis.jvector.example.benchmarks.datasets.DataSet; +import io.github.jbellis.jvector.example.benchmarks.diagnostics.DiskUsageMonitor; import io.github.jbellis.jvector.example.reporting.*; import io.github.jbellis.jvector.example.reporting.RunArtifacts; import io.github.jbellis.jvector.example.util.CompressorParameters; @@ -225,10 +226,10 @@ static void runOneGraph(OnDiskGraphIndexCache cache, DataSet ds, Path workDirectory) throws IOException { - // TODO this does not capture disk usage for cached indexes. Need to update // Capture initial memory and disk state var diagnostics = new BenchmarkDiagnostics(getDiagnosticLevel()); - diagnostics.setMonitoredDirectory(workDirectory); + diagnostics.startMonitoring("testDirectory", workDirectory); + diagnostics.startMonitoring("indexCache", Paths.get(indexCacheDir)); diagnostics.capturePrePhaseSnapshot("Graph Build"); Map, ImmutableGraphIndex> indexes = new HashMap<>(); @@ -771,7 +772,8 @@ public static List runAllAndCollectResults( try { // Capture initial state var diagnostics = new BenchmarkDiagnostics(getDiagnosticLevel()); - diagnostics.setMonitoredDirectory(testDirectory); + diagnostics.startMonitoring("testDirectory", testDirectory); + diagnostics.startMonitoring("indexCache", Paths.get(indexCacheDir)); diagnostics.capturePrePhaseSnapshot("Build"); Map, ImmutableGraphIndex> indexes = new HashMap<>(); @@ -822,7 +824,7 @@ public static List runAllAndCollectResults( diagnostics.capturePostPhaseSnapshot("Build"); diagnostics.printDiskStatistics("Graph Index Build"); var buildSnapshot = diagnostics.getLatestSystemSnapshot(); - var buildDiskSnapshot = diagnostics.getLatestDiskSnapshot(); + DiskUsageMonitor.MultiDirectorySnapshot buildDiskSnapshot = diagnostics.getLatestDiskSnapshot(); try (ConfiguredSystem cs = new ConfiguredSystem(ds, index, cvArg, features)) { int queryRuns = 2; @@ -876,8 +878,8 @@ public static List runAllAndCollectResults( // Add disk metrics if available if (buildDiskSnapshot != null) { - allMetrics.put("Disk Usage (MB)", buildDiskSnapshot.totalBytes / 1024.0 / 1024.0); - allMetrics.put("File Count", buildDiskSnapshot.fileCount); + allMetrics.put("Disk Usage (MB)", buildDiskSnapshot.getTotalBytes() / 1024.0 / 1024.0); + allMetrics.put("File Count", buildDiskSnapshot.getTotalFileCount()); } results.add(new BenchResult(ds.getName(), params, allMetrics)); diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java index d97fdd510..63461a113 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java @@ -36,9 +36,8 @@ public class BenchmarkDiagnostics implements AutoCloseable { private final DiskUsageMonitor diskUsageMonitor; private final PerformanceAnalyzer performanceAnalyzer; private final List snapshots; - private final List diskSnapshots; + private final List diskSnapshots; private final List timingAnalyses; - private Path monitoredDirectory; private boolean diskMonitorStarted = false; public BenchmarkDiagnostics(DiagnosticLevel level) { @@ -49,7 +48,6 @@ public BenchmarkDiagnostics(DiagnosticLevel level) { this.snapshots = new ArrayList<>(); this.diskSnapshots = new ArrayList<>(); this.timingAnalyses = new ArrayList<>(); - this.monitoredDirectory = null; } /** @@ -79,12 +77,27 @@ public static BenchmarkDiagnostics createVerbose() { * * @param directory the directory to monitor * @throws IOException if unable to start monitoring + * @deprecated Use {@link #startMonitoring(String, Path)} instead */ + @Deprecated public void setMonitoredDirectory(Path directory) throws IOException { - this.monitoredDirectory = directory; - if (directory != null && !diskMonitorStarted) { - diskUsageMonitor.start(directory); + startMonitoring("default", directory); + } + + /** + * Starts monitoring a labeled directory for disk usage. + * This should be called before capturing any snapshots for optimal performance. + * + * @param label a label to identify this directory in reports + * @param directory the directory to monitor + * @throws IOException if unable to start monitoring + */ + public void startMonitoring(String label, Path directory) throws IOException { + if (!diskMonitorStarted) { + diskUsageMonitor.startMonitoring(label, directory); diskMonitorStarted = true; + } else { + diskUsageMonitor.addDirectory(label, directory); } } @@ -95,12 +108,12 @@ public void capturePrePhaseSnapshot(String phase) { SystemMonitor.SystemSnapshot snapshot = systemMonitor.captureSnapshot(); snapshots.add(snapshot); - // Capture disk usage if directory is set - if (monitoredDirectory != null) { + // Capture disk usage if monitoring is started + if (diskMonitorStarted) { try { - DiskUsageMonitor.DiskUsageSnapshot diskSnapshot = diskUsageMonitor.captureSnapshot(monitoredDirectory); + DiskUsageMonitor.MultiDirectorySnapshot diskSnapshot = diskUsageMonitor.captureSnapshot(); diskSnapshots.add(diskSnapshot); - } catch (IOException e) { + } catch (Exception e) { if (level != DiagnosticLevel.NONE) { System.err.printf("[%s] Failed to capture disk usage: %s%n", phase, e.getMessage()); } @@ -127,15 +140,15 @@ public void capturePostPhaseSnapshot(String phase) { snapshots.add(postSnapshot); // Capture and log disk usage changes - if (monitoredDirectory != null) { + if (diskMonitorStarted) { try { - DiskUsageMonitor.DiskUsageSnapshot postDiskSnapshot = diskUsageMonitor.captureSnapshot(monitoredDirectory); + DiskUsageMonitor.MultiDirectorySnapshot postDiskSnapshot = diskUsageMonitor.captureSnapshot(); if (!diskSnapshots.isEmpty() && level != DiagnosticLevel.NONE) { - DiskUsageMonitor.DiskUsageSnapshot preDiskSnapshot = diskSnapshots.get(diskSnapshots.size() - 1); + DiskUsageMonitor.MultiDirectorySnapshot preDiskSnapshot = diskSnapshots.get(diskSnapshots.size() - 1); diskUsageMonitor.logDifference(phase, preDiskSnapshot, postDiskSnapshot); } diskSnapshots.add(postDiskSnapshot); - } catch (IOException e) { + } catch (Exception e) { if (level != DiagnosticLevel.NONE) { System.err.printf("[%s] Failed to capture disk usage: %s%n", phase, e.getMessage()); } @@ -223,7 +236,7 @@ public SystemMonitor.SystemSnapshot getLatestSystemSnapshot() { /** * Gets the latest disk usage snapshot, or null if none captured */ - public DiskUsageMonitor.DiskUsageSnapshot getLatestDiskSnapshot() { + public DiskUsageMonitor.MultiDirectorySnapshot getLatestDiskSnapshot() { return diskSnapshots.isEmpty() ? null : diskSnapshots.get(diskSnapshots.size() - 1); } @@ -298,17 +311,33 @@ public void logSummary() { public void printDiskStatistics(String label) { // Disk usage summary if (!diskSnapshots.isEmpty()) { - DiskUsageMonitor.DiskUsageSnapshot firstDisk = diskSnapshots.get(0); - DiskUsageMonitor.DiskUsageSnapshot lastDisk = diskSnapshots.get(diskSnapshots.size() - 1); - DiskUsageMonitor.DiskUsageSnapshot totalDisk = lastDisk.subtract(firstDisk); + DiskUsageMonitor.MultiDirectorySnapshot firstDisk = diskSnapshots.get(0); + DiskUsageMonitor.MultiDirectorySnapshot lastDisk = diskSnapshots.get(diskSnapshots.size() - 1); + DiskUsageMonitor.MultiDirectorySnapshot totalDisk = lastDisk.subtract(firstDisk); System.out.printf("\nDisk Usage Summary %s:%n", label); - System.out.printf(" Total Disk Used: %s%n", DiskUsageMonitor.formatBytes(lastDisk.totalBytes)); - System.out.printf(" Total Files: %d%n", lastDisk.fileCount); - System.out.printf(" Net Change: %s, %+d files%n", - DiskUsageMonitor.formatBytes(totalDisk.totalBytes), totalDisk.fileCount); + + // Print statistics for each monitored directory + for (String dirLabel : lastDisk.snapshots.keySet()) { + DiskUsageMonitor.DiskUsageSnapshot lastSnap = lastDisk.get(dirLabel); + DiskUsageMonitor.DiskUsageSnapshot totalSnap = totalDisk.get(dirLabel); + + System.out.printf(" [%s]:%n", dirLabel); + System.out.printf(" Total Disk Used: %s%n", DiskUsageMonitor.formatBytes(lastSnap.totalBytes)); + System.out.printf(" Total Files: %d%n", lastSnap.fileCount); + if (totalSnap != null) { + System.out.printf(" Net Change: %s, %+d files%n", + DiskUsageMonitor.formatBytes(totalSnap.totalBytes), totalSnap.fileCount); + } + } + + // Print overall totals + System.out.printf(" [Overall Total]:%n"); + System.out.printf(" Total Disk Used: %s%n", DiskUsageMonitor.formatBytes(lastDisk.getTotalBytes())); + System.out.printf(" Total Files: %d%n", lastDisk.getTotalFileCount()); + System.out.printf(" Net Change: %s, %+d files%n", + DiskUsageMonitor.formatBytes(totalDisk.getTotalBytes()), totalDisk.getTotalFileCount()); } - } /** diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/DiskUsageMonitor.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/DiskUsageMonitor.java index fca9adb01..c00d27469 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/DiskUsageMonitor.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/DiskUsageMonitor.java @@ -48,49 +48,79 @@ public class DiskUsageMonitor implements AutoCloseable { private Thread watchThread; private volatile boolean running; - // Current state (thread-safe) - private final AtomicLong totalBytes = new AtomicLong(0); - private final AtomicLong fileCount = new AtomicLong(0); + // Multi-directory tracking + private final Map directoryStats = new ConcurrentHashMap<>(); // Directory and file tracking private final Map watchKeyToPath = new ConcurrentHashMap<>(); - private final Map fileSizeCache = new ConcurrentHashMap<>(); - private Path rootDirectory; + private final Map fileSizeCache = new ConcurrentHashMap<>(); // Monitoring state private volatile boolean started = false; + /** + * Internal class to track statistics for a single directory + */ + private static class DirectoryStats { + final String label; + final Path path; + final AtomicLong totalBytes = new AtomicLong(0); + final AtomicLong fileCount = new AtomicLong(0); + + DirectoryStats(String label, Path path) { + this.label = label; + this.path = path; + } + } + + /** + * Internal class to track which directory a file belongs to + */ + private static class DirectoryFileInfo { + final String directoryLabel; + final long size; + + DirectoryFileInfo(String directoryLabel, long size) { + this.directoryLabel = directoryLabel; + this.size = size; + } + } + /** * Starts monitoring the specified directory for filesystem changes. * Performs an initial scan to establish baseline, then monitors changes incrementally. - * + * * @param directory the directory to monitor * @throws IOException if unable to start monitoring * @throws IllegalStateException if already started + * @deprecated Use {@link #startMonitoring(String, Path)} instead */ + @Deprecated public void start(Path directory) throws IOException { + startMonitoring("default", directory); + } + + /** + * Starts monitoring a single labeled directory for filesystem changes. + * Performs an initial scan to establish baseline, then monitors changes incrementally. + * + * @param label a label to identify this directory in reports + * @param directory the directory to monitor + * @throws IOException if unable to start monitoring + * @throws IllegalStateException if already started + */ + public void startMonitoring(String label, Path directory) throws IOException { if (started) { - throw new IllegalStateException("Monitor already started"); - } - - if (!Files.exists(directory)) { - // Directory doesn't exist yet, initialize with zero values - started = true; - return; + throw new IllegalStateException("Monitor already started. Use addDirectory() to add more directories."); } - this.rootDirectory = directory; this.watchService = FileSystems.getDefault().newWatchService(); - // Perform initial scan to establish baseline - performInitialScan(directory); - - // Register watchers recursively - registerRecursive(directory); + addDirectory(label, directory); // Start event processing thread running = true; - watchThread = new Thread(this::processEvents, "DiskUsageMonitor-" + directory.getFileName()); + watchThread = new Thread(this::processEvents, "DiskUsageMonitor"); watchThread.setDaemon(true); watchThread.start(); @@ -98,28 +128,82 @@ public void start(Path directory) throws IOException { } /** - * Captures a snapshot of current disk usage. - * This is an O(1) operation that returns cached values, unlike the previous - * implementation which performed full directory traversal. - * - * @return snapshot of current disk usage + * Adds an additional directory to monitor. Must be called after startMonitoring(). + * + * @param label a label to identify this directory in reports + * @param directory the directory to monitor + * @throws IOException if unable to monitor the directory + * @throws IllegalStateException if not yet started */ - public DiskUsageSnapshot captureSnapshot() { - return new DiskUsageSnapshot(totalBytes.get(), fileCount.get()); + public void addDirectory(String label, Path directory) throws IOException { + if (!started && watchService == null) { + throw new IllegalStateException("Must call startMonitoring() before addDirectory()"); + } + + if (directoryStats.containsKey(label)) { + throw new IllegalArgumentException("Directory with label '" + label + "' already being monitored"); + } + + DirectoryStats stats = new DirectoryStats(label, directory); + directoryStats.put(label, stats); + + if (!Files.exists(directory)) { + // Directory doesn't exist yet, initialize with zero values + return; + } + + // Perform initial scan to establish baseline + performInitialScan(label, directory, stats); + + // Register watchers recursively + registerRecursive(directory, label); + } + + /** + * Captures a snapshot of current disk usage across all monitored directories. + * This is an O(1) operation that returns cached values. + * + * @return snapshot of current disk usage for all directories + */ + public MultiDirectorySnapshot captureSnapshot() { + Map snapshots = new java.util.HashMap<>(); + for (Map.Entry entry : directoryStats.entrySet()) { + DirectoryStats stats = entry.getValue(); + snapshots.put(entry.getKey(), new DiskUsageSnapshot(stats.totalBytes.get(), stats.fileCount.get())); + } + return new MultiDirectorySnapshot(snapshots); + } + + /** + * Captures disk usage for a specific labeled directory. + * + * @param label the label of the directory to capture + * @return snapshot of disk usage for the specified directory, or null if not found + */ + public DiskUsageSnapshot captureSnapshot(String label) { + DirectoryStats stats = directoryStats.get(label); + if (stats == null) { + return null; + } + return new DiskUsageSnapshot(stats.totalBytes.get(), stats.fileCount.get()); } /** * Captures disk usage for a directory without starting continuous monitoring. * This is a fallback method for compatibility with the old API. - * + * * @param directory the directory to scan * @return snapshot of disk usage * @throws IOException if unable to scan directory + * @deprecated Use labeled monitoring instead */ + @Deprecated public DiskUsageSnapshot captureSnapshot(Path directory) throws IOException { - if (started && directory.equals(rootDirectory)) { - // Use cached values if monitoring this directory - return captureSnapshot(); + // Check if this directory is being monitored + for (Map.Entry entry : directoryStats.entrySet()) { + if (entry.getValue().path.equals(directory)) { + return captureSnapshot(entry.getKey()); + } } // Fallback to one-time scan for compatibility @@ -127,8 +211,40 @@ public DiskUsageSnapshot captureSnapshot(Path directory) throws IOException { } /** - * Logs the difference between two disk usage snapshots + * Logs the difference between two multi-directory snapshots */ + public void logDifference(String phase, MultiDirectorySnapshot before, MultiDirectorySnapshot after) { + System.out.printf("[%s] Disk Usage Changes:%n", phase); + + for (String label : after.snapshots.keySet()) { + DiskUsageSnapshot beforeSnap = before.snapshots.get(label); + DiskUsageSnapshot afterSnap = after.snapshots.get(label); + + if (beforeSnap == null) { + // New directory added + System.out.printf(" [%s] (new): %s, %d files%n", + label, + formatBytes(afterSnap.totalBytes), + afterSnap.fileCount); + } else { + long sizeDiff = afterSnap.totalBytes - beforeSnap.totalBytes; + long filesDiff = afterSnap.fileCount - beforeSnap.fileCount; + + System.out.printf(" [%s] Size: %s (change: %s), Files: %d (change: %+d)%n", + label, + formatBytes(afterSnap.totalBytes), + formatBytesDiff(sizeDiff), + afterSnap.fileCount, + filesDiff); + } + } + } + + /** + * Logs the difference between two single-directory snapshots (legacy method) + * @deprecated Use {@link #logDifference(String, MultiDirectorySnapshot, MultiDirectorySnapshot)} instead + */ + @Deprecated public void logDifference(String phase, DiskUsageSnapshot before, DiskUsageSnapshot after) { long sizeDiff = after.totalBytes - before.totalBytes; long filesDiff = after.fileCount - before.fileCount; @@ -175,17 +291,17 @@ public void close() throws IOException { /** * Performs initial directory scan to establish baseline metrics */ - private void performInitialScan(Path directory) throws IOException { + private void performInitialScan(String label, Path directory, DirectoryStats stats) throws IOException { AtomicLong size = new AtomicLong(0); AtomicLong count = new AtomicLong(0); - Files.walkFileTree(directory, new SimpleFileVisitor() { + Files.walkFileTree(directory, new SimpleFileVisitor<>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { long fileSize = attrs.size(); size.addAndGet(fileSize); count.incrementAndGet(); - fileSizeCache.put(file, fileSize); + fileSizeCache.put(file, new DirectoryFileInfo(label, fileSize)); return FileVisitResult.CONTINUE; } @@ -196,8 +312,8 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) { } }); - totalBytes.set(size.get()); - fileCount.set(count.get()); + stats.totalBytes.set(size.get()); + stats.fileCount.set(count.get()); } /** @@ -211,7 +327,7 @@ private DiskUsageSnapshot performOneTimeScan(Path directory) throws IOException AtomicLong size = new AtomicLong(0); AtomicLong count = new AtomicLong(0); - Files.walkFileTree(directory, new SimpleFileVisitor() { + Files.walkFileTree(directory, new SimpleFileVisitor<>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { size.addAndGet(attrs.size()); @@ -231,8 +347,8 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) { /** * Registers watchers for a directory and all its subdirectories */ - private void registerRecursive(Path directory) throws IOException { - Files.walkFileTree(directory, new SimpleFileVisitor() { + private void registerRecursive(Path directory, String label) throws IOException { + Files.walkFileTree(directory, new SimpleFileVisitor<>() { @Override public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { WatchKey key = dir.register(watchService, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY); @@ -247,6 +363,18 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) { }); } + /** + * Determines which directory label a path belongs to + */ + private String getDirectoryLabel(Path path) { + for (Map.Entry entry : directoryStats.entrySet()) { + if (path.startsWith(entry.getValue().path)) { + return entry.getKey(); + } + } + return null; + } + /** * Event processing loop - runs in background thread */ @@ -314,19 +442,26 @@ private void handleCreate(Path path) throws IOException { return; // File may have been deleted before we could process } + String label = getDirectoryLabel(path); + if (label == null) { + return; // Path not under any monitored directory + } + + DirectoryStats stats = directoryStats.get(label); + if (Files.isDirectory(path)) { // Register watcher for new directory WatchKey key = path.register(watchService, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY); watchKeyToPath.put(key, path); // Scan new directory for existing files - Files.walkFileTree(path, new SimpleFileVisitor() { + Files.walkFileTree(path, new SimpleFileVisitor<>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { long size = attrs.size(); - fileSizeCache.put(file, size); - totalBytes.addAndGet(size); - fileCount.incrementAndGet(); + fileSizeCache.put(file, new DirectoryFileInfo(label, size)); + stats.totalBytes.addAndGet(size); + stats.fileCount.incrementAndGet(); return FileVisitResult.CONTINUE; } @@ -342,9 +477,9 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) th }); } else if (Files.isRegularFile(path)) { long size = Files.size(path); - fileSizeCache.put(path, size); - totalBytes.addAndGet(size); - fileCount.incrementAndGet(); + fileSizeCache.put(path, new DirectoryFileInfo(label, size)); + stats.totalBytes.addAndGet(size); + stats.fileCount.incrementAndGet(); } } @@ -352,10 +487,13 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) th * Handles file/directory deletion events */ private void handleDelete(Path path) { - Long size = fileSizeCache.remove(path); - if (size != null) { - totalBytes.addAndGet(-size); - fileCount.decrementAndGet(); + DirectoryFileInfo info = fileSizeCache.remove(path); + if (info != null) { + DirectoryStats stats = directoryStats.get(info.directoryLabel); + if (stats != null) { + stats.totalBytes.addAndGet(-info.size); + stats.fileCount.decrementAndGet(); + } } // Note: For directories, we rely on individual file deletion events // rather than trying to recursively process the deleted directory @@ -369,16 +507,22 @@ private void handleModify(Path path) throws IOException { return; } + String label = getDirectoryLabel(path); + if (label == null) { + return; // Path not under any monitored directory + } + + DirectoryStats stats = directoryStats.get(label); long newSize = Files.size(path); - Long oldSize = fileSizeCache.put(path, newSize); + DirectoryFileInfo oldInfo = fileSizeCache.put(path, new DirectoryFileInfo(label, newSize)); - if (oldSize != null) { - long delta = newSize - oldSize; - totalBytes.addAndGet(delta); + if (oldInfo != null) { + long delta = newSize - oldInfo.size; + stats.totalBytes.addAndGet(delta); } else { // File wasn't in cache (shouldn't happen, but handle gracefully) - totalBytes.addAndGet(newSize); - fileCount.incrementAndGet(); + stats.totalBytes.addAndGet(newSize); + stats.fileCount.incrementAndGet(); } } @@ -421,9 +565,63 @@ public DiskUsageSnapshot(long totalBytes, long fileCount) { public DiskUsageSnapshot subtract(DiskUsageSnapshot other) { return new DiskUsageSnapshot( - this.totalBytes - other.totalBytes, - this.fileCount - other.fileCount + this.totalBytes - other.totalBytes, + this.fileCount - other.fileCount ); } + + } + /** + * Data class representing disk usage across multiple directories + */ + public static class MultiDirectorySnapshot { + public final Map snapshots; + + public MultiDirectorySnapshot(Map snapshots) { + this.snapshots = new java.util.HashMap<>(snapshots); + } + + /** + * Get the snapshot for a specific directory label + */ + public DiskUsageSnapshot get(String label) { + return snapshots.get(label); + } + + /** + * Get total bytes across all directories + */ + public long getTotalBytes() { + return snapshots.values().stream() + .mapToLong(s -> s.totalBytes) + .sum(); + } + + /** + * Get total file count across all directories + */ + public long getTotalFileCount() { + return snapshots.values().stream() + .mapToLong(s -> s.fileCount) + .sum(); + } + + /** + * Subtract another multi-directory snapshot from this one + */ + public MultiDirectorySnapshot subtract(MultiDirectorySnapshot other) { + Map result = new java.util.HashMap<>(); + for (Map.Entry entry : snapshots.entrySet()) { + String label = entry.getKey(); + DiskUsageSnapshot thisSnap = entry.getValue(); + DiskUsageSnapshot otherSnap = other.snapshots.get(label); + if (otherSnap != null) { + result.put(label, thisSnap.subtract(otherSnap)); + } else { + result.put(label, thisSnap); + } + } + return new MultiDirectorySnapshot(result); + } } } From 0b7e42ebca7a114aa9a048a4e3a945335eefb470 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 17 Feb 2026 15:46:14 -0500 Subject: [PATCH 2/2] resource management --- .../github/jbellis/jvector/example/Grid.java | 171 +++++++++--------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java index e18880399..211c70ef3 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java @@ -227,106 +227,107 @@ static void runOneGraph(OnDiskGraphIndexCache cache, Path workDirectory) throws IOException { // Capture initial memory and disk state - var diagnostics = new BenchmarkDiagnostics(getDiagnosticLevel()); - diagnostics.startMonitoring("testDirectory", workDirectory); - diagnostics.startMonitoring("indexCache", Paths.get(indexCacheDir)); - diagnostics.capturePrePhaseSnapshot("Graph Build"); + try (var diagnostics = new BenchmarkDiagnostics(getDiagnosticLevel())) { + diagnostics.startMonitoring("testDirectory", workDirectory); + diagnostics.startMonitoring("indexCache", Paths.get(indexCacheDir)); + diagnostics.capturePrePhaseSnapshot("Graph Build"); + + Map, ImmutableGraphIndex> indexes = new HashMap<>(); + if (buildCompressor == null) { + indexes = buildInMemory(featureSets, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, ds, workDirectory); + } else { + // If cache is disabled, we use the (tmp) workDirectory as the output + Path outputDir = cache.isEnabled() ? cache.cacheDir().toAbsolutePath() : workDirectory; - Map, ImmutableGraphIndex> indexes = new HashMap<>(); - if (buildCompressor == null) { - indexes = buildInMemory(featureSets, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, ds, workDirectory); - } else { - // If cache is disabled, we use the (tmp) workDirectory as the output - Path outputDir = cache.isEnabled() ? cache.cacheDir().toAbsolutePath() : workDirectory; - - List> missing = new ArrayList<>(); - // Map feature sets to their cache handles for the build phase - Map, OnDiskGraphIndexCache.WriteHandle> handles = new HashMap<>(); - - for (Set fs : featureSets) { - var key = cache.key(ds.getName(), fs, M, efConstruction, neighborOverflow, 1.2f, addHierarchy, refineFinalGraph, buildCompressor); - var cached = cache.tryLoad(key); - - if (cached.isPresent()) { - System.out.printf("%s: Using cached graph index for %s%n", key.datasetName, fs); - indexes.put(fs, cached.get()); - } else { - missing.add(fs); - if (cache.isEnabled()) { - var handle = cache.beginWrite(key, OnDiskGraphIndexCache.Overwrite.ALLOW); - // Log cache miss / build start - System.out.printf("%s: Building graph index (cached enabled) for %s%n", - key.datasetName, fs); - // Prepare the atomic write handle immediately - handles.put(fs, handle); + List> missing = new ArrayList<>(); + // Map feature sets to their cache handles for the build phase + Map, OnDiskGraphIndexCache.WriteHandle> handles = new HashMap<>(); + + for (Set fs : featureSets) { + var key = cache.key(ds.getName(), fs, M, efConstruction, neighborOverflow, 1.2f, addHierarchy, refineFinalGraph, buildCompressor); + var cached = cache.tryLoad(key); + + if (cached.isPresent()) { + System.out.printf("%s: Using cached graph index for %s%n", key.datasetName, fs); + indexes.put(fs, cached.get()); } else { - System.out.printf("%s: Building graph index (cache disabled) for %s%n", key.datasetName, fs); + missing.add(fs); + if (cache.isEnabled()) { + var handle = cache.beginWrite(key, OnDiskGraphIndexCache.Overwrite.ALLOW); + // Log cache miss / build start + System.out.printf("%s: Building graph index (cached enabled) for %s%n", + key.datasetName, fs); + // Prepare the atomic write handle immediately + handles.put(fs, handle); + } else { + System.out.printf("%s: Building graph index (cache disabled) for %s%n", key.datasetName, fs); + } } } - } - if (!missing.isEmpty()) { - // At least one index needs to be built (b/c not in cache or cache is disabled) - // We pass the handles map so buildOnDisk knows exactly where to write - var newIndexes = buildOnDisk(missing, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, - ds, outputDir, buildCompressor, handles); - indexes.putAll(newIndexes); + if (!missing.isEmpty()) { + // At least one index needs to be built (b/c not in cache or cache is disabled) + // We pass the handles map so buildOnDisk knows exactly where to write + var newIndexes = buildOnDisk(missing, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, + ds, outputDir, buildCompressor, handles); + indexes.putAll(newIndexes); + } } - } - // Capture post-build memory and disk state - diagnostics.capturePostPhaseSnapshot("Graph Build"); + // Capture post-build memory and disk state + diagnostics.capturePostPhaseSnapshot("Graph Build"); - diagnostics.printDiskStatistics("Graph Index Build"); - System.out.printf("Index build time: %f seconds%n", Grid.getIndexBuildTimeSeconds(ds.getName())); + diagnostics.printDiskStatistics("Graph Index Build"); + System.out.printf("Index build time: %f seconds%n", Grid.getIndexBuildTimeSeconds(ds.getName())); - try { - for (var cpSupplier : compressionGrid) { - indexes.forEach((features, index) -> { - final Set featureSetForIndex = index instanceof OnDiskGraphIndex ? ((OnDiskGraphIndex) index).getFeatureSet() : Set.of(); - - CompressedVectors cv; - if (featureSetForIndex.contains(FeatureId.FUSED_PQ)) { - cv = null; - System.out.format("Fused graph index%n"); - } else { - var compressor = getCompressor(cpSupplier, ds); + try { + for (var cpSupplier : compressionGrid) { + indexes.forEach((features, index) -> { + final Set featureSetForIndex = index instanceof OnDiskGraphIndex ? ((OnDiskGraphIndex) index).getFeatureSet() : Set.of(); - if (compressor == null) { + CompressedVectors cv; + if (featureSetForIndex.contains(FeatureId.FUSED_PQ)) { cv = null; - System.out.format("Uncompressed vectors%n"); + System.out.format("Fused graph index%n"); } else { - long start = System.nanoTime(); - cv = compressor.encodeAll(ds.getBaseRavv()); - System.out.format("%s encoded %d vectors [%.2f MB] in %.2fs%n", compressor, ds.getBaseVectors().size(), (cv.ramBytesUsed() / 1024f / 1024f), (System.nanoTime() - start) / 1_000_000_000.0); + var compressor = getCompressor(cpSupplier, ds); + + if (compressor == null) { + cv = null; + System.out.format("Uncompressed vectors%n"); + } else { + long start = System.nanoTime(); + cv = compressor.encodeAll(ds.getBaseRavv()); + System.out.format("%s encoded %d vectors [%.2f MB] in %.2fs%n", compressor, ds.getBaseVectors().size(), (cv.ramBytesUsed() / 1024f / 1024f), (System.nanoTime() - start) / 1_000_000_000.0); + } } - } - try (var cs = new ConfiguredSystem(ds, index, cv, featureSetForIndex)) { - testConfiguration(cs, topKGrid, usePruningGrid, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, featureSetForIndex, - artifacts, workDirectory); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - for (var index : indexes.values()) { - index.close(); - } + try (var cs = new ConfiguredSystem(ds, index, cv, featureSetForIndex)) { + testConfiguration(cs, topKGrid, usePruningGrid, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, featureSetForIndex, + artifacts, workDirectory); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + for (var index : indexes.values()) { + index.close(); + } - // Log final diagnostics summary - if (diagnostic_level > 0) { - diagnostics.logSummary(); - } - } finally { - // Clean up the tmp files for this run - for (int n = 0; n < featureSets.size(); n++) { - Path p = workDirectory.resolve("graph" + n); - try { - Files.deleteIfExists(p); - } catch (IOException e) { - // Log and move on; don't let a delete-fail mask a real exception - System.err.println("Cleanup Failed: Could not delete " + p.getFileName() + " -> " + e.getMessage()); + // Log final diagnostics summary + if (diagnostic_level > 0) { + diagnostics.logSummary(); + } + } finally { + // Clean up the tmp files for this run + for (int n = 0; n < featureSets.size(); n++) { + Path p = workDirectory.resolve("graph" + n); + try { + Files.deleteIfExists(p); + } catch (IOException e) { + // Log and move on; don't let a delete-fail mask a real exception + System.err.println("Cleanup Failed: Could not delete " + p.getFileName() + " -> " + e.getMessage()); + } } } }