From e21d054b409bdb6bb0f07da016f766dc7fec5cf2 Mon Sep 17 00:00:00 2001 From: Mihhail Solovjov Date: Tue, 3 Feb 2026 18:55:57 +0200 Subject: [PATCH] feat(cache): add persistent disk cache for registry data Implement a two-tier caching strategy with in-memory and persistent disk storage to improve performance across CLI invocations. The disk cache survives process restarts and reduces network requests for frequently accessed packages. Key features: - 24-hour TTL for disk cache entries - Automatic eviction of oldest entries when cache exceeds 5000 packages - Support for scoped packages with safe filename generation - Seamless integration with npm and jsDelivr registry services - Cache versioning to handle future structure changes --- src/services/index.ts | 1 + src/services/jsdelivr-registry.ts | 41 ++- src/services/npm-registry.ts | 27 +- src/services/persistent-cache.ts | 285 +++++++++++++++++++ test/unit/services/jsdelivr-registry.test.ts | 5 +- test/unit/services/npm-registry.test.ts | 5 +- test/unit/services/persistent-cache.test.ts | 164 +++++++++++ 7 files changed, 515 insertions(+), 13 deletions(-) create mode 100644 src/services/persistent-cache.ts create mode 100644 test/unit/services/persistent-cache.test.ts diff --git a/src/services/index.ts b/src/services/index.ts index d50d376..6c8a607 100644 --- a/src/services/index.ts +++ b/src/services/index.ts @@ -6,3 +6,4 @@ export * from './npm-registry' export * from './jsdelivr-registry' export * from './changelog-fetcher' export * from './version-checker' +export * from './persistent-cache' diff --git a/src/services/jsdelivr-registry.ts b/src/services/jsdelivr-registry.ts index c643e50..7c386f5 100644 --- a/src/services/jsdelivr-registry.ts +++ b/src/services/jsdelivr-registry.ts @@ -2,6 +2,7 @@ import { Pool, request } from 'undici' import * as semver from 'semver' import { CACHE_TTL, JSDELIVR_CDN_URL, MAX_CONCURRENT_REQUESTS, REQUEST_TIMEOUT } from '../config' import { getAllPackageData } from './npm-registry' 
+import { persistentCache } from './persistent-cache' import { OnBatchReadyCallback } from '../types' // Create a persistent connection pool for jsDelivr CDN with optimal settings @@ -129,15 +130,32 @@ export async function getAllPackageDataFromJsdelivr( const fetchPackageWithFallback = async (packageName: string): Promise => { const currentVersion = currentVersions?.get(packageName) - // Try to get from cache first - const cached = packageCache.get(packageName) - if (cached && Date.now() - cached.timestamp < CACHE_TTL) { - packageData.set(packageName, cached.data) + // Try to get from in-memory cache first (fastest) + const memoryCached = packageCache.get(packageName) + if (memoryCached && Date.now() - memoryCached.timestamp < CACHE_TTL) { + packageData.set(packageName, memoryCached.data) completedCount++ if (onProgress) { onProgress(packageName, completedCount, total) } - addToBatch(packageName, cached.data) + addToBatch(packageName, memoryCached.data) + return + } + + // Try persistent disk cache (fast, survives restarts) + const diskCached = persistentCache.get(packageName) + if (diskCached) { + // Also populate in-memory cache for subsequent accesses + packageCache.set(packageName, { + data: diskCached, + timestamp: Date.now(), + }) + packageData.set(packageName, diskCached) + completedCount++ + if (onProgress) { + onProgress(packageName, completedCount, total) + } + addToBatch(packageName, diskCached) return } @@ -169,10 +187,13 @@ export async function getAllPackageDataFromJsdelivr( if (result) { packageData.set(packageName, result) + // Cache in memory packageCache.set(packageName, { data: result, timestamp: Date.now(), }) + // Cache to disk for persistence + persistentCache.set(packageName, result) addToBatch(packageName, result) } @@ -196,11 +217,13 @@ export async function getAllPackageDataFromJsdelivr( allVersions: allVersions.sort(semver.rcompare), } - // Cache the result + // Cache the result in memory packageCache.set(packageName, { data: result, 
timestamp: Date.now(), }) + // Cache to disk for persistence + persistentCache.set(packageName, result) packageData.set(packageName, result) completedCount++ @@ -217,10 +240,13 @@ export async function getAllPackageDataFromJsdelivr( if (result) { packageData.set(packageName, result) + // Cache in memory packageCache.set(packageName, { data: result, timestamp: Date.now(), }) + // Cache to disk for persistence + persistentCache.set(packageName, result) addToBatch(packageName, result) } } catch (npmError) { @@ -240,6 +266,9 @@ export async function getAllPackageDataFromJsdelivr( // Flush any remaining batch items flushBatch() + // Flush persistent cache to disk + persistentCache.flush() + // Clear the progress line and show completion time if no custom progress handler if (!onProgress) { process.stdout.write('\r' + ' '.repeat(80) + '\r') diff --git a/src/services/npm-registry.ts b/src/services/npm-registry.ts index fee8e6f..84b839a 100644 --- a/src/services/npm-registry.ts +++ b/src/services/npm-registry.ts @@ -1,5 +1,6 @@ import * as semver from 'semver' import { CACHE_TTL, NPM_REGISTRY_URL, REQUEST_TIMEOUT } from '../config' +import { persistentCache } from './persistent-cache' // In-memory cache for package data interface CacheEntry { @@ -15,10 +16,21 @@ const packageCache = new Map() async function fetchPackageFromRegistry( packageName: string ): Promise<{ latestVersion: string; allVersions: string[] }> { - // Check cache first - const cached = packageCache.get(packageName) - if (cached && Date.now() - cached.timestamp < CACHE_TTL) { - return cached.data + // Check in-memory cache first (fastest) + const memoryCached = packageCache.get(packageName) + if (memoryCached && Date.now() - memoryCached.timestamp < CACHE_TTL) { + return memoryCached.data + } + + // Check persistent disk cache (fast, survives restarts) + const diskCached = persistentCache.get(packageName) + if (diskCached) { + // Also populate in-memory cache for subsequent accesses + 
packageCache.set(packageName, { + data: diskCached, + timestamp: Date.now(), + }) + return diskCached } try { @@ -70,11 +82,13 @@ async function fetchPackageFromRegistry( allVersions, } - // Cache the result + // Cache the result in memory packageCache.set(packageName, { data: result, timestamp: Date.now(), }) + // Cache to disk for persistence + persistentCache.set(packageName, result) return result } finally { @@ -120,6 +134,9 @@ export async function getAllPackageData( // Wait for all requests to complete await Promise.all(allPromises) + // Flush persistent cache to disk + persistentCache.flush() + // Clear the progress line and show completion time if no custom progress handler if (!onProgress) { process.stdout.write('\r' + ' '.repeat(80) + '\r') diff --git a/src/services/persistent-cache.ts b/src/services/persistent-cache.ts new file mode 100644 index 0000000..14ddf11 --- /dev/null +++ b/src/services/persistent-cache.ts @@ -0,0 +1,285 @@ +import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, unlinkSync } from 'fs' +import { join } from 'path' +import envPaths from 'env-paths' + +/** + * Cache entry structure for package version data + */ +interface PackageCacheEntry { + latestVersion: string + allVersions: string[] + timestamp: number +} + +/** + * Persistent cache index structure (package name -> cache file name and write timestamp) + */ +interface CacheIndex { + version: number + entries: Record<string, { file: string; timestamp: number }> +} + +// Cache TTL: 24 hours for disk cache (much longer than in-memory 5 minutes) +const DISK_CACHE_TTL = 24 * 60 * 60 * 1000 + +// Maximum cache size (number of packages) +const MAX_CACHE_ENTRIES = 5000 + +// Cache file format version (increment when structure changes) +const CACHE_VERSION = 1 + +/** + * Persistent cache manager for package registry data. + * Stores cache on disk for fast repeated runs across CLI invocations. 
+ */ +class PersistentCacheManager { + private cacheDir: string + private indexPath: string + private index: CacheIndex | null = null + private dirty = false + + constructor() { + const paths = envPaths('inup') + this.cacheDir = join(paths.cache, 'registry') + this.indexPath = join(this.cacheDir, 'index.json') + } + + /** + * Ensure cache directory exists + */ + private ensureCacheDir(): void { + if (!existsSync(this.cacheDir)) { + mkdirSync(this.cacheDir, { recursive: true }) + } + } + + /** + * Load cache index from disk + */ + private loadIndex(): CacheIndex { + if (this.index) { + return this.index + } + + try { + if (existsSync(this.indexPath)) { + const content = readFileSync(this.indexPath, 'utf-8') + const parsed = JSON.parse(content) as CacheIndex + + // Invalidate when the version is outdated or the index lacks a usable entries map + if (parsed.version !== CACHE_VERSION || !parsed.entries || typeof parsed.entries !== 'object') { + this.clearCache() + this.index = { version: CACHE_VERSION, entries: {} } + return this.index + } + + this.index = parsed + return this.index + } + } catch { + // Corrupted index, start fresh + } + + this.index = { version: CACHE_VERSION, entries: {} } + return this.index + } + + /** + * Save cache index to disk + */ + private saveIndex(): void { + if (!this.dirty || !this.index) { + return + } + + try { + this.ensureCacheDir() + writeFileSync(this.indexPath, JSON.stringify(this.index), 'utf-8') + this.dirty = false + } catch { + // Silently fail - cache is not critical + } + } + + /** + * Generate a collision-free, filesystem-safe filename for a package name + */ + private getFilename(packageName: string): string { + // Percent-encode rather than dropping '@': '@scope/name' -> '%40scope%2Fname', which cannot collide with 'scope__name' + const safeName = encodeURIComponent(packageName) + return `${safeName}.json` + } + + /** + * Get cached data for a package + */ + get(packageName: string): { latestVersion: string; allVersions: string[] } | null { + const index = this.loadIndex() + const entry = index.entries[packageName] + + if (!entry) { + return null + } + + // Check TTL + if (Date.now() - 
entry.timestamp > DISK_CACHE_TTL) { + // Expired, remove from index + delete index.entries[packageName] + this.dirty = true + return null + } + + // Read the actual cache file + try { + const filePath = join(this.cacheDir, entry.file) + if (!existsSync(filePath)) { + delete index.entries[packageName] + this.dirty = true + return null + } + + const content = readFileSync(filePath, 'utf-8') + const cached = JSON.parse(content) as PackageCacheEntry + if (typeof cached?.latestVersion !== 'string' || !Array.isArray(cached.allVersions)) throw new Error('Malformed cache file') // handled by the catch below + return { + latestVersion: cached.latestVersion, + allVersions: cached.allVersions, + } + } catch { + // Corrupted cache file, remove from index + delete index.entries[packageName] + this.dirty = true + return null + } + } + + /** + * Store data for a package + */ + set(packageName: string, data: { latestVersion: string; allVersions: string[] }): void { + const index = this.loadIndex() + + // Evict old entries only when adding a NEW package at capacity (refreshing an entry does not grow the cache) + const isNewEntry = !Object.prototype.hasOwnProperty.call(index.entries, packageName) + if (isNewEntry && Object.keys(index.entries).length >= MAX_CACHE_ENTRIES) { + this.evictOldest(Math.floor(MAX_CACHE_ENTRIES * 0.1)) // Evict 10% + } + + const filename = this.getFilename(packageName) + const entry: PackageCacheEntry = { + ...data, + timestamp: Date.now(), + } + + try { + this.ensureCacheDir() + const filePath = join(this.cacheDir, filename) + writeFileSync(filePath, JSON.stringify(entry), 'utf-8') + + index.entries[packageName] = { + file: filename, + timestamp: Date.now(), + } + this.dirty = true + } catch { + // Silently fail - cache is not critical + } + } + + /** + * Batch get multiple packages (returns map of found entries) + */ + getMany(packageNames: string[]): Map<string, { latestVersion: string; allVersions: string[] }> { + const results = new Map<string, { latestVersion: string; allVersions: string[] }>() + + for (const name of packageNames) { + const cached = this.get(name) + if (cached) { + results.set(name, cached) + } + } + + return results + } + + /** + * Batch set multiple packages + */ + setMany(entries: Map<string, { latestVersion: string; allVersions: string[] }>): void { + for (const [name, data] of entries) { + this.set(name, data) + } + this.flush() + } + + /** + * Evict oldest cache entries + */ + private 
evictOldest(count: number): void { + const index = this.loadIndex() + const entries = Object.entries(index.entries) + + // Sort by timestamp (oldest first) + entries.sort((a, b) => a[1].timestamp - b[1].timestamp) + + // Remove oldest entries + const toRemove = entries.slice(0, count) + for (const [packageName, entry] of toRemove) { + try { + const filePath = join(this.cacheDir, entry.file) + if (existsSync(filePath)) { + unlinkSync(filePath) + } + } catch { + // Ignore deletion errors + } + delete index.entries[packageName] + } + + this.dirty = true + } + + /** + * Clear all cache + */ + clearCache(): void { + try { + if (existsSync(this.cacheDir)) { + const files = readdirSync(this.cacheDir) + for (const file of files) { + try { + unlinkSync(join(this.cacheDir, file)) + } catch { + // Ignore + } + } + } + } catch { + // Ignore + } + + this.index = { version: CACHE_VERSION, entries: {} } + this.dirty = true + } + + /** + * Flush pending changes to disk + */ + flush(): void { + this.saveIndex() + } + + /** + * Get cache statistics + */ + getStats(): { entries: number; cacheDir: string } { + const index = this.loadIndex() + return { + entries: Object.keys(index.entries).length, + cacheDir: this.cacheDir, + } + } +} + +// Export singleton instance +export const persistentCache = new PersistentCacheManager() diff --git a/test/unit/services/jsdelivr-registry.test.ts b/test/unit/services/jsdelivr-registry.test.ts index 251e43a..705b7a1 100644 --- a/test/unit/services/jsdelivr-registry.test.ts +++ b/test/unit/services/jsdelivr-registry.test.ts @@ -3,11 +3,13 @@ import { getAllPackageDataFromJsdelivr, clearJsdelivrPackageCache, } from '../../../src/services/jsdelivr-registry' +import { persistentCache } from '../../../src/services/persistent-cache' import { PACKAGE_NAME } from '../../../src/config/constants' describe('jsdelivr-registry', () => { beforeEach(() => { clearJsdelivrPackageCache() + persistentCache.clearCache() }) describe('getAllPackageDataFromJsdelivr()', () 
=> { @@ -119,8 +121,9 @@ describe('jsdelivr-registry', () => { await getAllPackageDataFromJsdelivr([PACKAGE_NAME]) const cachedDuration = Date.now() - start2 - // Clear cache + // Clear both in-memory and persistent cache clearJsdelivrPackageCache() + persistentCache.clearCache() // Third fetch should hit the network again and not be instant const start3 = Date.now() diff --git a/test/unit/services/npm-registry.test.ts b/test/unit/services/npm-registry.test.ts index af53fda..cf569fc 100644 --- a/test/unit/services/npm-registry.test.ts +++ b/test/unit/services/npm-registry.test.ts @@ -1,10 +1,12 @@ import { describe, it, expect, beforeEach } from 'vitest' import { getAllPackageData, clearPackageCache } from '../../../src/services/npm-registry' +import { persistentCache } from '../../../src/services/persistent-cache' import { PACKAGE_NAME } from '../../../src/config/constants' describe('npm-registry', () => { beforeEach(() => { clearPackageCache() + persistentCache.clearCache() }) describe('getAllPackageData()', () => { @@ -98,8 +100,9 @@ describe('npm-registry', () => { // First fetch await getAllPackageData([PACKAGE_NAME]) - // Clear cache + // Clear both in-memory and persistent cache clearPackageCache() + persistentCache.clearCache() // Second fetch should hit the network again const start = Date.now() diff --git a/test/unit/services/persistent-cache.test.ts b/test/unit/services/persistent-cache.test.ts new file mode 100644 index 0000000..8681ff5 --- /dev/null +++ b/test/unit/services/persistent-cache.test.ts @@ -0,0 +1,164 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest' +import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' + +// Mock env-paths before importing the module +const mockCacheDir = join(tmpdir(), `inup-cache-test-${Date.now()}`) + +vi.mock('env-paths', () => ({ + default: () => ({ + cache: mockCacheDir, + config: join(mockCacheDir, 
'config'), + data: join(mockCacheDir, 'data'), + }), +})) + +// Import after mocking +const { persistentCache } = await import('../../../src/services/persistent-cache') + +describe('persistent-cache', () => { + beforeEach(() => { + // Clear cache before each test + persistentCache.clearCache() + }) + + afterEach(() => { + // Clean up test directory + try { + rmSync(mockCacheDir, { recursive: true, force: true }) + } catch { + // Ignore cleanup errors + } + }) + + describe('get/set', () => { + it('should return null for non-existent package', () => { + const result = persistentCache.get('non-existent-package') + expect(result).toBeNull() + }) + + it('should store and retrieve package data', () => { + const data = { + latestVersion: '2.0.0', + allVersions: ['2.0.0', '1.5.0', '1.0.0'], + } + + persistentCache.set('test-package', data) + persistentCache.flush() + + const result = persistentCache.get('test-package') + expect(result).toEqual(data) + }) + + it('should handle scoped packages', () => { + const data = { + latestVersion: '3.0.0', + allVersions: ['3.0.0', '2.0.0'], + } + + persistentCache.set('@babel/core', data) + persistentCache.flush() + + const result = persistentCache.get('@babel/core') + expect(result).toEqual(data) + }) + + it('should persist data to disk', () => { + const data = { + latestVersion: '1.0.0', + allVersions: ['1.0.0'], + } + + persistentCache.set('persist-test', data) + persistentCache.flush() + + // Check that cache directory was created + const cacheDir = join(mockCacheDir, 'registry') + expect(existsSync(cacheDir)).toBe(true) + + // Check that index file exists + const indexPath = join(cacheDir, 'index.json') + expect(existsSync(indexPath)).toBe(true) + + // Check that package file exists + const packageFile = join(cacheDir, 'persist-test.json') + expect(existsSync(packageFile)).toBe(true) + }) + }) + + describe('getMany/setMany', () => { + it('should batch get multiple packages', () => { + persistentCache.set('pkg-a', { latestVersion: 
'1.0.0', allVersions: ['1.0.0'] }) + persistentCache.set('pkg-b', { latestVersion: '2.0.0', allVersions: ['2.0.0'] }) + persistentCache.flush() + + const results = persistentCache.getMany(['pkg-a', 'pkg-b', 'pkg-c']) + + expect(results.size).toBe(2) + expect(results.get('pkg-a')?.latestVersion).toBe('1.0.0') + expect(results.get('pkg-b')?.latestVersion).toBe('2.0.0') + expect(results.has('pkg-c')).toBe(false) + }) + + it('should batch set multiple packages', () => { + const entries = new Map([ + ['batch-a', { latestVersion: '1.0.0', allVersions: ['1.0.0'] }], + ['batch-b', { latestVersion: '2.0.0', allVersions: ['2.0.0'] }], + ]) + + persistentCache.setMany(entries) + + expect(persistentCache.get('batch-a')?.latestVersion).toBe('1.0.0') + expect(persistentCache.get('batch-b')?.latestVersion).toBe('2.0.0') + }) + }) + + describe('clearCache', () => { + it('should clear all cached data', () => { + persistentCache.set('to-clear', { latestVersion: '1.0.0', allVersions: ['1.0.0'] }) + persistentCache.flush() + + expect(persistentCache.get('to-clear')).not.toBeNull() + + persistentCache.clearCache() + + expect(persistentCache.get('to-clear')).toBeNull() + }) + }) + + describe('getStats', () => { + it('should return cache statistics', () => { + persistentCache.set('stats-a', { latestVersion: '1.0.0', allVersions: ['1.0.0'] }) + persistentCache.set('stats-b', { latestVersion: '2.0.0', allVersions: ['2.0.0'] }) + persistentCache.flush() + + const stats = persistentCache.getStats() + + expect(stats.entries).toBe(2) + expect(stats.cacheDir).toContain('registry') + }) + }) + + describe('cache file naming', () => { + it('should handle package names with special characters', () => { + const packages = [ + '@types/node', + '@babel/preset-env', + 'lodash.merge', + '@org/pkg-name', + ] + + for (const pkg of packages) { + persistentCache.set(pkg, { latestVersion: '1.0.0', allVersions: ['1.0.0'] }) + } + persistentCache.flush() + + for (const pkg of packages) { + const result = 
persistentCache.get(pkg) + expect(result).not.toBeNull() + expect(result?.latestVersion).toBe('1.0.0') + } + }) + }) +})