Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions src/commands/cache/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,54 @@ import { Arguments, Argv } from 'yargs'
import { getCommandUsageHeader } from '../../lib/ui/helpers'
import { BaseCommandOptions } from '../types'

export interface CacheOptions extends BaseCommandOptions {}
/**
* Options accepted by the `cache` command, layered on top of the
* shared `BaseCommandOptions`.
*/
export interface CacheOptions extends BaseCommandOptions {
/**
* Positional list of language identifiers / aliases for the
* `prefetch` subcommand. Empty → interactive checkbox picker.
* Recognized values mirror the `COCO_PREFETCH` env-var grammar:
* `py`, `python`, `rs`, `rust`, `go`, `golang`, `all`.
*
* Optional: the other subcommands do not take languages.
*/
languages?: string[]
}

export type CacheArgv = Arguments<CacheOptions>

export const command = 'cache <subcommand>'
/**
* Subcommand vocabulary. Two cache layers coexist under one command:
*
* - **Diff-summary cache** (#845) — `info` / `clear`. Caches LLM-
* produced file summaries keyed on diff content; clearing
* forces fresh summaries on the next commit run.
* - **Tree-sitter parser cache** (#933) — `parsers` / `prefetch` /
* `clear-parsers`. Manages the lazy-loaded `.wasm` parser files
* under `~/.cache/coco/tree-sitter/`.
*
* Kept under one verb because users think of "cache" as a single
* concept; the subcommand discriminator makes the scope unambiguous.
*
* Declared `as const` so the literal names are preserved in the type
* and `CacheSubcommand` can be derived from this list rather than
* maintained by hand next to it.
*/
export const CACHE_SUBCOMMANDS = [
'clear',
'info',
'parsers',
'prefetch',
'clear-parsers',
] as const

/** Union of the literal subcommand names listed in `CACHE_SUBCOMMANDS`. */
export type CacheSubcommand = typeof CACHE_SUBCOMMANDS[number]

export const command = 'cache <subcommand> [languages..]'

/**
 * Register the positionals for `coco cache <subcommand> [languages..]`.
 *
 * - `subcommand` is restricted to `CACHE_SUBCOMMANDS`, so yargs rejects
 *   anything else before the handler runs.
 * - `languages` is a free-form string array. It deliberately carries no
 *   `choices`: alias resolution and unknown-token reporting are left to
 *   the handler, and a `choices` list here would reject valid tokens
 *   such as `py` or `rust` at parse time.
 *
 * @param yargs The yargs instance to extend.
 * @returns The same instance with positionals and usage header applied.
 */
export const builder = (yargs: Argv) => {
  return yargs
    .positional('subcommand', {
      describe: 'Cache action to run',
      type: 'string',
      choices: CACHE_SUBCOMMANDS,
    })
    .positional('languages', {
      describe: 'Languages to act on (for `prefetch`). Empty → interactive picker.',
      type: 'string',
      array: true,
    })
    .usage(getCommandUsageHeader(command))
}
60 changes: 60 additions & 0 deletions src/commands/cache/handler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,64 @@ describe('coco cache <subcommand>', () => {
expect(process.exitCode).toBe(1)
process.exitCode = previousExit
})

describe('tree-sitter subcommands (#933 phase 7)', () => {
  // COCO_CACHE_DIR is pointed at a folder under the same tmpRoot the
  // surrounding tests use for XDG_CACHE_HOME. That keeps the
  // tree-sitter cache dir inside the isolated tmp dir, which the outer
  // describe's afterEach wipes.
  beforeEach(() => {
    process.env.COCO_CACHE_DIR = path.join(tmpRoot, 'coco')
  })
  afterEach(() => {
    delete process.env.COCO_CACHE_DIR
  })

  // Run the cache handler with a loosely-typed argv and the shared mock logger.
  const runCache = (argv: Record<string, unknown>) =>
    handler(argv as never, logger as never)

  // Everything passed as the first argument to logger.log, one call per line.
  const loggedText = () =>
    logger.log.mock.calls.map((args) => args[0]).join('\n')

  it('parsers: lists every manifest language with cached/not-cached state', async () => {
    await runCache({ subcommand: 'parsers' })

    const out = loggedText()
    for (const expected of ['Tree-sitter parser cache', 'Python', 'Rust', 'Go']) {
      expect(out).toContain(expected)
    }
    // The tmp dir is fresh, so every entry reports as not cached.
    expect(out).toContain('not cached')
  })

  it('prefetch: warns about unknown language tokens', async () => {
    // A lone unknown token should produce the warning and then fall
    // through to the empty-selection message ("Nothing to do").
    await runCache({ subcommand: 'prefetch', languages: ['fortran'] })

    const out = loggedText()
    expect(out).toContain('ignoring unknown language(s): fortran')
    expect(out).toContain('Nothing to do')
  })

  it('clear-parsers: reports no-op when nothing is cached', async () => {
    await runCache({ subcommand: 'clear-parsers' })

    expect(logger.log).toHaveBeenCalledWith(
      expect.stringContaining('No tree-sitter parsers cached'),
    )
  })

  it('clear-parsers: removes cached .wasm files', async () => {
    // Simulate a populated cache with two placeholder .wasm files so
    // no real network download is needed.
    const cacheDir = path.join(process.env.COCO_CACHE_DIR as string, 'tree-sitter')
    const seededFiles = ['tree-sitter-python.wasm', 'tree-sitter-rust.wasm'].map(
      (fileName) => path.join(cacheDir, fileName),
    )
    fs.mkdirSync(cacheDir, { recursive: true })
    for (const wasmFile of seededFiles) {
      fs.writeFileSync(wasmFile, 'fake')
    }

    await runCache({ subcommand: 'clear-parsers' })

    const out = loggedText()
    expect(out).toContain('cleared Python')
    expect(out).toContain('cleared Rust')
    for (const wasmFile of seededFiles) {
      expect(fs.existsSync(wasmFile)).toBe(false)
    }
  })
})
})
183 changes: 182 additions & 1 deletion src/commands/cache/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,24 @@ import * as fs from 'node:fs'

import chalk from 'chalk'

import {
clearCachedParser,
getCachedParserStatus,
type LazyTreeSitterLanguageId,
} from '../../lib/parsers/default/__tree_sitter__/cache'
import {
listManifestLanguages,
TREE_SITTER_MANIFEST,
} from '../../lib/parsers/default/__tree_sitter__/manifest'
import {
parsePrefetchEnv,
prefetchTreeSitterParsers,
} from '../../lib/parsers/default/__tree_sitter__/prefetch'
import {
clearDiffSummaryCache,
getDiffSummaryCachePath,
} from '../../lib/parsers/default/utils/diffSummaryCache'
import { checkboxPrompt } from '../../lib/ui/inquirerPrompts'
import { CommandHandler } from '../../lib/types'
import { applyRepoCwd } from '../utils/applyRepoFlag'
import { CacheArgv } from './config'
Expand Down Expand Up @@ -39,8 +53,114 @@ function formatBytes(bytes: number): string {
return `${(bytes / 1024 / 1024).toFixed(2)} MB`
}

/**
 * Render the tree-sitter parser cache table (`coco cache parsers`).
 *
 * Emits, through `logger.log`:
 *   - a heading,
 *   - two lines per manifest language: name / state / size, then
 *     version and download URL,
 *   - a footer with the cached count and total bytes on disk,
 *   - usage hints for the related subcommands.
 *
 * A language contributes to the footer totals only when it is cached
 * and its on-disk size is known.
 *
 * @param logger Sink for the rendered lines; only `log` is used.
 */
function renderParsersTable(
  logger: { log: (s: string) => void },
): void {
  // Column widths apply to the plain text. Padding must happen before
  // chalk wraps the text: the escape sequences count toward
  // `String.length`, and differ between styles, so padding a coloured
  // string shifts the next column depending on colour.
  const NAME_COLUMN_WIDTH = 8
  const STATE_COLUMN_WIDTH = 12

  const languages = listManifestLanguages()
  let totalBytes = 0
  let cachedCount = 0

  logger.log(chalk.bold('Tree-sitter parser cache'))
  logger.log('')
  for (const language of languages) {
    const entry = TREE_SITTER_MANIFEST[language]
    const status = getCachedParserStatus(language)
    // Defined only when the parser is cached and its size was read.
    const onDiskBytes = status.cached ? status.bytes : undefined

    const stateLabel = status.cached
      ? chalk.green('cached'.padEnd(STATE_COLUMN_WIDTH))
      : chalk.dim('not cached'.padEnd(STATE_COLUMN_WIDTH))
    const size = onDiskBytes !== undefined
      ? chalk.dim(`(${formatBytes(onDiskBytes)})`)
      : chalk.dim(`(${formatBytes(entry.approxBytes)} when fetched)`)

    if (onDiskBytes !== undefined) {
      totalBytes += onDiskBytes
      cachedCount += 1
    }

    logger.log(
      ` ${chalk.bold(entry.displayName.padEnd(NAME_COLUMN_WIDTH))} ${stateLabel}${size}`,
    )
    logger.log(` ${chalk.dim(`v${entry.version} · ${entry.wasmUrl}`)}`)
  }

  logger.log('')
  logger.log(
    ` ${chalk.dim('cached:')} ${cachedCount}/${languages.length} ` +
    `${chalk.dim('total on disk:')} ${formatBytes(totalBytes)}`,
  )
  logger.log('')
  logger.log(chalk.dim(' Prefetch a language: coco cache prefetch py'))
  logger.log(chalk.dim(' Pick interactively: coco cache prefetch'))
  logger.log(chalk.dim(' Clear the parser cache: coco cache clear-parsers'))
}

/**
 * Turn user-supplied language tokens (including aliases) into
 * canonical language ids.
 *
 * The tokens are comma-joined and handed to the prefetch module's
 * env-var parser, so the accepted grammar is whatever `COCO_PREFETCH`
 * accepts (`py` / `python` / `rs` / `rust` / `go` / `golang` / `all`).
 *
 * @param tokens Positional arguments as typed by the user.
 * @returns The parser's `resolved` / `unknown` lists plus
 *   `interactive`, which is true only for empty input and tells the
 *   caller to show the checkbox picker instead.
 */
function resolveLanguageTokens(tokens: string[]): {
  resolved: LazyTreeSitterLanguageId[]
  unknown: string[]
  interactive: boolean
} {
  const interactive = tokens.length === 0
  if (interactive) {
    // Nothing to parse; the caller is expected to prompt.
    return { resolved: [], unknown: [], interactive }
  }
  const envStyleList = tokens.join(',')
  return { ...parsePrefetchEnv(envStyleList), interactive }
}

/**
 * Ask the user, via a checkbox prompt, which parsers to download.
 *
 * Every manifest language gets one unchecked row labelled with its
 * display name and either its cached size or its approximate download
 * size.
 *
 * When stdin is not a TTY (CI, pipes) no prompt is shown: an
 * explanation is logged and `undefined` is returned so the caller can
 * fail instead of hanging.
 *
 * @param logger Sink for the non-TTY explanation; only `log` is used.
 * @returns The picked language ids, or `undefined` when prompting is
 *   not possible.
 */
async function promptLanguageSelection(
  logger: { log: (s: string) => void },
): Promise<LazyTreeSitterLanguageId[] | undefined> {
  if (!process.stdin.isTTY) {
    logger.log(chalk.red('`coco cache prefetch` with no args requires an interactive TTY.'))
    const hints = [
      'In a pipe / CI, pass the languages explicitly:',
      ' coco cache prefetch py rs go',
      ' coco cache prefetch all',
    ]
    for (const hint of hints) {
      logger.log(chalk.dim(hint))
    }
    return undefined
  }

  const choices = listManifestLanguages().map((language) => {
    const manifestEntry = TREE_SITTER_MANIFEST[language]
    const cacheState = getCachedParserStatus(language)

    let label: string
    if (cacheState.cached) {
      // Fall back to the manifest estimate if the real size is missing.
      const shownBytes = cacheState.bytes ?? manifestEntry.approxBytes
      label = `${manifestEntry.displayName} (cached, ${formatBytes(shownBytes)})`
    } else {
      label = `${manifestEntry.displayName} (~${formatBytes(manifestEntry.approxBytes)})`
    }

    return { name: label, value: language, checked: false }
  })

  const selection = await checkboxPrompt<LazyTreeSitterLanguageId>({
    message: 'Which tree-sitter parsers to (re)download?',
    choices,
    instructions: ' (Space toggles · Enter confirms)',
  })
  return selection
}

export const handler: CommandHandler<CacheArgv> = async (argv, logger) => {
const subcommand = (argv as { subcommand?: string }).subcommand
const positionalLanguages = ((argv as { languages?: string[] }).languages || [])
.map((s) => s.trim())
.filter(Boolean)
// Honor the global --repo flag so `coco cache info --repo <X>`
// inspects X's cache, not the launcher's cwd. applyRepoCwd
// performs the chdir when needed and returns the canonical path.
Expand Down Expand Up @@ -82,7 +202,68 @@ export const handler: CommandHandler<CacheArgv> = async (argv, logger) => {
return
}

if (subcommand === 'parsers') {
renderParsersTable(logger)
return
}

if (subcommand === 'prefetch') {
const { resolved: resolvedFromArgs, unknown, interactive } =
resolveLanguageTokens(positionalLanguages)
if (unknown.length > 0) {
logger.log(chalk.yellow(
`! ignoring unknown language(s): ${unknown.join(', ')}. ` +
`Known: ${listManifestLanguages().join(', ')}`,
))
}
let resolved = resolvedFromArgs
if (interactive) {
const picked = await promptLanguageSelection(logger)
if (!picked) {
process.exitCode = 1
return
}
resolved = picked
}
if (resolved.length === 0) {
logger.log(chalk.dim('No languages selected. Nothing to do.'))
return
}
const result = await prefetchTreeSitterParsers(resolved, {
writeLine: (line: string) => logger.log(line),
})
logger.log('')
logger.log(
`${chalk.bold('Summary:')} ` +
`${chalk.green(`${result.downloaded.length} downloaded`)} · ` +
`${chalk.dim(`${result.alreadyCached.length} already cached`)} · ` +
`${chalk.red(`${result.failed.length} failed`)}`,
)
if (result.failed.length > 0) {
process.exitCode = 1
}
return
}

if (subcommand === 'clear-parsers') {
const languages = listManifestLanguages()
let cleared = 0
for (const language of languages) {
if (clearCachedParser(language)) {
cleared += 1
logger.log(chalk.green(`✓ cleared ${TREE_SITTER_MANIFEST[language].displayName}`))
}
}
if (cleared === 0) {
logger.log(chalk.dim('No tree-sitter parsers cached. Nothing to clear.'))
return
}
logger.log('')
logger.log(chalk.dim(`Cleared ${cleared} parser(s) from ~/.cache/coco/tree-sitter/`))
return
}

logger.log(chalk.red(`Unknown cache subcommand: ${subcommand}`))
logger.log(chalk.dim('Use one of: clear, info'))
logger.log(chalk.dim('Use one of: clear, info, parsers, prefetch, clear-parsers'))
process.exitCode = 1
}
51 changes: 50 additions & 1 deletion src/lib/parsers/default/__tree_sitter__/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
* polish-phase concern; today, users `rm -rf` the dir manually.
*/

import { existsSync, mkdirSync } from 'node:fs'
import { existsSync, mkdirSync, statSync, unlinkSync } from 'node:fs'
import { homedir, platform } from 'node:os'
import { join } from 'node:path'

Expand Down Expand Up @@ -113,3 +113,52 @@ export function ensureTreeSitterCacheDir(): string {
export function isLanguageCached(language: LazyTreeSitterLanguageId): boolean {
  // The language counts as cached when its cache path exists on disk.
  const wasmPath = getCachedWasmPath(language)
  return existsSync(wasmPath)
}

/**
* Snapshot of one lazy-loaded parser's cache entry, as returned by
* `getCachedParserStatus`. `bytes` and `mtime` are only populated
* when `cached` is true.
*/
export type CachedParserStatus = {
/** Language id this status was computed for. */
language: LazyTreeSitterLanguageId
/** True when the .wasm exists on disk in the cache. */
cached: boolean
/** Filesystem path the cache lookup checks. */
path: string
/** On-disk size in bytes when cached; undefined otherwise. */
bytes?: number
/** Last-modified timestamp when cached; undefined otherwise. */
mtime?: Date
}

/**
 * Inspect the on-disk state of a single lazy-loaded parser. Used by
 * `coco cache parsers` to render the status table and by the
 * interactive prefetch picker to mark already-cached entries.
 *
 * The file is probed with a single `statSync` call. Any failure is
 * reported as "not cached" rather than thrown, so a file that is
 * missing or vanishes concurrently never crashes the caller.
 *
 * @param language Language id whose cached .wasm should be inspected.
 * @returns The cache path plus `cached: true` with size and mtime when
 *   the stat succeeds, or `cached: false` otherwise.
 */
export function getCachedParserStatus(
  language: LazyTreeSitterLanguageId,
): CachedParserStatus {
  const path = getCachedWasmPath(language)
  try {
    // One syscall answers both "does it exist?" and "how big is it?",
    // so there is no gap between an existence check and the stat.
    const stat = statSync(path)
    return { language, cached: true, path, bytes: stat.size, mtime: stat.mtime }
  } catch {
    return { language, cached: false, path }
  }
}

/**
 * Delete the cached .wasm for one language.
 *
 * Safe to call repeatedly: when nothing exists at the cache path the
 * call does nothing. A failed delete is also reported as `false`
 * rather than thrown.
 *
 * @param language Language id whose cached parser should be removed.
 * @returns `true` only when a file was actually deleted.
 */
export function clearCachedParser(language: LazyTreeSitterLanguageId): boolean {
  const wasmPath = getCachedWasmPath(language)
  let removed = false
  if (existsSync(wasmPath)) {
    try {
      unlinkSync(wasmPath)
      removed = true
    } catch {
      // Best effort: an unlink failure is reported as "not removed".
    }
  }
  return removed
}
Loading
Loading