Skip to content

Commit ff46bdb

Browse files
christso, claude, Copilot, and Test User
authored
feat(results): git-native storage — design doc + implementation (#1261)
* docs: design plan for git-native results storage (#1259) Captures the agreed architecture before implementation: - Git is the canonical store; local clone is the working copy - No separate index file — git tree IS the index - Eval writes directly to clone working tree (not project-local .agentv/results/) - Reads via git ls-tree + git cat-file --batch (no checkout) - Pagination via cursor - mode: github explicit in config (extension point) Supersedes closed PR #1260. See docs/plans/git-native-results.md for full design. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * feat(results): Pass 1 — config schema + path renames - Add `mode: 'github'` as required field to ResultsConfig - Repurpose `results.path` as optional local filesystem path for clone (default: ~/.agentv/results/<slug>/); reject old-style subdir values (e.g. 'runs') with a migration message - Rename ResultsRepoCachePaths → ResultsRepoLocalPaths - Rename getResultsRepoCachePaths → getResultsRepoLocalPaths - Rename cache_dir → local_dir in ResultsRepoStatus wire format - normalizeResultsConfig: fill default path, expand ~, include mode - Remove redundant local normalizeResultsConfig copy in remote.ts - Update config-validator.ts to enforce mode and filesystem-path rule - Update tests for new schema Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(results): fix lint + update resolveResultsRepoRunsDir + serve tests - Fix biome string-concat lint error (single template literal) - resolveResultsRepoRunsDir: use normalized.path directly (new design) - getResultsRepoStatus: check existsSync(normalized.path) for available, set local_dir to normalized.path - serve.test.ts: update two tests to use mode:github schema and new default path layout (~/.agentv/results/<slug>/runs/...) 
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * wip: initial git-native listing skeleton + implementation goal - Added listGitRuns() using git ls-tree + cat-file --batch - Improved batch parser - Saved implementation goal document This is early progress toward the full git-native results implementation. More to come in follow-up commits. * fix: remove duplicate execFileAsync declaration * feat(results): improve git-native listing metadata shape - Enrich GitListedRun with display_name, test_count, avg_score, size_bytes - Update remote.ts mapping to populate ResultFileMeta fields - Read path now returns data Studio can render * chore: update implementation goal + docker ownership fix - Add user: ${UID}:${GID} to docker-compose for mounted repo permissions - Update goal document with current status - Reinstall dependencies in worktree * fix(results): restore git-native run listing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * chore(results): satisfy lint Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(test): stabilize git subprocess checks Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * chore(test): satisfy lint and timeouts Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * feat(results): finish git-native results flow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(results): complete remote-only studio flow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * seed repo * fix(test): isolate git env in serve regression Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(test): restore readme after temp repo setup Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(test): trim low-value flaky coverage Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(results): materialize synced remote runs Co-authored-by: Copilot 
<223556219+Copilot@users.noreply.github.com> * fix(results): atomically materialize synced runs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * docs(studio): clarify remote results behavior Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(cli): treat AGENTV_HOME log as info Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * docs(studio): refresh remote results screenshots Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Test User <test@example.com>
1 parent af118c6 commit ff46bdb

26 files changed

Lines changed: 2032 additions & 435 deletions

File tree

apps/cli/src/commands/results/remote.ts

Lines changed: 76 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import { existsSync } from 'node:fs';
12
import path from 'node:path';
23

34
import {
@@ -8,7 +9,10 @@ import {
89
directPushResults,
910
directorySizeBytes,
1011
getResultsRepoStatus,
12+
listGitRuns,
1113
loadConfig,
14+
materializeGitRun,
15+
normalizeResultsConfig,
1216
resolveResultsRepoRunsDir,
1317
syncResultsRepo,
1418
} from '@agentv/core';
@@ -59,15 +63,6 @@ function getStatusMessage(error: unknown): string {
5963
return error instanceof Error ? error.message : String(error);
6064
}
6165

62-
function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
63-
return {
64-
repo: config.repo,
65-
path: config.path,
66-
auto_push: config.auto_push === true,
67-
branch_prefix: config.branch_prefix?.trim() || 'eval-results',
68-
};
69-
}
70-
7166
function statusForResult(result: EvaluationResult): 'PASS' | 'FAIL' | 'ERROR' {
7267
if (result.executionStatus === 'execution_error' || result.error) {
7368
return 'ERROR';
@@ -131,10 +126,14 @@ export function decodeRemoteRunId(filename: string): string {
131126
export async function getRemoteResultsStatus(cwd: string): Promise<RemoteResultsStatus> {
132127
const config = await loadNormalizedResultsConfig(cwd);
133128
const status = getResultsRepoStatus(config);
134-
const runCount =
135-
config && status.available
136-
? listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length
137-
: 0;
129+
let runCount = 0;
130+
if (config && status.available) {
131+
try {
132+
runCount = (await listGitRuns(config.path)).length;
133+
} catch {
134+
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
135+
}
136+
}
138137
return {
139138
...status,
140139
run_count: runCount,
@@ -185,15 +184,45 @@ export async function listMergedResultFiles(
185184
};
186185
}
187186

188-
const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
189-
(meta) =>
190-
({
191-
...meta,
192-
filename: encodeRemoteRunId(meta.filename),
193-
raw_filename: meta.filename,
187+
let remoteRuns: SourcedResultFileMeta[] = [];
188+
if (config.mode === 'github') {
189+
try {
190+
const gitRuns = await listGitRuns(config.path);
191+
remoteRuns = gitRuns.map((r) => ({
192+
filename: encodeRemoteRunId(r.run_id),
193+
raw_filename: r.run_id,
194194
source: 'remote' as const,
195-
}) satisfies SourcedResultFileMeta,
196-
);
195+
path: path.join(config.path, r.manifest_path),
196+
displayName: r.display_name,
197+
timestamp: r.timestamp,
198+
testCount: r.test_count,
199+
passRate: r.pass_rate || 0,
200+
avgScore: r.avg_score || 0,
201+
sizeBytes: r.size_bytes || 0,
202+
}));
203+
} catch (error) {
204+
console.error('git-native listing failed, falling back', error);
205+
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
206+
(meta) =>
207+
({
208+
...meta,
209+
filename: encodeRemoteRunId(meta.filename),
210+
raw_filename: meta.filename,
211+
source: 'remote' as const,
212+
}) satisfies SourcedResultFileMeta,
213+
);
214+
}
215+
} else {
216+
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
217+
(meta) =>
218+
({
219+
...meta,
220+
filename: encodeRemoteRunId(meta.filename),
221+
raw_filename: meta.filename,
222+
source: 'remote' as const,
223+
}) satisfies SourcedResultFileMeta,
224+
);
225+
}
197226

198227
const merged = [...localRuns, ...remoteRuns].sort((a, b) =>
199228
b.timestamp.localeCompare(a.timestamp),
@@ -212,6 +241,32 @@ export async function findRunById(
212241
return runs.find((run) => run.filename === runId);
213242
}
214243

244+
/**
 * Makes sure the files behind a remote run entry exist on disk.
 *
 * Does nothing when `meta.source` is not `'remote'` or when `meta.path`
 * already exists. Otherwise it loads the normalized results config for `cwd`,
 * converts `meta.path` into a forward-slash path relative to `config.path`,
 * derives the run directory relative to `runs/`, and hands that to
 * `materializeGitRun`.
 *
 * @param cwd - Directory passed to `loadNormalizedResultsConfig`.
 * @param meta - The `source` and `path` of the run entry to check.
 * @throws Error when no results config is returned, or when `meta.path` does
 *   not resolve to a location inside `config.path`.
 */
export async function ensureRemoteRunAvailable(
245+
cwd: string,
246+
meta: Pick<SourcedResultFileMeta, 'source' | 'path'>,
247+
): Promise<void> {
248+
// Nothing to do for non-remote entries or when the manifest is already on disk.
if (meta.source !== 'remote' || existsSync(meta.path)) {
249+
return;
250+
}
251+
252+
const config = await loadNormalizedResultsConfig(cwd);
253+
if (!config) {
254+
throw new Error('Remote results are not configured');
255+
}
256+
257+
// Normalize OS-specific separators to '/' so the path.posix helpers below apply.
const relativeManifestPath = path.relative(config.path, meta.path).split(path.sep).join('/');
258+
// Reject an empty result (meta.path equals config.path) and results that climb out of config.path.
// NOTE(review): the `=== meta.path` comparison presumably catches path.relative returning the
// target unchanged (e.g. a different drive on Windows) — confirm.
// NOTE(review): startsWith('../') does not match a bare '..' — confirm whether that case can occur.
if (
259+
relativeManifestPath.length === 0 ||
260+
relativeManifestPath === meta.path ||
261+
relativeManifestPath.startsWith('../')
262+
) {
263+
throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
264+
}
265+
266+
// Directory that contains the manifest, expressed relative to the top-level 'runs' directory.
const relativeRunPath = path.posix.relative('runs', path.posix.dirname(relativeManifestPath));
267+
await materializeGitRun(config.path, relativeRunPath);
268+
}
269+
215270
export async function maybeAutoExportRunArtifacts(payload: RemoteExportPayload): Promise<void> {
216271
const config = await loadNormalizedResultsConfig(payload.cwd);
217272
if (!config?.auto_push) {

0 commit comments

Comments
 (0)