From ed26112c289f8a174c641c8d5b25d89338f0808f Mon Sep 17 00:00:00 2001 From: Garrison Snelling Date: Wed, 6 May 2026 00:53:25 +0000 Subject: [PATCH 1/2] docs: add draft git infrastructure benchmark spec --- GIT_BENCHMARK_SPEC.md | 164 ++++++++++++++++++++++++++++++++++++++++++ README.md | 1 + 2 files changed, 165 insertions(+) create mode 100644 GIT_BENCHMARK_SPEC.md diff --git a/GIT_BENCHMARK_SPEC.md b/GIT_BENCHMARK_SPEC.md new file mode 100644 index 0000000..2294c92 --- /dev/null +++ b/GIT_BENCHMARK_SPEC.md @@ -0,0 +1,164 @@ +# Git Infrastructure Benchmark v1 (Draft) + +This document proposes a reproducible benchmark for Git platforms used by humans and AI agents. + +## Scope + +The benchmark compares two categories without forcing them into the same product assumptions: + +- Traditional SCM hosts: GitHub, GitLab, Bitbucket +- API-first / agent-native Git platforms: Freestyle, code.storage + +Optional baseline: + +- Self-hosted Forge: Gitea or Forgejo + +## Goals + +- Measure real developer and agent workflows (`clone`, `fetch`, `commit`, `push`, PR/MR-like flow) +- Stress deep history and high commit volume behavior +- Capture reliability under concurrency and failure conditions +- Produce repeatable p50/p95/p99 metrics and cost-normalized comparisons + +## Fairness Rules + +- Use the same repo fixtures, commit graph, and runner hardware across providers +- Separate cold and warm runs; do not mix results +- Run each scenario at least 20 times and report variance +- Pin runner region for single-region tests; report multi-region separately +- Use provider-native auth and recommended SDK/API paths where applicable +- Report unsupported capabilities explicitly as `N/A`, not `0` + +## Test Matrix + +### A) Core Git Transport + +- `T1` Cold full clone (small, medium, large history) +- `T2` Warm full clone (same fixture) +- `T3` Shallow clone (`--depth 1`, `--depth 100`) +- `T4` Partial clone (`--filter=blob:none`) when supported +- `T5` Incremental fetch after `10`, 
`100`, `1000` new commits +- `T6` Push latency: single small commit, single large diff, binary/LFS payload + +### B) High-Commit and Deep-History Stress + +- `T7` Bulk-history clone: repositories at ~1k, ~10k, ~100k, ~1M commits +- `T8` Continuous tiny commits: each worker creates 200 commits and pushes every 20 commits +- `T9` Batch push: one push containing 500+ commits after branch divergence +- `T10` Rebase/squash stress: large rewritten history push behavior and visibility delay + +### C) Collaboration and Automation + +- `T11` Branch lifecycle: create/switch/merge/delete at scale +- `T12` PR/MR lifecycle: open, attach diff, add comments/review state, merge with checks +- `T13` Webhook e2e latency: `git push` to webhook receiver timestamp +- `T14` CI trigger latency: push-to-first-job and push-to-first-log-line + +### D) API / SDK Workflows + +- `T15` Programmatic repo create and remote URL retrieval +- `T16` Programmatic branch create and commit write +- `T17` Programmatic diff/log/list metadata read +- `T18` Sync behaviors with GitHub mirror/sync features (where offered) + +### E) Reliability and Recovery + +- `T19` Parallel workers: 50 and 100 workers performing branch+commit+push loops +- `T20` Failure injection: token expiration, transient network drops, retriable 5xx +- `T21` Event reliability: webhook delivery success and retry completion + +### F) Security, Governance, and Cost + +- `T22` Access controls and token scope granularity (capability checklist) +- `T23` Auditability: log availability and export ergonomics +- `T24` Cost model based on measured usage: storage, transfer, request/operation costs + +## Standard Workload Profiles + +- `human-dev`: low concurrency, frequent small fetch/push, PR-heavy +- `agent-burst`: high concurrency, many branches, high commit/write frequency +- `ci-heavy`: frequent clone/fetch and webhook/CI trigger sensitivity + +Each profile runs the same test IDs with different concurrency and payload settings. 
+ +## Metrics to Record + +Per test/scenario capture: + +- Latency: p50, p95, p99, min, max +- Throughput: operations/sec, MB/sec +- Reliability: success rate, error class distribution, retry success +- Transfer characteristics: packfile bytes, wall-clock transfer, server processing delay +- Freshness: time from push accepted to ref visibility/API visibility +- Cost units: storage GB-month, transfer GB, operation/API counts + +## Repo Fixture Generator (Deterministic) + +Generate identical fixture repositories with a seeded generator: + +- Sizes: tiny (~10MB), medium (~1GB logical history), large (deep history + binaries) +- Commit graph: linear, fan-out branches, merge-heavy, rebased segments +- File mix: text-heavy, binary-heavy, optional LFS tracks +- Churn model: hot files (frequent edits) + cold files (rare edits) + +Publish the fixture seed, generation script version, and resulting commit hashes. + +## Initial Scoring Model (v1) + +- 40% performance (latency + throughput) +- 25% reliability +- 15% workflow/API completeness +- 10% security/governance +- 10% cost efficiency + +Rules: + +- Reliability is multiplicative within relevant sections (high failure rates cap score) +- Unsupported feature for optional tests stays `N/A`; required unsupported features score `0` for that metric +- Publish both weighted composite and raw metric tables + +## Output Schema (JSON) + +```json +{ + "benchmark": "git-infra-v1", + "date": "2026-05-06", + "provider": "github", + "profile": "agent-burst", + "testId": "T19", + "fixture": { + "name": "deep-history-large", + "seed": 42, + "commitCount": 100000 + }, + "run": { + "region": "us-east-1", + "attempts": 30, + "successRate": 0.97 + }, + "metrics": { + "latencyMs": { "p50": 920, "p95": 2410, "p99": 3900 }, + "throughput": { "opsPerSec": 7.2, "mbPerSec": 48.5 }, + "freshnessMs": { "pushToRefVisibleP50": 340 } + }, + "cost": { + "storageGbMonth": 12.4, + "egressGb": 88.1, + "apiOps": 13420 + } +} +``` + +## Minimal v1 Launch Plan 
+ +Start with 8 tests that provide immediate signal: + +- `T1`, `T3`, `T5`, `T6`, `T7`, `T12`, `T13`, `T19` + +Then add the API (`T15`–`T18`) and governance (`T22`–`T24`) layers in v1.1. + +## Open Questions + +- Should CI timing be benchmarked using each provider's native CI only, or externalized CI only? +- Should mirrored/synced GitHub repos be scored separately from primary repo storage? +- How should per-seat pricing be normalized against pure usage-based models? diff --git a/README.md b/README.md index 25350b7..72ef10b 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Sponsors enable independent benchmark infrastructure. - [ ] Cold start vs warm start metrics - [ ] Multi-region testing - [x] Cost-per-sandbox-minute +- [ ] Git infrastructure benchmark ([draft spec](./GIT_BENCHMARK_SPEC.md))
// ---------------------------------------------------------------------------
// Draft git benchmark runner, reassembled from the whitespace-collapsed
// PATCH 2/2 ("feat: scaffold draft git benchmark runner"). It merges the three
// modules that patch creates: src/git/types.ts, src/git/benchmark.ts and
// src/git/table.ts. The same patch registers the npm script
//   "bench:git": "tsx src/run.ts --mode git"
// in package.json. `computeStats` comes from '../util/stats.js'.
// ---------------------------------------------------------------------------

// ----------------------------- src/git/types.ts ----------------------------

/** Operations that are timed once per benchmark iteration. */
export type GitOperation = 'cold_clone' | 'incremental_fetch' | 'commit_push';

/** Outcome of one timed attempt. Failed attempts carry `error` and a latency of 0. */
export interface GitIterationResult {
  operation: GitOperation;
  latencyMs: number;
  transferBytes?: number;
  error?: string;
}

/** Latency percentiles (milliseconds) for one operation. */
export interface GitOperationStats {
  median: number;
  p95: number;
  p99: number;
}

/** Per-operation latency statistics of a run. */
export interface GitBenchmarkSummary {
  coldCloneMs: GitOperationStats;
  incrementalFetchMs: GitOperationStats;
  commitPushMs: GitOperationStats;
}

/** Complete result of one benchmark run. */
export interface GitBenchmarkResult {
  provider: string;
  mode: 'git';
  fixtureCommitCount: number;
  iterations: number;
  results: GitIterationResult[];
  summary: GitBenchmarkSummary;
  successRate: number;
}

// --------------------------- src/git/benchmark.ts --------------------------

const execFileAsync = promisify(execFile);

/** Branch that the fixture is pushed to and that every clone works on. */
const FIXTURE_BRANCH = 'main';

interface GitRunConfig {
  iterations: number;
  fixtureCommitCount: number;
}

/** Runs `git <args>` in `cwd`; rejects when git exits with a non-zero status. */
async function git(args: string[], cwd: string): Promise<{ stdout: string; stderr: string }> {
  return execFileAsync('git', args, { cwd, maxBuffer: 10 * 1024 * 1024 });
}

/**
 * Sets a repository-local commit identity so `git commit` works on machines
 * that have no global `user.name` / `user.email` (typical for CI runners).
 */
async function configureIdentity(repoDir: string): Promise<void> {
  await git(['config', 'user.name', 'Benchmark Bot'], repoDir);
  await git(['config', 'user.email', 'bench@example.com'], repoDir);
}

/**
 * Creates a working repository in `workDir` with one initial commit plus
 * `commitCount` single-line commits to history.txt, then pushes it to
 * `remoteDir` as the fixture branch.
 */
async function seedFixture(remoteDir: string, workDir: string, commitCount: number): Promise<void> {
  fs.mkdirSync(workDir, { recursive: true });
  await git(['init'], workDir);
  await configureIdentity(workDir);
  fs.writeFileSync(path.join(workDir, 'README.md'), '# git fixture\n');
  await git(['add', '.'], workDir);
  await git(['commit', '-m', 'chore: initial commit'], workDir);

  const historyFile = path.join(workDir, 'history.txt');
  for (let i = 0; i < commitCount; i++) {
    fs.appendFileSync(historyFile, `line-${i}\n`);
    await git(['add', 'history.txt'], workDir);
    await git(['commit', '-m', `chore: seed ${i + 1}`], workDir);
  }

  // `git init` may have created a differently named default branch.
  await git(['branch', '-M', FIXTURE_BRANCH], workDir);
  await git(['remote', 'add', 'origin', remoteDir], workDir);
  await git(['push', '-u', 'origin', FIXTURE_BRANCH], workDir);
}

/**
 * Times `action` and converts the outcome into an iteration result.
 * A rejection is recorded (latency 0, non-empty `error`) instead of thrown so
 * that one failed operation does not abort the whole run.
 */
async function timeOperation(
  operation: GitOperation,
  action: () => Promise<unknown>,
): Promise<GitIterationResult> {
  const start = performance.now();
  try {
    await action();
    return { operation, latencyMs: performance.now() - start };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    // Never store an empty message: a failure must stay distinguishable from success.
    return { operation, latencyMs: 0, error: message || 'unknown error' };
  }
}

/**
 * Runs the local git benchmark against a throw-away bare repository.
 *
 * Each iteration times a fresh clone, a fetch into a long-lived clone after
 * one new upstream commit, and a `pull --rebase` + `push` of one local commit
 * (the commit itself is created before the timer starts).
 *
 * @param config number of iterations and number of seed commits in the fixture
 * @returns raw per-operation results, latency summary and overall success rate
 * @throws when fixture setup or an untimed setup step (churn commit/push,
 *         local commit) fails; the temporary directory is removed either way
 */
export async function runGitBenchmark(config: GitRunConfig): Promise<GitBenchmarkResult> {
  const { iterations, fixtureCommitCount } = config;
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'git-bench-'));
  const remote = path.join(root, 'remote.git');
  const fixtureWriter = path.join(root, 'fixture-writer');
  const stableClone = path.join(root, 'stable-clone');
  const results: GitIterationResult[] = [];

  try {
    await git(['init', '--bare', remote], root);
    // A fresh bare repository points HEAD at git's configured default branch.
    // If that is not the fixture branch, clones check nothing out ("remote HEAD
    // refers to nonexistent ref") and later pushes of `main` fail, so pin HEAD.
    await git(['symbolic-ref', 'HEAD', `refs/heads/${FIXTURE_BRANCH}`], remote);
    await seedFixture(remote, fixtureWriter, fixtureCommitCount);
    await git(['clone', remote, stableClone], root);
    // The stable clone commits too, so it needs its own identity.
    await configureIdentity(stableClone);

    for (let i = 0; i < iterations; i++) {
      // NOTE(review): cloning a local path uses git's local-clone optimisation;
      // pass --no-local if the regular transport should be measured instead.
      const coldTarget = path.join(root, `cold-${i}`);
      results.push(await timeOperation('cold_clone', () => git(['clone', remote, coldTarget], root)));

      // Publish one upstream commit so the fetch below has something to transfer.
      fs.appendFileSync(path.join(fixtureWriter, 'churn.txt'), `tick-${i}\n`);
      await git(['add', 'churn.txt'], fixtureWriter);
      await git(['commit', '-m', `chore: churn ${i + 1}`], fixtureWriter);
      await git(['push', 'origin', FIXTURE_BRANCH], fixtureWriter);

      results.push(await timeOperation('incremental_fetch', () => git(['fetch', 'origin'], stableClone)));

      fs.appendFileSync(path.join(stableClone, 'agent.log'), `push-${i}\n`);
      await git(['add', 'agent.log'], stableClone);
      await git(['commit', '-m', `feat: agent commit ${i + 1}`], stableClone);

      results.push(
        await timeOperation('commit_push', async () => {
          await git(['pull', '--rebase', 'origin', FIXTURE_BRANCH], stableClone);
          await git(['push', 'origin', FIXTURE_BRANCH], stableClone);
        }),
      );

      fs.rmSync(coldTarget, { recursive: true, force: true });
      console.log(` Iteration ${i + 1}/${iterations} complete`);
    }

    const succeeded = results.filter(r => r.error === undefined);
    const latenciesOf = (op: GitOperation): number[] =>
      succeeded.filter(r => r.operation === op).map(r => r.latencyMs);

    return {
      provider: 'local-git',
      mode: 'git',
      fixtureCommitCount,
      iterations,
      results,
      summary: {
        // NOTE(review): an operation that failed in every iteration yields an
        // empty array here; confirm computeStats handles that.
        coldCloneMs: computeStats(latenciesOf('cold_clone')),
        incrementalFetchMs: computeStats(latenciesOf('incremental_fetch')),
        commitPushMs: computeStats(latenciesOf('commit_push')),
      },
      successRate: results.length ? succeeded.length / results.length : 0,
    };
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}

/**
 * Writes `result` to `outPath` as pretty-printed JSON wrapped in a
 * `{ version, timestamp, result }` envelope, with numbers rounded to two decimals.
 */
export async function writeGitResultsJson(result: GitBenchmarkResult, outPath: string): Promise<void> {
  const rounded = {
    ...result,
    summary: {
      coldCloneMs: roundStats(result.summary.coldCloneMs),
      incrementalFetchMs: roundStats(result.summary.incrementalFetchMs),
      commitPushMs: roundStats(result.summary.commitPushMs),
    },
    results: result.results.map(r => ({
      ...r,
      latencyMs: round(r.latencyMs),
    })),
    successRate: round(result.successRate),
  };

  fs.writeFileSync(outPath, JSON.stringify({
    version: '1.0',
    timestamp: new Date().toISOString(),
    result: rounded,
  }, null, 2));
}

/** Rounds to two decimal places. */
function round(value: number): number {
  return Math.round(value * 100) / 100;
}

/** Rounds every percentile of `stats` to two decimal places. */
function roundStats(stats: GitOperationStats): GitOperationStats {
  return {
    median: round(stats.median),
    p95: round(stats.p95),
    p99: round(stats.p99),
  };
}

// ----------------------------- src/git/table.ts ----------------------------

/** Formats a millisecond duration as seconds with two decimals, e.g. `1.50s`. */
function fmt(ms: number): string {
  return `${(ms / 1000).toFixed(2)}s`;
}

/** Builds one summary line: label followed by median / p95 / p99. */
function statsRow(label: string, stats: GitOperationStats): string {
  return `${label} median ${fmt(stats.median)} p95 ${fmt(stats.p95)} p99 ${fmt(stats.p99)}`;
}

/** Prints a human-readable summary of a benchmark run to stdout. */
export function printGitResults(result: GitBenchmarkResult): void {
  console.log('\n--- Git Benchmark Results ---');
  console.log(`Provider: ${result.provider}`);
  console.log(`Fixture commits: ${result.fixtureCommitCount}`);
  console.log(`Iterations: ${result.iterations}`);
  console.log(`Success rate: ${(result.successRate * 100).toFixed(1)}%`);
  console.log('');
  console.log(statsRow('Cold clone', result.summary.coldCloneMs));
  console.log(statsRow('Incremental fetch', result.summary.incrementalFetchMs));
  console.log(statsRow('Commit + push', result.summary.commitPushMs));
}
// ---------------------------------------------------------------------------
// src/run.ts additions from PATCH 2/2, reassembled from the collapsed diff.
// These functions rely on names declared elsewhere in src/run.ts: `rawMode`,
// `args`, `iterations`, `getArgValue`, `BenchmarkMode`, `fs`, `path`,
// `__dirname`, `runGitBenchmark`, `writeGitResultsJson`, `printGitResults`.
// The same patch makes main() call runGit() when the first resolved mode is
// 'git': it prints a banner and the current date before the run and
// "Git benchmark complete." after it, then returns.
// ---------------------------------------------------------------------------

/** Resolve which modes to run */
function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] | ['git'] {
  if (!rawMode) return ['sequential', 'staggered', 'burst'];
  if (rawMode === 'storage') return ['storage'];
  if (rawMode === 'browser') return ['browser'];
  if (rawMode === 'git') return ['git'];
  // 'concurrent' is accepted as an alias of 'burst'.
  const m = rawMode === 'concurrent' ? 'burst' : rawMode as BenchmarkMode;
  return [m];
}

/** Map mode to results subdirectory name */
function modeToDir(m: BenchmarkMode | 'storage' | 'git'): string {
  switch (m) {
    case 'sequential': return 'sequential_tti';
    case 'staggered': return 'staggered_tti';
    case 'burst':
    case 'concurrent': return 'burst_tti';
    case 'storage': return 'storage';
    case 'git': return 'git';
    default: return `${m}_tti`;
  }
}

/**
 * Parses the `--fixture-commits` value.
 *
 * @param raw raw CLI value; `undefined` or `''` selects the default of 1000
 * @returns the requested number of seed commits (0 is allowed)
 * @throws Error when the value is not a non-negative integer. The previous
 *         `parseInt` call let `NaN` and negative numbers through, which
 *         silently produced a fixture without seed commits.
 */
function parseFixtureCommitCount(raw: string | undefined): number {
  if (raw === undefined || raw === '') return 1000;
  if (!/^\d+$/.test(raw)) {
    throw new Error(`--fixture-commits must be a non-negative integer, got "${raw}"`);
  }
  return Number(raw);
}

/**
 * Runs the git benchmark, prints the summary table and stores the result as
 * results/git/<YYYY-MM-DD>.json plus a results/git/latest.json copy.
 */
async function runGit(): Promise<void> {
  const fixtureCommitCount = parseFixtureCommitCount(getArgValue(args, '--fixture-commits'));

  console.log('\n' + '='.repeat(70));
  console.log(' MODE: GIT');
  console.log(` Iterations: ${iterations}`);
  console.log(` Fixture commits: ${fixtureCommitCount}`);
  console.log('='.repeat(70));

  const result = await runGitBenchmark({
    iterations,
    fixtureCommitCount,
  });

  printGitResults(result);

  // Date-only stamp: a second run on the same day overwrites that day's file.
  const timestamp = new Date().toISOString().slice(0, 10);
  const resultsDir = path.resolve(__dirname, '../results/git');
  fs.mkdirSync(resultsDir, { recursive: true });

  const outPath = path.join(resultsDir, `${timestamp}.json`);
  await writeGitResultsJson(result, outPath);

  const latestPath = path.join(resultsDir, 'latest.json');
  fs.copyFileSync(outPath, latestPath);
  console.log(`Copied latest: ${latestPath}`);
}