diff --git a/GIT_BENCHMARK_SPEC.md b/GIT_BENCHMARK_SPEC.md
new file mode 100644
index 0000000..2294c92
--- /dev/null
+++ b/GIT_BENCHMARK_SPEC.md
@@ -0,0 +1,164 @@
+# Git Infrastructure Benchmark v1 (Draft)
+
+This document proposes a reproducible benchmark for Git platforms used by humans and AI agents.
+
+## Scope
+
+The benchmark compares two categories without forcing them into the same product assumptions:
+
+- Traditional SCM hosts: GitHub, GitLab, Bitbucket
+- API-first / agent-native Git platforms: Freestyle, code.storage
+
+Optional baseline:
+
+- Self-hosted Forge: Gitea or Forgejo
+
+## Goals
+
+- Measure real developer and agent workflows (`clone`, `fetch`, `commit`, `push`, PR/MR-like flow)
+- Stress deep history and high commit volume behavior
+- Capture reliability under concurrency and failure conditions
+- Produce repeatable p50/p95/p99 metrics and cost-normalized comparisons
+
+## Fairness Rules
+
+- Use the same repo fixtures, commit graph, and runner hardware across providers
+- Separate cold and warm runs; do not mix results
+- Run each scenario at least 20 times and report variance
+- Pin runner region for single-region tests; report multi-region separately
+- Use provider-native auth and recommended SDK/API paths where applicable
+- Report unsupported capabilities explicitly as `N/A`, not `0`
+
+## Test Matrix
+
+### A) Core Git Transport
+
+- `T1` Cold full clone (small, medium, large history)
+- `T2` Warm full clone (same fixture)
+- `T3` Shallow clone (`--depth 1`, `--depth 100`)
+- `T4` Partial clone (`--filter=blob:none`) when supported
+- `T5` Incremental fetch after `10`, `100`, `1000` new commits
+- `T6` Push latency: single small commit, single large diff, binary/LFS payload
+
+### B) High-Commit and Deep-History Stress
+
+- `T7` Bulk-history clone: repositories at ~1k, ~10k, ~100k, ~1M commits
+- `T8` Continuous tiny commits: each worker creates 200 commits and pushes every 20 commits
+- `T9` Batch push: one push containing 500+ commits after branch divergence
+- `T10` Rebase/squash stress: large rewritten history push behavior and visibility delay
+
+### C) Collaboration and Automation
+
+- `T11` Branch lifecycle: create/switch/merge/delete at scale
+- `T12` PR/MR lifecycle: open, attach diff, add comments/review state, merge with checks
+- `T13` Webhook e2e latency: `git push` to webhook receiver timestamp
+- `T14` CI trigger latency: push-to-first-job and push-to-first-log-line
+
+### D) API / SDK Workflows
+
+- `T15` Programmatic repo create and remote URL retrieval
+- `T16` Programmatic branch create and commit write
+- `T17` Programmatic diff/log/list metadata read
+- `T18` Sync behaviors with GitHub mirror/sync features (where offered)
+
+### E) Reliability and Recovery
+
+- `T19` Parallel workers: 50 and 100 workers performing branch+commit+push loops
+- `T20` Failure injection: token expiration, transient network drops, retriable 5xx
+- `T21` Event reliability: webhook delivery success and retry completion
+
+### F) Security, Governance, and Cost
+
+- `T22` Access controls and token scope granularity (capability checklist)
+- `T23` Auditability: log availability and export ergonomics
+- `T24` Cost model based on measured usage: storage, transfer, request/operation costs
+
+## Standard Workload Profiles
+
+- `human-dev`: low concurrency, frequent small fetch/push, PR-heavy
+- `agent-burst`: high concurrency, many branches, high commit/write frequency
+- `ci-heavy`: frequent clone/fetch and webhook/CI trigger sensitivity
+
+Each profile runs the same test IDs with different concurrency and payload settings.
+
+## Metrics to Record
+
+Per test/scenario capture:
+
+- Latency: p50, p95, p99, min, max
+- Throughput: operations/sec, MB/sec
+- Reliability: success rate, error class distribution, retry success
+- Transfer characteristics: packfile bytes, wall-clock transfer, server processing delay
+- Freshness: time from push accepted to ref visibility/API visibility
+- Cost units: storage GB-month, transfer GB, operation/API counts
+
+## Repo Fixture Generator (Deterministic)
+
+Generate identical fixture repositories with a seeded generator:
+
+- Sizes: tiny (~10MB), medium (~1GB logical history), large (deep history + binaries)
+- Commit graph: linear, fan-out branches, merge-heavy, rebased segments
+- File mix: text-heavy, binary-heavy, optional LFS tracks
+- Churn model: hot files (frequent edits) + cold files (rare edits)
+
+Publish the fixture seed, generation script version, and resulting commit hashes.
+
+## Initial Scoring Model (v1)
+
+- 40% performance (latency + throughput)
+- 25% reliability
+- 15% workflow/API completeness
+- 10% security/governance
+- 10% cost efficiency
+
+Rules:
+
+- Reliability is multiplicative within relevant sections (high failure rates cap score)
+- Unsupported feature for optional tests stays `N/A`; required unsupported features score `0` for that metric
+- Publish both weighted composite and raw metric tables
+
+## Output Schema (JSON)
+
+```json
+{
+ "benchmark": "git-infra-v1",
+ "date": "2026-05-06",
+ "provider": "github",
+ "profile": "agent-burst",
+ "testId": "T19",
+ "fixture": {
+ "name": "deep-history-large",
+ "seed": 42,
+ "commitCount": 100000
+ },
+ "run": {
+ "region": "us-east-1",
+ "attempts": 30,
+ "successRate": 0.97
+ },
+ "metrics": {
+ "latencyMs": { "p50": 920, "p95": 2410, "p99": 3900 },
+ "throughput": { "opsPerSec": 7.2, "mbPerSec": 48.5 },
+ "freshnessMs": { "pushToRefVisibleP50": 340 }
+ },
+ "cost": {
+ "storageGbMonth": 12.4,
+ "egressGb": 88.1,
+ "apiOps": 13420
+ }
+}
+```
+
+## Minimal v1 Launch Plan
+
+Start with 8 tests that provide immediate signal:
+
+- `T1`, `T3`, `T5`, `T6`, `T7`, `T12`, `T13`, `T19`
+
+Then add API and governance layers (`T15+`, `T22+`) in v1.1.
+
+## Open Questions
+
+- Should CI timing be benchmarked using each provider's native CI only, or externalized CI only?
+- Should mirrored/synced GitHub repos be scored separately from primary repo storage?
+- How should per-seat pricing be normalized against pure usage-based models?
diff --git a/README.md b/README.md
index 25350b7..72ef10b 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,7 @@ Sponsors enable independent benchmark infrastructure.
- [ ] Cold start vs warm start metrics
- [ ] Multi-region testing
- [x] Cost-per-sandbox-minute
+- [ ] Git infrastructure benchmark ([draft spec](./GIT_BENCHMARK_SPEC.md))
diff --git a/package.json b/package.json
index 5fcd889..3ce3508 100644
--- a/package.json
+++ b/package.json
@@ -28,6 +28,7 @@
"bench:browser:steel": "tsx src/run.ts --mode browser --provider steel",
"bench:browser:browseruse": "tsx src/run.ts --mode browser --provider browseruse",
"bench:browser:anchorbrowser": "tsx src/run.ts --mode browser --provider anchorbrowser",
+ "bench:git": "tsx src/run.ts --mode git",
"bench:storage": "tsx src/run.ts --mode storage",
"bench:storage:s3": "tsx src/run.ts --mode storage --provider aws-s3",
"bench:storage:r2": "tsx src/run.ts --mode storage --provider cloudflare-r2",
diff --git a/src/git/benchmark.ts b/src/git/benchmark.ts
new file mode 100644
index 0000000..cc640b7
--- /dev/null
+++ b/src/git/benchmark.ts
@@ -0,0 +1,150 @@
+import fs from 'fs';
+import os from 'os';
+import path from 'path';
+import { promisify } from 'util';
+import { execFile } from 'child_process';
+import { computeStats } from '../util/stats.js';
+import type { GitBenchmarkResult, GitIterationResult } from './types.js';
+
+const execFileAsync = promisify(execFile);
+
+interface GitRunConfig {
+ iterations: number;
+ fixtureCommitCount: number;
+}
+
+async function git(args: string[], cwd: string): Promise<{ stdout: string; stderr: string }> {
+ return execFileAsync('git', args, { cwd, maxBuffer: 10 * 1024 * 1024 });
+}
+
+async function seedFixture(remoteDir: string, workDir: string, commitCount: number): Promise<void> {
+ fs.mkdirSync(workDir, { recursive: true });
+ await git(['init'], workDir);
+ await git(['config', 'user.name', 'Benchmark Bot'], workDir);
+ await git(['config', 'user.email', 'bench@example.com'], workDir);
+ fs.writeFileSync(path.join(workDir, 'README.md'), '# git fixture\n');
+ await git(['add', '.'], workDir);
+ await git(['commit', '-m', 'chore: initial commit'], workDir);
+
+ const historyFile = path.join(workDir, 'history.txt');
+ for (let i = 0; i < commitCount; i++) {
+ fs.appendFileSync(historyFile, `line-${i}\n`);
+ await git(['add', 'history.txt'], workDir);
+ await git(['commit', '-m', `chore: seed ${i + 1}`], workDir);
+ }
+
+ await git(['branch', '-M', 'main'], workDir);
+ await git(['remote', 'add', 'origin', remoteDir], workDir);
+ await git(['push', '-u', 'origin', 'main'], workDir);
+}
+
+export async function runGitBenchmark(config: GitRunConfig): Promise<GitBenchmarkResult> {
+ const { iterations, fixtureCommitCount } = config;
+ const root = fs.mkdtempSync(path.join(os.tmpdir(), 'git-bench-'));
+ const remote = path.join(root, 'remote.git');
+ const fixtureWriter = path.join(root, 'fixture-writer');
+ const stableClone = path.join(root, 'stable-clone');
+ const results: GitIterationResult[] = [];
+
+ try {
+ await git(['init', '--bare', remote], root);
+ await seedFixture(remote, fixtureWriter, fixtureCommitCount);
+ await git(['clone', remote, stableClone], root);
+
+ for (let i = 0; i < iterations; i++) {
+ const coldTarget = path.join(root, `cold-${i}`);
+ let start = performance.now();
+ try {
+ await git(['clone', remote, coldTarget], root);
+ results.push({ operation: 'cold_clone', latencyMs: performance.now() - start });
+ } catch (err) {
+ results.push({ operation: 'cold_clone', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
+ }
+
+ const churnFile = path.join(fixtureWriter, 'churn.txt');
+ fs.appendFileSync(churnFile, `tick-${i}\n`);
+ await git(['add', 'churn.txt'], fixtureWriter);
+ await git(['commit', '-m', `chore: churn ${i + 1}`], fixtureWriter);
+ await git(['push', 'origin', 'main'], fixtureWriter);
+
+ start = performance.now();
+ try {
+ await git(['fetch', 'origin'], stableClone);
+ results.push({ operation: 'incremental_fetch', latencyMs: performance.now() - start });
+ } catch (err) {
+ results.push({ operation: 'incremental_fetch', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
+ }
+
+ const commitFile = path.join(stableClone, 'agent.log');
+ fs.appendFileSync(commitFile, `push-${i}\n`);
+ await git(['add', 'agent.log'], stableClone);
+ await git(['commit', '-m', `feat: agent commit ${i + 1}`], stableClone);
+
+ start = performance.now();
+ try {
+ await git(['pull', '--rebase', 'origin', 'main'], stableClone);
+ await git(['push', 'origin', 'main'], stableClone);
+ results.push({ operation: 'commit_push', latencyMs: performance.now() - start });
+ } catch (err) {
+ results.push({ operation: 'commit_push', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
+ }
+
+ fs.rmSync(coldTarget, { recursive: true, force: true });
+ console.log(` Iteration ${i + 1}/${iterations} complete`);
+ }
+
+ const successful = results.filter(r => !r.error);
+ const operationValues = (op: GitIterationResult['operation']) =>
+ results.filter(r => r.operation === op && !r.error).map(r => r.latencyMs);
+
+ return {
+ provider: 'local-git',
+ mode: 'git',
+ fixtureCommitCount,
+ iterations,
+ results,
+ summary: {
+ coldCloneMs: computeStats(operationValues('cold_clone')),
+ incrementalFetchMs: computeStats(operationValues('incremental_fetch')),
+ commitPushMs: computeStats(operationValues('commit_push')),
+ },
+ successRate: results.length ? successful.length / results.length : 0,
+ };
+ } finally {
+ fs.rmSync(root, { recursive: true, force: true });
+ }
+}
+
+export async function writeGitResultsJson(result: GitBenchmarkResult, outPath: string): Promise<void> {
+ const rounded = {
+ ...result,
+ summary: {
+ coldCloneMs: roundStats(result.summary.coldCloneMs),
+ incrementalFetchMs: roundStats(result.summary.incrementalFetchMs),
+ commitPushMs: roundStats(result.summary.commitPushMs),
+ },
+ results: result.results.map(r => ({
+ ...r,
+ latencyMs: round(r.latencyMs),
+ })),
+ successRate: round(result.successRate),
+ };
+
+ fs.writeFileSync(outPath, JSON.stringify({
+ version: '1.0',
+ timestamp: new Date().toISOString(),
+ result: rounded,
+ }, null, 2));
+}
+
+function round(value: number): number {
+ return Math.round(value * 100) / 100;
+}
+
+function roundStats(stats: { median: number; p95: number; p99: number }) {
+ return {
+ median: round(stats.median),
+ p95: round(stats.p95),
+ p99: round(stats.p99),
+ };
+}
diff --git a/src/git/table.ts b/src/git/table.ts
new file mode 100644
index 0000000..fd64d9f
--- /dev/null
+++ b/src/git/table.ts
@@ -0,0 +1,17 @@
+import type { GitBenchmarkResult } from './types.js';
+
+function fmt(ms: number): string {
+ return `${(ms / 1000).toFixed(2)}s`;
+}
+
+export function printGitResults(result: GitBenchmarkResult): void {
+ console.log('\n--- Git Benchmark Results ---');
+ console.log(`Provider: ${result.provider}`);
+ console.log(`Fixture commits: ${result.fixtureCommitCount}`);
+ console.log(`Iterations: ${result.iterations}`);
+ console.log(`Success rate: ${(result.successRate * 100).toFixed(1)}%`);
+ console.log('');
+ console.log(`Cold clone median ${fmt(result.summary.coldCloneMs.median)} p95 ${fmt(result.summary.coldCloneMs.p95)} p99 ${fmt(result.summary.coldCloneMs.p99)}`);
+ console.log(`Incremental fetch median ${fmt(result.summary.incrementalFetchMs.median)} p95 ${fmt(result.summary.incrementalFetchMs.p95)} p99 ${fmt(result.summary.incrementalFetchMs.p99)}`);
+ console.log(`Commit + push median ${fmt(result.summary.commitPushMs.median)} p95 ${fmt(result.summary.commitPushMs.p95)} p99 ${fmt(result.summary.commitPushMs.p99)}`);
+}
diff --git a/src/git/types.ts b/src/git/types.ts
new file mode 100644
index 0000000..10bc72d
--- /dev/null
+++ b/src/git/types.ts
@@ -0,0 +1,30 @@
+export type GitOperation = 'cold_clone' | 'incremental_fetch' | 'commit_push';
+
+export interface GitIterationResult {
+ operation: GitOperation;
+ latencyMs: number;
+ transferBytes?: number;
+ error?: string;
+}
+
+export interface GitOperationStats {
+ median: number;
+ p95: number;
+ p99: number;
+}
+
+export interface GitBenchmarkSummary {
+ coldCloneMs: GitOperationStats;
+ incrementalFetchMs: GitOperationStats;
+ commitPushMs: GitOperationStats;
+}
+
+export interface GitBenchmarkResult {
+ provider: string;
+ mode: 'git';
+ fixtureCommitCount: number;
+ iterations: number;
+ results: GitIterationResult[];
+ summary: GitBenchmarkSummary;
+ successRate: number;
+}
diff --git a/src/run.ts b/src/run.ts
index fdd5962..ff960f6 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -9,7 +9,9 @@ import { runConcurrentBenchmark } from './sandbox/concurrent.js';
import { runStaggeredBenchmark } from './sandbox/staggered.js';
import { runStorageBenchmark, writeStorageResultsJson } from './storage/benchmark.js';
import { runBrowserBenchmark, writeBrowserResultsJson } from './browser/benchmark.js';
+import { runGitBenchmark, writeGitResultsJson } from './git/benchmark.js';
import { printResultsTable, writeResultsJson } from './sandbox/table.js';
+import { printGitResults } from './git/table.js';
import { providers } from './sandbox/providers.js';
import { storageProviders } from './storage/providers.js';
import { browserProviders } from './browser/providers.js';
@@ -38,26 +40,56 @@ function getArgValue(args: string[], flag: string): string | undefined {
}
/** Resolve which modes to run */
-function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] {
+function getModesToRun(): BenchmarkMode[] | ['storage'] | ['browser'] | ['git'] {
if (!rawMode) return ['sequential', 'staggered', 'burst'];
if (rawMode === 'storage') return ['storage'];
if (rawMode === 'browser') return ['browser'];
+ if (rawMode === 'git') return ['git'];
const m = rawMode === 'concurrent' ? 'burst' : rawMode as BenchmarkMode;
return [m];
}
/** Map mode to results subdirectory name */
-function modeToDir(m: BenchmarkMode | 'storage'): string {
+function modeToDir(m: BenchmarkMode | 'storage' | 'git'): string {
switch (m) {
case 'sequential': return 'sequential_tti';
case 'staggered': return 'staggered_tti';
case 'burst':
case 'concurrent': return 'burst_tti';
case 'storage': return 'storage';
+ case 'git': return 'git';
default: return `${m}_tti`;
}
}
+async function runGit(): Promise<void> {
+ const fixtureCommitCount = parseInt(getArgValue(args, '--fixture-commits') || '1000', 10);
+
+ console.log('\n' + '='.repeat(70));
+ console.log(' MODE: GIT');
+ console.log(` Iterations: ${iterations}`);
+ console.log(` Fixture commits: ${fixtureCommitCount}`);
+ console.log('='.repeat(70));
+
+ const result = await runGitBenchmark({
+ iterations,
+ fixtureCommitCount,
+ });
+
+ printGitResults(result);
+
+ const timestamp = new Date().toISOString().slice(0, 10);
+ const resultsDir = path.resolve(__dirname, '../results/git');
+ fs.mkdirSync(resultsDir, { recursive: true });
+
+ const outPath = path.join(resultsDir, `${timestamp}.json`);
+ await writeGitResultsJson(result, outPath);
+
+ const latestPath = path.join(resultsDir, 'latest.json');
+ fs.copyFileSync(outPath, latestPath);
+ console.log(`Copied latest: ${latestPath}`);
+}
+
async function runMode(mode: BenchmarkMode, toRun: typeof providers): Promise<void> {
console.log('\n' + '='.repeat(70));
console.log(` MODE: ${mode.toUpperCase()}`);
@@ -246,6 +278,14 @@ async function main() {
return;
}
+ if (modes[0] === 'git') {
+ console.log('ComputeSDK Git Infrastructure Benchmark (Draft)');
+ console.log(`Date: ${new Date().toISOString()}\n`);
+ await runGit();
+ console.log('\nGit benchmark complete.');
+ return;
+ }
+
// Handle storage mode separately
if (modes[0] === 'storage') {
console.log('ComputeSDK Storage Provider Benchmarks');