164 changes: 164 additions & 0 deletions GIT_BENCHMARK_SPEC.md
@@ -0,0 +1,164 @@
# Git Infrastructure Benchmark v1 (Draft)

This document proposes a reproducible benchmark for Git platforms used by humans and AI agents.

## Scope

The benchmark compares two categories without forcing them into the same product assumptions:

- Traditional SCM hosts: GitHub, GitLab, Bitbucket
- API-first / agent-native Git platforms: Freestyle, code.storage

Optional baseline:

- Self-hosted Forge: Gitea or Forgejo

## Goals

- Measure real developer and agent workflows (`clone`, `fetch`, `commit`, `push`, PR/MR-like flow)
- Stress deep history and high commit volume behavior
- Capture reliability under concurrency and failure conditions
- Produce repeatable p50/p95/p99 metrics and cost-normalized comparisons

## Fairness Rules

- Use the same repo fixtures, commit graph, and runner hardware across providers
- Separate cold and warm runs; do not mix results
- Run each scenario at least 20 times and report variance
- Pin runner region for single-region tests; report multi-region separately
- Use provider-native auth and recommended SDK/API paths where applicable
- Report unsupported capabilities explicitly as `N/A`, not `0`

## Test Matrix

### A) Core Git Transport

- `T1` Cold full clone (small, medium, large history)
- `T2` Warm full clone (same fixture)
- `T3` Shallow clone (`--depth 1`, `--depth 100`)
- `T4` Partial clone (`--filter=blob:none`) when supported
- `T5` Incremental fetch after `10`, `100`, `1000` new commits
- `T6` Push latency: single small commit, single large diff, binary/LFS payload
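Each transport test reduces to timing a single process invocation. A minimal sketch of such a timer, assuming a Node/TypeScript harness like the rest of this repo (the helper name and buffer size are illustrative, not part of the spec):

```typescript
import { execFile } from 'child_process';
import { promisify } from 'util';
import { performance } from 'perf_hooks';

const execFileAsync = promisify(execFile);

// Time one command invocation, e.g. a T3 shallow clone:
//   await timeCommand('git', ['clone', '--depth', '1', remoteUrl, targetDir], cwd)
export async function timeCommand(cmd: string, args: string[], cwd: string): Promise<number> {
  const start = performance.now();
  await execFileAsync(cmd, args, { cwd, maxBuffer: 10 * 1024 * 1024 });
  return performance.now() - start;
}
```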

### B) High-Commit and Deep-History Stress

- `T7` Bulk-history clone: repositories at ~1k, ~10k, ~100k, ~1M commits
- `T8` Continuous tiny commits: each worker creates 200 commits and pushes every 20 commits
- `T9` Batch push: one push containing 500+ commits after branch divergence
- `T10` Rebase/squash stress: large rewritten history push behavior and visibility delay
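T8's cadence (commit some total, push every N commits) is worth pinning down deterministically so every provider sees the identical push schedule. One possible helper, a sketch rather than part of the spec:

```typescript
// Returns the 1-based commit indices at which a T8 worker pushes:
// every `batch` commits, plus a final push if the total is not a multiple.
export function pushPoints(total: number, batch: number): number[] {
  const points: number[] = [];
  for (let i = 1; i <= total; i++) {
    if (i % batch === 0 || i === total) points.push(i);
  }
  return points;
}
```

For the spec's 200 commits pushed every 20 commits, this yields 10 pushes at commits 20, 40, ..., 200.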

### C) Collaboration and Automation

- `T11` Branch lifecycle: create/switch/merge/delete at scale
- `T12` PR/MR lifecycle: open, attach diff, add comments/review state, merge with checks
- `T13` Webhook e2e latency: `git push` to webhook receiver timestamp
- `T14` CI trigger latency: push-to-first-job and push-to-first-log-line

### D) API / SDK Workflows

- `T15` Programmatic repo create and remote URL retrieval
- `T16` Programmatic branch create and commit write
- `T17` Programmatic diff/log/list metadata read
- `T18` Sync behaviors with GitHub mirror/sync features (where offered)

### E) Reliability and Recovery

- `T19` Parallel workers: 50 and 100 workers performing branch+commit+push loops
- `T20` Failure injection: token expiration, transient network drops, retriable 5xx
- `T21` Event reliability: webhook delivery success and retry completion
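For T20, the harness needs one consistent retry policy so "retry success" is measured the same way against every provider. A possible wrapper (the attempt count and blanket catch are assumptions; a real harness would only retry errors it classifies as transient):

```typescript
// Retry an async operation up to `attempts` times, rethrowing the last error.
export async function withRetry<T>(op: () => Promise<T>, attempts = 3): Promise<T> {
  let lastErr: unknown;
  for (let i = 0; i < attempts; i++) {
    try {
      return await op();
    } catch (err) {
      lastErr = err; // a real harness would retry only transient/5xx errors
    }
  }
  throw lastErr;
}
```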

### F) Security, Governance, and Cost

- `T22` Access controls and token scope granularity (capability checklist)
- `T23` Auditability: log availability and export ergonomics
- `T24` Cost model based on measured usage: storage, transfer, request/operation costs

## Standard Workload Profiles

- `human-dev`: low concurrency, frequent small fetch/push, PR-heavy
- `agent-burst`: high concurrency, many branches, high commit/write frequency
- `ci-heavy`: frequent clone/fetch and webhook/CI trigger sensitivity

Each profile runs the same test IDs with different concurrency and payload settings.
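One way to encode the profiles as harness configuration; the numbers below are placeholders (the spec fixes the test IDs but not these exact settings, apart from agent-burst mirroring the T8 cadence):

```typescript
interface WorkloadProfile {
  concurrency: number;      // parallel workers
  commitsPerWorker: number; // write volume per worker
  pushEvery: number;        // push cadence, in commits
}

// Illustrative settings only.
export const profiles: Record<string, WorkloadProfile> = {
  'human-dev': { concurrency: 2, commitsPerWorker: 10, pushEvery: 1 },
  'agent-burst': { concurrency: 100, commitsPerWorker: 200, pushEvery: 20 },
  'ci-heavy': { concurrency: 20, commitsPerWorker: 5, pushEvery: 1 },
};
```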

## Metrics to Record

For each test and scenario, capture:

- Latency: p50, p95, p99, min, max
- Throughput: operations/sec, MB/sec
- Reliability: success rate, error class distribution, retry success
- Transfer characteristics: packfile bytes, wall-clock transfer, server processing delay
- Freshness: time from push accepted to ref visibility/API visibility
- Cost units: storage GB-month, transfer GB, operation/API counts
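The latency percentiles can be computed with the nearest-rank convention; this is one common choice, and the spec does not mandate a specific interpolation method:

```typescript
// Nearest-rank percentile over recorded latencies.
export function percentile(values: number[], p: number): number {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length);
  return sorted[Math.max(0, rank - 1)];
}
```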

## Repo Fixture Generator (Deterministic)

Generate identical fixture repositories with a seeded generator:

- Sizes: tiny (~10MB), medium (~1GB logical history), large (deep history + binaries)
- Commit graph: linear, fan-out branches, merge-heavy, rebased segments
- File mix: text-heavy, binary-heavy, optional LFS tracks
- Churn model: hot files (frequent edits) + cold files (rare edits)

Publish the fixture seed, generation script version, and resulting commit hashes.
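Determinism hinges on a seedable PRNG. One compact option is mulberry32 (the algorithm choice is an assumption; any published seedable generator works, provided the seed and generator version are recorded as required above):

```typescript
// Deterministic seeded PRNG (mulberry32): same seed, same sequence, on any runner.
export function mulberry32(seed: number): () => number {
  let a = seed >>> 0;
  return () => {
    a = (a + 0x6d2b79f5) >>> 0;
    let t = a;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296; // uniform in [0, 1)
  };
}
```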

## Initial Scoring Model (v1)

- 40% performance (latency + throughput)
- 25% reliability
- 15% workflow/API completeness
- 10% security/governance
- 10% cost efficiency

Rules:

- Reliability is multiplicative within relevant sections (high failure rates cap score)
- Unsupported feature for optional tests stays `N/A`; required unsupported features score `0` for that metric
- Publish both weighted composite and raw metric tables
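A sketch of the weighted composite under these rules. How `N/A` categories interact with the weights is one open interpretation; shown here as dropping the category and renormalizing the remaining weights, which is an assumption rather than a settled rule:

```typescript
// v1 weights from the scoring model above.
const weights = { performance: 0.4, reliability: 0.25, workflow: 0.15, security: 0.1, cost: 0.1 };

// Category scores in [0, 1]; `null` marks N/A. N/A categories are dropped and
// the remaining weights renormalized (one possible N/A rule).
export function composite(scores: Partial<Record<keyof typeof weights, number | null>>): number {
  let total = 0;
  let weightSum = 0;
  for (const [key, w] of Object.entries(weights) as [keyof typeof weights, number][]) {
    const s = scores[key];
    if (s === null || s === undefined) continue;
    total += s * w;
    weightSum += w;
  }
  return weightSum > 0 ? total / weightSum : 0;
}
```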

## Output Schema (JSON)

```json
{
  "benchmark": "git-infra-v1",
  "date": "2026-05-06",
  "provider": "github",
  "profile": "agent-burst",
  "testId": "T19",
  "fixture": {
    "name": "deep-history-large",
    "seed": 42,
    "commitCount": 100000
  },
  "run": {
    "region": "us-east-1",
    "attempts": 30,
    "successRate": 0.97
  },
  "metrics": {
    "latencyMs": { "p50": 920, "p95": 2410, "p99": 3900 },
    "throughput": { "opsPerSec": 7.2, "mbPerSec": 48.5 },
    "freshnessMs": { "pushToRefVisibleP50": 340 }
  },
  "cost": {
    "storageGbMonth": 12.4,
    "egressGb": 88.1,
    "apiOps": 13420
  }
}
```
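The schema can be mirrored as a TypeScript type so harness output is checked at compile time. Field names follow the sample above; treating every field as required is an assumption, since the spec shows only a single example record:

```typescript
// TypeScript mirror of the JSON output schema.
export interface GitBenchRecord {
  benchmark: string;
  date: string;
  provider: string;
  profile: string;
  testId: string;
  fixture: { name: string; seed: number; commitCount: number };
  run: { region: string; attempts: number; successRate: number };
  metrics: {
    latencyMs: { p50: number; p95: number; p99: number };
    throughput: { opsPerSec: number; mbPerSec: number };
    freshnessMs: { pushToRefVisibleP50: number };
  };
  cost: { storageGbMonth: number; egressGb: number; apiOps: number };
}
```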

## Minimal v1 Launch Plan

Start with 8 tests that provide immediate signal:

- `T1`, `T3`, `T5`, `T6`, `T7`, `T12`, `T13`, `T19`

Then add API and governance layers (`T15+`, `T22+`) in v1.1.

## Open Questions

- Should CI timing be benchmarked using each provider's native CI only, or externalized CI only?
- Should mirrored/synced GitHub repos be scored separately from primary repo storage?
- How should per-seat pricing be normalized against pure usage-based models?
1 change: 1 addition & 0 deletions README.md
@@ -103,6 +103,7 @@ Sponsors enable independent benchmark infrastructure.
- [ ] Cold start vs warm start metrics
- [ ] Multi-region testing
- [x] Cost-per-sandbox-minute
- [ ] Git infrastructure benchmark ([draft spec](./GIT_BENCHMARK_SPEC.md))

<br>

1 change: 1 addition & 0 deletions package.json
@@ -28,6 +28,7 @@
"bench:browser:steel": "tsx src/run.ts --mode browser --provider steel",
"bench:browser:browseruse": "tsx src/run.ts --mode browser --provider browseruse",
"bench:browser:anchorbrowser": "tsx src/run.ts --mode browser --provider anchorbrowser",
"bench:git": "tsx src/run.ts --mode git",
"bench:storage": "tsx src/run.ts --mode storage",
"bench:storage:s3": "tsx src/run.ts --mode storage --provider aws-s3",
"bench:storage:r2": "tsx src/run.ts --mode storage --provider cloudflare-r2",
150 changes: 150 additions & 0 deletions src/git/benchmark.ts
@@ -0,0 +1,150 @@
import fs from 'fs';
import os from 'os';
import path from 'path';
import { promisify } from 'util';
import { execFile } from 'child_process';
import { performance } from 'perf_hooks';
import { computeStats } from '../util/stats.js';
import type { GitBenchmarkResult, GitIterationResult } from './types.js';

const execFileAsync = promisify(execFile);

interface GitRunConfig {
  iterations: number;
  fixtureCommitCount: number;
}

async function git(args: string[], cwd: string): Promise<{ stdout: string; stderr: string }> {
  return execFileAsync('git', args, { cwd, maxBuffer: 10 * 1024 * 1024 });
}

async function seedFixture(remoteDir: string, workDir: string, commitCount: number): Promise<void> {
  fs.mkdirSync(workDir, { recursive: true });
  await git(['init'], workDir);
  await git(['config', 'user.name', 'Benchmark Bot'], workDir);
  await git(['config', 'user.email', 'bench@example.com'], workDir);
  fs.writeFileSync(path.join(workDir, 'README.md'), '# git fixture\n');
  await git(['add', '.'], workDir);
  await git(['commit', '-m', 'chore: initial commit'], workDir);

  const historyFile = path.join(workDir, 'history.txt');
  for (let i = 0; i < commitCount; i++) {
    fs.appendFileSync(historyFile, `line-${i}\n`);
    await git(['add', 'history.txt'], workDir);
    await git(['commit', '-m', `chore: seed ${i + 1}`], workDir);
  }

  await git(['branch', '-M', 'main'], workDir);
  await git(['remote', 'add', 'origin', remoteDir], workDir);
  await git(['push', '-u', 'origin', 'main'], workDir);
}

export async function runGitBenchmark(config: GitRunConfig): Promise<GitBenchmarkResult> {
  const { iterations, fixtureCommitCount } = config;
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'git-bench-'));
  const remote = path.join(root, 'remote.git');
  const fixtureWriter = path.join(root, 'fixture-writer');
  const stableClone = path.join(root, 'stable-clone');
  const results: GitIterationResult[] = [];

  try {
    // A local bare repo stands in for the remote; seed it, then keep one warm clone.
    await git(['init', '--bare', remote], root);
    await seedFixture(remote, fixtureWriter, fixtureCommitCount);
    await git(['clone', remote, stableClone], root);

    for (let i = 0; i < iterations; i++) {
      // Cold clone into a fresh directory each iteration.
      const coldTarget = path.join(root, `cold-${i}`);
      let start = performance.now();
      try {
        await git(['clone', remote, coldTarget], root);
        results.push({ operation: 'cold_clone', latencyMs: performance.now() - start });
      } catch (err) {
        results.push({ operation: 'cold_clone', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
      }

      // Advance the remote by one commit so the next fetch is truly incremental.
      const churnFile = path.join(fixtureWriter, 'churn.txt');
      fs.appendFileSync(churnFile, `tick-${i}\n`);
      await git(['add', 'churn.txt'], fixtureWriter);
      await git(['commit', '-m', `chore: churn ${i + 1}`], fixtureWriter);
      await git(['push', 'origin', 'main'], fixtureWriter);

      start = performance.now();
      try {
        await git(['fetch', 'origin'], stableClone);
        results.push({ operation: 'incremental_fetch', latencyMs: performance.now() - start });
      } catch (err) {
        results.push({ operation: 'incremental_fetch', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
      }

      // Commit locally, then time rebase + push together (the agent write path).
      const commitFile = path.join(stableClone, 'agent.log');
      fs.appendFileSync(commitFile, `push-${i}\n`);
      await git(['add', 'agent.log'], stableClone);
      await git(['commit', '-m', `feat: agent commit ${i + 1}`], stableClone);

      start = performance.now();
      try {
        await git(['pull', '--rebase', 'origin', 'main'], stableClone);
        await git(['push', 'origin', 'main'], stableClone);
        results.push({ operation: 'commit_push', latencyMs: performance.now() - start });
      } catch (err) {
        results.push({ operation: 'commit_push', latencyMs: 0, error: err instanceof Error ? err.message : String(err) });
      }

      fs.rmSync(coldTarget, { recursive: true, force: true });
      console.log(` Iteration ${i + 1}/${iterations} complete`);
    }

    const successful = results.filter(r => !r.error);
    const operationValues = (op: GitIterationResult['operation']) =>
      results.filter(r => r.operation === op && !r.error).map(r => r.latencyMs);

    return {
      provider: 'local-git',
      mode: 'git',
      fixtureCommitCount,
      iterations,
      results,
      summary: {
        coldCloneMs: computeStats(operationValues('cold_clone')),
        incrementalFetchMs: computeStats(operationValues('incremental_fetch')),
        commitPushMs: computeStats(operationValues('commit_push')),
      },
      successRate: results.length ? successful.length / results.length : 0,
    };
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}

export async function writeGitResultsJson(result: GitBenchmarkResult, outPath: string): Promise<void> {
  const rounded = {
    ...result,
    summary: {
      coldCloneMs: roundStats(result.summary.coldCloneMs),
      incrementalFetchMs: roundStats(result.summary.incrementalFetchMs),
      commitPushMs: roundStats(result.summary.commitPushMs),
    },
    results: result.results.map(r => ({
      ...r,
      latencyMs: round(r.latencyMs),
    })),
    successRate: round(result.successRate),
  };

  fs.writeFileSync(outPath, JSON.stringify({
    version: '1.0',
    timestamp: new Date().toISOString(),
    result: rounded,
  }, null, 2));
}

function round(value: number): number {
  return Math.round(value * 100) / 100;
}

function roundStats(stats: { median: number; p95: number; p99: number }) {
  return {
    median: round(stats.median),
    p95: round(stats.p95),
    p99: round(stats.p99),
  };
}
17 changes: 17 additions & 0 deletions src/git/table.ts
@@ -0,0 +1,17 @@
import type { GitBenchmarkResult } from './types.js';

function fmt(ms: number): string {
  return `${(ms / 1000).toFixed(2)}s`;
}

export function printGitResults(result: GitBenchmarkResult): void {
  console.log('\n--- Git Benchmark Results ---');
  console.log(`Provider: ${result.provider}`);
  console.log(`Fixture commits: ${result.fixtureCommitCount}`);
  console.log(`Iterations: ${result.iterations}`);
  console.log(`Success rate: ${(result.successRate * 100).toFixed(1)}%`);
  console.log('');
  console.log(`Cold clone median ${fmt(result.summary.coldCloneMs.median)} p95 ${fmt(result.summary.coldCloneMs.p95)} p99 ${fmt(result.summary.coldCloneMs.p99)}`);
  console.log(`Incremental fetch median ${fmt(result.summary.incrementalFetchMs.median)} p95 ${fmt(result.summary.incrementalFetchMs.p95)} p99 ${fmt(result.summary.incrementalFetchMs.p99)}`);
  console.log(`Commit + push median ${fmt(result.summary.commitPushMs.median)} p95 ${fmt(result.summary.commitPushMs.p95)} p99 ${fmt(result.summary.commitPushMs.p99)}`);
}
30 changes: 30 additions & 0 deletions src/git/types.ts
@@ -0,0 +1,30 @@
export type GitOperation = 'cold_clone' | 'incremental_fetch' | 'commit_push';

export interface GitIterationResult {
  operation: GitOperation;
  latencyMs: number;
  transferBytes?: number;
  error?: string;
}

export interface GitOperationStats {
  median: number;
  p95: number;
  p99: number;
}

export interface GitBenchmarkSummary {
  coldCloneMs: GitOperationStats;
  incrementalFetchMs: GitOperationStats;
  commitPushMs: GitOperationStats;
}

export interface GitBenchmarkResult {
  provider: string;
  mode: 'git';
  fixtureCommitCount: number;
  iterations: number;
  results: GitIterationResult[];
  summary: GitBenchmarkSummary;
  successRate: number;
}