From 04aad35d369c977c1461e020960b1807f33150b8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 5 Feb 2026 16:43:43 -0800 Subject: [PATCH 1/3] Add benchmark instrumentation and benchmark suite Signed-off-by: Cong Wang --- bench/README.md | 100 ++++++++ bench/branchfs_bench.py | 536 ++++++++++++++++++++++++++++++++++++++++ bench/plot_results.py | 194 +++++++++++++++ bench/quick_bench.sh | 212 ++++++++++++++++ src/branch.rs | 17 ++ 5 files changed, 1059 insertions(+) create mode 100644 bench/README.md create mode 100755 bench/branchfs_bench.py create mode 100755 bench/plot_results.py create mode 100755 bench/quick_bench.sh diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..8807bf0 --- /dev/null +++ b/bench/README.md @@ -0,0 +1,100 @@ +# BranchFS Benchmarks + +Microbenchmark suite for evaluating BranchFS performance. This suite measures **internal operation timing** by parsing daemon logs, providing accurate measurements without CLI/socket overhead. + +## Prerequisites + +```bash +# Build branchfs +cd .. +cargo build --release + +# For plotting (optional) +pip install matplotlib +``` + +## Running Benchmarks + +### Quick Test (Shell Script) + +```bash +./quick_bench.sh +``` + +### Full Benchmark Suite (Python) + +```bash +# Run all benchmarks +python3 branchfs_bench.py + +# Quick mode (smaller parameters) +python3 branchfs_bench.py --quick + +# Specific benchmark +python3 branchfs_bench.py --bench creation # Branch creation O(1) test +python3 branchfs_bench.py --bench commit # Commit latency +python3 branchfs_bench.py --bench abort # Abort latency +python3 branchfs_bench.py --bench throughput # Read/write throughput +python3 branchfs_bench.py --bench nested # Nested branch depth + +# Generate LaTeX tables for paper +python3 branchfs_bench.py --latex ../paper/ +``` + +## Benchmarks + +### 1. Branch Creation Latency (O(1) Verification) + +Measures internal branch creation time with varying base directory sizes. +Validates that branch creation is O(1) - constant time regardless of base size. + +- **Parameters**: Base sizes of 100, 1K, 10K files +- **Expected**: ~300 µs constant latency + +### 2. Commit Latency + +Measures internal commit time for varying modification sizes. +Validates linear scaling with modification size. + +- **Parameters**: Modifications of 1KB, 10KB, 100KB, 1MB +- **Expected**: Linear increase with modification size + +### 3. Abort Latency + +Measures internal abort time. Abort only removes delta storage. + +- **Parameters**: Modifications of 1KB, 100KB, 1MB +- **Expected**: Fast, independent of modification size + +### 4. Read/Write Throughput + +Measures sequential read/write throughput through FUSE layer. + +- **Parameters**: 50MB file, 64KB block size +- **Output**: MB/s throughput + +### 5. Nested Branch Depth + +Measures read latency at various branch depths. +Tests path resolution overhead. + +- **Parameters**: Depths 1, 2, 4, 8 +- **Expected**: Minimal increase with depth (metadata cached) + +## Internal Timing + +The benchmark parses `[BENCH]` log lines from the daemon to measure actual operation time: + +``` +[BENCH] create_branch 'test': 287.461µs (287 us) +[BENCH] commit 'test': 321.194µs (321 us), 0 deletions, 1 files, 12 bytes +[BENCH] abort 'test': 156.234µs (156 us) +``` + +This excludes CLI process spawn and socket communication overhead (~200ms), giving accurate internal operation times. 
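+
+For quick manual checks outside the Python harness, the same `[BENCH]` lines can be pulled from a saved daemon log with standard tools. A minimal sketch, assuming the `branchfs` binary is on `PATH` and the daemon's output was redirected to `daemon.log` (a placeholder path):
+
+```bash
+# Run the daemon with debug logging so [BENCH] lines are emitted
+RUST_LOG=debug branchfs mount --base ./base --storage ./storage ./mnt \
+    > daemon.log 2>&1 &
+
+# Extract commit latencies: keep only the "(N us)" field, one value per line
+grep '\[BENCH\] commit' daemon.log | sed -n 's/.*(\([0-9]*\) us).*/\1/p'
+```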
+ +## Output + +- `bench_results.json`: Raw benchmark data +- `branch_creation_table.tex`: LaTeX table for paper +- `commit_table.tex`: LaTeX table for paper diff --git a/bench/branchfs_bench.py b/bench/branchfs_bench.py new file mode 100755 index 0000000..7078bbc --- /dev/null +++ b/bench/branchfs_bench.py @@ -0,0 +1,536 @@ +#!/usr/bin/env python3 +""" +BranchFS Microbenchmark Suite + +Benchmarks: +1. Branch creation latency vs base directory size (O(1) verification) +2. Commit latency vs modification size +3. Abort latency +4. Read/write throughput + +This benchmark captures internal daemon timing via log parsing for accurate +measurements without CLI/socket overhead. + +Usage: + python3 branchfs_bench.py [--branchfs PATH] [--output results.json] +""" + +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile +import time +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Optional + + +@dataclass +class BenchmarkResult: + name: str + params: dict + latency_us: float # microseconds + throughput_mbps: Optional[float] = None + iterations: int = 1 + + +class BranchFSBench: + def __init__(self, branchfs_bin: str = "branchfs"): + self.branchfs = branchfs_bin + self.results: list[BenchmarkResult] = [] + + def _run(self, *args, check=True, env=None) -> subprocess.CompletedProcess: + """Run branchfs command.""" + cmd = [self.branchfs] + list(args) + return subprocess.run(cmd, check=check, capture_output=True, text=True, env=env) + + def _start_daemon(self, base_dir: Path, storage: Path, mountpoint: Path, log_file: Path): + """Start branchfs daemon with logging enabled.""" + env = os.environ.copy() + env["RUST_LOG"] = "debug" + + with open(log_file, "w") as f: + proc = subprocess.Popen( + [self.branchfs, "mount", "--base", str(base_dir), + "--storage", str(storage), str(mountpoint)], + env=env, + stdout=f, + stderr=subprocess.STDOUT + ) + time.sleep(1.5) # Wait for mount + return proc + + def _stop_daemon(self, mountpoint: Path, storage: Path): + """Stop daemon and unmount.""" + try: + self._run("unmount", str(mountpoint), "--storage", str(storage), check=False) + except: + pass + subprocess.run(["fusermount3", "-u", str(mountpoint)], + capture_output=True, check=False) + time.sleep(0.2) + + def _parse_bench_logs(self, log_file: Path, operation: str) -> list[float]: + """Parse [BENCH] lines from daemon log and extract latencies in microseconds.""" + latencies = [] + pattern = rf"\[BENCH\] {operation}.*\((\d+) us\)" + + with open(log_file) as f: + for line in f: + match = re.search(pattern, line) + if match: + latencies.append(float(match.group(1))) + return latencies + + def _create_base_files(self, base_dir: Path, num_files: int, file_size: int = 1024): + """Create test files in base directory.""" + base_dir.mkdir(parents=True, exist_ok=True) + for i in range(num_files): + subdir = base_dir / f"dir_{i // 100}" + subdir.mkdir(exist_ok=True) + file_path = subdir / f"file_{i}.txt" + file_path.write_bytes(os.urandom(file_size)) + + # ========================================================================= + # Benchmark 1: Branch Creation Latency vs Base Size (O(1) verification) + # ========================================================================= + def bench_branch_creation(self, base_sizes: list[int] = [100, 1000, 10000], + iterations: int = 5): + """Measure internal branch creation time for various base directory sizes.""" + print("\n=== Benchmark: Branch Creation Latency (Internal) ===") + + 
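+        # Each base size gets its own mount, storage dir, and log file. The
+        # O(1) claim holds if the averaged [BENCH] create_branch latency
+        # parsed below stays flat as num_files grows.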
with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + for num_files in base_sizes: + print(f" Testing with {num_files} files...") + + base_dir = tmpdir / f"base_{num_files}" + mountpoint = tmpdir / f"mnt_{num_files}" + storage = tmpdir / f"storage_{num_files}" + log_file = tmpdir / f"daemon_{num_files}.log" + + mountpoint.mkdir() + storage.mkdir() + + # Create base files + self._create_base_files(base_dir, num_files) + + # Start daemon with logging + proc = self._start_daemon(base_dir, storage, mountpoint, log_file) + + try: + for i in range(iterations): + branch_name = f"bench_{i}" + self._run("create", branch_name, str(mountpoint), + "--storage", str(storage)) + self._run("abort", str(mountpoint), "--storage", str(storage), + check=False) + + # Parse internal timing from logs + latencies = self._parse_bench_logs(log_file, "create_branch") + if latencies: + avg_lat = sum(latencies) / len(latencies) + result = BenchmarkResult( + name="branch_creation", + params={"base_files": num_files}, + latency_us=avg_lat, + iterations=len(latencies) + ) + self.results.append(result) + print(f" {num_files} files: {avg_lat:.1f} us (internal, avg of {len(latencies)})") + else: + print(f" {num_files} files: No timing data found in logs") + + finally: + self._stop_daemon(mountpoint, storage) + proc.wait(timeout=5) + + # ========================================================================= + # Benchmark 2: Commit Latency vs Modification Size + # ========================================================================= + def bench_commit_latency(self, mod_sizes_kb: list[int] = [1, 10, 100, 1000], + iterations: int = 3): + """Measure internal commit time for various modification sizes.""" + print("\n=== Benchmark: Commit Latency (Internal) ===") + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + base_dir = tmpdir / "base" + + # Create modest base (100 files, 1KB each) + self._create_base_files(base_dir, 100, file_size=1024) + + for mod_size_kb in mod_sizes_kb: + print(f" Testing with {mod_size_kb} KB modifications...") + + latencies = [] + for i in range(iterations): + mountpoint = tmpdir / f"mnt_{mod_size_kb}_{i}" + storage = tmpdir / f"storage_{mod_size_kb}_{i}" + log_file = tmpdir / f"daemon_{mod_size_kb}_{i}.log" + mountpoint.mkdir() + storage.mkdir() + + proc = self._start_daemon(base_dir, storage, mountpoint, log_file) + + try: + # Create branch + self._run("create", "bench", str(mountpoint), + "--storage", str(storage)) + + # Make modifications + mod_file = mountpoint / "modification.bin" + mod_file.write_bytes(os.urandom(mod_size_kb * 1024)) + os.sync() + time.sleep(0.1) + + # Commit via ctl file (direct FUSE, triggers internal timing) + (mountpoint / ".branchfs_ctl").write_text("commit") + time.sleep(0.2) + + # Parse timing + commit_lats = self._parse_bench_logs(log_file, "commit") + if commit_lats: + latencies.extend(commit_lats) + + finally: + self._stop_daemon(mountpoint, storage) + proc.wait(timeout=5) + + if latencies: + avg_lat = sum(latencies) / len(latencies) + result = BenchmarkResult( + name="commit_latency", + params={"modification_kb": mod_size_kb}, + latency_us=avg_lat, + iterations=len(latencies) + ) + self.results.append(result) + print(f" {mod_size_kb} KB: {avg_lat:.1f} us ({avg_lat/1000:.2f} ms)") + + # ========================================================================= + # Benchmark 3: Abort Latency + # ========================================================================= + def bench_abort_latency(self, mod_sizes_kb: 
list[int] = [1, 100, 1000], + iterations: int = 3): + """Measure internal abort time.""" + print("\n=== Benchmark: Abort Latency (Internal) ===") + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + base_dir = tmpdir / "base" + self._create_base_files(base_dir, 100, file_size=1024) + + for mod_size_kb in mod_sizes_kb: + print(f" Testing abort with {mod_size_kb} KB modifications...") + + latencies = [] + for i in range(iterations): + mountpoint = tmpdir / f"abort_mnt_{mod_size_kb}_{i}" + storage = tmpdir / f"abort_storage_{mod_size_kb}_{i}" + log_file = tmpdir / f"abort_daemon_{mod_size_kb}_{i}.log" + mountpoint.mkdir() + storage.mkdir() + + proc = self._start_daemon(base_dir, storage, mountpoint, log_file) + + try: + self._run("create", "bench", str(mountpoint), + "--storage", str(storage)) + + mod_file = mountpoint / "modification.bin" + mod_file.write_bytes(os.urandom(mod_size_kb * 1024)) + os.sync() + time.sleep(0.1) + + # Abort via ctl file + (mountpoint / ".branchfs_ctl").write_text("abort") + time.sleep(0.2) + + abort_lats = self._parse_bench_logs(log_file, "abort") + if abort_lats: + latencies.extend(abort_lats) + + finally: + self._stop_daemon(mountpoint, storage) + proc.wait(timeout=5) + + if latencies: + avg_lat = sum(latencies) / len(latencies) + result = BenchmarkResult( + name="abort_latency", + params={"modification_kb": mod_size_kb}, + latency_us=avg_lat, + iterations=len(latencies) + ) + self.results.append(result) + print(f" {mod_size_kb} KB: {avg_lat:.1f} us ({avg_lat/1000:.2f} ms)") + + # ========================================================================= + # Benchmark 4: Read/Write Throughput + # ========================================================================= + def bench_throughput(self, file_size_mb: int = 50, block_size_kb: int = 64): + """Measure read/write throughput on branch.""" + print("\n=== Benchmark: Read/Write Throughput ===") + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + base_dir = tmpdir / "base" + mountpoint = tmpdir / "mnt" + storage = tmpdir / "storage" + log_file = tmpdir / "daemon.log" + + base_dir.mkdir() + mountpoint.mkdir() + storage.mkdir() + + # Create a test file in base + test_file_base = base_dir / "testfile.bin" + test_file_base.write_bytes(os.urandom(file_size_mb * 1024 * 1024)) + + proc = self._start_daemon(base_dir, storage, mountpoint, log_file) + + try: + self._run("create", "bench", str(mountpoint), "--storage", str(storage)) + + test_file = mountpoint / "testfile.bin" + + # Sequential read throughput + start = time.perf_counter() + with open(test_file, "rb") as f: + while f.read(block_size_kb * 1024): + pass + read_time = time.perf_counter() - start + read_throughput = file_size_mb / read_time + + # Sequential write throughput (new file) + write_file = mountpoint / "writefile.bin" + data = os.urandom(block_size_kb * 1024) + num_blocks = (file_size_mb * 1024) // block_size_kb + + start = time.perf_counter() + with open(write_file, "wb") as f: + for _ in range(num_blocks): + f.write(data) + f.flush() + os.fsync(f.fileno()) + write_time = time.perf_counter() - start + write_throughput = file_size_mb / write_time + + self.results.append(BenchmarkResult( + name="read_throughput", + params={"file_size_mb": file_size_mb, "block_size_kb": block_size_kb}, + latency_us=read_time * 1_000_000, + throughput_mbps=read_throughput + )) + self.results.append(BenchmarkResult( + name="write_throughput", + params={"file_size_mb": file_size_mb, "block_size_kb": block_size_kb}, + 
latency_us=write_time * 1_000_000, + throughput_mbps=write_throughput + )) + + print(f" Read: {read_throughput:.1f} MB/s") + print(f" Write: {write_throughput:.1f} MB/s") + + finally: + self._stop_daemon(mountpoint, storage) + proc.wait(timeout=5) + + # ========================================================================= + # Benchmark 5: Nested Branch Depth + # ========================================================================= + def bench_nested_depth(self, depths: list[int] = [1, 2, 4, 8], + iterations: int = 50): + """Measure read latency at various branch depths.""" + print("\n=== Benchmark: Nested Branch Depth ===") + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + base_dir = tmpdir / "base" + base_dir.mkdir() + + # Create test file in base + test_file_base = base_dir / "testfile.txt" + test_file_base.write_text("Hello, World!" * 100) + + for depth in depths: + print(f" Testing depth {depth}...") + + mountpoint = tmpdir / f"mnt_{depth}" + storage = tmpdir / f"storage_{depth}" + log_file = tmpdir / f"daemon_{depth}.log" + mountpoint.mkdir() + storage.mkdir() + + proc = self._start_daemon(base_dir, storage, mountpoint, log_file) + + try: + # Create nested branches + parent = "main" + for d in range(depth): + branch_name = f"branch_{d}" + self._run("create", branch_name, str(mountpoint), + "-p", parent, "--storage", str(storage)) + parent = branch_name + + test_file = mountpoint / "testfile.txt" + + # Warm up + for _ in range(10): + test_file.read_bytes() + + # Measure read latency + latencies = [] + for _ in range(iterations): + start = time.perf_counter() + test_file.read_bytes() + lat = (time.perf_counter() - start) * 1_000_000 + latencies.append(lat) + + avg_lat = sum(latencies) / len(latencies) + self.results.append(BenchmarkResult( + name="nested_read_latency", + params={"depth": depth}, + latency_us=avg_lat, + iterations=iterations + )) + print(f" Depth {depth}: {avg_lat:.1f} us") + + finally: + self._stop_daemon(mountpoint, storage) + proc.wait(timeout=5) + + def save_results(self, output_path: str): + """Save results to JSON file.""" + results_dict = [asdict(r) for r in self.results] + with open(output_path, "w") as f: + json.dump(results_dict, f, indent=2) + print(f"\nResults saved to {output_path}") + + def print_summary(self): + """Print summary table.""" + print("\n" + "=" * 60) + print("SUMMARY (Internal Timing)") + print("=" * 60) + for r in self.results: + params_str = ", ".join(f"{k}={v}" for k, v in r.params.items()) + if r.throughput_mbps: + print(f"{r.name} ({params_str}): {r.throughput_mbps:.1f} MB/s") + else: + lat_ms = r.latency_us / 1000 + if lat_ms < 1: + print(f"{r.name} ({params_str}): {r.latency_us:.1f} us") + else: + print(f"{r.name} ({params_str}): {lat_ms:.2f} ms") + + def generate_latex_tables(self, output_dir: str = "."): + """Generate LaTeX tables for paper.""" + output_dir = Path(output_dir) + + # Branch creation table + creation_data = [r for r in self.results if r.name == "branch_creation"] + if creation_data: + with open(output_dir / "branch_creation_table.tex", "w") as f: + f.write("% Auto-generated from benchmark\n") + f.write("\\begin{tabular}{rr}\n\\toprule\n") + f.write("Base Size (files) & Creation Latency ($\\mu$s) \\\\\n\\midrule\n") + for r in creation_data: + f.write(f"{r.params['base_files']:,} & {r.latency_us:.0f} \\\\\n") + f.write("\\bottomrule\n\\end{tabular}\n") + print(f" Generated branch_creation_table.tex") + + # Commit latency table + commit_data = [r for r in self.results if r.name == 
"commit_latency"] + if commit_data: + with open(output_dir / "commit_table.tex", "w") as f: + f.write("% Auto-generated from benchmark\n") + f.write("\\begin{tabular}{rr}\n\\toprule\n") + f.write("Modification Size & Commit Latency (ms) \\\\\n\\midrule\n") + for r in commit_data: + size_kb = r.params['modification_kb'] + if size_kb >= 1024: + size_str = f"{size_kb // 1024} MB" + else: + size_str = f"{size_kb} KB" + f.write(f"{size_str} & {r.latency_us/1000:.1f} \\\\\n") + f.write("\\bottomrule\n\\end{tabular}\n") + print(f" Generated commit_table.tex") + + +def main(): + parser = argparse.ArgumentParser(description="BranchFS Microbenchmark Suite") + parser.add_argument("--branchfs", default="branchfs", + help="Path to branchfs binary") + parser.add_argument("--output", "-o", default="bench_results.json", + help="Output JSON file") + parser.add_argument("--quick", action="store_true", + help="Run quick benchmarks with smaller parameters") + parser.add_argument("--bench", choices=["creation", "commit", "abort", + "throughput", "nested", "all"], + default="all", help="Which benchmark to run") + parser.add_argument("--latex", default=None, + help="Generate LaTeX tables in specified directory") + args = parser.parse_args() + + # Check branchfs is available + if shutil.which(args.branchfs) is None: + # Try relative path + branchfs_path = Path(__file__).parent.parent / "target" / "release" / "branchfs" + if branchfs_path.exists(): + args.branchfs = str(branchfs_path) + else: + print(f"Error: branchfs binary not found. Build with 'cargo build --release'") + sys.exit(1) + + bench = BranchFSBench(args.branchfs) + + if args.quick: + base_sizes = [100, 1000] + mod_sizes = [1, 100] + abort_sizes = [1, 100] + depths = [1, 4] + iterations = 2 + else: + base_sizes = [100, 1000, 10000] + mod_sizes = [1, 10, 100, 1000] + abort_sizes = [1, 100, 1000] + depths = [1, 2, 4, 8] + iterations = 5 + + try: + if args.bench in ["creation", "all"]: + bench.bench_branch_creation(base_sizes, iterations=iterations) + + if args.bench in ["commit", "all"]: + bench.bench_commit_latency(mod_sizes, iterations=min(iterations, 3)) + + if args.bench in ["abort", "all"]: + bench.bench_abort_latency(abort_sizes, iterations=min(iterations, 3)) + + if args.bench in ["throughput", "all"]: + bench.bench_throughput(file_size_mb=20 if args.quick else 50) + + if args.bench in ["nested", "all"]: + bench.bench_nested_depth(depths, iterations=30 if args.quick else 50) + + bench.print_summary() + bench.save_results(args.output) + + if args.latex: + print(f"\nGenerating LaTeX tables in {args.latex}...") + bench.generate_latex_tables(args.latex) + + except KeyboardInterrupt: + print("\nInterrupted") + bench.print_summary() + bench.save_results(args.output) + + +if __name__ == "__main__": + main() diff --git a/bench/plot_results.py b/bench/plot_results.py new file mode 100755 index 0000000..7eb4844 --- /dev/null +++ b/bench/plot_results.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +""" +Generate paper figures from benchmark results. + +Usage: + python3 plot_results.py bench_results.json --output figures/ +""" + +import argparse +import json +import sys +from pathlib import Path + +try: + import matplotlib.pyplot as plt + import matplotlib + matplotlib.use('Agg') # Non-interactive backend +except ImportError: + print("matplotlib not installed. 
Install with: pip install matplotlib") + sys.exit(1) + + +def load_results(path: str) -> list[dict]: + with open(path) as f: + return json.load(f) + + +def plot_branch_creation(results: list[dict], output_dir: Path): + """Plot branch creation latency vs base size.""" + data = [r for r in results if r["name"] == "branch_creation"] + if not data: + print("No branch_creation data found") + return + + x = [r["params"]["base_files"] for r in data] + y = [r["latency_us"] for r in data] + + fig, ax = plt.subplots(figsize=(4, 3)) + ax.plot(x, y, 'o-', markersize=8, linewidth=2, color='#2E86AB') + ax.set_xscale('log') + ax.set_xlabel('Base Directory Size (files)', fontsize=10) + ax.set_ylabel('Branch Creation Latency (μs)', fontsize=10) + ax.set_title('(a) Branch Creation', fontsize=11) + ax.grid(True, alpha=0.3) + + # Add horizontal reference line for O(1) + avg = sum(y) / len(y) + ax.axhline(y=avg, color='red', linestyle='--', alpha=0.5, label=f'Avg: {avg:.0f} μs') + ax.legend(fontsize=8) + + plt.tight_layout() + plt.savefig(output_dir / 'branch_creation.pdf', dpi=300, bbox_inches='tight') + plt.savefig(output_dir / 'branch_creation.png', dpi=150, bbox_inches='tight') + plt.close() + print(f" Saved branch_creation.pdf") + + +def plot_commit_latency(results: list[dict], output_dir: Path): + """Plot commit latency vs modification size.""" + data = [r for r in results if r["name"] == "commit_latency"] + if not data: + print("No commit_latency data found") + return + + x = [r["params"]["modification_kb"] for r in data] + y = [r["latency_us"] / 1000 for r in data] # Convert to ms + + fig, ax = plt.subplots(figsize=(4, 3)) + ax.plot(x, y, 's-', markersize=8, linewidth=2, color='#A23B72') + ax.set_xscale('log') + ax.set_yscale('log') + ax.set_xlabel('Modification Size (KB)', fontsize=10) + ax.set_ylabel('Commit Latency (ms)', fontsize=10) + ax.set_title('(b) Commit Latency', fontsize=11) + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig(output_dir / 'commit_latency.pdf', dpi=300, bbox_inches='tight') + plt.savefig(output_dir / 'commit_latency.png', dpi=150, bbox_inches='tight') + plt.close() + print(f" Saved commit_latency.pdf") + + +def plot_combined_microbench(results: list[dict], output_dir: Path): + """Plot combined figure for paper (branch creation + commit latency).""" + creation_data = [r for r in results if r["name"] == "branch_creation"] + commit_data = [r for r in results if r["name"] == "commit_latency"] + + if not creation_data or not commit_data: + print("Missing data for combined plot") + return + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 2.8)) + + # (a) Branch creation + x1 = [r["params"]["base_files"] for r in creation_data] + y1 = [r["latency_us"] for r in creation_data] + ax1.plot(x1, y1, 'o-', markersize=6, linewidth=1.5, color='#2E86AB') + ax1.set_xscale('log') + ax1.set_xlabel('Base Directory Size (files)', fontsize=9) + ax1.set_ylabel('Latency (μs)', fontsize=9) + ax1.set_title('(a) Branch Creation', fontsize=10) + ax1.grid(True, alpha=0.3) + + # (b) Commit latency + x2 = [r["params"]["modification_kb"] for r in commit_data] + y2 = [r["latency_us"] / 1000 for r in commit_data] # Convert to ms + ax2.plot(x2, y2, 's-', markersize=6, linewidth=1.5, color='#A23B72') + ax2.set_xscale('log') + ax2.set_yscale('log') + ax2.set_xlabel('Modification Size (KB)', fontsize=9) + ax2.set_ylabel('Latency (ms)', fontsize=9) + ax2.set_title('(b) Commit Latency', fontsize=10) + ax2.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig(output_dir / 'microbench.pdf', 
dpi=300, bbox_inches='tight') + plt.savefig(output_dir / 'microbench.png', dpi=150, bbox_inches='tight') + plt.close() + print(f" Saved microbench.pdf (combined figure)") + + +def plot_nested_depth(results: list[dict], output_dir: Path): + """Plot read latency vs branch depth.""" + data = [r for r in results if r["name"] == "nested_read_latency"] + if not data: + print("No nested_read_latency data found") + return + + x = [r["params"]["depth"] for r in data] + y = [r["latency_us"] for r in data] + + fig, ax = plt.subplots(figsize=(4, 3)) + ax.plot(x, y, '^-', markersize=8, linewidth=2, color='#F18F01') + ax.set_xlabel('Branch Depth', fontsize=10) + ax.set_ylabel('Read Latency (μs)', fontsize=10) + ax.set_title('Read Latency vs Branch Depth', fontsize=11) + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig(output_dir / 'nested_depth.pdf', dpi=300, bbox_inches='tight') + plt.savefig(output_dir / 'nested_depth.png', dpi=150, bbox_inches='tight') + plt.close() + print(f" Saved nested_depth.pdf") + + +def generate_latex_table(results: list[dict], output_dir: Path): + """Generate LaTeX table data for paper.""" + agent_data = [r for r in results if r["name"] == "agent_workload"] + + if not agent_data: + print("No agent_workload data found") + return + + r = agent_data[0] + params = r["params"] + total_ms = r["latency_us"] / 1000 + + latex = f"""% Agent workload results +% {params['num_branches']} branches, {params['base_size_mb']} MB base, {params['mod_size_kb']} KB mods +% Total time: {total_ms:.2f} ms +""" + + output_file = output_dir / 'agent_table.txt' + with open(output_file, 'w') as f: + f.write(latex) + print(f" Saved agent_table.txt") + + +def main(): + parser = argparse.ArgumentParser(description="Plot benchmark results") + parser.add_argument("results", help="JSON results file from branchfs_bench.py") + parser.add_argument("--output", "-o", default=".", help="Output directory") + args = parser.parse_args() + + output_dir = Path(args.output) + output_dir.mkdir(parents=True, exist_ok=True) + + print(f"Loading results from {args.results}") + results = load_results(args.results) + print(f"Found {len(results)} results") + + print("\nGenerating plots...") + plot_branch_creation(results, output_dir) + plot_commit_latency(results, output_dir) + plot_combined_microbench(results, output_dir) + plot_nested_depth(results, output_dir) + generate_latex_table(results, output_dir) + + print("\nDone!") + + +if __name__ == "__main__": + main() diff --git a/bench/quick_bench.sh b/bench/quick_bench.sh new file mode 100755 index 0000000..fcc9fc2 --- /dev/null +++ b/bench/quick_bench.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# Quick BranchFS benchmark script +# Usage: ./quick_bench.sh [branchfs_binary] +# +# This script measures: +# - Branch creation (via daemon socket - includes CLI overhead) +# - Commit latency (via .branchfs_ctl - pure FUSE operation) +# - Read latency + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEFAULT_BRANCHFS="$SCRIPT_DIR/../target/release/branchfs" +BRANCHFS="${1:-$DEFAULT_BRANCHFS}" + +if [ ! 
-x "$BRANCHFS" ]; then + echo "Error: branchfs binary not found at $BRANCHFS" + echo "Build with: cd $(dirname $SCRIPT_DIR) && cargo build --release" + exit 1 +fi + +TMPDIR=$(mktemp -d) +trap "rm -rf $TMPDIR" EXIT + +echo "Using branchfs: $BRANCHFS" +echo "Temp dir: $TMPDIR" + +# Helper to measure time in microseconds using high-res timer +time_us() { + local start=$(date +%s%N) + "$@" >/dev/null 2>&1 + local end=$(date +%s%N) + echo $(( (end - start) / 1000 )) +} + +# Helper to time a file write (pure FUSE operation) +time_write_us() { + local file="$1" + local content="$2" + local start=$(date +%s%N) + echo -n "$content" > "$file" + local end=$(date +%s%N) + echo $(( (end - start) / 1000 )) +} + +# ============================================================================ +# Test 1: Branch Creation Latency (via CLI - includes socket overhead) +# ============================================================================ +echo "" +echo "=== Branch Creation Latency (CLI, includes socket overhead) ===" + +for num_files in 100 1000 10000; do + BASE="$TMPDIR/base_$num_files" + MNT="$TMPDIR/mnt_$num_files" + STORAGE="$TMPDIR/storage_$num_files" + + mkdir -p "$BASE" "$MNT" "$STORAGE" + + # Create base files + echo " Creating $num_files files..." + for i in $(seq 1 $num_files); do + dir="$BASE/dir_$((i / 100))" + mkdir -p "$dir" + dd if=/dev/urandom of="$dir/file_$i.txt" bs=1024 count=1 2>/dev/null + done + + # Mount + $BRANCHFS mount --base "$BASE" --storage "$STORAGE" "$MNT" + sleep 0.2 + + # Measure branch creation (average of 5) + total=0 + for i in 1 2 3 4 5; do + lat=$(time_us $BRANCHFS create "branch_$i" "$MNT" --storage "$STORAGE") + total=$((total + lat)) + $BRANCHFS abort "$MNT" --storage "$STORAGE" 2>/dev/null || true + done + avg=$((total / 5)) + + echo " $num_files files: $avg us ($((avg / 1000)) ms) - includes CLI/socket overhead" + + # Cleanup + $BRANCHFS unmount "$MNT" --storage "$STORAGE" 2>/dev/null || fusermount3 -u "$MNT" 2>/dev/null || true +done + +# ============================================================================ +# Test 2: Commit Latency (direct .branchfs_ctl write - pure FUSE) +# ============================================================================ +echo "" +echo "=== Commit Latency (direct ctl write, pure FUSE) ===" + +BASE="$TMPDIR/commit_base" +mkdir -p "$BASE" +for i in $(seq 1 100); do + dd if=/dev/urandom of="$BASE/file_$i.txt" bs=1024 count=10 2>/dev/null +done + +for mod_kb in 1 10 100 1000; do + MNT="$TMPDIR/commit_mnt_$mod_kb" + STORAGE="$TMPDIR/commit_storage_$mod_kb" + mkdir -p "$MNT" "$STORAGE" + + $BRANCHFS mount --base "$BASE" --storage "$STORAGE" "$MNT" + sleep 0.1 + + $BRANCHFS create "bench" "$MNT" --storage "$STORAGE" + + # Make modification + dd if=/dev/urandom of="$MNT/mod.bin" bs=1024 count=$mod_kb 2>/dev/null + sync + + # Measure commit via direct ctl file write (pure FUSE operation) + lat=$(time_write_us "$MNT/.branchfs_ctl" "commit") + echo " ${mod_kb} KB modification: $lat us ($((lat / 1000)) ms)" + + # Cleanup + $BRANCHFS unmount "$MNT" --storage "$STORAGE" 2>/dev/null || fusermount3 -u "$MNT" 2>/dev/null || true +done + +# ============================================================================ +# Test 3: Abort Latency (direct .branchfs_ctl write - pure FUSE) +# ============================================================================ +echo "" +echo "=== Abort Latency (direct ctl write, pure FUSE) ===" + +for mod_kb in 1 100 1000; do + MNT="$TMPDIR/abort_mnt_$mod_kb" + STORAGE="$TMPDIR/abort_storage_$mod_kb" + 
mkdir -p "$MNT" "$STORAGE" + + $BRANCHFS mount --base "$BASE" --storage "$STORAGE" "$MNT" + sleep 0.1 + + $BRANCHFS create "bench" "$MNT" --storage "$STORAGE" + + # Make modification + dd if=/dev/urandom of="$MNT/mod.bin" bs=1024 count=$mod_kb 2>/dev/null + sync + + # Measure abort via direct ctl file write + lat=$(time_write_us "$MNT/.branchfs_ctl" "abort") + echo " ${mod_kb} KB to discard: $lat us ($((lat / 1000)) ms)" + + # Cleanup + $BRANCHFS unmount "$MNT" --storage "$STORAGE" 2>/dev/null || fusermount3 -u "$MNT" 2>/dev/null || true +done + +# ============================================================================ +# Test 4: Read Latency (file on base, read through branch) +# ============================================================================ +echo "" +echo "=== Read Latency ===" + +MNT="$TMPDIR/read_mnt" +STORAGE="$TMPDIR/read_storage" +mkdir -p "$MNT" "$STORAGE" + +# Create test file +dd if=/dev/urandom of="$BASE/testfile.bin" bs=1M count=1 2>/dev/null + +$BRANCHFS mount --base "$BASE" --storage "$STORAGE" "$MNT" +sleep 0.1 +$BRANCHFS create "bench" "$MNT" --storage "$STORAGE" + +# Warm up +cat "$MNT/testfile.bin" > /dev/null + +# Measure +total=0 +for i in 1 2 3 4 5; do + lat=$(time_us cat "$MNT/testfile.bin") + total=$((total + lat)) +done +avg=$((total / 5)) +echo " 1MB file read: $avg us ($((avg / 1000)) ms)" + +$BRANCHFS unmount "$MNT" --storage "$STORAGE" 2>/dev/null || fusermount3 -u "$MNT" 2>/dev/null || true + +# ============================================================================ +# Test 5: Switch Branch Latency (direct ctl write) +# ============================================================================ +echo "" +echo "=== Switch Branch Latency (direct ctl write, pure FUSE) ===" + +MNT="$TMPDIR/switch_mnt" +STORAGE="$TMPDIR/switch_storage" +mkdir -p "$MNT" "$STORAGE" + +$BRANCHFS mount --base "$BASE" --storage "$STORAGE" "$MNT" +sleep 0.1 + +# Create multiple branches +$BRANCHFS create "branch_a" "$MNT" --storage "$STORAGE" +$BRANCHFS create "branch_b" "$MNT" --storage "$STORAGE" + +# Measure switch latency +total=0 +for i in 1 2 3 4 5; do + lat=$(time_write_us "$MNT/.branchfs_ctl" "switch:branch_a") + total=$((total + lat)) + echo -n "switch:branch_b" > "$MNT/.branchfs_ctl" +done +avg=$((total / 5)) +echo " Switch branch: $avg us" + +$BRANCHFS unmount "$MNT" --storage "$STORAGE" 2>/dev/null || fusermount3 -u "$MNT" 2>/dev/null || true + +echo "" +echo "=== Done ===" +echo "" +echo "Note: Branch creation goes through CLI+socket, so it shows higher latency." +echo " Commit/abort/switch go directly through FUSE (.branchfs_ctl)." 
diff --git a/src/branch.rs b/src/branch.rs index 9f2c9a0..3b7d3a8 100644 --- a/src/branch.rs +++ b/src/branch.rs @@ -4,6 +4,7 @@ use std::io::{BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::time::Instant; use fuser::Notifier; use parking_lot::{Mutex, RwLock}; @@ -150,6 +151,7 @@ impl BranchManager { } pub fn create_branch(&self, name: &str, parent: &str) -> Result<()> { + let start = Instant::now(); validate_branch_name(name)?; let mut branches = self.branches.write(); @@ -165,6 +167,9 @@ impl BranchManager { let branch = Branch::new(name, Some(parent), &self.storage_path)?; branches.insert(name.to_string(), branch); + let elapsed = start.elapsed(); + log::debug!("[BENCH] create_branch '{}': {:?} ({} us)", name, elapsed, elapsed.as_micros()); + Ok(()) } @@ -359,6 +364,7 @@ impl BranchManager { } pub fn commit(&self, branch_name: &str) -> Result<()> { + let start = Instant::now(); if branch_name == "main" { return Err(BranchError::CannotOperateOnMain); } @@ -367,6 +373,9 @@ impl BranchManager { let chain = self.get_branch_chain(branch_name, &branches)?; let (deletions, files) = self.collect_changes(&chain, &branches)?; + let num_deletions = deletions.len(); + let num_files = files.len(); + let total_bytes: u64 = files.iter().filter_map(|(_, p)| p.metadata().ok()).map(|m| m.len()).sum(); for path in &deletions { let full_path = self.base_path.join(path.trim_start_matches('/')); @@ -398,10 +407,15 @@ impl BranchManager { drop(branches); self.invalidate_all_mounts(); + let elapsed = start.elapsed(); + log::debug!("[BENCH] commit '{}': {:?} ({} us), {} deletions, {} files, {} bytes", + branch_name, elapsed, elapsed.as_micros(), num_deletions, num_files, total_bytes); + Ok(()) } pub fn abort(&self, branch_name: &str) -> Result<()> { + let start = Instant::now(); if branch_name == "main" { return Err(BranchError::CannotOperateOnMain); } @@ -431,6 +445,9 @@ impl BranchManager { drop(branches); self.invalidate_branches(&aborted_branches); + let elapsed = start.elapsed(); + log::debug!("[BENCH] abort '{}': {:?} ({} us)", branch_name, elapsed, elapsed.as_micros()); + Ok(()) } From 61fc22b54bc5f297ca15fead3c7fc2f3df27bd88 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 5 Feb 2026 16:49:48 -0800 Subject: [PATCH 2/3] Fix read performance: use seek+read instead of reading entire file Signed-off-by: Cong Wang --- src/fs.rs | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/fs.rs b/src/fs.rs index fd09811..4e6c43c 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::fs::File; +use std::io::{Read as IoRead, Seek, SeekFrom}; use std::path::Path; use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; @@ -397,14 +399,16 @@ impl Filesystem for BranchFs { return; } }; - match std::fs::read(&resolved) { - Ok(data) => { - let start = offset as usize; - let end = std::cmp::min(start + size as usize, data.len()); - if start < data.len() { - reply.data(&data[start..end]); - } else { - reply.data(&[]); + match File::open(&resolved) { + Ok(mut file) => { + if let Err(_) = file.seek(SeekFrom::Start(offset as u64)) { + reply.error(libc::EIO); + return; + } + let mut buf = vec![0u8; size as usize]; + match file.read(&mut buf) { + Ok(n) => reply.data(&buf[..n]), + Err(_) => reply.error(libc::EIO), } } Err(_) => reply.error(libc::EIO), @@ -436,18 +440,22 @@ impl Filesystem for 
BranchFs { } }; - match std::fs::read(&resolved) { - Ok(data) => { - if self.is_stale() { - reply.error(libc::ESTALE); + match File::open(&resolved) { + Ok(mut file) => { + if let Err(_) = file.seek(SeekFrom::Start(offset as u64)) { + reply.error(libc::EIO); return; } - let start = offset as usize; - let end = std::cmp::min(start + size as usize, data.len()); - if start < data.len() { - reply.data(&data[start..end]); - } else { - reply.data(&[]); + let mut buf = vec![0u8; size as usize]; + match file.read(&mut buf) { + Ok(n) => { + if self.is_stale() { + reply.error(libc::ESTALE); + return; + } + reply.data(&buf[..n]) + } + Err(_) => reply.error(libc::EIO), } } Err(_) => reply.error(libc::EIO), From 1ae957d2adef3b4a560466907c4ba806f465bce2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 5 Feb 2026 17:23:36 -0800 Subject: [PATCH 3/3] Fix fmt and clippy warnings Signed-off-by: Cong Wang --- src/branch.rs | 31 ++++++++++++++++++++++++++----- src/fs.rs | 4 ++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/branch.rs b/src/branch.rs index 3b7d3a8..eea112e 100644 --- a/src/branch.rs +++ b/src/branch.rs @@ -168,7 +168,12 @@ impl BranchManager { branches.insert(name.to_string(), branch); let elapsed = start.elapsed(); - log::debug!("[BENCH] create_branch '{}': {:?} ({} us)", name, elapsed, elapsed.as_micros()); + log::debug!( + "[BENCH] create_branch '{}': {:?} ({} us)", + name, + elapsed, + elapsed.as_micros() + ); Ok(()) } @@ -375,7 +380,11 @@ impl BranchManager { let (deletions, files) = self.collect_changes(&chain, &branches)?; let num_deletions = deletions.len(); let num_files = files.len(); - let total_bytes: u64 = files.iter().filter_map(|(_, p)| p.metadata().ok()).map(|m| m.len()).sum(); + let total_bytes: u64 = files + .iter() + .filter_map(|(_, p)| p.metadata().ok()) + .map(|m| m.len()) + .sum(); for path in &deletions { let full_path = self.base_path.join(path.trim_start_matches('/')); @@ -408,8 +417,15 @@ impl BranchManager { self.invalidate_all_mounts(); let elapsed = start.elapsed(); - log::debug!("[BENCH] commit '{}': {:?} ({} us), {} deletions, {} files, {} bytes", - branch_name, elapsed, elapsed.as_micros(), num_deletions, num_files, total_bytes); + log::debug!( + "[BENCH] commit '{}': {:?} ({} us), {} deletions, {} files, {} bytes", + branch_name, + elapsed, + elapsed.as_micros(), + num_deletions, + num_files, + total_bytes + ); Ok(()) } @@ -446,7 +462,12 @@ impl BranchManager { self.invalidate_branches(&aborted_branches); let elapsed = start.elapsed(); - log::debug!("[BENCH] abort '{}': {:?} ({} us)", branch_name, elapsed, elapsed.as_micros()); + log::debug!( + "[BENCH] abort '{}': {:?} ({} us)", + branch_name, + elapsed, + elapsed.as_micros() + ); Ok(()) } diff --git a/src/fs.rs b/src/fs.rs index 4e6c43c..cd541ed 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -401,7 +401,7 @@ impl Filesystem for BranchFs { }; match File::open(&resolved) { Ok(mut file) => { - if let Err(_) = file.seek(SeekFrom::Start(offset as u64)) { + if file.seek(SeekFrom::Start(offset as u64)).is_err() { reply.error(libc::EIO); return; } @@ -442,7 +442,7 @@ impl Filesystem for BranchFs { match File::open(&resolved) { Ok(mut file) => { - if let Err(_) = file.seek(SeekFrom::Start(offset as u64)) { + if file.seek(SeekFrom::Start(offset as u64)).is_err() { reply.error(libc::EIO); return; }