From 9ac0570d8890aa43e08a8b93f4684c8232b53834 Mon Sep 17 00:00:00 2001 From: 0xDevNinja Date: Wed, 13 May 2026 15:22:24 +0530 Subject: [PATCH] fix(gbrain-sync): fold hostname into code-source id hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-fix `deriveCodeSourceId` hashed the absolute repo path alone, so two machines with identical home-dir layouts (chezmoi-managed dotfiles, ansible-provisioned VMs) derived the same id and clobbered each other's `local_path` in a federated brain. Last-writer-wins, with cryptic "Not a git repository" errors on the loser. Hash key is now `${hostname}::${path}`. Conductor worktrees on a single host stay distinct (path entropy unchanged within a host); cross-machine federations stop colliding. Legacy path-only-hashed sources age out naturally — in-place migration would force a brain-wide rewrite for a minority workflow, and the existing `deriveLegacyCodeSourceId` orphan cleanup pattern can pick them up in a follow-up if needed. `GSTACK_HOSTNAME` env var is a test-only knob; production uses `os.hostname()`. 
Fixes #1414 --- bin/gstack-gbrain-sync.ts | 25 ++++++++------ test/gstack-gbrain-sync.test.ts | 58 ++++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index 36b265e42d..623639a084 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -32,7 +32,7 @@ import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs"; import { join, dirname } from "path"; import { execSync, spawnSync } from "child_process"; -import { homedir } from "os"; +import { homedir, hostname } from "os"; import { createHash } from "crypto"; import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers"; @@ -159,30 +159,35 @@ function originUrl(): string | null { } /** - * Derive a worktree-aware source id for the cwd code corpus. + * Derive a host- and worktree-aware source id for the cwd code corpus. * - * Pattern: `gstack-code-<slug>-<pathhash8>` where slug comes from origin - * (org/repo) and pathhash8 is the first 8 hex chars of sha1(absolute repo - * path). The pathhash8 is what makes Conductor worktrees of the same repo - * coexist as separate sources in the same gbrain DB instead of stomping on - * each other. + * Pattern: `gstack-code-<slug>-<hostpathhash8>` where slug comes from origin + * (org/repo) and hostpathhash8 is the first 8 hex chars of + * sha1(`${hostname}::${absolute repo path}`). Folding hostname into the hash + * keeps Conductor worktrees of the same repo as distinct sources on one host + * AND keeps two machines that share an absolute layout (e.g. chezmoi-managed + * home dirs against a federated brain) from colliding on each other. * * Falls back to the repo basename when there is no origin (local repo). * + * `GSTACK_HOSTNAME` env override is honored for deterministic tests; in + * production paths it is unset and `os.hostname()` is used. 
+ * * gbrain enforces source ids to be 1-32 lowercase alnum chars with * optional interior hyphens. `constrainSourceId` handles the 32-char cap * with a hashed-tail fallback when the combined slug exceeds budget. */ function deriveCodeSourceId(repoPath: string): string { - const pathHash = createHash("sha1").update(repoPath).digest("hex").slice(0, 8); + const host = process.env.GSTACK_HOSTNAME || hostname(); + const hostPathHash = createHash("sha1").update(`${host}::${repoPath}`).digest("hex").slice(0, 8); const remote = canonicalizeRemote(originUrl()); if (remote) { const segs = remote.split("/").filter(Boolean); const slugSource = segs.slice(-2).join("-"); - return constrainSourceId("gstack-code", `${slugSource}-${pathHash}`); + return constrainSourceId("gstack-code", `${slugSource}-${hostPathHash}`); } const base = repoPath.split("/").pop() || "repo"; - return constrainSourceId("gstack-code", `${base}-${pathHash}`); + return constrainSourceId("gstack-code", `${base}-${hostPathHash}`); } /** diff --git a/test/gstack-gbrain-sync.test.ts b/test/gstack-gbrain-sync.test.ts index 528d6deed7..d75ec0163f 100644 --- a/test/gstack-gbrain-sync.test.ts +++ b/test/gstack-gbrain-sync.test.ts @@ -8,7 +8,7 @@ */ import { describe, it, expect } from "bun:test"; -import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync } from "fs"; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync, chmodSync } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import { spawnSync } from "child_process"; @@ -215,6 +215,62 @@ describe("gstack-gbrain-sync CLI", () => { rmSync(home, { recursive: true, force: true }); }); + it("derives distinct source ids for the same absolute path on different hosts", () => { + // Issue #1414: two machines with identical home-dir layouts (chezmoi-managed + // dotfiles, ansible-provisioned VMs) collide on the same source id when + // federated against a shared gbrain DB, because the pre-fix 
`pathHash` was + // sha1(absolute path) only — host-agnostic. Folding hostname into the hash + // key keeps them distinct. `GSTACK_HOSTNAME` env var is the test-only knob; + // production uses `os.hostname()`. + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const repo = mkdtempSync(join(tmpdir(), "gstack-host-collide-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + spawnSync("git", ["remote", "add", "origin", "https://github.com/example/multihost.git"], { cwd: repo }); + + // Dry-run still gates the code stage on `command -v gbrain`. Drop a no-op + // shim on PATH so the stage runs (we only assert the preview line, never + // invoke gbrain itself). + const bindir = mkdtempSync(join(tmpdir(), "gstack-host-collide-bin-")); + const shim = join(bindir, "gbrain"); + writeFileSync(shim, "#!/bin/sh\nexit 0\n"); + chmodSync(shim, 0o755); + const PATH = `${bindir}:${process.env.PATH || ""}`; + + const runAs = (host: string) => + spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], { + encoding: "utf-8", + timeout: 60000, + cwd: repo, + env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome, GSTACK_HOSTNAME: host, PATH }, + }); + + const a = runAs("machine-a"); + const b = runAs("machine-b"); + expect(a.status).toBe(0); + expect(b.status).toBe(0); + const idA = (a.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + const idB = (b.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(idA).toBeTruthy(); + expect(idB).toBeTruthy(); + expect(idA).not.toBe(idB); + // Both still gbrain-valid. + const VALID_ID = /^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/; + expect(idA!).toMatch(VALID_ID); + expect(idB!).toMatch(VALID_ID); + + // Same host + same path stays stable across invocations. 
+ const a2 = runAs("machine-a"); + expect(a2.status).toBe(0); + const idA2 = (a2.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(idA2).toBe(idA); + + rmSync(repo, { recursive: true, force: true }); + rmSync(home, { recursive: true, force: true }); + rmSync(bindir, { recursive: true, force: true }); + }); + it("dry-run does NOT acquire the lock file (lock is for write paths only)", () => { const home = makeTestHome(); const gstackHome = join(home, ".gstack");