From 8666d8cc60e5a88118f40544d1934045504d15dd Mon Sep 17 00:00:00 2001 From: jiashuoz Date: Tue, 26 May 2026 21:55:50 -0700 Subject: [PATCH 1/2] fix(identity): keep verification_token stable on domain reclaim ClaimOrCreateDomain minted a fresh verification_token on every call and the upsert wrote it on conflict, so a second register on an unverified domain row silently rotated the e2a-verify= TXT value. A caller who had already published the original TXT (the whole point of the first call) would then fail verify_domain with no signal that the token had changed under them. Drop verification_token from the ON CONFLICT SET clause so the existing value sticks, mirroring how DKIM is already handled. The newly-minted token in the Go layer is harmlessly discarded by Postgres on the re-claim path. created_at and the DKIM key were already stable. --- internal/identity/store.go | 25 +++++++++++++--------- internal/identity/store_test.go | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/internal/identity/store.go b/internal/identity/store.go index 6c5516b..8c5a91b 100644 --- a/internal/identity/store.go +++ b/internal/identity/store.go @@ -263,9 +263,13 @@ func (s *Store) EnsureSharedDomain(ctx context.Context, domain string) error { // ClaimOrCreateDomain implements the atomic create/claim logic from the design doc. // Creates if new, overwrites user_id if unverified, returns if verified+same -// user, errors if verified+different user. Callers are responsible for -// rejecting the configured shared domain before invoking this — the store -// has no concept of a reserved domain. +// user, errors if verified+different user. The verification_token and DKIM +// keypair are minted on first INSERT and remain stable across re-claims of +// an unverified domain — so a caller that has already published the TXT +// record (or has mail in flight signed with the DKIM key) isn't silently +// invalidated by a second call. Callers are responsible for rejecting the +// configured shared domain before invoking this — the store has no concept +// of a reserved domain. func (s *Store) ClaimOrCreateDomain(ctx context.Context, domain, userID string) (*Domain, error) { domain = normalizeDomain(domain) @@ -287,18 +291,19 @@ func (s *Store) ClaimOrCreateDomain(ctx context.Context, domain, userID string) log.Printf("[identity] dkim keygen failed for %s: %v", domain, kerr) } - // Atomic upsert: insert new or overwrite unverified. The DKIM - // columns are only written on a true INSERT — the ON CONFLICT - // branch leaves any existing key in place so re-claiming an - // unverified domain doesn't invalidate signatures on mail already - // in flight. + // Atomic upsert: insert new or claim unverified. The DKIM columns + // and verification_token are only written on a true INSERT — the + // ON CONFLICT branch leaves existing values in place. DKIM + // stability avoids invalidating signatures on mail in flight; + // verification_token stability means a caller that already + // published the TXT record on DNS isn't silently invalidated by a + // second register call (e.g. to re-fetch the records). d := &Domain{} err := s.pool.QueryRow(ctx, `INSERT INTO domains (domain, user_id, verified, verification_token, dkim_selector, dkim_public_key, dkim_private_key) VALUES ($1, $2, false, $3, $4, $5, $6) ON CONFLICT (domain) DO UPDATE - SET user_id = EXCLUDED.user_id, - verification_token = EXCLUDED.verification_token + SET user_id = EXCLUDED.user_id WHERE domains.verified = false RETURNING domain, user_id, verified, verification_token, created_at, verified_at, is_primary, last_checked_at, COALESCE(dkim_selector, ''), COALESCE(dkim_public_key, '')`, domain, userID, verificationToken, nullIfEmpty(dkimSelector), nullIfEmpty(dkimPubKey), nullIfEmptyBytes(dkimPrivKey), diff --git a/internal/identity/store_test.go b/internal/identity/store_test.go index 8cdea83..95e4443 100644 --- a/internal/identity/store_test.go +++ b/internal/identity/store_test.go @@ -59,6 +59,43 @@ func TestCreateAgentDuplicate(t *testing.T) { } } +// TestClaimOrCreateDomain_StableOnReclaim asserts that re-claiming an +// unverified domain returns the row unchanged: the verification_token +// and DKIM public key minted on the first call must survive the second. +// A caller that has already published the TXT record on DNS would +// otherwise be silently invalidated by a benign second register call +// (e.g. an agent re-fetching the records to show the user). +func TestClaimOrCreateDomain_StableOnReclaim(t *testing.T) { + pool := testutil.TestDB(t) + store := identity.NewStore(pool) + ctx := context.Background() + + user, _ := store.CreateOrGetUser(ctx, "owner-stable@example.com", "Owner", "google-stable-token") + + first, err := store.ClaimOrCreateDomain(ctx, "stable.example.com", user.ID) + if err != nil { + t.Fatalf("first ClaimOrCreateDomain: %v", err) + } + if first.VerificationToken == "" { + t.Fatal("first call returned empty VerificationToken") + } + + second, err := store.ClaimOrCreateDomain(ctx, "stable.example.com", user.ID) + if err != nil { + t.Fatalf("second ClaimOrCreateDomain: %v", err) + } + + if second.VerificationToken != first.VerificationToken { + t.Errorf("VerificationToken rotated on reclaim: first=%q second=%q", first.VerificationToken, second.VerificationToken) + } + if second.DKIMPublicKey != first.DKIMPublicKey { + t.Errorf("DKIMPublicKey rotated on reclaim: first=%q second=%q", first.DKIMPublicKey, second.DKIMPublicKey) + } + if !second.CreatedAt.Equal(first.CreatedAt) { + t.Errorf("CreatedAt changed on reclaim: first=%v second=%v", first.CreatedAt, second.CreatedAt) + } +} + func TestGetAgentByID(t *testing.T) { pool := testutil.TestDB(t) store := identity.NewStore(pool) From 48054e128cc85cde334acb2a022f70b6297a3314 Mon Sep 17 00:00:00 2001 From: jiashuoz Date: Wed, 27 May 2026 16:26:50 -0700 Subject: [PATCH 2/2] fix(identity): reject cross-user reclaim of unverified domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix stopped verification_token from rotating on re-claim, but the conflict branch still ran SET user_id = EXCLUDED.user_id on unverified rows. That left a squatting window: user A registers foo.example.com and publishes the e2a-verify TXT, user B then calls register_domain on the same name. user_id flips to user B, the token is now stable (good for A's lost case, bad here) — and user B can verify against the TXT record A already published on DNS that A controls. Tighten the ON CONFLICT WHERE to AND domains.user_id = \$2 so only the same user can re-claim an unverified row. Change the SET to a no-op (SET user_id = domains.user_id) so RETURNING still surfaces the existing row for the same-user case. A different-user attempt falls through to the existing SELECT path and returns "domain not available", matching the verified+different-user behavior. The only non-test caller (handleRegisterDomain) already maps that error to 400. No behavior change for legitimate same-user flows. Add TestClaimOrCreateDomain_CrossUserReclaimRejected covering: user B's ClaimOrCreateDomain returns an error, user B's LookupDomain still can't see the row, and user A's row keeps its original user_id and verification_token. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/identity/store.go | 46 ++++++++++++++++++++------------- internal/identity/store_test.go | 39 ++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/internal/identity/store.go b/internal/identity/store.go index 8c5a91b..325a549 100644 --- a/internal/identity/store.go +++ b/internal/identity/store.go @@ -262,14 +262,17 @@ func (s *Store) EnsureSharedDomain(ctx context.Context, domain string) error { } // ClaimOrCreateDomain implements the atomic create/claim logic from the design doc. -// Creates if new, overwrites user_id if unverified, returns if verified+same -// user, errors if verified+different user. The verification_token and DKIM -// keypair are minted on first INSERT and remain stable across re-claims of -// an unverified domain — so a caller that has already published the TXT -// record (or has mail in flight signed with the DKIM key) isn't silently -// invalidated by a second call. Callers are responsible for rejecting the -// configured shared domain before invoking this — the store has no concept -// of a reserved domain. +// Creates if new, returns the existing row when the same user already owns +// it (verified or not), and errors if a different user owns it. The +// verification_token and DKIM keypair are minted on first INSERT and remain +// stable across re-claims — a caller that has already published the TXT +// record on DNS (or has mail in flight signed with the DKIM key) isn't +// silently invalidated by a second call. A different user cannot take +// over an unverified row; that closes a squatting window where the new +// owner could verify against a TXT record the original owner already +// published. Callers are responsible for rejecting the configured shared +// domain before invoking this — the store has no concept of a reserved +// domain. func (s *Store) ClaimOrCreateDomain(ctx context.Context, domain, userID string) (*Domain, error) { domain = normalizeDomain(domain) @@ -291,20 +294,24 @@ func (s *Store) ClaimOrCreateDomain(ctx context.Context, domain, userID string) log.Printf("[identity] dkim keygen failed for %s: %v", domain, kerr) } - // Atomic upsert: insert new or claim unverified. The DKIM columns - // and verification_token are only written on a true INSERT — the - // ON CONFLICT branch leaves existing values in place. DKIM - // stability avoids invalidating signatures on mail in flight; - // verification_token stability means a caller that already - // published the TXT record on DNS isn't silently invalidated by a - // second register call (e.g. to re-fetch the records). + // Atomic upsert. The conflict branch only fires for a same-user + // re-claim of an unverified row, and runs as a no-op SET so + // RETURNING surfaces the existing row. DKIM columns and the + // verification_token are only written on a true INSERT, so they + // stay stable across re-claims — DKIM stability avoids + // invalidating signatures on mail in flight, and token stability + // means a caller who already published the TXT record on DNS + // isn't silently invalidated. A different-user conflict falls + // through to the SELECT below and returns "domain not available", + // preventing squatting on an unverified row whose TXT record the + // original owner may have already published. d := &Domain{} err := s.pool.QueryRow(ctx, `INSERT INTO domains (domain, user_id, verified, verification_token, dkim_selector, dkim_public_key, dkim_private_key) VALUES ($1, $2, false, $3, $4, $5, $6) ON CONFLICT (domain) DO UPDATE - SET user_id = EXCLUDED.user_id - WHERE domains.verified = false + SET user_id = domains.user_id + WHERE domains.verified = false AND domains.user_id = $2 RETURNING domain, user_id, verified, verification_token, created_at, verified_at, is_primary, last_checked_at, COALESCE(dkim_selector, ''), COALESCE(dkim_public_key, '')`, domain, userID, verificationToken, nullIfEmpty(dkimSelector), nullIfEmpty(dkimPubKey), nullIfEmptyBytes(dkimPrivKey), ).Scan(&d.Domain, &d.UserID, &d.Verified, &d.VerificationToken, &d.CreatedAt, &d.VerifiedAt, &d.IsPrimary, &d.LastCheckedAt, &d.DKIMSelector, &d.DKIMPublicKey) @@ -313,7 +320,10 @@ func (s *Store) ClaimOrCreateDomain(ctx context.Context, domain, userID string) return d, nil } - // No row returned — domain exists and is verified. Check ownership. + // No row returned — the row exists but the conflict UPDATE was + // skipped because either it's already verified or a different user + // owns it. Re-read to decide between "verified + same user → return + // it" and "different user → not available". existing := &Domain{} err = s.pool.QueryRow(ctx, `SELECT domain, user_id, verified, verification_token, created_at, verified_at, is_primary, last_checked_at, COALESCE(dkim_selector, ''), COALESCE(dkim_public_key, '') diff --git a/internal/identity/store_test.go b/internal/identity/store_test.go index 95e4443..43948af 100644 --- a/internal/identity/store_test.go +++ b/internal/identity/store_test.go @@ -96,6 +96,45 @@ func TestClaimOrCreateDomain_StableOnReclaim(t *testing.T) { } } +// TestClaimOrCreateDomain_CrossUserReclaimRejected asserts that a second +// user cannot take over an unverified domain that another user has +// already claimed. Combined with the stable verification_token, this +// closes the squatting window where the takeover user could verify +// against a TXT record the original owner had already published. +func TestClaimOrCreateDomain_CrossUserReclaimRejected(t *testing.T) { + pool := testutil.TestDB(t) + store := identity.NewStore(pool) + ctx := context.Background() + + userA, _ := store.CreateOrGetUser(ctx, "owner-a@example.com", "Owner A", "google-a") + userB, _ := store.CreateOrGetUser(ctx, "owner-b@example.com", "Owner B", "google-b") + + first, err := store.ClaimOrCreateDomain(ctx, "squat.example.com", userA.ID) + if err != nil { + t.Fatalf("userA ClaimOrCreateDomain: %v", err) + } + + if _, err := store.ClaimOrCreateDomain(ctx, "squat.example.com", userB.ID); err == nil { + t.Fatal("userB reclaim should fail when userA already owns the unverified row") + } + + // userB cannot read the row either; userA still owns it and the + // verification_token is unchanged. + if _, err := store.LookupDomain(ctx, "squat.example.com", userB.ID); err == nil { + t.Error("userB LookupDomain should not see squat.example.com") + } + after, err := store.LookupDomain(ctx, "squat.example.com", userA.ID) + if err != nil { + t.Fatalf("userA LookupDomain: %v", err) + } + if after.UserID == nil || *after.UserID != userA.ID { + t.Errorf("ownership changed: got user_id=%v, want %s", after.UserID, userA.ID) + } + if after.VerificationToken != first.VerificationToken { + t.Errorf("verification_token rotated under cross-user reclaim: first=%q after=%q", first.VerificationToken, after.VerificationToken) + } +} + func TestGetAgentByID(t *testing.T) { pool := testutil.TestDB(t) store := identity.NewStore(pool)