From 9a1df3d8156ed5bbf5b7ff2b9f16510c3a6af130 Mon Sep 17 00:00:00 2001
From: Peyton Montei
Date: Tue, 17 Feb 2026 16:19:30 -0800
Subject: [PATCH 1/3] Add opt-in PII redaction with typed tokens

---
 cmd/entire/cli/checkpoint/temporary.go        |   2 +
 cmd/entire/cli/settings/settings.go           |  27 ++
 cmd/entire/cli/settings/settings_test.go      |  86 ++++++-
 cmd/entire/cli/strategy/auto_commit.go        |   2 +
 cmd/entire/cli/strategy/common.go             |  27 ++
 cmd/entire/cli/strategy/manual_commit_git.go  |   2 +
 .../cli/strategy/manual_commit_hooks.go       |   2 +
 redact/pii.go                                 | 151 +++++++++++
 redact/pii_test.go                            | 236 ++++++++++++++++++
 redact/redact.go                              |  35 ++-
 10 files changed, 559 insertions(+), 11 deletions(-)
 create mode 100644 redact/pii.go
 create mode 100644 redact/pii_test.go

diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go
index 1fc1b64c0..e1fdb1964 100644
--- a/cmd/entire/cli/checkpoint/temporary.go
+++ b/cmd/entire/cli/checkpoint/temporary.go
@@ -922,6 +922,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR
 
 		treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir))
 
+		// Use redacted blob creation for metadata files (transcripts, prompts, etc.)
+		// to ensure PII and secrets are redacted before writing to git.
 		blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath)
 		if err != nil {
 			return fmt.Errorf("failed to create blob for %s: %w", path, err)
diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go
index 381c9993a..73316a8c0 100644
--- a/cmd/entire/cli/settings/settings.go
+++ b/cmd/entire/cli/settings/settings.go
@@ -49,6 +49,24 @@ type EntireSettings struct {
 	// Telemetry controls anonymous usage analytics.
 	// nil = not asked yet (show prompt), true = opted in, false = opted out
 	Telemetry *bool `json:"telemetry,omitempty"`
+
+	// Redaction holds the optional redaction settings (currently only PII); nil when the "redaction" key is absent.
+	Redaction *RedactionSettings `json:"redaction,omitempty"`
+}
+
+// RedactionSettings configures redaction behavior beyond the default secret detection.
+type RedactionSettings struct {
+	PII *PIISettings `json:"pii,omitempty"`
+}
+
+// PIISettings configures PII detection categories and team-defined custom patterns (label -> regex).
+// A nil Email, Phone or Address means "unset": when Enabled, email and phone default to on, address to off.
+type PIISettings struct {
+	Enabled        bool              `json:"enabled"`
+	Email          *bool             `json:"email,omitempty"`
+	Phone          *bool             `json:"phone,omitempty"`
+	Address        *bool             `json:"address,omitempty"`
+	CustomPatterns map[string]string `json:"custom_patterns,omitempty"`
 }
 
 // Load loads the Entire settings from .entire/settings.json,
@@ -204,6 +222,15 @@ func mergeJSON(settings *EntireSettings, data []byte) error {
 		settings.Telemetry = &t
 	}
 
+	// Override redaction if present; the decoded value replaces settings.Redaction wholesale (no per-field merge).
+	if redactionRaw, ok := raw["redaction"]; ok {
+		var r RedactionSettings
+		if err := json.Unmarshal(redactionRaw, &r); err != nil {
+			return fmt.Errorf("parsing redaction field: %w", err)
+		}
+		settings.Redaction = &r
+	}
+
 	return nil
 }
 
diff --git a/cmd/entire/cli/settings/settings_test.go b/cmd/entire/cli/settings/settings_test.go
index ad09bc57a..d2329426b 100644
--- a/cmd/entire/cli/settings/settings_test.go
+++ b/cmd/entire/cli/settings/settings_test.go
@@ -59,7 +59,8 @@ func TestLoad_AcceptsValidKeys(t *testing.T) {
 		"local_dev": false,
 		"log_level": "debug",
 		"strategy_options": {"key": "value"},
-		"telemetry": true
+		"telemetry": true,
+		"redaction": {"pii": {"enabled": true, "email": true, "phone": false}}
 	}`
 	if err := os.WriteFile(settingsFile, []byte(settingsContent), 0644); err != nil {
 		t.Fatalf("failed to write settings file: %v", err)
@@ -92,6 +93,21 @@ func TestLoad_AcceptsValidKeys(t *testing.T) {
 	if settings.Telemetry == nil || !*settings.Telemetry {
 		t.Error("expected telemetry to be true")
 	}
+	if settings.Redaction == nil {
+		t.Fatal("expected redaction to be non-nil")
+	}
+	if settings.Redaction.PII == nil {
+		t.Fatal("expected redaction.pii to be non-nil")
+	}
+	if !settings.Redaction.PII.Enabled {
+		t.Error("expected redaction.pii.enabled to be true")
+	}
+	if settings.Redaction.PII.Email == nil || !*settings.Redaction.PII.Email {
+		t.Error("expected redaction.pii.email to be true")
+	}
+	if settings.Redaction.PII.Phone == nil || *settings.Redaction.PII.Phone {
+		t.Error("expected redaction.pii.phone to be false")
+	}
 }
 
 func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) {
@@ -135,6 +151,74 @@ func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) {
 	}
 }
 
+func TestLoad_MissingRedactionIsNil(t *testing.T) {
+	tmpDir := t.TempDir()
+	entireDir := filepath.Join(tmpDir, ".entire")
+	if err := os.MkdirAll(entireDir, 0755); err != nil {
+		t.Fatalf("failed to create .entire directory: %v", err)
+	}
+
+	settingsFile := filepath.Join(entireDir, "settings.json")
+	if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit"}`), 0644); err != nil {
+		t.Fatalf("failed to write settings file: %v", err)
+	}
+	if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil {
+		t.Fatalf("failed to create .git directory: %v", err)
+	}
+	t.Chdir(tmpDir)
+
+	settings, err := Load()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if settings.Redaction != nil {
+		t.Error("expected redaction to be nil when not in settings")
+	}
+}
+
+func TestLoad_LocalOverridesRedaction(t *testing.T) {
+	tmpDir := t.TempDir()
+	entireDir := filepath.Join(tmpDir, ".entire")
+	if err := os.MkdirAll(entireDir, 0755); err != nil {
+		t.Fatalf("failed to create .entire directory: %v", err)
+	}
+
+	// Base settings (settings.json): PII explicitly disabled
+	settingsFile := filepath.Join(entireDir, "settings.json")
+	if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit", "redaction": {"pii": {"enabled": false}}}`), 0644); err != nil {
+		t.Fatalf("failed to write settings file: %v", err)
+	}
+
+	// Local override (settings.local.json): PII enabled with one custom pattern
+	localFile := filepath.Join(entireDir, "settings.local.json")
+	localContent := `{"redaction": {"pii": {"enabled": true, "custom_patterns": {"employee_id": "EMP-\\d{6}"}}}}`
+	if err := os.WriteFile(localFile, []byte(localContent), 0644); err != nil {
+		t.Fatalf("failed to write local settings file: %v", err)
+	}
+
+	if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil {
+		t.Fatalf("failed to create .git directory: %v", err)
+	}
+	t.Chdir(tmpDir)
+
+	settings, err := Load()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if settings.Redaction == nil || settings.Redaction.PII == nil {
+		t.Fatal("expected redaction.pii to be non-nil after local override")
+	}
+	if !settings.Redaction.PII.Enabled {
+		t.Error("expected local override to enable PII")
+	}
+	if settings.Redaction.PII.CustomPatterns == nil {
+		t.Fatal("expected custom_patterns to be non-nil")
+	}
+	if settings.Redaction.PII.CustomPatterns["employee_id"] != `EMP-\d{6}` {
+		t.Errorf("expected employee_id pattern, got %v", settings.Redaction.PII.CustomPatterns)
+	}
+}
+
 // containsUnknownField checks if the error message indicates an unknown field
 func containsUnknownField(msg string) bool {
 	// Go's json package reports unknown fields with this message format
diff --git a/cmd/entire/cli/strategy/auto_commit.go b/cmd/entire/cli/strategy/auto_commit.go
index 5ee5ced3e..4a2e89153 100644
--- a/cmd/entire/cli/strategy/auto_commit.go
+++ b/cmd/entire/cli/strategy/auto_commit.go
@@ -121,6 +121,8 @@ func (s *AutoCommitStrategy) PrePush(remote string) error {
 }
 
 func (s *AutoCommitStrategy) SaveStep(ctx StepContext) error {
+	EnsureRedactionConfigured()
+
 	repo, err := OpenRepository()
 	if err != nil {
 		return fmt.Errorf("failed to open git repository: %w", err)
diff --git a/cmd/entire/cli/strategy/common.go b/cmd/entire/cli/strategy/common.go
index 9b8f2439d..7c4c535ea 100644
--- a/cmd/entire/cli/strategy/common.go
+++ b/cmd/entire/cli/strategy/common.go
@@ -17,7 +17,9 @@ import (
 	"github.com/entireio/cli/cmd/entire/cli/checkpoint"
 	"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
 	"github.com/entireio/cli/cmd/entire/cli/paths"
+	"github.com/entireio/cli/cmd/entire/cli/settings"
 	"github.com/entireio/cli/cmd/entire/cli/trailers"
+	"github.com/entireio/cli/redact"
 
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing"
@@ -238,6 +240,31 @@ var (
 	protectedDirsCache []string
 )
 
+var initRedactionOnce sync.Once
+
+// EnsureRedactionConfigured loads settings at most once per process (sync.Once) and, when PII
+// redaction is enabled, hands the resulting config to redact.ConfigurePII. It is a silent no-op
+// if settings fail to load or PII is absent/disabled; later calls never retry the load.
+func EnsureRedactionConfigured() {
+	initRedactionOnce.Do(func() {
+		s, err := settings.Load()
+		if err != nil || s.Redaction == nil || s.Redaction.PII == nil || !s.Redaction.PII.Enabled {
+			return
+		}
+		pii := s.Redaction.PII
+		cfg := redact.PIIConfig{
+			Enabled:        true,
+			Categories:     make(map[redact.PIICategory]bool),
+			CustomPatterns: pii.CustomPatterns,
+		}
+		// A nil pointer means "not specified": email and phone then default to on, address to off.
+		cfg.Categories[redact.PIIEmail] = pii.Email == nil || *pii.Email
+		cfg.Categories[redact.PIIPhone] = pii.Phone == nil || *pii.Phone
+		cfg.Categories[redact.PIIAddress] = pii.Address != nil && *pii.Address
+		redact.ConfigurePII(cfg)
+	})
+}
+
 // homeRelativePath strips the $HOME/ prefix from an absolute path,
 // returning a home-relative path suitable for persisting in metadata.
 // Returns "" if the path is empty or not under $HOME.
diff --git a/cmd/entire/cli/strategy/manual_commit_git.go b/cmd/entire/cli/strategy/manual_commit_git.go
index 37e252d50..51975c6c2 100644
--- a/cmd/entire/cli/strategy/manual_commit_git.go
+++ b/cmd/entire/cli/strategy/manual_commit_git.go
@@ -22,6 +22,8 @@ import (
 // SaveStep saves a checkpoint to the shadow branch.
 // Uses checkpoint.GitStore.WriteTemporary for git operations.
func (s *ManualCommitStrategy) SaveStep(ctx StepContext) error { + EnsureRedactionConfigured() + repo, err := OpenRepository() if err != nil { return fmt.Errorf("failed to open git repository: %w", err) diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index e4e3353dc..985f52740 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -550,6 +550,8 @@ func (h *postCommitActionHandler) HandleWarnStaleSession(_ *session.State) error // //nolint:unparam // error return required by interface but hooks must return nil func (s *ManualCommitStrategy) PostCommit() error { + EnsureRedactionConfigured() + logCtx := logging.WithComponent(context.Background(), "checkpoint") repo, err := OpenRepository() diff --git a/redact/pii.go b/redact/pii.go new file mode 100644 index 000000000..6c3fd815c --- /dev/null +++ b/redact/pii.go @@ -0,0 +1,151 @@ +package redact + +import ( + "fmt" + "os" + "regexp" + "strings" + "sync" +) + +// PIICategory identifies a category of personally identifiable information. +type PIICategory string + +const ( + PIIEmail PIICategory = "email" + PIIPhone PIICategory = "phone" + PIIAddress PIICategory = "address" +) + +// PIIConfig controls which PII categories are detected and redacted. +type PIIConfig struct { + // Enabled globally enables/disables PII redaction. + // When false, no PII patterns are checked (secrets still redacted). + Enabled bool + + // Categories maps each PII category to whether it is enabled. + // Missing keys default to false (disabled). + Categories map[PIICategory]bool + + // CustomPatterns allows teams to define additional regex patterns. + // Each key is a label used in the replacement token (uppercased), + // and each value is a regex pattern string. + // Example: {"employee_id": `EMP-\d{6}`} produces [REDACTED_EMPLOYEE_ID]. 
+ CustomPatterns map[string]string +} + +// piiPattern is a compiled regex with its replacement token label. +type piiPattern struct { + regex *regexp.Regexp + label string // e.g., "EMAIL", "PHONE", "ADDRESS" +} + +var ( + piiConfig *PIIConfig + piiConfigMu sync.Mutex + + compiledPIIPatterns []piiPattern + compiledPIIPatternsOnce sync.Once +) + +// ConfigurePII sets the global PII redaction configuration. +// Call once at startup after loading settings. Thread-safe. +func ConfigurePII(cfg PIIConfig) { + piiConfigMu.Lock() + defer piiConfigMu.Unlock() + cfgCopy := cfg + piiConfig = &cfgCopy + // Reset compiled patterns so they are recompiled with new config. + compiledPIIPatternsOnce = sync.Once{} +} + +// getPIIConfig returns the current PII configuration, or nil if not configured. +func getPIIConfig() *PIIConfig { + piiConfigMu.Lock() + defer piiConfigMu.Unlock() + return piiConfig +} + +// builtinPIIPattern associates a regex pattern string with a category and label. +type builtinPIIPattern struct { + category PIICategory + label string + pattern string +} + +// builtinPIIPatterns returns the default PII detection patterns. +func builtinPIIPatterns() []builtinPIIPattern { + return []builtinPIIPattern{ + // Email: standard format + {PIIEmail, "EMAIL", `[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`}, + // Phone: US formats (xxx-xxx-xxxx, (xxx) xxx-xxxx, +1xxxxxxxxxx, etc.) + {PIIPhone, "PHONE", `(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}`}, + // Address: US street address patterns (123 Main St, 456 Elm Avenue, etc.) + {PIIAddress, "ADDRESS", `\d{1,5}\s+[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*\s+(?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Rd|Road|Ct|Court|Pl(?:ace)?|Way|Cir(?:cle)?|Ter(?:race)?|Pkwy|Parkway)\.?`}, + } +} + +// detectPII returns tagged regions for PII matches in s. +// Returns nil immediately if PII redaction is not configured or not enabled. 
+func detectPII(s string) []taggedRegion {
+	cfg := getPIIConfig()
+	if cfg == nil || !cfg.Enabled {
+		return nil
+	}
+
+	var regions []taggedRegion
+	for _, p := range getCompiledPIIPatterns(cfg) {
+		for _, loc := range p.regex.FindAllStringIndex(s, -1) {
+			// A user-supplied pattern such as `x*` or `\b` can match the empty
+			// string; a zero-width region contains nothing to redact, so skip it
+			// instead of reporting an empty span at every position.
+			if loc[0] == loc[1] {
+				continue
+			}
+			regions = append(regions, taggedRegion{
+				region: region{loc[0], loc[1]},
+				label:  p.label,
+			})
+		}
+	}
+	return regions
+}
+
+// getCompiledPIIPatterns compiles and caches PII patterns based on the current config.
+// Patterns are compiled once per ConfigurePII call: the first call after a
+// reset compiles from cfg, and every later call returns that cached slice
+// regardless of the cfg it is given.
+//
+// Built-in patterns are included only when their category is mapped to true in
+// cfg.Categories. Custom patterns are always included, labelled with the
+// upper-cased map key; map iteration makes their relative order unspecified.
+// A pattern that fails to compile is reported on stderr and skipped.
+func getCompiledPIIPatterns(cfg *PIIConfig) []piiPattern {
+	// ConfigurePII overwrites compiledPIIPatternsOnce while holding piiConfigMu.
+	// Take the same lock so Do never runs on a Once that is being replaced and
+	// the cached slice is read consistently with it.
+	piiConfigMu.Lock()
+	defer piiConfigMu.Unlock()
+
+	compiledPIIPatternsOnce.Do(func() {
+		var patterns []piiPattern
+
+		for _, bp := range builtinPIIPatterns() {
+			if !cfg.Categories[bp.category] {
+				continue // category missing from the map or explicitly disabled
+			}
+			compiled, err := regexp.Compile(bp.pattern)
+			if err != nil {
+				// Report instead of dropping silently: a broken built-in would
+				// otherwise turn off a whole category without any signal.
+				fmt.Fprintf(os.Stderr, "[redact] Warning: invalid built-in PII pattern %q: %v\n", bp.label, err)
+				continue
+			}
+			patterns = append(patterns, piiPattern{regex: compiled, label: bp.label})
+		}
+
+		// Custom patterns
+		for label, pattern := range cfg.CustomPatterns {
+			compiled, err := regexp.Compile(pattern)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "[redact] Warning: invalid custom PII pattern %q: %v\n", label, err)
+				continue
+			}
+			patterns = append(patterns, piiPattern{
+				regex: compiled,
+				label: strings.ToUpper(label),
+			})
+		}
+
+		compiledPIIPatterns = patterns
+	})
+	return compiledPIIPatterns
+}
+
+// replacementToken returns the redaction placeholder for a given label.
+// Empty label (secrets) returns "REDACTED" for backward compatibility.
+// Non-empty label (PII) returns "[REDACTED_