smart-mcp-proxy
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 72 additions & 0 deletions
diff --git a/cmd/mcpproxy/activity_cmd.go b/cmd/mcpproxy/activity_cmd.go
Lines changed: 49 additions & 1 deletion
diff --git a/cmd/mcpproxy/doctor_cmd.go b/cmd/mcpproxy/doctor_cmd.go
Lines changed: 11 additions & 0 deletions
@@ -61,6 +61,17 @@ mcpproxy doctor                     # Run health checks
 
 See [docs/cli-management-commands.md](docs/cli-management-commands.md) for complete reference.
 
+### Hook Integration CLI (Spec 027)
+```bash
+mcpproxy hook install --agent claude-code          # Install hooks (project scope)
+mcpproxy hook install --agent claude-code --scope user  # Install hooks (user scope)
+mcpproxy hook uninstall --agent claude-code         # Remove hooks
+mcpproxy hook status --agent claude-code            # Check hook installation status
+mcpproxy hook evaluate --event PreToolUse           # Evaluate tool call (reads JSON from stdin)
+```
+
+Hooks enable full data flow security by intercepting agent-internal tool calls (Read, Write, Bash, etc.) that the MCP proxy cannot see directly.
+
 ### Activity Log CLI
 ```bash
 mcpproxy activity list              # List recent activity
@@ -105,6 +116,7 @@ See [docs/cli-output-formatting.md](docs/cli-output-formatting.md) for complete
 | `internal/storage/` | BBolt database |
 | `internal/management/` | Centralized server management |
 | `internal/oauth/` | OAuth 2.1 with PKCE |
+| `internal/security/flow/` | Data flow security: classification, tracking, policy |
 | `internal/logs/` | Structured logging with per-server files |
 
 See [docs/architecture.md](docs/architecture.md) for diagrams and details.
@@ -194,6 +206,7 @@ See [docs/configuration.md](docs/configuration.md) for complete reference.
 | `POST /api/v1/servers/{name}/enable` | Enable/disable server |
 | `POST /api/v1/servers/{name}/quarantine` | Quarantine/unquarantine server |
 | `GET /api/v1/tools` | Search tools across servers |
+| `POST /api/v1/hooks/evaluate` | Evaluate tool call for data flow security |
 | `GET /api/v1/activity` | List activity records with filtering |
 | `GET /api/v1/activity/{id}` | Get activity record details |
 | `GET /api/v1/activity/export` | Export activity records (JSON/CSV) |
@@ -379,6 +392,63 @@ mcpproxy activity export --sensitive-data --output audit.jsonl  # Export for com
 
 See [docs/features/sensitive-data-detection.md](docs/features/sensitive-data-detection.md) for complete reference.
 
+## Data Flow Security (Spec 027)
+
+Data flow security detects data exfiltration patterns by tracking how data flows between internal tools (Read, databases) and external tools (WebFetch, Slack). It operates in two modes:
+
+- **Proxy-only mode**: Monitors MCP tool calls through the proxy (universal, any agent)
+- **Full mode**: Also intercepts agent-internal tools via hooks (requires hook installation)
+
+### Key Concepts
+
+- **Classification**: Tools/servers classified as internal, external, hybrid, or unknown
+- **Flow Types**: internal→internal (safe), internal→external (critical), external→internal, external→external
+- **Content Hashing**: Per-field SHA-256 hashing to detect data movement without storing the content itself
+- **Session Correlation**: Links agent hook sessions to MCP proxy sessions via argument hash matching
+
+### Configuration
+
+```json
+{
+  "security": {
+    "flow_tracking": {
+      "enabled": true,
+      "session_timeout_minutes": 30,
+      "max_origins_per_session": 10000,
+      "hash_min_length": 20
+    },
+    "classification": {
+      "server_overrides": {
+        "my-private-slack": "internal"
+      }
+    },
+    "flow_policy": {
+      "internal_to_external": "ask",
+      "sensitive_data_external": "deny",
+      "suspicious_endpoints": ["pastebin.com", "webhook.site"]
+    },
+    "hooks": {
+      "enabled": true,
+      "fail_open": true,
+      "correlation_ttl_seconds": 5
+    }
+  }
+}
+```
+
+### Key Files
+
+| File | Purpose |
+|------|---------|
+| `internal/security/flow/classifier.go` | Server/tool classification (internal/external) |
+| `internal/security/flow/tracker.go` | Flow session and origin tracking |
+| `internal/security/flow/hasher.go` | Content hashing for flow detection |
+| `internal/security/flow/service.go` | Flow service orchestrator |
+| `internal/security/flow/correlator.go` | Session correlation (hook↔MCP) |
+| `internal/security/flow/policy.go` | Policy evaluation engine |
+| `internal/httpapi/hooks.go` | POST /api/v1/hooks/evaluate endpoint |
+| `cmd/mcpproxy/hook_cmd.go` | Hook CLI commands (install/uninstall/status/evaluate) |
+
 ### Exit Codes
 
 | Code | Meaning |
@@ -471,6 +541,8 @@ See `docs/prerelease-builds.md` for download instructions.
 - BBolt database (`~/.mcpproxy/config.db`) - ActivityRecord model (024-expand-activity-log)
 - Go 1.24 (toolchain go1.24.10) + BBolt (storage), Chi router (HTTP), Zap (logging), regexp (stdlib), existing ActivityService (026-pii-detection)
 - BBolt database (`~/.mcpproxy/config.db`) - ActivityRecord.Metadata extension (026-pii-detection)
+- Go 1.24 (toolchain go1.24.10) + BBolt (storage), Chi router (HTTP), Zap (logging), mcp-go (MCP protocol), regexp (stdlib), crypto/sha256 (stdlib), existing `security.Detector` (027-data-flow-security)
+- BBolt database (`~/.mcpproxy/config.db`) - ActivityRecord.Metadata extension for hook_evaluation type. Flow sessions are in-memory only (not persisted). (027-data-flow-security)
 
 ## Recent Changes
 - 001-update-version-display: Added Go 1.24 (toolchain go1.24.10)
@@ -42,6 +42,8 @@ var (
 	activityNoIcons       bool   // Disable emoji icons in output
 	activityDetectionType string // Spec 026: Filter by detection type (e.g., "aws_access_key")
 	activitySeverity      string // Spec 026: Filter by severity level (critical, high, medium, low)
+	activityFlowType      string // Spec 027: Filter by flow type (e.g., "internal_to_external")
+	activityRiskLevel     string // Spec 027: Filter by risk level (e.g., "critical", "high")
 
 	// Show command flags
 	activityIncludeResponse bool
@@ -72,6 +74,8 @@ type ActivityFilter struct {
 	SensitiveData *bool  // Spec 026: Filter by sensitive data detection
 	DetectionType string // Spec 026: Filter by detection type
 	Severity      string // Spec 026: Filter by severity level
+	FlowType      string // Spec 027: Filter by flow type
+	RiskLevel     string // Spec 027: Filter by risk level
 }
 
 // Validate validates the filter options
@@ -81,6 +85,8 @@ func (f *ActivityFilter) Validate() error {
 		validTypes := []string{
 			"tool_call", "policy_decision", "quarantine_change", "server_change",
 			"system_start", "system_stop", "internal_tool_call", "config_change", // Spec 024: new types
+			"hook_evaluation", // Spec 027: hook evaluation events
+			"flow_summary",    // Spec 027: flow session summaries
 		}
 		// Split by comma for multi-type support
 		types := strings.Split(f.Type, ",")
@@ -144,6 +150,36 @@ func (f *ActivityFilter) Validate() error {
 		}
 	}
 
+	// Validate flow_type (Spec 027)
+	if f.FlowType != "" {
+		validFlowTypes := []string{"internal_to_internal", "internal_to_external", "external_to_internal", "external_to_external"}
+		valid := false
+		for _, ft := range validFlowTypes {
+			if f.FlowType == ft {
+				valid = true
+				break
+			}
+		}
+		if !valid {
+			return fmt.Errorf("invalid flow-type '%s': must be one of %v", f.FlowType, validFlowTypes)
+		}
+	}
+
+	// Validate risk_level (Spec 027)
+	if f.RiskLevel != "" {
+		validRiskLevels := []string{"none", "low", "medium", "high", "critical"}
+		valid := false
+		for _, rl := range validRiskLevels {
+			if f.RiskLevel == rl {
+				valid = true
+				break
+			}
+		}
+		if !valid {
+			return fmt.Errorf("invalid risk-level '%s': must be one of %v", f.RiskLevel, validRiskLevels)
+		}
+	}
+
 	// Validate time formats
 	if f.StartTime != "" {
 		if _, err := time.Parse(time.RFC3339, f.StartTime); err != nil {
@@ -213,6 +249,13 @@ func (f *ActivityFilter) ToQueryParams() url.Values {
 	if f.Severity != "" {
 		q.Set("severity", f.Severity)
 	}
+	// Spec 027: Add data flow security filters
+	if f.FlowType != "" {
+		q.Set("flow_type", f.FlowType)
+	}
+	if f.RiskLevel != "" {
+		q.Set("risk_level", f.RiskLevel)
+	}
 	return q
 }
 
@@ -706,7 +749,7 @@ func init() {
 	activityCmd.AddCommand(activityExportCmd)
 
 	// List command flags
-	activityListCmd.Flags().StringVarP(&activityType, "type", "t", "", "Filter by type (comma-separated for multiple): tool_call, system_start, system_stop, internal_tool_call, config_change, policy_decision, quarantine_change, server_change")
+	activityListCmd.Flags().StringVarP(&activityType, "type", "t", "", "Filter by type (comma-separated for multiple): tool_call, system_start, system_stop, internal_tool_call, config_change, policy_decision, quarantine_change, server_change, hook_evaluation, flow_summary")
 	activityListCmd.Flags().StringVarP(&activityServer, "server", "s", "", "Filter by server name")
 	activityListCmd.Flags().StringVar(&activityTool, "tool", "", "Filter by tool name")
 	activityListCmd.Flags().StringVar(&activityStatus, "status", "", "Filter by status: success, error, blocked")
@@ -722,6 +765,9 @@ func init() {
 	activityListCmd.Flags().Bool("sensitive-data", false, "Filter to show only activities with sensitive data detected")
 	activityListCmd.Flags().StringVar(&activityDetectionType, "detection-type", "", "Filter by detection type (e.g., aws_access_key, stripe_key)")
 	activityListCmd.Flags().StringVar(&activitySeverity, "severity", "", "Filter by severity level: critical, high, medium, low")
+	// Spec 027: Data flow security filters
+	activityListCmd.Flags().StringVar(&activityFlowType, "flow-type", "", "Filter by data flow type: internal_to_internal, internal_to_external, external_to_internal, external_to_external")
+	activityListCmd.Flags().StringVar(&activityRiskLevel, "risk-level", "", "Filter by risk level (>= comparison): none, low, medium, high, critical")
 
 	// Watch command flags
 	activityWatchCmd.Flags().StringVarP(&activityType, "type", "t", "", "Filter by type (comma-separated): tool_call, system_start, system_stop, internal_tool_call, config_change, policy_decision, quarantine_change, server_change")
@@ -816,6 +862,8 @@ func runActivityList(cmd *cobra.Command, _ []string) error {
 		SensitiveData: sensitiveDataPtr,
 		DetectionType: activityDetectionType,
 		Severity:      activitySeverity,
+		FlowType:      activityFlowType,
+		RiskLevel:     activityRiskLevel,
 	}
 
 	if err := filter.Validate(); err != nil {
 
@@ -417,6 +417,17 @@ func displaySecurityFeaturesStatus() {
 		fmt.Println("  ✗ Sensitive Data Detection: disabled")
 		fmt.Println("    Enable: set sensitive_data_detection.enabled = true in config")
 	}
+
+	// Data Flow Security status (Spec 027)
+	secCfg := cfg.GetSecurityConfig()
+	if secCfg.IsFlowTrackingEnabled() {
+		fmt.Println("  ✓ Data Flow Security: enabled")
+		fmt.Println("    Coverage: proxy_only (hooks not installed)")
+		fmt.Println("    Upgrade:  mcpproxy hook install --agent claude-code")
+	} else {
+		fmt.Println("  ✗ Data Flow Security: disabled")
+		fmt.Println("    Enable: set security.flow_tracking.enabled = true in config")
+	}
 }
 
 // formatCategoryList formats a list of categories for display, truncating if too long.