|
| 1 | +// pkg/hecate/add/fix_caddy.go - Drift correction for Caddy configuration |
| 2 | +// |
| 3 | +// P0 FIX: Connection Reset to Caddy Admin API |
| 4 | +// ROOT CAUSE: Two issues preventing host→container Admin API communication |
| 5 | +// 1. Network name mismatch: Docker Compose prefixes network names |
| 6 | +// 2. Admin API binding: Caddy binds to localhost:2019 (127.0.0.1 only) inside container |
| 7 | +// |
| 8 | +// SOLUTION: This fixer applies both fixes to existing deployments |
| 9 | +// 1. Updates docker-compose.yml with explicit network name |
| 10 | +// 2. Updates Caddyfile with admin 0.0.0.0:2019 binding |
| 11 | +// 3. Restarts Caddy container to apply changes |
| 12 | +// |
| 13 | +// ARCHITECTURE: "Shift Left" - Template fixes prevent issue in new deployments |
| 14 | +// This fixer corrects existing deployments |
| 15 | +// |
| 16 | +// EVIDENCE: User diagnostic showed connection refused to 172.21.0.3:2019 |
| 17 | +// Network named "hecate_hecate-net" instead of "hecate-net" |
| 18 | + |
| 19 | +package add |
| 20 | + |
| 21 | +import ( |
| 22 | + "fmt" |
| 23 | + "os" |
| 24 | + "path/filepath" |
| 25 | + "strings" |
| 26 | + |
| 27 | + "github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io" |
| 28 | + "github.com/CodeMonkeyCybersecurity/eos/pkg/execute" |
| 29 | + "github.com/CodeMonkeyCybersecurity/eos/pkg/hecate" |
| 30 | + "github.com/uptrace/opentelemetry-go-extra/otelzap" |
| 31 | + "go.uber.org/zap" |
| 32 | +) |
| 33 | + |
// CaddyFixer is the drift-correction fixer for the Caddy service. It is
// registered under the name "caddy" as a ServiceFixer and carries no state of
// its own: every method works from the files under hecate.BaseDir.
type CaddyFixer struct{}
| 36 | + |
| 37 | +// init registers the Caddy fixer |
| 38 | +func init() { |
| 39 | + RegisterServiceFixer("caddy", func() ServiceFixer { |
| 40 | + return &CaddyFixer{} |
| 41 | + }) |
| 42 | +} |
| 43 | + |
| 44 | +// Fix corrects Caddy configuration drift |
| 45 | +// PATTERN: Assess → Intervene → Evaluate |
| 46 | +func (f *CaddyFixer) Fix(rc *eos_io.RuntimeContext, opts *FixOptions) error { |
| 47 | + logger := otelzap.Ctx(rc.Ctx) |
| 48 | + |
| 49 | + logger.Info("Starting Caddy configuration drift correction", |
| 50 | + zap.Bool("dry_run", opts.DryRun)) |
| 51 | + |
| 52 | + // ASSESS: Check current configuration state |
| 53 | + issues, err := f.assessCaddyConfig(rc) |
| 54 | + if err != nil { |
| 55 | + return fmt.Errorf("failed to assess Caddy configuration: %w", err) |
| 56 | + } |
| 57 | + |
| 58 | + if len(issues) == 0 { |
| 59 | + logger.Info("✓ No Caddy configuration drift detected - all checks passed") |
| 60 | + return nil |
| 61 | + } |
| 62 | + |
| 63 | + // Log detected issues |
| 64 | + logger.Warn("Detected Caddy configuration drift", |
| 65 | + zap.Int("issue_count", len(issues))) |
| 66 | + for i, issue := range issues { |
| 67 | + logger.Warn(fmt.Sprintf("Issue %d: %s", i+1, issue), |
| 68 | + zap.String("type", "drift")) |
| 69 | + } |
| 70 | + |
| 71 | + if opts.DryRun { |
| 72 | + logger.Info("DRY RUN: Would fix the following issues:") |
| 73 | + for i, issue := range issues { |
| 74 | + logger.Info(fmt.Sprintf(" %d. %s", i+1, issue)) |
| 75 | + } |
| 76 | + logger.Info("Run without --dry-run to apply fixes") |
| 77 | + return nil |
| 78 | + } |
| 79 | + |
| 80 | + // INTERVENE: Apply fixes |
| 81 | + logger.Info("Applying Caddy configuration fixes") |
| 82 | + |
| 83 | + if err := f.applyCaddyFixes(rc, issues); err != nil { |
| 84 | + return fmt.Errorf("failed to apply Caddy fixes: %w", err) |
| 85 | + } |
| 86 | + |
| 87 | + // EVALUATE: Verify fixes |
| 88 | + logger.Info("Verifying Caddy configuration fixes") |
| 89 | + |
| 90 | + remainingIssues, err := f.assessCaddyConfig(rc) |
| 91 | + if err != nil { |
| 92 | + return fmt.Errorf("failed to verify fixes: %w", err) |
| 93 | + } |
| 94 | + |
| 95 | + if len(remainingIssues) > 0 { |
| 96 | + logger.Warn("Some issues remain after fixes", |
| 97 | + zap.Int("remaining_issues", len(remainingIssues))) |
| 98 | + for i, issue := range remainingIssues { |
| 99 | + logger.Warn(fmt.Sprintf("Remaining issue %d: %s", i+1, issue)) |
| 100 | + } |
| 101 | + return fmt.Errorf("configuration drift correction incomplete - %d issues remain", len(remainingIssues)) |
| 102 | + } |
| 103 | + |
| 104 | + logger.Info("✓ Caddy configuration drift correction completed successfully") |
| 105 | + return nil |
| 106 | +} |
| 107 | + |
| 108 | +// assessCaddyConfig checks for configuration drift |
| 109 | +func (f *CaddyFixer) assessCaddyConfig(rc *eos_io.RuntimeContext) ([]string, error) { |
| 110 | + logger := otelzap.Ctx(rc.Ctx) |
| 111 | + var issues []string |
| 112 | + |
| 113 | + logger.Debug("Assessing Caddy configuration") |
| 114 | + |
| 115 | + // Check 1: Caddyfile admin API binding |
| 116 | + caddyfilePath := filepath.Join(hecate.BaseDir, "Caddyfile") |
| 117 | + caddyfileContent, err := os.ReadFile(caddyfilePath) |
| 118 | + if err != nil { |
| 119 | + return nil, fmt.Errorf("failed to read Caddyfile: %w", err) |
| 120 | + } |
| 121 | + |
| 122 | + if !strings.Contains(string(caddyfileContent), "admin 0.0.0.0:2019") { |
| 123 | + issues = append(issues, "Caddyfile missing 'admin 0.0.0.0:2019' binding (Admin API only accessible from localhost)") |
| 124 | + } |
| 125 | + |
| 126 | + // Check 2: docker-compose.yml network configuration |
| 127 | + composeFilePath := filepath.Join(hecate.BaseDir, "docker-compose.yml") |
| 128 | + composeContent, err := os.ReadFile(composeFilePath) |
| 129 | + if err != nil { |
| 130 | + return nil, fmt.Errorf("failed to read docker-compose.yml: %w", err) |
| 131 | + } |
| 132 | + |
| 133 | + if !strings.Contains(string(composeContent), "name: hecate-net") { |
| 134 | + issues = append(issues, "docker-compose.yml missing explicit 'name: hecate-net' (Docker Compose will prefix network name)") |
| 135 | + } |
| 136 | + |
| 137 | + logger.Debug("Configuration assessment complete", |
| 138 | + zap.Int("issues_found", len(issues))) |
| 139 | + |
| 140 | + return issues, nil |
| 141 | +} |
| 142 | + |
| 143 | +// applyCaddyFixes applies configuration fixes |
| 144 | +func (f *CaddyFixer) applyCaddyFixes(rc *eos_io.RuntimeContext, issues []string) error { |
| 145 | + logger := otelzap.Ctx(rc.Ctx) |
| 146 | + |
| 147 | + // Fix 1: Update Caddyfile |
| 148 | + if f.needsCaddyfileAdminFix(issues) { |
| 149 | + logger.Info("Fixing Caddyfile admin API binding") |
| 150 | + if err := f.fixCaddyfileAdmin(rc); err != nil { |
| 151 | + return fmt.Errorf("failed to fix Caddyfile: %w", err) |
| 152 | + } |
| 153 | + logger.Info("✓ Caddyfile admin API binding fixed") |
| 154 | + } |
| 155 | + |
| 156 | + // Fix 2: Update docker-compose.yml |
| 157 | + if f.needsDockerComposeNetworkFix(issues) { |
| 158 | + logger.Info("Fixing docker-compose.yml network name") |
| 159 | + if err := f.fixDockerComposeNetwork(rc); err != nil { |
| 160 | + return fmt.Errorf("failed to fix docker-compose.yml: %w", err) |
| 161 | + } |
| 162 | + logger.Info("✓ docker-compose.yml network name fixed") |
| 163 | + } |
| 164 | + |
| 165 | + // Fix 3: Restart Caddy container to apply changes |
| 166 | + logger.Info("Restarting Caddy container to apply configuration changes") |
| 167 | + if err := f.restartCaddyContainer(rc); err != nil { |
| 168 | + return fmt.Errorf("failed to restart Caddy: %w", err) |
| 169 | + } |
| 170 | + logger.Info("✓ Caddy container restarted successfully") |
| 171 | + |
| 172 | + return nil |
| 173 | +} |
| 174 | + |
| 175 | +// needsCaddyfileAdminFix checks if Caddyfile needs admin binding fix |
| 176 | +func (f *CaddyFixer) needsCaddyfileAdminFix(issues []string) bool { |
| 177 | + for _, issue := range issues { |
| 178 | + if strings.Contains(issue, "Caddyfile missing 'admin 0.0.0.0:2019'") { |
| 179 | + return true |
| 180 | + } |
| 181 | + } |
| 182 | + return false |
| 183 | +} |
| 184 | + |
| 185 | +// needsDockerComposeNetworkFix checks if docker-compose.yml needs network name fix |
| 186 | +func (f *CaddyFixer) needsDockerComposeNetworkFix(issues []string) bool { |
| 187 | + for _, issue := range issues { |
| 188 | + if strings.Contains(issue, "docker-compose.yml missing explicit 'name: hecate-net'") { |
| 189 | + return true |
| 190 | + } |
| 191 | + } |
| 192 | + return false |
| 193 | +} |
| 194 | + |
| 195 | +// fixCaddyfileAdmin adds admin 0.0.0.0:2019 to Caddyfile global block |
| 196 | +func (f *CaddyFixer) fixCaddyfileAdmin(rc *eos_io.RuntimeContext) error { |
| 197 | + logger := otelzap.Ctx(rc.Ctx) |
| 198 | + |
| 199 | + caddyfilePath := filepath.Join(hecate.BaseDir, "Caddyfile") |
| 200 | + |
| 201 | + // Read existing Caddyfile |
| 202 | + content, err := os.ReadFile(caddyfilePath) |
| 203 | + if err != nil { |
| 204 | + return fmt.Errorf("failed to read Caddyfile: %w", err) |
| 205 | + } |
| 206 | + |
| 207 | + contentStr := string(content) |
| 208 | + |
| 209 | + // Find global block opening |
| 210 | + globalBlockStart := strings.Index(contentStr, "{") |
| 211 | + if globalBlockStart == -1 { |
| 212 | + return fmt.Errorf("Caddyfile has no global block - cannot apply fix automatically") |
| 213 | + } |
| 214 | + |
| 215 | + // Find line after opening brace |
| 216 | + lineAfterBrace := globalBlockStart + 1 |
| 217 | + for lineAfterBrace < len(contentStr) && (contentStr[lineAfterBrace] == '\n' || contentStr[lineAfterBrace] == '\r') { |
| 218 | + lineAfterBrace++ |
| 219 | + } |
| 220 | + |
| 221 | + // Insert admin binding with documentation |
| 222 | + adminConfig := ` # Admin API binding (P0 FIX - Connection Reset) |
| 223 | + # ROOT CAUSE: Default binding (localhost:2019) only listens on 127.0.0.1 inside container |
| 224 | + # Host cannot connect to container's localhost → connection refused |
| 225 | + # SOLUTION: Bind to 0.0.0.0:2019 to listen on all interfaces including bridge network |
| 226 | + # SECURITY: Admin API still protected by Docker network isolation |
| 227 | + # Only accessible from: container localhost, host machine, same bridge network |
| 228 | + # NOT accessible from external networks (no port publish in docker-compose) |
| 229 | + # APPLIED: eos update hecate --fix caddy |
| 230 | + admin 0.0.0.0:2019 |
| 231 | +
|
| 232 | +` |
| 233 | + |
| 234 | + newContent := contentStr[:lineAfterBrace] + adminConfig + contentStr[lineAfterBrace:] |
| 235 | + |
| 236 | + // Backup existing Caddyfile |
| 237 | + backupPath := caddyfilePath + ".backup" |
| 238 | + if err := os.WriteFile(backupPath, content, 0644); err != nil { |
| 239 | + logger.Warn("Failed to create Caddyfile backup", zap.Error(err)) |
| 240 | + } else { |
| 241 | + logger.Debug("Created Caddyfile backup", zap.String("path", backupPath)) |
| 242 | + } |
| 243 | + |
| 244 | + // Write updated Caddyfile |
| 245 | + if err := os.WriteFile(caddyfilePath, []byte(newContent), 0644); err != nil { |
| 246 | + return fmt.Errorf("failed to write Caddyfile: %w", err) |
| 247 | + } |
| 248 | + |
| 249 | + logger.Info("Updated Caddyfile with admin 0.0.0.0:2019 binding") |
| 250 | + |
| 251 | + return nil |
| 252 | +} |
| 253 | + |
| 254 | +// fixDockerComposeNetwork adds explicit network name to docker-compose.yml |
| 255 | +func (f *CaddyFixer) fixDockerComposeNetwork(rc *eos_io.RuntimeContext) error { |
| 256 | + logger := otelzap.Ctx(rc.Ctx) |
| 257 | + |
| 258 | + composeFilePath := filepath.Join(hecate.BaseDir, "docker-compose.yml") |
| 259 | + |
| 260 | + // Read existing docker-compose.yml |
| 261 | + content, err := os.ReadFile(composeFilePath) |
| 262 | + if err != nil { |
| 263 | + return fmt.Errorf("failed to read docker-compose.yml: %w", err) |
| 264 | + } |
| 265 | + |
| 266 | + contentStr := string(content) |
| 267 | + |
| 268 | + // Find networks section |
| 269 | + networksSection := "networks:\n hecate-net:" |
| 270 | + networksIdx := strings.Index(contentStr, networksSection) |
| 271 | + if networksIdx == -1 { |
| 272 | + return fmt.Errorf("docker-compose.yml has no 'networks: hecate-net:' section - cannot apply fix automatically") |
| 273 | + } |
| 274 | + |
| 275 | + // Find end of hecate-net network definition (next top-level key or end of file) |
| 276 | + insertIdx := networksIdx + len(networksSection) |
| 277 | + |
| 278 | + // Insert explicit network name with documentation |
| 279 | + networkConfig := ` |
| 280 | + # P0 FIX - Network Name Mismatch |
| 281 | + # ROOT CAUSE: Docker Compose prefixes network names with project name |
| 282 | + # Without explicit name: "hecate_hecate-net" (project_network format) |
| 283 | + # With explicit name: "hecate-net" (exactly as specified) |
| 284 | + # SOLUTION: Set explicit name to prevent Docker Compose prefixing |
| 285 | + # RATIONALE: Docker SDK code expects "hecate-net", not "hecate_hecate-net" |
| 286 | + # APPLIED: eos update hecate --fix caddy |
| 287 | + name: hecate-net |
| 288 | + driver: bridge` |
| 289 | + |
| 290 | + newContent := contentStr[:insertIdx] + networkConfig + contentStr[insertIdx:] |
| 291 | + |
| 292 | + // Backup existing docker-compose.yml |
| 293 | + backupPath := composeFilePath + ".backup" |
| 294 | + if err := os.WriteFile(backupPath, content, 0644); err != nil { |
| 295 | + logger.Warn("Failed to create docker-compose.yml backup", zap.Error(err)) |
| 296 | + } else { |
| 297 | + logger.Debug("Created docker-compose.yml backup", zap.String("path", backupPath)) |
| 298 | + } |
| 299 | + |
| 300 | + // Write updated docker-compose.yml |
| 301 | + if err := os.WriteFile(composeFilePath, []byte(newContent), 0644); err != nil { |
| 302 | + return fmt.Errorf("failed to write docker-compose.yml: %w", err) |
| 303 | + } |
| 304 | + |
| 305 | + logger.Info("Updated docker-compose.yml with explicit network name") |
| 306 | + |
| 307 | + return nil |
| 308 | +} |
| 309 | + |
| 310 | +// restartCaddyContainer restarts the Caddy container to apply configuration changes |
| 311 | +func (f *CaddyFixer) restartCaddyContainer(rc *eos_io.RuntimeContext) error { |
| 312 | + logger := otelzap.Ctx(rc.Ctx) |
| 313 | + |
| 314 | + // Use docker compose to restart Caddy service |
| 315 | + output, err := execute.Run(rc.Ctx, execute.Options{ |
| 316 | + Command: "docker", |
| 317 | + Args: []string{"compose", "-f", filepath.Join(hecate.BaseDir, "docker-compose.yml"), "restart", "caddy"}, |
| 318 | + Dir: hecate.BaseDir, |
| 319 | + Capture: true, |
| 320 | + }) |
| 321 | + |
| 322 | + if err != nil { |
| 323 | + return fmt.Errorf("docker compose restart failed: %w\nOutput: %s", err, output) |
| 324 | + } |
| 325 | + |
| 326 | + logger.Debug("Docker compose restart output", zap.String("output", strings.TrimSpace(output))) |
| 327 | + |
| 328 | + return nil |
| 329 | +} |
0 commit comments