Skip to content

Commit 9654fe6

Browse files
feat: fix Caddy connectivity issues and network configuration
- Added Admin API binding fix so Caddy listens on 0.0.0.0:2019 instead of localhost in the Caddyfile
- Fixed Docker network name resolution by adding an explicit "hecate-net" name in docker-compose.yml
- Enhanced network detection to support Docker Compose project-name prefixes (e.g., hecate_hecate-net)
- Added a new --fix caddy command with a --dry-run option to correct configuration drift
- Updated error messages to provide clearer guidance on network…
1 parent 92cccd7 commit 9654fe6

6 files changed

Lines changed: 430 additions & 19 deletions

File tree

cmd/update/hecate.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ Examples:
6464
eos update hecate certs # Only renew certificates
6565
eos update hecate k3s # Update k3s deployment
6666
67+
# Fix Caddy configuration drift (Admin API binding + network name)
68+
eos update hecate --fix caddy # Apply both fixes and restart Caddy
69+
eos update hecate --fix caddy --dry-run # Preview fixes without applying
70+
6771
# Add BionicGPT (auto-detects port :8513 and enables SSO automatically)
6872
eos update hecate --add bionicgpt \
6973
--dns chat.codemonkey.ai \

pkg/hecate/add/fix.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ func FixService(rc *eos_io.RuntimeContext, opts *FixOptions) error {
4545
if !exists {
4646
return fmt.Errorf("service '%s' does not support drift correction\n\n"+
4747
"Supported services:\n"+
48-
" - bionicgpt\n\n"+
48+
" - bionicgpt\n"+
49+
" - caddy\n\n"+
4950
"To add support for other services, implement ServiceFixer interface",
5051
opts.Service)
5152
}

pkg/hecate/add/fix_caddy.go

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
// pkg/hecate/add/fix_caddy.go - Drift correction for Caddy configuration
2+
//
3+
// P0 FIX: Connection Reset to Caddy Admin API
4+
// ROOT CAUSE: Two issues preventing host→container Admin API communication
5+
// 1. Network name mismatch: Docker Compose prefixes network names
6+
// 2. Admin API binding: Caddy binds to localhost:2019 (127.0.0.1 only) inside container
7+
//
8+
// SOLUTION: This fixer applies both fixes to existing deployments
9+
// 1. Updates docker-compose.yml with explicit network name
10+
// 2. Updates Caddyfile with admin 0.0.0.0:2019 binding
11+
// 3. Restarts Caddy container to apply changes
12+
//
13+
// ARCHITECTURE: "Shift Left" - Template fixes prevent issue in new deployments
14+
// This fixer corrects existing deployments
15+
//
16+
// EVIDENCE: User diagnostic showed connection refused to 172.21.0.3:2019
17+
// Network named "hecate_hecate-net" instead of "hecate-net"
18+
19+
package add
20+
21+
import (
22+
"fmt"
23+
"os"
24+
"path/filepath"
25+
"strings"
26+
27+
"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
28+
"github.com/CodeMonkeyCybersecurity/eos/pkg/execute"
29+
"github.com/CodeMonkeyCybersecurity/eos/pkg/hecate"
30+
"github.com/uptrace/opentelemetry-go-extra/otelzap"
31+
"go.uber.org/zap"
32+
)
33+
34+
// CaddyFixer implements ServiceFixer for Caddy configuration drift correction.
// It carries no state; the init function in this file registers a factory for
// it under the service name "caddy".
35+
type CaddyFixer struct{}
36+
37+
// init registers the Caddy fixer
38+
func init() {
39+
RegisterServiceFixer("caddy", func() ServiceFixer {
40+
return &CaddyFixer{}
41+
})
42+
}
43+
44+
// Fix corrects Caddy configuration drift
45+
// PATTERN: Assess → Intervene → Evaluate
46+
func (f *CaddyFixer) Fix(rc *eos_io.RuntimeContext, opts *FixOptions) error {
47+
logger := otelzap.Ctx(rc.Ctx)
48+
49+
logger.Info("Starting Caddy configuration drift correction",
50+
zap.Bool("dry_run", opts.DryRun))
51+
52+
// ASSESS: Check current configuration state
53+
issues, err := f.assessCaddyConfig(rc)
54+
if err != nil {
55+
return fmt.Errorf("failed to assess Caddy configuration: %w", err)
56+
}
57+
58+
if len(issues) == 0 {
59+
logger.Info("✓ No Caddy configuration drift detected - all checks passed")
60+
return nil
61+
}
62+
63+
// Log detected issues
64+
logger.Warn("Detected Caddy configuration drift",
65+
zap.Int("issue_count", len(issues)))
66+
for i, issue := range issues {
67+
logger.Warn(fmt.Sprintf("Issue %d: %s", i+1, issue),
68+
zap.String("type", "drift"))
69+
}
70+
71+
if opts.DryRun {
72+
logger.Info("DRY RUN: Would fix the following issues:")
73+
for i, issue := range issues {
74+
logger.Info(fmt.Sprintf(" %d. %s", i+1, issue))
75+
}
76+
logger.Info("Run without --dry-run to apply fixes")
77+
return nil
78+
}
79+
80+
// INTERVENE: Apply fixes
81+
logger.Info("Applying Caddy configuration fixes")
82+
83+
if err := f.applyCaddyFixes(rc, issues); err != nil {
84+
return fmt.Errorf("failed to apply Caddy fixes: %w", err)
85+
}
86+
87+
// EVALUATE: Verify fixes
88+
logger.Info("Verifying Caddy configuration fixes")
89+
90+
remainingIssues, err := f.assessCaddyConfig(rc)
91+
if err != nil {
92+
return fmt.Errorf("failed to verify fixes: %w", err)
93+
}
94+
95+
if len(remainingIssues) > 0 {
96+
logger.Warn("Some issues remain after fixes",
97+
zap.Int("remaining_issues", len(remainingIssues)))
98+
for i, issue := range remainingIssues {
99+
logger.Warn(fmt.Sprintf("Remaining issue %d: %s", i+1, issue))
100+
}
101+
return fmt.Errorf("configuration drift correction incomplete - %d issues remain", len(remainingIssues))
102+
}
103+
104+
logger.Info("✓ Caddy configuration drift correction completed successfully")
105+
return nil
106+
}
107+
108+
// assessCaddyConfig checks for configuration drift
109+
func (f *CaddyFixer) assessCaddyConfig(rc *eos_io.RuntimeContext) ([]string, error) {
110+
logger := otelzap.Ctx(rc.Ctx)
111+
var issues []string
112+
113+
logger.Debug("Assessing Caddy configuration")
114+
115+
// Check 1: Caddyfile admin API binding
116+
caddyfilePath := filepath.Join(hecate.BaseDir, "Caddyfile")
117+
caddyfileContent, err := os.ReadFile(caddyfilePath)
118+
if err != nil {
119+
return nil, fmt.Errorf("failed to read Caddyfile: %w", err)
120+
}
121+
122+
if !strings.Contains(string(caddyfileContent), "admin 0.0.0.0:2019") {
123+
issues = append(issues, "Caddyfile missing 'admin 0.0.0.0:2019' binding (Admin API only accessible from localhost)")
124+
}
125+
126+
// Check 2: docker-compose.yml network configuration
127+
composeFilePath := filepath.Join(hecate.BaseDir, "docker-compose.yml")
128+
composeContent, err := os.ReadFile(composeFilePath)
129+
if err != nil {
130+
return nil, fmt.Errorf("failed to read docker-compose.yml: %w", err)
131+
}
132+
133+
if !strings.Contains(string(composeContent), "name: hecate-net") {
134+
issues = append(issues, "docker-compose.yml missing explicit 'name: hecate-net' (Docker Compose will prefix network name)")
135+
}
136+
137+
logger.Debug("Configuration assessment complete",
138+
zap.Int("issues_found", len(issues)))
139+
140+
return issues, nil
141+
}
142+
143+
// applyCaddyFixes applies configuration fixes
144+
func (f *CaddyFixer) applyCaddyFixes(rc *eos_io.RuntimeContext, issues []string) error {
145+
logger := otelzap.Ctx(rc.Ctx)
146+
147+
// Fix 1: Update Caddyfile
148+
if f.needsCaddyfileAdminFix(issues) {
149+
logger.Info("Fixing Caddyfile admin API binding")
150+
if err := f.fixCaddyfileAdmin(rc); err != nil {
151+
return fmt.Errorf("failed to fix Caddyfile: %w", err)
152+
}
153+
logger.Info("✓ Caddyfile admin API binding fixed")
154+
}
155+
156+
// Fix 2: Update docker-compose.yml
157+
if f.needsDockerComposeNetworkFix(issues) {
158+
logger.Info("Fixing docker-compose.yml network name")
159+
if err := f.fixDockerComposeNetwork(rc); err != nil {
160+
return fmt.Errorf("failed to fix docker-compose.yml: %w", err)
161+
}
162+
logger.Info("✓ docker-compose.yml network name fixed")
163+
}
164+
165+
// Fix 3: Restart Caddy container to apply changes
166+
logger.Info("Restarting Caddy container to apply configuration changes")
167+
if err := f.restartCaddyContainer(rc); err != nil {
168+
return fmt.Errorf("failed to restart Caddy: %w", err)
169+
}
170+
logger.Info("✓ Caddy container restarted successfully")
171+
172+
return nil
173+
}
174+
175+
// needsCaddyfileAdminFix checks if Caddyfile needs admin binding fix
176+
func (f *CaddyFixer) needsCaddyfileAdminFix(issues []string) bool {
177+
for _, issue := range issues {
178+
if strings.Contains(issue, "Caddyfile missing 'admin 0.0.0.0:2019'") {
179+
return true
180+
}
181+
}
182+
return false
183+
}
184+
185+
// needsDockerComposeNetworkFix checks if docker-compose.yml needs network name fix
186+
func (f *CaddyFixer) needsDockerComposeNetworkFix(issues []string) bool {
187+
for _, issue := range issues {
188+
if strings.Contains(issue, "docker-compose.yml missing explicit 'name: hecate-net'") {
189+
return true
190+
}
191+
}
192+
return false
193+
}
194+
195+
// fixCaddyfileAdmin adds admin 0.0.0.0:2019 to Caddyfile global block
196+
func (f *CaddyFixer) fixCaddyfileAdmin(rc *eos_io.RuntimeContext) error {
197+
logger := otelzap.Ctx(rc.Ctx)
198+
199+
caddyfilePath := filepath.Join(hecate.BaseDir, "Caddyfile")
200+
201+
// Read existing Caddyfile
202+
content, err := os.ReadFile(caddyfilePath)
203+
if err != nil {
204+
return fmt.Errorf("failed to read Caddyfile: %w", err)
205+
}
206+
207+
contentStr := string(content)
208+
209+
// Find global block opening
210+
globalBlockStart := strings.Index(contentStr, "{")
211+
if globalBlockStart == -1 {
212+
return fmt.Errorf("Caddyfile has no global block - cannot apply fix automatically")
213+
}
214+
215+
// Find line after opening brace
216+
lineAfterBrace := globalBlockStart + 1
217+
for lineAfterBrace < len(contentStr) && (contentStr[lineAfterBrace] == '\n' || contentStr[lineAfterBrace] == '\r') {
218+
lineAfterBrace++
219+
}
220+
221+
// Insert admin binding with documentation
222+
adminConfig := ` # Admin API binding (P0 FIX - Connection Reset)
223+
# ROOT CAUSE: Default binding (localhost:2019) only listens on 127.0.0.1 inside container
224+
# Host cannot connect to container's localhost → connection refused
225+
# SOLUTION: Bind to 0.0.0.0:2019 to listen on all interfaces including bridge network
226+
# SECURITY: Admin API still protected by Docker network isolation
227+
# Only accessible from: container localhost, host machine, same bridge network
228+
# NOT accessible from external networks (no port publish in docker-compose)
229+
# APPLIED: eos update hecate --fix caddy
230+
admin 0.0.0.0:2019
231+
232+
`
233+
234+
newContent := contentStr[:lineAfterBrace] + adminConfig + contentStr[lineAfterBrace:]
235+
236+
// Backup existing Caddyfile
237+
backupPath := caddyfilePath + ".backup"
238+
if err := os.WriteFile(backupPath, content, 0644); err != nil {
239+
logger.Warn("Failed to create Caddyfile backup", zap.Error(err))
240+
} else {
241+
logger.Debug("Created Caddyfile backup", zap.String("path", backupPath))
242+
}
243+
244+
// Write updated Caddyfile
245+
if err := os.WriteFile(caddyfilePath, []byte(newContent), 0644); err != nil {
246+
return fmt.Errorf("failed to write Caddyfile: %w", err)
247+
}
248+
249+
logger.Info("Updated Caddyfile with admin 0.0.0.0:2019 binding")
250+
251+
return nil
252+
}
253+
254+
// fixDockerComposeNetwork adds explicit network name to docker-compose.yml
255+
func (f *CaddyFixer) fixDockerComposeNetwork(rc *eos_io.RuntimeContext) error {
256+
logger := otelzap.Ctx(rc.Ctx)
257+
258+
composeFilePath := filepath.Join(hecate.BaseDir, "docker-compose.yml")
259+
260+
// Read existing docker-compose.yml
261+
content, err := os.ReadFile(composeFilePath)
262+
if err != nil {
263+
return fmt.Errorf("failed to read docker-compose.yml: %w", err)
264+
}
265+
266+
contentStr := string(content)
267+
268+
// Find networks section
269+
networksSection := "networks:\n hecate-net:"
270+
networksIdx := strings.Index(contentStr, networksSection)
271+
if networksIdx == -1 {
272+
return fmt.Errorf("docker-compose.yml has no 'networks: hecate-net:' section - cannot apply fix automatically")
273+
}
274+
275+
// Find end of hecate-net network definition (next top-level key or end of file)
276+
insertIdx := networksIdx + len(networksSection)
277+
278+
// Insert explicit network name with documentation
279+
networkConfig := `
280+
# P0 FIX - Network Name Mismatch
281+
# ROOT CAUSE: Docker Compose prefixes network names with project name
282+
# Without explicit name: "hecate_hecate-net" (project_network format)
283+
# With explicit name: "hecate-net" (exactly as specified)
284+
# SOLUTION: Set explicit name to prevent Docker Compose prefixing
285+
# RATIONALE: Docker SDK code expects "hecate-net", not "hecate_hecate-net"
286+
# APPLIED: eos update hecate --fix caddy
287+
name: hecate-net
288+
driver: bridge`
289+
290+
newContent := contentStr[:insertIdx] + networkConfig + contentStr[insertIdx:]
291+
292+
// Backup existing docker-compose.yml
293+
backupPath := composeFilePath + ".backup"
294+
if err := os.WriteFile(backupPath, content, 0644); err != nil {
295+
logger.Warn("Failed to create docker-compose.yml backup", zap.Error(err))
296+
} else {
297+
logger.Debug("Created docker-compose.yml backup", zap.String("path", backupPath))
298+
}
299+
300+
// Write updated docker-compose.yml
301+
if err := os.WriteFile(composeFilePath, []byte(newContent), 0644); err != nil {
302+
return fmt.Errorf("failed to write docker-compose.yml: %w", err)
303+
}
304+
305+
logger.Info("Updated docker-compose.yml with explicit network name")
306+
307+
return nil
308+
}
309+
310+
// restartCaddyContainer restarts the Caddy container to apply configuration changes
311+
func (f *CaddyFixer) restartCaddyContainer(rc *eos_io.RuntimeContext) error {
312+
logger := otelzap.Ctx(rc.Ctx)
313+
314+
// Use docker compose to restart Caddy service
315+
output, err := execute.Run(rc.Ctx, execute.Options{
316+
Command: "docker",
317+
Args: []string{"compose", "-f", filepath.Join(hecate.BaseDir, "docker-compose.yml"), "restart", "caddy"},
318+
Dir: hecate.BaseDir,
319+
Capture: true,
320+
})
321+
322+
if err != nil {
323+
return fmt.Errorf("docker compose restart failed: %w\nOutput: %s", err, output)
324+
}
325+
326+
logger.Debug("Docker compose restart output", zap.String("output", strings.TrimSpace(output)))
327+
328+
return nil
329+
}

0 commit comments

Comments
 (0)