|
| 1 | +// pkg/hecate/caddy_docker.go - Docker SDK integration for Caddy Admin API |
| 2 | +// |
| 3 | +// ARCHITECTURE: Solves "connection reset" issue by using Docker SDK |
| 4 | +// ROOT CAUSE: Caddy binds Admin API to 127.0.0.1 inside container (IPv4 only) |
| 5 | +// Host's `localhost` resolves to ::1 (IPv6) first → connection refused |
| 6 | +// SOLUTION: Use Docker SDK to get container's internal IP address on bridge network |
| 7 | +// Then connect directly to container IP, bypassing localhost resolution |
| 8 | +// |
| 9 | +// VENDOR EVIDENCE: |
| 10 | +// - Caddy Community: "Connection reset to Docker container usually means wrong bind address" |
| 11 | +// - Docker Docs: "Containers have their own network namespace, use bridge IP for host access" |
| 12 | +// - Go net: "localhost can resolve to IPv6 ::1 or IPv4 127.0.0.1 depending on OS" |
| 13 | +// |
| 14 | +// SECURITY: Docker SDK respects same security model as docker CLI |
| 15 | +// Only works if user has docker socket access (/var/run/docker.sock) |
| 16 | + |
| 17 | +package hecate |
| 18 | + |
import (
	"context"
	"encoding/binary"
	"fmt"
	"io"

	"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/client"
	"github.com/uptrace/opentelemetry-go-extra/otelzap"
	"go.uber.org/zap"
)
| 29 | + |
| 30 | +// GetCaddyContainerIP retrieves the IP address of the Caddy container on the hecate-net bridge network |
| 31 | +// |
| 32 | +// RATIONALE: Caddy Admin API binds to 127.0.0.1 inside container, but that's not accessible from host |
| 33 | +// Docker SDK provides container's bridge network IP, which IS accessible from host |
| 34 | +// |
| 35 | +// ARCHITECTURE: |
| 36 | +// Host (Eos) → Docker Bridge (172.x.x.x) → Container (hecate-caddy) |
| 37 | +// Container has BOTH: |
| 38 | +// - Internal localhost (127.0.0.1) - only accessible inside container |
| 39 | +// - Bridge IP (172.x.x.x) - accessible from host and other containers |
| 40 | +// |
| 41 | +// SECURITY: Docker socket access required (/var/run/docker.sock) |
| 42 | +// Same permissions as `docker inspect hecate-caddy` |
| 43 | +// Safe: Read-only operation, no container modification |
| 44 | +// |
| 45 | +// RETURNS: |
| 46 | +// - Container's IP on hecate-net network (e.g., "172.21.0.5") |
| 47 | +// - Error if container not found, not running, or not on hecate-net |
| 48 | +func GetCaddyContainerIP(ctx context.Context) (string, error) { |
| 49 | + // Create Docker client from environment (respects DOCKER_HOST, DOCKER_CERT_PATH, etc.) |
| 50 | + // SECURITY: Uses same credentials as docker CLI |
| 51 | + // RATIONALE: Supports both local socket and remote Docker daemons |
| 52 | + dockerClient, err := client.NewClientWithOpts( |
| 53 | + client.FromEnv, |
| 54 | + client.WithAPIVersionNegotiation(), // Auto-negotiate API version (best practice) |
| 55 | + ) |
| 56 | + if err != nil { |
| 57 | + return "", fmt.Errorf("failed to create Docker client: %w\n\n"+ |
| 58 | + "Troubleshooting:\n"+ |
| 59 | + " 1. Docker installed? Run: docker --version\n"+ |
| 60 | + " 2. Docker running? Run: docker ps\n"+ |
| 61 | + " 3. Socket accessible? Run: ls -l /var/run/docker.sock\n"+ |
| 62 | + " 4. User in docker group? Run: groups | grep docker", err) |
| 63 | + } |
| 64 | + defer dockerClient.Close() |
| 65 | + |
| 66 | + // Inspect Caddy container to get network settings |
| 67 | + // ARCHITECTURE: ContainerInspect returns full container metadata including all networks |
| 68 | + containerInfo, err := dockerClient.ContainerInspect(ctx, CaddyContainerName) |
| 69 | + if err != nil { |
| 70 | + return "", fmt.Errorf("failed to inspect Caddy container '%s': %w\n\n"+ |
| 71 | + "Troubleshooting:\n"+ |
| 72 | + " 1. Container running? Run: docker ps -a | grep %s\n"+ |
| 73 | + " 2. Container name correct? Expected: %s\n"+ |
| 74 | + " 3. Start container: docker compose -f /opt/hecate/docker-compose.yml up -d caddy", |
| 75 | + CaddyContainerName, err, CaddyContainerName, CaddyContainerName) |
| 76 | + } |
| 77 | + |
| 78 | + // Verify container is actually running (not stopped/paused/restarting) |
| 79 | + // RATIONALE: Container could exist but not be running → IP would be invalid |
| 80 | + if !containerInfo.State.Running { |
| 81 | + return "", fmt.Errorf("Caddy container '%s' is not running (state: %s)\n\n"+ |
| 82 | + "Start the container:\n"+ |
| 83 | + " docker compose -f /opt/hecate/docker-compose.yml up -d caddy\n\n"+ |
| 84 | + "Check logs for errors:\n"+ |
| 85 | + " docker logs %s --tail 50", |
| 86 | + CaddyContainerName, containerInfo.State.Status, CaddyContainerName) |
| 87 | + } |
| 88 | + |
| 89 | + // Get IP from hecate-net bridge network |
| 90 | + // ARCHITECTURE: Docker Compose creates a custom bridge network named "hecate-net" |
| 91 | + // RATIONALE: Custom networks provide DNS, isolation, and predictable IPs |
| 92 | + // FALLBACK: If hecate-net doesn't exist, try "bridge" (default Docker network) |
| 93 | + networkName := "hecate-net" |
| 94 | + if network, ok := containerInfo.NetworkSettings.Networks[networkName]; ok { |
| 95 | + if network.IPAddress == "" { |
| 96 | + return "", fmt.Errorf("Caddy container on network '%s' but has no IP address\n\n"+ |
| 97 | + "This usually means the network is starting up.\n"+ |
| 98 | + "Wait 5 seconds and retry, or restart container:\n"+ |
| 99 | + " docker compose -f /opt/hecate/docker-compose.yml restart caddy", |
| 100 | + networkName) |
| 101 | + } |
| 102 | + return network.IPAddress, nil |
| 103 | + } |
| 104 | + |
| 105 | + // Fallback: Try default bridge network |
| 106 | + // RATIONALE: User might have modified docker-compose.yml to use default bridge |
| 107 | + if network, ok := containerInfo.NetworkSettings.Networks["bridge"]; ok { |
| 108 | + if network.IPAddress != "" { |
| 109 | + return network.IPAddress, nil |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + // No suitable network found |
| 114 | + availableNetworks := make([]string, 0, len(containerInfo.NetworkSettings.Networks)) |
| 115 | + for name := range containerInfo.NetworkSettings.Networks { |
| 116 | + availableNetworks = append(availableNetworks, name) |
| 117 | + } |
| 118 | + |
| 119 | + return "", fmt.Errorf("Caddy container not connected to '%s' network\n\n"+ |
| 120 | + "Available networks: %v\n\n"+ |
| 121 | + "Fix docker-compose.yml:\n"+ |
| 122 | + " caddy:\n"+ |
| 123 | + " networks:\n"+ |
| 124 | + " - hecate-net\n\n"+ |
| 125 | + "Then recreate container:\n"+ |
| 126 | + " docker compose -f /opt/hecate/docker-compose.yml up -d --force-recreate caddy", |
| 127 | + networkName, availableNetworks) |
| 128 | +} |
| 129 | + |
| 130 | +// GetCaddyContainerIPWithLogging is a wrapper around GetCaddyContainerIP that adds structured logging |
| 131 | +// |
| 132 | +// RATIONALE: Observability - log Docker SDK operations for debugging |
| 133 | +// USAGE: Use this in production code, use GetCaddyContainerIP in tests |
| 134 | +func GetCaddyContainerIPWithLogging(rc *eos_io.RuntimeContext) (string, error) { |
| 135 | + logger := otelzap.Ctx(rc.Ctx) |
| 136 | + |
| 137 | + logger.Debug("Detecting Caddy container IP via Docker SDK", |
| 138 | + zap.String("container_name", CaddyContainerName), |
| 139 | + zap.String("expected_network", "hecate-net")) |
| 140 | + |
| 141 | + ip, err := GetCaddyContainerIP(rc.Ctx) |
| 142 | + if err != nil { |
| 143 | + logger.Error("Failed to detect Caddy container IP", |
| 144 | + zap.String("container_name", CaddyContainerName), |
| 145 | + zap.Error(err)) |
| 146 | + return "", err |
| 147 | + } |
| 148 | + |
| 149 | + logger.Info("✓ Caddy container IP detected via Docker SDK", |
| 150 | + zap.String("container_name", CaddyContainerName), |
| 151 | + zap.String("bridge_ip", ip), |
| 152 | + zap.String("admin_api_url", fmt.Sprintf("http://%s:%d", ip, CaddyAdminAPIPort))) |
| 153 | + |
| 154 | + return ip, nil |
| 155 | +} |
| 156 | + |
| 157 | +// IsCaddyContainerRunning checks if the Caddy container is running |
| 158 | +// |
| 159 | +// RATIONALE: Pre-flight check before attempting Admin API operations |
| 160 | +// RETURNS: true if container exists and is running, false otherwise |
| 161 | +// ERROR: Only returns error if Docker SDK fails, NOT if container is stopped |
| 162 | +func IsCaddyContainerRunning(ctx context.Context) (bool, error) { |
| 163 | + dockerClient, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) |
| 164 | + if err != nil { |
| 165 | + return false, fmt.Errorf("failed to create Docker client: %w", err) |
| 166 | + } |
| 167 | + defer dockerClient.Close() |
| 168 | + |
| 169 | + containerInfo, err := dockerClient.ContainerInspect(ctx, CaddyContainerName) |
| 170 | + if err != nil { |
| 171 | + // Container not found is not an error - just return false |
| 172 | + if client.IsErrNotFound(err) { |
| 173 | + return false, nil |
| 174 | + } |
| 175 | + return false, fmt.Errorf("failed to inspect container: %w", err) |
| 176 | + } |
| 177 | + |
| 178 | + return containerInfo.State.Running, nil |
| 179 | +} |
| 180 | + |
| 181 | +// GetCaddyContainerLogs retrieves recent logs from Caddy container for debugging |
| 182 | +// |
| 183 | +// RATIONALE: When Admin API fails, logs often contain the root cause |
| 184 | +// RETURNS: Last N lines of logs as string |
| 185 | +// USAGE: Call this when Admin API operations fail to provide user with debugging context |
| 186 | +func GetCaddyContainerLogs(ctx context.Context, tailLines int) (string, error) { |
| 187 | + dockerClient, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) |
| 188 | + if err != nil { |
| 189 | + return "", fmt.Errorf("failed to create Docker client: %w", err) |
| 190 | + } |
| 191 | + defer dockerClient.Close() |
| 192 | + |
| 193 | + // ContainerLogs options |
| 194 | + opts := container.LogsOptions{ |
| 195 | + ShowStdout: true, |
| 196 | + ShowStderr: true, |
| 197 | + Tail: fmt.Sprintf("%d", tailLines), |
| 198 | + Timestamps: true, |
| 199 | + } |
| 200 | + |
| 201 | + logs, err := dockerClient.ContainerLogs(ctx, CaddyContainerName, opts) |
| 202 | + if err != nil { |
| 203 | + return "", fmt.Errorf("failed to get container logs: %w", err) |
| 204 | + } |
| 205 | + defer logs.Close() |
| 206 | + |
| 207 | + // Read logs (Docker returns multiplexed stream, but for simple text we can read directly) |
| 208 | + buf := make([]byte, 4096) |
| 209 | + n, _ := logs.Read(buf) |
| 210 | + return string(buf[:n]), nil |
| 211 | +} |
0 commit comments