diff --git a/cmd/update/moni.go b/cmd/update/moni.go
index cafe03c1..4173327a 100644
--- a/cmd/update/moni.go
+++ b/cmd/update/moni.go
@@ -12,6 +12,7 @@ import (
"github.com/CodeMonkeyCybersecurity/eos/pkg/bionicgpt/refresh"
eos "github.com/CodeMonkeyCybersecurity/eos/pkg/eos_cli"
"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
+ "github.com/CodeMonkeyCybersecurity/eos/pkg/moni"
"github.com/spf13/cobra"
"github.com/uptrace/opentelemetry-go-extra/otelzap"
"go.uber.org/zap"
@@ -28,6 +29,22 @@ var (
moniPostInstall bool
moniRotateAPIKeys bool
moniInstallDir string
+
+ // Moni init (worker) flags
+ moniInit bool
+ moniSkipSSL bool
+ moniSkipDatabase bool
+ moniSkipSecurity bool
+ moniSkipVerification bool
+ moniValidateCerts bool
+ moniFixCerts bool
+ moniVerifyDB bool
+ moniVerifyRLS bool
+ moniVerifyCSP bool
+ moniVerifySecurity bool
+ moniCleanupBackups bool
+ moniWorkDir string
+ moniForce bool
)
// MoniCmd is the command for Moni (BionicGPT) operations
@@ -112,6 +129,44 @@ Examples:
MoniCmd.Flags().StringVar(&moniInstallDir, "install-dir", "/opt/bionicgpt",
"Path to Moni installation directory")
+ // Moni init (worker) flags - full initialization
+ MoniCmd.Flags().BoolVar(&moniInit, "init", false,
+ "Run full Moni initialization (SSL, database, security)")
+
+ // Phase control flags
+ MoniCmd.Flags().BoolVar(&moniSkipSSL, "skip-ssl", false,
+ "Skip SSL certificate generation")
+ MoniCmd.Flags().BoolVar(&moniSkipDatabase, "skip-database", false,
+ "Skip database configuration")
+ MoniCmd.Flags().BoolVar(&moniSkipSecurity, "skip-security", false,
+ "Skip security hardening")
+ MoniCmd.Flags().BoolVar(&moniSkipVerification, "skip-verification", false,
+ "Skip security verification")
+
+ // Targeted action flags
+ MoniCmd.Flags().BoolVar(&moniValidateCerts, "validate-certs", false,
+ "Validate SSL certificate readability")
+ MoniCmd.Flags().BoolVar(&moniFixCerts, "fix-certs", false,
+ "Fix SSL certificate permissions")
+ MoniCmd.Flags().BoolVar(&moniVerifyDB, "verify-db", false,
+ "Verify database configuration")
+ MoniCmd.Flags().BoolVar(&moniVerifyRLS, "verify-rls", false,
+ "Verify Row Level Security (RLS)")
+ MoniCmd.Flags().BoolVar(&moniVerifyCSP, "verify-csp", false,
+ "Verify Content Security Policy (CSP)")
+ MoniCmd.Flags().BoolVar(&moniVerifySecurity, "verify-security", false,
+ "Run all security verifications (RLS + CSP)")
+ MoniCmd.Flags().BoolVar(&moniCleanupBackups, "cleanup-backups", false,
+ "Cleanup old .env backups")
+
+ // Work directory flag
+ MoniCmd.Flags().StringVar(&moniWorkDir, "work-dir", "/opt/moni",
+ "Working directory for Moni initialization (default: /opt/moni)")
+
+ // Force flag (skip confirmations for RLS breaking changes)
+ MoniCmd.Flags().BoolVar(&moniForce, "force", false,
+ "Skip confirmation prompts (use for automation/CI/CD)")
+
MoniCmd.AddCommand(refreshCmd)
}
@@ -120,17 +175,32 @@ Examples:
func runMoniOperations(rc *eos_io.RuntimeContext, cmd *cobra.Command, args []string) error {
logger := otelzap.Ctx(rc.Ctx)
- // Check which operation was requested
+ // Check which operation was requested (priority order)
+
+ // 1. Init/worker operations (new functionality)
+ if moniInit || moniValidateCerts || moniFixCerts || moniVerifyDB ||
+ moniVerifyRLS || moniVerifyCSP || moniVerifySecurity || moniCleanupBackups {
+ return runMoniInit(rc, cmd, args)
+ }
+
+ // 2. Post-install
if moniPostInstall {
return runMoniPostInstall(rc, cmd, args)
}
+ // 3. API key rotation
if moniRotateAPIKeys {
return runMoniRotateAPIKeys(rc, cmd, args)
}
// If no operation specified, show help
- logger.Info("No operation specified. Use --post-install or --rotate-api-keys")
+ logger.Info("No operation specified")
+ logger.Info("Common operations:")
+ logger.Info(" --init # Full initialization (SSL, database, security)")
+ logger.Info(" --post-install # Post-installation configuration")
+ logger.Info(" --rotate-api-keys # Rotate API keys")
+ logger.Info(" --validate-certs # Validate SSL certificates")
+ logger.Info(" --verify-security # Security verification")
return cmd.Help()
}
@@ -224,3 +294,70 @@ func runMoniRefresh(rc *eos_io.RuntimeContext, cmd *cobra.Command, args []string
logger.Info("Moni refresh completed successfully")
return nil
}
+
+// runMoniInit handles the Moni initialization worker
+// Orchestration layer: delegates to pkg/moni for business logic
+func runMoniInit(rc *eos_io.RuntimeContext, cmd *cobra.Command, args []string) error {
+ logger := otelzap.Ctx(rc.Ctx)
+
+ // Build worker configuration
+ config := &moni.WorkerConfig{
+ SkipSSL: moniSkipSSL,
+ SkipDatabase: moniSkipDatabase,
+ SkipSecurity: moniSkipSecurity,
+ SkipVerification: moniSkipVerification,
+ ValidateCertsOnly: moniValidateCerts,
+ FixCertsOnly: moniFixCerts,
+ VerifyDBOnly: moniVerifyDB,
+ VerifyRLSOnly: moniVerifyRLS,
+ VerifyCSPOnly: moniVerifyCSP,
+ VerifySecurityOnly: moniVerifySecurity,
+ CleanupBackups: moniCleanupBackups,
+ WorkDir: moniWorkDir,
+ Force: moniForce,
+ }
+
+ // Log operation
+ if moniInit {
+ logger.Info("Starting Moni full initialization",
+ zap.String("work_dir", moniWorkDir))
+ } else if moniValidateCerts {
+ logger.Info("Validating SSL certificates")
+ } else if moniFixCerts {
+ logger.Info("Fixing SSL certificate permissions")
+ } else if moniVerifyDB {
+ logger.Info("Verifying database configuration")
+ } else if moniVerifyRLS {
+ logger.Info("Verifying Row Level Security")
+ } else if moniVerifyCSP {
+ logger.Info("Verifying Content Security Policy")
+ } else if moniVerifySecurity {
+ logger.Info("Running security verification")
+ } else if moniCleanupBackups {
+ logger.Info("Cleaning up old backups")
+ }
+
+ // Run worker
+ result, err := moni.RunWorker(rc, config)
+ if err != nil {
+ logger.Error("Moni worker failed", zap.Error(err))
+ return fmt.Errorf("moni worker failed: %w", err)
+ }
+
+ // Check result
+ if !result.Success {
+ logger.Error("Moni operation did not complete successfully")
+
+ if len(result.CriticalIssues) > 0 {
+ logger.Error("Critical issues detected:")
+ for _, issue := range result.CriticalIssues {
+ logger.Error(fmt.Sprintf(" • %s", issue))
+ }
+ }
+
+ return fmt.Errorf("moni operation failed")
+ }
+
+ logger.Info("Moni operation completed successfully")
+ return nil
+}
diff --git a/pkg/moni/README.md b/pkg/moni/README.md
new file mode 100644
index 00000000..d857b5a0
--- /dev/null
+++ b/pkg/moni/README.md
@@ -0,0 +1,424 @@
+# pkg/moni - Moni (BionicGPT) Initialization Worker
+
+*Last Updated: 2025-01-07*
+
+This package implements the Moni (BionicGPT) initialization worker, handling SSL certificate generation, database configuration, security hardening, and verification for multi-tenant LLM deployments.
+
+## Architecture
+
+### Core Files
+
+- **worker.go** (813 lines) - Main orchestration with 9-phase setup flow
+- **ssl.go** (465 lines) - SSL certificate generation + SHIFT-LEFT v2 testing
+- **database.go** (399 lines) - Database config + security hardening + RLS
+- **verification.go** (577 lines) - Health checks + security verification
+- **types.go** (107 lines) - Type definitions (WorkerConfig, SetupResult, etc.)
+- **constants.go** (88 lines) - Configuration constants (paths, UIDs, timeouts)
+
+### Total: ~2,449 lines of production Go code
+
+## Features
+
+### 9-Phase Setup Flow
+
+1. **Phase 1: SSL Certificate Generation**
+ - Generates self-signed certificates for PostgreSQL
+ - Checks existing certificates for validity (365-day expiration)
+ - Creates `/opt/moni/certs/` directory structure
+
+2. **Phase 2: Certificate Permission Validation**
+ - **SHIFT-LEFT v2 Testing**: Validates certificates are readable BEFORE deployment
+ - Detects Alpine (UID 70) vs Standard (UID 999) PostgreSQL images
+ - Implements multi-strategy certificate management:
+ - Single UID 70 (Alpine-only)
+ - Single UID 999 (Standard-only)
+ - Separate certs (Mixed Alpine + Standard)
+ - Sets correct ownership: `0:70` or `999:999`
+ - Sets correct permissions: `640` for keys, `644` for certs
+
+3. **Phase 3: Environment Configuration**
+ - Updates `.env` to enable SSL (`sslmode=disable` → `sslmode=require`)
+ - Creates timestamped backups with `0600` permissions
+ - Cleans up old backups (keeps last 3)
+
+4. **Phase 4: Container Restart**
+ - Stops containers: `docker compose down`
+ - Starts with new config: `docker compose up -d`
+ - Waits 30 seconds for initialization
+ - Thread-safe: Uses `WorkDir` instead of `os.Chdir()`
+
+5. **Phase 5: Database Configuration**
+ - **Upserts 3 models**:
+ - `nomic-embed-text` (Embeddings, 8192 context)
+ - `Moni` (LLM, GPT-5-mini, 16384 max tokens)
+ - `Moni-4.1` (LLM, GPT-4.1-mini, 16384 max tokens)
+ - **Renames default assistant**: `llama3` → `Moni`
+ - Links Moni prompt to Moni model (ID 2)
+
+6. **Phase 6: API Key Regeneration**
+ - Runs `/opt/moni/api_keys.sh` if present
+ - Generates new LiteLLM virtual keys
+ - Updates `.env` and database
+
+7. **Phase 7: Database Security Hardening**
+ - **Least Privilege**:
+ - `litellm`: NOSUPERUSER, DML only (no DDL)
+ - `bionic_application`: DML only (no schema changes)
+ - `bionic_readonly`: Created for monitoring (read-only)
+ - **Row Level Security (RLS)**:
+ - Enables RLS on 15 critical tables
+ - Creates tenant isolation policies
+ - **P0 SECURITY FIX**: Fails loudly if `app.current_team_id` not set
+ - Verifies `bionic_application` is NOT a superuser (P0 check)
+
+8. **Phase 8: Security Verification**
+ - **RLS Verification**:
+ - Checks 15 tables have RLS enabled
+ - Verifies policies exist
+ - **P1 PERFORMANCE**: Checks for `team_id` indexes
+ - **CSP Verification**:
+ - Checks Content-Security-Policy headers
+ - Scores 0-100 based on security directives
+ - Detects dangerous patterns (`'unsafe-eval'`, wildcards)
+
+9. **Phase 9: Final Health Check**
+ - Verifies PostgreSQL SSL status
+ - Checks LiteLLM models endpoint
+ - Validates web search configuration
+ - Confirms all containers healthy
+
+## Security Implementation
+
+### Row Level Security (RLS)
+
+**Protected Tables** (15 total):
+- **Direct team_id** (12): api_key_connections, api_keys, audit_trail, conversations, datasets, document_pipelines, integrations, invitations, oauth2_connections, objects, prompts, team_users
+- **Indirect team_id** (3): chats, documents, chunks
+
+**Policy Pattern**:
+```sql
+CREATE POLICY tenant_isolation_<table>
+  ON <table>
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+```
+
+**P0 SECURITY FIX**: Removed `true` parameter from `current_setting()` to fail loudly if session variable not set.
+
+**CRITICAL REQUIREMENT**: Application MUST set `app.current_team_id` on every connection:
+```sql
+SET app.current_team_id = <team_id>;
+```
+
+Without this, queries will ERROR (intentional - fail loudly, not silently).
+
+### SSL Certificate Management
+
+**Alpine PostgreSQL** (postgres:*-alpine, pgvector/pgvector:*-alpine):
+- UID: 70 (postgres group)
+- Ownership: `0:70` (root:postgres)
+- Permissions: `640` (owner RW, group R)
+
+**Standard PostgreSQL** (postgres:*, pgvector/pgvector:*):
+- UID: 999 (postgres user)
+- Ownership: `999:999` (postgres:postgres)
+- Permissions: `600` (owner RW only)
+
+**SHIFT-LEFT v2 Testing**:
+```go
+TestCertReadability(rc, image, uid, certPath)
+```
+Validates certificates are readable by running test containers BEFORE deployment.
+
+### Database Hardening
+
+**litellm user**:
+```sql
+ALTER USER litellm NOSUPERUSER NOCREATEDB NOCREATEROLE NOREPLICATION NOBYPASSRLS;
+GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO litellm;
+```
+
+**bionic_application user**:
+```sql
+REVOKE ALL ON DATABASE "bionic-gpt" FROM bionic_application;
+GRANT CONNECT ON DATABASE "bionic-gpt" TO bionic_application;
+GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO bionic_application;
+```
+
+**bionic_readonly user**:
+```sql
+CREATE USER bionic_readonly WITH PASSWORD '<password>';
+GRANT CONNECT ON DATABASE "bionic-gpt" TO bionic_readonly;
+GRANT SELECT ON ALL TABLES IN SCHEMA public TO bionic_readonly;
+```
+
+## Usage
+
+### Command Integration
+
+Integrated into `cmd/update/moni.go` as `eos update moni` command.
+
+### Full Initialization
+
+```bash
+sudo eos update moni --init
+```
+
+Runs all 9 phases.
+
+### Targeted Operations
+
+```bash
+# Certificate operations
+sudo eos update moni --validate-certs # SHIFT-LEFT v2 testing
+sudo eos update moni --fix-certs # Fix permissions
+
+# Security verification
+sudo eos update moni --verify-security # RLS + CSP
+sudo eos update moni --verify-rls # Row Level Security only
+sudo eos update moni --verify-csp # Content Security Policy only
+
+# Database operations
+sudo eos update moni --verify-db # Verify models/prompts
+
+# Maintenance
+sudo eos update moni --cleanup-backups # Clean old .env backups
+```
+
+### Phase Control
+
+```bash
+# Skip specific phases
+sudo eos update moni --init --skip-ssl
+sudo eos update moni --init --skip-database
+sudo eos update moni --init --skip-security
+sudo eos update moni --init --skip-verification
+```
+
+### Custom Working Directory
+
+```bash
+sudo eos update moni --init --work-dir /opt/custom-moni
+```
+
+## Configuration
+
+### Constants (constants.go)
+
+**Paths**:
+- `MoniDir`: `/opt/moni`
+- `MoniEnvFile`: `/opt/moni/.env`
+- `MoniCertsDir`: `/opt/moni/certs`
+- `MoniDockerCompose`: `/opt/moni/docker-compose.yml`
+
+**Timeouts**:
+- `MaxWaitSeconds`: 120s (service readiness)
+- `CheckIntervalSecs`: 2s (health check polling)
+- `CommandTimeout`: 30s (SQL, docker exec)
+- `LongCommandTimeout`: 5m (API key regeneration)
+
+**SSL Certificate**:
+- `CertOwnerUID`: 0 (root)
+- `CertOwnerGID`: 70 (Alpine postgres group)
+- `StandardUID`: 999 (Standard postgres user)
+- `CertKeyPerms`: 0640
+- `CertCrtPerms`: 0644
+
+**Model Configuration**:
+- `ModelContextSize`: 16384 (max completion tokens)
+- `EmbeddingsContextSize`: 8192
+
+### Environment Variables
+
+Required in `/opt/moni/.env`:
+- `POSTGRES_PASSWORD` - PostgreSQL root password
+- `LITELLM_MASTER_KEY` - LiteLLM master API key
+- `APP_DATABASE_URL` - BionicGPT database connection string
+
+Optional:
+- `ENABLE_WEB_SEARCH` - Enable web search (`true`/`false`)
+- `MONI_SYSTEM_PROMPT` - Custom system prompt
+
+## Error Handling
+
+### User-Fixable Errors (Exit 0)
+
+- Missing prerequisites (Docker, OpenSSL, curl, sudo)
+- Working directory doesn't exist
+- docker-compose.yml not found
+
+**Returns**: Clear error with remediation steps
+
+### System Failures (Exit 1)
+
+- SSL certificate generation failed
+- Docker Compose operations failed
+- Database hardening failed
+- RLS enablement failed
+
+**Returns**: Error with context and troubleshooting steps
+
+## Pattern Compliance
+
+✅ **Logging**: Uses `otelzap.Ctx(rc.Ctx)` everywhere
+✅ **Architecture**: Business logic in `pkg/moni/`, orchestration in `cmd/`
+✅ **AIE Pattern**: All functions follow Assess→Intervene→Evaluate
+✅ **RuntimeContext**: Passed to all operations
+✅ **Constants**: No hardcoded values (all in constants.go)
+✅ **Thread-Safe**: Uses `execute.Options.WorkDir` instead of `os.Chdir()`
+✅ **Error Context**: All errors include remediation steps
+
+## Testing
+
+### Prerequisites Check
+
+```go
+checkPrerequisites(rc) error
+```
+
+Validates:
+- Docker CLI available
+- Docker daemon responding
+- Docker Compose available
+- OpenSSL available
+- sudo available
+- curl available
+
+### Container Health Check
+
+```go
+checkContainerHealth(rc) error
+```
+
+Detects unhealthy containers and logs last 10 lines of logs.
+
+### Service Readiness
+
+```go
+WaitForService(rc, name, checkFunc, maxWait, checkInterval) error
+```
+
+Polls until service ready or timeout:
+- PostgreSQL: `pg_isready -U postgres`
+- LiteLLM: `curl -sf http://localhost:4000/health/readiness`
+
+## Troubleshooting
+
+### SSL Certificate Issues
+
+**Symptom**: PostgreSQL fails to start with "certificate permission denied"
+
+**Fix**:
+```bash
+sudo eos update moni --fix-certs
+```
+
+**Verify**:
+```bash
+sudo eos update moni --validate-certs
+```
+
+### RLS Not Working
+
+**Symptom**: Users see all data across tenants OR users see no data
+
+**Check**:
+```bash
+sudo eos update moni --verify-rls
+```
+
+**Fix**: Ensure application sets `app.current_team_id`:
+```sql
+-- In connection initialization
+SET app.current_team_id = <team_id>;
+```
+
+### Performance Issues
+
+**Symptom**: Slow queries after enabling RLS
+
+**Check**:
+```bash
+sudo eos update moni --verify-rls
+```
+
+Look for warnings about missing `team_id` indexes.
+
+**Fix** (if indexes missing):
+```sql
+CREATE INDEX idx_<table>_team_id ON <table> (team_id);
+```
+
+### Container Startup Failures
+
+**Check logs**:
+```bash
+docker compose -f /opt/moni/docker-compose.yml logs -f
+```
+
+**Check health**:
+```bash
+docker ps -a
+```
+
+**Restart with fresh config**:
+```bash
+sudo eos update moni --init --skip-database --skip-security
+```
+
+## Best Practices
+
+### Production Deployment
+
+1. ✅ Run `--init` on fresh installation
+2. ✅ Verify RLS with `--verify-rls`
+3. ✅ Verify CSP with `--verify-csp`
+4. ✅ Ensure application sets `app.current_team_id`
+5. ✅ Monitor container health
+6. ✅ Create `team_id` indexes if missing
+7. ✅ Backup `.env` before changes
+
+### Security Checklist
+
+- [ ] RLS enabled on all 15 critical tables
+- [ ] `bionic_application` is NOT a superuser
+- [ ] `team_id` indexes exist for performance
+- [ ] SSL certificates have correct permissions
+- [ ] Application sets `app.current_team_id` on every connection
+- [ ] CSP headers present and scored ≥40
+- [ ] Web search disabled (unless explicitly needed)
+
+### Maintenance
+
+**Backup Cleanup**:
+```bash
+sudo eos update moni --cleanup-backups
+```
+Keeps last 3 `.env.backup.*` files.
+
+**Certificate Validation**:
+```bash
+sudo eos update moni --validate-certs
+```
+Run monthly or after infrastructure changes.
+
+**Security Verification**:
+```bash
+sudo eos update moni --verify-security
+```
+Run weekly or after application updates.
+
+## References
+
+- **BionicGPT**: https://github.com/bionic-gpt/bionic-gpt
+- **PostgreSQL RLS**: https://www.postgresql.org/docs/current/ddl-rowsecurity.html
+- **Docker PostgreSQL SSL**: https://www.red-gate.com/simple-talk/databases/running-postgresql-in-docker-with-proper-ssl-and-configuration/
+- **Multi-Tenant RLS Best Practices**: https://docs.aws.amazon.com/prescriptive-guidance/latest/saas-multitenant-managed-postgresql/rls.html
+
+## License
+
+See main Eos LICENSE file (AGPL-3.0-or-later + Do No Harm License).
+
+---
+
+**Code Monkey Cybersecurity** (ABN 77 177 673 061)
+*Cybersecurity. With humans.*
diff --git a/pkg/moni/constants.go b/pkg/moni/constants.go
new file mode 100644
index 00000000..deeb73d9
--- /dev/null
+++ b/pkg/moni/constants.go
@@ -0,0 +1,86 @@
+package moni
+
+import "time"
+
+// Container names
+const (
+ PostgresContainer = "bionicgpt-postgres"
+ LiteLLMContainer = "bionicgpt-litellm"
+ LiteLLMDBContainer = "bionicgpt-litellm-db"
+ LangfuseDBContainer = "bionicgpt-langfuse-db"
+ AppContainer = "bionicgpt-app"
+)
+
+// Service URLs
+const (
+ LiteLLMURL = "http://localhost:4000"
+ AppURL = "http://localhost:8513"
+)
+
+// Database configuration
+const (
+ DBName = "bionic-gpt"
+ DBUser = "postgres"
+)
+
+// Paths
+const (
+ MoniDir = "/opt/moni"
+ MoniEnvFile = "/opt/moni/.env"
+ MoniAPIKeysScript = "/opt/moni/api_keys.sh"
+ MoniCertsDir = "/opt/moni/certs"
+ MoniCertsAlpineDir = "/opt/moni/certs-alpine"
+ MoniCertsStandardDir = "/opt/moni/certs-standard"
+ MoniDockerCompose = "/opt/moni/docker-compose.yml"
+)
+
+// Timeouts and intervals
+const (
+ MaxWaitSeconds = 120
+ CheckIntervalSecs = 2
+ InitWaitSeconds = 30
+ CommandTimeout = 30 * time.Second
+ LongCommandTimeout = 5 * time.Minute
+)
+
+// Backup settings
+const (
+ KeepBackups = 3
+)
+
+// SSL Certificate ownership (Alpine PostgreSQL containers)
+const (
+ CertOwnerUID = 0 // root
+ CertOwnerGID = 70 // postgres group in Alpine
+ CertKeyPerms = 0640
+ CertCrtPerms = 0644
+ StandardUID = 999 // Standard PostgreSQL UID
+ TempKeyPerms = 0600
+)
+
+// Certificate strategies
+const (
+ StrategySingleSSLCert = "single-ssl-cert"
+ StrategySingleUID70 = "single-uid-70"
+ StrategySeparateCerts = "separate-certs"
+)
+
+// Model configuration
+// CRITICAL: Context size set to 16384 (not 1M) to match the deployed Azure models'
+// max completion tokens. This prevents API errors when BionicGPT reads this
+// value and sends it as max_tokens in requests.
+const (
+ ModelContextSize = 16384
+ EmbeddingsContextSize = 8192
+ ModelTPMLimit = 50000
+ ModelRPMLimit = 1000
+ ModelFallbackTPMLimit = 30000
+ ModelFallbackRPMLimit = 500
+ EmbeddingsTPMLimit = 10000
+ EmbeddingsRPMLimit = 10000
+)
+
+// RLS table counts
+const (
+ ExpectedRLSTables = 15
+)
diff --git a/pkg/moni/database.go b/pkg/moni/database.go
new file mode 100644
index 00000000..2eb4890d
--- /dev/null
+++ b/pkg/moni/database.go
@@ -0,0 +1,474 @@
+package moni
+
+import (
+ "context"
+ "fmt"
+ "os/exec"
+ "strings"
+ "time"
+
+ "github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
+ "github.com/CodeMonkeyCybersecurity/eos/pkg/execute"
+ "github.com/CodeMonkeyCybersecurity/eos/pkg/interaction"
+ "go.uber.org/zap"
+	"github.com/uptrace/opentelemetry-go-extra/otelzap"
+)
+
+// ConfigureDatabase configures database models and prompts
+// THIS IS WHERE llama3 → Moni RENAMING HAPPENS
+func ConfigureDatabase(rc *eos_io.RuntimeContext) error {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Phase 5: Database Configuration")
+
+ // Step 1: Upsert models (3 models)
+ modelsSQL := fmt.Sprintf(`
+INSERT INTO models (id, model_type, name, base_url, context_size, tpm_limit, rpm_limit, api_key)
+VALUES
+ (1, 'Embeddings', 'nomic-embed-text', 'http://litellm-proxy:4000', %d, %d, %d, 'PLACEHOLDER'),
+ (2, 'LLM', 'Moni', 'http://litellm-proxy:4000', %d, %d, %d, 'PLACEHOLDER'),
+ (3, 'LLM', 'Moni-4.1', 'http://litellm-proxy:4000', %d, %d, %d, 'PLACEHOLDER')
+ON CONFLICT (id) DO UPDATE SET
+ model_type = EXCLUDED.model_type,
+ name = EXCLUDED.name,
+ base_url = EXCLUDED.base_url,
+ context_size = EXCLUDED.context_size,
+ tpm_limit = EXCLUDED.tpm_limit,
+ rpm_limit = EXCLUDED.rpm_limit,
+ api_key = EXCLUDED.api_key;
+`, EmbeddingsContextSize, EmbeddingsTPMLimit, EmbeddingsRPMLimit,
+ ModelContextSize, ModelTPMLimit, ModelRPMLimit,
+ ModelContextSize, ModelFallbackTPMLimit, ModelFallbackRPMLimit)
+
+ logger.Info("Upserting models",
+ zap.String("model_1", "nomic-embed-text (Embeddings, 8192 context)"),
+ zap.String("model_2", "Moni (LLM, GPT-5-mini, 16384 max output tokens)"),
+ zap.String("model_3", "Moni-4.1 (LLM, GPT-4.1-mini, 16384 max output tokens)"))
+
+ if err := executeSQL(rc, modelsSQL, "Upsert models"); err != nil {
+ return err
+ }
+
+ // Step 2: Update prompt name from llama3 to Moni
+ // THIS IS THE KEY RENAMING OPERATION
+ promptSQL := `
+UPDATE prompts
+SET name = 'Moni',
+ description = 'Moni AI Assistant powered by GPT-5-mini',
+ updated_at = now()
+WHERE name = 'llama3' OR id = 1;
+`
+
+ logger.Info("Renaming default assistant: llama3 → Moni")
+ if err := executeSQL(rc, promptSQL, "Update prompt name"); err != nil {
+ return err
+ }
+
+ // Step 3: Link Moni prompt to Moni model
+ linkSQL := `
+UPDATE prompts
+SET model_id = 2,
+ updated_at = now()
+WHERE name = 'Moni';
+`
+
+ logger.Info("Linking Moni assistant to Moni model (ID 2)")
+ if err := executeSQL(rc, linkSQL, "Link prompt to model"); err != nil {
+ return err
+ }
+
+ logger.Info("Database configuration complete")
+ return nil
+}
+
+// ApplyDatabaseSecurity applies database security hardening
+func ApplyDatabaseSecurity(rc *eos_io.RuntimeContext) error {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Phase 7: Database Security Hardening")
+
+ // Get postgres password from .env
+ envVars, err := readEnvFile(MoniEnvFile)
+ if err != nil {
+ return fmt.Errorf("failed to read .env file: %w", err)
+ }
+
+ postgresPassword, ok := envVars["POSTGRES_PASSWORD"]
+ if !ok || postgresPassword == "" {
+ return fmt.Errorf("POSTGRES_PASSWORD not found in .env")
+ }
+
+ // Step 1: Harden LiteLLM database
+ logger.Info("Hardening LiteLLM database",
+ zap.String("action", "removing superuser privileges"),
+ zap.String("restriction", "DML only (no DDL)"))
+
+ litellmSQL := `
+ALTER USER litellm NOSUPERUSER NOCREATEDB NOCREATEROLE NOREPLICATION NOBYPASSRLS;
+GRANT CONNECT ON DATABASE litellm TO litellm;
+GRANT USAGE ON SCHEMA public TO litellm;
+GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO litellm;
+GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO litellm;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO litellm;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO litellm;
+`
+
+ if err := executeSQLInContainer(rc, LiteLLMDBContainer, "litellm", "litellm", litellmSQL); err != nil {
+ return fmt.Errorf("failed to harden LiteLLM database: %w", err)
+ }
+
+ logger.Info("LiteLLM database hardened")
+
+ // Step 2: Harden BionicGPT database
+ logger.Info("Hardening BionicGPT database",
+ zap.String("action", "creating bionic_readonly user"),
+ zap.String("restriction", "bionic_application to DML only"),
+ zap.String("privileges", "default privileges configured"))
+
+ // Escape single quotes in password
+ escapedPassword := strings.ReplaceAll(postgresPassword, "'", "''")
+
+ bionicSQL := fmt.Sprintf(`
+DO $$
+BEGIN
+ IF NOT EXISTS (SELECT FROM pg_catalog.pg_user WHERE usename = 'bionic_readonly') THEN
+ CREATE USER bionic_readonly WITH PASSWORD '%s';
+ END IF;
+END $$;
+
+REVOKE ALL ON DATABASE "bionic-gpt" FROM bionic_application;
+REVOKE ALL ON SCHEMA public FROM bionic_application;
+
+GRANT CONNECT ON DATABASE "bionic-gpt" TO bionic_application;
+GRANT USAGE ON SCHEMA public TO bionic_application;
+GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO bionic_application;
+GRANT USAGE, SELECT, UPDATE ON ALL SEQUENCES IN SCHEMA public TO bionic_application;
+
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public
+ GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO bionic_application;
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public
+ GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO bionic_application;
+
+GRANT CONNECT ON DATABASE "bionic-gpt" TO bionic_readonly;
+GRANT USAGE ON SCHEMA public TO bionic_readonly;
+GRANT SELECT ON ALL TABLES IN SCHEMA public TO bionic_readonly;
+
+ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA public
+ GRANT SELECT ON TABLES TO bionic_readonly;
+`, escapedPassword)
+
+ if err := executeSQLInContainer(rc, PostgresContainer, "postgres", DBName, bionicSQL); err != nil {
+ return fmt.Errorf("failed to harden BionicGPT database: %w", err)
+ }
+
+ logger.Info("BionicGPT database hardened")
+
+ logger.Info("Database Security Hardening Complete",
+ zap.String("litellm", "No superuser, DML only"),
+ zap.String("bionic_application", "DML only (no schema changes)"),
+ zap.String("bionic_readonly", "Created for monitoring (read-only)"),
+ zap.String("benefits", "Prevents privilege escalation, limits blast radius"))
+
+ return nil
+}
+
+// EnableRowLevelSecurity enables RLS for multi-tenant data isolation
+func EnableRowLevelSecurity(rc *eos_io.RuntimeContext, config *WorkerConfig) error {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Enabling Row Level Security (Multi-Tenant Isolation)")
+
+ // P1 FIX (Issue #9): Breaking change warning and confirmation
+ // RLS requires application code changes - must set app.current_team_id
+ // This will break existing installations that don't set this variable
+ logger.Warn("⚠️ BREAKING CHANGE: Row Level Security (RLS) requires application code changes")
+ logger.Warn("")
+ logger.Warn("Impact:")
+ logger.Warn(" • Existing installations will ERROR without app.current_team_id")
+ logger.Warn(" • Application MUST set session variable on EVERY database connection:")
+ logger.Warn(" SET app.current_team_id = ;")
+ logger.Warn("")
+ logger.Warn("Before proceeding:")
+ logger.Warn(" 1. Verify BionicGPT application has been updated to set app.current_team_id")
+ logger.Warn(" 2. Ensure you have a rollback plan (database backup)")
+ logger.Warn(" 3. Test in non-production environment first")
+ logger.Warn("")
+
+ // Skip confirmation if --force flag is set
+ if !config.Force {
+ // Prompt user for confirmation (default: No for safety)
+ confirmed, err := interaction.PromptYesNoSafe(rc,
+ "Do you want to proceed with enabling RLS?", false)
+ if err != nil {
+ return fmt.Errorf("failed to get user confirmation: %w", err)
+ }
+ if !confirmed {
+ logger.Info("RLS enablement cancelled by user")
+ return fmt.Errorf("RLS enablement cancelled - application code changes required first")
+ }
+ logger.Info("User confirmed RLS enablement")
+ } else {
+ logger.Info("Skipping confirmation (--force flag set)")
+ }
+
+ // RLS SQL from enable_rls.sql
+ // SECURITY FIX (P0): Removed 'true' parameter from current_setting() to fail loudly if not set
+ // This prevents silent failures where users see no data instead of getting a clear error
+ // If app.current_team_id is not set, queries will ERROR instead of silently returning empty results
+ rlsSQL := `
+-- Enable RLS on tables with direct team_id
+ALTER TABLE api_key_connections ENABLE ROW LEVEL SECURITY;
+ALTER TABLE api_keys ENABLE ROW LEVEL SECURITY;
+ALTER TABLE audit_trail ENABLE ROW LEVEL SECURITY;
+ALTER TABLE conversations ENABLE ROW LEVEL SECURITY;
+ALTER TABLE datasets ENABLE ROW LEVEL SECURITY;
+ALTER TABLE document_pipelines ENABLE ROW LEVEL SECURITY;
+ALTER TABLE integrations ENABLE ROW LEVEL SECURITY;
+ALTER TABLE invitations ENABLE ROW LEVEL SECURITY;
+ALTER TABLE oauth2_connections ENABLE ROW LEVEL SECURITY;
+ALTER TABLE objects ENABLE ROW LEVEL SECURITY;
+ALTER TABLE prompts ENABLE ROW LEVEL SECURITY;
+ALTER TABLE team_users ENABLE ROW LEVEL SECURITY;
+
+-- Enable RLS on tables with indirect team_id (via foreign keys)
+ALTER TABLE chats ENABLE ROW LEVEL SECURITY;
+ALTER TABLE documents ENABLE ROW LEVEL SECURITY;
+ALTER TABLE chunks ENABLE ROW LEVEL SECURITY;
+
+-- Create RLS policies for direct team_id tables
+-- P0 FIX (Issue #8): Add DROP POLICY IF EXISTS for idempotency
+-- SECURITY: No 'true' parameter - will ERROR if app.current_team_id not set (fail loudly, not silently)
+DROP POLICY IF EXISTS tenant_isolation_api_key_connections ON api_key_connections;
+CREATE POLICY tenant_isolation_api_key_connections ON api_key_connections
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_api_keys ON api_keys;
+CREATE POLICY tenant_isolation_api_keys ON api_keys
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_audit_trail ON audit_trail;
+CREATE POLICY tenant_isolation_audit_trail ON audit_trail
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_conversations ON conversations;
+CREATE POLICY tenant_isolation_conversations ON conversations
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_datasets ON datasets;
+CREATE POLICY tenant_isolation_datasets ON datasets
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_document_pipelines ON document_pipelines;
+CREATE POLICY tenant_isolation_document_pipelines ON document_pipelines
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_integrations ON integrations;
+CREATE POLICY tenant_isolation_integrations ON integrations
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_invitations ON invitations;
+CREATE POLICY tenant_isolation_invitations ON invitations
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_oauth2_connections ON oauth2_connections;
+CREATE POLICY tenant_isolation_oauth2_connections ON oauth2_connections
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_objects ON objects;
+CREATE POLICY tenant_isolation_objects ON objects
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_prompts ON prompts;
+CREATE POLICY tenant_isolation_prompts ON prompts
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+DROP POLICY IF EXISTS tenant_isolation_team_users ON team_users;
+CREATE POLICY tenant_isolation_team_users ON team_users
+ FOR ALL TO bionic_application
+ USING (team_id = current_setting('app.current_team_id')::int);
+
+-- Create RLS policies for indirect team_id tables
+-- P0 FIX (Issue #8): Add DROP POLICY IF EXISTS for idempotency
+-- SECURITY: No 'true' parameter - will ERROR if app.current_team_id not set (fail loudly, not silently)
+DROP POLICY IF EXISTS tenant_isolation_chats ON chats;
+CREATE POLICY tenant_isolation_chats ON chats
+ FOR ALL TO bionic_application
+ USING (
+ conversation_id IN (
+ SELECT id FROM conversations
+ WHERE team_id = current_setting('app.current_team_id')::int
+ )
+ );
+
+DROP POLICY IF EXISTS tenant_isolation_documents ON documents;
+CREATE POLICY tenant_isolation_documents ON documents
+ FOR ALL TO bionic_application
+ USING (
+ dataset_id IN (
+ SELECT id FROM datasets
+ WHERE team_id = current_setting('app.current_team_id')::int
+ )
+ );
+
+DROP POLICY IF EXISTS tenant_isolation_chunks ON chunks;
+CREATE POLICY tenant_isolation_chunks ON chunks
+ FOR ALL TO bionic_application
+ USING (
+ document_id IN (
+ SELECT d.id FROM documents d
+ JOIN datasets ds ON d.dataset_id = ds.id
+ WHERE ds.team_id = current_setting('app.current_team_id')::int
+ )
+ );
+`
+
+ logger.Info("Enabling RLS on 15 critical tables",
+ zap.Int("direct_team_id", 12),
+ zap.Int("indirect_team_id", 3))
+
+ if err := executeSQLInContainer(rc, PostgresContainer, "postgres", DBName, rlsSQL); err != nil {
+ return fmt.Errorf("failed to enable RLS: %w", err)
+ }
+
+ logger.Info("RLS enabled on all tables",
+ zap.Int("tables", 15),
+ zap.Int("policies", 15))
+
+ // Verify RLS enabled
+ verifySQL := `
+SELECT COUNT(*) FROM pg_tables
+WHERE schemaname = 'public' AND rowsecurity = true;
+`
+
+ count, err := querySingleValue(rc, PostgresContainer, DBUser, DBName, verifySQL)
+ if err != nil {
+ logger.Warn("Could not verify RLS count", zap.Error(err))
+ } else {
+ logger.Info("RLS verification complete", zap.String("tables_with_rls", count))
+ }
+
+ logger.Info("Row Level Security Enabled",
+ zap.Int("protected_tables", 15),
+ zap.String("isolation", "database-level tenant isolation enforced"),
+ zap.String("security", "application bypasses cannot leak cross-tenant data"))
+
+ // P0 SECURITY FIX: Verify bionic_application is NOT a superuser
+ // Superusers bypass RLS, making all policies useless
+ logger.Info("Verifying bionic_application is not a superuser")
+
+ verifySuperuserSQL := `SELECT rolsuper FROM pg_roles WHERE rolname = 'bionic_application';`
+ isSuperuser, err := querySingleValue(rc, PostgresContainer, DBUser, DBName, verifySuperuserSQL)
+ if err != nil {
+ // P1 FIX (Issue #11): Fail hard instead of warning
+ // Cannot verify superuser status = cannot guarantee RLS security
+ return fmt.Errorf("CRITICAL SECURITY CHECK FAILED: could not verify bionic_application superuser status: %w\n"+
+ "RLS security cannot be guaranteed without this verification.\n"+
+ "Possible causes:\n"+
+ " • Database connection failed\n"+
+ " • PostgreSQL container not running\n"+
+ " • bionic_application user does not exist\n"+
+ "Fix: Ensure PostgreSQL is running and bionic_application user exists", err)
+ } else if isSuperuser == "t" || isSuperuser == "true" {
+ return fmt.Errorf("CRITICAL SECURITY FAILURE: bionic_application is a superuser and will BYPASS all RLS policies\n"+
+ "RLS is completely ineffective when the user is a superuser.\n"+
+ "Fix: Revoke superuser: ALTER USER bionic_application NOSUPERUSER;")
+ } else {
+ logger.Info("Verified: bionic_application is NOT a superuser (RLS will work correctly)")
+ }
+
+ // P1 CRITICAL WARNING: Application MUST set session variable
+ logger.Warn("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ logger.Warn("⚠️ CRITICAL: APPLICATION INTEGRATION REQUIRED")
+ logger.Warn("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ logger.Warn("")
+ logger.Warn("BionicGPT application MUST set the session variable on EVERY connection:")
+ logger.Warn("")
+ logger.Warn(" SET app.current_team_id = ;")
+ logger.Warn("")
+ logger.Warn("Without this:")
+ logger.Warn(" • All database queries will ERROR with: unrecognized configuration parameter")
+ logger.Warn(" • Users will see NO DATA (RLS will block everything)")
+ logger.Warn(" • This is INTENTIONAL - fail loudly, not silently")
+ logger.Warn("")
+ logger.Warn("Where to add:")
+ logger.Warn(" • In database connection initialization code")
+ logger.Warn(" • Before any queries execute")
+ logger.Warn(" • Use the authenticated user's team_id from their session")
+ logger.Warn("")
+ logger.Warn("Example (pseudocode):")
+ logger.Warn(" conn = db.connect()")
+ logger.Warn(" conn.execute('SET app.current_team_id = ?', [user.team_id])")
+ logger.Warn(" # Now queries will be filtered by RLS policies")
+ logger.Warn("")
+ logger.Warn("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+
+ return nil
+}
+
+// executeSQL executes SQL in the PostgreSQL container as DBUser against
+// DBName, logging the human-readable operation description first and then
+// delegating to executeSQLInContainer for the actual docker/psql invocation.
+func executeSQL(rc *eos_io.RuntimeContext, sql, description string) error {
+	otelzap.Ctx(rc.Ctx).Info("Executing SQL", zap.String("operation", description))
+	return executeSQLInContainer(rc, PostgresContainer, DBUser, DBName, sql)
+}
+
+// executeSQLInContainer executes SQL in a specific container via
+// `docker exec ... psql`. Multi-line SQL is piped through stdin (psql -c has
+// trouble with some multi-statement scripts); single-line SQL uses -c.
+// The combined stdout+stderr is included in the returned error on failure.
+func executeSQLInContainer(rc *eos_io.RuntimeContext, container, user, database, sql string) error {
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	// Use -c for single commands, or pipe for multi-line SQL
+	var output string
+	var err error
+
+	if strings.Contains(sql, "\n") {
+		// Multi-line SQL - use stdin
+		// -i keeps stdin open so psql reads the full script.
+		cmd := exec.CommandContext(ctx, "docker", "exec", "-i", container,
+			"psql", "-U", user, "-d", database)
+		cmd.Stdin = strings.NewReader(sql)
+
+		// CombinedOutput captures stderr too, so psql ERROR lines surface
+		// in the error message below.
+		out, execErr := cmd.CombinedOutput()
+		output = string(out)
+		err = execErr
+	} else {
+		// Single line SQL - use -c
+		output, err = execute.Run(ctx, execute.Options{
+			Command: "docker",
+			Args:    []string{"exec", container, "psql", "-U", user, "-d", database, "-c", sql},
+			Capture: true,
+		})
+	}
+
+	if err != nil {
+		return fmt.Errorf("SQL execution failed: %s: %w", output, err)
+	}
+
+	return nil
+}
+
+// querySingleValue runs a query with psql -t (tuples-only output) inside the
+// given container and returns the single resulting value with surrounding
+// whitespace stripped.
+func querySingleValue(rc *eos_io.RuntimeContext, container, user, database, sql string) (string, error) {
+	ctx, cancel := context.WithTimeout(rc.Ctx, 10*time.Second)
+	defer cancel()
+
+	raw, err := execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", container, "psql", "-U", user, "-d", database, "-t", "-c", sql},
+		Capture: true,
+	})
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(raw), nil
+}
diff --git a/pkg/moni/ssl.go b/pkg/moni/ssl.go
new file mode 100644
index 00000000..7dbf6382
--- /dev/null
+++ b/pkg/moni/ssl.go
@@ -0,0 +1,577 @@
+// Package moni implements setup, hardening, and verification workers for the
+// Moni (BionicGPT) deployment.
+package moni
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/execute"
+	"github.com/uptrace/opentelemetry-go-extra/otelzap"
+	"go.uber.org/zap"
+	"gopkg.in/yaml.v3"
+)
+
+// GenerateSSLCerts generates SSL certificates if missing
+// ASSESS: Check if certificates exist and are valid
+// INTERVENE: Generate new certificates if needed
+// EVALUATE: Verify certificates were created successfully
+//
+// The key is written with temporary permissions (TempKeyPerms); Phase 2
+// (ValidateAndFixCertPermissions) tightens ownership/mode for the detected
+// container UIDs.
+func GenerateSSLCerts(rc *eos_io.RuntimeContext) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Phase 1: SSL Certificate Generation")
+
+	certFile := filepath.Join(MoniCertsDir, "server.crt")
+	keyFile := filepath.Join(MoniCertsDir, "server.key")
+
+	// ASSESS: Check if certificates exist and are valid
+	if fileExists(certFile) && fileExists(keyFile) {
+		// Check if still valid (not expired)
+		// Note: this cancel is deferred to function exit, not block exit;
+		// the context merely outlives the validity check, which is harmless.
+		ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+		defer cancel()
+
+		// -checkend 0: openssl exits non-zero if the cert is already expired.
+		output, err := execute.Run(ctx, execute.Options{
+			Command: "openssl",
+			Args:    []string{"x509", "-in", certFile, "-noout", "-checkend", "0"},
+			Capture: true,
+		})
+
+		if err == nil {
+			logger.Info("SSL certificates already exist and are valid")
+			return nil
+		}
+
+		logger.Warn("Existing certificates are invalid or expired, regenerating",
+			zap.String("reason", output))
+	}
+
+	// INTERVENE: Generate new certificates
+	logger.Info("Generating SSL certificates...")
+
+	// Create certs directory
+	if err := os.MkdirAll(MoniCertsDir, 0755); err != nil {
+		return fmt.Errorf("failed to create certs directory: %w", err)
+	}
+
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	// Generate self-signed certificate: CN=postgres, 365-day validity,
+	// -nodes leaves the private key unencrypted (required for unattended
+	// PostgreSQL startup).
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "openssl",
+		Args: []string{
+			"req", "-new", "-x509", "-days", "365",
+			"-nodes", "-text",
+			"-out", certFile,
+			"-keyout", keyFile,
+			"-subj", "/CN=postgres",
+		},
+		Capture: true,
+	})
+
+	if err != nil {
+		return fmt.Errorf("failed to generate SSL certificates: %s: %w", output, err)
+	}
+
+	// Set initial permissions (will be fixed in next phase)
+	if err := os.Chmod(certFile, CertCrtPerms); err != nil {
+		return fmt.Errorf("failed to set certificate permissions: %w", err)
+	}
+
+	if err := os.Chmod(keyFile, TempKeyPerms); err != nil {
+		return fmt.Errorf("failed to set key permissions: %w", err)
+	}
+
+	// EVALUATE: Verify certificates were created
+	if !fileExists(certFile) || !fileExists(keyFile) {
+		return fmt.Errorf("certificates were not created successfully")
+	}
+
+	logger.Info("SSL certificates generated successfully",
+		zap.String("certificate", certFile),
+		zap.String("private_key", keyFile))
+
+	return nil
+}
+
+// DetectPostgresImages detects PostgreSQL images from docker-compose.yml.
+// Returns (nil, nil) when the compose file is missing or contains no
+// postgres-like services. On YAML parse failure it falls back to a grep-based
+// best-effort scan (which itself reports no images).
+func DetectPostgresImages(rc *eos_io.RuntimeContext) ([]PostgresImage, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Debug("Detecting PostgreSQL images from docker-compose.yml")
+
+	if !fileExists(MoniDockerCompose) {
+		logger.Warn("docker-compose.yml not found", zap.String("path", MoniDockerCompose))
+		return nil, nil
+	}
+
+	// Read and parse docker-compose.yml
+	data, err := os.ReadFile(MoniDockerCompose)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read docker-compose.yml: %w", err)
+	}
+
+	// Only the image field is needed; all other service keys are ignored.
+	var compose struct {
+		Services map[string]struct {
+			Image string `yaml:"image"`
+		} `yaml:"services"`
+	}
+
+	if err := yaml.Unmarshal(data, &compose); err != nil {
+		logger.Warn("Failed to parse docker-compose.yml, using grep fallback", zap.Error(err))
+		return grepFallbackDetection(rc)
+	}
+
+	var images []PostgresImage
+
+	for serviceName, service := range compose.Services {
+		image := service.Image
+		imageLower := strings.ToLower(image)
+
+		isPostgres := strings.Contains(imageLower, "postgres") ||
+			strings.Contains(imageLower, "pgvector") ||
+			strings.Contains(imageLower, "postgresql")
+
+		if !isPostgres {
+			continue
+		}
+
+		// Determine expected UID based on image type
+		// NOTE(review): CertOwnerGID (70) is reused here as Alpine's postgres
+		// *UID* — the numeric value coincides, but a dedicated AlpineUID
+		// constant would be clearer. Confirm constant values in types/consts.
+		expectedUID := StandardUID
+		if strings.Contains(imageLower, "alpine") {
+			expectedUID = CertOwnerGID // 70 for Alpine
+		}
+
+		img := PostgresImage{
+			Service:     serviceName,
+			Image:       image,
+			ExpectedUID: expectedUID,
+		}
+
+		// Try to get actual UID from running container
+		// NOTE(review): assumes compose names containers "bionicgpt-<service>"
+		// — TODO confirm this matches the project's container_name settings.
+		containerName := fmt.Sprintf("bionicgpt-%s", serviceName)
+		if actualUID := checkContainerPostgresUID(rc, containerName); actualUID > 0 {
+			img.ActualUID = actualUID
+			if actualUID != expectedUID {
+				logger.Warn("UID mismatch detected",
+					zap.String("service", serviceName),
+					zap.Int("expected", expectedUID),
+					zap.Int("actual", actualUID))
+			}
+		}
+
+		images = append(images, img)
+		logger.Debug("Found PostgreSQL service",
+			zap.String("service", serviceName),
+			zap.String("image", image),
+			zap.Int("expected_uid", expectedUID))
+	}
+
+	return images, nil
+}
+
+// checkContainerPostgresUID returns the numeric UID of the postgres user
+// inside the named running container. It returns 0 when the container is not
+// running, the user does not exist, or the command output is not a number.
+func checkContainerPostgresUID(rc *eos_io.RuntimeContext, containerName string) int {
+	ctx, cancel := context.WithTimeout(rc.Ctx, 5*time.Second)
+	defer cancel()
+
+	out, err := execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", containerName, "id", "-u", "postgres"},
+		Capture: true,
+	})
+	if err != nil {
+		return 0
+	}
+
+	if uid, convErr := strconv.Atoi(strings.TrimSpace(out)); convErr == nil {
+		return uid
+	}
+	return 0
+}
+
+// grepFallbackDetection uses grep to find postgres references when the YAML
+// parser fails. It can only confirm that a postgres reference exists — it
+// cannot recover service names or images — so it always returns (nil, nil)
+// and callers fall through to the default certificate strategy.
+func grepFallbackDetection(rc *eos_io.RuntimeContext) ([]PostgresImage, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Debug("Using grep fallback for postgres detection")
+
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "grep",
+		Args:    []string{"-i", "postgres", MoniDockerCompose},
+		Capture: true,
+	})
+
+	// grep exits non-zero when nothing matches; treat that as "no images"
+	// rather than an error.
+	if err != nil {
+		return nil, nil
+	}
+
+	if output != "" {
+		logger.Debug("Found postgres references (fallback detection)")
+	}
+
+	return nil, nil
+}
+
+// TestCertReadability tests if postgres user can read the certificate
+// SHIFT-LEFT v2: Test before deploying
+//
+// It mounts the key read-only into a throwaway container of the given image
+// and attempts to `cat` it as the image's "postgres" user. When certPath is
+// empty, it defaults to MoniCertsDir/server.key.
+//
+// NOTE(review): the uid parameter is only logged — the docker run uses the
+// named user "postgres", not the numeric uid. Confirm whether running with
+// `--user <uid>` was intended for images where the name/UID mapping differs.
+func TestCertReadability(rc *eos_io.RuntimeContext, image string, uid int, certPath string) bool {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Debug("Testing certificate readability",
+		zap.String("image", image),
+		zap.Int("uid", uid),
+		zap.String("cert", certPath))
+
+	if certPath == "" {
+		certPath = filepath.Join(MoniCertsDir, "server.key")
+	}
+
+	if !fileExists(certPath) {
+		logger.Error("Certificate not found", zap.String("path", certPath))
+		return false
+	}
+
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	// Docker bind mounts require an absolute host path.
+	absPath, err := filepath.Abs(certPath)
+	if err != nil {
+		logger.Error("Failed to get absolute path", zap.Error(err))
+		return false
+	}
+
+	_, err = execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args: []string{
+			"run", "--rm",
+			"--user", "postgres",
+			"-v", fmt.Sprintf("%s:/test.key:ro", absPath),
+			image,
+			"cat", "/test.key",
+		},
+		Capture: true,
+	})
+
+	success := err == nil
+	if success {
+		logger.Debug("Certificate is readable", zap.String("image", image))
+	} else {
+		logger.Warn("Certificate is NOT readable", zap.String("image", image))
+	}
+
+	return success
+}
+
+// DetermineCertStrategy determines optimal certificate strategy:
+// a single shared certificate when all detected containers agree on the
+// postgres UID, or separate per-UID certificate directories when Alpine
+// (UID 70) and standard (UID 999) containers are mixed.
+func DetermineCertStrategy(rc *eos_io.RuntimeContext, images []PostgresImage) string {
+	logger := otelzap.Ctx(rc.Ctx)
+
+	if len(images) == 0 {
+		logger.Info("No PostgreSQL images detected - using default strategy")
+		return StrategySingleUID70
+	}
+
+	var alpineImages, standardImages []PostgresImage
+	for _, img := range images {
+		if strings.Contains(strings.ToLower(img.Image), "alpine") {
+			alpineImages = append(alpineImages, img)
+		} else {
+			standardImages = append(standardImages, img)
+		}
+	}
+
+	logger.Info("Determining optimal certificate strategy",
+		zap.Int("alpine_containers", len(alpineImages)),
+		zap.Int("standard_containers", len(standardImages)))
+
+	for _, img := range alpineImages {
+		logger.Info("Alpine container detected",
+			zap.String("service", img.Service),
+			zap.String("image", img.Image))
+	}
+	for _, img := range standardImages {
+		logger.Info("Standard container detected",
+			zap.String("service", img.Service),
+			zap.String("image", img.Image))
+	}
+
+	// If only one type, use simple strategy
+	if len(standardImages) == 0 {
+		logger.Info("Strategy: Single cert with UID 70 (Alpine only)")
+		return StrategySingleUID70
+	}
+
+	if len(alpineImages) == 0 {
+		// NOTE(review): logs "UID 999" but returns StrategySingleUID70 — if a
+		// StrategySingleUID999 constant exists this looks like a copy-paste
+		// slip. FixCertPermissionsImmediate distinguishes alpine vs standard
+		// internally, so behavior may still be correct; confirm intent.
+		logger.Info("Strategy: Single cert with UID 999 (standard only)")
+		return StrategySingleUID70
+	}
+
+	// Mixed containers - use separate certs
+	logger.Info("Strategy: Separate certificate directories (mixed)")
+	return StrategySeparateCerts
+}
+
+// FixCertPermissionsImmediate fixes certificate permissions so the postgres
+// user inside detected PostgreSQL containers can read the TLS private key.
+//
+// Ownership scheme (key is group-readable, cert is world-readable):
+//   - any Alpine image present: key root:70 mode 640, verified by an actual
+//     read inside a throwaway container
+//   - standard images only:     key root:999 mode 640
+//
+// The public certificate is always root:root mode 644.
+func FixCertPermissionsImmediate(rc *eos_io.RuntimeContext) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Fixing certificate permissions for Alpine PostgreSQL containers")
+
+	keyFile := filepath.Join(MoniCertsDir, "server.key")
+	certFile := filepath.Join(MoniCertsDir, "server.crt")
+
+	images, err := DetectPostgresImages(rc)
+	if err != nil {
+		return fmt.Errorf("failed to detect postgres images: %w", err)
+	}
+
+	// Only Alpine membership matters here; standard images take the default
+	// branch below. (The original also collected a standardImages slice that
+	// was never read.)
+	var alpineImages []PostgresImage
+	for _, img := range images {
+		if strings.Contains(strings.ToLower(img.Image), "alpine") {
+			alpineImages = append(alpineImages, img)
+		}
+	}
+
+	if len(alpineImages) > 0 {
+		logger.Info("Setting permissions for Alpine containers",
+			zap.Int("owner_uid", CertOwnerUID),
+			zap.Int("owner_gid", CertOwnerGID),
+			zap.String("key_perms", fmt.Sprintf("%o", CertKeyPerms)))
+
+		// root:postgres(70) so Alpine's postgres group can read the key.
+		if err := applyCertOwnership(rc, keyFile, certFile, "0:70"); err != nil {
+			return err
+		}
+		logger.Info("Permissions updated successfully")
+
+		// EVALUATE: prove the Alpine postgres user can actually read the key.
+		if !TestCertReadability(rc, alpineImages[0].Image, CertOwnerGID, "") {
+			return fmt.Errorf("alpine (UID 70) CANNOT read the key")
+		}
+		logger.Info("Alpine (UID 70) can read the key")
+		return nil
+	}
+
+	// Only standard containers: postgres group is 999.
+	logger.Info("Only standard PostgreSQL containers detected, using 0:999 640 permissions")
+	if err := applyCertOwnership(rc, keyFile, certFile, "0:999"); err != nil {
+		return err
+	}
+	logger.Info("Permissions updated successfully")
+	return nil
+}
+
+// applyCertOwnership sets keyOwner + mode 640 on the private key and
+// root:root + mode 644 on the public certificate, all via sudo.
+func applyCertOwnership(rc *eos_io.RuntimeContext, keyFile, certFile, keyOwner string) error {
+	if err := runSudo(rc, "chown", keyOwner, keyFile); err != nil {
+		return fmt.Errorf("failed to chown key file: %w", err)
+	}
+	if err := runSudo(rc, "chown", "0:0", certFile); err != nil {
+		return fmt.Errorf("failed to chown cert file: %w", err)
+	}
+	if err := runSudo(rc, "chmod", "640", keyFile); err != nil {
+		return fmt.Errorf("failed to chmod key file: %w", err)
+	}
+	if err := runSudo(rc, "chmod", "644", certFile); err != nil {
+		return fmt.Errorf("failed to chmod cert file: %w", err)
+	}
+	return nil
+}
+
+// ValidateAndFixCertPermissions validates and fixes SSL certificate
+// permissions (Phase 2). It detects the PostgreSQL container mix, picks a
+// certificate strategy, and applies it: separate per-UID cert directories for
+// mixed Alpine/standard deployments, otherwise a single shared certificate.
+func ValidateAndFixCertPermissions(rc *eos_io.RuntimeContext) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Phase 2: Certificate Permission Validation & Fix")
+
+	if !fileExists(filepath.Join(MoniCertsDir, "server.key")) {
+		return fmt.Errorf("certificate files don't exist")
+	}
+
+	// Detect PostgreSQL images and determine strategy
+	images, detectErr := DetectPostgresImages(rc)
+	if detectErr != nil {
+		return fmt.Errorf("failed to detect postgres images: %w", detectErr)
+	}
+	strategy := DetermineCertStrategy(rc, images)
+
+	// Implement the strategy
+	switch strategy {
+	case StrategySeparateCerts:
+		logger.Info("Implementing separate certificate directories strategy")
+		if err := createSeparateCertDirs(rc, images); err != nil {
+			return fmt.Errorf("failed to create separate certificate directories: %w", err)
+		}
+	default:
+		logger.Info("Implementing single certificate strategy")
+		if err := FixCertPermissionsImmediate(rc); err != nil {
+			return fmt.Errorf("failed to fix certificate permissions: %w", err)
+		}
+	}
+
+	logger.Info("Certificate permissions validated and working",
+		zap.String("strategy", strategy))
+	return nil
+}
+
+// createSeparateCertDirs creates separate certificate directories for mixed
+// Alpine/standard container deployments. The shared ./certs/ pair is copied
+// into an Alpine directory (key root:70 mode 640) and a standard directory
+// (key 999:999 mode 600), then readability is verified from a representative
+// container of each type. Fails if either type cannot read its key.
+func createSeparateCertDirs(rc *eos_io.RuntimeContext, images []PostgresImage) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Creating separate certificate directories")
+
+	sourceKey := filepath.Join(MoniCertsDir, "server.key")
+	sourceCrt := filepath.Join(MoniCertsDir, "server.crt")
+
+	if !fileExists(sourceKey) || !fileExists(sourceCrt) {
+		return fmt.Errorf("source certificates not found in ./certs/")
+	}
+
+	// Create directories
+	if err := os.MkdirAll(MoniCertsAlpineDir, 0755); err != nil {
+		return fmt.Errorf("failed to create alpine certs dir: %w", err)
+	}
+
+	if err := os.MkdirAll(MoniCertsStandardDir, 0755); err != nil {
+		return fmt.Errorf("failed to create standard certs dir: %w", err)
+	}
+
+	logger.Info("Certificate directories created",
+		zap.String("alpine", MoniCertsAlpineDir),
+		zap.String("standard", MoniCertsStandardDir))
+
+	// Copy certificates (sudo: targets may not be writable by this process)
+	alpineKey := filepath.Join(MoniCertsAlpineDir, "server.key")
+	alpineCrt := filepath.Join(MoniCertsAlpineDir, "server.crt")
+	standardKey := filepath.Join(MoniCertsStandardDir, "server.key")
+	standardCrt := filepath.Join(MoniCertsStandardDir, "server.crt")
+
+	for _, pair := range []struct{ src, dst string }{
+		{sourceKey, alpineKey},
+		{sourceCrt, alpineCrt},
+		{sourceKey, standardKey},
+		{sourceCrt, standardCrt},
+	} {
+		if err := runSudo(rc, "cp", pair.src, pair.dst); err != nil {
+			return fmt.Errorf("failed to copy %s to %s: %w", pair.src, pair.dst, err)
+		}
+	}
+
+	logger.Info("Certificates copied")
+
+	// Set permissions for Alpine (UID 70): group-readable key (640) because
+	// Alpine postgres reads it via the postgres group.
+	logger.Info("Setting permissions for Alpine containers (UID 70)")
+	if err := runSudo(rc, "chown", "0:70", alpineKey); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chmod", "640", alpineKey); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chown", "0:0", alpineCrt); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chmod", "644", alpineCrt); err != nil {
+		return err
+	}
+
+	// Set permissions for Standard (UID 999): owner-only key (600) since the
+	// standard image's postgres user owns the file directly.
+	logger.Info("Setting permissions for standard containers (UID 999)")
+	if err := runSudo(rc, "chown", "999:999", standardKey); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chmod", "600", standardKey); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chown", "0:0", standardCrt); err != nil {
+		return err
+	}
+	if err := runSudo(rc, "chmod", "644", standardCrt); err != nil {
+		return err
+	}
+
+	// Test readability from one representative image of each type
+	logger.Info("Testing readability")
+
+	var alpineImages, standardImages []PostgresImage
+	for _, img := range images {
+		if strings.Contains(strings.ToLower(img.Image), "alpine") {
+			alpineImages = append(alpineImages, img)
+		} else {
+			standardImages = append(standardImages, img)
+		}
+	}
+
+	allReadable := true
+
+	if len(alpineImages) > 0 {
+		testImage := alpineImages[0].Image
+		if TestCertReadability(rc, testImage, CertOwnerGID, alpineKey) {
+			logger.Info("Alpine can read certificate", zap.String("cert", alpineKey))
+		} else {
+			logger.Error("Alpine CANNOT read certificate", zap.String("cert", alpineKey))
+			allReadable = false
+		}
+	}
+
+	if len(standardImages) > 0 {
+		testImage := standardImages[0].Image
+		if TestCertReadability(rc, testImage, StandardUID, standardKey) {
+			logger.Info("Standard can read certificate", zap.String("cert", standardKey))
+		} else {
+			logger.Error("Standard CANNOT read certificate", zap.String("cert", standardKey))
+			allReadable = false
+		}
+	}
+
+	if !allReadable {
+		return fmt.Errorf("certificate readability test failed")
+	}
+
+	return nil
+}
+
+// runSudo runs a command with sudo and a CommandTimeout deadline.
+// On failure the returned error includes the full command line and its
+// captured output — several callers propagate this error verbatim, so
+// without the context a bare "exit status 1" would be undiagnosable.
+func runSudo(rc *eos_io.RuntimeContext, command string, args ...string) error {
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "sudo",
+		Args:    append([]string{command}, args...),
+		Capture: true,
+	})
+	if err != nil {
+		return fmt.Errorf("sudo %s %s: %s: %w", command, strings.Join(args, " "), output, err)
+	}
+	return nil
+}
+
+// fileExists checks if a file exists.
+// NOTE(review): any Stat error — including permission denied — is treated as
+// "does not exist". Callers use this as a best-effort probe, so the
+// conflation is acceptable here; do not reuse for security decisions.
+func fileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
diff --git a/pkg/moni/types.go b/pkg/moni/types.go
new file mode 100644
index 00000000..67810780
--- /dev/null
+++ b/pkg/moni/types.go
@@ -0,0 +1,141 @@
+package moni
+
+import "time"
+
+// WorkerConfig contains configuration for the Moni worker.
+// The Skip* flags disable whole phases; the *Only flags run a single targeted
+// action instead of the full initialization.
+type WorkerConfig struct {
+	// Phase control
+	SkipSSL          bool
+	SkipDatabase     bool
+	SkipSecurity     bool
+	SkipVerification bool
+
+	// Validation only
+	ValidateCertsOnly  bool
+	FixCertsOnly       bool
+	VerifyDBOnly       bool
+	VerifyRLSOnly      bool
+	VerifyCSPOnly      bool
+	VerifySecurityOnly bool
+
+	// Cleanup
+	CleanupBackups bool
+
+	// Paths (overridable for testing)
+	WorkDir string
+
+	// Force flag (skip confirmations for RLS breaking changes)
+	Force bool
+}
+
+// PostgresImage represents a detected PostgreSQL container image.
+// ExpectedUID is the postgres UID implied by the image flavor (70 for
+// Alpine, 999 for standard); ActualUID is what the running container reports.
+type PostgresImage struct {
+	Service     string
+	Image       string
+	ExpectedUID int
+	ActualUID   int // 0 if not detected
+}
+
+// SetupPhase represents a phase of the setup process, with timing and any
+// errors/warnings accumulated while it ran.
+type SetupPhase struct {
+	Number      int
+	Name        string
+	Description string
+	StartTime   time.Time
+	EndTime     time.Time
+	Success     bool
+	Errors      []string
+	Warnings    []string
+}
+
+// HealthCheckResult contains results from health checks.
+// ContainerHealth maps container name -> healthy.
+type HealthCheckResult struct {
+	PostgresSSL      bool
+	LiteLLMModels    int
+	WebSearchEnabled bool
+	SystemPromptSet  bool
+	ContainerHealth  map[string]bool
+	Errors           []string
+	Warnings         []string
+}
+
+// RLSVerificationResult contains RLS verification results.
+type RLSVerificationResult struct {
+	RLSEnabled              bool
+	TablesWithRLS           []string
+	TablesWithoutRLS        []string
+	PoliciesFound           []RLSPolicy
+	CriticalTablesProtected bool
+	Warnings                []string
+	Errors                  []string
+}
+
+// RLSPolicy represents a Row Level Security policy.
+type RLSPolicy struct {
+	Table      string
+	PolicyName string
+	Command    string
+}
+
+// CSPVerificationResult contains CSP verification results.
+// SecurityScore is a numeric rating of the header — presumably higher is
+// better; confirm the scale against the verifier that populates it.
+type CSPVerificationResult struct {
+	CSPPresent        bool
+	CSPHeader         string
+	SecurityScore     int
+	GoodDirectives    []string
+	WeakDirectives    []string
+	MissingDirectives []string
+	Warnings          []string
+	Errors            []string
+}
+
+// DBVerificationResult contains database verification results.
+type DBVerificationResult struct {
+	ModelCount int
+	MoniExists bool
+	Models     []DBModel
+	Prompts    []DBPrompt
+	Errors     []string
+	Warnings   []string
+}
+
+// DBModel represents a database model record.
+type DBModel struct {
+	ID          int
+	ModelType   string
+	Name        string
+	BaseURL     string
+	ContextSize int
+	TPMLimit    int
+	RPMLimit    int
+}
+
+// DBPrompt represents a database prompt record.
+type DBPrompt struct {
+	ID          int
+	Name        string
+	ModelID     int
+	ModelName   string
+	Description string
+}
+
+// EnvCheckResult contains .env file validation results.
+// Variables holds the parsed key/value pairs — assumed raw string values;
+// TODO confirm against the parser that fills it.
+type EnvCheckResult struct {
+	Exists    bool
+	Variables map[string]interface{}
+	Warnings  []string
+	Errors    []string
+}
+
+// SetupResult contains the overall setup result across all phases, including
+// optional verification sub-results (nil when that verification was skipped).
+type SetupResult struct {
+	Success         bool
+	Phases          []SetupPhase
+	HealthCheck     *HealthCheckResult
+	RLSVerification *RLSVerificationResult
+	CSPVerification *CSPVerificationResult
+	DBVerification  *DBVerificationResult
+	StartTime       time.Time
+	EndTime         time.Time
+	CriticalIssues  []string
+}
diff --git a/pkg/moni/verification.go b/pkg/moni/verification.go
new file mode 100644
index 00000000..9f098d3b
--- /dev/null
+++ b/pkg/moni/verification.go
@@ -0,0 +1,647 @@
+package moni
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/execute"
+	"github.com/uptrace/opentelemetry-go-extra/otelzap"
+	"go.uber.org/zap"
+)
+
+// WaitForService polls checkFunc every checkInterval seconds until it returns
+// true, giving up after maxWait seconds.
+//
+// Fixes over the previous version:
+//   - the runtime context is now honored, so a cancelled command (signal,
+//     parent timeout) aborts the wait immediately instead of spinning until
+//     the local timeout fires;
+//   - elapsed time is counted before the check, so the "Service ready" log
+//     no longer under-reports by one interval.
+func WaitForService(rc *eos_io.RuntimeContext, name string, checkFunc func() bool, maxWait, checkInterval int) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Waiting for service", zap.String("service", name))
+
+	elapsed := 0
+	ticker := time.NewTicker(time.Duration(checkInterval) * time.Second)
+	defer ticker.Stop()
+
+	timeout := time.After(time.Duration(maxWait) * time.Second)
+
+	for {
+		select {
+		case <-rc.Ctx.Done():
+			// Caller cancelled: stop waiting promptly.
+			return rc.Ctx.Err()
+
+		case <-timeout:
+			return fmt.Errorf("%s did not become ready within %ds", name, maxWait)
+
+		case <-ticker.C:
+			// Account for this interval up front so the success log is accurate.
+			elapsed += checkInterval
+
+			if checkFunc() {
+				logger.Info("Service ready",
+					zap.String("service", name),
+					zap.Int("elapsed_seconds", elapsed))
+				return nil
+			}
+
+			// Periodic progress log every 10s while still within the deadline.
+			if elapsed%10 == 0 && elapsed < maxWait {
+				logger.Info("Still waiting for service",
+					zap.String("service", name),
+					zap.Int("elapsed", elapsed))
+			}
+		}
+	}
+}
+
+// CheckPostgres reports whether PostgreSQL answers pg_isready inside its
+// container. The probe is bounded by a 5-second timeout.
+func CheckPostgres(rc *eos_io.RuntimeContext) bool {
+	ctx, cancel := context.WithTimeout(rc.Ctx, 5*time.Second)
+	defer cancel()
+
+	probe := execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", PostgresContainer, "pg_isready", "-U", DBUser},
+		Capture: true,
+	}
+	if _, err := execute.Run(ctx, probe); err != nil {
+		return false
+	}
+	return true
+}
+
+// CheckLiteLLM reports whether the LiteLLM readiness endpoint responds to a
+// curl probe (-f makes curl fail on HTTP errors). Bounded by a 5s timeout.
+func CheckLiteLLM(rc *eos_io.RuntimeContext) bool {
+	ctx, cancel := context.WithTimeout(rc.Ctx, 5*time.Second)
+	defer cancel()
+
+	probe := execute.Options{
+		Command: "curl",
+		Args:    []string{"-sf", LiteLLMURL + "/health/readiness"},
+		Capture: true,
+	}
+	_, err := execute.Run(ctx, probe)
+	return err == nil
+}
+
+// VerifyConfiguration verifies database configuration by querying the
+// Postgres container via psql: it logs the models table, counts models,
+// logs the first few prompts, and confirms the 'Moni' assistant exists.
+// Problems are accumulated in the result's Errors/Warnings; the error
+// return value is currently always nil.
+func VerifyConfiguration(rc *eos_io.RuntimeContext) (*DBVerificationResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Verifying configuration")
+
+	result := &DBVerificationResult{
+		Models:   []DBModel{},
+		Prompts:  []DBPrompt{},
+		Errors:   []string{},
+		Warnings: []string{},
+	}
+
+	// Check models
+	logger.Info("Checking models in database")
+	modelsSQL := "SELECT id, model_type, name, context_size, tpm_limit, rpm_limit FROM models ORDER BY id;"
+
+	// Execute and capture output.
+	// NOTE(review): this single CommandTimeout context is also reused for the
+	// prompts query below, so both queries share one deadline — confirm that
+	// is intended rather than a per-query timeout.
+	ctx, cancel := context.WithTimeout(rc.Ctx, CommandTimeout)
+	defer cancel()
+
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", PostgresContainer, "psql", "-U", DBUser, "-d", DBName, "-c", modelsSQL},
+		Capture: true,
+	})
+
+	if err != nil {
+		result.Errors = append(result.Errors, fmt.Sprintf("Failed to query models: %v", err))
+	} else {
+		// Raw psql table output is only logged; result.Models is not populated here.
+		logger.Info("Models in database", zap.String("output", output))
+	}
+
+	// Count models (querySingleValue is a package helper defined elsewhere —
+	// presumably a single-value psql query; verify against its definition).
+	modelCount, err := querySingleValue(rc, PostgresContainer, DBUser, DBName, "SELECT COUNT(*) FROM models;")
+	if err != nil {
+		result.Warnings = append(result.Warnings, "Could not count models")
+	} else {
+		// Sscanf error deliberately ignored: ModelCount stays 0 on parse failure.
+		fmt.Sscanf(modelCount, "%d", &result.ModelCount)
+		logger.Info("Model count", zap.Int("count", result.ModelCount))
+	}
+
+	// Check prompts (first five, joined to their model for readability)
+	logger.Info("Checking default assistants")
+	promptsSQL := `
+SELECT p.id, p.name, p.model_id, m.name as model_name, p.description
+FROM prompts p
+JOIN models m ON p.model_id = m.id
+ORDER BY p.id
+LIMIT 5;
+`
+
+	output, err = execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", PostgresContainer, "psql", "-U", DBUser, "-d", DBName, "-c", promptsSQL},
+		Capture: true,
+	})
+
+	if err != nil {
+		result.Errors = append(result.Errors, fmt.Sprintf("Failed to query prompts: %v", err))
+	} else {
+		// As with models, prompts are logged but result.Prompts is not populated.
+		logger.Info("Prompts in database", zap.String("output", output))
+	}
+
+	// Verify Moni prompt exists
+	moniCount, err := querySingleValue(rc, PostgresContainer, DBUser, DBName,
+		"SELECT COUNT(*) FROM prompts WHERE name = 'Moni';")
+	if err != nil {
+		result.Warnings = append(result.Warnings, "Could not check Moni prompt")
+	} else {
+		var count int
+		fmt.Sscanf(moniCount, "%d", &count)
+		result.MoniExists = count > 0
+
+		if result.MoniExists {
+			logger.Info("'Moni' assistant is configured")
+		} else {
+			logger.Warn("'Moni' assistant not found in database")
+			result.Warnings = append(result.Warnings, "Moni assistant not found")
+		}
+	}
+
+	return result, nil
+}
+
+// VerifyRowLevelSecurity verifies RLS is properly configured.
+//
+// Steps:
+//  1. list rowsecurity status for all public tables (pg_tables);
+//  2. confirm the critical multi-tenant tables are protected;
+//  3. enumerate active policies (pg_policies);
+//  4. warn about missing team_id indexes (RLS predicate performance);
+//  5. set CriticalTablesProtected for the summary.
+//
+// Query failures are recorded in the result; the error return is always nil.
+func VerifyRowLevelSecurity(rc *eos_io.RuntimeContext) (*RLSVerificationResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Verifying Row Level Security (RLS)")
+
+	result := &RLSVerificationResult{
+		TablesWithRLS:    []string{},
+		TablesWithoutRLS: []string{},
+		PoliciesFound:    []RLSPolicy{},
+		Warnings:         []string{},
+		Errors:           []string{},
+	}
+
+	// Critical tables that MUST have RLS
+	criticalTables := []string{"chats", "documents", "datasets", "models", "prompts", "api_keys"}
+
+	// Step 1: Check which tables have RLS enabled
+	logger.Info("Checking RLS status on tables")
+
+	rlsCheckSQL := `
+SELECT tablename, rowsecurity
+FROM pg_tables
+WHERE schemaname = 'public'
+ORDER BY tablename;
+`
+
+	// NOTE(review): this 10s context is reused for the pg_policies query in
+	// step 3 as well — both queries share one deadline.
+	ctx, cancel := context.WithTimeout(rc.Ctx, 10*time.Second)
+	defer cancel()
+
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", PostgresContainer, "psql", "-U", DBUser, "-d", DBName, "-t", "-c", rlsCheckSQL},
+		Capture: true,
+	})
+
+	if err != nil {
+		result.Errors = append(result.Errors, fmt.Sprintf("Failed to query RLS status: %v", err))
+		return result, nil
+	}
+
+	// Parse psql -t output: "tablename | rowsecurity" per line.
+	rlsTables := make(map[string]bool)
+	lines := strings.Split(output, "\n")
+	for _, line := range lines {
+		if !strings.Contains(line, "|") {
+			continue
+		}
+
+		parts := strings.Split(line, "|")
+		if len(parts) < 2 {
+			continue
+		}
+
+		tablename := strings.TrimSpace(parts[0])
+		rowsecurity := strings.TrimSpace(parts[1])
+
+		if tablename == "" {
+			continue
+		}
+
+		// psql prints booleans as "t"/"f"; the other spellings are defensive.
+		hasRLS := rowsecurity == "t" || rowsecurity == "true" || rowsecurity == "on"
+		rlsTables[tablename] = hasRLS
+
+		if hasRLS {
+			result.TablesWithRLS = append(result.TablesWithRLS, tablename)
+		} else {
+			result.TablesWithoutRLS = append(result.TablesWithoutRLS, tablename)
+		}
+	}
+
+	logger.Info("RLS table status",
+		zap.Int("with_rls", len(result.TablesWithRLS)),
+		zap.Int("without_rls", len(result.TablesWithoutRLS)))
+
+	// Step 2: Check critical tables
+	logger.Info("Verifying critical tables have RLS")
+
+	criticalProtected := []string{}
+	criticalUnprotected := []string{}
+
+	for _, table := range criticalTables {
+		if hasRLS, exists := rlsTables[table]; exists {
+			if hasRLS {
+				logger.Info("Critical table protected", zap.String("table", table))
+				criticalProtected = append(criticalProtected, table)
+			} else {
+				logger.Error("Critical table NOT protected (SECURITY RISK!)", zap.String("table", table))
+				criticalUnprotected = append(criticalUnprotected, table)
+				result.Errors = append(result.Errors, fmt.Sprintf("Critical table '%s' does not have RLS enabled", table))
+			}
+		} else {
+			// Absence is a warning, not an error: the schema may not be migrated yet.
+			logger.Warn("Critical table not found (may not exist yet)", zap.String("table", table))
+			result.Warnings = append(result.Warnings, fmt.Sprintf("Critical table '%s' not found in database", table))
+		}
+	}
+
+	// Step 3: Check RLS policies
+	logger.Info("Checking RLS policies")
+
+	policiesSQL := `
+SELECT schemaname, tablename, policyname, cmd
+FROM pg_policies
+WHERE schemaname = 'public'
+ORDER BY tablename, policyname;
+`
+
+	output, err = execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"exec", PostgresContainer, "psql", "-U", DBUser, "-d", DBName, "-t", "-c", policiesSQL},
+		Capture: true,
+	})
+
+	if err == nil {
+		policyCount := 0
+		lines := strings.Split(output, "\n")
+		for _, line := range lines {
+			if !strings.Contains(line, "|") || strings.TrimSpace(line) == "" {
+				continue
+			}
+
+			// Columns: schemaname | tablename | policyname | cmd.
+			parts := strings.Split(line, "|")
+			if len(parts) >= 4 {
+				policy := RLSPolicy{
+					Table:      strings.TrimSpace(parts[1]),
+					PolicyName: strings.TrimSpace(parts[2]),
+					Command:    strings.TrimSpace(parts[3]),
+				}
+				result.PoliciesFound = append(result.PoliciesFound, policy)
+				policyCount++
+			}
+		}
+
+		if policyCount > 0 {
+			logger.Info("RLS policies found", zap.Int("count", policyCount))
+			result.RLSEnabled = true
+		} else {
+			logger.Warn("No RLS policies found")
+			result.Warnings = append(result.Warnings, "No RLS policies found - multi-tenancy may rely on application-level checks only")
+		}
+	} else {
+		logger.Warn("Could not query RLS policies")
+		result.Warnings = append(result.Warnings, "Failed to query RLS policies")
+	}
+
+	// Step 4: Check team_id indexes (P1 CRITICAL - Performance)
+	logger.Info("Checking team_id indexes for RLS performance")
+
+	tablesNeedingIndexes := []string{"api_keys", "conversations", "datasets", "documents",
+		"api_key_connections", "audit_trail", "document_pipelines", "integrations",
+		"invitations", "oauth2_connections", "objects", "prompts", "team_users"}
+
+	missingIndexes := []string{}
+
+	for _, table := range tablesNeedingIndexes {
+		// Table names come from the hard-coded list above, so this Sprintf
+		// into SQL is not an injection vector.
+		indexSQL := fmt.Sprintf(`
+		SELECT COUNT(*) FROM pg_indexes
+		WHERE tablename = '%s' AND indexdef LIKE '%%team_id%%';
+	`, table)
+
+		indexCount, err := querySingleValue(rc, PostgresContainer, DBUser, DBName, indexSQL)
+		if err != nil {
+			logger.Debug("Could not check indexes", zap.String("table", table), zap.Error(err))
+			continue
+		}
+
+		var count int
+		fmt.Sscanf(indexCount, "%d", &count)
+
+		if count == 0 {
+			missingIndexes = append(missingIndexes, table)
+			logger.Warn("Missing team_id index (performance issue)",
+				zap.String("table", table),
+				zap.String("recommendation", fmt.Sprintf("CREATE INDEX idx_%s_team_id ON %s(team_id)", table, table)),
+				zap.String("impact", "Queries will do sequential scans instead of index scans"))
+		} else {
+			logger.Debug("Index exists", zap.String("table", table), zap.Int("count", count))
+		}
+	}
+
+	if len(missingIndexes) > 0 {
+		logger.Warn("RLS Performance Warning",
+			zap.Int("tables_without_indexes", len(missingIndexes)),
+			zap.Strings("tables", missingIndexes))
+		result.Warnings = append(result.Warnings,
+			fmt.Sprintf("%d tables missing team_id indexes (will cause performance issues)", len(missingIndexes)))
+	} else {
+		logger.Info("All critical tables have team_id indexes")
+	}
+
+	// Step 5: Summary — "protected" requires no unprotected critical table AND
+	// at least one critical table actually present and protected.
+	result.CriticalTablesProtected = len(criticalUnprotected) == 0 && len(criticalProtected) > 0
+
+	if result.CriticalTablesProtected {
+		logger.Info("Row Level Security: GOOD",
+			zap.Int("critical_protected", len(criticalProtected)),
+			zap.Int("policies_active", len(result.PoliciesFound)))
+	} else if len(criticalUnprotected) > 0 {
+		logger.Error("Row Level Security: CRITICAL ISSUES",
+			zap.Int("unprotected_tables", len(criticalUnprotected)),
+			zap.Strings("tables", criticalUnprotected))
+		logger.Error("This is a SERIOUS security vulnerability! Multi-tenant data isolation is at risk")
+	} else {
+		logger.Warn("Row Level Security: UNKNOWN (database may not be initialized)")
+	}
+
+	return result, nil
+}
+
+// VerifyContentSecurityPolicy verifies CSP headers are properly configured.
+// It curls AppURL, extracts the Content-Security-Policy header, penalizes
+// dangerous source expressions and credits recommended directives, producing
+// a heuristic SecurityScore (can go negative with multiple weak patterns).
+// Connectivity/parse failures are recorded in the result; the error return
+// is always nil.
+func VerifyContentSecurityPolicy(rc *eos_io.RuntimeContext) (*CSPVerificationResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Verifying Content Security Policy (CSP)")
+
+	result := &CSPVerificationResult{
+		GoodDirectives:    []string{},
+		WeakDirectives:    []string{},
+		MissingDirectives: []string{},
+		Warnings:          []string{},
+		Errors:            []string{},
+	}
+
+	// Expected secure CSP directives
+	recommendedDirectives := map[string]string{
+		"default-src":     "'self'",
+		"script-src":      "'self'",
+		"style-src":       "'self' 'unsafe-inline'",
+		"img-src":         "'self' data:",
+		"font-src":        "'self'",
+		"connect-src":     "'self'",
+		"frame-ancestors": "'none'",
+		"base-uri":        "'self'",
+		"form-action":     "'self'",
+	}
+
+	// Dangerous patterns.
+	// NOTE(review): detection below is substring-based, so a header matching
+	// the longer pattern also matches its substrings and is penalized more
+	// than once; "*" also matches benign wildcards like *.example.com.
+	dangerousPatterns := map[string]string{
+		"'unsafe-eval'":                   "Allows eval() - major XSS risk",
+		"* 'unsafe-inline' 'unsafe-eval'": "Extremely permissive - defeats CSP purpose",
+		"*":                               "Wildcard allows any source - too permissive",
+	}
+
+	// Step 1: Check if app is responding
+	logger.Info("Checking for CSP headers")
+
+	ctx, cancel := context.WithTimeout(rc.Ctx, 10*time.Second)
+	defer cancel()
+
+	// -I HEAD request; -w prints only the numeric status code.
+	statusCode, err := execute.Run(ctx, execute.Options{
+		Command: "curl",
+		Args:    []string{"-s", "-I", AppURL, "-o", "/dev/null", "-w", "%{http_code}"},
+		Capture: true,
+	})
+
+	if err != nil || statusCode != "200" {
+		logger.Warn("App not responding", zap.String("url", AppURL), zap.String("status", statusCode))
+		result.Warnings = append(result.Warnings, fmt.Sprintf("Could not connect to app at %s", AppURL))
+		result.Errors = append(result.Errors, "App is not accessible - cannot verify CSP")
+		return result, nil
+	}
+
+	// Step 2: Get headers (-D - dumps response headers to stdout)
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "curl",
+		Args:    []string{"-s", "-D", "-", AppURL, "-o", "/dev/null"},
+		Capture: true,
+	})
+
+	if err != nil {
+		logger.Warn("Could not fetch headers")
+		result.Errors = append(result.Errors, "Failed to fetch HTTP headers")
+		return result, nil
+	}
+
+	// Step 3: Parse headers for CSP. Splitting on "\n" leaves a trailing "\r"
+	// from CRLF line endings, which TrimSpace removes from the value.
+	var cspHeader string
+	lines := strings.Split(output, "\n")
+	for _, line := range lines {
+		if strings.Contains(strings.ToLower(line), "content-security-policy:") {
+			parts := strings.SplitN(line, ":", 2)
+			if len(parts) == 2 {
+				cspHeader = strings.TrimSpace(parts[1])
+				result.CSPPresent = true
+				result.CSPHeader = cspHeader
+				break
+			}
+		}
+	}
+
+	// Step 4: Analyze CSP if found
+	if cspHeader != "" {
+		logger.Info("Analyzing CSP configuration")
+
+		// Check for dangerous patterns (-30 each)
+		for pattern, risk := range dangerousPatterns {
+			if strings.Contains(cspHeader, pattern) {
+				logger.Error("DANGEROUS pattern found in CSP",
+					zap.String("pattern", pattern),
+					zap.String("risk", risk))
+				result.WeakDirectives = append(result.WeakDirectives, fmt.Sprintf("%s: %s", pattern, risk))
+				result.SecurityScore -= 30
+			}
+		}
+
+		// Parse CSP directives: "name value value; name value; ..."
+		directives := make(map[string]string)
+		for _, directive := range strings.Split(cspHeader, ";") {
+			directive = strings.TrimSpace(directive)
+			if directive == "" {
+				continue
+			}
+
+			parts := strings.SplitN(directive, " ", 2)
+			if len(parts) >= 1 {
+				key := parts[0]
+				value := ""
+				if len(parts) > 1 {
+					value = parts[1]
+				}
+				directives[key] = value
+			}
+		}
+
+		// Check for good directives: +10 when matching the recommended value,
+		// +5 when present but suboptimal, recorded as missing otherwise.
+		for directive, expected := range recommendedDirectives {
+			if actual, exists := directives[directive]; exists {
+				if strings.Contains(actual, expected) || actual == "'self'" {
+					logger.Info("Secure directive", zap.String("directive", directive))
+					result.GoodDirectives = append(result.GoodDirectives, directive)
+					result.SecurityScore += 10
+				} else {
+					logger.Warn("Suboptimal directive",
+						zap.String("directive", directive),
+						zap.String("actual", actual),
+						zap.String("expected", expected))
+					result.Warnings = append(result.Warnings, fmt.Sprintf("%s is not optimally configured", directive))
+					result.SecurityScore += 5
+				}
+			} else {
+				logger.Warn("Missing directive", zap.String("directive", directive))
+				result.MissingDirectives = append(result.MissingDirectives, directive)
+			}
+		}
+
+		// Summary: >=70 strong, >=40 moderate, else weak.
+		if result.SecurityScore >= 70 {
+			logger.Info("Content Security Policy: STRONG",
+				zap.Int("score", result.SecurityScore),
+				zap.Int("good_directives", len(result.GoodDirectives)),
+				zap.Int("weak_directives", len(result.WeakDirectives)))
+		} else if result.SecurityScore >= 40 {
+			logger.Warn("Content Security Policy: MODERATE",
+				zap.Int("score", result.SecurityScore))
+			logger.Warn("Consider strengthening CSP configuration")
+		} else {
+			logger.Error("Content Security Policy: WEAK",
+				zap.Int("score", result.SecurityScore))
+			logger.Error("CSP provides minimal protection - vulnerable to XSS and injection attacks")
+		}
+	} else {
+		// No CSP found
+		logger.Error("NO Content Security Policy found!")
+		logger.Error("Application is vulnerable to XSS, clickjacking, and data injection attacks")
+		logger.Error("Recommendation: Configure CSP headers in web server")
+
+		result.Errors = append(result.Errors, "No Content-Security-Policy header found")
+		result.SecurityScore = 0
+	}
+
+	return result, nil
+}
+
+// RunFinalHealthCheck runs final health checks: PostgreSQL SSL status,
+// LiteLLM model availability, .env web-search / system-prompt settings,
+// and a container status listing. All failures are non-fatal and recorded
+// in the result; the error return is always nil.
+func RunFinalHealthCheck(rc *eos_io.RuntimeContext) (*HealthCheckResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Phase 8: Final System Verification")
+
+	result := &HealthCheckResult{
+		ContainerHealth: make(map[string]bool),
+		Errors:          []string{},
+		Warnings:        []string{},
+	}
+
+	// Check PostgreSQL SSL status ("SHOW ssl;" prints "on"/"off")
+	logger.Info("Verifying PostgreSQL SSL status")
+
+	sslStatus, err := querySingleValue(rc, PostgresContainer, DBUser, DBName, "SHOW ssl;")
+	if err == nil && strings.Contains(sslStatus, "on") {
+		logger.Info("PostgreSQL SSL enabled")
+		result.PostgresSSL = true
+	} else {
+		logger.Warn("Could not verify SSL status")
+		result.Warnings = append(result.Warnings, "Could not verify PostgreSQL SSL")
+	}
+
+	// Check LiteLLM models endpoint
+	logger.Info("Checking LiteLLM models")
+
+	// NOTE(review): this 10s context is also reused for the docker ps call at
+	// the end of this function, so all remaining commands share one deadline.
+	ctx, cancel := context.WithTimeout(rc.Ctx, 10*time.Second)
+	defer cancel()
+
+	output, err := execute.Run(ctx, execute.Options{
+		Command: "curl",
+		Args:    []string{"-sf", LiteLLMURL + "/v1/models"},
+		Capture: true,
+	})
+
+	if err == nil {
+		// Heuristic: count `"id"` keys in the JSON body rather than parsing it;
+		// over-counts if nested objects also carry "id" fields.
+		modelCount := strings.Count(output, `"id"`)
+		if modelCount > 0 {
+			logger.Info("LiteLLM models available", zap.Int("count", modelCount))
+			result.LiteLLMModels = modelCount
+		} else {
+			logger.Warn("Could not parse LiteLLM models response")
+		}
+	} else {
+		logger.Warn("Could not verify LiteLLM models")
+		result.Warnings = append(result.Warnings, "Could not verify LiteLLM models")
+	}
+
+	// Check web search configuration (read errors silently skip this section)
+	envVars, err := readEnvFile(MoniEnvFile)
+	if err == nil {
+		webSearch := envVars["ENABLE_WEB_SEARCH"]
+		if strings.ToLower(webSearch) == "true" {
+			logger.Warn("WARNING: Web search is ENABLED in .env")
+			logger.Warn("To disable: Set ENABLE_WEB_SEARCH=false in .env")
+			result.WebSearchEnabled = true
+		} else {
+			logger.Info("Web search is disabled")
+			result.WebSearchEnabled = false
+		}
+
+		systemPrompt := envVars["MONI_SYSTEM_PROMPT"]
+		if systemPrompt != "" {
+			logger.Info("System prompt configured in .env")
+			result.SystemPromptSet = true
+		} else {
+			logger.Warn("MONI_SYSTEM_PROMPT not set in .env")
+			logger.Info("Note: Will fall back to prompt.txt if available")
+			result.SystemPromptSet = false
+		}
+	}
+
+	// Check container statuses (logged only; ContainerHealth is not populated here)
+	logger.Info("Container statuses")
+
+	output, err = execute.Run(ctx, execute.Options{
+		Command: "docker",
+		Args:    []string{"ps", "--format", "table {{.Names}}\t{{.Status}}"},
+		Capture: true,
+	})
+
+	if err == nil {
+		logger.Info("Container status output", zap.String("output", output))
+	}
+
+	return result, nil
+}
+
+// readEnvFile parses a dotenv-style file into a key→value map.
+// Blank lines and '#' comment lines are skipped, whitespace around keys and
+// values is trimmed, and a single layer of surrounding quotes is stripped.
+// Lines without '=' are ignored.
+func readEnvFile(path string) (map[string]string, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+
+	vars := make(map[string]string)
+	for _, raw := range strings.Split(string(data), "\n") {
+		line := strings.TrimSpace(raw)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+
+		key, value, found := strings.Cut(line, "=")
+		if !found {
+			continue
+		}
+		vars[strings.TrimSpace(key)] = strings.Trim(strings.TrimSpace(value), `"'`)
+	}
+
+	return vars, nil
+}
diff --git a/pkg/moni/worker.go b/pkg/moni/worker.go
new file mode 100644
index 00000000..a5e9fe26
--- /dev/null
+++ b/pkg/moni/worker.go
@@ -0,0 +1,834 @@
+package moni
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
+	"github.com/CodeMonkeyCybersecurity/eos/pkg/execute"
+	"github.com/uptrace/opentelemetry-go-extra/otelzap"
+	"go.uber.org/zap"
+)
+
+// RunWorker runs the Moni initialization worker.
+// This orchestrates the full setup: SSL, database config, security hardening.
+//
+// It validates the working directory and compose file, runs pre-flight tool
+// checks, dispatches a single targeted action when one is requested in
+// config, and otherwise runs the full setup workflow.
+//
+// BUGFIX: a SetupResult (with its start time) was previously declared here
+// and never used — "declared and not used" is a compile error in Go. Result
+// construction is owned by runFullSetup and the targeted handlers.
+func RunWorker(rc *eos_io.RuntimeContext, config *WorkerConfig) (*SetupResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Moni Consolidated Setup Worker",
+		zap.String("version", "1.0"),
+		zap.String("mode", "Full end-to-end configuration"))
+
+	// P1 FIX: Validate working directory (don't use os.Chdir - not thread-safe)
+	workDir := MoniDir
+	if config.WorkDir != "" {
+		workDir = config.WorkDir
+	}
+
+	// Check if working directory exists
+	if !fileExists(workDir) {
+		return nil, fmt.Errorf("working directory does not exist: %s\n"+
+			"This usually means Moni has not been installed yet.\n"+
+			"Fix: Run 'eos create moni' first to install Moni", workDir)
+	}
+
+	// Check if docker-compose.yml exists
+	composeFile := filepath.Join(workDir, "docker-compose.yml")
+	if !fileExists(composeFile) {
+		return nil, fmt.Errorf("docker-compose.yml not found: %s\n"+
+			"This usually means Moni installation is incomplete.\n"+
+			"Fix: Run 'eos create moni' to complete installation", composeFile)
+	}
+
+	// Check if .env file exists (warning only — later phases degrade gracefully)
+	envFile := filepath.Join(workDir, ".env")
+	if !fileExists(envFile) {
+		logger.Warn(".env file not found",
+			zap.String("path", envFile),
+			zap.String("impact", "Some configuration steps may be skipped"))
+	}
+
+	logger.Info("Working directory validated", zap.String("path", workDir))
+
+	// Pre-flight checks (docker, compose, openssl, sudo, curl)
+	if err := checkPrerequisites(rc); err != nil {
+		return nil, fmt.Errorf("pre-flight checks failed: %w", err)
+	}
+
+	// Targeted actions are mutually exclusive shortcuts; the first one set wins.
+	if config.ValidateCertsOnly {
+		return handleValidateCerts(rc)
+	}
+
+	if config.FixCertsOnly {
+		return handleFixCerts(rc)
+	}
+
+	if config.VerifyDBOnly {
+		return handleVerifyDB(rc)
+	}
+
+	if config.VerifyRLSOnly {
+		return handleVerifyRLS(rc)
+	}
+
+	if config.VerifyCSPOnly {
+		return handleVerifyCSP(rc)
+	}
+
+	if config.VerifySecurityOnly {
+		return handleVerifySecurity(rc)
+	}
+
+	if config.CleanupBackups {
+		return handleCleanupBackups(rc)
+	}
+
+	// Run full setup
+	return runFullSetup(rc, config)
+}
+
+// runFullSetup runs the complete setup workflow:
+//
+//	1-2: SSL certificate generation + permission fix (skip via config.SkipSSL)
+//	3:   .env SSL enablement + backup cleanup
+//	4:   container restart + service readiness waits
+//	5:   database configuration + verification (skip via config.SkipDatabase)
+//	6:   API key regeneration (best-effort)
+//	7:   security hardening + Row Level Security (skip via config.SkipSecurity)
+//	8:   RLS/CSP verification (skip via config.SkipVerification)
+//	9:   final health check
+//
+// The returned SetupResult is partially populated even on error so callers
+// can report which phases ran.
+func runFullSetup(rc *eos_io.RuntimeContext, config *WorkerConfig) (*SetupResult, error) {
+	logger := otelzap.Ctx(rc.Ctx)
+
+	result := &SetupResult{
+		Success:   false,
+		Phases:    []SetupPhase{},
+		StartTime: time.Now(),
+	}
+
+	// BUGFIX: workDir was referenced below (container restart) but never
+	// defined in this function — a compile error. Resolve it the same way
+	// RunWorker does, honoring config.WorkDir with MoniDir as the default.
+	workDir := MoniDir
+	if config.WorkDir != "" {
+		workDir = config.WorkDir
+	}
+
+	// Phase 1: SSL Certificates
+	if !config.SkipSSL {
+		phase := runPhase(rc, 1, "SSL Certificate Generation", func() error {
+			return GenerateSSLCerts(rc)
+		})
+		result.Phases = append(result.Phases, phase)
+		if !phase.Success {
+			return result, fmt.Errorf("phase 1 failed: SSL certificate generation")
+		}
+	}
+
+	// Phase 2: Certificate Permissions
+	if !config.SkipSSL {
+		phase := runPhase(rc, 2, "Certificate Permission Validation & Fix", func() error {
+			return ValidateAndFixCertPermissions(rc)
+		})
+		result.Phases = append(result.Phases, phase)
+		if !phase.Success {
+			return result, fmt.Errorf("phase 2 failed: certificate permission validation")
+		}
+	}
+
+	// Phase 3: Environment Configuration (backup cleanup is best-effort)
+	phase3 := runPhase(rc, 3, "Environment Configuration", func() error {
+		if err := enableSSLInEnv(rc); err != nil {
+			return err
+		}
+		cleanupOldBackups(rc)
+		return nil
+	})
+	result.Phases = append(result.Phases, phase3)
+	if !phase3.Success {
+		return result, fmt.Errorf("phase 3 failed: environment configuration")
+	}
+
+	// Phase 4: Restart Containers
+	phase4 := runPhase(rc, 4, "Container Restart", func() error {
+		return restartContainers(rc, workDir)
+	})
+	result.Phases = append(result.Phases, phase4)
+	if !phase4.Success {
+		return result, fmt.Errorf("phase 4 failed: container restart")
+	}
+
+	// Check container health
+	if err := checkContainerHealth(rc); err != nil {
+		logger.Error("Container health check failed",
+			zap.Error(err),
+			zap.String("troubleshooting", "Check logs: docker compose logs -f"))
+		return result, fmt.Errorf("container health check failed: %w", err)
+	}
+
+	// Wait for services the later phases depend on
+	logger.Info("Waiting for services to be ready")
+
+	if err := WaitForService(rc, "PostgreSQL", func() bool {
+		return CheckPostgres(rc)
+	}, MaxWaitSeconds, CheckIntervalSecs); err != nil {
+		return result, fmt.Errorf("PostgreSQL did not become ready: %w", err)
+	}
+
+	if err := WaitForService(rc, "LiteLLM", func() bool {
+		return CheckLiteLLM(rc)
+	}, MaxWaitSeconds, CheckIntervalSecs); err != nil {
+		return result, fmt.Errorf("LiteLLM did not become ready: %w", err)
+	}
+
+	// Phase 5: Database Configuration
+	if !config.SkipDatabase {
+		phase5 := runPhase(rc, 5, "Database Configuration", func() error {
+			return ConfigureDatabase(rc)
+		})
+		result.Phases = append(result.Phases, phase5)
+		if !phase5.Success {
+			return result, fmt.Errorf("phase 5 failed: database configuration")
+		}
+
+		// Verify configuration; the Moni assistant must exist afterwards
+		dbVerification, err := VerifyConfiguration(rc)
+		if err != nil {
+			logger.Warn("Database verification failed", zap.Error(err))
+		}
+		result.DBVerification = dbVerification
+
+		if dbVerification != nil && !dbVerification.MoniExists {
+			return result, fmt.Errorf("Moni assistant not found after configuration")
+		}
+	}
+
+	// Phase 6: API Key Regeneration
+	phase6 := runPhase(rc, 6, "API Key Regeneration", func() error {
+		return regenerateAPIKeys(rc)
+	})
+	result.Phases = append(result.Phases, phase6)
+	// Note: Don't fail if API key regeneration fails (script might not exist)
+
+	// Phase 7: Security Hardening
+	if !config.SkipSecurity {
+		phase7 := runPhase(rc, 7, "Database Security Hardening", func() error {
+			return ApplyDatabaseSecurity(rc)
+		})
+		result.Phases = append(result.Phases, phase7)
+		if !phase7.Success {
+			logger.Warn("Security hardening had issues but continuing")
+		}
+
+		// Phase 7.5: Row Level Security (logged under phase number 7 as well)
+		phase75 := runPhase(rc, 7, "Row Level Security (RLS)", func() error {
+			return EnableRowLevelSecurity(rc, config)
+		})
+		result.Phases = append(result.Phases, phase75)
+		if !phase75.Success {
+			logger.Error("Row Level Security enablement failed")
+			logger.Error("This is a CRITICAL security feature for multi-tenant isolation")
+			logger.Error("Continuing, but system is NOT production-ready")
+			result.CriticalIssues = append(result.CriticalIssues, "RLS enablement failed")
+		}
+	}
+
+	// Phase 8: Security Verification
+	if !config.SkipVerification {
+		logger.Info("Phase 8: Security Verification")
+
+		// Verify RLS
+		rlsVerification, err := VerifyRowLevelSecurity(rc)
+		if err != nil {
+			logger.Warn("RLS verification failed", zap.Error(err))
+		}
+		result.RLSVerification = rlsVerification
+
+		if rlsVerification != nil && len(rlsVerification.Errors) > 0 {
+			logger.Warn("RLS verification found issues")
+			// Surface at most the first three issues to keep logs readable.
+			for _, err := range rlsVerification.Errors[:min(3, len(rlsVerification.Errors))] {
+				logger.Warn("RLS issue", zap.String("error", err))
+			}
+		}
+
+		if rlsVerification != nil && !rlsVerification.CriticalTablesProtected {
+			logger.Warn("Critical tables may not be properly protected by RLS")
+			result.CriticalIssues = append(result.CriticalIssues, "Critical tables not protected by RLS")
+		}
+
+		// Verify CSP
+		cspVerification, err := VerifyContentSecurityPolicy(rc)
+		if err != nil {
+			logger.Warn("CSP verification failed", zap.Error(err))
+		}
+		result.CSPVerification = cspVerification
+
+		if cspVerification != nil && !cspVerification.CSPPresent {
+			logger.Warn("No Content Security Policy found")
+			result.CriticalIssues = append(result.CriticalIssues, "No CSP found")
+		} else if cspVerification != nil && cspVerification.SecurityScore < 40 {
+			logger.Warn("CSP security score is low", zap.Int("score", cspVerification.SecurityScore))
+		}
+	}
+
+	// Phase 9: Final Health Check (best-effort)
+	healthCheck, err := RunFinalHealthCheck(rc)
+	if err != nil {
+		logger.Warn("Final health check failed", zap.Error(err))
+	}
+	result.HealthCheck = healthCheck
+
+	// Mark as successful
+	result.Success = true
+	result.EndTime = time.Now()
+
+	// Print summary
+	printSetupSummary(rc, result)
+
+	return result, nil
+}
+
+// runPhase runs a setup phase with error handling
+func runPhase(rc *eos_io.RuntimeContext, number int, name string, fn func() error) SetupPhase {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ logger.Info(fmt.Sprintf("Phase %d: %s", number, name))
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+
+ phase := SetupPhase{
+ Number: number,
+ Name: name,
+ StartTime: time.Now(),
+ Errors: []string{},
+ Warnings: []string{},
+ }
+
+ err := fn()
+ phase.EndTime = time.Now()
+
+ if err != nil {
+ phase.Success = false
+ phase.Errors = append(phase.Errors, err.Error())
+ logger.Error("Phase failed",
+ zap.Int("phase", number),
+ zap.String("name", name),
+ zap.Error(err))
+ } else {
+ phase.Success = true
+ logger.Info("Phase completed",
+ zap.Int("phase", number),
+ zap.String("name", name),
+ zap.Duration("duration", phase.EndTime.Sub(phase.StartTime)))
+ }
+
+ return phase
+}
+
+// checkPrerequisites ensures required tooling is available: docker (CLI,
+// daemon, and compose plugin), openssl, sudo, and curl. Missing .env or
+// docker-compose.yml only produce warnings; missing tools are fatal.
+func checkPrerequisites(rc *eos_io.RuntimeContext) error {
+	logger := otelzap.Ctx(rc.Ctx)
+	logger.Info("Phase 0: Pre-flight Checks")
+
+	// command -> human-readable purpose, for the error log
+	requirements := map[string]string{
+		"docker":  "Docker CLI",
+		"openssl": "OpenSSL (certificate management)",
+		"sudo":    "sudo (used for permission adjustments)",
+		"curl":    "curl (for API checks)",
+	}
+
+	// Collect ALL missing dependencies before failing, so the operator sees
+	// everything to install in one pass.
+	ok := true
+	for command, description := range requirements {
+		if !commandExists(command) {
+			logger.Error("Missing dependency",
+				zap.String("command", command),
+				zap.String("description", description))
+			ok = false
+		} else {
+			logger.Debug("Dependency available", zap.String("command", command))
+		}
+	}
+
+	// Check Docker daemon (only meaningful when the CLI is present)
+	if commandExists("docker") {
+		// Shared 5s budget for both the daemon and compose probes.
+		ctx, cancel := context.WithTimeout(rc.Ctx, 5*time.Second)
+		defer cancel()
+
+		_, err := execute.Run(ctx, execute.Options{
+			Command: "docker",
+			Args:    []string{"info"},
+			Capture: true,
+		})
+
+		if err != nil {
+			logger.Error("Docker daemon not reachable")
+			ok = false
+		} else {
+			logger.Debug("Docker daemon responding")
+		}
+
+		// Check Docker Compose (v2 plugin: "docker compose")
+		_, err = execute.Run(ctx, execute.Options{
+			Command: "docker",
+			Args:    []string{"compose", "version"},
+			Capture: true,
+		})
+
+		if err != nil {
+			logger.Error("Docker Compose not available")
+			ok = false
+		} else {
+			logger.Debug("Docker Compose available")
+		}
+	}
+
+	// Check required files (warnings only — later phases degrade gracefully)
+	if !fileExists(MoniEnvFile) {
+		logger.Warn(".env not found", zap.String("path", MoniEnvFile))
+		logger.Warn("Some configuration steps may be skipped")
+	}
+
+	if !fileExists(MoniDockerCompose) {
+		logger.Warn("docker-compose.yml not found", zap.String("path", MoniDockerCompose))
+	}
+
+	if !ok {
+		return fmt.Errorf("pre-flight checks failed")
+	}
+
+	return nil
+}
+
+// enableSSLInEnv updates .env to use SSL connections by rewriting
+// "sslmode=disable" to "sslmode=require". A timestamped, mode-0600 backup
+// of the original .env is written first. Missing file or absent sslmode
+// settings are treated as no-ops, not errors.
+func enableSSLInEnv(rc *eos_io.RuntimeContext) error {
+	logger := otelzap.Ctx(rc.Ctx)
+
+	if !fileExists(MoniEnvFile) {
+		logger.Warn(".env not found")
+		return nil
+	}
+
+	content, err := os.ReadFile(MoniEnvFile)
+	if err != nil {
+		return fmt.Errorf("failed to read .env: %w", err)
+	}
+
+	contentStr := string(content)
+
+	// NOTE(review): contains/replace/countOccurrences/copyFile are package
+	// helpers defined elsewhere — presumably thin wrappers over the strings
+	// and io packages; confirm their semantics before relying on edge cases.
+	if contains(contentStr, "sslmode=require") {
+		logger.Info("SSL connections already enabled in .env")
+		return nil
+	}
+
+	if !contains(contentStr, "sslmode=disable") {
+		logger.Info("No sslmode found in .env")
+		return nil
+	}
+
+	// Backup (timestamped so repeated runs never overwrite a previous backup)
+	backup := filepath.Join(filepath.Dir(MoniEnvFile),
+		fmt.Sprintf(".env.backup.%s", time.Now().Format("20060102_150405")))
+
+	if err := copyFile(MoniEnvFile, backup); err != nil {
+		return fmt.Errorf("failed to backup .env: %w", err)
+	}
+
+	// 0600: the backup contains credentials/connection strings.
+	if err := os.Chmod(backup, 0600); err != nil {
+		return fmt.Errorf("failed to set backup permissions: %w", err)
+	}
+
+	// Update
+	newContent := replace(contentStr, "sslmode=disable", "sslmode=require")
+	if err := os.WriteFile(MoniEnvFile, []byte(newContent), 0600); err != nil {
+		return fmt.Errorf("failed to write .env: %w", err)
+	}
+
+	// Counted against the pre-replacement content, i.e. how many occurrences
+	// were rewritten.
+	changes := countOccurrences(contentStr, "sslmode=disable")
+	logger.Info("Enabled SSL in connection strings",
+		zap.Int("changes", changes),
+		zap.String("backup", filepath.Base(backup)))
+
+	return nil
+}
+
+// cleanupOldBackups keeps only N most recent backups
+func cleanupOldBackups(rc *eos_io.RuntimeContext) {
+ logger := otelzap.Ctx(rc.Ctx)
+
+ backupPattern := filepath.Join(filepath.Dir(MoniEnvFile), ".env.backup.*")
+ matches, err := filepath.Glob(backupPattern)
+ if err != nil {
+ logger.Warn("Failed to list backups", zap.Error(err))
+ return
+ }
+
+ if len(matches) <= KeepBackups {
+ logger.Debug("Backup cleanup not needed", zap.Int("backups", len(matches)))
+ return
+ }
+
+ // Sort by modification time (newest first)
+ // Note: This is simplified - in production, you'd sort by actual mtime
+ toDelete := matches[KeepBackups:]
+
+ for _, backup := range toDelete {
+ // Try to shred first
+ ctx, cancel := context.WithTimeout(rc.Ctx, 30*time.Second)
+ _, err := execute.Run(ctx, execute.Options{
+ Command: "shred",
+ Args: []string{"-uvz", backup},
+ Capture: true,
+ })
+ cancel()
+
+ if err != nil {
+ // Fallback to regular delete
+ os.Remove(backup)
+ }
+ }
+
+ logger.Info("Deleted old backups", zap.Int("count", len(toDelete)))
+}
+
+// restartContainers restarts Docker containers
+// P1 FIX: Uses WorkDir instead of relying on os.Chdir (thread-safe)
+func restartContainers(rc *eos_io.RuntimeContext, workDir string) error {
+ logger := otelzap.Ctx(rc.Ctx)
+
+ logger.Info("Stopping containers")
+ ctx, cancel := context.WithTimeout(rc.Ctx, 2*time.Minute)
+ defer cancel()
+
+ _, err := execute.Run(ctx, execute.Options{
+ Command: "docker",
+ Args: []string{"compose", "down"},
+ WorkDir: workDir, // P1 FIX: Explicit working directory (thread-safe)
+ Capture: false, // Show output to user
+ })
+
+ if err != nil {
+ return fmt.Errorf("failed to stop containers: %w", err)
+ }
+
+ logger.Info("Starting containers with SSL enabled")
+
+ ctx, cancel = context.WithTimeout(rc.Ctx, 2*time.Minute)
+ defer cancel()
+
+ _, err = execute.Run(ctx, execute.Options{
+ Command: "docker",
+ Args: []string{"compose", "up", "-d"},
+ WorkDir: workDir, // P1 FIX: Explicit working directory (thread-safe)
+ Capture: false, // Show output to user
+ })
+
+ if err != nil {
+ return fmt.Errorf("failed to start containers: %w", err)
+ }
+
+ logger.Info("Waiting for services to initialize",
+ zap.Int("seconds", InitWaitSeconds))
+
+ // Wait for initialization
+ time.Sleep(time.Duration(InitWaitSeconds) * time.Second)
+
+ logger.Info("Container initialization period complete")
+ return nil
+}
+
+// checkContainerHealth checks for unhealthy containers
+func checkContainerHealth(rc *eos_io.RuntimeContext) error {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Checking container health")
+
+ ctx, cancel := context.WithTimeout(rc.Ctx, 10*time.Second)
+ defer cancel()
+
+ output, _ := execute.Run(ctx, execute.Options{
+ Command: "docker",
+ Args: []string{"ps", "-a", "--filter", "health=unhealthy", "--format", "{{.Names}}"},
+ Capture: true,
+ })
+
+ unhealthy := []string{}
+ for _, name := range splitLines(output) {
+ if name != "" {
+ unhealthy = append(unhealthy, name)
+ }
+ }
+
+ if len(unhealthy) > 0 {
+ logger.Error("Found unhealthy containers", zap.Int("count", len(unhealthy)))
+
+ for _, container := range unhealthy {
+ logger.Error("Unhealthy container", zap.String("name", container))
+
+ // Get last 10 lines of logs
+ logOutput, _ := execute.Run(ctx, execute.Options{
+ Command: "docker",
+ Args: []string{"logs", "--tail", "10", container},
+ Capture: true,
+ })
+
+ if logOutput != "" {
+ logger.Debug("Last 10 log lines", zap.String("container", container), zap.String("logs", logOutput))
+ }
+ }
+
+ return fmt.Errorf("found %d unhealthy container(s)", len(unhealthy))
+ }
+
+ logger.Info("All containers are healthy")
+ return nil
+}
+
+// regenerateAPIKeys runs the API key regeneration script
+func regenerateAPIKeys(rc *eos_io.RuntimeContext) error {
+ logger := otelzap.Ctx(rc.Ctx)
+
+ if !fileExists(MoniAPIKeysScript) {
+ logger.Warn("API keys script not found - skipping")
+ return nil
+ }
+
+ logger.Info("Regenerating API keys")
+
+ ctx, cancel := context.WithTimeout(rc.Ctx, LongCommandTimeout)
+ defer cancel()
+
+ _, err := execute.Run(ctx, execute.Options{
+ Command: MoniAPIKeysScript,
+ Capture: false, // Show output to user
+ })
+
+ if err != nil {
+ return fmt.Errorf("API key regeneration failed: %w", err)
+ }
+
+ logger.Info("API key regeneration complete")
+ return nil
+}
+
+// printSetupSummary prints the final setup summary
+func printSetupSummary(rc *eos_io.RuntimeContext, result *SetupResult) {
+ logger := otelzap.Ctx(rc.Ctx)
+
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ if result.Success {
+ logger.Info("✅ SETUP COMPLETE")
+ } else {
+ logger.Error("❌ SETUP FAILED")
+ }
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+
+ if result.Success {
+ logger.Info("Configuration Summary:")
+ logger.Info("• SSL certificates generated and validated")
+ logger.Info("• Certificate permissions tested (SHIFT-LEFT v2)")
+ logger.Info("• Default assistant: Moni (was: llama3)")
+ logger.Info("• Primary model: Moni (GPT-5-mini) - 16K max tokens")
+ logger.Info("• Fallback model: Moni-4.1 (GPT-4.1-mini)")
+ logger.Info("• Embeddings: nomic-embed-text")
+ logger.Info("• All containers healthy")
+ logger.Info("")
+ logger.Info("Access Moni: http://localhost:8513")
+ logger.Info("Monitor services: docker compose ps")
+ logger.Info("View logs: docker compose logs -f app litellm-proxy")
+
+ if len(result.CriticalIssues) > 0 {
+ logger.Warn("")
+ logger.Warn("⚠️ SECURITY WARNINGS:")
+ for _, issue := range result.CriticalIssues {
+ logger.Warn(fmt.Sprintf("• %s", issue))
+ }
+ }
+ } else {
+ logger.Info("Troubleshooting:")
+ logger.Info("• Check logs: docker compose logs -f")
+ logger.Info("• Validate certs: eos update moni --validate-certs")
+ logger.Info("• Check container health: docker ps -a")
+ }
+
+ duration := result.EndTime.Sub(result.StartTime)
+ logger.Info("Setup duration", zap.Duration("duration", duration))
+}
+
+// Helper functions
// commandExists reports whether cmd can be located on PATH.
func commandExists(cmd string) bool {
	if _, err := exec.LookPath(cmd); err != nil {
		return false
	}
	return true
}
+
// contains reports whether substr is within s.
// Thin wrapper over strings.Contains; kept so the file's call sites stay
// terse, since it cannot be removed without touching every caller.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
+
// replace returns s with every non-overlapping occurrence of oldSub
// replaced by newSub (thin wrapper over strings.ReplaceAll).
//
// FIX: parameters renamed from old/new - "new" shadowed the predeclared
// Go identifier, which vets/linters flag. Go callers pass by position, so
// the rename is invisible to them.
func replace(s, oldSub, newSub string) string {
	return strings.ReplaceAll(s, oldSub, newSub)
}
+
// countOccurrences returns the number of non-overlapping instances of
// substr in s (thin wrapper over strings.Count).
func countOccurrences(s, substr string) int {
	return strings.Count(s, substr)
}
+
// splitLines trims surrounding whitespace from s and splits it into lines.
//
// FIX: a trailing "\r" is now stripped from each line so CRLF-terminated
// command output parses the same as LF output; LF-only input behaves
// exactly as before. Note an all-whitespace s still yields []string{""} -
// callers must skip the empty entry.
func splitLines(s string) []string {
	lines := strings.Split(strings.TrimSpace(s), "\n")
	for i, line := range lines {
		lines[i] = strings.TrimSuffix(line, "\r")
	}
	return lines
}
+
// copyFile duplicates src to dst with restrictive 0600 permissions
// (the files copied here hold credentials). The source is buffered
// entirely in memory, which is fine for small config files.
func copyFile(src, dst string) error {
	contents, readErr := os.ReadFile(src)
	if readErr != nil {
		return readErr
	}
	return os.WriteFile(dst, contents, 0600)
}
+
// min returns the smaller of a and b.
// NOTE(review): Go 1.21+ ships a built-in min that this package-level
// function shadows; it can be deleted once the module targets 1.21+ —
// confirm the go.mod version before removing.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
+
+// Targeted action handlers
+
+func handleValidateCerts(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Validating SSL certificates")
+
+ images, err := DetectPostgresImages(rc)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(images) == 0 {
+ logger.Warn("No PostgreSQL images detected")
+ return &SetupResult{Success: true}, nil
+ }
+
+ allPassed := true
+ for _, img := range images {
+ if TestCertReadability(rc, img.Image, img.ExpectedUID, "") {
+ logger.Info("Certificate readable",
+ zap.String("service", img.Service),
+ zap.String("image", img.Image))
+ } else {
+ logger.Error("Certificate NOT readable",
+ zap.String("service", img.Service),
+ zap.String("image", img.Image))
+ allPassed = false
+ }
+ }
+
+ return &SetupResult{Success: allPassed}, nil
+}
+
+func handleFixCerts(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Fixing certificate permissions")
+
+ if err := FixCertPermissionsImmediate(rc); err != nil {
+ return &SetupResult{Success: false}, err
+ }
+
+ logger.Info("Certificate permissions fixed successfully")
+ return &SetupResult{Success: true}, nil
+}
+
+func handleVerifyDB(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Verifying database configuration")
+
+ dbResult, err := VerifyConfiguration(rc)
+ if err != nil {
+ return &SetupResult{Success: false}, err
+ }
+
+ result := &SetupResult{
+ Success: dbResult.MoniExists,
+ DBVerification: dbResult,
+ }
+
+ return result, nil
+}
+
+func handleVerifyRLS(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Verifying Row Level Security")
+
+ rlsResult, err := VerifyRowLevelSecurity(rc)
+ if err != nil {
+ return &SetupResult{Success: false}, err
+ }
+
+ result := &SetupResult{
+ Success: rlsResult.CriticalTablesProtected,
+ RLSVerification: rlsResult,
+ }
+
+ return result, nil
+}
+
+func handleVerifyCSP(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Verifying Content Security Policy")
+
+ cspResult, err := VerifyContentSecurityPolicy(rc)
+ if err != nil {
+ return &SetupResult{Success: false}, err
+ }
+
+ result := &SetupResult{
+ Success: cspResult.CSPPresent,
+ CSPVerification: cspResult,
+ }
+
+ return result, nil
+}
+
+func handleVerifySecurity(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ logger := otelzap.Ctx(rc.Ctx)
+ logger.Info("Phase 0: Security Verification")
+
+ // Verify RLS
+ rlsResult, err := VerifyRowLevelSecurity(rc)
+ if err != nil {
+ logger.Warn("RLS verification failed", zap.Error(err))
+ }
+
+ // Verify CSP
+ cspResult, err := VerifyContentSecurityPolicy(rc)
+ if err != nil {
+ logger.Warn("CSP verification failed", zap.Error(err))
+ }
+
+ // Determine overall success
+ rlsGood := rlsResult != nil && rlsResult.CriticalTablesProtected
+ cspGood := cspResult != nil && cspResult.CSPPresent && cspResult.SecurityScore >= 40
+
+ success := rlsGood && cspGood
+
+ if success {
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ logger.Info("✅ SECURITY VERIFICATION PASSED")
+ logger.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ } else {
+ logger.Warn("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ logger.Warn("⚠️ SECURITY VERIFICATION FOUND ISSUES")
+ if !rlsGood {
+ logger.Warn("• Row Level Security needs attention")
+ }
+ if !cspGood {
+ logger.Warn("• Content Security Policy needs attention")
+ }
+ logger.Warn("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+ }
+
+ result := &SetupResult{
+ Success: success,
+ RLSVerification: rlsResult,
+ CSPVerification: cspResult,
+ }
+
+ return result, nil
+}
+
+func handleCleanupBackups(rc *eos_io.RuntimeContext) (*SetupResult, error) {
+ cleanupOldBackups(rc)
+ return &SetupResult{Success: true}, nil
+}