diff --git a/backend/rag_solution/data_ingestion/docling_processor.py b/backend/rag_solution/data_ingestion/docling_processor.py index 2aca04d8..963d1ef2 100644 --- a/backend/rag_solution/data_ingestion/docling_processor.py +++ b/backend/rag_solution/data_ingestion/docling_processor.py @@ -6,6 +6,7 @@ """ # Standard library imports +import asyncio import logging import os import uuid @@ -119,8 +120,10 @@ async def process(self, file_path: str, document_id: str) -> AsyncIterator[Docum if self.converter is None: raise ImportError("Docling DocumentConverter not available") - # Convert document using Docling - result = self.converter.convert(file_path) + # Convert document using Docling (run in thread pool to avoid blocking event loop) + # Docling's AI models are CPU-intensive and can block the async event loop + logger.debug("Running Docling conversion in thread pool for: %s", file_path) + result = await asyncio.to_thread(self.converter.convert, file_path) # Extract metadata metadata = self._extract_docling_metadata(result.document, file_path) @@ -393,10 +396,16 @@ def _get_page_number(self, item: Any) -> int | None: # Try new API first (page_no), fallback to old API (page) page_no = getattr(item.prov[0], "page_no", None) if page_no is not None: - return int(page_no) + try: + return int(page_no) + except (ValueError, TypeError): + logger.warning("Invalid page_no value: %s", page_no) page = getattr(item.prov[0], "page", None) if page is not None: - return int(page) + try: + return int(page) + except (ValueError, TypeError): + logger.warning("Invalid page value: %s", page) return None def _table_to_text(self, table_data: dict) -> str: diff --git a/backend/rag_solution/router/health_router.py b/backend/rag_solution/router/health_router.py index 52908f47..e855f0b2 100644 --- a/backend/rag_solution/router/health_router.py +++ b/backend/rag_solution/router/health_router.py @@ -152,3 +152,45 @@ def health_check( raise HTTPException(status_code=503, detail=f"System unhealthy. 
Components: {', '.join(unhealthy_components)}") return {"status": "healthy", "components": components} + + +@router.get( + "/health/ready", + summary="Readiness probe", + description="Lightweight readiness check for Kubernetes readiness probe", + response_model=dict, + responses={ + 200: {"description": "Application is ready to serve traffic"}, + 503: {"description": "Application is not ready"}, + }, +) +def readiness_check(db: Annotated[Session, Depends(get_db)]) -> dict[str, Any]: + """ + Perform a lightweight readiness check for Kubernetes readiness probe. + + This endpoint is optimized for fast response times and checks only + critical dependencies required to serve traffic (database connection). + Unlike /health, it doesn't check external services like vector DB or LLM providers. + + Args: + db: The database session. + + Returns: + dict: Readiness status + + Raises: + HTTPException: If the application is not ready to serve traffic + """ + # Check only critical database connection + datastore_status = check_datastore(db) + + if datastore_status["status"] == "unhealthy": + raise HTTPException( + status_code=503, + detail=f"Application not ready: {datastore_status['message']}" + ) + + return { + "status": "ready", + "message": "Application is ready to serve traffic" + } diff --git a/deployment/README.md b/deployment/README.md new file mode 100644 index 00000000..5edc3b76 --- /dev/null +++ b/deployment/README.md @@ -0,0 +1,329 @@ +# RAG Modulo Deployment + +This directory contains all deployment configurations for RAG Modulo on Kubernetes/OpenShift. + +## Directory Structure + +``` +deployment/ +├── k8s/ # Raw Kubernetes manifests +│ ├── base/ # Base configurations +│ │ ├── namespace.yaml +│ │ ├── configmaps/ # Application configuration +│ │ ├── secrets/ # Secret templates +│ │ ├── storage/ # PersistentVolumeClaims +│ │ ├── statefulsets/ # StatefulSets (PostgreSQL, Milvus, etc.) 
+│ │ ├── deployments/ # Deployments (Backend, Frontend, MLFlow) +│ │ ├── services/ # Kubernetes Services +│ │ ├── ingress/ # Ingress/Route configurations +│ │ └── hpa/ # HorizontalPodAutoscaler +│ └── overlays/ # Environment-specific overlays +│ ├── dev/ +│ ├── staging/ +│ └── prod/ +├── helm/ # Helm chart +│ └── rag-modulo/ +│ ├── Chart.yaml +│ ├── values.yaml # Default values +│ ├── values-dev.yaml # Development values +│ ├── values-staging.yaml # Staging values +│ ├── values-prod.yaml # Production values +│ └── templates/ # Helm templates +└── scripts/ # Deployment scripts + ├── deploy-k8s.sh # Raw K8s deployment + └── deploy-helm.sh # Helm deployment +``` + +## Quick Start + +### 1. Prerequisites + +- Kubernetes 1.24+ or OpenShift 4.10+ +- kubectl/oc CLI configured +- Helm 3.8+ (for Helm deployments) +- `.env` file with credentials + +### 2. Deploy + +Choose your deployment method: + +**Helm (Recommended):** +```bash +# Development +make helm-install-dev + +# Staging +make helm-install-staging + +# Production +make helm-install-prod +``` + +**Raw Kubernetes:** +```bash +# Development +./deployment/scripts/deploy-k8s.sh dev + +# Production +./deployment/scripts/deploy-k8s.sh prod +``` + +## Deployment Methods + +### Method 1: Helm Chart + +**Pros:** +- Easy upgrades and rollbacks +- Environment-specific configurations +- Template-based customization +- Release management + +**Usage:** +```bash +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --values ./deployment/helm/rag-modulo/values-prod.yaml +``` + +### Method 2: Raw Kubernetes Manifests + +**Pros:** +- Full control over resources +- No Helm dependency +- GitOps-friendly +- Simple debugging + +**Usage:** +```bash +kubectl apply -f deployment/k8s/base/ -R -n rag-modulo +``` + +### Method 3: Deployment Scripts + +**Pros:** +- Automated deployment workflow +- Environment validation +- Consistent deployment process + +**Usage:** +```bash +./deployment/scripts/deploy-helm.sh 
prod install +``` + +## Configuration + +### Secrets + +Create secrets from `.env` file: + +```bash +kubectl create secret generic rag-modulo-secrets \ + --from-env-file=.env \ + --namespace rag-modulo +``` + +Required secrets: +- Database credentials +- MinIO credentials +- JWT secret +- LLM provider API keys + +### Environment-Specific Values + +**Development (`values-dev.yaml`):** +- Minimal resources +- No autoscaling +- HTTP (no TLS) +- Debug logging + +**Staging (`values-staging.yaml`):** +- Medium resources +- Autoscaling enabled (2-5 replicas) +- TLS enabled +- Info logging + +**Production (`values-prod.yaml`):** +- Full resources +- Autoscaling enabled (3-15 replicas) +- TLS with cert-manager +- Info logging +- High-performance storage + +## Cloud Provider Specific + +### IBM Cloud + +```bash +make ibmcloud-deploy CLUSTER_NAME=<cluster-name> +``` + +Storage classes: +- `ibmc-block-gold` (recommended) +- `ibmc-block-silver` +- `ibmc-file-gold` + +### OpenShift + +```bash +make openshift-login OC_TOKEN=<token> OC_SERVER=<server-url> +make openshift-deploy +``` + +Features: +- Routes instead of Ingress +- Built-in container registry +- Security Context Constraints + +### AWS EKS + +```bash +aws eks update-kubeconfig --name <cluster-name> +helm install rag-modulo ./deployment/helm/rag-modulo \ + --set ingress.className=alb +``` + +### Google GKE + +```bash +gcloud container clusters get-credentials <cluster-name> +helm install rag-modulo ./deployment/helm/rag-modulo +``` + +### Azure AKS + +```bash +az aks get-credentials --resource-group <resource-group> --name <cluster-name> +helm install rag-modulo ./deployment/helm/rag-modulo +``` + +## Components + +### Stateful Services + +- **PostgreSQL**: Metadata database +- **Milvus**: Vector database +- **MinIO**: Object storage +- **etcd**: Milvus coordination + +### Stateless Services + +- **Backend**: FastAPI application (3 replicas) +- **Frontend**: React application (2 replicas) +- **MLFlow**: Model tracking (1 replica) + +### Auto-Scaling + +HorizontalPodAutoscaler configured for: +- Backend: 2-10 
replicas (CPU 70%, Memory 80%) +- Frontend: 2-5 replicas (CPU 70%, Memory 80%) + +## Monitoring + +### Metrics Endpoints + +- Backend: `/metrics` (Prometheus format) +- Milvus: `:9091/metrics` + +### Logs + +```bash +# View logs +kubectl logs -f deployment/rag-modulo-backend -n rag-modulo + +# Using Makefile +make k8s-logs-backend +make k8s-logs-frontend +``` + +### Status + +```bash +# Check deployment +kubectl get pods -n rag-modulo +kubectl get svc -n rag-modulo +kubectl get hpa -n rag-modulo + +# Using Makefile +make k8s-status +``` + +## Maintenance + +### Upgrade + +```bash +# Helm upgrade +make helm-upgrade-prod + +# Or manually +helm upgrade rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --values ./deployment/helm/rag-modulo/values-prod.yaml +``` + +### Rollback + +```bash +# Helm rollback +helm rollback rag-modulo -n rag-modulo + +# Or to specific revision +helm rollback rag-modulo 2 -n rag-modulo +``` + +### Cleanup + +```bash +# Uninstall Helm release +make helm-uninstall + +# Delete namespace +make k8s-cleanup +``` + +## Troubleshooting + +### Check Pod Status +```bash +kubectl get pods -n rag-modulo +kubectl describe pod <pod-name> -n rag-modulo +``` + +### Check Logs +```bash +kubectl logs <pod-name> -n rag-modulo +kubectl logs -f deployment/rag-modulo-backend -n rag-modulo +``` + +### Check Events +```bash +kubectl get events -n rag-modulo --sort-by='.lastTimestamp' +``` + +### Debug Services +```bash +# Port forward +make k8s-port-forward-backend # localhost:8000 +make k8s-port-forward-frontend # localhost:3000 + +# Open shell +make k8s-shell-backend +``` + +## CI/CD Integration + +GitHub Actions workflows available: +- `.github/workflows/k8s-deploy-production.yml` - Production deployment +- `.github/workflows/k8s-deploy-staging.yml` - Staging deployment + +## Documentation + +- [Kubernetes Deployment Guide](../docs/deployment/kubernetes.md) +- [Quick Start Guide](../docs/deployment/QUICKSTART.md) +- [Helm Chart 
README](./helm/rag-modulo/README.md) + +## Support + +- Issues: https://github.com/manavgup/rag_modulo/issues +- Discussions: https://github.com/manavgup/rag_modulo/discussions +- Documentation: https://github.com/manavgup/rag_modulo/docs diff --git a/deployment/helm/rag-modulo/.helmignore b/deployment/helm/rag-modulo/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/deployment/helm/rag-modulo/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/helm/rag-modulo/Chart.yaml b/deployment/helm/rag-modulo/Chart.yaml new file mode 100644 index 00000000..d5ffdc05 --- /dev/null +++ b/deployment/helm/rag-modulo/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: rag-modulo +description: A Helm chart for RAG Modulo - Modular RAG solution with flexible vector database support +type: application +version: 1.0.0 +appVersion: "1.0.0" +keywords: + - rag + - vector-database + - llm + - ai + - machine-learning + - milvus + - openai + - watsonx +home: https://github.com/manavgup/rag_modulo +sources: + - https://github.com/manavgup/rag_modulo +maintainers: + - name: RAG Modulo Team + email: maintainers@rag-modulo.com +dependencies: [] +icon: https://raw.githubusercontent.com/manavgup/rag_modulo/main/docs/assets/logo.png diff --git a/deployment/helm/rag-modulo/README.md b/deployment/helm/rag-modulo/README.md new file mode 100644 index 00000000..c509d7a8 --- /dev/null +++ b/deployment/helm/rag-modulo/README.md @@ -0,0 +1,194 @@ +# RAG Modulo Helm Chart + +This Helm chart deploys RAG Modulo to Kubernetes/OpenShift with production-ready configuration. 
+ +## Prerequisites + +- Kubernetes 1.24+ or OpenShift 4.10+ +- Helm 3.8+ +- kubectl or oc CLI configured +- Persistent Volume provisioner support in the underlying infrastructure +- (Optional) cert-manager for automatic TLS certificate management + +## Installation + +### Quick Install + +```bash +# Install with default values (production) +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --create-namespace + +# Install development environment +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo-dev \ + --create-namespace \ + --values ./deployment/helm/rag-modulo/values-dev.yaml + +# Install staging environment +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo-staging \ + --create-namespace \ + --values ./deployment/helm/rag-modulo/values-staging.yaml +``` + +### Install with Secrets + +```bash +# Create secrets from .env file +kubectl create secret generic rag-modulo-secrets \ + --from-env-file=.env \ + --namespace rag-modulo + +# Install chart +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --create-namespace +``` + +### Install with Custom Values + +```bash +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --create-namespace \ + --set backend.replicaCount=5 \ + --set ingress.hosts.frontend=myapp.example.com +``` + +## Configuration + +The following table lists the configurable parameters of the RAG Modulo chart and their default values. 
+ +### Global Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `global.namespace` | Kubernetes namespace | `rag-modulo` | +| `global.environment` | Environment name | `production` | + +### Backend Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `backend.enabled` | Enable backend deployment | `true` | +| `backend.replicaCount` | Number of backend replicas | `3` | +| `backend.autoscaling.enabled` | Enable HPA | `true` | +| `backend.autoscaling.minReplicas` | Minimum replicas | `2` | +| `backend.autoscaling.maxReplicas` | Maximum replicas | `10` | +| `backend.resources.requests.memory` | Memory request | `2Gi` | +| `backend.resources.requests.cpu` | CPU request | `1000m` | + +### Frontend Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `frontend.enabled` | Enable frontend deployment | `true` | +| `frontend.replicaCount` | Number of frontend replicas | `2` | +| `frontend.autoscaling.enabled` | Enable HPA | `true` | + +### Database Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `postgresql.enabled` | Enable PostgreSQL | `true` | +| `postgresql.persistence.size` | PVC size | `50Gi` | +| `milvus.enabled` | Enable Milvus | `true` | +| `milvus.persistence.size` | PVC size | `100Gi` | + +### Ingress Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `ingress.enabled` | Enable ingress | `true` | +| `ingress.className` | Ingress class | `nginx` | +| `ingress.tls.enabled` | Enable TLS | `true` | +| `ingress.hosts.frontend` | Frontend hostname | `rag-modulo.example.com` | + +## Upgrading + +```bash +# Upgrade to new version +helm upgrade rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --values ./deployment/helm/rag-modulo/values-prod.yaml + +# Upgrade with specific values +helm upgrade rag-modulo ./deployment/helm/rag-modulo \ + 
--namespace rag-modulo \ + --set images.backend.tag=v1.1.0 +``` + +## Uninstalling + +```bash +# Uninstall release +helm uninstall rag-modulo --namespace rag-modulo + +# Delete namespace (optional) +kubectl delete namespace rag-modulo +``` + +## OpenShift Deployment + +For OpenShift deployments: + +```bash +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --create-namespace \ + --set openshift.enabled=true \ + --set openshift.routes.enabled=true \ + --set ingress.enabled=false +``` + +## IBM Cloud Deployment + +For IBM Cloud Kubernetes Service: + +```bash +helm install rag-modulo ./deployment/helm/rag-modulo \ + --namespace rag-modulo \ + --create-namespace \ + --set postgresql.persistence.storageClassName=ibmc-block-gold \ + --set milvus.persistence.storageClassName=ibmc-block-gold +``` + +## Monitoring + +The chart includes Prometheus metrics endpoints on: +- Backend: `http://rag-modulo-backend:8000/metrics` +- Milvus: `http://milvus-service:9091/metrics` + +## Troubleshooting + +### Check pod status +```bash +kubectl get pods -n rag-modulo +``` + +### View logs +```bash +kubectl logs -f deployment/rag-modulo-backend -n rag-modulo +``` + +### Debug failed pods +```bash +kubectl describe pod <pod-name> -n rag-modulo +``` + +### Access services locally +```bash +# Backend +kubectl port-forward svc/rag-modulo-backend 8000:8000 -n rag-modulo + +# Frontend +kubectl port-forward svc/rag-modulo-frontend 3000:3000 -n rag-modulo +``` + +## Support + +For issues and questions, please visit: +- GitHub: https://github.com/manavgup/rag_modulo/issues +- Documentation: https://github.com/manavgup/rag_modulo/docs diff --git a/deployment/helm/rag-modulo/templates/NOTES.txt b/deployment/helm/rag-modulo/templates/NOTES.txt new file mode 100644 index 00000000..2402b725 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/NOTES.txt @@ -0,0 +1,56 @@ +Thank you for installing {{ .Chart.Name }}! + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} -n {{ .Release.Namespace }} + $ helm get all {{ .Release.Name }} -n {{ .Release.Namespace }} + +RAG Modulo has been deployed to namespace: {{ .Release.Namespace }} + +{{- if .Values.ingress.enabled }} + +Application URLs: + Frontend: https://{{ .Values.ingress.hosts.frontend }} + Backend: https://{{ .Values.ingress.hosts.backend }} + MLFlow: https://{{ .Values.ingress.hosts.mlflow }} + +{{- else }} + +The application is not exposed via Ingress. To access it locally: + + Backend: + $ kubectl port-forward svc/{{ include "rag-modulo.fullname" . }}-backend 8000:8000 -n {{ .Release.Namespace }} + Visit: http://localhost:8000 + + Frontend: + $ kubectl port-forward svc/{{ include "rag-modulo.fullname" . }}-frontend 3000:3000 -n {{ .Release.Namespace }} + Visit: http://localhost:3000 + +{{- end }} + +Check deployment status: + $ kubectl get pods -n {{ .Release.Namespace }} + $ kubectl get svc -n {{ .Release.Namespace }} + +View logs: + Backend: $ kubectl logs -f deployment/{{ include "rag-modulo.fullname" . }}-backend -n {{ .Release.Namespace }} + Frontend: $ kubectl logs -f deployment/{{ include "rag-modulo.fullname" . }}-frontend -n {{ .Release.Namespace }} + +{{- if .Values.backend.autoscaling.enabled }} + +Auto-scaling is enabled for backend: + Min replicas: {{ .Values.backend.autoscaling.minReplicas }} + Max replicas: {{ .Values.backend.autoscaling.maxReplicas }} + +Check HPA status: + $ kubectl get hpa -n {{ .Release.Namespace }} + +{{- end }} + +For more information, visit: + https://github.com/manavgup/rag_modulo + +⚠️ IMPORTANT: Make sure to configure secrets before use: + $ kubectl create secret generic rag-modulo-secrets --from-env-file=.env -n {{ .Release.Namespace }} diff --git a/deployment/helm/rag-modulo/templates/_helpers.tpl b/deployment/helm/rag-modulo/templates/_helpers.tpl new file mode 100644 index 00000000..97059d3d --- /dev/null +++ b/deployment/helm/rag-modulo/templates/_helpers.tpl @@ -0,0 +1,82 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "rag-modulo.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "rag-modulo.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "rag-modulo.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "rag-modulo.labels" -}} +helm.sh/chart: {{ include "rag-modulo.chart" . }} +{{ include "rag-modulo.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "rag-modulo.selectorLabels" -}} +app.kubernetes.io/name: {{ include "rag-modulo.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "rag-modulo.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "rag-modulo.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for HPA +*/}} +{{- define "rag-modulo.hpa.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "autoscaling/v2" }} +{{- print "autoscaling/v2" }} +{{- else }} +{{- print "autoscaling/v2beta2" }} +{{- end }} +{{- end }} + +{{/* +Create the namespace name +*/}} +{{- define "rag-modulo.namespace" -}} +{{- if .Values.global.namespace }} +{{- .Values.global.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} diff --git a/deployment/helm/rag-modulo/templates/backend-deployment.yaml b/deployment/helm/rag-modulo/templates/backend-deployment.yaml new file mode 100644 index 00000000..33fcfc6a --- /dev/null +++ b/deployment/helm/rag-modulo/templates/backend-deployment.yaml @@ -0,0 +1,146 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rag-modulo.fullname" . }}-backend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: backend +spec: + replicas: {{ .Values.backend.replicaCount | default 3 }} + selector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: backend + template: + metadata: + labels: + {{- include "rag-modulo.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: backend + spec: + containers: + - name: backend + image: "{{ .Values.images.backend.repository }}:{{ .Values.images.backend.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.images.backend.pullPolicy | default "IfNotPresent" }} + ports: + - name: http + containerPort: 8000 + protocol: TCP + env: + - name: COLLECTIONDB_HOST + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . 
}}-config + key: COLLECTIONDB_HOST + - name: COLLECTIONDB_PORT + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . }}-config + key: COLLECTIONDB_PORT + - name: COLLECTIONDB_NAME + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . }}-config + key: COLLECTIONDB_NAME + - name: COLLECTIONDB_USER + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: COLLECTIONDB_USER + - name: COLLECTIONDB_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: COLLECTIONDB_PASSWORD + - name: MILVUS_HOST + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . }}-config + key: MILVUS_HOST + - name: MILVUS_PORT + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . }}-config + key: MILVUS_PORT + - name: MINIO_ENDPOINT + valueFrom: + configMapKeyRef: + name: {{ include "rag-modulo.fullname" . }}-config + key: MINIO_ENDPOINT + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: MINIO_ROOT_USER + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: MINIO_ROOT_PASSWORD + - name: JWT_SECRET_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: JWT_SECRET_KEY + - name: WATSONX_APIKEY + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: WATSONX_APIKEY + optional: true + - name: WATSONX_PROJECT_ID + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: WATSONX_PROJECT_ID + optional: true + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.name | default "rag-modulo-secrets" }} + key: OPENAI_API_KEY + optional: true + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: {{ 
.Values.secrets.name | default "rag-modulo-secrets" }} + key: ANTHROPIC_API_KEY + optional: true + resources: + {{- toYaml .Values.backend.resources | nindent 12 }} + livenessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /api/health/ready + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + startupProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 30 + {{- with .Values.backend.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.backend.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.backend.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployment/helm/rag-modulo/templates/backend-service.yaml b/deployment/helm/rag-modulo/templates/backend-service.yaml new file mode 100644 index 00000000..7382ff8e --- /dev/null +++ b/deployment/helm/rag-modulo/templates/backend-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rag-modulo.fullname" . }}-backend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: backend +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "rag-modulo.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: backend diff --git a/deployment/helm/rag-modulo/templates/configmap.yaml b/deployment/helm/rag-modulo/templates/configmap.yaml new file mode 100644 index 00000000..069a8f07 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/configmap.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "rag-modulo.fullname" . 
}}-config + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: config +data: + COLLECTIONDB_HOST: "{{ .Values.postgresql.host | default "postgresql" }}" + COLLECTIONDB_PORT: "{{ .Values.postgresql.port | default "5432" }}" + COLLECTIONDB_NAME: "{{ .Values.postgresql.database | default "collectiondb" }}" + MILVUS_HOST: "{{ .Values.milvus.host | default "milvus" }}" + MILVUS_PORT: "{{ .Values.milvus.port | default "19530" }}" + MINIO_ENDPOINT: "{{ .Values.minio.endpoint | default "minio:9000" }}" + MLFLOW_TRACKING_URI: "{{ .Values.mlflow.trackingUri | default "http://mlflow:5000" }}" + LOG_LEVEL: "{{ .Values.config.logLevel | default "INFO" }}" + ENVIRONMENT: "{{ .Values.config.environment | default "production" }}" + SKIP_AUTH: "{{ .Values.config.skipAuth | default "false" }}" + DEVELOPMENT_MODE: "{{ .Values.config.developmentMode | default "false" }}" diff --git a/deployment/helm/rag-modulo/templates/frontend-deployment.yaml b/deployment/helm/rag-modulo/templates/frontend-deployment.yaml new file mode 100644 index 00000000..09c50c00 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/frontend-deployment.yaml @@ -0,0 +1,63 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rag-modulo.fullname" . }}-frontend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: frontend +spec: + replicas: {{ .Values.frontend.replicaCount | default 2 }} + selector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: frontend + template: + metadata: + labels: + {{- include "rag-modulo.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: frontend + spec: + containers: + - name: frontend + image: "{{ .Values.images.frontend.repository }}:{{ .Values.images.frontend.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.images.frontend.pullPolicy | default "IfNotPresent" }} + ports: + - name: http + containerPort: 3000 + protocol: TCP + env: + - name: REACT_APP_API_URL + value: "http://{{ include "rag-modulo.fullname" . }}-backend:8000" + - name: REACT_APP_WS_URL + value: "ws://{{ include "rag-modulo.fullname" . }}-backend:8000" + resources: + {{- toYaml .Values.frontend.resources | nindent 12 }} + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + {{- with .Values.frontend.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.frontend.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.frontend.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployment/helm/rag-modulo/templates/frontend-service.yaml b/deployment/helm/rag-modulo/templates/frontend-service.yaml new file mode 100644 index 00000000..f910cc4c --- /dev/null +++ b/deployment/helm/rag-modulo/templates/frontend-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rag-modulo.fullname" . }}-frontend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: frontend +spec: + type: ClusterIP + ports: + - port: 3000 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "rag-modulo.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: frontend diff --git a/deployment/helm/rag-modulo/templates/hpa.yaml b/deployment/helm/rag-modulo/templates/hpa.yaml new file mode 100644 index 00000000..88a15d91 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/hpa.yaml @@ -0,0 +1,70 @@ +{{- if .Values.backend.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rag-modulo.fullname" . }}-backend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: backend +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rag-modulo.fullname" . }}-backend + minReplicas: {{ .Values.backend.autoscaling.minReplicas | default 2 }} + maxReplicas: {{ .Values.backend.autoscaling.maxReplicas | default 10 }} + metrics: + {{- if .Values.backend.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.backend.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.backend.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.backend.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} + +{{- if .Values.frontend.autoscaling.enabled }} +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rag-modulo.fullname" . }}-frontend + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: frontend +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rag-modulo.fullname" . 
}}-frontend + minReplicas: {{ .Values.frontend.autoscaling.minReplicas | default 2 }} + maxReplicas: {{ .Values.frontend.autoscaling.maxReplicas | default 5 }} + metrics: + {{- if .Values.frontend.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.frontend.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.frontend.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.frontend.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/deployment/helm/rag-modulo/templates/ingress.yaml b/deployment/helm/rag-modulo/templates/ingress.yaml new file mode 100644 index 00000000..7ea6091f --- /dev/null +++ b/deployment/helm/rag-modulo/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "rag-modulo.fullname" . }} + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: ingress + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.hosts.frontend | quote }} + - {{ .Values.ingress.hosts.backend | quote }} + secretName: {{ .Values.ingress.tls.secretName }} + {{- end }} + rules: + - host: {{ .Values.ingress.hosts.frontend | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ include "rag-modulo.fullname" . }}-frontend + port: + number: 3000 + - host: {{ .Values.ingress.hosts.backend | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ include "rag-modulo.fullname" . 
}}-backend + port: + number: 8000 +{{- end }} diff --git a/deployment/helm/rag-modulo/templates/namespace.yaml b/deployment/helm/rag-modulo/templates/namespace.yaml new file mode 100644 index 00000000..67486bd5 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} + app.kubernetes.io/component: namespace diff --git a/deployment/helm/rag-modulo/templates/networkpolicy.yaml b/deployment/helm/rag-modulo/templates/networkpolicy.yaml new file mode 100644 index 00000000..9a4037cc --- /dev/null +++ b/deployment/helm/rag-modulo/templates/networkpolicy.yaml @@ -0,0 +1,122 @@ +{{- if .Values.networkPolicy.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "rag-modulo.fullname" . }}-backend-netpol + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: backend + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: frontend + ports: + - protocol: TCP + port: 8000 + {{- if .Values.ingress.enabled }} + - from: + - namespaceSelector: + matchLabels: + name: {{ .Values.networkPolicy.ingressNamespace | default "ingress-nginx" }} + ports: + - protocol: TCP + port: 8000 + {{- end }} + egress: + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + - to: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . 
| nindent 12 }} + app.kubernetes.io/component: postgres + ports: + - protocol: TCP + port: 5432 + - to: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: milvus + ports: + - protocol: TCP + port: 19530 + - to: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: minio + ports: + - protocol: TCP + port: 9000 + - to: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: mlflow + ports: + - protocol: TCP + port: 5000 + - to: + - namespaceSelector: {} + ports: + - protocol: TCP + port: 443 + - protocol: TCP + port: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "rag-modulo.fullname" . }}-frontend-netpol + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: frontend + policyTypes: + - Ingress + - Egress + ingress: + - from: [] + ports: + - protocol: TCP + port: 8080 + egress: + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + - to: + - podSelector: + matchLabels: + {{- include "rag-modulo.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: backend + ports: + - protocol: TCP + port: 8000 +{{- end }} diff --git a/deployment/helm/rag-modulo/templates/resourcequota.yaml b/deployment/helm/rag-modulo/templates/resourcequota.yaml new file mode 100644 index 00000000..32a7e636 --- /dev/null +++ b/deployment/helm/rag-modulo/templates/resourcequota.yaml @@ -0,0 +1,72 @@ +{{- if .Values.resourceQuota.enabled }} +apiVersion: v1 +kind: ResourceQuota +metadata: + name: {{ include "rag-modulo.fullname" . }}-quota + namespace: {{ include "rag-modulo.namespace" . 
}} + labels: + {{- include "rag-modulo.labels" . | nindent 4 }} +spec: + hard: + # Compute resources + requests.cpu: {{ .Values.resourceQuota.hard.requestsCpu | quote }} + requests.memory: {{ .Values.resourceQuota.hard.requestsMemory | quote }} + limits.cpu: {{ .Values.resourceQuota.hard.limitsCpu | quote }} + limits.memory: {{ .Values.resourceQuota.hard.limitsMemory | quote }} + + # Storage resources + requests.storage: {{ .Values.resourceQuota.hard.requestsStorage | quote }} + persistentvolumeclaims: {{ .Values.resourceQuota.hard.persistentvolumeclaims | quote }} + + # Object counts + pods: {{ .Values.resourceQuota.hard.pods | quote }} + services: {{ .Values.resourceQuota.hard.services | quote }} + configmaps: {{ .Values.resourceQuota.hard.configmaps | quote }} + secrets: {{ .Values.resourceQuota.hard.secrets | quote }} + + # LoadBalancer limits + services.loadbalancers: {{ .Values.resourceQuota.hard.servicesLoadbalancers | quote }} +--- +apiVersion: v1 +kind: LimitRange +metadata: + name: {{ include "rag-modulo.fullname" . }}-limits + namespace: {{ include "rag-modulo.namespace" . }} + labels: + {{- include "rag-modulo.labels" . 
| nindent 4 }} +spec: + limits: + # Pod-level limits + - type: Pod + max: + cpu: {{ .Values.limitRange.pod.max.cpu | quote }} + memory: {{ .Values.limitRange.pod.max.memory | quote }} + min: + cpu: {{ .Values.limitRange.pod.min.cpu | quote }} + memory: {{ .Values.limitRange.pod.min.memory | quote }} + + # Container-level limits + - type: Container + max: + cpu: {{ .Values.limitRange.container.max.cpu | quote }} + memory: {{ .Values.limitRange.container.max.memory | quote }} + min: + cpu: {{ .Values.limitRange.container.min.cpu | quote }} + memory: {{ .Values.limitRange.container.min.memory | quote }} + default: + cpu: {{ .Values.limitRange.container.default.cpu | quote }} + memory: {{ .Values.limitRange.container.default.memory | quote }} + defaultRequest: + cpu: {{ .Values.limitRange.container.defaultRequest.cpu | quote }} + memory: {{ .Values.limitRange.container.defaultRequest.memory | quote }} + maxLimitRequestRatio: + cpu: {{ .Values.limitRange.container.maxLimitRequestRatio.cpu }} + memory: {{ .Values.limitRange.container.maxLimitRequestRatio.memory }} + + # PVC limits + - type: PersistentVolumeClaim + max: + storage: {{ .Values.limitRange.pvc.max.storage | quote }} + min: + storage: {{ .Values.limitRange.pvc.min.storage | quote }} +{{- end }} diff --git a/deployment/helm/rag-modulo/values-dev.yaml b/deployment/helm/rag-modulo/values-dev.yaml new file mode 100644 index 00000000..01d9c10e --- /dev/null +++ b/deployment/helm/rag-modulo/values-dev.yaml @@ -0,0 +1,89 @@ +# Development environment values +# Override default values.yaml for development deployment + +global: + namespace: rag-modulo-dev + environment: development + +backend: + replicaCount: 1 + autoscaling: + enabled: false + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1000m" + +frontend: + replicaCount: 1 + autoscaling: + enabled: false + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "250m" + +postgresql: + 
persistence: + size: 10Gi + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + +milvus: + persistence: + size: 20Gi + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2000m" + +minio: + persistence: + size: 20Gi + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + +etcd: + persistence: + size: 5Gi + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + +ingress: + enabled: true + tls: + enabled: false # Use HTTP for dev + hosts: + frontend: rag-modulo-dev.example.com + backend: api-dev.rag-modulo.example.com + mlflow: mlflow-dev.rag-modulo.example.com + +config: + logLevel: "DEBUG" + environment: "development" + skipAuth: "true" + developmentMode: "true" diff --git a/deployment/helm/rag-modulo/values-prod.yaml b/deployment/helm/rag-modulo/values-prod.yaml new file mode 100644 index 00000000..bf54470f --- /dev/null +++ b/deployment/helm/rag-modulo/values-prod.yaml @@ -0,0 +1,102 @@ +# Production environment values +# Override default values.yaml for production deployment + +global: + namespace: rag-modulo + environment: production + +backend: + replicaCount: 3 + autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 15 + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + +frontend: + replicaCount: 3 + autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 8 + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + +postgresql: + persistence: + size: 100Gi + # Use high-performance storage class for production + # storageClassName: "fast-ssd" + resources: + requests: + memory: "1Gi" + cpu: "750m" + limits: + memory: "4Gi" + cpu: "3000m" + +milvus: + persistence: + size: 200Gi + # storageClassName: "fast-ssd" + resources: + requests: + memory: "4Gi" + cpu: "2000m" + limits: + memory: "16Gi" + cpu: 
"8000m" + +minio: + persistence: + size: 200Gi + resources: + requests: + memory: "1Gi" + cpu: "750m" + limits: + memory: "4Gi" + cpu: "2000m" + +etcd: + persistence: + size: 20Gi + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1500m" + +ingress: + enabled: true + tls: + enabled: true + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + nginx.ingress.kubernetes.io/rate-limit: "100" + hosts: + frontend: rag-modulo.example.com + backend: api.rag-modulo.example.com + mlflow: mlflow.rag-modulo.example.com + +config: + logLevel: "INFO" + environment: "production" + skipAuth: "false" + developmentMode: "false" diff --git a/deployment/helm/rag-modulo/values-staging.yaml b/deployment/helm/rag-modulo/values-staging.yaml new file mode 100644 index 00000000..4a6fb6e6 --- /dev/null +++ b/deployment/helm/rag-modulo/values-staging.yaml @@ -0,0 +1,86 @@ +# Staging environment values +# Override default values.yaml for staging deployment + +global: + namespace: rag-modulo-staging + environment: staging + +backend: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 5 + resources: + requests: + memory: "1Gi" + cpu: "750m" + limits: + memory: "3Gi" + cpu: "1500m" + +frontend: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 4 + resources: + requests: + memory: "384Mi" + cpu: "200m" + limits: + memory: "768Mi" + cpu: "400m" + +postgresql: + persistence: + size: 30Gi + resources: + requests: + memory: "384Mi" + cpu: "350m" + limits: + memory: "1Gi" + cpu: "1000m" + +milvus: + persistence: + size: 50Gi + resources: + requests: + memory: "1Gi" + cpu: "750m" + limits: + memory: "6Gi" + cpu: "3000m" + +minio: + persistence: + size: 50Gi + resources: + requests: + memory: "384Mi" + cpu: "350m" + limits: + 
memory: "1Gi" + cpu: "750m" + +etcd: + persistence: + size: 8Gi + +ingress: + enabled: true + tls: + enabled: true + hosts: + frontend: rag-modulo-staging.example.com + backend: api-staging.rag-modulo.example.com + mlflow: mlflow-staging.rag-modulo.example.com + +config: + logLevel: "INFO" + environment: "staging" + skipAuth: "false" + developmentMode: "false" diff --git a/deployment/helm/rag-modulo/values.yaml b/deployment/helm/rag-modulo/values.yaml new file mode 100644 index 00000000..42c8627a --- /dev/null +++ b/deployment/helm/rag-modulo/values.yaml @@ -0,0 +1,305 @@ +# Default values for rag-modulo +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Global settings +global: + namespace: rag-modulo + environment: production + +# Image settings +# IMPORTANT: Always use immutable tags (git SHA or semantic version) for production! +# Never use :latest in production as it breaks reproducibility and rollback capability. +# Set via: --set images.backend.tag="$(git rev-parse --short HEAD)" +images: + backend: + repository: ghcr.io/manavgup/rag_modulo/backend + # tag: "v1.0.0" # MUST be set to immutable tag (git SHA or version) + tag: "" # Required: set via --set or CI/CD + pullPolicy: IfNotPresent + frontend: + repository: ghcr.io/manavgup/rag_modulo/frontend + # tag: "v1.0.0" # MUST be set to immutable tag (git SHA or version) + tag: "" # Required: set via --set or CI/CD + pullPolicy: IfNotPresent + mlflow: + repository: ghcr.io/mlflow/mlflow + tag: v2.9.2 + pullPolicy: IfNotPresent + postgres: + repository: pgvector/pgvector + tag: pg16 + pullPolicy: IfNotPresent + milvus: + repository: milvusdb/milvus + tag: v2.3.3 + pullPolicy: IfNotPresent + minio: + repository: minio/minio + tag: RELEASE.2024-01-16T16-07-38Z # Use specific release tag + pullPolicy: IfNotPresent + etcd: + repository: quay.io/coreos/etcd + tag: v3.5.5 + pullPolicy: IfNotPresent + +# Backend configuration +backend: + enabled: true + replicaCount: 3 + 
autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + service: + type: ClusterIP + port: 8000 + +# Frontend configuration +frontend: + enabled: true + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 5 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + service: + type: ClusterIP + port: 8080 + +# PostgreSQL configuration +postgresql: + enabled: true + replicaCount: 1 + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "2000m" + persistence: + enabled: true + size: 50Gi + # storageClassName: "" # Set to your storage class + service: + type: ClusterIP + port: 5432 + +# Milvus configuration +milvus: + enabled: true + replicaCount: 1 + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "8Gi" + cpu: "4000m" + persistence: + enabled: true + size: 100Gi + service: + type: ClusterIP + grpcPort: 19530 + metricsPort: 9091 + +# MinIO configuration +minio: + enabled: true + replicaCount: 1 + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1000m" + persistence: + enabled: true + size: 100Gi + service: + type: ClusterIP + apiPort: 9000 + consolePort: 9001 + +# etcd configuration +etcd: + enabled: true + replicaCount: 1 + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "1000m" + persistence: + enabled: true + size: 10Gi + service: + type: ClusterIP + clientPort: 2379 + peerPort: 2380 + +# MLFlow configuration +mlflow: + enabled: true + replicaCount: 1 + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + service: + type: ClusterIP + port: 5000 + +# Ingress 
configuration +ingress: + enabled: true + className: nginx # or 'openshift-default' for OpenShift + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "50m" + tls: + enabled: true + secretName: rag-modulo-tls + hosts: + frontend: rag-modulo.example.com + backend: api.rag-modulo.example.com + mlflow: mlflow.rag-modulo.example.com + +# OpenShift-specific configuration +openshift: + enabled: false + routes: + enabled: false + +# Secrets Configuration +# SECURITY CRITICAL: Do NOT set secret values here! +# +# Secrets MUST be provided via one of these secure methods: +# +# 1. Helm --set-string flags (recommended for CI/CD): +# helm install rag-modulo ./helm/rag-modulo \ +# --set-string secrets.collectiondb.user="$POSTGRES_USER" \ +# --set-string secrets.collectiondb.password="$POSTGRES_PASSWORD" \ +# --set-string secrets.jwt.secretKey="$JWT_SECRET" +# +# 2. External secrets management (recommended for production): +# - Use External Secrets Operator with AWS Secrets Manager +# - Use Sealed Secrets (bitnami-labs/sealed-secrets) +# - Use Vault by HashiCorp +# - Use IBM Cloud Secrets Manager +# - Set secrets.externalSecretRef: "my-external-secret" +# +# 3. 
Kubernetes secret creation (development only): +# kubectl create secret generic rag-modulo-secrets \ +# --from-literal=COLLECTIONDB_USER="postgres" \ +# --from-literal=COLLECTIONDB_PASSWORD="secret123" \ +# -n rag-modulo +# +# Required secret keys (must be provided): +# - COLLECTIONDB_USER +# - COLLECTIONDB_PASSWORD +# - MINIO_ROOT_USER +# - MINIO_ROOT_PASSWORD +# - JWT_SECRET_KEY +# +# Optional secret keys (provider-specific): +# - WATSONX_APIKEY +# - WATSONX_PROJECT_ID +# - OPENAI_API_KEY +# - ANTHROPIC_API_KEY +# +secrets: + # Name of existing Kubernetes secret (leave empty to create from values) + name: "" + # Name of an externally managed secret to reference (e.g. one synced by External Secrets Operator); leave empty if unused + externalSecretRef: "" + +# ConfigMap values +config: + vectorDb: "milvus" + logLevel: "INFO" + environment: "production" + skipAuth: "false" + developmentMode: "false" + +# Resource Quotas (prevent cluster resource exhaustion) +resourceQuota: + enabled: true + hard: + # Compute resources + requestsCpu: "20" + requestsMemory: "40Gi" + limitsCpu: "40" + limitsMemory: "80Gi" + # Storage resources + requestsStorage: "500Gi" + persistentvolumeclaims: "10" + # Object counts + pods: "50" + services: "20" + configmaps: "30" + secrets: "30" + servicesLoadbalancers: "3" + +# Limit Ranges (default and maximum resource limits per pod/container) +limitRange: + pod: + max: + cpu: "8" + memory: "16Gi" + min: + cpu: "100m" + memory: "128Mi" + container: + max: + cpu: "4" + memory: "8Gi" + min: + cpu: "50m" + memory: "64Mi" + default: + cpu: "500m" + memory: "512Mi" + defaultRequest: + cpu: "250m" + memory: "256Mi" + maxLimitRequestRatio: + cpu: 4 + memory: 4 + pvc: + max: + storage: "200Gi" + min: + storage: "1Gi" + +# Network Policies (security isolation between components) +networkPolicy: + enabled: true + ingressNamespace: "ingress-nginx" # Namespace where ingress controller runs diff --git a/deployment/k8s/base/configmaps/backend-config.yaml 
b/deployment/k8s/base/configmaps/backend-config.yaml new file mode 100644 index 00000000..2dba7944 --- /dev/null +++ b/deployment/k8s/base/configmaps/backend-config.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: backend-config + namespace: rag-modulo + labels: + app: rag-modulo + component: backend +data: + # Database Configuration + COLLECTIONDB_HOST: "postgres-service" + COLLECTIONDB_PORT: "5432" + COLLECTIONDB_NAME: "myappdb" + + # Vector Database Configuration + VECTOR_DB: "milvus" + MILVUS_HOST: "milvus-service" + MILVUS_PORT: "19530" + + # MinIO Configuration + MINIO_ENDPOINT: "minio-service:9000" + MINIO_BUCKET_NAME: "rag-modulo" + + # MLFlow Configuration + MLFLOW_TRACKING_URI: "http://mlflow-service:5000" + + # Application Configuration + LOG_LEVEL: "INFO" + PYTHONUNBUFFERED: "1" + ENVIRONMENT: "production" + + # Feature Flags + SKIP_AUTH: "false" + DEVELOPMENT_MODE: "false" diff --git a/deployment/k8s/base/deployments/backend.yaml b/deployment/k8s/base/deployments/backend.yaml new file mode 100644 index 00000000..21e80b78 --- /dev/null +++ b/deployment/k8s/base/deployments/backend.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rag-modulo-backend + namespace: rag-modulo + labels: + app: rag-modulo + component: backend +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: rag-modulo + component: backend + template: + metadata: + labels: + app: rag-modulo + component: backend + spec: + containers: + - name: backend + # IMPORTANT: Replace with immutable tag (git SHA or version) + # Example: ghcr.io/manavgup/rag_modulo/backend:v1.0.0 + # Or: ghcr.io/manavgup/rag_modulo/backend:$(git rev-parse --short HEAD) + image: ghcr.io/manavgup/rag_modulo/backend:${IMAGE_TAG:-v1.0.0} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8000 + name: http + envFrom: + - configMapRef: + name: backend-config + - secretRef: + 
name: rag-modulo-secrets + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + livenessProbe: + httpGet: + path: /api/health + port: 8000 + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /api/health/ready + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + startupProbe: + httpGet: + path: /api/health + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 30 + # Use imagePullSecrets if using private registry + # imagePullSecrets: + # - name: ghcr-secret diff --git a/deployment/k8s/base/deployments/frontend.yaml b/deployment/k8s/base/deployments/frontend.yaml new file mode 100644 index 00000000..e73ce756 --- /dev/null +++ b/deployment/k8s/base/deployments/frontend.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rag-modulo-frontend + namespace: rag-modulo + labels: + app: rag-modulo + component: frontend +spec: + replicas: 2 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: rag-modulo + component: frontend + template: + metadata: + labels: + app: rag-modulo + component: frontend + spec: + containers: + - name: frontend + image: ghcr.io/manavgup/rag_modulo/frontend:latest + imagePullPolicy: Always + ports: + - containerPort: 8080 + name: http + env: + - name: BACKEND_URL + value: "http://backend-service:8000" + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + livenessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 diff --git a/deployment/k8s/base/deployments/mlflow.yaml b/deployment/k8s/base/deployments/mlflow.yaml new file mode 100644 
index 00000000..71f2477b --- /dev/null +++ b/deployment/k8s/base/deployments/mlflow.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mlflow-server + namespace: rag-modulo + labels: + app: rag-modulo + component: mlflow +spec: + replicas: 1 + selector: + matchLabels: + app: rag-modulo + component: mlflow + template: + metadata: + labels: + app: rag-modulo + component: mlflow + spec: + containers: + - name: mlflow + image: ghcr.io/mlflow/mlflow:v2.9.2 + args: + - server + - --host=0.0.0.0 + - --port=5000 + - --backend-store-uri=postgresql://$(COLLECTIONDB_USER):$(COLLECTIONDB_PASSWORD)@postgres-service:5432/mlflow + - --default-artifact-root=s3://mlflow + ports: + - containerPort: 5000 + name: http + env: + - name: COLLECTIONDB_USER + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: COLLECTIONDB_USER + - name: COLLECTIONDB_PASSWORD + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: COLLECTIONDB_PASSWORD + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_ACCESS_KEY + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_SECRET_KEY + - name: MLFLOW_S3_ENDPOINT_URL + value: "http://minio-service:9000" + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + livenessProbe: + httpGet: + path: /health + port: 5000 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 5000 + initialDelaySeconds: 15 + periodSeconds: 5 diff --git a/deployment/k8s/base/hpa/backend-hpa.yaml b/deployment/k8s/base/hpa/backend-hpa.yaml new file mode 100644 index 00000000..ba75bfc5 --- /dev/null +++ b/deployment/k8s/base/hpa/backend-hpa.yaml @@ -0,0 +1,51 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: backend-hpa + namespace: rag-modulo + labels: + app: rag-modulo + component: backend +spec: + scaleTargetRef: + apiVersion: apps/v1 + 
kind: Deployment + name: rag-modulo-backend + minReplicas: 2 + maxReplicas: 10 + metrics: + # CPU-based scaling + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + # Memory-based scaling + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + behavior: + scaleUp: + stabilizationWindowSeconds: 60 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + - type: Pods + value: 2 + periodSeconds: 60 + selectPolicy: Max + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 10 + periodSeconds: 60 + - type: Pods + value: 1 + periodSeconds: 120 + selectPolicy: Min diff --git a/deployment/k8s/base/hpa/frontend-hpa.yaml b/deployment/k8s/base/hpa/frontend-hpa.yaml new file mode 100644 index 00000000..82884e66 --- /dev/null +++ b/deployment/k8s/base/hpa/frontend-hpa.yaml @@ -0,0 +1,43 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: frontend-hpa + namespace: rag-modulo + labels: + app: rag-modulo + component: frontend +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: rag-modulo-frontend + minReplicas: 2 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + behavior: + scaleUp: + stabilizationWindowSeconds: 60 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + selectPolicy: Max + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 25 + periodSeconds: 120 + selectPolicy: Min diff --git a/deployment/k8s/base/ingress/ingress.yaml b/deployment/k8s/base/ingress/ingress.yaml new file mode 100644 index 00000000..276b3232 --- /dev/null +++ b/deployment/k8s/base/ingress/ingress.yaml @@ -0,0 +1,67 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: rag-modulo-ingress 
+ namespace: rag-modulo + labels: + app: rag-modulo + annotations: + # nginx ingress annotations + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "50m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + nginx.ingress.kubernetes.io/proxy-send-timeout: "300" + + # cert-manager annotations for automatic TLS + cert-manager.io/cluster-issuer: "letsencrypt-prod" + + # IBM Cloud annotations (uncomment if using IBM Cloud) + # ingress.bluemix.net/ALB-ID: "private-" + # ingress.bluemix.net/redirect-to-https: "True" + + # OpenShift Route annotations (alternative for OpenShift) + # route.openshift.io/termination: "edge" +spec: + ingressClassName: nginx # or 'openshift-default' for OpenShift + tls: + - hosts: + - rag-modulo.example.com # Replace with your domain + - api.rag-modulo.example.com + secretName: rag-modulo-tls + rules: + # Frontend + - host: rag-modulo.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: frontend-service + port: + number: 8080 + + # Backend API + - host: api.rag-modulo.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: backend-service + port: + number: 8000 + + # MLFlow (optional - can restrict access) + - host: mlflow.rag-modulo.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: mlflow-service + port: + number: 5000 diff --git a/deployment/k8s/base/ingress/openshift-routes.yaml b/deployment/k8s/base/ingress/openshift-routes.yaml new file mode 100644 index 00000000..f484e813 --- /dev/null +++ b/deployment/k8s/base/ingress/openshift-routes.yaml @@ -0,0 +1,68 @@ +# OpenShift Routes (alternative to Ingress for OpenShift deployments) +# Use these instead of ingress.yaml when deploying to OpenShift + +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: rag-modulo-frontend + namespace: rag-modulo + labels: + app: 
rag-modulo + component: frontend +spec: + host: rag-modulo.apps.your-cluster.com # Replace with your OpenShift cluster domain + to: + kind: Service + name: frontend-service + weight: 100 + port: + targetPort: http + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect + wildcardPolicy: None + +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: rag-modulo-backend + namespace: rag-modulo + labels: + app: rag-modulo + component: backend +spec: + host: api.rag-modulo.apps.your-cluster.com + to: + kind: Service + name: backend-service + weight: 100 + port: + targetPort: http + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect + wildcardPolicy: None + +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: rag-modulo-mlflow + namespace: rag-modulo + labels: + app: rag-modulo + component: mlflow +spec: + host: mlflow.rag-modulo.apps.your-cluster.com + to: + kind: Service + name: mlflow-service + weight: 100 + port: + targetPort: http + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect + wildcardPolicy: None diff --git a/deployment/k8s/base/namespace.yaml b/deployment/k8s/base/namespace.yaml new file mode 100644 index 00000000..f83482e5 --- /dev/null +++ b/deployment/k8s/base/namespace.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: rag-modulo + labels: + name: rag-modulo + environment: production + app: rag-modulo diff --git a/deployment/k8s/base/networkpolicy/backend-netpol.yaml b/deployment/k8s/base/networkpolicy/backend-netpol.yaml new file mode 100644 index 00000000..f022f82c --- /dev/null +++ b/deployment/k8s/base/networkpolicy/backend-netpol.yaml @@ -0,0 +1,85 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-backend-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: backend + policyTypes: + - Ingress + - Egress + ingress: + # Allow traffic from frontend + - from: + - 
podSelector: + matchLabels: + app: rag-modulo + component: frontend + ports: + - protocol: TCP + port: 8000 + # Allow traffic from ingress controller + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 8000 + egress: + # Allow DNS resolution + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow access to PostgreSQL + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: postgres + ports: + - protocol: TCP + port: 5432 + # Allow access to Milvus + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: milvus + ports: + - protocol: TCP + port: 19530 + # Allow access to MinIO + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: minio + ports: + - protocol: TCP + port: 9000 + # Allow access to MLFlow + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: mlflow + ports: + - protocol: TCP + port: 5000 + # Allow HTTP(S) egress to external LLM APIs (WatsonX, OpenAI, Anthropic); external IPs require ipBlock, a namespaceSelector only matches in-cluster pods + - to: + - ipBlock: {cidr: "0.0.0.0/0"} + ports: + - protocol: TCP + port: 443 + - protocol: TCP + port: 80 diff --git a/deployment/k8s/base/networkpolicy/database-netpol.yaml b/deployment/k8s/base/networkpolicy/database-netpol.yaml new file mode 100644 index 00000000..3aa6f572 --- /dev/null +++ b/deployment/k8s/base/networkpolicy/database-netpol.yaml @@ -0,0 +1,256 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-postgres-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: postgres + policyTypes: + - Ingress + - Egress + ingress: + # Only allow backend to access PostgreSQL + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: backend + ports: + - protocol: TCP + port: 5432 + # Allow MLFlow to access PostgreSQL + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: mlflow + ports: + - protocol: TCP + 
port: 5432 + egress: + # Allow DNS resolution only + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-milvus-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: milvus + policyTypes: + - Ingress + - Egress + ingress: + # Only allow backend to access Milvus + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: backend + ports: + - protocol: TCP + port: 19530 + - protocol: TCP + port: 9091 + egress: + # Allow DNS resolution + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow access to etcd + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: etcd + ports: + - protocol: TCP + port: 2379 + # Allow access to MinIO + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: minio + ports: + - protocol: TCP + port: 9000 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-minio-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: minio + policyTypes: + - Ingress + - Egress + ingress: + # Allow backend and Milvus to access MinIO + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: backend + ports: + - protocol: TCP + port: 9000 + - protocol: TCP + port: 9001 + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: milvus + ports: + - protocol: TCP + port: 9000 + # Allow MLFlow to access MinIO + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: mlflow + ports: + - protocol: TCP + port: 9000 + egress: + # Allow DNS resolution only + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 +--- +apiVersion: networking.k8s.io/v1 +kind: 
NetworkPolicy +metadata: + name: rag-modulo-etcd-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: etcd + policyTypes: + - Ingress + - Egress + ingress: + # Only allow Milvus to access etcd + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: milvus + ports: + - protocol: TCP + port: 2379 + - protocol: TCP + port: 2380 + egress: + # Allow DNS resolution + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow etcd cluster communication + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: etcd + ports: + - protocol: TCP + port: 2379 + - protocol: TCP + port: 2380 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-mlflow-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: mlflow + policyTypes: + - Ingress + - Egress + ingress: + # Allow backend to access MLFlow + - from: + - podSelector: + matchLabels: + app: rag-modulo + component: backend + ports: + - protocol: TCP + port: 5000 + # Allow ingress for web UI + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 5000 + egress: + # Allow DNS resolution + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow access to PostgreSQL + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: postgres + ports: + - protocol: TCP + port: 5432 + # Allow access to MinIO + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: minio + ports: + - protocol: TCP + port: 9000 diff --git a/deployment/k8s/base/networkpolicy/frontend-netpol.yaml b/deployment/k8s/base/networkpolicy/frontend-netpol.yaml new file mode 100644 index 00000000..7528436f --- /dev/null +++ b/deployment/k8s/base/networkpolicy/frontend-netpol.yaml @@ -0,0 +1,46 @@ 
+apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rag-modulo-frontend-netpol + namespace: rag-modulo +spec: + podSelector: + matchLabels: + app: rag-modulo + component: frontend + policyTypes: + - Ingress + - Egress + ingress: + # Allow traffic from ingress controller + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 8080 + # Allow traffic from anywhere (public frontend) + - from: [] + ports: + - protocol: TCP + port: 8080 + egress: + # Allow DNS resolution + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow access to backend API + - to: + - podSelector: + matchLabels: + app: rag-modulo + component: backend + ports: + - protocol: TCP + port: 8000 diff --git a/deployment/k8s/base/resourcequota/namespace-quota.yaml b/deployment/k8s/base/resourcequota/namespace-quota.yaml new file mode 100644 index 00000000..b934b30e --- /dev/null +++ b/deployment/k8s/base/resourcequota/namespace-quota.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: ResourceQuota +metadata: + name: rag-modulo-quota + namespace: rag-modulo +spec: + hard: + # Compute resources + requests.cpu: "20" + requests.memory: 40Gi + limits.cpu: "40" + limits.memory: 80Gi + + # Storage resources + requests.storage: 500Gi + persistentvolumeclaims: "10" + + # Object counts (prevent resource exhaustion) + pods: "50" + services: "20" + configmaps: "30" + secrets: "30" + + # Prevent LoadBalancer proliferation + services.loadbalancers: "3" +--- +apiVersion: v1 +kind: LimitRange +metadata: + name: rag-modulo-limits + namespace: rag-modulo +spec: + limits: + # Pod-level limits + - type: Pod + max: + cpu: "8" + memory: 16Gi + min: + cpu: "100m" + memory: 128Mi + + # Container-level limits + - type: Container + max: + cpu: "4" + memory: 8Gi + min: + cpu: "50m" + memory: 64Mi + default: + cpu: "500m" + memory: 512Mi + defaultRequest: + cpu: "250m" + memory: 256Mi + 
maxLimitRequestRatio: + cpu: "4" + memory: "4" + + # PVC limits + - type: PersistentVolumeClaim + max: + storage: 200Gi + min: + storage: 1Gi diff --git a/deployment/k8s/base/secrets/secrets-template.yaml b/deployment/k8s/base/secrets/secrets-template.yaml new file mode 100644 index 00000000..7225d272 --- /dev/null +++ b/deployment/k8s/base/secrets/secrets-template.yaml @@ -0,0 +1,38 @@ +# This is a template file - DO NOT commit actual secrets to git +# Use this to create actual secrets: +# kubectl create secret generic rag-modulo-secrets --from-env-file=.env -n rag-modulo +# +# Or use sealed-secrets, external-secrets, or vault for production + +apiVersion: v1 +kind: Secret +metadata: + name: rag-modulo-secrets + namespace: rag-modulo + labels: + app: rag-modulo +type: Opaque +stringData: + # Database Credentials (base64 encoded in actual deployment) + COLLECTIONDB_USER: "changeme" + COLLECTIONDB_PASSWORD: "changeme" + + # MinIO Credentials + MINIO_ROOT_USER: "changeme" + MINIO_ROOT_PASSWORD: "changeme" + MINIO_ACCESS_KEY: "changeme" + MINIO_SECRET_KEY: "changeme" + + # JWT Configuration + JWT_SECRET_KEY: "changeme" + JWT_ALGORITHM: "HS256" + + # LLM Provider API Keys + WATSONX_APIKEY: "changeme" + WATSONX_URL: "changeme" + WATSONX_PROJECT_ID: "changeme" + OPENAI_API_KEY: "changeme" + ANTHROPIC_API_KEY: "changeme" + + # Milvus Token (if using authentication) + MILVUS_TOKEN: "changeme" diff --git a/deployment/k8s/base/services/backend-service.yaml b/deployment/k8s/base/services/backend-service.yaml new file mode 100644 index 00000000..cc4d332d --- /dev/null +++ b/deployment/k8s/base/services/backend-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: backend-service + namespace: rag-modulo + labels: + app: rag-modulo + component: backend +spec: + type: ClusterIP + selector: + app: rag-modulo + component: backend + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + name: http + sessionAffinity: ClientIP diff --git 
a/deployment/k8s/base/services/etcd-service.yaml b/deployment/k8s/base/services/etcd-service.yaml new file mode 100644 index 00000000..d3484f1e --- /dev/null +++ b/deployment/k8s/base/services/etcd-service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: etcd-service + namespace: rag-modulo + labels: + app: rag-modulo + component: etcd +spec: + type: ClusterIP + clusterIP: None # Headless service for StatefulSet + selector: + app: rag-modulo + component: etcd + ports: + - port: 2379 + targetPort: 2379 + protocol: TCP + name: client + - port: 2380 + targetPort: 2380 + protocol: TCP + name: peer diff --git a/deployment/k8s/base/services/frontend-service.yaml b/deployment/k8s/base/services/frontend-service.yaml new file mode 100644 index 00000000..5cc3ee07 --- /dev/null +++ b/deployment/k8s/base/services/frontend-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: frontend-service + namespace: rag-modulo + labels: + app: rag-modulo + component: frontend +spec: + type: ClusterIP + selector: + app: rag-modulo + component: frontend + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deployment/k8s/base/services/milvus-service.yaml b/deployment/k8s/base/services/milvus-service.yaml new file mode 100644 index 00000000..9e150798 --- /dev/null +++ b/deployment/k8s/base/services/milvus-service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: milvus-service + namespace: rag-modulo + labels: + app: rag-modulo + component: milvus +spec: + type: ClusterIP + clusterIP: None # Headless service for StatefulSet + selector: + app: rag-modulo + component: milvus + ports: + - port: 19530 + targetPort: 19530 + protocol: TCP + name: grpc + - port: 9091 + targetPort: 9091 + protocol: TCP + name: metrics diff --git a/deployment/k8s/base/services/minio-service.yaml b/deployment/k8s/base/services/minio-service.yaml new file mode 100644 index 00000000..ee81fa21 --- /dev/null +++ 
b/deployment/k8s/base/services/minio-service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: minio-service + namespace: rag-modulo + labels: + app: rag-modulo + component: minio +spec: + type: ClusterIP + clusterIP: None # Headless service for StatefulSet + selector: + app: rag-modulo + component: minio + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: api + - port: 9001 + targetPort: 9001 + protocol: TCP + name: console diff --git a/deployment/k8s/base/services/mlflow-service.yaml b/deployment/k8s/base/services/mlflow-service.yaml new file mode 100644 index 00000000..62670181 --- /dev/null +++ b/deployment/k8s/base/services/mlflow-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: mlflow-service + namespace: rag-modulo + labels: + app: rag-modulo + component: mlflow +spec: + type: ClusterIP + selector: + app: rag-modulo + component: mlflow + ports: + - port: 5000 + targetPort: 5000 + protocol: TCP + name: http diff --git a/deployment/k8s/base/services/postgres-service.yaml b/deployment/k8s/base/services/postgres-service.yaml new file mode 100644 index 00000000..8ac85e76 --- /dev/null +++ b/deployment/k8s/base/services/postgres-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgres-service + namespace: rag-modulo + labels: + app: rag-modulo + component: postgres +spec: + type: ClusterIP + clusterIP: None # Headless service for StatefulSet + selector: + app: rag-modulo + component: postgres + ports: + - port: 5432 + targetPort: 5432 + protocol: TCP + name: postgres diff --git a/deployment/k8s/base/statefulsets/etcd.yaml b/deployment/k8s/base/statefulsets/etcd.yaml new file mode 100644 index 00000000..12d110ad --- /dev/null +++ b/deployment/k8s/base/statefulsets/etcd.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: milvus-etcd + namespace: rag-modulo + labels: + app: rag-modulo + component: etcd +spec: + serviceName: etcd-service + 
replicas: 1 + selector: + matchLabels: + app: rag-modulo + component: etcd + template: + metadata: + labels: + app: rag-modulo + component: etcd + spec: + containers: + - name: etcd + image: quay.io/coreos/etcd:v3.5.5 + ports: + - containerPort: 2379 + name: client + - containerPort: 2380 + name: peer + env: + - name: ETCD_AUTO_COMPACTION_MODE + value: "revision" + - name: ETCD_AUTO_COMPACTION_RETENTION + value: "1000" + - name: ETCD_QUOTA_BACKEND_BYTES + value: "4294967296" + - name: ETCD_SNAPSHOT_COUNT + value: "50000" + - name: ETCD_LISTEN_CLIENT_URLS + value: "http://0.0.0.0:2379" + - name: ETCD_ADVERTISE_CLIENT_URLS + value: "http://etcd-service:2379" + - name: ETCD_DATA_DIR + value: "/etcd" + volumeMounts: + - name: etcd-storage + mountPath: /etcd + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "1000m" + livenessProbe: + httpGet: + path: /health + port: 2379 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 2379 + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: etcd-storage + persistentVolumeClaim: + claimName: etcd-storage diff --git a/deployment/k8s/base/statefulsets/milvus.yaml b/deployment/k8s/base/statefulsets/milvus.yaml new file mode 100644 index 00000000..3ee713a8 --- /dev/null +++ b/deployment/k8s/base/statefulsets/milvus.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: milvus-standalone + namespace: rag-modulo + labels: + app: rag-modulo + component: milvus +spec: + serviceName: milvus-service + replicas: 1 + selector: + matchLabels: + app: rag-modulo + component: milvus + template: + metadata: + labels: + app: rag-modulo + component: milvus + spec: + containers: + - name: milvus + image: milvusdb/milvus:v2.3.3 + args: + - milvus + - run + - standalone + ports: + - containerPort: 19530 + name: grpc + - containerPort: 9091 + name: metrics + env: + - name: ETCD_ENDPOINTS + value: "etcd-service:2379" + - name: 
MINIO_ADDRESS + value: "minio-service:9000" + - name: MINIO_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_ACCESS_KEY + - name: MINIO_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_SECRET_KEY + volumeMounts: + - name: milvus-storage + mountPath: /var/lib/milvus + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "8Gi" + cpu: "4000m" + livenessProbe: + httpGet: + path: /healthz + port: 9091 + initialDelaySeconds: 90 + periodSeconds: 30 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: /healthz + port: 9091 + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + volumes: + - name: milvus-storage + persistentVolumeClaim: + claimName: milvus-storage diff --git a/deployment/k8s/base/statefulsets/minio.yaml b/deployment/k8s/base/statefulsets/minio.yaml new file mode 100644 index 00000000..20e8f150 --- /dev/null +++ b/deployment/k8s/base/statefulsets/minio.yaml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: minio + namespace: rag-modulo + labels: + app: rag-modulo + component: minio +spec: + serviceName: minio-service + replicas: 1 + selector: + matchLabels: + app: rag-modulo + component: minio + template: + metadata: + labels: + app: rag-modulo + component: minio + spec: + containers: + - name: minio + image: minio/minio:latest + args: + - server + - /data + - --console-address + - ":9001" + ports: + - containerPort: 9000 + name: api + - containerPort: 9001 + name: console + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_ROOT_USER + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: MINIO_ROOT_PASSWORD + volumeMounts: + - name: minio-storage + mountPath: /data + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1000m" + livenessProbe: + httpGet: + path: /minio/health/live + port: 9000 + 
initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /minio/health/ready + port: 9000 + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: minio-storage + persistentVolumeClaim: + claimName: minio-storage diff --git a/deployment/k8s/base/statefulsets/postgres.yaml b/deployment/k8s/base/statefulsets/postgres.yaml new file mode 100644 index 00000000..12213ec4 --- /dev/null +++ b/deployment/k8s/base/statefulsets/postgres.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres + namespace: rag-modulo + labels: + app: rag-modulo + component: postgres +spec: + serviceName: postgres-service + replicas: 1 + selector: + matchLabels: + app: rag-modulo + component: postgres + template: + metadata: + labels: + app: rag-modulo + component: postgres + spec: + containers: + - name: postgres + image: pgvector/pgvector:pg16 + ports: + - containerPort: 5432 + name: postgres + env: + - name: POSTGRES_DB + valueFrom: + configMapKeyRef: + name: backend-config + key: COLLECTIONDB_NAME + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: COLLECTIONDB_USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: rag-modulo-secrets + key: COLLECTIONDB_PASSWORD + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + volumeMounts: + - name: postgres-storage + mountPath: /var/lib/postgresql/data + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "2000m" + livenessProbe: + exec: + command: + - pg_isready + - -U + - $(POSTGRES_USER) + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + exec: + command: + - pg_isready + - -U + - $(POSTGRES_USER) + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: postgres-storage + persistentVolumeClaim: + claimName: postgres-storage diff --git a/deployment/k8s/base/storage/etcd-pvc.yaml b/deployment/k8s/base/storage/etcd-pvc.yaml new file mode 100644 index 00000000..1c528e7e --- 
/dev/null +++ b/deployment/k8s/base/storage/etcd-pvc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: etcd-storage + namespace: rag-modulo + labels: + app: rag-modulo + component: etcd +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi diff --git a/deployment/k8s/base/storage/milvus-pvc.yaml b/deployment/k8s/base/storage/milvus-pvc.yaml new file mode 100644 index 00000000..9ba523a2 --- /dev/null +++ b/deployment/k8s/base/storage/milvus-pvc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: milvus-storage + namespace: rag-modulo + labels: + app: rag-modulo + component: milvus +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi diff --git a/deployment/k8s/base/storage/minio-pvc.yaml b/deployment/k8s/base/storage/minio-pvc.yaml new file mode 100644 index 00000000..29e83f1c --- /dev/null +++ b/deployment/k8s/base/storage/minio-pvc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: minio-storage + namespace: rag-modulo + labels: + app: rag-modulo + component: minio +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi diff --git a/deployment/k8s/base/storage/postgres-pvc.yaml b/deployment/k8s/base/storage/postgres-pvc.yaml new file mode 100644 index 00000000..10a570fa --- /dev/null +++ b/deployment/k8s/base/storage/postgres-pvc.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: postgres-storage + namespace: rag-modulo + labels: + app: rag-modulo + component: postgres +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + # Uncomment for IBM Cloud + # storageClassName: ibmc-block-gold + # Uncomment for AWS + # storageClassName: gp3 + # Uncomment for GCP + # storageClassName: standard-rwo diff --git a/deployment/scripts/deploy-helm.sh b/deployment/scripts/deploy-helm.sh new file mode 100755 index 00000000..36580247 --- 
/dev/null +++ b/deployment/scripts/deploy-helm.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# Helm deployment script for RAG Modulo +# Usage: ./deploy-helm.sh [dev|staging|prod] [install|upgrade] + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Configuration +ENVIRONMENT="${1:-dev}" +ACTION="${2:-install}" +RELEASE_NAME="rag-modulo" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HELM_CHART_DIR="${SCRIPT_DIR}/../helm/rag-modulo" + +# Set namespace and values file based on environment +case $ENVIRONMENT in + dev) + NAMESPACE="rag-modulo-dev" + VALUES_FILE="${HELM_CHART_DIR}/values-dev.yaml" + ;; + staging) + NAMESPACE="rag-modulo-staging" + VALUES_FILE="${HELM_CHART_DIR}/values-staging.yaml" + ;; + prod) + NAMESPACE="rag-modulo" + VALUES_FILE="${HELM_CHART_DIR}/values-prod.yaml" + ;; + *) + echo -e "${RED}Invalid environment: $ENVIRONMENT${NC}" + echo "Usage: $0 [dev|staging|prod] [install|upgrade]" + exit 1 + ;; +esac + +echo -e "${GREEN}Deploying RAG Modulo using Helm${NC}" +echo -e "${GREEN}Environment: ${ENVIRONMENT}${NC}" +echo -e "${GREEN}Namespace: ${NAMESPACE}${NC}" +echo -e "${GREEN}Action: ${ACTION}${NC}" + +# Check prerequisites +echo -e "\n${YELLOW}Checking prerequisites...${NC}" +if ! command -v helm &> /dev/null; then + echo -e "${RED}Helm not found. Please install Helm 3.${NC}" + exit 1 +fi + +if ! command -v kubectl &> /dev/null; then + echo -e "${RED}kubectl not found. Please install kubectl.${NC}" + exit 1 +fi + +# Check Helm version +HELM_VERSION=$(helm version --short | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+') +if [[ ! $HELM_VERSION =~ ^v3\. ]]; then + echo -e "${RED}Helm 3.x is required. 
Current version: $HELM_VERSION${NC}" + exit 1 +fi + +echo -e "${GREEN}✓ Helm $HELM_VERSION${NC}" +echo -e "${GREEN}✓ kubectl configured${NC}" + +# Lint Helm chart +echo -e "\n${YELLOW}Linting Helm chart...${NC}" +helm lint ${HELM_CHART_DIR} +echo -e "${GREEN}✓ Helm chart linting passed${NC}" + +# Create namespace +echo -e "\n${YELLOW}Creating namespace...${NC}" +kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f - +echo -e "${GREEN}✓ Namespace created/verified${NC}" + +# Create secrets from .env file if exists +if [ -f "${SCRIPT_DIR}/../../.env" ]; then + echo -e "\n${YELLOW}Creating secrets from .env file...${NC}" + kubectl create secret generic rag-modulo-secrets \ + --from-env-file="${SCRIPT_DIR}/../../.env" \ + --namespace=${NAMESPACE} \ + --dry-run=client -o yaml | kubectl apply -f - + echo -e "${GREEN}✓ Secrets created${NC}" +else + echo -e "${YELLOW}⚠️ No .env file found.${NC}" + echo -e "${YELLOW}⚠️ Make sure secrets are created before deployment!${NC}" + read -p "Continue anyway? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +# Perform Helm deployment +case $ACTION in + install) + echo -e "\n${YELLOW}Installing Helm chart...${NC}" + helm install ${RELEASE_NAME} ${HELM_CHART_DIR} \ + --namespace ${NAMESPACE} \ + --values ${VALUES_FILE} \ + --create-namespace \ + --wait \ + --timeout 10m + echo -e "${GREEN}✓ Helm chart installed${NC}" + ;; + upgrade) + echo -e "\n${YELLOW}Upgrading Helm chart...${NC}" + helm upgrade ${RELEASE_NAME} ${HELM_CHART_DIR} \ + --namespace ${NAMESPACE} \ + --values ${VALUES_FILE} \ + --wait \ + --timeout 10m + echo -e "${GREEN}✓ Helm chart upgraded${NC}" + ;; + *) + echo -e "${RED}Invalid action: $ACTION${NC}" + echo "Valid actions: install, upgrade" + exit 1 + ;; +esac + +# Display deployment status +echo -e "\n${GREEN}╔════════════════════════════════════════╗${NC}" +echo -e "${GREEN}║ Helm Deployment Complete! 
║${NC}" +echo -e "${GREEN}╚════════════════════════════════════════╝${NC}" + +echo -e "\n${YELLOW}Helm Release Status:${NC}" +helm status ${RELEASE_NAME} -n ${NAMESPACE} + +echo -e "\n${YELLOW}Deployment Resources:${NC}" +echo -e "\n${YELLOW}Pods:${NC}" +kubectl get pods -n ${NAMESPACE} + +echo -e "\n${YELLOW}Services:${NC}" +kubectl get svc -n ${NAMESPACE} + +echo -e "\n${YELLOW}Ingress:${NC}" +kubectl get ingress -n ${NAMESPACE} 2>/dev/null || kubectl get routes -n ${NAMESPACE} 2>/dev/null || echo "No ingress/routes found" + +echo -e "\n${YELLOW}HPA (if enabled):${NC}" +kubectl get hpa -n ${NAMESPACE} 2>/dev/null || echo "No HPA configured" + +echo -e "\n${YELLOW}Useful Commands:${NC}" +echo " Check status: helm status ${RELEASE_NAME} -n ${NAMESPACE}" +echo " View values: helm get values ${RELEASE_NAME} -n ${NAMESPACE}" +echo " Rollback: helm rollback ${RELEASE_NAME} -n ${NAMESPACE}" +echo " Uninstall: helm uninstall ${RELEASE_NAME} -n ${NAMESPACE}" +echo "" +echo " Backend logs: kubectl logs -f deployment/rag-modulo-backend -n ${NAMESPACE}" +echo " Frontend logs: kubectl logs -f deployment/rag-modulo-frontend -n ${NAMESPACE}" +echo "" +echo " Port forward: kubectl port-forward svc/backend-service 8000:8000 -n ${NAMESPACE}" + +echo -e "\n${GREEN}Deployment completed successfully!${NC}" diff --git a/deployment/scripts/deploy-k8s.sh b/deployment/scripts/deploy-k8s.sh new file mode 100755 index 00000000..69aee56a --- /dev/null +++ b/deployment/scripts/deploy-k8s.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# Kubernetes deployment script for RAG Modulo +# Usage: ./deploy-k8s.sh [dev|staging|prod] + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Configuration +ENVIRONMENT="${1:-dev}" +NAMESPACE="rag-modulo" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +K8S_DIR="${SCRIPT_DIR}/../k8s/base" + +# Set namespace based on environment +case $ENVIRONMENT in + dev) + NAMESPACE="rag-modulo-dev" + ;; + 
staging) + NAMESPACE="rag-modulo-staging" + ;; + prod) + NAMESPACE="rag-modulo" + ;; + *) + echo -e "${RED}Invalid environment: $ENVIRONMENT${NC}" + echo "Usage: $0 [dev|staging|prod]" + exit 1 + ;; +esac + +echo -e "${GREEN}Deploying RAG Modulo to ${ENVIRONMENT} environment${NC}" +echo -e "${GREEN}Namespace: ${NAMESPACE}${NC}" + +# Check prerequisites +echo -e "\n${YELLOW}Checking prerequisites...${NC}" +if ! command -v kubectl &> /dev/null; then + echo -e "${RED}kubectl not found. Please install kubectl.${NC}" + exit 1 +fi + +# Check if kubectl is configured +if ! kubectl cluster-info &> /dev/null; then + echo -e "${RED}kubectl is not configured. Please configure kubectl.${NC}" + exit 1 +fi + +echo -e "${GREEN}✓ kubectl is configured${NC}" + +# Create namespace +echo -e "\n${YELLOW}Creating namespace...${NC}" +kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f - +echo -e "${GREEN}✓ Namespace created/verified${NC}" + +# Create secrets (if .env file exists) +if [ -f "${SCRIPT_DIR}/../../.env" ]; then + echo -e "\n${YELLOW}Creating secrets from .env file...${NC}" + kubectl create secret generic rag-modulo-secrets \ + --from-env-file="${SCRIPT_DIR}/../../.env" \ + --namespace=${NAMESPACE} \ + --dry-run=client -o yaml | kubectl apply -f - + echo -e "${GREEN}✓ Secrets created${NC}" +else + echo -e "${YELLOW}⚠️ No .env file found. 
Skipping secrets creation.${NC}" + echo -e "${YELLOW}⚠️ Please create secrets manually before deployment.${NC}" +fi + +# Apply ConfigMaps +echo -e "\n${YELLOW}Applying ConfigMaps...${NC}" +kubectl apply -f ${K8S_DIR}/configmaps/ -n ${NAMESPACE} +echo -e "${GREEN}✓ ConfigMaps applied${NC}" + +# Apply PersistentVolumeClaims +echo -e "\n${YELLOW}Applying PersistentVolumeClaims...${NC}" +kubectl apply -f ${K8S_DIR}/storage/ -n ${NAMESPACE} +echo -e "${GREEN}✓ PVCs applied${NC}" + +# Apply Services before StatefulSets (the Services provide the DNS names the stateful pods resolve, e.g. minio-service) +echo -e "\n${YELLOW}Applying Services...${NC}" +kubectl apply -f ${K8S_DIR}/services/ -n ${NAMESPACE} +echo -e "${GREEN}✓ Services applied${NC}" + +# Apply StatefulSets +echo -e "\n${YELLOW}Applying StatefulSets...${NC}" +kubectl apply -f ${K8S_DIR}/statefulsets/ -n ${NAMESPACE} +echo -e "${GREEN}✓ StatefulSets applied${NC}" + +# Wait for StatefulSets to be ready +echo -e "\n${YELLOW}Waiting for stateful services to be ready...${NC}" +kubectl wait --for=condition=ready pod -l component=postgres -n ${NAMESPACE} --timeout=300s || true +kubectl wait --for=condition=ready pod -l component=etcd -n ${NAMESPACE} --timeout=300s || true +kubectl wait --for=condition=ready pod -l component=minio -n ${NAMESPACE} --timeout=300s || true +kubectl wait --for=condition=ready pod -l component=milvus -n ${NAMESPACE} --timeout=300s || true +echo -e "${GREEN}✓ Stateful services are ready${NC}" + +# Apply Deployments +echo -e "\n${YELLOW}Applying Deployments...${NC}" +kubectl apply -f ${K8S_DIR}/deployments/ -n ${NAMESPACE} +echo -e "${GREEN}✓ Deployments applied${NC}" + +# Wait for Deployments to be ready +echo -e "\n${YELLOW}Waiting for application deployments to be ready...${NC}" +kubectl wait --for=condition=available deployment/rag-modulo-backend -n ${NAMESPACE} --timeout=300s || true +kubectl wait --for=condition=available deployment/rag-modulo-frontend -n ${NAMESPACE} --timeout=300s || true +kubectl wait --for=condition=available deployment/mlflow-server -n ${NAMESPACE} --timeout=300s || 
true +echo -e "${GREEN}✓ Application deployments are ready${NC}" + +# Apply HPA (only for staging/prod) +if [ "$ENVIRONMENT" != "dev" ]; then + echo -e "\n${YELLOW}Applying HorizontalPodAutoscalers...${NC}" + kubectl apply -f ${K8S_DIR}/hpa/ -n ${NAMESPACE} + echo -e "${GREEN}✓ HPAs applied${NC}" +fi + +# Apply Ingress/Routes +echo -e "\n${YELLOW}Applying Ingress configuration...${NC}" +if kubectl get crd routes.route.openshift.io &> /dev/null; then + echo -e "${YELLOW}OpenShift detected, applying Routes...${NC}" + kubectl apply -f ${K8S_DIR}/ingress/openshift-routes.yaml -n ${NAMESPACE} +else + echo -e "${YELLOW}Applying Ingress...${NC}" + kubectl apply -f ${K8S_DIR}/ingress/ingress.yaml -n ${NAMESPACE} +fi +echo -e "${GREEN}✓ Ingress/Routes applied${NC}" + +# Display deployment status +echo -e "\n${GREEN}╔════════════════════════════════════════╗${NC}" +echo -e "${GREEN}║ Deployment Complete! ║${NC}" +echo -e "${GREEN}╚════════════════════════════════════════╝${NC}" +echo -e "\n${YELLOW}Deployment Status:${NC}" +kubectl get pods -n ${NAMESPACE} + +echo -e "\n${YELLOW}Services:${NC}" +kubectl get svc -n ${NAMESPACE} + +echo -e "\n${YELLOW}Ingress:${NC}" +kubectl get ingress -n ${NAMESPACE} 2>/dev/null || kubectl get routes -n ${NAMESPACE} 2>/dev/null || echo "No ingress/routes found" + +echo -e "\n${YELLOW}To check logs:${NC}" +echo " Backend: kubectl logs -f deployment/rag-modulo-backend -n ${NAMESPACE}" +echo " Frontend: kubectl logs -f deployment/rag-modulo-frontend -n ${NAMESPACE}" + +echo -e "\n${YELLOW}To access services locally:${NC}" +echo " Backend: kubectl port-forward svc/backend-service 8000:8000 -n ${NAMESPACE}" +echo " Frontend: kubectl port-forward svc/frontend-service 8080:8080 -n ${NAMESPACE}" + +echo -e "\n${GREEN}Deployment completed successfully!${NC}"