diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index f95bb42..8266d4f 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -17,7 +17,7 @@ jobs: pull-requests: write steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Sync labels uses: EndBug/label-sync@v2 diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index e80ea7c..3a201b9 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -43,7 +43,7 @@ jobs: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-session-token: ${{ secrets.AWS_SESSION_TOKEN }} - aws-region: ${{ secrets.AWS_REGION || 'us-east-1' }} + aws-region: ${{ secrets.AWS_REGION != '' && secrets.AWS_REGION || 'us-east-1' }} - name: Configure Azure credentials if: matrix.cloud == 'azure' @@ -104,7 +104,7 @@ jobs: sudo mv conftest /usr/local/bin/conftest - name: Evaluate Kubernetes policy pack - run: conftest test k8s/app -p policies/kubernetes + run: conftest test k8s -p policies/kubernetes - name: Upload Terraform plan artifact uses: actions/upload-artifact@v6 diff --git a/.gitignore b/.gitignore index 09349c8..f6b0bc7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ .terraform.lock.hcl *.tfplan *.tfplan.json +tfplan* +tfplan-* crash.log crash.*.log override.tf diff --git a/Makefile b/Makefile index de1fc84..09d2619 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,13 @@ TF_DIR=infra/terraform POLICY_TF_DIR=policies/terraform POLICY_K8S_DIR=policies/kubernetes CLOUD?=aws +ARGOCD_NAMESPACE?=argocd +ARGOCD_APP_NAME?=dissertation-sample-api +ARGOCD_APP_MANIFEST?=argocd/application.yaml +ARGOCD_TEST_APP_NAME?=dissertation-test-api +ARGOCD_TEST_APP_MANIFEST?=argocd/test-application.yaml -.PHONY: help tf-init tf-fmt tf-validate tf-plan tf-apply tf-destroy policy-tf policy-k8s clean all +.PHONY: help tf-init tf-fmt tf-validate tf-plan tf-apply 
tf-destroy policy-tf policy-k8s k8s-dry-run argocd-apply argocd-status argocd-test argocd-test-app-apply argocd-test-app-status argocd-test-app-sync clean all help: ## Display this help message @echo "Available targets:" @@ -31,11 +36,41 @@ tf-destroy: ## Destroy Terraform resources (use CLOUD=aws|azure|gcp) terraform -chdir=$(TF_DIR)/$(CLOUD) destroy policy-tf: ## Evaluate Terraform policies - opa eval --fail-defined --format pretty --data $(POLICY_TF_DIR) --input $(POLICY_TF_DIR)/sample-tfplan.json "data.terraform.deny" + opa eval --fail-defined --format pretty --data $(POLICY_TF_DIR) --input $(POLICY_TF_DIR)/sample-tfplan.json "data.terraform.deny[_]" policy-k8s: ## Evaluate Kubernetes policies - conftest test k8s/app -p $(POLICY_K8S_DIR) + conftest test k8s -p $(POLICY_K8S_DIR) + +k8s-dry-run: ## Validate Kubernetes manifests with kubectl client-side dry run + kubectl apply --dry-run=client -f k8s + +argocd-apply: ## Apply ArgoCD Application manifest + kubectl apply -f $(ARGOCD_APP_MANIFEST) + +argocd-status: ## Show ArgoCD Application status + kubectl get application -n $(ARGOCD_NAMESPACE) $(ARGOCD_APP_NAME) + +argocd-test: ## Apply app and wait until ArgoCD reports Synced + Healthy + kubectl apply -f $(ARGOCD_APP_MANIFEST) + kubectl wait --for=jsonpath='{.status.sync.status}'=Synced application/$(ARGOCD_APP_NAME) -n $(ARGOCD_NAMESPACE) --timeout=300s + kubectl wait --for=jsonpath='{.status.health.status}'=Healthy application/$(ARGOCD_APP_NAME) -n $(ARGOCD_NAMESPACE) --timeout=300s + kubectl get application -n $(ARGOCD_NAMESPACE) $(ARGOCD_APP_NAME) + kubectl get all -n dissertation + +argocd-test-app-apply: ## Apply dedicated test ArgoCD Application manifest + kubectl apply -f $(ARGOCD_TEST_APP_MANIFEST) + +argocd-test-app-status: ## Show dedicated test ArgoCD Application status + kubectl get application -n $(ARGOCD_NAMESPACE) $(ARGOCD_TEST_APP_NAME) + +argocd-test-app-sync: ## Apply test app and wait until ArgoCD reports Synced + Healthy + kubectl apply -f 
$(ARGOCD_TEST_APP_MANIFEST) + kubectl wait --for=jsonpath='{.status.sync.status}'=Synced application/$(ARGOCD_TEST_APP_NAME) -n $(ARGOCD_NAMESPACE) --timeout=300s + kubectl wait --for=jsonpath='{.status.health.status}'=Healthy application/$(ARGOCD_TEST_APP_NAME) -n $(ARGOCD_NAMESPACE) --timeout=300s + kubectl get application -n $(ARGOCD_NAMESPACE) $(ARGOCD_TEST_APP_NAME) + kubectl get all -n dissertation-test clean: ## Clean generated files - rm -f $(TF_DIR)/*.tfplan $(TF_DIR)/*.tfplan.json $(TF_DIR)/.terraform.lock.hcl - rm -rf $(TF_DIR)/.terraform + rm -f $(TF_DIR)/aws/*.tfplan $(TF_DIR)/aws/tfplan $(TF_DIR)/aws/tfplan*.json $(TF_DIR)/azure/*.tfplan $(TF_DIR)/azure/tfplan $(TF_DIR)/azure/tfplan*.json $(TF_DIR)/gcp/*.tfplan $(TF_DIR)/gcp/tfplan $(TF_DIR)/gcp/tfplan*.json + rm -rf $(TF_DIR)/aws/.terraform $(TF_DIR)/azure/.terraform $(TF_DIR)/gcp/.terraform + rm -f $(TF_DIR)/aws/.terraform.lock.hcl $(TF_DIR)/azure/.terraform.lock.hcl $(TF_DIR)/gcp/.terraform.lock.hcl diff --git a/README.md b/README.md index 99ea017..601117a 100644 --- a/README.md +++ b/README.md @@ -2,24 +2,148 @@ This repository provides a practical implementation for the dissertation topic: -`Impact of governance and scalability with automated vendor-agnostic multi-cloud deployment pipelines` - -## What is included - -- **`infra/terraform/`**: Multi-cloud Infrastructure as Code (AWS, Azure, GCP) with a consistent interface -- **`policies/`**: OPA/Rego governance policies for Terraform and Kubernetes -- **`.github/workflows/pipeline.yml`**: CI pipeline for IaC validation and policy checks across all cloud providers -- **`k8s/app/`**: Production-ready Kubernetes workload with security best practices, deployed through GitOps -- **`argocd/application.yaml`**: Argo CD app manifest for GitOps delivery +> **Impact of governance and scalability with automated vendor-agnostic multi-cloud deployment pipelines** + +--- + +## Table of Contents + +- [What Is Included](#what-is-included) +- 
[Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Quick Start](#quick-start) +- [Project Structure](#project-structure) +- [Terraform Infrastructure](#terraform-infrastructure) +- [Kubernetes Deployment](#kubernetes-deployment) +- [Policy Enforcement (OPA/Rego)](#policy-enforcement-oparego) +- [CI/CD Pipeline](#cicd-pipeline) +- [ArgoCD / GitOps Setup](#argocd--gitops-setup) +- [Pre-commit Hooks](#pre-commit-hooks) +- [Troubleshooting](#troubleshooting) +- [Security Features](#security-features) +- [Best Practices](#best-practices) +- [License](#license) +- [Acknowledgments](#acknowledgments) + +--- + +## What Is Included + +| Component | Path | Description | +|-----------|------|-------------| +| Terraform IaC | `infra/terraform/` | Multi-cloud infrastructure (AWS, Azure, GCP) with consistent interface | +| OPA Policies | `policies/` | Governance policies for Terraform and Kubernetes | +| CI/CD Pipeline | `.github/workflows/pipeline.yml` | GitHub Actions for IaC validation and policy checks | +| Kubernetes App | `k8s/app/` | Production-ready workload with security best practices | +| Kubernetes Test App | `k8s/test-app/` | Dedicated smoke-test workload for cluster validation | +| ArgoCD Manifests | `argocd/*.yaml` | GitOps delivery for primary and test workloads | + +--- ## Architecture This project demonstrates vendor-agnostic multi-cloud deployment through: + - **Abstraction Layer**: Consistent Terraform interface regardless of cloud provider - **Policy-as-Code**: Automated governance enforcement with OPA - **GitOps**: Declarative Kubernetes deployments via ArgoCD - **CI/CD**: Automated validation and testing for all three cloud providers +```text +┌─────────────────────────────────────────────────────────────────────────┐ +│ Developer Workflow │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Code │───▶│ Git │───▶│ GitHub │───▶│ CI/CD │ │ +│ │ Changes │ │ Commit │ │ Push │ │ Pipeline │ │ +│ └──────────┘ └──────────┘ 
└──────────┘ └─────┬────┘ │ +└──────────────────────────────────────────────────────────┼──────────────┘ + │ + ┌──────────────────────────────────────┼───────────────┐ + │ CI/CD Pipeline (GitHub Actions) │ + │ │ + │ ┌──────────────────────────────────────────────┐ │ + │ │ 1. Terraform Format & Validation │ │ + │ └──────────────┬───────────────────────────────┘ │ + │ │ │ + │ ┌──────────────▼───────────────────────────────┐ │ + │ │ 2. Multi-Cloud Plan Generation │ │ + │ │ (AWS, Azure, GCP in parallel) │ │ + │ └──────────────┬───────────────────────────────┘ │ + │ │ │ + │ ┌──────────────▼───────────────────────────────┐ │ + │ │ 3. Policy Evaluation (OPA) │ │ + │ │ - Terraform Security Policies │ │ + │ │ - Kubernetes Governance Policies │ │ + │ └──────────────┬───────────────────────────────┘ │ + │ │ │ + │ │ [Policies Pass] │ + └─────────────────┼────────────────────────────────────┘ + │ + ┌─────────────────────────┴─────────────────────────┐ + │ │ + ▼ ▼ +┌───────────────────────┐ ┌───────────────────────┐ +│ Infrastructure Layer │ │ Application Layer │ +│ (Terraform) │ │ (Kubernetes) │ +│ │ │ │ +│ ┌─────────────────┐ │ │ ┌─────────────────┐ │ +│ │ Cloud Provider │ │ │ │ GitOps Sync │ │ +│ │ Selection │ │ │ │ (ArgoCD) │ │ +│ │ (aws/azure/ │ │ │ └────────┬────────┘ │ +│ │ gcp) │ │ │ │ │ +│ └────────┬────────┘ │ │ ┌────────▼────────┐ │ +│ │ │ │ │ Kubernetes │ │ +│ ┌────────▼────────┐ │ │ │ Manifests │ │ +│ │ Module Routing │ │ │ │ - Namespace │ │ +│ │ (Per-cloud │ │ │ │ - Deployment │ │ +│ │ directories) │ │ │ │ - Service │ │ +│ └────────┬────────┘ │ │ │ - HPA │ │ +│ │ │ │ │ - PDB │ │ +│ ┌────────▼────────┐ │ │ └─────────────────┘ │ +│ │ Provider Module │ │ │ │ +│ │ aws/azure/gcp │ │ │ │ +│ └────────┬────────┘ │ │ │ +│ │ │ │ │ +│ ┌────────▼────────┐ │ │ │ +│ │ Infrastructure │ │ │ │ +│ │ Resources │ │ │ │ +│ │ - VPC/VNet │ │ │ │ +│ │ - Subnets │ │ │ │ +│ │ - Tags/Labels │ │ │ │ +│ └─────────────────┘ │ │ │ +└───────────────────────┘ └───────────────────────┘ +``` + +### 
Data Flow + +```text +Developer → Git → GitHub → CI/CD Pipeline + + ↓ + + Policy Check Format/Validate + + ↓ + + [Pass/Fail] + + ↓ + + Infrastructure (Terraform) Application (ArgoCD → K8s) +``` + +### Component Details + +| Layer | Purpose | Key Benefits | +|-------|---------|--------------| +| Infrastructure (Terraform) | Provision cloud infrastructure in a vendor-agnostic manner | Clear separation per cloud, consistent patterns, independent deployments | +| Policy (OPA/Rego) | Enforce governance and security before deployment | Shift-left security, automated compliance, policy-as-code | +| CI/CD (GitHub Actions) | Automate validation across all cloud providers | Parallel testing, early detection, audit trail | +| Application (Kubernetes) | Deploy applications using GitOps methodology | Declarative, Git as source of truth, auto-sync, rollback | + +--- + ## Prerequisites - [Terraform](https://www.terraform.io/downloads.html) >= 1.6.0 @@ -29,13 +153,16 @@ This project demonstrates vendor-agnostic multi-cloud deployment through: - [kubectl](https://kubernetes.io/docs/tasks/tools/) (for Kubernetes deployment) - [ArgoCD](https://argo-cd.readthedocs.io/en/stable/) (for GitOps) -## Quick start +--- + +## Quick Start ### 1. Configure Cloud Provider -Select target cloud (`aws`, `azure`, or `gcp`) and set up credentials: +Select target cloud (`aws`, `azure`, or `gcp`) and set up credentials. **AWS:** + ```bash export AWS_ACCESS_KEY_ID="your-access-key" export AWS_SECRET_ACCESS_KEY="your-secret-key" @@ -43,11 +170,13 @@ export AWS_DEFAULT_REGION="us-east-1" ``` **Azure:** + ```bash az login ``` **GCP:** + ```bash gcloud auth application-default login export GOOGLE_PROJECT="your-project-id" @@ -71,6 +200,7 @@ terraform show -json tfplan > tfplan.json ### 3. 
Evaluate Governance Policies **Terraform policies:** + ```bash opa eval --fail-defined --format pretty \ --data ../../../policies/terraform \ @@ -79,203 +209,558 @@ opa eval --fail-defined --format pretty \ ``` **Kubernetes policies:** -```bash -conftest test ../../../k8s/app -p ../../../policies/kubernetes -``` -### 4. Apply Infrastructure (if policies pass) +## CI/CD Pipeline -```bash -terraform apply -``` +The GitHub Actions workflow includes: +- Multi-cloud matrix strategy (tests all three providers) +- Terraform formatting, initialization, and validation +- Policy evaluation for both Terraform and Kubernetes +- Artifact upload for Terraform plans +- Automated dependency updates via Dependabot -## Using the Makefile +## Troubleshooting -The project includes a comprehensive Makefile for common tasks: +### Terraform Issues +**Problem: Provider authentication fails** ```bash -# Show all available commands -make help +# AWS: verify identity +aws sts get-caller-identity -# Run all checks -make all +# Azure: show current account +az account show -# Terraform operations (specify CLOUD=aws, azure, or gcp) -make tf-init CLOUD=aws # Initialize Terraform -make tf-fmt # Format Terraform code -make tf-validate CLOUD=aws # Validate configuration -make tf-plan CLOUD=aws # Generate plan for specific cloud -make tf-apply CLOUD=azure # Apply changes -make tf-destroy CLOUD=gcp # Destroy resources - -# Policy evaluation -make policy-tf # Evaluate Terraform policies -make policy-k8s # Evaluate Kubernetes policies - -# Cleanup -make clean # Remove generated files +# GCP: list active accounts +gcloud auth list ``` +--- + ## Project Structure ``` . 
-├── .github/ -│ ├── workflows/ -│ │ └── pipeline.yml # CI/CD pipeline with matrix strategy -│ └── dependabot.yml # Automated dependency updates -├── argocd/ -│ └── application.yaml # ArgoCD application manifest -├── infra/ -│ └── terraform/ -│ ├── aws/ # AWS-specific Terraform configuration -│ ├── azure/ # Azure-specific Terraform configuration -│ ├── gcp/ # GCP-specific Terraform configuration -│ ├── terraform.tfvars.example # Example variables file -│ ├── backend.tf.example # Backend configuration examples -│ └── modules/ -│ ├── aws/ # AWS-specific resources -│ ├── azure/ # Azure-specific resources -│ └── gcp/ # GCP-specific resources -├── k8s/ -│ └── app/ -│ ├── namespace.yaml # Namespace definition -│ ├── deployment.yaml # Application deployment (production-ready) -│ ├── service.yaml # Service definition -│ ├── pdb.yaml # Pod Disruption Budget -│ └── hpa.yaml # Horizontal Pod Autoscaler -├── policies/ -│ ├── kubernetes/ -│ │ └── required-labels.rego # K8s governance policies -│ └── terraform/ -│ ├── security.rego # Terraform security policies -│ └── sample-tfplan.json # Sample plan for testing -├── CONTRIBUTING.md # Contribution guidelines +├── .github/ # CI/CD workflows +├── argocd/ # ArgoCD application manifests +├── infra/terraform/ # Multi-cloud Terraform IaC +├── k8s/app/ # Primary Kubernetes manifests +├── k8s/test-app/ # Test application manifests +├── policies/ # OPA/Rego policies +├── .pre-commit-config.yaml # Pre-commit hook configuration ├── LICENSE # MIT License ├── Makefile # Build automation └── README.md # This file ``` -## Cloud Provider Details +--- -### AWS -- Creates VPC with DNS support and hostnames enabled -- Deploys subnet in first availability zone -- Applies consistent tagging +## Terraform Infrastructure -### Azure -- Creates Resource Group -- Deploys Virtual Network and Subnet -- Applies consistent tagging +### Directory Layout -### GCP -- Creates VPC network (custom mode) -- Deploys subnet with specified CIDR -- Note: GCP networks 
don't support labels directly +Each cloud provider has its own isolated directory with independent provider configuration, state, and variables. This avoids cross-cloud authentication issues and allows independent deployments. -## Kubernetes Deployment Features +``` +infra/terraform/ +├── aws/ # AWS-specific configuration +├── azure/ # Azure-specific configuration +├── gcp/ # GCP-specific configuration +└── modules/ # Cloud provider modules + ├── aws/ + ├── azure/ + └── gcp/ +``` + +### Cloud Provider Details + +#### AWS Module + +**Resources created:** +- VPC with DNS support and hostnames enabled +- Subnet in the first availability zone +- VPC Flow Logs for network traffic monitoring +- CloudWatch Log Group for flow log storage +- IAM role and policy for VPC Flow Logs +- All resources tagged according to governance requirements + +```hcl +module "aws" { + source = "../modules/aws" + name_prefix = "my-project-dev" + cidr_block = "10.42.0.0/16" + subnet_cidr_block = "10.42.1.0/24" + tags = { + owner = "platform-team" + cost_center = "cc-001" + compliance = "baseline" + } +} +``` + +| Input | Description | Type | +|-------|-------------|------| +| `name_prefix` | Prefix for resource names | `string` | +| `cidr_block` | VPC CIDR block | `string` | +| `subnet_cidr_block` | Subnet CIDR block | `string` | +| `tags` | Tags to apply to all resources | `map(string)` | + +**Requirements:** Terraform >= 1.6.0, AWS provider ~> 6.31 + +#### Azure Module + +**Resources created:** +- Resource Group +- Virtual Network +- Subnet +- All resources tagged according to governance requirements + +```hcl +module "azure" { + source = "../modules/azure" + name_prefix = "my-project-dev" + location = "eastus" + cidr_block = "10.42.0.0/16" + subnet_cidr_block = "10.42.1.0/24" + tags = { + owner = "platform-team" + cost_center = "cc-001" + compliance = "baseline" + } +} +``` + +| Input | Description | Type | +|-------|-------------|------| +| `name_prefix` | Prefix for resource names | 
`string` | +| `location` | Azure region location | `string` | +| `cidr_block` | VNet CIDR block | `string` | +| `subnet_cidr_block` | Subnet CIDR block | `string` | +| `tags` | Tags to apply to all resources | `map(string)` | + +**Requirements:** Terraform >= 1.6.0, AzureRM provider ~> 4.58 + +#### GCP Module + +**Resources created:** +- VPC Network (custom mode, no auto-created subnets) +- Subnet with specified CIDR range + +> **Note:** GCP Compute networks and subnetworks do not support labels directly. + +```hcl +module "gcp" { + source = "../modules/gcp" + name_prefix = "my-project-dev" + region = "us-central1" + cidr_block = "10.42.0.0/16" + subnet_cidr_block = "10.42.1.0/24" +} +``` + +| Input | Description | Type | +|-------|-------------|------| +| `name_prefix` | Prefix for resource names | `string` | +| `region` | GCP region | `string` | +| `cidr_block` | VPC CIDR block (kept for interface consistency) | `string` | +| `subnet_cidr_block` | Subnet CIDR block | `string` | + +**Requirements:** Terraform >= 1.6.0, Google provider ~> 7.18 + +### Backend Configuration + +Each cloud directory has a `backend.tf.example` file. To use a remote backend: + +1. Choose your target cloud directory (`aws/`, `azure/`, or `gcp/`). +2. Copy `backend.tf.example` to `backend.tf`. +3. Edit with your backend-specific settings. 
+ +| Cloud | Backend | Lock Mechanism | +|-------|---------|----------------| +| AWS | S3 bucket | DynamoDB table | +| Azure | Azure Storage Account | Built-in | +| GCP | Cloud Storage bucket | Built-in | + +--- + +## Kubernetes Deployment The sample application includes production-ready configurations: -- ✅ Resource requests and limits -- ✅ Liveness and readiness probes -- ✅ Security contexts (non-root, read-only filesystem) -- ✅ Pod Disruption Budget for high availability -- ✅ Horizontal Pod Autoscaler -- ✅ Proper labeling for governance -## Policy Enforcement +- Resource requests and limits +- Liveness and readiness probes +- Security contexts (non-root, read-only filesystem, drop all capabilities) +- Pod Disruption Budget for high availability +- Horizontal Pod Autoscaler (CPU and memory based) +- Proper labeling for governance compliance +- Volume mounts for nginx with read-only root filesystem + +### Manifests + +| File | Kind | Purpose | +|------|------|---------| +| `namespace.yaml` | Namespace | Creates `dissertation` namespace | +| `deployment.yaml` | Deployment | nginx 1.27 with 2 replicas, full security context | +| `service.yaml` | Service | TCP port 80 exposure | +| `hpa.yaml` | HorizontalPodAutoscaler | Auto-scale 2–10 pods (70% CPU, 80% memory) | +| `pdb.yaml` | PodDisruptionBudget | Minimum 1 pod always available | + +### Scalability Features + +1. Horizontal Scaling: HPA automatically scales pods based on CPU and memory. +2. High Availability: PDB ensures minimum availability during disruptions. +3. Resource Management: Defined requests and limits enable efficient scheduling. 
+ +--- + +## Policy Enforcement (OPA/Rego) ### Kubernetes Policies -- Required labels validation -- Resource limits enforcement -- Security context requirements -- Health probe requirements -- Read-only filesystem enforcement + +**Location:** `policies/kubernetes/required-labels.rego` + +| Rule | Description | +|------|-------------| +| Required Labels | `app.kubernetes.io/name`, `app.kubernetes.io/part-of`, `owner`, `compliance` | +| Resource Limits | All containers must define `resources.requests` and `resources.limits` | +| Security Context | Must be defined; `privileged` must not be true; `readOnlyRootFilesystem` should be true | +| Health Probes | All containers should define `livenessProbe` and `readinessProbe` | + +**Test locally:** + +### Policy Issues + +**Problem: Policy evaluation fails** +```bash +conftest test k8s -p policies/kubernetes +``` ### Terraform Policies -- Required tags/labels on all resources -- Public access restrictions -- Encryption requirements -- Network security validations + +**Location:** `policies/terraform/security.rego` + +| Rule | Description | +|------|-------------| +| Required Tags | `owner`, `cost_center`, `compliance`, `project`, `environment`, `managed_by` | +| Storage Security | S3/Azure Storage/GCS must not allow public access; S3 must have encryption; Azure must enable HTTPS only | +| Network Security | AWS VPCs should have Flow Logs enabled; GCP firewalls should not allow unrestricted `0.0.0.0/0` ingress | + +> **Note:** Certain resource types that don't support tags/labels are automatically exempted (e.g., `aws_iam_role_policy`, `azurerm_subnet`, `google_compute_network`). 
+**Test locally:** + +```bash +# Against sample plan +opa eval --fail-defined --format pretty \ + --data policies/terraform \ + --input policies/terraform/sample-tfplan.json \ + "data.terraform.deny[_]" + +# Against a real plan +terraform -chdir=infra/terraform/aws plan -out=tfplan +terraform -chdir=infra/terraform/aws show -json tfplan > tfplan.json + +opa eval --fail-defined --format pretty \ + --data policies/terraform \ + --input tfplan.json \ + "data.terraform.deny[_]" +``` + +### Adding New Policies + +**Kubernetes:** + +```rego +deny[msg] { + # Your condition logic + msg := sprintf("Your error message: %v", [variables]) +} +``` + +**Terraform:** + +```rego +deny contains msg if { + some rc in input.resource_changes + # Your condition logic + msg := sprintf("Your error message: %v", [variables]) +} +``` + +--- ## CI/CD Pipeline -The GitHub Actions workflow includes: -- Multi-cloud matrix strategy (tests all three providers) -- Terraform formatting, initialization, and validation -- Policy evaluation for both Terraform and Kubernetes -- Artifact upload for Terraform plans -- Automated dependency updates via Dependabot +### Overview + +The GitHub Actions workflow (`.github/workflows/pipeline.yml`) runs on every push to `main` and on all pull requests. It uses a matrix strategy to test all three cloud providers in parallel. + +**Pipeline stages:** + +1. Terraform Format Check — Ensures code consistency +2. Terraform Init and Validate — Verifies syntax (`-backend=false` for CI) +3. Terraform Plan — Generates plans per cloud provider +4. OPA Policy Evaluation — Runs against both the sample plan and the real plan +5. Conftest Kubernetes Policy Evaluation — Validates K8s manifests +6. Artifact Upload — Saves Terraform plans for review (30-day retention) + +### GitHub Repository Secrets + +Configure these in **Settings → Secrets and variables → Actions → New repository secret**.
+ +#### AWS + +``` +AWS_ACCESS_KEY_ID +AWS_SECRET_ACCESS_KEY +AWS_SESSION_TOKEN (optional) +AWS_REGION (optional, defaults to us-east-1) +``` + +#### Azure + +``` +ARM_CLIENT_ID +ARM_CLIENT_SECRET +ARM_SUBSCRIPTION_ID +ARM_TENANT_ID +``` + +To generate Azure credentials: + +```bash +az login +az account show --query id -o tsv # subscription id +az account show --query tenantId -o tsv # tenant id + +az ad sp create-for-rbac \ + --name "dissertation-terraform" \ + --role "Contributor" \ + --scopes "/subscriptions/<SUBSCRIPTION_ID>" \ + --sdk-auth +``` + +Map the service principal output: +- `clientId` → `ARM_CLIENT_ID` +- `clientSecret` → `ARM_CLIENT_SECRET` +- `tenantId` → `ARM_TENANT_ID` +- `subscriptionId` → `ARM_SUBSCRIPTION_ID` + +> **Troubleshooting 403 AuthorizationFailed:** Ensure the role assignment exists at the subscription level: +> ```bash +> az role assignment create \ +> --assignee "<CLIENT_ID>" \ +> --role "Contributor" \ +> --scope "/subscriptions/<SUBSCRIPTION_ID>" +> ``` + +#### GCP + +``` +GCP_PROJECT_ID +GCP_SA_KEY (service account JSON key) +``` + +### Dependabot + +Dependabot is configured (`.github/dependabot.yml`) to check weekly for: + +- GitHub Actions version updates +- Terraform provider updates (for all 6 Terraform directories) + +### Customisation + +**Test only specific clouds:** + +```yaml +strategy: + matrix: + cloud: [aws] # Only test AWS +``` + +**Add deployment step (use with caution):** + +```yaml +- name: Terraform Apply + if: github.ref == 'refs/heads/main' + run: terraform -chdir=infra/terraform/${{ matrix.cloud }} apply -auto-approve +``` + +--- + +## ArgoCD / GitOps Setup + +### Install ArgoCD + +```bash +kubectl create namespace argocd +kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml +kubectl wait --for=condition=available --timeout=600s deployment/argocd-server -n argocd +``` + +### Access ArgoCD UI + +```bash +# Get initial admin password +kubectl -n argocd get secret argocd-initial-admin-secret -o
jsonpath="{.data.password}" | base64 -d + +# Port forward to access UI +kubectl port-forward svc/argocd-server -n argocd 8081:443 +# Access at https://localhost:8081 (admin / <initial-admin-password>) +``` + +### Deploy the Application + +1. Confirm both ArgoCD Application manifests point at your repository. + +```yaml +source: + repoURL: https://github.com/stephenjtyrrell/dissertation.git +``` + +2. Deploy the primary application and wait for a healthy ArgoCD sync. + +```bash +make argocd-test +``` + +3. Deploy the dedicated test application and wait for health. + +```bash +make argocd-test-app-sync +``` + +4. Verify both applications and workloads. + +```bash +kubectl get application -n argocd +kubectl get all -n dissertation +kubectl get all -n dissertation-test +``` + +The ArgoCD applications are configured with: + +- Automated sync with self-healing and pruning +- Auto namespace creation (`CreateNamespace=true`) +- Retry policy with exponential backoff (5 attempts, max 3 minutes) + +--- + +## Pre-commit Hooks + +### Setup + +```bash +pip install pre-commit +pre-commit install +pre-commit run --all-files # Test on all files +``` + +### What Gets Checked + +- Trailing whitespace and end-of-file fixes +- YAML syntax validation +- Large file detection +- Merge conflict markers +- Private key detection +- Terraform formatting and validation +- Terraform documentation generation +- Rego policy verification via Conftest + +### Skip Hooks (When Necessary) + +```bash +git commit --no-verify -m "urgent fix" +SKIP=terraform_fmt git commit -m "commit message" +``` + +--- ## Troubleshooting ### Terraform Issues -**Problem: Provider authentication fails** +**Provider authentication fails:** + ```bash -# Verify credentials are set correctly aws sts get-caller-identity # AWS az account show # Azure gcloud auth list # GCP ``` -**Problem: Backend initialization fails** -- Ensure you've configured the backend in `backend.tf` +**Backend initialization fails:** + +- Ensure you've copied
`backend.tf.example` to `backend.tf` and configured it - Check that you have permissions to access the state storage +**Run validation locally:** + +```bash +make tf-init CLOUD=aws +make tf-validate CLOUD=aws +``` + ### Policy Issues -**Problem: Policy evaluation fails** +**Validate Rego syntax:** + ```bash -# Validate Rego syntax opa check policies/terraform/security.rego opa check policies/kubernetes/required-labels.rego +``` + +**Test with verbose output:** -# Test policies with verbose output +```bash opa eval --format pretty \ --data policies/terraform \ --input policies/terraform/sample-tfplan.json \ "data.terraform" ``` -**Problem: Kubernetes policies fail** -- Ensure all required labels are present -- Check that resource limits are defined -- Verify security contexts are properly configured - -## Development +**Kubernetes policies fail:** -### Pre-commit Hooks +- Ensure all required labels are present on every resource +- Check that resource limits are defined on all containers +- Verify security contexts are properly configured -Install pre-commit hooks for automatic validation: +### ArgoCD Issues ```bash -pip install pre-commit -pre-commit install +kubectl logs -n argocd deployment/argocd-application-controller +kubectl describe application dissertation-sample-api -n argocd ``` -This will automatically run: -- Terraform formatting and validation -- YAML linting -- Secret detection -- Trailing whitespace cleanup +--- -### Contributing +## Security Features -See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed contribution guidelines. +1. Policy Enforcement — Automated validation before any deployment +2. Security Contexts — Non-root containers, read-only filesystems, dropped capabilities +3. Network Security — HTTPS-only, no public access by default, VPC Flow Logs +4. Secret Management — All credentials stored in GitHub Secrets, never committed +5. Dependency Scanning — Dependabot monitors for outdated dependencies +6. 
Pre-commit Hooks — Private key detection, secret scanning before commits -## Notes +--- -- This is intentionally provider-neutral at the orchestration level -- Cloud-specific details are isolated inside provider modules -- Governance controls are codified and enforced before deployment -- The project demonstrates scalability through automated multi-cloud testing +## Best Practices + +1. Always create feature branches — Don't push directly to main +2. Wait for CI checks before merging pull requests +3. Review policy violations carefully — they're there for a reason +4. Keep dependencies updated — Merge Dependabot PRs regularly +5. Monitor ArgoCD — Ensure applications stay in sync +6. Use pre-commit hooks — Catch issues before they reach CI + +--- ## License -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. +This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details. ## Acknowledgments This implementation demonstrates concepts from the dissertation: + *"Impact of governance and scalability with automated vendor-agnostic multi-cloud deployment pipelines"* diff --git a/argocd/application.yaml b/argocd/application.yaml index 9ee9f0a..f35ad43 100644 --- a/argocd/application.yaml +++ b/argocd/application.yaml @@ -9,9 +9,8 @@ metadata: spec: project: default source: - # Update this to your actual repository URL - repoURL: https://github.com/YOUR_USERNAME/dissertation - targetRevision: HEAD + repoURL: https://github.com/stephenjtyrrell/dissertation.git + targetRevision: main path: k8s/app destination: server: https://kubernetes.default.svc diff --git a/argocd/test-application.yaml b/argocd/test-application.yaml new file mode 100644 index 0000000..ad3c8cf --- /dev/null +++ b/argocd/test-application.yaml @@ -0,0 +1,26 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: dissertation-test-api + namespace: argocd +spec: + project: default + source: + repoURL: 
https://github.com/stephenjtyrrell/dissertation.git
+    targetRevision: main
+    path: k8s/test-app
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: dissertation-test
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
diff --git a/k8s/test-app/deployment.yaml b/k8s/test-app/deployment.yaml
new file mode 100644
index 0000000..138dfda
--- /dev/null
+++ b/k8s/test-app/deployment.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: smoke-api
+  labels:
+    app.kubernetes.io/name: smoke-api
+    app.kubernetes.io/part-of: dissertation-platform
+    owner: platform-team
+    compliance: baseline
+spec:
+  # spec.replicas is intentionally omitted: smoke-api-hpa owns the replica
+  # count, and pinning it here makes Argo CD selfHeal fight the autoscaler.
+  selector:
+    matchLabels:
+      app: smoke-api
+  template:
+    metadata:
+      labels:
+        app: smoke-api
+        app.kubernetes.io/name: smoke-api
+        app.kubernetes.io/part-of: dissertation-platform
+        owner: platform-team
+        compliance: baseline
+    spec:
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        fsGroup: 1000
+      containers:
+        - name: smoke-api
+          # Unprivileged nginx build: this pod runs as UID 1000 with every
+          # capability dropped, so the stock image's bind to port 80 is
+          # refused on default node settings. This image listens on 8080.
+          image: nginxinc/nginx-unprivileged:1.27
+          ports:
+            - containerPort: 8080
+              name: http
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              cpu: 200m
+              memory: 256Mi
+          livenessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 30
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 5
+            timeoutSeconds: 3
+            failureThreshold: 3
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            runAsUser: 1000
+            capabilities:
+              drop:
+                - ALL
+          volumeMounts:
+            - name: tmp
+              mountPath: /tmp
+            - name: cache
+              mountPath: /var/cache/nginx
+            - name: run
+              mountPath: /var/run
+      volumes:
+        - name: tmp
+          emptyDir: {}
+        - name: cache
+          emptyDir: {}
+        - name: run
+          emptyDir: {}
diff --git a/k8s/test-app/hpa.yaml b/k8s/test-app/hpa.yaml
new file mode 100644
index 0000000..3254a56
--- /dev/null
+++ b/k8s/test-app/hpa.yaml
@@ -0,0 +1,29 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: smoke-api-hpa
+  labels:
+    app.kubernetes.io/name: smoke-api
+    app.kubernetes.io/part-of: dissertation-platform
+    owner: platform-team
+    compliance: baseline
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: smoke-api
+  minReplicas: 2
+  maxReplicas: 5
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 70
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: 80
diff --git a/k8s/test-app/namespace.yaml b/k8s/test-app/namespace.yaml
new file mode 100644
index 0000000..c22a373
--- /dev/null
+++ b/k8s/test-app/namespace.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: dissertation-test
+  labels:
+    app.kubernetes.io/name: dissertation-test
+    app.kubernetes.io/part-of: dissertation-platform
+    owner: platform-team
+    compliance: baseline
diff --git a/k8s/test-app/pdb.yaml b/k8s/test-app/pdb.yaml
new file mode 100644
index 0000000..173c07f
--- /dev/null
+++ b/k8s/test-app/pdb.yaml
@@ -0,0 +1,14 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: smoke-api-pdb
+  labels:
+    app.kubernetes.io/name: smoke-api
+    app.kubernetes.io/part-of: dissertation-platform
+    owner: platform-team
+    compliance: baseline
+spec:
+  minAvailable: 1
+  selector:
+    matchLabels:
+      app: smoke-api
diff --git a/k8s/test-app/service.yaml b/k8s/test-app/service.yaml
new file mode 100644
index 0000000..cd65178
--- /dev/null
+++ b/k8s/test-app/service.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: smoke-api
+  labels:
+    app.kubernetes.io/name: smoke-api
+    app.kubernetes.io/part-of: dissertation-platform
+    owner: platform-team
+    compliance: baseline
+spec:
+  selector:
+    app: smoke-api
+  ports:
+    - protocol: TCP
+      port: 80
+      # Named target, so the Service follows the container's "http" port.
+      targetPort: http
diff --git
a/policies/kubernetes/required-labels.rego b/policies/kubernetes/required-labels.rego index 721cea8..509345b 100644 --- a/policies/kubernetes/required-labels.rego +++ b/policies/kubernetes/required-labels.rego @@ -1,4 +1,7 @@ -package kubernetes +package main + +import future.keywords.contains +import future.keywords.if required_labels := {"app.kubernetes.io/name", "app.kubernetes.io/part-of", "owner", "compliance"} @@ -14,7 +17,7 @@ deny[msg] { } # Check for resource limits on containers -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -22,7 +25,7 @@ deny[msg] { msg := sprintf("Deployment/%s: container '%s' must define resource limits", [obj.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -31,7 +34,7 @@ deny[msg] { } # Check for security context -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -39,7 +42,7 @@ deny[msg] { msg := sprintf("Deployment/%s: container '%s' must define securityContext", [obj.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -47,7 +50,7 @@ deny[msg] { msg := sprintf("Deployment/%s: container '%s' must not run in privileged mode", [obj.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -58,7 +61,7 @@ deny[msg] { } # Check for liveness and readiness probes -deny[msg] { +deny contains msg if { obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] @@ -66,7 +69,7 @@ deny[msg] { msg := sprintf("Deployment/%s: container '%s' should define a livenessProbe", [obj.metadata.name, container.name]) } -deny[msg] { +deny contains msg if 
{ obj := input obj.kind == "Deployment" container := obj.spec.template.spec.containers[_] diff --git a/policies/terraform/security.rego b/policies/terraform/security.rego index cef6240..ceec6f8 100644 --- a/policies/terraform/security.rego +++ b/policies/terraform/security.rego @@ -1,5 +1,8 @@ package terraform +import future.keywords.contains +import future.keywords.if + required_tags := {"owner", "cost_center", "compliance", "project", "environment", "managed_by"} # Resource types that do not support tags/labels