From 3b9da4ff3e724c209e17220207ce7257530bf1b2 Mon Sep 17 00:00:00 2001 From: Moritz Bracht Date: Thu, 4 Jun 2026 12:18:56 +0200 Subject: [PATCH 1/3] feat: add argo-workflows v4.0.5 as OCM component --- .github/workflows/release-ocm-components.yml | 1 + README.md | 24 ++ argo-workflows/README.md | 117 ++++++++++ argo-workflows/component-constructor.yaml | 64 ++++++ argo-workflows/minimal-values.yaml | 61 +++++ argo-workflows/production-values.yaml | 117 ++++++++++ argo-workflows/tests/test-minimal.sh | 177 +++++++++++++++ argo-workflows/tests/test-production.sh | 223 +++++++++++++++++++ 8 files changed, 784 insertions(+) create mode 100644 argo-workflows/README.md create mode 100644 argo-workflows/component-constructor.yaml create mode 100644 argo-workflows/minimal-values.yaml create mode 100644 argo-workflows/production-values.yaml create mode 100755 argo-workflows/tests/test-minimal.sh create mode 100755 argo-workflows/tests/test-production.sh diff --git a/.github/workflows/release-ocm-components.yml b/.github/workflows/release-ocm-components.yml index 2fa1e44..5418c2f 100644 --- a/.github/workflows/release-ocm-components.yml +++ b/.github/workflows/release-ocm-components.yml @@ -13,6 +13,7 @@ jobs: strategy: matrix: component: + - ./argo-workflows - ./artifact-conduit - ./cert-manager - ./cloudnative-pg diff --git a/README.md b/README.md index 27aec22..485e3a0 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,30 @@ Components can reference each other to fulfill dependencies rather than duplicat ## Available Components +### Argo Workflows (v4.0.5) + +Kubernetes-native Workflow Engine + +- **Status**: ✅ Ready +- **CNCF**: Graduated Project +- **License**: Apache 2.0 +- **Configurations**: + - Minimal (single replica, server auth, dev/test) + - Production (HA with 2 replicas, client/SSO auth, PDBs, monitoring) +- **Documentation**: [argo-workflows/README.md](argo-workflows/README.md) +- **Used by**: Artifact Conduit (artifact-conduit component) + +Quick Start: + 
+```bash +helm repo add argo https://argoproj.github.io/argo-helm +helm install argo-workflows argo/argo-workflows \ + --version 1.0.14 \ + --namespace argo \ + --create-namespace \ + --values argo-workflows/minimal-values.yaml +``` + ### Keycloak (v26.4.5) Identity and Access Management diff --git a/argo-workflows/README.md b/argo-workflows/README.md new file mode 100644 index 0000000..08213d6 --- /dev/null +++ b/argo-workflows/README.md @@ -0,0 +1,117 @@ +# Argo Workflows OCM Component + +Argo Workflows v4.0.5 packaged as an OCM component. Argo Workflows is a CNCF graduated project providing a Kubernetes-native workflow engine for orchestrating parallel jobs, ML pipelines, data processing, and CI/CD. + +## OCM Resources + +| Resource | Type | Description | +|---|---|---| +| `argo-workflows-chart` | helmChart | Official Argo Workflows Helm chart v1.0.14 | +| `argo-workflows-controller-image` | ociImage | Workflow controller (`quay.io/argoproj/workflow-controller:v4.0.5`) | +| `argo-workflows-server-image` | ociImage | Argo server/UI (`quay.io/argoproj/argocli:v4.0.5`) | +| `argo-workflows-executor-image` | ociImage | Workflow executor (`quay.io/argoproj/argoexec:v4.0.5`) | +| `argo-workflows-minimal-config` | yaml | Minimal Helm values (dev/test) | +| `argo-workflows-production-config` | yaml | Production Helm values (HA) | + +## Quick Start + +### Build OCM Component + +```bash +cd argo-workflows +ocm add componentversion --version 4.0.5 --create --file ./ctf component-constructor.yaml +``` + +### Install via Helm (Minimal) + +```bash +helm repo add argo https://argoproj.github.io/argo-helm +helm install argo-workflows argo/argo-workflows \ + --version 1.0.14 \ + --namespace argo \ + --create-namespace \ + --values minimal-values.yaml +``` + +### Install via Helm (Production) + +```bash +helm install argo-workflows argo/argo-workflows \ + --version 1.0.14 \ + --namespace argo \ + --create-namespace \ + --values production-values.yaml +``` + +## Configuration 
Profiles + +### Minimal (dev/test) + +| Parameter | Value | +|---|---| +| Controller replicas | 1 | +| Server replicas | 1 | +| Auth mode | `server` (no token required) | +| Workflow archive | disabled | +| Prometheus monitoring | disabled | +| Controller CPU request | 50m | +| Controller memory request | 128Mi | + +### Production (HA) + +| Parameter | Value | +|---|---| +| Controller replicas | 2 | +| Server replicas | 2 | +| Auth mode | `client` (Kubernetes RBAC) | +| Max concurrent workflows | 50 | +| Prometheus monitoring | enabled | +| PodDisruptionBudgets | enabled (minAvailable: 1) | +| Pod anti-affinity | preferred (hostname) | +| TopologySpreadConstraints | zone-aware | +| Controller CPU request | 100m | +| Controller memory request | 256Mi | + +#### SSO/OIDC Authentication + +The production profile defaults to `client` auth (Kubernetes service account tokens). To enable SSO, set in your values override: + +```yaml +server: + authModes: + - sso + sso: + issuer: https://your-oidc-provider + clientId: + name: argo-workflows-sso + key: client-id + clientSecret: + name: argo-workflows-sso + key: client-secret + redirectUrl: https://argo-workflows.example.com/oauth2/callback +``` + +## Architecture + +Argo Workflows consists of three main components: + +- **Workflow Controller**: Watches Workflow CRs and schedules pods +- **Argo Server**: REST API + web UI + CLI proxy +- **Executor (argoexec)**: Sidecar injected into each workflow step pod; manages step lifecycle and artifact collection + +## Testing + +```bash +# Minimal deployment on a local kind cluster +bash tests/test-minimal.sh + +# Production HA deployment on a multi-node kind cluster +bash tests/test-production.sh + +# Keep the cluster after tests for inspection +bash tests/test-minimal.sh --skip-cleanup +``` + +## Dependency Note + +Argo Workflows is a core dependency of the `artifact-conduit` component. When deploying artifact-conduit, ensure argo-workflows is installed in the same cluster first. 
diff --git a/argo-workflows/component-constructor.yaml b/argo-workflows/component-constructor.yaml new file mode 100644 index 0000000..a3412e3 --- /dev/null +++ b/argo-workflows/component-constructor.yaml @@ -0,0 +1,64 @@ +components: + - name: opendefense.cloud/argo-workflows + provider: + name: argoproj.io + labels: + - name: app.kubernetes.io/name + value: argo-workflows + - name: app.kubernetes.io/component + value: workflow-engine + - name: app.kubernetes.io/part-of + value: argo-workflows + resources: + # Argo Workflows Helm Chart (official argoproj) + - name: argo-workflows-chart + type: helmChart + version: 1.0.14 + relation: external + access: + type: helm + helmChart: argo-workflows:1.0.14 + helmRepository: https://argoproj.github.io/argo-helm + + # Workflow Controller Container Image + - name: argo-workflows-controller-image + type: ociImage + version: v4.0.5 + relation: external + access: + type: ociArtifact + imageReference: quay.io/argoproj/workflow-controller:v4.0.5 + + # Argo Server (CLI/UI) Container Image + - name: argo-workflows-server-image + type: ociImage + version: v4.0.5 + relation: external + access: + type: ociArtifact + imageReference: quay.io/argoproj/argocli:v4.0.5 + + # Workflow Executor (argoexec) Container Image + - name: argo-workflows-executor-image + type: ociImage + version: v4.0.5 + relation: external + access: + type: ociArtifact + imageReference: quay.io/argoproj/argoexec:v4.0.5 + + # Minimal Configuration (single replica, no auth, dev/test) + - name: argo-workflows-minimal-config + type: yaml + relation: local + input: + type: file + path: minimal-values.yaml + + # Production Configuration (HA, client auth, monitoring) + - name: argo-workflows-production-config + type: yaml + relation: local + input: + type: file + path: production-values.yaml diff --git a/argo-workflows/minimal-values.yaml b/argo-workflows/minimal-values.yaml new file mode 100644 index 0000000..fbd5b2d --- /dev/null +++ b/argo-workflows/minimal-values.yaml 
@@ -0,0 +1,61 @@ +# Argo Workflows Minimal Configuration +# Suitable for development and testing environments +# Single replica, server auth mode (no token required), minimal resources + +crds: + install: true + keep: true + +# Create a dedicated service account and RBAC for workflow pods +workflow: + serviceAccount: + create: true + rbac: + create: true + +controller: + replicas: 1 + image: + registry: quay.io + repository: argoproj/workflow-controller + tag: "v4.0.5" + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + serviceMonitor: + enabled: false + +executor: + image: + registry: quay.io + repository: argoproj/argoexec + tag: "v4.0.5" + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi + +server: + enabled: true + replicas: 1 + image: + registry: quay.io + repository: argoproj/argocli + tag: "v4.0.5" + # server mode: no authentication required — suitable for dev/test only + authModes: + - server + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi diff --git a/argo-workflows/production-values.yaml b/argo-workflows/production-values.yaml new file mode 100644 index 0000000..bac5389 --- /dev/null +++ b/argo-workflows/production-values.yaml @@ -0,0 +1,117 @@ +# Argo Workflows Production Configuration +# HA setup: multiple replicas, client auth (k8s RBAC), workflow parallelism limit, +# Prometheus monitoring, PodDisruptionBudgets, pod anti-affinity. +# +# Auth note: authModes is set to "client" (Kubernetes service account tokens). +# To use SSO/OIDC instead, change authModes to ["sso"] and configure server.sso.* +# with your OIDC provider details.
+ +crds: + install: true + keep: true + +# Create a dedicated service account and RBAC for workflow pods +workflow: + serviceAccount: + create: true + rbac: + create: true + +controller: + replicas: 2 + image: + registry: quay.io + repository: argoproj/workflow-controller + tag: "v4.0.5" + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + # Limit total concurrent workflows across the cluster + parallelism: 50 + serviceMonitor: + enabled: true + pdb: + enabled: true + minAvailable: 1 + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + app: workflow-controller + topologyKey: kubernetes.io/hostname + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: workflow-controller + +executor: + image: + registry: quay.io + repository: argoproj/argoexec + tag: "v4.0.5" + resources: + requests: + cpu: 100m + memory: 64Mi + limits: + cpu: 500m + memory: 512Mi + +server: + enabled: true + replicas: 2 + image: + registry: quay.io + repository: argoproj/argocli + tag: "v4.0.5" + # client mode: enforces Kubernetes RBAC via service account tokens + authModes: + - client + # SSO/OIDC alternative — uncomment and configure if an OIDC provider is available: + # authModes: + # - sso + # sso: + # issuer: https://your-oidc-provider + # clientId: + # name: argo-workflows-sso + # key: client-id + # clientSecret: + # name: argo-workflows-sso + # key: client-secret + # redirectUrl: https://argo-workflows.example.com/oauth2/callback + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + pdb: + enabled: true + minAvailable: 1 + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + app: argo-server + topologyKey: 
kubernetes.io/hostname + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: argo-server diff --git a/argo-workflows/tests/test-minimal.sh b/argo-workflows/tests/test-minimal.sh new file mode 100755 index 0000000..ae43089 --- /dev/null +++ b/argo-workflows/tests/test-minimal.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Argo Workflows Minimal Deployment Test +# Deploys argo-workflows on a local kind cluster and verifies: +# CRD creation, pod readiness, and successful workflow execution. + +CLUSTER_NAME="${KIND_CLUSTER_NAME:-argo-workflows-test}" +NAMESPACE="argo" +TIMEOUT="300s" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +COMPONENT_DIR="$(dirname "$SCRIPT_DIR")" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*"; } + +cleanup() { + log_info "Cleaning up kind cluster '${CLUSTER_NAME}'..." + kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true +} + +SKIP_CLEANUP=false +for arg in "$@"; do + case $arg in + --skip-cleanup) SKIP_CLEANUP=true ;; + esac +done + +if [ "$SKIP_CLEANUP" = false ]; then + trap cleanup EXIT +fi + +for cmd in kind kubectl helm; do + if ! command -v "$cmd" &>/dev/null; then + log_error "'$cmd' is required but not found in PATH" + exit 1 + fi +done + +log_info "=== Argo Workflows Minimal Deployment Test ===" + +# Step 1: Create kind cluster +log_info "Creating kind cluster '${CLUSTER_NAME}'..." +if kind get clusters 2>/dev/null | grep -q "^${CLUSTER_NAME}$"; then + log_warn "Cluster '${CLUSTER_NAME}' already exists, deleting..." + kind delete cluster --name "${CLUSTER_NAME}" +fi +kind create cluster --name "${CLUSTER_NAME}" --wait 60s + +# Step 2: Install Argo Workflows via Helm +log_info "Adding argo Helm repository..." 
+helm repo add argo https://argoproj.github.io/argo-helm --force-update +helm repo update argo + +log_info "Installing argo-workflows with minimal values..." +helm install argo-workflows argo/argo-workflows \ + --version 1.0.14 \ + --namespace "${NAMESPACE}" \ + --create-namespace \ + --values "${COMPONENT_DIR}/minimal-values.yaml" \ + --wait \ + --timeout "${TIMEOUT}" + +# Step 3: Verify CRDs +log_info "Verifying CRDs..." +EXPECTED_CRDS=( + "workflows.argoproj.io" + "workflowtemplates.argoproj.io" + "clusterworkflowtemplates.argoproj.io" + "cronworkflows.argoproj.io" + "workflowartifactgctasks.argoproj.io" +) + +for crd in "${EXPECTED_CRDS[@]}"; do + if kubectl get crd "$crd" &>/dev/null; then + log_info " CRD found: $crd" + else + log_error " CRD missing: $crd" + exit 1 + fi +done + +# Step 4: Verify pods are running +log_info "Waiting for all argo-workflows pods to be ready..." +kubectl wait --for=condition=Ready pods --all \ + -n "${NAMESPACE}" \ + --timeout="${TIMEOUT}" + +log_info "Pod status:" +kubectl get pods -n "${NAMESPACE}" -o wide + +# Step 5: Verify deployments +log_info "Checking deployments..." +for deploy in argo-workflows-workflow-controller argo-workflows-server; do + READY=$(kubectl get deployment "$deploy" -n "${NAMESPACE}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + if [ "${READY:-0}" -ge 1 ]; then + log_info " Deployment $deploy: $READY replica(s) ready" + else + log_error " Deployment $deploy: not ready" + kubectl describe deployment "$deploy" -n "${NAMESPACE}" + exit 1 + fi +done + +# Step 6: Submit a hello-world workflow +log_info "Submitting hello-world workflow..." 
+kubectl apply -f - <<'EOF' +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: hello-world + namespace: argo +spec: + serviceAccountName: argo-workflow + entrypoint: hello + templates: + - name: hello + container: + image: busybox:1.36 + command: [sh, -c] + args: ["echo 'Hello from Argo Workflows!'"] + resources: + requests: + cpu: 50m + memory: 32Mi +EOF + +# Step 7: Wait for workflow to succeed +log_info "Waiting for workflow to complete..." +for i in $(seq 1 120); do + PHASE=$(kubectl get workflow hello-world -n argo -o jsonpath='{.status.phase}' 2>/dev/null || echo "") + case "$PHASE" in + Succeeded) + log_info " Workflow completed successfully (phase: $PHASE)" + break + ;; + Failed|Error) + log_error " Workflow failed (phase: $PHASE)" + kubectl describe workflow hello-world -n argo + kubectl get pods -n argo -l workflows.argoproj.io/workflow=hello-world -o wide + exit 1 + ;; + esac + if [ "$i" -eq 120 ]; then + log_error " Workflow did not complete within 120 seconds (phase: ${PHASE:-unknown})" + kubectl describe workflow hello-world -n argo + exit 1 + fi + sleep 1 +done + +# Step 8: Verify workflow pod logs +log_info "Checking workflow pod logs..." 
+WORKFLOW_POD=$(kubectl get pods -n argo -l workflows.argoproj.io/workflow=hello-world -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") +if [ -n "$WORKFLOW_POD" ]; then + LOGS=$(kubectl logs "$WORKFLOW_POD" -n argo -c main 2>/dev/null || echo "") + if echo "$LOGS" | grep -q "Hello from Argo Workflows"; then + log_info " Workflow logs verified: output found" + else + log_warn " Could not verify workflow output in logs (pod may have been cleaned up)" + fi +fi + +# Summary +echo "" +log_info "=== Test Summary ===" +log_info "CRDs: ${#EXPECTED_CRDS[@]} argo-workflows CRDs installed" +log_info "Pods: $(kubectl get pods -n ${NAMESPACE} --no-headers | grep -c Running) running" +log_info "Workflows: $(kubectl get workflows -n argo --no-headers 2>/dev/null | wc -l | tr -d ' ') completed" +echo "" +log_info "=== All minimal tests passed! ===" diff --git a/argo-workflows/tests/test-production.sh b/argo-workflows/tests/test-production.sh new file mode 100755 index 0000000..cb7069e --- /dev/null +++ b/argo-workflows/tests/test-production.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Argo Workflows Production Deployment Test +# Deploys argo-workflows on a multi-node kind cluster and verifies: +# HA replicas, PodDisruptionBudgets, pod distribution, and DAG workflow execution. + +CLUSTER_NAME="${KIND_CLUSTER_NAME:-argo-workflows-prod-test}" +NAMESPACE="argo" +TIMEOUT="300s" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +COMPONENT_DIR="$(dirname "$SCRIPT_DIR")" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*"; } + +cleanup() { + log_info "Cleaning up kind cluster '${CLUSTER_NAME}'..." 
+ kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true +} + +SKIP_CLEANUP=false +for arg in "$@"; do + case $arg in + --skip-cleanup) SKIP_CLEANUP=true ;; + esac +done + +if [ "$SKIP_CLEANUP" = false ]; then + trap cleanup EXIT +fi + +for cmd in kind kubectl helm; do + if ! command -v "$cmd" &>/dev/null; then + log_error "'$cmd' is required but not found in PATH" + exit 1 + fi +done + +log_info "=== Argo Workflows Production Deployment Test ===" + +# Step 1: Create multi-node kind cluster +log_info "Creating multi-node kind cluster '${CLUSTER_NAME}'..." +if kind get clusters 2>/dev/null | grep -q "^${CLUSTER_NAME}$"; then + log_warn "Cluster '${CLUSTER_NAME}' already exists, deleting..." + kind delete cluster --name "${CLUSTER_NAME}" +fi + +cat <<'EOF' | kind create cluster --name "${CLUSTER_NAME}" --config=- --wait 60s +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker + - role: worker +EOF + +# Step 2: Install Argo Workflows with production values +log_info "Adding argo Helm repository..." +helm repo add argo https://argoproj.github.io/argo-helm --force-update +helm repo update argo + +log_info "Installing argo-workflows with production values..." +helm install argo-workflows argo/argo-workflows \ + --version 1.0.14 \ + --namespace "${NAMESPACE}" \ + --create-namespace \ + --values "${COMPONENT_DIR}/production-values.yaml" \ + --wait \ + --timeout "${TIMEOUT}" + +# Step 3: Verify pods are ready +log_info "Waiting for all argo-workflows pods to be ready..." +kubectl wait --for=condition=Ready pods --all \ + -n "${NAMESPACE}" \ + --timeout="${TIMEOUT}" + +log_info "Pod status:" +kubectl get pods -n "${NAMESPACE}" -o wide + +# Step 4: Verify HA replica counts +log_info "Verifying HA replica counts..." 
+CONTROLLER_READY=$(kubectl get deployment argo-workflows-workflow-controller -n "${NAMESPACE}" \ + -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") +SERVER_READY=$(kubectl get deployment argo-workflows-server -n "${NAMESPACE}" \ + -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + +if [ "${CONTROLLER_READY:-0}" -ge 2 ]; then + log_info " workflow-controller: $CONTROLLER_READY replicas ready (>= 2 required)" +else + log_error " workflow-controller: only $CONTROLLER_READY replica(s) ready, expected >= 2" + exit 1 +fi + +if [ "${SERVER_READY:-0}" -ge 2 ]; then + log_info " argo-server: $SERVER_READY replicas ready (>= 2 required)" +else + log_error " argo-server: only $SERVER_READY replica(s) ready, expected >= 2" + exit 1 +fi + +# Step 5: Verify PodDisruptionBudgets exist +log_info "Verifying PodDisruptionBudgets..." +for pdb in $(kubectl get pdb -n "${NAMESPACE}" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null); do + log_info " PDB found: $pdb" +done + +PDB_COUNT=$(kubectl get pdb -n "${NAMESPACE}" --no-headers 2>/dev/null | wc -l | tr -d ' ') +if [ "$PDB_COUNT" -ge 1 ]; then + log_info " $PDB_COUNT PodDisruptionBudget(s) configured" +else + log_warn " No PodDisruptionBudgets found — check production-values.yaml pdb settings" +fi + +# Step 6: Verify pod spread across nodes +log_info "Checking pod distribution across nodes..." +CONTROLLER_NODES=$(kubectl get pods -n "${NAMESPACE}" -l app=workflow-controller \ + -o jsonpath='{.items[*].spec.nodeName}' 2>/dev/null | tr ' ' '\n' | sort -u | wc -l | tr -d ' ') +log_info " Controller pods spread across $CONTROLLER_NODES unique node(s)" + +# Step 7: Submit a DAG workflow with parallel steps +log_info "Submitting DAG workflow with parallel steps..." 
+kubectl apply -f - <<'EOF' +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: dag-parallel-test + namespace: argo +spec: + serviceAccountName: argo-workflow + entrypoint: dag-pipeline + templates: + - name: dag-pipeline + dag: + tasks: + - name: step-a + template: echo-step + arguments: + parameters: + - name: message + value: "Step A" + - name: step-b + template: echo-step + arguments: + parameters: + - name: message + value: "Step B" + - name: step-c + template: echo-step + dependencies: + - step-a + - step-b + arguments: + parameters: + - name: message + value: "Step C (depends on A and B)" + + - name: echo-step + inputs: + parameters: + - name: message + container: + image: busybox:1.36 + command: [sh, -c] + args: ["echo '{{ inputs.parameters.message }}'"] + resources: + requests: + cpu: 50m + memory: 32Mi +EOF + +# Step 8: Wait for DAG workflow to succeed +log_info "Waiting for DAG workflow to complete..." +for i in $(seq 1 180); do + PHASE=$(kubectl get workflow dag-parallel-test -n argo -o jsonpath='{.status.phase}' 2>/dev/null || echo "") + case "$PHASE" in + Succeeded) + log_info " DAG workflow completed successfully (phase: $PHASE)" + break + ;; + Failed|Error) + log_error " DAG workflow failed (phase: $PHASE)" + kubectl describe workflow dag-parallel-test -n argo + kubectl get pods -n argo -l workflows.argoproj.io/workflow=dag-parallel-test -o wide + exit 1 + ;; + esac + if [ "$i" -eq 180 ]; then + log_error " DAG workflow did not complete within 180 seconds (phase: ${PHASE:-unknown})" + kubectl describe workflow dag-parallel-test -n argo + exit 1 + fi + sleep 1 +done + +# Step 9: Verify DAG nodes all succeeded +log_info "Verifying all DAG nodes succeeded..." 
+FAILED_NODES=$(kubectl get workflow dag-parallel-test -n argo \ + -o jsonpath='{.status.nodes[?(@.phase!="Succeeded")].displayName}' 2>/dev/null | tr ' ' '\n' | grep -v '^$' || true) +if [ -z "$FAILED_NODES" ]; then + log_info " All DAG nodes succeeded" +else + log_error " Some DAG nodes did not succeed: $FAILED_NODES" + exit 1 +fi + +# Summary +echo "" +log_info "=== Test Summary ===" +log_info "Controller replicas: $CONTROLLER_READY ready" +log_info "Server replicas: $SERVER_READY ready" +log_info "PodDisruptionBudgets: $PDB_COUNT configured" +log_info "Node spread: Controller pods on $CONTROLLER_NODES node(s)" +log_info "DAG workflow: Succeeded (3 steps: A, B, C)" +echo "" +log_info "=== All production tests passed! ===" From 6ac0672d49b54fc2c6c7a03e66c055ad1f773c34 Mon Sep 17 00:00:00 2001 From: Moritz Bracht Date: Thu, 4 Jun 2026 14:06:51 +0200 Subject: [PATCH 2/3] test: some improvements to test-production.sh Thanks for pointing them out @coderabbitai --- argo-workflows/tests/test-production.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/argo-workflows/tests/test-production.sh b/argo-workflows/tests/test-production.sh index cb7069e..67dd0a8 100755 --- a/argo-workflows/tests/test-production.sh +++ b/argo-workflows/tests/test-production.sh @@ -116,7 +116,8 @@ PDB_COUNT=$(kubectl get pdb -n "${NAMESPACE}" --no-headers 2>/dev/null | wc -l | if [ "$PDB_COUNT" -ge 1 ]; then log_info " $PDB_COUNT PodDisruptionBudget(s) configured" else - log_warn " No PodDisruptionBudgets found — check production-values.yaml pdb settings" + log_error " No PodDisruptionBudgets found — production profile requires pdb.enabled: true" + exit 1 fi # Step 6: Verify pod spread across nodes @@ -124,15 +125,19 @@ log_info "Checking pod distribution across nodes..." 
CONTROLLER_NODES=$(kubectl get pods -n "${NAMESPACE}" -l app=workflow-controller \ -o jsonpath='{.items[*].spec.nodeName}' 2>/dev/null | tr ' ' '\n' | sort -u | wc -l | tr -d ' ') log_info " Controller pods spread across $CONTROLLER_NODES unique node(s)" +if [ "${CONTROLLER_NODES:-0}" -lt 2 ]; then + log_error " Controller pods are only on $CONTROLLER_NODES node(s), expected >= 2 for HA" + exit 1 +fi # Step 7: Submit a DAG workflow with parallel steps log_info "Submitting DAG workflow with parallel steps..." -kubectl apply -f - <<'EOF' +kubectl apply -f - <<EOF apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: name: dag-parallel-test - namespace: argo + namespace: ${NAMESPACE} spec: serviceAccountName: argo-workflow entrypoint: dag-pipeline @@ -179,7 +184,7 @@ EOF # Step 8: Wait for DAG workflow to succeed log_info "Waiting for DAG workflow to complete..." for i in $(seq 1 180); do - PHASE=$(kubectl get workflow dag-parallel-test -n argo -o jsonpath='{.status.phase}' 2>/dev/null || echo "") + PHASE=$(kubectl get workflow dag-parallel-test -n "${NAMESPACE}" -o jsonpath='{.status.phase}' 2>/dev/null || echo "") case "$PHASE" in Succeeded) log_info " DAG workflow completed successfully (phase: $PHASE)" @@ -187,14 +192,14 @@ for i in $(seq 1 180); do ;; Failed|Error) log_error " DAG workflow failed (phase: $PHASE)" - kubectl describe workflow dag-parallel-test -n argo - kubectl get pods -n argo -l workflows.argoproj.io/workflow=dag-parallel-test -o wide + kubectl describe workflow dag-parallel-test -n "${NAMESPACE}" + kubectl get pods -n "${NAMESPACE}" -l workflows.argoproj.io/workflow=dag-parallel-test -o wide exit 1 ;; esac if [ "$i" -eq 180 ]; then log_error " DAG workflow did not complete within 180 seconds (phase: ${PHASE:-unknown})" - kubectl describe workflow dag-parallel-test -n argo + kubectl describe workflow dag-parallel-test -n "${NAMESPACE}" exit 1 fi sleep 1 @@ -202,7 +207,7 @@ done # Step 9: Verify DAG nodes all succeeded log_info "Verifying all DAG nodes succeeded..."
-FAILED_NODES=$(kubectl get workflow dag-parallel-test -n argo \ +FAILED_NODES=$(kubectl get workflow dag-parallel-test -n "${NAMESPACE}" \ -o jsonpath='{.status.nodes[?(@.phase!="Succeeded")].displayName}' 2>/dev/null | tr ' ' '\n' | grep -v '^$' || true) if [ -z "$FAILED_NODES" ]; then log_info " All DAG nodes succeeded" From 25fe0f7d5027308b21e8927b6048740f52c9fd57 Mon Sep 17 00:00:00 2001 From: Moritz Bracht Date: Fri, 5 Jun 2026 08:28:23 +0200 Subject: [PATCH 3/3] feat: add values.yaml.tpl to argo-workflows --- argo-workflows/values.yaml.tpl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 argo-workflows/values.yaml.tpl diff --git a/argo-workflows/values.yaml.tpl b/argo-workflows/values.yaml.tpl new file mode 100644 index 0000000..7c90cb1 --- /dev/null +++ b/argo-workflows/values.yaml.tpl @@ -0,0 +1,20 @@ +{{- $controller := index .OCIResources "argo-workflows-controller-image" }} +controller: + image: + registry: {{ $controller.Host }} + repository: {{ $controller.Repository }} + tag: "{{ $controller.Tag }}" + +{{- $server := index .OCIResources "argo-workflows-server-image" }} +server: + image: + registry: {{ $server.Host }} + repository: {{ $server.Repository }} + tag: "{{ $server.Tag }}" + +{{- $executor := index .OCIResources "argo-workflows-executor-image" }} +executor: + image: + registry: {{ $executor.Host }} + repository: {{ $executor.Repository }} + tag: "{{ $executor.Tag }}"