Skip to content

Reliability Soak/Chaos #37

Reliability Soak/Chaos

Reliability Soak/Chaos #37

Workflow file for this run

# Reliability workflow: benchmark smoke guardrails plus integration evidence,
# soak, GA soak, chaos, and flake-repeat jobs.
name: Reliability Soak/Chaos
on:
  # Scheduled run once a day at 06:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 6 * * *"
  # Manual runs can override the values below. The jobs that read these inputs
  # fall back to the same defaults when the inputs are absent, which is the
  # case for scheduled runs.
  workflow_dispatch:
    inputs:
      ga_soak_minutes:
        description: "Requested GA soak duration (minutes) for soak-ga job metadata"
        required: false
        default: "15"
        type: string
      flake_repeat_count:
        description: "Repeat count for integration-flake-repeat job"
        required: false
        default: "5"
        type: string
# Least privilege for GITHUB_TOKEN: the jobs in this workflow check out the
# repository and upload artifacts, which needs read access to contents only.
# NOTE(review): confirm none of the invoked scripts rely on a write scope.
permissions:
  contents: read
jobs:
benchmark-smoke:
  # Runs the benchmark smoke guardrail script, copies its summary (when one
  # exists) into the job summary, and uploads everything under .artifacts/.
  timeout-minutes: 20
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: stable

    - name: Install dependencies
      run: go mod download

    - name: Run benchmark smoke guardrails
      run: |
        set -euo pipefail
        # Create the output directory before the script runs.
        mkdir -p .artifacts
        ./scripts/check-bench-smoke.sh

    # Runs regardless of the guardrail outcome so a failing run is still reported.
    - name: Summarize benchmark smoke guardrails
      if: ${{ always() }}
      run: |
        report=".artifacts/bench-smoke-summary.md"
        if [[ ! -f "$report" ]]; then
          echo "benchmark smoke summary not found" >> "$GITHUB_STEP_SUMMARY"
        else
          cat "$report" >> "$GITHUB_STEP_SUMMARY"
        fi

    - name: Upload benchmark smoke artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-smoke
        path: ".artifacts/*"
        # An empty .artifacts/ only produces a warning, not a failure.
        if-no-files-found: warn
integration-evidence:
  # Runs the shared integration scenario test once per backend and keeps the
  # verbose log plus an extracted duration report as evidence.
  runs-on: ubuntu-latest
  timeout-minutes: 45
  strategy:
    # Let every backend finish even when another backend fails.
    fail-fast: false
    matrix:
      backend: [redis, mysql, postgres, sqlite, nats, sqs, rabbitmq]
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: "stable"
    - name: Install dependencies
      run: go mod download
    - name: Run shared integration scenario matrix (evidence)
      env:
        # Presumably read by the integration tests to pick the backend — confirm.
        INTEGRATION_BACKEND: ${{ matrix.backend }}
      run: |
        # pipefail keeps a go test failure from being masked by tee's exit status.
        set -euo pipefail
        mkdir -p .artifacts
        go test -tags=integration ./integration/... \
          -run '^TestIntegrationScenarios_AllBackends$' \
          -count=1 -v \
          | tee ".artifacts/integration-scenarios-${{ matrix.backend }}.log"
    - name: Summarize evidence durations
      if: always()
      run: |
        log=".artifacts/integration-scenarios-${{ matrix.backend }}.log"
        durations=".artifacts/integration-scenarios-${{ matrix.backend }}-durations.md"
        # This step also runs after an earlier step failed, in which case the
        # log may never have been written; only extract when it exists.
        if [[ -f "${log}" ]]; then
          .github/scripts/extract_scenario_durations.sh "${log}" "${durations}"
        fi
        {
          echo "### integration-evidence / ${{ matrix.backend }}"
          if [[ -f "${durations}" ]]; then
            cat "${durations}"
          else
            echo "scenario durations not available for ${{ matrix.backend }}"
          fi
          echo
        } >> "$GITHUB_STEP_SUMMARY"
    - name: Upload evidence logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: integration-evidence-${{ matrix.backend }}
        path: .artifacts/*
        if-no-files-found: warn
integration-soak:
  # Runs the same integration scenario test as the evidence job, per backend,
  # with RUN_SOAK=1 exported and a longer job timeout.
  runs-on: ubuntu-latest
  timeout-minutes: 90
  strategy:
    # Let every backend finish even when another backend fails.
    fail-fast: false
    matrix:
      backend: [redis, mysql, postgres, sqlite, nats, sqs, rabbitmq]
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: "stable"
    - name: Install dependencies
      run: go mod download
    - name: Run integration soak scenario suite
      env:
        # Presumably switches the tests into soak mode — confirm in the test code.
        RUN_SOAK: "1"
        INTEGRATION_BACKEND: ${{ matrix.backend }}
      run: |
        # pipefail keeps a go test failure from being masked by tee's exit status.
        set -euo pipefail
        mkdir -p .artifacts
        go test -tags=integration ./integration/... \
          -run '^TestIntegrationScenarios_AllBackends$' \
          -count=1 -v \
          | tee ".artifacts/integration-soak-${{ matrix.backend }}.log"
    - name: Summarize soak durations
      if: always()
      run: |
        log=".artifacts/integration-soak-${{ matrix.backend }}.log"
        durations=".artifacts/integration-soak-${{ matrix.backend }}-durations.md"
        # This step also runs after an earlier step failed, in which case the
        # log may never have been written; only extract when it exists.
        if [[ -f "${log}" ]]; then
          .github/scripts/extract_scenario_durations.sh "${log}" "${durations}"
        fi
        {
          echo "### integration-soak / ${{ matrix.backend }}"
          if [[ -f "${durations}" ]]; then
            cat "${durations}"
          else
            echo "scenario durations not available for ${{ matrix.backend }}"
          fi
          echo
        } >> "$GITHUB_STEP_SUMMARY"
    - name: Upload soak logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: integration-soak-${{ matrix.backend }}
        path: .artifacts/*
        if-no-files-found: warn
soak-ga:
  # Timed GA soak for the redis and rabbitmq backends. Records start/end
  # timestamps and wall time in a metadata file, then publishes a summary
  # table together with the extracted scenario durations.
  runs-on: ubuntu-latest
  timeout-minutes: 180
  strategy:
    # Let every backend finish even when the other one fails.
    fail-fast: false
    matrix:
      backend: [redis, rabbitmq]
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: "stable"
    - name: Install dependencies
      run: go mod download
    - name: Run GA soak scenario suite (timed)
      env:
        RUN_SOAK: "1"
        INTEGRATION_BACKEND: ${{ matrix.backend }}
        # Falls back to 15 when the dispatch input is absent (scheduled runs).
        # Exported to the step environment and written to the metadata file;
        # whether the tests themselves read it is not visible here.
        REQUESTED_SOAK_MINUTES: ${{ github.event.inputs.ga_soak_minutes || '15' }}
      run: |
        set -euo pipefail
        mkdir -p .artifacts
        meta=".artifacts/soak-ga-${{ matrix.backend }}-meta.txt"
        start_epoch="$(date +%s)"
        {
          echo "backend=${{ matrix.backend }}"
          echo "requested_soak_minutes=${REQUESTED_SOAK_MINUTES}"
          echo "started_at_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
        } > "${meta}"
        # Capture the pipeline status instead of letting `set -e` abort here,
        # so the end time and elapsed seconds are recorded for failed runs too.
        # pipefail makes the status reflect go test rather than tee.
        test_status=0
        go test -tags=integration ./integration/... \
          -run '^TestIntegrationScenarios_AllBackends$' \
          -count=1 -v \
          | tee ".artifacts/soak-ga-${{ matrix.backend }}.log" || test_status=$?
        end_epoch="$(date +%s)"
        elapsed_seconds="$((end_epoch - start_epoch))"
        {
          echo "ended_at_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
          echo "elapsed_seconds=${elapsed_seconds}"
        } >> "${meta}"
        # Propagate the test result so the step still fails when the tests fail.
        exit "${test_status}"
    - name: Summarize GA soak durations and metadata
      if: always()
      env:
        REQUESTED_SOAK_MINUTES: ${{ github.event.inputs.ga_soak_minutes || '15' }}
      run: |
        set -euo pipefail
        log=".artifacts/soak-ga-${{ matrix.backend }}.log"
        durations=".artifacts/soak-ga-${{ matrix.backend }}-durations.md"
        meta=".artifacts/soak-ga-${{ matrix.backend }}-meta.txt"
        # This step also runs after an earlier step failed, in which case the
        # log may never have been written; only extract when it exists.
        if [[ -f "${log}" ]]; then
          .github/scripts/extract_scenario_durations.sh "${log}" "${durations}"
        fi
        elapsed_seconds=""
        if [[ -f "${meta}" ]]; then
          # `|| true` keeps a missing elapsed_seconds line from aborting the step.
          elapsed_seconds="$(grep '^elapsed_seconds=' "${meta}" | cut -d= -f2 || true)"
        fi
        # Append the unit only to a real value so the fallback reads "unknown".
        if [[ -n "${elapsed_seconds}" ]]; then
          observed_wall_time="${elapsed_seconds}s"
        else
          observed_wall_time="unknown"
        fi
        summary_file=".artifacts/soak-ga-${{ matrix.backend }}-summary.md"
        {
          echo "## GA Soak Summary"
          echo
          echo "| Field | Value |"
          echo "|---|---|"
          echo "| Backend | \`${{ matrix.backend }}\` |"
          echo "| Requested soak duration | \`${REQUESTED_SOAK_MINUTES}m\` |"
          echo "| Observed wall time | \`${observed_wall_time}\` |"
          echo "| Log artifact | \`soak-ga-${{ matrix.backend }}.log\` |"
          echo
          if [[ -f "${durations}" ]]; then
            cat "${durations}"
          else
            echo "scenario durations not available for ${{ matrix.backend }}"
          fi
        } > "${summary_file}"
        {
          echo "### soak-ga / ${{ matrix.backend }}"
          cat "${summary_file}"
          echo
        } >> "$GITHUB_STEP_SUMMARY"
    - name: Upload GA soak logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: soak-ga-${{ matrix.backend }}
        path: .artifacts/*
        if-no-files-found: warn
integration-chaos:
  # Runs a named subset of the integration scenarios (broker fault/recovery,
  # worker restart, shutdown during delay/retry, multi-worker contention)
  # once per backend.
  runs-on: ubuntu-latest
  timeout-minutes: 60
  strategy:
    # Let every backend finish even when another backend fails.
    fail-fast: false
    matrix:
      backend: [redis, mysql, postgres, sqlite, nats, sqs, rabbitmq]
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: "stable"
    - name: Install dependencies
      run: go mod download
    - name: Run integration chaos scenario subset
      env:
        # Presumably read by the integration tests to pick the backend — confirm.
        INTEGRATION_BACKEND: ${{ matrix.backend }}
      run: |
        # pipefail keeps a go test failure from being masked by tee's exit status.
        set -euo pipefail
        mkdir -p .artifacts
        # The -run pattern is split on "/" into one regexp per subtest level:
        # top-level test / any second-level name / the listed scenario names.
        go test -tags=integration ./integration/... \
          -run "TestIntegrationScenarios_AllBackends/.*/scenario_(dispatch_during_broker_fault|consume_after_broker_recovery|worker_restart_recovery|worker_restart_delay_recovery|shutdown_during_delay_retry|multi_worker_contention)$" \
          -count=1 -v \
          | tee ".artifacts/integration-chaos-${{ matrix.backend }}.log"
    - name: Summarize chaos durations
      if: always()
      run: |
        log=".artifacts/integration-chaos-${{ matrix.backend }}.log"
        durations=".artifacts/integration-chaos-${{ matrix.backend }}-durations.md"
        # This step also runs after an earlier step failed, in which case the
        # log may never have been written; only extract when it exists.
        if [[ -f "${log}" ]]; then
          .github/scripts/extract_scenario_durations.sh "${log}" "${durations}"
        fi
        {
          echo "### integration-chaos / ${{ matrix.backend }}"
          if [[ -f "${durations}" ]]; then
            cat "${durations}"
          else
            echo "scenario durations not available for ${{ matrix.backend }}"
          fi
          echo
        } >> "$GITHUB_STEP_SUMMARY"
    - name: Upload chaos logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: integration-chaos-${{ matrix.backend }}
        path: .artifacts/*
        if-no-files-found: warn
integration-flake-repeat:
  # Runs the flake-repeat script for the redis, rabbitmq, and sqs backends and
  # publishes its summary (when one exists) plus everything under .artifacts/.
  timeout-minutes: 120
  runs-on: ubuntu-latest
  strategy:
    matrix:
      backend: [redis, rabbitmq, sqs]
    # Let every backend finish even when another backend fails.
    fail-fast: false
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: stable

    - name: Install dependencies
      run: go mod download

    - name: Run repeated integration flake probes
      env:
        # Falls back to 5 when the dispatch input is absent (scheduled runs).
        FLAKE_REPEAT_COUNT: ${{ github.event.inputs.flake_repeat_count || '5' }}
        INTEGRATION_BACKEND: ${{ matrix.backend }}
      run: |
        set -euo pipefail
        # Create the output directory before the script runs.
        mkdir -p .artifacts
        scripts/integration-flake-repeat.sh

    # Runs regardless of the probe outcome so a failing run is still reported.
    - name: Summarize repeated flake probes
      if: ${{ always() }}
      run: |
        report=".artifacts/integration-flake-${{ matrix.backend }}-summary.md"
        if [[ ! -f "$report" ]]; then
          echo "integration flake repeat summary not found for ${{ matrix.backend }}" >> "$GITHUB_STEP_SUMMARY"
        else
          {
            echo "### integration-flake-repeat / ${{ matrix.backend }}"
            cat "$report"
            echo
          } >> "$GITHUB_STEP_SUMMARY"
        fi

    - name: Upload flake probe logs
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: integration-flake-repeat-${{ matrix.backend }}
        path: ".artifacts/*"
        # An empty .artifacts/ only produces a warning, not a failure.
        if-no-files-found: warn