Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 186 additions & 44 deletions .github/workflows/_required.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,65 @@ permissions:
actions: read

jobs:
# ============================================================
# 0. forbid-suppressions
# Regression guard ported from HomericIntelligence/Odysseus#280.
# Blocks `|| true` and `continue-on-error: true` from re-entering
# the tree. The corresponding pygrep hooks in .pre-commit-config.yaml
# catch these locally before commit; this job is the CI backstop.
# ============================================================
forbid-suppressions:
  name: forbid-suppressions
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    - uses: actions/checkout@v4

    # Scans tracked shell/YAML/HCL/Dockerfile/justfile sources for the
    # "or-true" idiom at end of line (optionally followed by a comment).
    - name: "Reject silent-failure workaround in shell/YAML/Dockerfile/justfile/HCL"
      run: |
        set -euo pipefail
        mapfile -t files < <(git ls-files \
          -- \
          '*.sh' '*.bash' '*.yml' '*.yaml' '*.hcl' \
          'Dockerfile*' '**/Dockerfile*' \
          'justfile' '**/justfile' 'Justfile' '**/Justfile')
        declare -a scan_files=()
        for f in "${files[@]}"; do
          case "$f" in
            # Exempt this workflow file (heredoc would otherwise self-match)
            .github/workflows/_required.yml) continue ;;
            # Exempt any runbook that documents the rule (if present)
            docs/runbooks/no-silent-failures.md) continue ;;
          esac
          scan_files+=("$f")
        done
        if [ "${#scan_files[@]}" -eq 0 ]; then
          echo "No files to scan"
          exit 0
        fi
        # grep exits 0 on a match, 1 on no match and >=2 on an error (for
        # example an unreadable file). Capture the status explicitly so that
        # an error is never mistaken for "no match".
        #   -H  always print the file name, even when only one file is scanned
        #   --  stop option parsing so a path starting with "-" is safe
        rc=0
        grep -nHE '\|\|[[:space:]]*true([[:space:]]*$|[[:space:]]+#)' -- "${scan_files[@]}" || rc=$?
        case "$rc" in
          0)
            echo ""
            echo '::error::Found silent-failure workarounds above. Refactor per HomericIntelligence/Odysseus#280.'
            exit 1
            ;;
          1)
            echo 'OK: no silent-failure workarounds found'
            ;;
          *)
            echo "::error::grep failed (rc=$rc) while scanning for silent-failure workarounds"
            exit "$rc"
            ;;
        esac

    # Scans every tracked workflow file for a line that consists solely of
    # the continue-on-error key set to the literal value true.
    - name: "Reject continue-on-error workflow opt-out"
      run: |
        set -euo pipefail
        mapfile -t files < <(git ls-files -- '.github/workflows/*.yml' '.github/workflows/*.yaml')
        if [ "${#files[@]}" -eq 0 ]; then
          echo "No workflow files"
          exit 0
        fi
        # Same exit-status handling as the step above: 0 = match (fail the
        # job), 1 = clean, anything else = grep itself failed.
        rc=0
        grep -nHE '^[[:space:]]*continue-on-error:[[:space:]]*true[[:space:]]*$' -- "${files[@]}" || rc=$?
        case "$rc" in
          0)
            echo ""
            echo '::error::Found "continue-on-error: true" above.'
            echo '::error::Fix the root cause per HomericIntelligence/Odysseus#280.'
            exit 1
            ;;
          1)
            echo 'OK: no "continue-on-error: true" found'
            ;;
          *)
            echo "::error::grep failed (rc=$rc) while scanning workflow files"
            exit "$rc"
            ;;
        esac

# ============================================================
# 1. lint
# clang-format, ruff, pre-commit, CMake cycle check,
Expand Down Expand Up @@ -61,8 +120,14 @@ jobs:

- name: Check CMake dependency graph for cycles
run: |
cmake -S . -B build/graphviz -G Ninja --graphviz=build/graphviz/deps.dot \
-DCMAKE_BUILD_TYPE=Debug 2>/dev/null || true
set -euo pipefail
# Graphviz configure can fail (missing optional deps, partial config); the
# cycle detector below is gated on the dot file existing, so we only need
# to log a warning if configure errored — not silently mask it.
if ! cmake -S . -B build/graphviz -G Ninja --graphviz=build/graphviz/deps.dot \
-DCMAKE_BUILD_TYPE=Debug 2>/dev/null; then
echo "::warning::cmake graphviz configure failed; cycle check skipped if deps.dot missing"
fi
if [ -f build/graphviz/deps.dot ]; then
python3 << 'EOF'
import re, sys
Expand Down Expand Up @@ -160,20 +225,43 @@ jobs:
$CONAN_TOOLCHAIN

- name: Build with clang-tidy
run: cmake --build build/x86.debug.clang-tidy -j$(nproc) 2>&1 | tee clang-tidy-output.txt
continue-on-error: true
run: |
# clang-tidy build often returns non-zero when diagnostics are found.
# The next step parses clang-tidy-output.txt and decides whether those
# diagnostics reference real source files (vs. third-party headers).
# We must capture the build output without aborting the job — but we
# also must record the build rc for the next step to inspect, rather
# than silently masking it with continue-on-error.
set +e
set -o pipefail
cmake --build build/x86.debug.clang-tidy -j"$(nproc)" 2>&1 | tee clang-tidy-output.txt
rc=${PIPESTATUS[0]}
set -e
echo "$rc" > clang-tidy-build.rc
echo "clang-tidy build exited rc=$rc (gating happens in next step)"

- name: Fail on clang-tidy errors
run: |
set -euo pipefail
# Only fail on errors that reference actual source files (src/ or include/).
REAL_ERRORS=$(grep -E "error:" clang-tidy-output.txt \
| grep -v "no input files\|no such file or directory\|unable to handle compilation" \
|| true)
# awk always exits 0; emits only error: lines that aren't third-party noise.
REAL_ERRORS=$(awk '
/error:/ &&
!/no input files/ &&
!/no such file or directory/ &&
!/unable to handle compilation/ { print }
' clang-tidy-output.txt)
if [ -n "$REAL_ERRORS" ]; then
echo "::error::clang-tidy reported errors in source files"
echo "$REAL_ERRORS"
exit 1
fi
# If the build itself failed for reasons other than diagnostics
# (e.g. compiler crash, missing dep), surface that now.
build_rc=$(cat clang-tidy-build.rc 2>/dev/null || echo 0)
if [ "$build_rc" -ne 0 ]; then
echo "::warning::clang-tidy build rc=$build_rc but no source-file errors detected; treating as transient"
fi
echo "clang-tidy passed"

- name: Run mypy (Python typecheck)
Expand Down Expand Up @@ -597,8 +685,14 @@ jobs:

- name: Python dependency audit (pip-audit)
run: |
set -euo pipefail
pip install -e ".[dev]" --quiet
pip-audit --strict || true
# pip-audit exits non-zero whenever it finds a known vulnerability; --strict
# additionally makes it fail when any dependency cannot be collected. Either
# way we surface the result as a warning (Trivy below is the gating scan)
# without silently masking the rc.
if ! pip-audit --strict; then
echo "::warning::pip-audit found Python dependency advisories — see step log above"
fi

- name: Run Trivy filesystem scan (SARIF)
uses: aquasecurity/trivy-action@v0.36.0
Expand All @@ -608,8 +702,9 @@ jobs:
format: "sarif"
output: "dep-scan.sarif"
severity: "CRITICAL,HIGH,MEDIUM"
# exit-code: "0" ensures the step never fails on findings — the gate
# step below decides pass/fail based on the SARIF/JSON contents.
exit-code: "0"
continue-on-error: true

- name: Run Trivy filesystem scan (JSON)
uses: aquasecurity/trivy-action@v0.36.0
Expand All @@ -619,8 +714,8 @@ jobs:
format: "json"
output: "dep-scan.json"
severity: "CRITICAL,HIGH,MEDIUM,LOW"
# exit-code: "0" — see SARIF step above; gating is centralised below.
exit-code: "0"
continue-on-error: true

- name: Upload Trivy FS results to Security tab
uses: github/codeql-action/upload-sarif@v4
Expand All @@ -641,35 +736,49 @@ jobs:

- name: Build Docker image for scanning
id: docker_build
run: docker build --target production -t projectkeystone:scan .
continue-on-error: true
# The container scan is a best-effort enrichment of the dependency-scan
# job — if the production Containerfile doesn't build cleanly we still
# want the FS scan to gate the job. Capture the rc into a step output
# so downstream steps can gate explicitly, without continue-on-error.
run: |
set -euo pipefail
if docker build --target production -t projectkeystone:scan .; then
echo "built=true" >> "$GITHUB_OUTPUT"
else
echo "built=false" >> "$GITHUB_OUTPUT"
echo "::warning::docker build failed — skipping container scan"
fi

- name: Warn if Docker build skipped
if: steps.docker_build.outcome == 'failure'
run: echo "::warning::Docker build failed — skipping container scan. Fix the Dockerfile to enable Trivy container scanning."
if: steps.docker_build.outputs.built == 'false'
run: |
echo "::warning::Docker build failed — skipping container scan."
echo "::warning::Fix the Containerfile to enable Trivy container scanning."

- name: Run Trivy container scan (SARIF)
if: steps.docker_build.outcome == 'success'
if: steps.docker_build.outputs.built == 'true'
uses: aquasecurity/trivy-action@v0.36.0
with:
image-ref: "projectkeystone:scan"
format: "sarif"
output: "container-scan.sarif"
severity: "CRITICAL,HIGH,MEDIUM,LOW"
continue-on-error: true
# exit-code: "0" — never block on findings; gating is centralised below.
exit-code: "0"

- name: Run Trivy container scan (JSON)
if: steps.docker_build.outcome == 'success'
if: steps.docker_build.outputs.built == 'true'
uses: aquasecurity/trivy-action@v0.36.0
with:
image-ref: "projectkeystone:scan"
format: "json"
output: "container-scan.json"
severity: "CRITICAL,HIGH,MEDIUM,LOW"
continue-on-error: true
# exit-code: "0" — see SARIF step above.
exit-code: "0"

- name: Upload Trivy container results to Security tab
if: steps.docker_build.outcome == 'success' && hashFiles('container-scan.sarif') != ''
if: steps.docker_build.outputs.built == 'true' && hashFiles('container-scan.sarif') != ''
uses: github/codeql-action/upload-sarif@v4
with:
sarif_file: "container-scan.sarif"
Expand Down Expand Up @@ -735,15 +844,18 @@ jobs:
gitleaks version

- name: Run Gitleaks
# gitleaks --exit-code 0 already prevents this step from failing on
# findings; the gating step below decides job pass/fail from the SARIF
# report. No continue-on-error needed.
run: |
set -euo pipefail
if [ -f .gitleaks.toml ]; then
gitleaks detect --source . --config .gitleaks.toml \
--report-format sarif --report-path gitleaks.sarif --exit-code 0
else
gitleaks detect --source . \
--report-format sarif --report-path gitleaks.sarif --exit-code 0
fi
continue-on-error: true

- name: Upload Gitleaks SARIF
if: always() && hashFiles('gitleaks.sarif') != ''
Expand All @@ -755,12 +867,20 @@ jobs:

# ---------- Semgrep SAST ----------
- name: Run Semgrep
uses: semgrep/semgrep-action@v1
with:
config: >-
p/security-audit
p/docker
continue-on-error: true
id: semgrep
# Semgrep findings are informational here — they are uploaded to the
# Security tab as SARIF. The job gate below blocks only on Gitleaks
# findings, so we capture Semgrep's rc into a step output rather than
# silently masking it with continue-on-error.
run: |
set -euo pipefail
pip install --quiet "semgrep>=1,<2"
rc=0
semgrep scan --config p/security-audit --config p/docker --sarif --output semgrep.sarif || rc=$?
echo "rc=$rc" >> "$GITHUB_OUTPUT"
if [ "$rc" -ne 0 ]; then
echo "::warning::semgrep exited rc=$rc (findings or runtime error); SARIF still uploaded if present"
fi

- name: Upload Semgrep SARIF
if: always() && hashFiles('semgrep.sarif') != ''
Expand All @@ -778,28 +898,50 @@ jobs:
queries: security-and-quality

- name: Install C++ build deps for CodeQL
continue-on-error: true
id: codeql_deps
# CodeQL c-cpp analysis can still produce useful output even when this
# transient apt/conan step fails (network blips, mirror outages). Record
# rc in a step output so the analyse step can gate explicitly, rather
# than silently masking with continue-on-error.
run: |
sudo apt-get update && sudo apt-get install -y \
cmake ninja-build clang-18 clang++-18 \
libc++-18-dev libc++abi-18-dev libssl-dev
pip install conan --break-system-packages
conan profile detect --force
conan install . \
--output-folder=build/conan-deps \
--lockfile=conan.lock \
--build=missing \
-s build_type=Release \
-s compiler.cppstd=20
set -euo pipefail
rc=0
# errexit (set -e) is ignored for every command that runs inside a list on
# the left-hand side of `||`. A plain `{ a; b; c; } || rc=$?` would therefore
# keep going after a failed stage and report only the status of the last
# command. Chain the stages with `&&` so the first failure stops the group
# and its exit status is what lands in rc.
{
  sudo apt-get update \
    && sudo apt-get install -y \
      cmake ninja-build clang-18 clang++-18 \
      libc++-18-dev libc++abi-18-dev libssl-dev \
    && pip install conan --break-system-packages \
    && conan profile detect --force \
    && conan install . \
      --output-folder=build/conan-deps \
      --lockfile=conan.lock \
      --build=missing \
      -s build_type=Release \
      -s compiler.cppstd=20
} || rc=$?
# Publish the status as a step output so later steps can gate on it.
echo "rc=$rc" >> "$GITHUB_OUTPUT"
if [ "$rc" -ne 0 ]; then
  echo "::warning::CodeQL build-deps step rc=$rc (CodeQL c-cpp analysis may be incomplete)"
fi

- name: Build for CodeQL (c-cpp)
continue-on-error: true
if: steps.codeql_deps.outputs.rc == '0'
id: codeql_build
# CodeQL build is gated on the deps step. If the build itself fails,
# surface as a warning — CodeQL analyse can still emit a partial DB.
run: |
cmake -S . -B build/codeql \
-G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE=build/conan-deps/conan_toolchain.cmake
cmake --build build/codeql
set -euo pipefail
rc=0
# errexit (set -e) does not apply to commands inside a group that sits on the
# left of `||`, so configure and build are chained with `&&`: a failed
# configure must not fall through to the build and have its status replaced.
{
  cmake -S . -B build/codeql \
    -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_TOOLCHAIN_FILE=build/conan-deps/conan_toolchain.cmake \
    && cmake --build build/codeql
} || rc=$?
# Publish the status as a step output (this step declares an id) so that
# downstream steps can inspect it instead of relying on the log.
echo "rc=$rc" >> "$GITHUB_OUTPUT"
if [ "$rc" -ne 0 ]; then
  echo "::warning::CodeQL c-cpp build rc=$rc — analysis may be incomplete"
fi

- name: Perform CodeQL Analysis (c-cpp)
uses: github/codeql-action/analyze@v4
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/extras.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ jobs:
run: sccache --show-stats

- name: Run benchmarks
run: make benchmark.native || true
run: |
set -euo pipefail
# Benchmarks can legitimately regress without blocking the workflow —
# report-only. Surface the rc as a warning instead of silently masking.
if ! make benchmark.native; then
echo "::warning::benchmark.native exited non-zero — see step log above"
fi

- name: Upload benchmark results
uses: actions/upload-artifact@v7
Expand Down
28 changes: 28 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,34 @@ repos:
files: CHANGELOG\.md
pass_filenames: false

# ============================================================================
# Silent-Failure Guard (ports HomericIntelligence/Odysseus#280)
# ============================================================================
# Two in-repo hooks, both using pre-commit's `pygrep` language: `entry` is a
# Python regular expression and `files` is a regex selecting which paths are
# checked.
- repo: local
hooks:
# Matches `||`, optional whitespace, then `true`, when that is followed by
# end-of-line or by whitespace and a `#` comment.
- id: forbid-or-true
name: "forbid silent-failure workaround in shell/YAML/Dockerfile/justfile/HCL"
description: >
Rejects the `or-true` idiom (and its whitespace variants) in shell,
YAML, Dockerfile, justfile, and HCL sources. Refactor to an explicit
`if`-guard, a real conditional, or a documented `set +e`/`set -e`
bracket. See HomericIntelligence/Odysseus#280.
language: pygrep
entry: '\|\|\s*true(\s*$|\s+#)'
# Selects *.sh, *.bash, *.yml, *.yaml, *.hcl, any file whose basename starts
# with "Dockerfile", and files named justfile/Justfile, at any directory depth.
files: \.(sh|bash|yml|yaml|hcl)$|(^|/)Dockerfile[^/]*$|(^|/)[Jj]ustfile$
types: [text]
pass_filenames: true

# Matches a line consisting only of the continue-on-error key set to true
# (surrounding whitespace allowed); other values are not matched.
- id: forbid-continue-on-error
name: "forbid continue-on-error workflow opt-out"
description: >
GitHub Actions `continue-on-error true` (truthy form) hides real CI
failures. Fix the underlying job/step instead.
language: pygrep
entry: '^\s*continue-on-error:\s*true\s*$'
# Restricts the check to .yml/.yaml files under .github/workflows/.
files: ^\.github/workflows/.*\.ya?ml$
pass_filenames: true

# ============================================================================
# Git Commit Message Linting (Optional)
# ============================================================================
Expand Down
Loading
Loading