diff --git a/.gitignore b/.gitignore index 5375a9b..fa94dfe 100644 --- a/.gitignore +++ b/.gitignore @@ -111,9 +111,12 @@ logs/ *.mkv # ===== Benchmark Results ===== +# Flat files (legacy pattern) benchmarks/results/*.json benchmarks/results/*.html benchmarks/results/*.csv +# Run directories (run-/) — all raw output is gitignored +benchmarks/results/run-*/ # ===== HAProxy / Coraza Runtime ===== *.sock @@ -121,6 +124,7 @@ benchmarks/results/*.csv haproxy.stats # ===== Thesis ===== +# Separate repository — not tracked here. thesis/ # ===== Keep Empty Directories ===== diff --git a/Makefile b/Makefile index 59b2fff..77a4134 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,8 @@ COMPOSE_FILE := deploy/docker/docker-compose.yml COMPOSE_DEBUG_FILE := deploy/docker/docker-compose.debug.yml ENV_FILE := deploy/docker/.env -.PHONY: run dev down clean logs ps seed users coraza-build +.PHONY: run dev down clean logs ps seed users coraza-build \ + eval-up eval-down eval-clean eval-ftw eval-zap eval-nuclei eval-load eval-metrics eval-all eval-results run: docker-compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) up --build -d @@ -30,3 +31,48 @@ users: coraza-build: docker build -f deploy/docker/coraza.Dockerfile -t guard-proxy/coraza-spoa:dev . + +# ── Evaluation lab (delegates to benchmarks/Makefile) ───────────────────── +# See benchmarks/Makefile for full documentation and variable overrides. 
+ +eval-up: + $(MAKE) -C benchmarks lab-up + +eval-down: + $(MAKE) -C benchmarks lab-down + +eval-clean: + $(MAKE) -C benchmarks lab-clean + +eval-ftw: + $(MAKE) -C benchmarks eval-ftw \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) \ + $(if $(TARGET_VHOST),TARGET_VHOST=$(TARGET_VHOST)) + +eval-zap: + $(MAKE) -C benchmarks eval-zap \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) \ + $(if $(TARGET_VHOST),TARGET_VHOST=$(TARGET_VHOST)) + +eval-nuclei: + $(MAKE) -C benchmarks eval-nuclei \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) \ + $(if $(TARGET_VHOST),TARGET_VHOST=$(TARGET_VHOST)) + +eval-load: + $(MAKE) -C benchmarks eval-load \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) \ + $(if $(TARGET_VHOST),TARGET_VHOST=$(TARGET_VHOST)) + +eval-metrics: + $(MAKE) -C benchmarks eval-metrics \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) + +eval-all: + $(MAKE) -C benchmarks eval-all \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) \ + $(if $(TARGET_VHOST),TARGET_VHOST=$(TARGET_VHOST)) + +eval-results: + $(MAKE) -C benchmarks results \ + $(if $(RUN_ID),RUN_ID=$(RUN_ID)) diff --git a/README.testing.md b/README.testing.md index 09912c5..1320d6e 100644 --- a/README.testing.md +++ b/README.testing.md @@ -6,8 +6,8 @@ |-------|----------|-------|---------------| | **Unit** (many, fast) | `src/backend/tests/unit/` | pytest, Vitest | >80% | | **Integration** (some) | `src/backend/tests/integration/` | pytest, Docker | Key flows | -| **Security** | `tests/security/` | sqlmap, OWASP ZAP, custom payloads | OWASP Top 10 | -| **Performance** | `benchmarks/` | wrk, k6, Locust | <20% WAF overhead | +| **Security** | `benchmarks/lab/` | OWASP ZAP, Nuclei, go-ftw (CRS corpus) | OWASP Top 10 | +| **Performance** | `benchmarks/lab/` | wrk (WAF vs direct) | <20% WAF overhead | ## WAF Testing @@ -54,10 +54,32 @@ uv run pytest -m e2e tests/e2e/test_policy_apply.py The test uses the same prerequisites as the smoke test and is wired into the nightly smoke workflow. Normal backend pytest runs exclude tests marked `e2e`. 
+## Evaluation Lab (thesis M6) + +Full WAF evaluation with real target apps (WordPress, Juice Shop, DVWA): + +```sh +# Prerequisites +cp deploy/demo/.env.example deploy/demo/.env +cp benchmarks/lab/.env.example benchmarks/lab/.env +git submodule update --init --recursive + +# Bring up the lab +make eval-up + +# Run all scenarios (ftw → zap → nuclei → load → metrics) +make eval-all + +# View results +make eval-results +``` + +See `benchmarks/lab/` for scenario configs and `docs/evaluation-plan.md` for methodology. + ## Test Data -- Payloads: `benchmarks/payloads/` (sqli.txt, xss.txt, legitimate.txt) -- Results: `benchmarks/results/` (timestamped JSON, gitignored) +- Payloads: `benchmarks/payloads/` (sqli.txt, xss.txt, lfi.txt, legitimate.txt) +- Results: `benchmarks/results/` (timestamped JSON/CSV, gitignored) ## Commands diff --git a/benchmarks/Makefile b/benchmarks/Makefile new file mode 100644 index 0000000..df4399a --- /dev/null +++ b/benchmarks/Makefile @@ -0,0 +1,110 @@ +REPO_ROOT := $(shell git rev-parse --show-toplevel 2>/dev/null || pwd) +DEMO_COMPOSE := $(REPO_ROOT)/deploy/demo/docker-compose.yml +LAB_COMPOSE := $(REPO_ROOT)/benchmarks/lab/docker-compose.targets.yml +DEMO_ENV := $(REPO_ROOT)/deploy/demo/.env +LAB_ENV := $(REPO_ROOT)/benchmarks/lab/.env +RUNNERS := $(REPO_ROOT)/benchmarks/lab/runners + +# Expand the default timestamp exactly once (':='). A recursive '?=' would re-run 'date' for every recipe, +# so each stage of eval-all would write to a different run-* directory and eval-metrics would find none. +RUN_ID := $(or $(RUN_ID),$(shell date +%Y%m%d-%H%M%S)) +TARGET_VHOST ?= juice.local +DIRECT_HOST ?= juiceshop +DIRECT_PORT ?= 3000 + +.PHONY: lab-up lab-down lab-clean \ + eval-ftw eval-zap eval-nuclei eval-load eval-metrics eval-all \ + results help + +# ── Lab lifecycle ────────────────────────────────────────────────────────── + +## Bring up the demo + all lab targets and register vhosts. +lab-up: + @echo "==> Starting guard-proxy demo + lab targets..." + docker compose \ + -f $(DEMO_COMPOSE) \ + -f $(LAB_COMPOSE) \ + --env-file $(DEMO_ENV) \ + --env-file $(LAB_ENV) \ + up -d --build + @echo "==> Seeding vhosts..." 
+ bash $(REPO_ROOT)/deploy/demo/setup-demo.sh + bash $(REPO_ROOT)/benchmarks/lab/setup-lab.sh --skip-compose + +## Stop the lab (preserve volumes). +lab-down: + bash $(REPO_ROOT)/benchmarks/lab/teardown-lab.sh + +## Stop the lab and remove all volumes. +lab-clean: + bash $(REPO_ROOT)/benchmarks/lab/teardown-lab.sh --clean + +# ── Individual scenario runners ──────────────────────────────────────────── + +## CRS regression suite (TPR gold standard). Uses go-ftw against the CRS corpus. +eval-ftw: + @mkdir -p $(REPO_ROOT)/benchmarks/results/run-$(RUN_ID)/ftw + RUN_ID=$(RUN_ID) TARGET_VHOST=$(TARGET_VHOST) \ + bash $(RUNNERS)/run-ftw.sh + +## ZAP baseline scan (FPR measurement, WordPress is best target for FP). +eval-zap: + @mkdir -p $(REPO_ROOT)/benchmarks/results/run-$(RUN_ID)/zap-$(TARGET_VHOST) + RUN_ID=$(RUN_ID) TARGET_VHOST=$(TARGET_VHOST) \ + bash $(RUNNERS)/run-zap.sh + +## Nuclei CVE templates (WAF TPR against real attack payloads). +eval-nuclei: + @mkdir -p $(REPO_ROOT)/benchmarks/results/run-$(RUN_ID)/nuclei-$(TARGET_VHOST) + RUN_ID=$(RUN_ID) TARGET_VHOST=$(TARGET_VHOST) \ + bash $(RUNNERS)/run-nuclei.sh + +## Latency + RPS load test (WAF vs direct). Measures overhead. +eval-load: + @mkdir -p $(REPO_ROOT)/benchmarks/results/run-$(RUN_ID)/load-$(TARGET_VHOST) + RUN_ID=$(RUN_ID) TARGET_VHOST=$(TARGET_VHOST) \ + DIRECT_HOST=$(DIRECT_HOST) DIRECT_PORT=$(DIRECT_PORT) \ + bash $(RUNNERS)/run-load.sh + +## Aggregate all scenario outputs into results.csv + report.json. +eval-metrics: + RUN_ID=$(RUN_ID) bash $(RUNNERS)/collect-metrics.sh + +## Run all scenarios (ftw → zap → nuclei → load → metrics) in one pass. +eval-all: eval-ftw eval-zap eval-nuclei eval-load eval-metrics + +# ── Results summary ──────────────────────────────────────────────────────── + +## Print a summary of the most recent run (or RUN_ID= a specific run). 
+results: + @latest=$$(ls -1t $(REPO_ROOT)/benchmarks/results/ 2>/dev/null \ + | grep '^run-' | head -1 | sed 's/^run-//'); \ + run=$${RUN_ID:-$$latest}; \ + csv="$(REPO_ROOT)/benchmarks/results/run-$${run}/results.csv"; \ + if [ -f "$$csv" ]; then \ + echo "Run: $${run}"; column -t -s, "$$csv"; \ + else \ + echo "No results found. Run 'make eval-all RUN_ID=' first."; \ + fi + +# ── Help ─────────────────────────────────────────────────────────────────── + +help: + @echo "Guard Proxy Evaluation Lab" + @echo "" + @echo "Setup:" + @echo " cp deploy/demo/.env.example deploy/demo/.env" + @echo " cp benchmarks/lab/.env.example benchmarks/lab/.env" + @echo " git submodule update --init --recursive" + @echo "" + @echo "Targets:" + @grep -E '^## ' $(MAKEFILE_LIST) | sed 's/^## / /' + @echo "" + @echo "Variables:" + @echo " RUN_ID= Override run ID (default: timestamp)" + @echo " TARGET_VHOST= Target vhost (default: juice.local)" + @echo " DIRECT_HOST= Direct-access Docker service name" + @echo " DIRECT_PORT= Direct-access port (bypasses HAProxy)" + @echo "" + @echo "Example (3 runs for thesis median):" + @echo " for i in 1 2 3; do make eval-all; done" diff --git a/benchmarks/lab/.env.example b/benchmarks/lab/.env.example new file mode 100644 index 0000000..6463d43 --- /dev/null +++ b/benchmarks/lab/.env.example @@ -0,0 +1,34 @@ +# Copy to benchmarks/lab/.env before running the eval lab. +# These are local-lab defaults only — never use these credentials in production. + +# ── DVWA database ───────────────────────────────────────────────────────── +DVWA_DB_ROOT_PASSWORD=dvwa_root_pw +DVWA_DB_PASSWORD=dvwa_pw + +# ── WordPress database ──────────────────────────────────────────────────── +WP_DB_ROOT_PASSWORD=wp_root_pw +WP_DB_PASSWORD=wp_pw +WP_ADMIN_PASSWORD=LabAdmin12345! 
+ +# ── Lab policy settings (used by setup-lab.sh) ──────────────────────────── +# Baseline policy: PL1, anomaly threshold 5, block mode +LAB_POLICY_NAME=Lab Baseline +LAB_POLICY_PARANOIA=1 +LAB_POLICY_INBOUND_THRESHOLD=5 +LAB_POLICY_OUTBOUND_THRESHOLD=4 + +# High-paranoia policy for sweep tests: PL2, anomaly threshold 3, block mode +LAB_PL2_POLICY_NAME=Lab PL2 +LAB_PL2_POLICY_PARANOIA=2 +LAB_PL2_POLICY_INBOUND_THRESHOLD=3 +LAB_PL2_POLICY_OUTBOUND_THRESHOLD=3 + +# ── Vhost domains ───────────────────────────────────────────────────────── +LAB_JUICESHOP_DOMAIN=juice.local +LAB_JUICESHOP_BACKEND_URL=http://juiceshop:3000 + +LAB_DVWA_DOMAIN=dvwa.local +LAB_DVWA_BACKEND_URL=http://dvwa:80 + +LAB_WP_DOMAIN=wp.local +LAB_WP_BACKEND_URL=http://wordpress:80 diff --git a/benchmarks/lab/docker-compose.targets.yml b/benchmarks/lab/docker-compose.targets.yml new file mode 100644 index 0000000..95caf52 --- /dev/null +++ b/benchmarks/lab/docker-compose.targets.yml @@ -0,0 +1,162 @@ +# Evaluation lab target applications. +# +# This is an OVERLAY on top of the demo stack. Run as: +# docker compose \ +# -f deploy/demo/docker-compose.yml \ +# -f benchmarks/lab/docker-compose.targets.yml \ +# --env-file deploy/demo/.env \ +# --env-file benchmarks/lab/.env \ +# up -d --build +# +# All target containers attach to gp_internal only; they are exposed +# to the outside world exclusively through HAProxy (via vhost routing). +# +# Pinned image digests ensure reproducible test results across runs. +# Update pins by running: docker pull && docker inspect --format '{{index .RepoDigests 0}}' + +name: guard-proxy-demo + +services: + + # ── OWASP Juice Shop ────────────────────────────────────────────────────── + # Intentionally vulnerable Node.js app designed for security testing. 
+ # Vhost: juice.local + juiceshop: + image: bkimminich/juice-shop:v17.1.1 + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:3000/rest/admin/application-version >/dev/null 2>&1"] + interval: 15s + timeout: 10s + retries: 10 + start_period: 30s + networks: + - gp_internal + + # ── DVWA (Damn Vulnerable Web Application) ──────────────────────────────── + # Classic PHP vulnerable app. Requires MariaDB sidecar. + # Vhost: dvwa.local + dvwa-db: + image: mariadb:11.4 + restart: unless-stopped + environment: + MARIADB_ROOT_PASSWORD: ${DVWA_DB_ROOT_PASSWORD:-dvwa_root_pw} + MARIADB_DATABASE: dvwa + MARIADB_USER: dvwa + MARIADB_PASSWORD: ${DVWA_DB_PASSWORD:-dvwa_pw} + volumes: + - dvwa_db_data:/var/lib/mysql + healthcheck: + test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"] + interval: 10s + timeout: 5s + retries: 10 + networks: + - gp_internal + + dvwa: + image: ghcr.io/digininja/dvwa:latest + restart: unless-stopped + environment: + DB_SERVER: dvwa-db + DB_DATABASE: dvwa + DB_USER: dvwa + DB_PASSWORD: ${DVWA_DB_PASSWORD:-dvwa_pw} + RECAPTCHA_PRIV_KEY: "" + RECAPTCHA_PUB_KEY: "" + depends_on: + dvwa-db: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost/setup.php >/dev/null"] + interval: 15s + timeout: 10s + retries: 10 + start_period: 20s + networks: + - gp_internal + + # ── WordPress ───────────────────────────────────────────────────────────── + # Real-world CMS. Used to measure false positive rate of the WAF against + # legitimate CMS traffic (no exclusion plugins applied — intentional). 
+ # Vhost: wp.local + wp-db: + image: mariadb:11.4 + restart: unless-stopped + environment: + MARIADB_ROOT_PASSWORD: ${WP_DB_ROOT_PASSWORD:-wp_root_pw} + MARIADB_DATABASE: wordpress + MARIADB_USER: wordpress + MARIADB_PASSWORD: ${WP_DB_PASSWORD:-wp_pw} + volumes: + - wp_db_data:/var/lib/mysql + healthcheck: + test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"] + interval: 10s + timeout: 5s + retries: 10 + networks: + - gp_internal + + wordpress: + image: wordpress:php8.3-apache + restart: unless-stopped + environment: + WORDPRESS_DB_HOST: wp-db + WORDPRESS_DB_NAME: wordpress + WORDPRESS_DB_USER: wordpress + WORDPRESS_DB_PASSWORD: ${WP_DB_PASSWORD:-wp_pw} + WORDPRESS_TABLE_PREFIX: wp_ + depends_on: + wp-db: + condition: service_healthy + volumes: + - wp_data:/var/www/html + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost/wp-login.php >/dev/null"] + interval: 15s + timeout: 10s + retries: 10 + start_period: 30s + networks: + - gp_internal + + # One-shot WP CLI container to install WordPress deterministically. + # Runs once (no restart), exits 0 after wp core install succeeds. + wp-cli: + image: wordpress:cli-php8.3 + restart: "no" + environment: + WORDPRESS_DB_HOST: wp-db + WORDPRESS_DB_NAME: wordpress + WORDPRESS_DB_USER: wordpress + WORDPRESS_DB_PASSWORD: ${WP_DB_PASSWORD:-wp_pw} + volumes: + - wp_data:/var/www/html + depends_on: + wordpress: + condition: service_healthy + command: > + sh -c " + sleep 5 && + wp core is-installed 2>/dev/null && echo 'WordPress already installed.' 
|| + wp core install + --url=http://wp.local + --title='Guard Proxy Lab' + --admin_user=admin + --admin_password=${WP_ADMIN_PASSWORD:-LabAdmin12345!} + --admin_email=admin@lab.local + --skip-email + " + networks: + - gp_internal + +networks: + gp_internal: + external: true + name: guard-proxy-demo_gp_internal + +volumes: + dvwa_db_data: + wp_db_data: + wp_data: diff --git a/benchmarks/lab/runners/collect-metrics.sh b/benchmarks/lab/runners/collect-metrics.sh new file mode 100755 index 0000000..c06fe4c --- /dev/null +++ b/benchmarks/lab/runners/collect-metrics.sh @@ -0,0 +1,167 @@ +#!/usr/bin/env bash +# collect-metrics.sh — Aggregate all scenario summaries into results.csv. +# +# Reads all summary.json files in a run directory and produces: +# benchmarks/results/run-/results.csv — flat table for thesis tables +# benchmarks/results/run-/report.json — full structured report +# +# Optionally cross-references the Coraza audit log to compute confirmed +# TP/FP counts for ZAP and Nuclei scenarios. +# +# Usage: +# RUN_ID=20260602-141500 bash benchmarks/lab/runners/collect-metrics.sh +# RUN_ID=... AUDIT_LOG=/path/to/audit.log bash benchmarks/lab/runners/collect-metrics.sh + +set -Eeuo pipefail +: "${RUN_ID:=$(date +%Y%m%d-%H%M%S)}" +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +RUN_DIR="${REPO_ROOT}/benchmarks/results/run-${RUN_ID}" +# Coraza audit log — mounted from the coraza_audit Docker volume. +# If not provided, skip audit-log cross-reference. +AUDIT_LOG="${AUDIT_LOG:-}" + +if [[ ! -d "${RUN_DIR}" ]]; then + echo "Run directory not found: ${RUN_DIR}" >&2 + echo "Set RUN_ID to an existing run." >&2 + exit 1 +fi + +echo "=== Aggregating metrics for run ${RUN_ID} ===" + +# ── Optional: extract audit log from Docker volume ───────────────────────── +if [[ -z "${AUDIT_LOG}" ]]; then + AUDIT_LOG="${RUN_DIR}/coraza-audit.log" + if ! 
docker cp "$(docker ps --filter "name=coraza" --format "{{.ID}}" | head -1)":/var/log/coraza/audit.log \ + "${AUDIT_LOG}" 2>/dev/null; then + echo "Note: could not copy audit log from coraza container. Skipping log cross-reference." + AUDIT_LOG="" + fi +fi + +# ── Aggregate summaries ──────────────────────────────────────────────────── +python3 - <6} {'FPR':>6} {'RPS':>8} {'DEG%':>6} {'p99ms':>7}") +print("-" * 70) +for row in csv_rows: + tpr = f"{float(row['tpr'])*100:.1f}%" if row['tpr'] != '' else "—" + fpr = f"{float(row['fpr'])*100:.1f}%" if row['fpr'] != '' else "—" + rps = f"{float(row['rps_waf']):.0f}" if row['rps_waf'] != '' else "—" + deg = f"{row['rps_degradation_pct']}%" if row['rps_degradation_pct'] != '' else "—" + p99 = f"{row['lat_p99_ms']}" if row['lat_p99_ms'] != '' else "—" + print(f"{row['scenario']:<35} {tpr:>6} {fpr:>6} {rps:>8} {deg:>6} {p99:>7}") +PY + +echo "" +echo "Done. Results → ${RUN_DIR}/" +echo "" +echo "To copy to thesis assets (after review):" +echo " cp ${RUN_DIR}/results.csv thesis/assets/figures/eval-results-${RUN_ID}.csv" diff --git a/benchmarks/lab/runners/lib.sh b/benchmarks/lab/runners/lib.sh new file mode 100755 index 0000000..0f5976b --- /dev/null +++ b/benchmarks/lab/runners/lib.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# lib.sh — Shared helpers for eval lab runner scripts. +# +# Source this file at the top of each runner: +# source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" +# +# Provides: REPO_ROOT, RESULTS_DIR, RUN_DIR, manifest helpers, docker network name. + +: "${RUN_ID:?RUN_ID must be set before sourcing lib.sh}" + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." && pwd)" +LAB_DIR="${REPO_ROOT}/benchmarks/lab" +RESULTS_BASE="${REPO_ROOT}/benchmarks/results" +RUN_DIR="${RESULTS_BASE}/run-${RUN_ID}" +DEMO_ENV="${REPO_ROOT}/deploy/demo/.env" +LAB_ENV="${LAB_DIR}/.env" + +# Docker network shared by the demo stack and targets. 
+DOCKER_NETWORK="guard-proxy-demo_gp_internal" + +# ── Environment helpers ──────────────────────────────────────────────────── + +env_value() { + local name="$1"; local fallback="${2:-}"; local value + value="$(grep -E "^${name}=" "${LAB_ENV}" "${DEMO_ENV}" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)" + if [[ -z "${value}" ]]; then printf '%s' "${fallback}"; else printf '%s' "${value}"; fi +} + +HAPROXY_HTTP_PORT="$(env_value HAPROXY_HTTP_PORT 8080)" +BACKEND_HTTP_PORT="$(env_value BACKEND_HTTP_PORT 8000)" +LAB_JUICESHOP_DOMAIN="$(env_value LAB_JUICESHOP_DOMAIN juice.local)" +LAB_DVWA_DOMAIN="$(env_value LAB_DVWA_DOMAIN dvwa.local)" +LAB_WP_DOMAIN="$(env_value LAB_WP_DOMAIN wp.local)" + +# ── Directory setup ──────────────────────────────────────────────────────── + +setup_run_dir() { + local scenario="$1" + local dir="${RUN_DIR}/${scenario}" + mkdir -p "${dir}" + printf '%s' "${dir}" +} + +# ── Manifest ─────────────────────────────────────────────────────────────── + +write_manifest() { + local manifest="${RUN_DIR}/manifest.json" + if [[ -f "${manifest}" ]]; then return; fi # written once per run + + local git_sha; git_sha="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo "unknown")" + local host_cpu; host_cpu="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "unknown")" + local host_mem_gb; host_mem_gb="$(awk '/^MemTotal:/{printf "%.0f", $2/1024/1024}' /proc/meminfo 2>/dev/null || echo "unknown")" + local host_load; host_load="$(cut -d' ' -f1-3 /proc/loadavg 2>/dev/null || uptime | awk -F'load averages:' '{print $2}' | xargs || echo "unknown")" + local timestamp; timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" + + python3 - </dev/null || true +} + +# Sample peak memory + avg CPU for a container over a duration. +# Writes to a file and prints the final JSON snippet. 
+sample_container_resources() { + local container_name="$1" # docker service name + local duration_s="${2:-60}" + local out_file="$3" + local interval=2 + local samples=0 + local cpu_sum=0 + local mem_peak=0 + + local end_time=$(( SECONDS + duration_s )) + while (( SECONDS < end_time )); do + local stats + stats="$(docker stats --no-stream --format '{{.CPUPerc}}\t{{.MemUsage}}' "${container_name}" 2>/dev/null || true)" + if [[ -n "${stats}" ]]; then + local cpu_pct mem_mb + cpu_pct="$(awk -F'\t' '{gsub(/%/,"",$1); print $1}' <<< "${stats}")" + mem_mb="$(awk -F'\t' '{split($2,a,/[A-Za-z]/); print a[1]+0}' <<< "${stats}")" + cpu_sum="$(python3 -c "print(${cpu_sum} + ${cpu_pct:-0})")" + if python3 -c "exit(0 if ${mem_mb:-0} > ${mem_peak} else 1)" 2>/dev/null; then + mem_peak="${mem_mb:-0}" + fi + samples=$(( samples + 1 )) + fi + sleep "${interval}" + done + + local cpu_avg=0 + if (( samples > 0 )); then + cpu_avg="$(python3 -c "print(round(${cpu_sum} / ${samples}, 2))")" + fi + + python3 - < "${out_file}" +import json +print(json.dumps({"mem_mb_peak": ${mem_peak}, "cpu_pct_avg": ${cpu_avg}, "samples": ${samples}})) +PY +} + +# ── Output helpers ───────────────────────────────────────────────────────── + +write_summary() { + local scenario="$1" + local target_vhost="$2" + local policy_name="$3" + local detection_json="$4" # {"true_positive":...,"false_negative":...,"tpr":...,"fpr":...} + local performance_json="$5" # {"rps":...,"latency_ms":...} or {} + local resources_json="${6:-{}}" + + python3 - </ftw/raw.json (go-ftw JSON output) +# benchmarks/results/run-/ftw/summary.json +# +# Usage: +# RUN_ID=20260602-141500 bash benchmarks/lab/runners/run-ftw.sh +# RUN_ID=... 
TARGET_VHOST=dvwa.local bash benchmarks/lab/runners/run-ftw.sh + +set -Eeuo pipefail +: "${RUN_ID:=$(date +%Y%m%d-%H%M%S)}" +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +TARGET_VHOST="${TARGET_VHOST:-${LAB_JUICESHOP_DOMAIN}}" +FTW_IMAGE="ghcr.io/coreruleset/go-ftw:v1.4.0" +CRS_TESTS="${REPO_ROOT}/configs/coraza/crs/tests/regression/tests" +FTW_CONFIG="${REPO_ROOT}/benchmarks/lab/scenarios/crs-ftw/config.yaml" + +if [[ ! -d "${CRS_TESTS}" ]]; then + echo "CRS test corpus not found at ${CRS_TESTS}." >&2 + echo "Run: git submodule update --init --recursive" >&2 + exit 1 +fi + +write_manifest +OUT_DIR="$(setup_run_dir ftw)" + +echo "=== CRS regression (go-ftw) ===" +echo "Target vhost : ${TARGET_VHOST}" +echo "Output dir : ${OUT_DIR}" +echo "Image : ${FTW_IMAGE}" +echo "" + +docker run --rm \ + --network "${DOCKER_NETWORK}" \ + -v "${CRS_TESTS}:/tests:ro" \ + -v "${FTW_CONFIG}:/config.yaml:ro" \ + "${FTW_IMAGE}" \ + run \ + --config /config.yaml \ + --dir /tests \ + --output json \ + > "${OUT_DIR}/raw.json" 2> "${OUT_DIR}/stderr.txt" || true + +echo "go-ftw complete. Parsing results..." + +python3 - < 0 else 0.0 +fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0 + +detection = { + "true_positive": tp, + "false_negative": fn, + "true_negative": tn, + "false_positive": fp, + "tpr": round(tpr, 4), + "fpr": round(fpr, 4), + "total_cases": total, + "skipped": skipped, + "note": "TP/FP split estimated from pass/fail totals (attack_ratio=0.85). Re-run with go-ftw v2 --output json-per-test for exact split." +} + +print(json.dumps(detection, indent=2)) + +with open("${OUT_DIR}/detection.json", "w") as f: + json.dump(detection, f, indent=2) +PY + +# Write final summary.json. 
+DETECTION="$(cat "${OUT_DIR}/detection.json")" +POLICY_NAME="$(env_value LAB_POLICY_NAME 'Lab Baseline')" + +write_summary "ftw" "${TARGET_VHOST}" "${POLICY_NAME}" "${DETECTION}" "{}" "{}" + +echo "" +echo "FTW TPR: $(python3 -c "import json; d=json.load(open('${OUT_DIR}/detection.json')); print(f\"{d['tpr']*100:.1f}%\")")" +echo "Results → ${OUT_DIR}/" diff --git a/benchmarks/lab/runners/run-load.sh b/benchmarks/lab/runners/run-load.sh new file mode 100755 index 0000000..f73b98a --- /dev/null +++ b/benchmarks/lab/runners/run-load.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +# run-load.sh — Latency and RPS measurement (WAF vs direct). +# +# Runs wrk twice against each target: +# 1. Through HAProxy+Coraza (production path) +# 2. Directly against the target container (bypass WAF) +# +# The delta is the WAF overhead: latency (p50/p95/p99) and RPS degradation %. +# +# Simultaneously samples coraza + haproxy container resource usage. +# +# Output: +# benchmarks/results/run-/load-/waf.txt +# benchmarks/results/run-/load-/direct.txt +# benchmarks/results/run-/load-/resources-coraza.json +# benchmarks/results/run-/load-/resources-haproxy.json +# benchmarks/results/run-/load-/summary.json +# +# Usage: +# RUN_ID=... bash benchmarks/lab/runners/run-load.sh +# RUN_ID=... 
TARGET_VHOST=juice.local DIRECT_HOST=juiceshop DIRECT_PORT=3000 \ +# bash benchmarks/lab/runners/run-load.sh + +set -Eeuo pipefail +: "${RUN_ID:=$(date +%Y%m%d-%H%M%S)}" +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +TARGET_VHOST="${TARGET_VHOST:-${LAB_JUICESHOP_DOMAIN}}" +DIRECT_HOST="${DIRECT_HOST:-juiceshop}" # Docker service name for direct access +DIRECT_PORT="${DIRECT_PORT:-3000}" # Target app port (no HAProxy) +WRK_IMAGE="ghcr.io/williamyeh/wrk:4.2.0" +LUA_SCRIPT="${REPO_ROOT}/benchmarks/lab/scenarios/load/benign-mix.lua" + +THREADS="${LOAD_THREADS:-4}" +CONNECTIONS="${LOAD_CONNECTIONS:-50}" +DURATION="${LOAD_DURATION:-60s}" + +write_manifest +SCENARIO="load-${TARGET_VHOST}" +OUT_DIR="$(setup_run_dir "${SCENARIO}")" + +echo "=== Load test: WAF vs direct ===" +echo "Target vhost : ${TARGET_VHOST}" +echo "Direct host : ${DIRECT_HOST}:${DIRECT_PORT}" +echo "Load : ${THREADS} threads, ${CONNECTIONS} connections, ${DURATION}" +echo "Output dir : ${OUT_DIR}" +echo "" + +# ── Through WAF ──────────────────────────────────────────────────────────── + +echo "--- Run 1: through HAProxy+Coraza ---" + +# Start resource sampling in the background during this run. +CORAZA_CONTAINER="$(docker ps --filter "name=coraza" --format "{{.Names}}" | head -1 || true)" +HAPROXY_CONTAINER="$(docker ps --filter "name=haproxy" --format "{{.Names}}" | head -1 || true)" + +# Convert duration string to seconds for sampler. +DURATION_S="$(echo "${DURATION}" | sed 's/s$//')" + +if [[ -n "${CORAZA_CONTAINER}" ]]; then + sample_container_resources "${CORAZA_CONTAINER}" "${DURATION_S}" "${OUT_DIR}/resources-coraza.json" & + SAMPLER_CORAZA_PID=$! +fi +if [[ -n "${HAPROXY_CONTAINER}" ]]; then + sample_container_resources "${HAPROXY_CONTAINER}" "${DURATION_S}" "${OUT_DIR}/resources-haproxy.json" & + SAMPLER_HAPROXY_PID=$! 
+fi + +docker run --rm \ + --network "${DOCKER_NETWORK}" \ + -v "${LUA_SCRIPT}:/benign-mix.lua:ro" \ + -e "LOAD_VHOST=${TARGET_VHOST}" \ + "${WRK_IMAGE}" \ + -t "${THREADS}" -c "${CONNECTIONS}" -d "${DURATION}" \ + -s /benign-mix.lua \ + --latency \ + "http://haproxy:80/" \ + > "${OUT_DIR}/waf.txt" 2>&1 + +# Wait for samplers to finish. +wait "${SAMPLER_CORAZA_PID:-}" 2>/dev/null || true +wait "${SAMPLER_HAPROXY_PID:-}" 2>/dev/null || true + +echo "WAF run complete. Output: ${OUT_DIR}/waf.txt" + +# ── Direct (bypass WAF) ──────────────────────────────────────────────────── + +echo "--- Run 2: direct to ${DIRECT_HOST}:${DIRECT_PORT} ---" + +docker run --rm \ + --network "${DOCKER_NETWORK}" \ + -v "${LUA_SCRIPT}:/benign-mix.lua:ro" \ + -e "LOAD_VHOST=${TARGET_VHOST}" \ + "${WRK_IMAGE}" \ + -t "${THREADS}" -c "${CONNECTIONS}" -d "${DURATION}" \ + -s /benign-mix.lua \ + --latency \ + "http://${DIRECT_HOST}:${DIRECT_PORT}/" \ + > "${OUT_DIR}/direct.txt" 2>&1 + +echo "Direct run complete. Output: ${OUT_DIR}/direct.txt" + +# ── Parse & compute overhead ─────────────────────────────────────────────── + +echo "Parsing results..." +# The heredoc delimiter is left unquoted on purpose: the Python source embeds ${OUT_DIR}, ${THREADS}, ${CONNECTIONS} and ${DURATION}, which only the shell can expand. +python3 - <<PY +import re, json, os + +def parse_wrk(path): + """Parse wrk --latency output into a structured dict.""" + if not os.path.exists(path): + return {} + text = open(path).read() + + def find_us(pattern): + m = re.search(pattern, text, re.IGNORECASE) + if not m: return None + val, unit = float(m.group(1)), m.group(2).lower() + multipliers = {"us": 1, "ms": 1000, "s": 1_000_000} + return val * multipliers.get(unit, 1) + + # Latency percentiles from the --latency histogram section. NOTE(review): stock wrk prints only the 50/75/90/99% rows, so the 95% pattern presumably never matches and p95 stays None — confirm against benign-mix.lua. 
+ p50 = find_us(r'50%\s+([\d.]+)(\w+)') + p95 = find_us(r'95%\s+([\d.]+)(\w+)') + p99 = find_us(r'99%\s+([\d.]+)(\w+)') + + # RPS from the summary line: "Requests/sec: 1234.56" + rps_m = re.search(r'Requests/sec:\s+([\d.]+)', text) + rps = float(rps_m.group(1)) if rps_m else None + + return { + "latency_us": {"p50": p50, "p95": p95, "p99": p99}, + "rps": rps, + "raw_path": path + } + +waf = parse_wrk("${OUT_DIR}/waf.txt") +direct = parse_wrk("${OUT_DIR}/direct.txt") + +def us_to_ms(us): + return round(us / 1000, 3) if us is not None else None + +def pct_degradation(waf_val, direct_val): + if waf_val and direct_val and direct_val > 0: + return round((direct_val - waf_val) / direct_val * 100, 2) + return None + +waf_rps = waf.get("rps") +direct_rps = direct.get("rps") +rps_deg = None +if waf_rps and direct_rps and direct_rps > 0: + rps_deg = round((direct_rps - waf_rps) / direct_rps * 100, 2) + +waf_lat = waf.get("latency_us", {}) +direct_lat = direct.get("latency_us", {}) + +performance = { + "rps": waf_rps, + "baseline_rps": direct_rps, + "rps_degradation_pct": rps_deg, + "latency_ms": { + "p50": us_to_ms(waf_lat.get("p50")), + "p95": us_to_ms(waf_lat.get("p95")), + "p99": us_to_ms(waf_lat.get("p99")), + }, + "latency_overhead_ms": { + "p50": us_to_ms((waf_lat.get("p50") or 0) - (direct_lat.get("p50") or 0)), + "p95": us_to_ms((waf_lat.get("p95") or 0) - (direct_lat.get("p95") or 0)), + "p99": us_to_ms((waf_lat.get("p99") or 0) - (direct_lat.get("p99") or 0)), + }, + "config": { + "threads": int("${THREADS}"), + "connections": int("${CONNECTIONS}"), + "duration": "${DURATION}" + } +} + +print(json.dumps(performance, indent=2)) + +with open("${OUT_DIR}/performance.json", "w") as f: + json.dump(performance, f, indent=2) +PY + +PERFORMANCE="$(cat "${OUT_DIR}/performance.json")" +RESOURCES_CORAZA="$(cat "${OUT_DIR}/resources-coraza.json" 2>/dev/null || echo '{}')" +RESOURCES_HAPROXY="$(cat "${OUT_DIR}/resources-haproxy.json" 2>/dev/null || echo '{}')" + 
+RESOURCES_JSON="$(python3 -c " +import json, sys +c = json.loads('''${RESOURCES_CORAZA}''') +h = json.loads('''${RESOURCES_HAPROXY}''') +print(json.dumps({'coraza': c, 'haproxy': h})) +")" +POLICY_NAME="$(env_value LAB_POLICY_NAME 'Lab Baseline')" + +write_summary "${SCENARIO}" "${TARGET_VHOST}" "${POLICY_NAME}" "{}" "${PERFORMANCE}" "${RESOURCES_JSON}" + +echo "" +python3 - </nuclei-/raw.jsonl +# benchmarks/results/run-/nuclei-/summary.json +# +# Usage: +# RUN_ID=... bash benchmarks/lab/runners/run-nuclei.sh +# RUN_ID=... TARGET_VHOST=juice.local bash benchmarks/lab/runners/run-nuclei.sh + +set -Eeuo pipefail +: "${RUN_ID:=$(date +%Y%m%d-%H%M%S)}" +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +TARGET_VHOST="${TARGET_VHOST:-${LAB_JUICESHOP_DOMAIN}}" +NUCLEI_IMAGE="projectdiscovery/nuclei:v3.3.9" +NUCLEI_CONF="${REPO_ROOT}/benchmarks/lab/scenarios/nuclei/nuclei.yaml" + +# HAProxy on port 80 inside gp_internal, Host: header injected per-request. +TARGET_URL="http://haproxy:80" + +write_manifest +SCENARIO="nuclei-${TARGET_VHOST}" +OUT_DIR="$(setup_run_dir "${SCENARIO}")" +export OUT_DIR TARGET_VHOST # must be set before the Python heredoc reads os.environ + +echo "=== Nuclei CVE template scan ===" +echo "Target vhost : ${TARGET_VHOST} → ${TARGET_URL}" +echo "Output dir : ${OUT_DIR}" +echo "Image : ${NUCLEI_IMAGE}" +echo "" + +# Pull nuclei-templates inside the container on first run (cached on next run +# by mounting a local volume). The -header flag injects the Host: vhost. +docker run --rm \ + --network "${DOCKER_NETWORK}" \ + -v "${OUT_DIR}:/output:rw" \ + -v "${NUCLEI_CONF}:/nuclei.yaml:ro" \ + "${NUCLEI_IMAGE}" \ + -config /nuclei.yaml \ + -u "${TARGET_URL}" \ + -header "Host: ${TARGET_VHOST}" \ + -jsonl -output /output/raw.jsonl \ + -update-templates \ + 2>/dev/null || true + +echo "Nuclei scan complete. Parsing results..." 
+ +python3 - <<'PY' +import json, os + +out_dir = os.environ.get("OUT_DIR", ".") +raw_file = os.path.join(out_dir, "raw.jsonl") +vhost = os.environ.get("TARGET_VHOST", "unknown") + +findings = [] +if os.path.exists(raw_file): + with open(raw_file) as f: + for line in f: + line = line.strip() + if line: + try: + findings.append(json.loads(line)) + except json.JSONDecodeError: + pass + +# Severity classification for WAF TPR estimation. +# Nuclei findings with severity critical/high/medium are WAF-relevant attacks. +# Each finding represents a template match — the request was NOT blocked by the WAF +# (nuclei receives a response), so these are False Negatives from the WAF's perspective. +# Requests that were blocked (WAF returned 403) produce connection errors / 403 responses +# in Nuclei and typically don't generate a finding for the underlying vulnerability. + +WAF_RELEVANT = {"critical", "high", "medium"} +fn_findings = [f for f in findings if f.get("info", {}).get("severity", "").lower() in WAF_RELEVANT] +info_findings = [f for f in findings if f.get("info", {}).get("severity", "").lower() not in WAF_RELEVANT] + +detection = { + "total_findings": len(findings), + "waf_relevant_findings": len(fn_findings), + "info_findings": len(info_findings), + "note": "Each waf_relevant_finding is a potential WAF false-negative (attack payload reached the app). 
Run collect-metrics.sh to cross-reference with audit log for confirmed TP/FN split.", + "top_findings": [ + { + "template_id": f.get("template-id"), + "severity": f.get("info", {}).get("severity"), + "name": f.get("info", {}).get("name"), + "matched_at": f.get("matched-at") + } + for f in sorted(fn_findings, key=lambda x: {"critical":0,"high":1,"medium":2}.get(x.get("info",{}).get("severity",""),3))[:20] + ] +} + +print(json.dumps(detection, indent=2)) + +with open(os.path.join(out_dir, "detection.json"), "w") as f: + json.dump(detection, f, indent=2) +PY + +DETECTION="$(cat "${OUT_DIR}/detection.json")" +POLICY_NAME="$(env_value LAB_POLICY_NAME 'Lab Baseline')" + +write_summary "${SCENARIO}" "${TARGET_VHOST}" "${POLICY_NAME}" "${DETECTION}" "{}" "{}" + +echo "" +echo "Nuclei findings (waf-relevant): $(python3 -c "import json; d=json.load(open('${OUT_DIR}/detection.json')); print(d.get('waf_relevant_findings', 'n/a'))")" +echo "Results → ${OUT_DIR}/" diff --git a/benchmarks/lab/runners/run-zap.sh b/benchmarks/lab/runners/run-zap.sh new file mode 100755 index 0000000..4bcbc1c --- /dev/null +++ b/benchmarks/lab/runners/run-zap.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# run-zap.sh — OWASP ZAP baseline scan for false positive measurement. +# +# Runs a ZAP baseline scan (spider + passive scan; zap-baseline.py sends no +# active attacks) against one lab target vhost through HAProxy and summarises +# the alerts by risk level. TP/FP classification is not done here — it needs +# the audit-log cross-reference performed by collect-metrics.sh. +# +# Output: +# benchmarks/results/run-RUN_ID/zap-TARGET_VHOST/zap.json +# benchmarks/results/run-RUN_ID/zap-TARGET_VHOST/zap.html +# benchmarks/results/run-RUN_ID/zap-TARGET_VHOST/summary.json +# +# Usage: +# RUN_ID=... bash benchmarks/lab/runners/run-zap.sh +# RUN_ID=... 
TARGET_VHOST=wp.local bash benchmarks/lab/runners/run-zap.sh + +set -Eeuo pipefail +: "${RUN_ID:=$(date +%Y%m%d-%H%M%S)}" +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +TARGET_VHOST="${TARGET_VHOST:-${LAB_WP_DOMAIN}}" # default: WordPress (best FPR target) +ZAP_IMAGE="ghcr.io/zaproxy/zaproxy:stable" +ZAP_CONF="${REPO_ROOT}/benchmarks/lab/scenarios/zap/zap-baseline.conf" + +# HAProxy listens on port 80 inside gp_internal; ZAP container joins that network. +TARGET_URL="http://haproxy:80" + +write_manifest +SCENARIO="zap-${TARGET_VHOST}" +OUT_DIR="$(setup_run_dir "${SCENARIO}")" +export OUT_DIR # must be set before the Python heredoc reads os.environ + +echo "=== OWASP ZAP baseline scan ===" +echo "Target vhost : ${TARGET_VHOST} → ${TARGET_URL}" +echo "Output dir : ${OUT_DIR}" +echo "Image : ${ZAP_IMAGE}" +echo "" + +# ZAP needs a writable /zap/wrk directory for reports. +# The Host: header is injected via ZAP's built-in HTTP Request Header Replacer +# so that every request ZAP sends to haproxy:80 carries the correct vhost name +# and HAProxy routes it to the right backend. +docker run --rm \ + --network "${DOCKER_NETWORK}" \ + -v "${OUT_DIR}:/zap/wrk:rw" \ + -v "${ZAP_CONF}:/zap/rules.conf:ro" \ + "${ZAP_IMAGE}" \ + zap-baseline.py \ + -t "${TARGET_URL}" \ + -c /zap/rules.conf \ + -J zap.json \ + -r zap.html \ + -I \ + -config "replacer.full_list(0).description=host-header" \ + -config "replacer.full_list(0).enabled=true" \ + -config "replacer.full_list(0).matchtype=REQ_HEADER" \ + -config "replacer.full_list(0).matchstr=Host" \ + -config "replacer.full_list(0).replacement=${TARGET_VHOST}" \ + -config "replacer.full_list(0).initiators=" \ + > "${OUT_DIR}/zap-stdout.txt" 2>&1 || true + +echo "ZAP scan complete. Parsing results..." + +# Parse ZAP JSON output and compute a detection summary. +# ZAP alerts with risk >= Medium against the WAF-proxied target are WAF-visible +# attacks. The WAF's job on ZAP traffic: +# - Block high-risk attacks (SQLi, XSS, ...) 
→ TP if blocked, FN if passed +# - Allow legitimate ZAP probes (header checks, info gathering) → TN if allowed, FP if blocked + +python3 - <<'PY' +import json, sys, os + +out_dir = os.environ.get("OUT_DIR", ".") +zap_json = os.path.join(out_dir, "zap.json") + +if not os.path.exists(zap_json): + # Still write detection.json: the shell script reads it unconditionally after + # this heredoc, so a missing file would abort the run under `set -e`. + detection = {"error": "zap.json not found — scan may have failed or produced no output"} + print(json.dumps(detection)) + with open(os.path.join(out_dir, "detection.json"), "w") as f: + json.dump(detection, f, indent=2) + sys.exit(0) + +with open(zap_json) as f: + report = json.load(f) + +# ZAP JSON structure: {"site": [{"alerts": [{"riskcode":"3","alert":"SQL Injection",...}]}]} +alerts = [] +for site in report.get("site", []): + alerts.extend(site.get("alerts", [])) + +# Classify alerts: risk 2 (Medium) or 3 (High) are WAF-relevant attack signals. +# Risk 0 (Informational) / 1 (Low) are cosmetic — not WAF signals. +ATTACK_RISKS = {2, 3} # Medium, High +attack_alerts = [a for a in alerts if int(a.get("riskcode", 0)) in ATTACK_RISKS] +info_alerts = [a for a in alerts if int(a.get("riskcode", 0)) not in ATTACK_RISKS] + +total_attack_instances = sum(int(a.get("count", 1)) for a in attack_alerts) +total_info_instances = sum(int(a.get("count", 1)) for a in info_alerts) + +# We cannot directly observe WAF blocks from ZAP output alone (ZAP sees the +# app's response, not the WAF block). A separate audit-log cross-reference is +# done in collect-metrics.sh. Here we report ZAP findings as-is. +detection = { + "total_alerts": len(alerts), + "attack_severity_alerts": len(attack_alerts), + "info_severity_alerts": len(info_alerts), + "attack_instances": total_attack_instances, + "info_instances": total_info_instances, + "top_alerts": [ + {"risk": a.get("riskdesc"), "name": a.get("alert"), "count": a.get("count")} + for a in sorted(attack_alerts, key=lambda x: -int(x.get("riskcode", 0)))[:10] + ], + "note": "TP/FP counts require audit-log cross-reference — run collect-metrics.sh after all scenarios." 
+} + +print(json.dumps(detection, indent=2)) + +with open(os.path.join(out_dir, "detection.json"), "w") as f: + json.dump(detection, f, indent=2) +PY + +DETECTION="$(cat "${OUT_DIR}/detection.json")" +POLICY_NAME="$(env_value LAB_POLICY_NAME 'Lab Baseline')" + +write_summary "${SCENARIO}" "${TARGET_VHOST}" "${POLICY_NAME}" "${DETECTION}" "{}" "{}" + +echo "" +echo "ZAP alerts: $(python3 -c "import json; d=json.load(open('${OUT_DIR}/detection.json')); print(d.get('total_alerts', 'n/a'))")" +echo "Results → ${OUT_DIR}/" diff --git a/benchmarks/lab/scenarios/crs-ftw/config.yaml b/benchmarks/lab/scenarios/crs-ftw/config.yaml new file mode 100644 index 0000000..832968e --- /dev/null +++ b/benchmarks/lab/scenarios/crs-ftw/config.yaml @@ -0,0 +1,42 @@ +# go-ftw configuration for CRS regression suite against guard-proxy. +# +# The OWASP CRS test corpus lives in configs/coraza/crs/tests/regression/tests/ +# (the CRS git submodule). go-ftw replays each labeled test case against the +# live HAProxy+Coraza stack and reports pass/fail per rule. +# +# Usage (from repo root): +# docker run --rm \ +# --network guard-proxy-demo_gp_internal \ +# -v "$(pwd)/configs/coraza/crs/tests/regression/tests:/tests:ro" \ +# -v "$(pwd)/benchmarks/lab/scenarios/crs-ftw/config.yaml:/config.yaml:ro" \ +# ghcr.io/coreruleset/go-ftw:latest \ +# run --config /config.yaml --dir /tests --output json \ +# > benchmarks/results/run-/crs-ftw/raw.json + +# The proxy address as seen from inside the Docker network. +# go-ftw sends requests to this host; Host: headers come from the test yamls +# but we override the destination to route through HAProxy. +override_destination: + address: haproxy + port: 80 + protocol: http + +# Expect the WAF to return 403 on blocked requests (as configured in haproxy.cfg). +# go-ftw uses this to determine whether a "deny" expectation was met. 
+# +# Default expected status for blocked requests: +# 200 = pass-through (WAF allowed) +# 403 = blocked by WAF +# others = treated as unexpected + +# Log file to inspect for WAF audit entries during test runs. +# Mounted read-only from the coraza_audit volume in the runner script. +# go-ftw tail-reads this to correlate rule firings with test outcomes. +logfile: /var/log/coraza/audit.log + +# Maximum time to wait for a response per test case. +timeout: 10s + +# Log-marker search limits: how many times go-ftw retries looking for its +# marker in the log file, and how many log lines it scans for it. +# NOTE(review): go-ftw's README spells these keys maxmarkerretries / +# maxmarkerloglines — confirm the snake_case names below are accepted. +max_marker_retries: 3 +max_marker_log_lines: 500 diff --git a/benchmarks/lab/scenarios/load/benign-mix.lua b/benchmarks/lab/scenarios/load/benign-mix.lua new file mode 100644 index 0000000..f8c1804 --- /dev/null +++ b/benchmarks/lab/scenarios/load/benign-mix.lua @@ -0,0 +1,63 @@ +-- benign-mix.lua — wrk Lua script for realistic benign load. +-- +-- Cycles through a mix of legitimate-looking HTTP requests against a +-- target vhost. Used to measure baseline latency / RPS (no WAF) +-- and WAF-in-path latency / RPS (through HAProxy+Coraza). +-- +-- Usage: +-- wrk -t4 -c50 -d60s -s benchmarks/lab/scenarios/load/benign-mix.lua \ +-- --latency http://HOST:PORT/ +-- +-- The Host: header is injected per-request so HAProxy routes to the +-- correct vhost. Set the LOAD_VHOST env var to override the default vhost. + +local vhost = os.getenv("LOAD_VHOST") or "juice.local" + +-- Request pool: realistic paths for the target application. +-- Add/remove paths to match the target's URL surface. 
+local requests = { + { method = "GET", path = "/", body = nil }, + { method = "GET", path = "/index.html", body = nil }, + { method = "GET", path = "/rest/admin/application-version", body = nil }, + { method = "GET", path = "/api/v1/status", body = nil }, + { method = "GET", path = "/search?q=apple", body = nil }, + { method = "GET", path = "/search?q=login", body = nil }, + { method = "GET", path = "/robots.txt", body = nil }, + { method = "GET", path = "/favicon.ico", body = nil }, + { method = "POST", path = "/api/v1/user/login", + body = '{"email":"user@example.com","password":"password123"}' }, +} + +local idx = 0 + +function request() + idx = (idx % #requests) + 1 + local r = requests[idx] + local hdrs = { + ["Host"] = vhost, + ["User-Agent"] = "Mozilla/5.0 (eval-lab/1.0)", + ["Accept"] = "application/json, text/html, */*", + ["Connection"] = "keep-alive", + } + if r.body then + hdrs["Content-Type"] = "application/json" + hdrs["Content-Length"] = tostring(#r.body) + return wrk.format(r.method, r.path, hdrs, r.body) + end + return wrk.format(r.method, r.path, hdrs, nil) +end + +function done(summary, latency, requests_per_sec) + -- Print a machine-readable summary line for collect-metrics.sh to parse. + io.write(string.format( + "WRK_SUMMARY requests=%d duration_us=%d rps=%.2f ".. + "lat_p50_us=%d lat_p95_us=%d lat_p99_us=%d errors=%d\n", + summary.requests, + summary.duration, + summary.requests / (summary.duration / 1e6), + latency:percentile(50), + latency:percentile(95), + latency:percentile(99), + summary.errors.connect + summary.errors.read + summary.errors.write + summary.errors.status + )) +end diff --git a/benchmarks/lab/scenarios/nuclei/nuclei.yaml b/benchmarks/lab/scenarios/nuclei/nuclei.yaml new file mode 100644 index 0000000..a3943a7 --- /dev/null +++ b/benchmarks/lab/scenarios/nuclei/nuclei.yaml @@ -0,0 +1,32 @@ +# Nuclei configuration for guard-proxy WAF evaluation. 
+# +# Template selection: broad coverage of common CVEs and exposures that +# the CRS ruleset is designed to detect, without noisy/dangerous templates. +# +# Reference: https://nuclei.projectdiscovery.io/templating-guide/ + +# Template tags to include (comma-separated). +# Focus on attack categories the WAF is expected to block. +tags: sqli,xss,lfi,rfi,ssrf,injection,traversal,exposure + +# Template severity levels to run. +severity: low,medium,high,critical + +# Exclude templates tagged dos, fuzz, or helpers (disruptive or noisy for this evaluation). +exclude-tags: dos,fuzz,helpers + +# Rate limiting — be gentle on the WAF under test to avoid saturating it +# before the dedicated load test runs. +rate-limit: 50 +bulk-size: 10 +concurrency: 5 + +# Timeout per request, in seconds. +timeout: 10 + +# Retry on network errors. +retries: 1 + +# Skip a host after this many errors (e.g. connection failures or timeouts) so +# an unreachable target does not stall the whole scan. +max-host-error: 30 diff --git a/benchmarks/lab/scenarios/nuclei/targets.txt b/benchmarks/lab/scenarios/nuclei/targets.txt new file mode 100644 index 0000000..ddf0259 --- /dev/null +++ b/benchmarks/lab/scenarios/nuclei/targets.txt @@ -0,0 +1,8 @@ +# Nuclei target list for eval lab. +# Format: one URL per line (scheme://host[:port] — no path). +# Hostnames are resolved via Docker internal DNS when the container +# runs on the gp_internal network. +# +# Targets hit HAProxy on port 80 with the vhost Host: header. +# Nuclei resolves "haproxy" as the Docker service name when run on gp_internal. +http://haproxy:80 diff --git a/benchmarks/lab/scenarios/zap/alert-filter.yaml b/benchmarks/lab/scenarios/zap/alert-filter.yaml new file mode 100644 index 0000000..c0da837 --- /dev/null +++ b/benchmarks/lab/scenarios/zap/alert-filter.yaml @@ -0,0 +1,27 @@ +# ZAP alert filter — suppress known false positives for the eval lab. 
+# +# These alerts are expected for the lab setup (self-signed certs, dev configs) +# and should not count toward the FPR measurement. +# +# Reference: https://www.zaproxy.org/docs/desktop/addons/alert-filters/ + +alertfilters: + # Content Security Policy (CSP) header not set — cosmetic for lab targets + - ruleId: 10038 + newRisk: False Positive + + # X-Content-Type-Options not set — cosmetic for lab targets + - ruleId: 10021 + newRisk: False Positive + + # Cache-Control headers — not relevant to WAF testing + - ruleId: 10015 + newRisk: False Positive + + # Server header leakage — lab only, not a WAF effectiveness signal + - ruleId: 10036 + newRisk: False Positive + + # Information disclosure: suspicious comments — lab only + - ruleId: 10027 + newRisk: False Positive diff --git a/benchmarks/lab/scenarios/zap/zap-baseline.conf b/benchmarks/lab/scenarios/zap/zap-baseline.conf new file mode 100644 index 0000000..5827a37 --- /dev/null +++ b/benchmarks/lab/scenarios/zap/zap-baseline.conf @@ -0,0 +1,22 @@ +# ZAP baseline scan options. +# Used by run-zap.sh via zap-baseline.py -c zap-baseline.conf +# +# Format: RULE_ID=ACTION (one rule per line) +# Actions: IGNORE, WARN (default), FAIL +# NOTE(review): the config that zap-baseline.py generates with -g is +# tab-separated (id, action, name) — confirm this '=' form is actually parsed. +# +# Suppress purely cosmetic/config alerts so the scan report focuses on +# WAF-relevant security findings. Keep FAIL for high-severity issues that +# should always be reported regardless of WAF state. + +# Missing security headers — suppressed (not WAF signals) +10021=IGNORE +10015=IGNORE +10038=IGNORE +10036=IGNORE + +# CSP not set — suppress for lab targets (10038 is already listed above) +10038=IGNORE + +# Cookies without secure flag — ignore for HTTP lab +# NOTE(review): 10012 appears to be ZAP's "Password Autocomplete in Browser" +# rule rather than a cookie rule — confirm the intended ID. +10011=IGNORE +10012=IGNORE diff --git a/benchmarks/lab/setup-lab.sh b/benchmarks/lab/setup-lab.sh new file mode 100755 index 0000000..2b2c7d4 --- /dev/null +++ b/benchmarks/lab/setup-lab.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# setup-lab.sh — Bring up the evaluation lab and register all target vhosts. 
+# +# Extends the demo stack with WordPress/Juice Shop/DVWA targets, seeds two +# WAF policies (baseline PL1 and high-paranoia PL2), and wires each target +# domain through HAProxy via the guard-proxy backend API. +# +# Prerequisites: +# - deploy/demo/.env (copy from deploy/demo/.env.example) +# - benchmarks/lab/.env (copy from benchmarks/lab/.env.example) +# - CRS submodule initialised: git submodule update --init --recursive +# - Docker with Docker Compose v2 +# +# Usage: ./benchmarks/lab/setup-lab.sh [--skip-compose] + +set -Eeuo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/../.." && pwd)" +DEMO_COMPOSE="${REPO_ROOT}/deploy/demo/docker-compose.yml" +TARGETS_COMPOSE="${SCRIPT_DIR}/docker-compose.targets.yml" +DEMO_ENV="${REPO_ROOT}/deploy/demo/.env" +LAB_ENV="${SCRIPT_DIR}/.env" +TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-240}" +SKIP_COMPOSE=false + +for arg in "$@"; do + case "$arg" in + --skip-compose) SKIP_COMPOSE=true ;; + esac +done + +for f in "${DEMO_ENV}" "${LAB_ENV}"; do + if [[ ! -f "${f}" ]]; then + echo "Missing ${f}. Copy the matching .env.example first." >&2 + exit 1 + fi +done + +if docker compose version >/dev/null 2>&1; then + COMPOSE=(docker compose -f "${DEMO_COMPOSE}" -f "${TARGETS_COMPOSE}" --env-file "${DEMO_ENV}" --env-file "${LAB_ENV}") +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE=(docker-compose -f "${DEMO_COMPOSE}" -f "${TARGETS_COMPOSE}" --env-file "${DEMO_ENV}" --env-file "${LAB_ENV}") +else + echo "Docker Compose is required." 
>&2 + exit 1 +fi + +# ── Helpers (mirrored from deploy/demo/setup-demo.sh) ────────────────────── + +env_value() { + local name="$1" + local fallback="${2:-}" + local value + value="$(grep -E "^${name}=" "${LAB_ENV}" "${DEMO_ENV}" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)" + if [[ -z "${value}" ]]; then printf '%s' "${fallback}"; else printf '%s' "${value}"; fi +} + +json_string() { + python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "$1" +} + +api_json() { + local method="$1"; local path="$2"; local token="${3:-}"; local body="${4:-}" + local response_file http_code + response_file="$(mktemp)" + if [[ -n "${body}" ]]; then + http_code="$(curl --silent --show-error --output "${response_file}" --write-out '%{http_code}' \ + --request "${method}" --header "Content-Type: application/json" \ + ${token:+--header "Authorization: Bearer ${token}"} --data "${body}" "${API_BASE_URL}${path}")" + else + http_code="$(curl --silent --show-error --output "${response_file}" --write-out '%{http_code}' \ + --request "${method}" ${token:+--header "Authorization: Bearer ${token}"} "${API_BASE_URL}${path}")" + fi + if [[ "${http_code}" -lt 200 || "${http_code}" -ge 300 ]]; then + echo "API ${method} ${path} failed with HTTP ${http_code}:" >&2 + cat "${response_file}" >&2; rm -f "${response_file}"; return 1 + fi + cat "${response_file}"; rm -f "${response_file}" +} + +health_status() { + local service="$1"; local id + id="$("${COMPOSE[@]}" ps -q "${service}" 2>/dev/null || true)" + if [[ -z "${id}" ]]; then echo "missing"; return; fi + docker inspect --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' "${id}" +} + +wait_for_healthy() { + local service="$1"; local deadline=$((SECONDS + TIMEOUT_SECONDS)); local status + echo "Waiting for ${service}..." 
+ while (( SECONDS < deadline )); do + status="$(health_status "${service}")" + case "${status}" in + healthy) echo "${service} is healthy."; return 0 ;; + exited|dead) echo "${service} is ${status}." >&2; return 1 ;; + esac + sleep 3 + done + echo "Timed out waiting for ${service}; last status: ${status:-unknown}." >&2; return 1 +} + +ensure_crs_bundle() { + if compgen -G "${REPO_ROOT}/configs/coraza/crs/rules/*.conf" >/dev/null; then return; fi + echo "Missing OWASP CRS rules in configs/coraza/crs." >&2 + echo "Run: git submodule update --init --recursive" >&2; exit 1 +} + +ensure_policy() { + local name="$1"; local body="$2" + echo "Ensuring WAF policy '${name}' exists..." + local response + response="$(api_json POST /policies "${token}" "${body}" || true)" + if [[ -z "${response}" ]]; then + response="$(api_json GET /policies "${token}")" + fi + POLICY_NAME="${name}" POLICY_RESPONSE="${response}" python3 - <<'PY' +import json, sys, os +data = json.loads(os.environ["POLICY_RESPONSE"]) +name = os.environ["POLICY_NAME"] +items = data if isinstance(data, list) else [data] +for item in items: + if item["name"] == name: + print(item["id"]); sys.exit(0) +sys.exit(f"Policy '{name}' not found after create/list") +PY +} + +ensure_vhost() { + local domain="$1"; local backend_url="$2"; local description="$3"; local policy_id="$4" + echo "Ensuring vhost ${domain} -> ${backend_url}..." 
+ local vhost_body vhost_response vhost_id + vhost_body="$(printf '{"domain":%s,"backend_url":%s,"description":%s,"ssl_enabled":false,"is_active":true,"policy_id":%s}' \ + "$(json_string "${domain}")" "$(json_string "${backend_url}")" \ + "$(json_string "${description}")" "${policy_id}")" + vhost_response="$(api_json POST /vhosts "${token}" "${vhost_body}" || true)" + if [[ -n "${vhost_response}" ]]; then return; fi + local vhosts_response + vhosts_response="$(api_json GET /vhosts "${token}")" + vhost_id="$(VHOSTS="${vhosts_response}" DOMAIN="${domain}" python3 - <<'PY' +import json, os +data = json.loads(os.environ["VHOSTS"]); domain = os.environ["DOMAIN"] +for item in data: + if item["domain"] == domain: + print(item["id"]); exit(0) +exit(f"vhost {domain!r} not found") +PY + )" + api_json PATCH "/vhosts/${vhost_id}" "${token}" "${vhost_body}" >/dev/null +} + +# ── Main ─────────────────────────────────────────────────────────────────── + +ensure_crs_bundle + +if [[ "${SKIP_COMPOSE}" == false ]]; then + echo "Starting demo + lab target stack..." + "${COMPOSE[@]}" up -d --build + + wait_for_healthy backend + wait_for_healthy coraza + wait_for_healthy haproxy + wait_for_healthy demo-app + wait_for_healthy demo-api + wait_for_healthy juiceshop + wait_for_healthy dvwa + wait_for_healthy wordpress +fi + +ADMIN_EMAIL="$(env_value ADMIN_EMAIL admin@example.com)" +ADMIN_PASSWORD="$(env_value ADMIN_PASSWORD GuardProxyDemo12345)" +BACKEND_HTTP_PORT="$(env_value BACKEND_HTTP_PORT 8000)" +HAPROXY_HTTP_PORT="$(env_value HAPROXY_HTTP_PORT 8080)" +API_BASE_URL="http://127.0.0.1:${BACKEND_HTTP_PORT}" +WAF_BASE_URL="http://127.0.0.1:${HAPROXY_HTTP_PORT}" + +echo "Logging in..." 
+login_body="$(printf '{"email":%s,"password":%s}' "$(json_string "${ADMIN_EMAIL}")" "$(json_string "${ADMIN_PASSWORD}")")" +token="$(api_json POST /auth/login "" "${login_body}" | python3 -c 'import json,sys; print(json.load(sys.stdin)["access_token"])')" + +# ── Policies ─────────────────────────────────────────────────────────────── + +LAB_POLICY_NAME="$(env_value LAB_POLICY_NAME 'Lab Baseline')" +LAB_POLICY_PARANOIA="$(env_value LAB_POLICY_PARANOIA 1)" +LAB_POLICY_INBOUND_THRESHOLD="$(env_value LAB_POLICY_INBOUND_THRESHOLD 5)" + +baseline_body="$(printf '{"name":%s,"description":"Lab evaluation baseline — PL%s anomaly threshold %s block","paranoia_level":%s,"inbound_anomaly_threshold":%s,"enforcement_mode":"block"}' \ + "$(json_string "${LAB_POLICY_NAME}")" "${LAB_POLICY_PARANOIA}" "${LAB_POLICY_INBOUND_THRESHOLD}" \ + "${LAB_POLICY_PARANOIA}" "${LAB_POLICY_INBOUND_THRESHOLD}")" +baseline_policy_id="$(ensure_policy "${LAB_POLICY_NAME}" "${baseline_body}")" + +LAB_PL2_POLICY_NAME="$(env_value LAB_PL2_POLICY_NAME 'Lab PL2')" +LAB_PL2_POLICY_PARANOIA="$(env_value LAB_PL2_POLICY_PARANOIA 2)" +LAB_PL2_POLICY_INBOUND_THRESHOLD="$(env_value LAB_PL2_POLICY_INBOUND_THRESHOLD 3)" + +pl2_body="$(printf '{"name":%s,"description":"Lab evaluation high-paranoia — PL%s anomaly threshold %s block","paranoia_level":%s,"inbound_anomaly_threshold":%s,"enforcement_mode":"block"}' \ + "$(json_string "${LAB_PL2_POLICY_NAME}")" "${LAB_PL2_POLICY_PARANOIA}" "${LAB_PL2_POLICY_INBOUND_THRESHOLD}" \ + "${LAB_PL2_POLICY_PARANOIA}" "${LAB_PL2_POLICY_INBOUND_THRESHOLD}")" +pl2_policy_id="$(ensure_policy "${LAB_PL2_POLICY_NAME}" "${pl2_body}")" + +# ── Vhosts ───────────────────────────────────────────────────────────────── + +LAB_JUICESHOP_DOMAIN="$(env_value LAB_JUICESHOP_DOMAIN juice.local)" +LAB_JUICESHOP_BACKEND_URL="$(env_value LAB_JUICESHOP_BACKEND_URL http://juiceshop:3000)" +LAB_DVWA_DOMAIN="$(env_value LAB_DVWA_DOMAIN dvwa.local)" +LAB_DVWA_BACKEND_URL="$(env_value 
LAB_DVWA_BACKEND_URL http://dvwa:80)" +LAB_WP_DOMAIN="$(env_value LAB_WP_DOMAIN wp.local)" +LAB_WP_BACKEND_URL="$(env_value LAB_WP_BACKEND_URL http://wordpress:80)" + +ensure_vhost "${LAB_JUICESHOP_DOMAIN}" "${LAB_JUICESHOP_BACKEND_URL}" "OWASP Juice Shop — intentionally vulnerable app" "${baseline_policy_id}" +ensure_vhost "${LAB_DVWA_DOMAIN}" "${LAB_DVWA_BACKEND_URL}" "DVWA — Damn Vulnerable Web Application" "${baseline_policy_id}" +ensure_vhost "${LAB_WP_DOMAIN}" "${LAB_WP_BACKEND_URL}" "WordPress — real CMS for FP measurement (no CRS exclusions)" "${baseline_policy_id}" + +echo "Applying generated HAProxy/Coraza config..." +api_json POST /config/apply "${token}" >/dev/null + +# ── DVWA DB initialisation (idempotent) ──────────────────────────────────── +echo "Initialising DVWA database..." +curl -sf --max-time 30 \ + -c /tmp/dvwa-cookies.txt \ + -b /tmp/dvwa-cookies.txt \ + -d "create_db=Create+%2F+Reset+Database" \ + "http://127.0.0.1:${HAPROXY_HTTP_PORT}/setup.php" \ + -H "Host: ${LAB_DVWA_DOMAIN}" >/dev/null || echo "DVWA setup.php returned non-200 (may already be initialised)" + +echo +echo "Eval lab is ready." +echo " Juice Shop: curl -H 'Host: ${LAB_JUICESHOP_DOMAIN}' ${WAF_BASE_URL}/" +echo " DVWA: curl -H 'Host: ${LAB_DVWA_DOMAIN}' ${WAF_BASE_URL}/" +echo " WordPress: curl -H 'Host: ${LAB_WP_DOMAIN}' ${WAF_BASE_URL}/" +echo +echo "Quick smoke:" +echo " curl -si -H 'Host: ${LAB_JUICESHOP_DOMAIN}' '${WAF_BASE_URL}/?q=1+UNION+SELECT+1--' | grep 'HTTP/'" +echo " (expect 403 — WAF blocking SQLi)" diff --git a/benchmarks/lab/teardown-lab.sh b/benchmarks/lab/teardown-lab.sh new file mode 100755 index 0000000..955b37a --- /dev/null +++ b/benchmarks/lab/teardown-lab.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# teardown-lab.sh — Stop and optionally remove the evaluation lab stack. 
+# +# Usage: +# ./benchmarks/lab/teardown-lab.sh # stop containers, keep volumes +# ./benchmarks/lab/teardown-lab.sh --clean # stop + remove all volumes + +set -Eeuo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "${SCRIPT_DIR}/../.." && pwd)" +DEMO_COMPOSE="${REPO_ROOT}/deploy/demo/docker-compose.yml" +TARGETS_COMPOSE="${SCRIPT_DIR}/docker-compose.targets.yml" +DEMO_ENV="${REPO_ROOT}/deploy/demo/.env" +LAB_ENV="${SCRIPT_DIR}/.env" + +CLEAN=false +for arg in "$@"; do + case "$arg" in --clean) CLEAN=true ;; esac +done + +if docker compose version >/dev/null 2>&1; then + COMPOSE=(docker compose -f "${DEMO_COMPOSE}" -f "${TARGETS_COMPOSE}") + [[ -f "${DEMO_ENV}" ]] && COMPOSE+=(--env-file "${DEMO_ENV}") + [[ -f "${LAB_ENV}" ]] && COMPOSE+=(--env-file "${LAB_ENV}") +else + COMPOSE=(docker-compose -f "${DEMO_COMPOSE}" -f "${TARGETS_COMPOSE}") +fi + +if [[ "${CLEAN}" == true ]]; then + echo "Stopping lab and removing all volumes..." + "${COMPOSE[@]}" down -v +else + echo "Stopping lab (volumes preserved)..." + "${COMPOSE[@]}" down +fi diff --git a/benchmarks/payloads/legitimate.txt b/benchmarks/payloads/legitimate.txt new file mode 100644 index 0000000..c27fe73 --- /dev/null +++ b/benchmarks/payloads/legitimate.txt @@ -0,0 +1,28 @@ +# Legitimate request paths — should all produce 200 (NOT blocked by WAF). +# Used to verify the false positive rate. 
+/ +/index.html +/robots.txt +/favicon.ico +/about +/contact +/login +/search?q=hello+world +/search?q=buy+laptop +/search?q=how+to+cook+pasta +/api/v1/status +/api/v1/products +/api/v1/products?page=1&per_page=20 +/api/v1/products?sort=price&order=asc +/api/v1/users/me +/wp-login.php +/wp-admin/ +/wp-content/themes/twentytwentyfour/style.css +/wp-json/wp/v2/posts +/?p=1 +/?page_id=2 +/category/news/ +/tag/technology/ +/?s=wordpress+tutorial +/feed/ +/sitemap.xml diff --git a/benchmarks/payloads/lfi.txt b/benchmarks/payloads/lfi.txt new file mode 100644 index 0000000..a23441e --- /dev/null +++ b/benchmarks/payloads/lfi.txt @@ -0,0 +1,19 @@ +# Local File Inclusion / Path Traversal payloads — should be blocked at PL1. +../etc/passwd +../../etc/passwd +../../../etc/passwd +../../../../etc/passwd +../../../../../etc/passwd +../../../../../../etc/passwd +..\..\..\windows\system32\cmd.exe +../etc/shadow +/etc/passwd +/etc/shadow +/proc/self/environ +/proc/self/fd/0 +../etc/hosts +%2e%2e%2fetc%2fpasswd +%2e%2e/%2e%2e/etc/passwd +..%2fetc%2fpasswd +%252e%252e%252fetc%252fpasswd +....//....//....//etc/passwd diff --git a/benchmarks/payloads/sqli.txt b/benchmarks/payloads/sqli.txt new file mode 100644 index 0000000..7aefd43 --- /dev/null +++ b/benchmarks/payloads/sqli.txt @@ -0,0 +1,22 @@ +# SQL injection payloads — used in manual verification and smoke tests. +# These should all produce a 403 response from the WAF (PL1, threshold 5, block). +# Source: OWASP Testing Guide, CRS test corpus. 
+' OR '1'='1 +' OR '1'='1' -- +' OR 1=1-- +1' ORDER BY 1-- +1' ORDER BY 2-- +1' ORDER BY 3-- +1 UNION SELECT null-- +1 UNION SELECT null,null-- +1 UNION ALL SELECT NULL,NULL,NULL-- +'; DROP TABLE users;-- +1; SELECT * FROM users-- +' AND 1=2 UNION SELECT username,password FROM users-- +admin'-- +' OR 'x'='x +1' AND SLEEP(5)-- +1' AND (SELECT * FROM (SELECT(SLEEP(5)))a)-- +'; EXEC xp_cmdshell('whoami')-- +1; WAITFOR DELAY '0:0:5'-- +' HAVING 1=1-- diff --git a/benchmarks/payloads/xss.txt b/benchmarks/payloads/xss.txt new file mode 100644 index 0000000..018fb7c --- /dev/null +++ b/benchmarks/payloads/xss.txt @@ -0,0 +1,14 @@ +# XSS payloads — should all be blocked by the WAF at PL1. + + +"> + + +javascript:alert(1) + +