Add Top 10 2025 and API Top 10 2023 Alert Tags #7230
Conversation
43b1b05 to
c94bd43
Compare
|
New Issues (158): Checkmarx found the following issues in this Pull Request
Use @Checkmarx to interact with Checkmarx PR Assistant. |
1c1945c to
196cb55
Compare
|
I'll add a full set of tags to httpsinfo in another PR. |
39707fb to
d7d5492
Compare
|
Validation script; I'm sure this could be cleaner, but I largely just trusted Cursor with it. (Download the spreadsheet as CSV.)
Validation Script (python)
#!/usr/bin/env python3
"""
Compare OWASP mapping CSV rows to scan-rule Java (and selected scripts) per add-on.

CommandInjectionTimingScanRule does not list OWASP_2025/API tags inline; it uses
    alertTags.putAll(CommandInjectionScanRule.ALERT_TAGS)
plus CommonAlertTag.TEST_TIMING. For CSV tag comparison, OWASP_2025_* and
API_2023_* are taken from CommandInjectionScanRule.java.
"""
from __future__ import annotations
import argparse
import csv
import re
import sys
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
# Rule class name -> name of the delegate ScanRule (same package) whose
# ALERT_TAGS the rule merges into its own tags.
MERGE_ALERT_TAGS_FROM: dict[str, str] = {
    "CommandInjectionTimingScanRule": "CommandInjectionScanRule",
}
def _notes(row: list[str]) -> str:
    """Return the lower-cased notes cell (index 9) of a CSV row.

    Rows with fewer than ten cells have no notes cell; an empty string is
    returned for them so callers can do substring checks unconditionally.
    """
    return row[9].lower() if len(row) > 9 else ""
def _cls(row: list[str]) -> str:
    """Return the class-name cell (index 1) of a CSV row, normalised.

    Surrounding whitespace is removed, and a name wrapped in parentheses,
    e.g. ``(FooScanRule)``, is returned without the parentheses. A row that
    has no class-name cell at all yields an empty string.
    """
    if len(row) < 2:
        return ""
    name = row[1].strip()
    if name.startswith("(") and name.endswith(")"):
        # Also strip inside the parentheses so "( Foo )" still matches "Foo".
        return name[1:-1].strip()
    return name
def _csv_release_core(status: str) -> bool:
    """Return True when a status cell describes a plain "Release" entry.

    A status mentioning "add-on", "Alpha" or "Beta" is rejected even if it
    starts with "Release". All comparisons are case-sensitive.
    """
    if any(marker in status for marker in ("add-on", "Alpha", "Beta")):
        return False
    return status.startswith("Release")
@dataclass(frozen=True)
class AddonSpec:
    """Describe one add-on: where its sources live and which CSV rows it owns.

    Sources are expected under ``addOns/{name}/...``.
    """

    # Add-on id.
    name: str
    # Directory, given relative to the repository root, that the globs are
    # applied to.
    java_dir: str
    # Glob patterns, evaluated inside java_dir, selecting the files to inspect.
    globs: tuple[str, ...]
    # Predicate over a full CSV data row (list of cells); True means the row
    # is included for this add-on.
    csv_row_ok: Callable[[list[str]], bool]
def row_active_release(row: list[str]) -> bool:
    """Return True for a CSV row marked "Active" with a core release status.

    Cell 3 must equal "Active" exactly and cell 4 must pass
    ``_csv_release_core``. Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    return row[3] == "Active" and _csv_release_core(row[4])
def row_passive_release(row: list[str]) -> bool:
    """Return True for a CSV row marked "Passive" with a core release status.

    Cell 3 must equal "Passive" exactly and cell 4 must pass
    ``_csv_release_core``. Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    return row[3] == "Passive" and _csv_release_core(row[4])
def row_active_beta(row: list[str]) -> bool:
    """Return True for a CSV row marked "Active" whose status is a Beta one.

    Cell 3 must equal "Active" exactly; cell 4 must contain "Beta" and must
    not contain "add-on". Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    status = row[4]
    return row[3] == "Active" and "Beta" in status and "add-on" not in status
def row_passive_beta(row: list[str]) -> bool:
    """Return True for a CSV row marked "Passive" whose status is a Beta one.

    Cell 3 must equal "Passive" exactly; cell 4 must contain "Beta" and must
    not contain "add-on". Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    status = row[4]
    return row[3] == "Passive" and "Beta" in status and "add-on" not in status
def row_passive_alpha(row: list[str]) -> bool:
    """Return True for a CSV row marked "Passive" whose status is an Alpha one.

    Cell 3 must equal "Passive" exactly; cell 4 must contain "Alpha" and must
    not contain "add-on". Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    status = row[4]
    return row[3] == "Passive" and "Alpha" in status and "add-on" not in status
def row_soap(row: list[str]) -> bool:
    """Return True when the row's notes cell mentions "soap add-on".

    The match is case-insensitive because ``_notes`` lower-cases the cell.
    """
    return "soap add-on" in _notes(row)
def row_sqliplugin(row: list[str]) -> bool:
    """Return True when the row's notes cell mentions "sqliplugin".

    The match is case-insensitive because ``_notes`` lower-cases the cell.
    """
    return "sqliplugin" in _notes(row)
def row_access_control(row: list[str]) -> bool:
    """Return True when the row's notes cell mentions access control.

    Spaces are removed from the lower-cased notes before matching, so both
    "accessControl" and "Access Control" are recognised.
    """
    return "accesscontrol" in _notes(row).replace(" ", "")
def row_retire(row: list[str]) -> bool:
    """Return True for rows whose notes mention "retire" and whose status is an add-on one.

    The notes match is case-insensitive; cell 4 must contain "add-on".
    Rows with fewer than five cells are rejected.
    """
    if len(row) < 5:
        return False
    return "retire" in _notes(row) and "add-on" in row[4]
def row_graphql(row: list[str]) -> bool:
    """Return True when the row's normalised class name is one of the GraphQL classes."""
    return _cls(row) in {
        "GraphQlParser",
        "GraphQlFingerprinter",
        "GraphQlCycleDetector",
    }
# Supported add-ons, keyed by add-on id (each spec's own name). Every java_dir
# is relative to the repository root and written with "/" so it can be joined
# onto a Path.
ADDONS: dict[str, AddonSpec] = {
    spec.name: spec
    for spec in (
        AddonSpec(
            name="accessControl",
            java_dir="addOns/accessControl/src/main/java/org/zaproxy/zap/extension/accessControl",
            globs=("AccessControlAlertsProcessor.java",),
            csv_row_ok=row_access_control,
        ),
        AddonSpec(
            name="ascanrules",
            java_dir="addOns/ascanrules/src/main/java/org/zaproxy/zap/extension/ascanrules",
            globs=("*ScanRule.java",),
            csv_row_ok=row_active_release,
        ),
        AddonSpec(
            name="ascanrulesBeta",
            java_dir="addOns/ascanrulesBeta/src/main/java/org/zaproxy/zap/extension/ascanrulesBeta",
            globs=("*ScanRule.java",),
            csv_row_ok=row_active_beta,
        ),
        AddonSpec(
            name="graphql",
            java_dir="addOns/graphql/src/main/java/org/zaproxy/addon/graphql",
            globs=(
                "GraphQlParser.java",
                "GraphQlFingerprinter.java",
                "GraphQlCycleDetector.java",
            ),
            csv_row_ok=row_graphql,
        ),
        AddonSpec(
            name="pscanrules",
            java_dir="addOns/pscanrules/src/main/java/org/zaproxy/zap/extension/pscanrules",
            globs=("*ScanRule.java",),
            csv_row_ok=row_passive_release,
        ),
        AddonSpec(
            name="pscanrulesAlpha",
            java_dir="addOns/pscanrulesAlpha/src/main/java/org/zaproxy/zap/extension/pscanrulesAlpha",
            globs=("*ScanRule.java", "Base64Disclosure.java"),
            csv_row_ok=row_passive_alpha,
        ),
        AddonSpec(
            name="pscanrulesBeta",
            java_dir="addOns/pscanrulesBeta/src/main/java/org/zaproxy/zap/extension/pscanrulesBeta",
            globs=("*ScanRule.java",),
            csv_row_ok=row_passive_beta,
        ),
        AddonSpec(
            name="retire",
            java_dir="addOns/retire/src/main/java/org/zaproxy/addon/retire",
            globs=("*ScanRule.java",),
            csv_row_ok=row_retire,
        ),
        AddonSpec(
            name="soap",
            java_dir="addOns/soap/src/main/java/org/zaproxy/zap/extension/soap",
            globs=("*ScanRule.java",),
            csv_row_ok=row_soap,
        ),
        AddonSpec(
            name="sqliplugin",
            java_dir="addOns/sqliplugin/src/main/java/org/zaproxy/zap/extension/sqliplugin",
            globs=("*ScanRule.java",),
            csv_row_ok=row_sqliplugin,
        ),
    )
}
def load_csv_classes(csv_path: Path, spec: AddonSpec) -> dict[str, dict[str, str]]:
    """Load the mapping CSV and return the rows that belong to one add-on.

    Parsing starts at the first line that begins with ``Buffer Overflow,``;
    everything before it is ignored. A row is kept when it has at least nine
    cells, a non-empty class-name cell that is not the "Class Name" header,
    passes ``spec.csv_row_ok``, and is not one of the script scan rules.

    Args:
        csv_path: Path of the exported CSV file.
        spec: Add-on description whose ``csv_row_ok`` predicate filters rows.

    Returns:
        Mapping of normalised class name to a dict with the keys ``"rule"``
        (cell 0), ``"2025"`` (cell 6), ``"api"`` (cell 8) and ``"notes"``
        (cell 9, or "" when absent). A later row for the same class replaces
        an earlier one.

    Raises:
        ValueError: If no line starts with ``Buffer Overflow,``.
        OSError: If the file cannot be read.
    """
    # "utf-8-sig" reads plain UTF-8 and additionally drops a leading BOM.
    raw = csv_path.read_text(encoding="utf-8-sig")
    lines = raw.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    start = next(
        (i for i, ln in enumerate(lines) if ln.startswith("Buffer Overflow,")),
        None,
    )
    if start is None:
        # A bare next() would surface as an unexplained StopIteration.
        raise ValueError(
            f"no line starting with 'Buffer Overflow,' found in {csv_path}"
        )

    out: dict[str, dict[str, str]] = {}
    for row in csv.reader(lines[start:]):
        if len(row) < 9:
            continue
        class_cell = row[1].strip()
        if not class_cell or class_cell == "Class Name":
            continue
        if not spec.csv_row_ok(row):
            continue
        cls = _cls(row)
        if cls in ("ActiveScriptScanRule", "PassiveScriptScanRule"):
            continue
        out[cls] = {
            "rule": row[0],
            "2025": row[6].strip(),
            "api": row[8].strip(),
            "notes": row[9].strip() if len(row) > 9 else "",
        }
    return out
def collect_java_files(repo: Path, spec: AddonSpec) -> dict[str, Path]:
    """Find the add-on's source files and index them by file stem.

    Args:
        repo: Repository root directory.
        spec: Add-on description; ``java_dir`` is resolved against ``repo``
            and each pattern in ``globs`` is evaluated inside it.

    Returns:
        Mapping of file stem (file name without its suffix) to the file path.

    Raises:
        FileNotFoundError: If the resolved source directory does not exist.
    """
    root = repo / spec.java_dir
    if not root.is_dir():
        raise FileNotFoundError(f"Java root not found: {root}")
    files: dict[str, Path] = {}
    for pattern in spec.globs:
        # Glob order is unspecified; sort so the result order is reproducible.
        for path in sorted(root.glob(pattern)):
            files[path.stem] = path
    return files
def tags_in_java(text: str) -> tuple[set[str], set[str]]:
    """Extract the alert-tag constant names referenced in a source text.

    Every ``CommonAlertTag.OWASP_2025_*`` and ``CommonAlertTag.API_2023_*``
    reference is collected, wherever it appears in the text.

    Returns:
        A pair ``(owasp_2025_tags, api_2023_tags)`` of constant names without
        the ``CommonAlertTag.`` prefix.
    """
    owasp = set(re.findall(r"CommonAlertTag\.(OWASP_2025_[A-Z0-9_]+)", text))
    api = set(re.findall(r"CommonAlertTag\.(API_2023_[A-Z0-9_]+)", text))
    return owasp, api
def effective_tags_for_rule(cls: str, files: dict[str, Path]) -> tuple[set[str], set[str]]:
    """Return the tags a rule carries, including tags merged from a delegate.

    When ``cls`` has an entry in ``MERGE_ALERT_TAGS_FROM``, the delegate's
    source file is scanned as well and its tags are unioned with the rule's.

    Args:
        cls: Rule class name (file stem).
        files: Mapping of file stem to source path.

    Returns:
        A pair ``(owasp_2025_tags, api_2023_tags)``.

    Raises:
        KeyError: If the rule, or its delegate, has no entry in ``files``.
    """
    paths: list[Path] = []
    delegate = MERGE_ALERT_TAGS_FROM.get(cls)
    if delegate is not None:
        if delegate not in files:
            raise KeyError(f"delegate {delegate}.java not found for {cls}")
        paths.append(files[delegate])
    if cls not in files:
        raise KeyError(cls)
    paths.append(files[cls])

    owasp: set[str] = set()
    api: set[str] = set()
    for path in paths:
        found_owasp, found_api = tags_in_java(path.read_text(encoding="utf-8"))
        owasp |= found_owasp
        api |= found_api
    return owasp, api
def parse_csv_tags(cell: str) -> set[str]:
    """Split a comma-separated CSV tag cell into a set of tag names.

    An empty cell, or one holding only the placeholder "-" or "—", yields an
    empty set. Entries that start with "[" are skipped. Whitespace around
    the cell and around each entry is ignored.
    """
    cell = cell.strip() if cell else ""
    if not cell or cell in ("-", "—"):
        return set()
    # Strip before testing for "[" so padded entries are filtered as well.
    entries = (part.strip() for part in cell.split(","))
    return {entry for entry in entries if entry and not entry.startswith("[")}
def main() -> None:
    """Command-line entry point: compare the CSV mapping with one add-on's code.

    Takes three positional arguments (CSV path, repository root, add-on id)
    and prints one line per finding:

    * ``EXTRA``: a source file with no matching CSV row,
    * ``MISSING``: a CSV row with no matching source file,
    * ``ERROR``: the tags for a rule could not be determined,
    * ``MISMATCH 2025`` / ``MISMATCH API``: CSV and code tags differ.

    Exits with status 2 when the CSV or the source directory cannot be used.
    """
    parser = argparse.ArgumentParser(
        description="Compare OWASP mapping CSV to an add-on's Java alert tags.",
    )
    parser.add_argument("csv", type=Path, help="Path to mapping CSV")
    parser.add_argument("repo", type=Path, help="zap-extensions repository root")
    parser.add_argument(
        "addon",
        choices=sorted(ADDONS.keys()),
        help="Add-on id (see choices)",
    )
    args = parser.parse_args()
    spec = ADDONS[args.addon]

    if not args.csv.is_file():
        print(f"error: CSV not found: {args.csv}", file=sys.stderr)
        sys.exit(2)
    try:
        csv_map = load_csv_classes(args.csv, spec)
    except (OSError, ValueError, StopIteration) as e:
        # The loader fails when the file is unreadable, not valid UTF-8, or
        # its first data row cannot be located; report that like the other
        # input errors instead of ending in a traceback.
        reason = str(e) or type(e).__name__
        print(f"error: cannot load CSV {args.csv}: {reason}", file=sys.stderr)
        sys.exit(2)
    try:
        files = collect_java_files(args.repo, spec)
    except FileNotFoundError as e:
        print(f"error: {e}", file=sys.stderr)
        sys.exit(2)

    for cls in sorted(set(files) - set(csv_map)):
        print(f"EXTRA (code, not in CSV filter): {cls}")
    for cls in sorted(set(csv_map) - set(files)):
        print(f"MISSING (CSV row, no java): {cls}")

    for cls in sorted(files):
        if cls not in csv_map:
            continue
        try:
            c25, c_api = effective_tags_for_rule(cls, files)
        except KeyError as e:
            print(f"ERROR {cls}: {e}")
            continue
        row = csv_map[cls]
        e25 = parse_csv_tags(row["2025"])
        e_api = parse_csv_tags(row["api"])
        # Name the delegate in the report when tags were merged from one.
        src = f"+{MERGE_ALERT_TAGS_FROM[cls]}" if cls in MERGE_ALERT_TAGS_FROM else ""
        if c25 != e25:
            print(f"MISMATCH 2025 {cls}{src}: csv={sorted(e25)} code={sorted(c25)}")
        if c_api != e_api:
            print(f"MISMATCH API {cls}{src}: csv={sorted(e_api)} code={sorted(c_api)}")
if __name__ == "__main__":
    main()

Usage example: |
|
Now has conflicts. |
|
Ok, I'll tackle that in a bit |
|
Deconflicted. |
|
Running the script I get some mismatches (I double checked with the original doc that they are correct). I don't know if these are the only ones (edit: double checked and these are the only mismatches). |
|
These were not included in the original doc, on purpose? |
|
DOM and ILS were just missed in the original. |
|
Thanks, I'll check the misses. I thought I had it down to zero but it was a hectic day and I may have lost track of something. I'll recheck them all. |
|
These ones are missing from the PR: |
|
All addressed, as discussed on slack. |
Signed-off-by: kingthorin <kingthorin@users.noreply.github.com>
|
Thank you! |




Overview
Update scan rules/alerts with tags for the OWASP Top 10 2025, and API Top 10 2023.
I know it's a lot. If you'd like I could break it into: Active, Passive, Other, just let me know.