From a01e9fb0e254e895f9b9841ded5f582e2102b9ae Mon Sep 17 00:00:00 2001 From: Thierno DIALLO Date: Wed, 29 Apr 2026 17:53:30 +0200 Subject: [PATCH 1/7] add architecture and infra rules --- .spectral.yml | 368 +++++++++++-- green-score-threshold.json | 20 +- scripts/architecture_rules.py | 843 +++++++++++++++++++++++++++++ scripts/green-api-auto-discover.py | 163 +++++- 4 files changed, 1354 insertions(+), 40 deletions(-) create mode 100644 scripts/architecture_rules.py diff --git a/.spectral.yml b/.spectral.yml index 7ba756b..4a308f4 100644 --- a/.spectral.yml +++ b/.spectral.yml @@ -1,79 +1,371 @@ extends: [[spectral:oas, recommended]] +# ========================================================================= +# Green API Score - Spectral Rules (static OpenAPI lint) +# +# Ce fichier est COMPLEMENTAIRE au runtime analyzer +# (scripts/green-api-auto-discover.py). Les regles ci-dessous ne valident +# que ce qui est observable statiquement dans la spec OpenAPI. +# +# Les preuves dynamiques (compression reelle, ETag/304 effectif, /changes +# qui repond, runtime regionalement proche, scalabilite, dashboard cloud) +# restent a la charge du runtime analyzer. +# +# Convention: la `description` de chaque regle debute par +# [] - +# ou in { data-efficiency, usage, logs-observability, +# architecture, infrastructure, style } +# pour rester aligne avec le champ `category` du dictionnaire +# GREEN_RULES cote script Python (green-api-auto-discover.py). 
+# ========================================================================= + +aliases: + GET_OPERATIONS: + - "$.paths[*].get" + ALL_OPERATIONS: + - "$.paths[*][get,post,put,patch,delete]" + COLLECTION_GETS: + # paths whose final segment is NOT a path parameter (= collections) + - "$.paths[?(!@property.match(/\\{[^}]+\\}\\s*$/))].get" + ITEM_GETS: + # paths whose final segment IS a path parameter (= ressource unitaire) + - "$.paths[?(@property.match(/\\{[^}]+\\}\\s*$/))].get" + rules: - # ═══════════════════════════════════════════════════════════ - # 🌿 Green API Score — Spectral Rules - # Règles de linting OpenAPI pour l'éco-conception d'API - # ═══════════════════════════════════════════════════════════ + # ----------------------------------------------------------------------- + # DATA EFFICIENCY (DE) + # ----------------------------------------------------------------------- - # DE11 — Pagination obligatoire sur les collections + # DE11 - Pagination obligatoire sur les GET de collection green-api-pagination: - description: "DE11 — Les endpoints de collection GET doivent supporter la pagination (page/size ou limit/offset)" + description: "[data-efficiency] DE11 - Les GET de collection doivent supporter la pagination (page/size, limit/offset, cursor)." severity: warn - given: "$.paths[*].get" + given: "#COLLECTION_GETS" then: - - field: parameters - function: truthy - message: "{{description}} — Ajoutez des paramètres page/size sur les GET de collection." + function: schema + functionOptions: + schema: + type: object + required: [parameters] + properties: + parameters: + type: array + contains: + type: object + required: [name] + properties: + name: + type: string + pattern: "^(page|size|limit|offset|pageSize|pageNumber|cursor|after|before)$" + message: "{{description}} Aucun parametre de pagination detecte sur ce GET de collection." 
- # DE08 — Filtrage de champs + # DE08 - Filtrage / projection de champs green-api-fields-filter: - description: "DE08 — Les endpoints GET devraient supporter un paramètre 'fields' pour le filtrage de champs" + description: "[data-efficiency] DE08 - Les GET devraient exposer un parametre de projection (fields, select, _fields, view)." severity: info - given: "$.paths[*].get.parameters[*]" + given: "#GET_OPERATIONS" then: - field: name - function: pattern + function: schema functionOptions: - notMatch: "^$" - message: "Pensez à ajouter un paramètre 'fields' pour réduire le payload (DE08)." + schema: + type: object + required: [parameters] + properties: + parameters: + type: array + contains: + type: object + required: [name] + properties: + name: + type: string + pattern: "^(fields|select|_fields|sparse|view)$" + message: "{{description}}" - # DE01 — Format de réponse + # DE01 - Format de reponse JSON (et tolerance CBOR/protobuf) green-api-response-format: - description: "DE01 — Les réponses doivent utiliser application/json (ou application/cbor)" + description: "[data-efficiency] DE01 - Les reponses 2xx doivent exposer application/json (application/cbor / application/x-protobuf toleres en complement)." + severity: warn + given: "$.paths[*][*].responses[?(@property.match(/^2\\d\\d$/))].content" + then: + function: schema + functionOptions: + schema: + type: object + anyOf: + - required: ["application/json"] + - required: ["application/cbor"] + - required: ["application/x-protobuf"] + - required: ["application/protobuf"] + message: "{{description}}" + + # DE01 - Compression : header Content-Encoding documente sur les 200 + green-api-compression-header: + description: "[data-efficiency] DE01 - Documentez le header Content-Encoding (gzip/br) ou Vary sur les reponses 200 pour signaler la compression." 
+ severity: info + given: "$.paths[*][*].responses.200" + then: + function: schema + functionOptions: + schema: + type: object + required: [headers] + properties: + headers: + type: object + propertyNames: + pattern: "(?i)^(content-encoding|vary)$" + message: "{{description}}" + + # DE02/DE03 - Cache : ETag sur les ressources unitaires + green-api-etag-header: + description: "[data-efficiency] DE02/DE03 - Les GET unitaires (/.../{id}) doivent documenter le header ETag dans la reponse 200." severity: warn + given: "#ITEM_GETS" + then: + function: schema + functionOptions: + schema: + type: object + required: [responses] + properties: + responses: + type: object + required: ["200"] + properties: + "200": + type: object + required: [headers] + properties: + headers: + type: object + anyOf: + - required: [ETag] + - required: [etag] + message: "{{description}}" + + # DE02/DE03 - Reponse 304 Not Modified pour les ressources unitaires + green-api-not-modified-304: + description: "[data-efficiency] DE02/DE03 - Les GET unitaires (/.../{id}) doivent declarer la reponse 304 Not Modified." + severity: info + given: "#ITEM_GETS" + then: + field: "responses.304" + function: truthy + message: "{{description}}" + + # DE06 - Delta / Changes (presence d'un endpoint de synchronisation incrementale) + green-api-delta-endpoint: + description: "[data-efficiency] DE06 - La spec devrait exposer un endpoint de delta (/changes, /sync, /delta)." 
+ severity: info + given: "$" + then: + field: paths + function: schema + functionOptions: + schema: + type: object + # "il existe au moins une path-key contenant changes / sync / delta" + # encode via: not(toutes les cles ne matchent pas) + not: + propertyNames: + not: + pattern: "(?i)(changes|/sync(\\b|$|/)|delta)" + message: "{{description}}" + + # 206 - Range / Partial Content : si un endpoint accepte 'Range', il doit declarer 206 + green-api-range-206: + description: "[data-efficiency] 206 - Si l'operation accepte un header Range, elle doit declarer la reponse 206 Partial Content." + severity: info + given: "$.paths[*][get,head][?(@.parameters && @.parameters[?(@.name && @.name.match(/^range$/i))])]" + then: + field: "responses.206" + function: truthy + message: "{{description}}" + + # BIN01 - Format binaire (CBOR / protobuf) - encouragement non bloquant + green-api-binary-format: + description: "[data-efficiency] BIN01 - Pensez a exposer au moins un endpoint en format binaire (application/cbor, application/x-protobuf) pour reduire le payload." + severity: hint given: "$.paths[*][*].responses[*].content" then: - field: "application/json" + function: schema + functionOptions: + schema: + type: object + anyOf: + - required: ["application/cbor"] + - required: ["application/x-protobuf"] + - required: ["application/protobuf"] + - required: ["application/octet-stream"] + - required: ["application/x-ndjson"] + - required: ["text/event-stream"] + # tolere les schemas JSON pour ne pas spammer (regle indicative) + - required: ["application/json"] + message: "{{description}}" + + # ----------------------------------------------------------------------- + # USAGE (US) + # ----------------------------------------------------------------------- + + # US07 - Rate limiting : reponse 429 documentee + green-api-rate-limit-429: + description: "[usage] US07 - Les operations doivent documenter la reponse 429 Too Many Requests." 
+ severity: info + given: "#ALL_OPERATIONS" + then: + field: "responses.429" function: truthy message: "{{description}}" - # DE02/DE03 — Cache headers - green-api-cache-headers: - description: "DE02/DE03 — Les réponses 200 devraient documenter les headers de cache (ETag, Cache-Control)" + # US07 - Rate limiting : header Retry-After / X-RateLimit-* sur la 429 + green-api-retry-after-header: + description: "[usage] US07 - La reponse 429 doit documenter au moins un header Retry-After ou X-RateLimit-*." severity: info - given: "$.paths[*].get.responses.200.headers" + given: "$.paths[*][*].responses.429" then: + function: schema + functionOptions: + schema: + type: object + required: [headers] + properties: + headers: + type: object + propertyNames: + pattern: "(?i)^retry-after$|^x-ratelimit-(limit|remaining|reset)$" + message: "{{description}}" + + # US07 - 404 documentee sur les operations qui ciblent une ressource + green-api-error-responses: + description: "[usage] US07 - Documentez la reponse 404 sur les operations qui ciblent une ressource." + severity: info + given: "$.paths[*][get,put,patch,delete].responses" + then: + field: "404" function: truthy - message: "Documentez les headers ETag et Cache-Control dans vos réponses 200 (DE02/DE03)." + message: "{{description}}" + + # ----------------------------------------------------------------------- + # LOGS & OBSERVABILITY (LO) + # ----------------------------------------------------------------------- + + # LO01 - Observabilite : la spec devrait exposer health/metrics/actuator + green-api-observability-endpoint: + description: "[logs-observability] LO01 - La spec devrait exposer un endpoint d'observabilite (/health, /actuator, /metrics, /readiness, /liveness)." 
+ severity: info + given: "$" + then: + field: paths + function: schema + functionOptions: + schema: + type: object + not: + propertyNames: + not: + pattern: "(?i)(health|actuator|metrics|readiness|liveness|ping)" + message: "{{description}}" + + # ----------------------------------------------------------------------- + # ARCHITECTURE (AR) + # ----------------------------------------------------------------------- + + # AR01 - Architecture evenementielle : callbacks OpenAPI, webhooks ou SSE + green-api-event-driven-callbacks: + description: "[architecture] AR01 - Preferez une architecture evenementielle (callbacks OpenAPI, webhooks, text/event-stream) plutot que du polling." + severity: info + given: "$" + then: + function: schema + functionOptions: + schema: + type: object + anyOf: + # callbacks OpenAPI 3 dans components + - properties: + components: + type: object + required: [callbacks] + required: [components] + # webhooks OpenAPI 3.1 + - required: [webhooks] + # SSE: presence d'un endpoint stream/events/sse/subscribe/notifications + - properties: + paths: + type: object + not: + propertyNames: + not: + pattern: "(?i)(stream|events|sse|subscribe|notifications?)" + required: [paths] + message: "{{description}}" - # Pas de descriptions vides + # ----------------------------------------------------------------------- + # STYLE / DOCUMENTATION (transverse) + # ----------------------------------------------------------------------- + + # Toute operation doit avoir une description green-api-operation-description: - description: "Chaque opération doit avoir une description" + description: "[style] Chaque operation doit avoir une description (documentation)." severity: warn - given: "$.paths[*][get,post,put,patch,delete]" + given: "#ALL_OPERATIONS" then: field: description function: truthy - message: "Ajoutez une description à cette opération pour la documentation." 
+ message: "{{description}}" - # Réponses d'erreur documentées - green-api-error-responses: - description: "US07 — Documentez les réponses d'erreur (400, 404, 500)" + # Toute operation doit avoir un operationId stable + green-api-operation-id: + description: "[style] Chaque operation doit avoir un operationId stable (generation de clients, tracabilite)." + severity: warn + given: "#ALL_OPERATIONS" + then: + field: operationId + function: truthy + message: "{{description}}" + + # Toute operation doit avoir au moins une reponse 2xx + green-api-success-2xx: + description: "[style] Chaque operation doit declarer au moins une reponse 2xx." + severity: warn + given: "#ALL_OPERATIONS" + then: + field: responses + function: schema + functionOptions: + schema: + type: object + anyOf: + - required: ["200"] + - required: ["201"] + - required: ["202"] + - required: ["204"] + - required: ["2XX"] + message: "{{description}}" + + # DELETE devrait retourner 204 (pas de payload de retour) + green-api-delete-204: + description: "[data-efficiency] Les operations DELETE devraient retourner 204 No Content (pas de payload de retour)." severity: info - given: "$.paths[*][*].responses" + given: "$.paths[*].delete" then: - field: "404" + field: "responses.204" function: truthy - message: "Documentez la réponse 404 pour améliorer l'observabilité (US07)." 
+ message: "{{description}}" + + # ----------------------------------------------------------------------- + # Surcharges des regles spectral:oas + # ----------------------------------------------------------------------- - # Pas de payload excessif dans les exemples oas3-valid-media-example: severity: warn - - # Info obligatoire info-contact: severity: warn info-description: severity: warn + operation-tag-defined: + severity: info + no-$ref-siblings: + severity: warn diff --git a/green-score-threshold.json b/green-score-threshold.json index 35f0101..513c42e 100644 --- a/green-score-threshold.json +++ b/green-score-threshold.json @@ -1,4 +1,22 @@ { "minScore": 0, - "note": "Seuil de Green Score minimum pour éviter les régressions en CI" + "note": "Seuil de Green Score minimum pour éviter les régressions en CI", + + "architecture": { + "AR03_jaccard_threshold": 0.30, + "AR03_tags_overlap_threshold": 0.50, + "AR03_summary_cosine_threshold": 0.40, + "AR02_tls_handshake_max_ms": 50, + "AR02_anycast_asns": [13335, 54113, 16509, 20940, 15169, 8075, 32934, 22822] + }, + + "cloud_footprint_dashboards": { + "aws": "https://console.aws.amazon.com/billing/home#/carbon", + "azure": "https://www.microsoft.com/sustainability/emissions-impact-dashboard", + "gcp": "https://console.cloud.google.com/carbon", + "ovh": "https://www.ovh.com/manager/sustainability", + "cloudflare": "https://www.cloudflare.com/sustainability/", + "fastly": "https://www.fastly.com/sustainability", + "akamai": "https://www.akamai.com/why-akamai/sustainability" + } } diff --git a/scripts/architecture_rules.py b/scripts/architecture_rules.py new file mode 100644 index 0000000..ff50e75 --- /dev/null +++ b/scripts/architecture_rules.py @@ -0,0 +1,843 @@ +#!/usr/bin/env python3 +""" +Architecture & Infrastructure rules (AR01–AR05) for the Green Score Analyzer. +============================================================================= + +Companion to ``green-api-auto-discover.py``. 
Each evaluator returns a dict +shaped like the per-endpoint rules already produced by ``analyze_green_rules`` +so the dashboard can render them with the same code path. + +Phase 1 in this module: AR01, AR03, AR05. +Phase 2 (AR04 + AR01 source/deps signals + AR02 TLS/anycast/GeoIP) is reachable +through the same evaluator signature and will be added in follow-ups. + +Design rules — strict: + • stdlib-only (no requests/numpy/yaml unless already optional in the parent) + • every detection signal MUST be cross-validatable (no single weak hint + is allowed to push a rule from "not matched" to "matched") + • emits an ``evidence`` list — ``{kind, where, value}`` — so the dashboard + can show *why* a rule passed/failed + • emits ``recommendations`` and, for AR01, an ``EDA Migration Advisor`` + that suggests where to migrate polling endpoints to events/streams +""" +from __future__ import annotations + +import json +import os +import re +import urllib.error +import urllib.request +from typing import Any + + +# ─── Public catalogue ────────────────────────────────────────────────────── +# +# Weights confirmed by the user (Option A — additive on top of the 100-pts +# legacy score). Total architecture+infrastructure budget = 23 pts. +ARCH_RULES: dict[str, dict] = { + "AR01_event_driven": { + "id": "AR01", + "label": "Event-Driven Architecture", + "max_pts": 6, + "category": "architecture", + "description": ( + "Utiliser une architecture événementielle (callbacks/webhooks, AsyncAPI, " + "SSE, WebSocket, broker de messages) pour éviter le polling et " + "réduire la pression réseau côté consommateurs." + ), + }, + "AR02_runtime_close": { + "id": "AR02", + "label": "Runtime proche du consommateur", + "max_pts": 7, + "category": "architecture", + "description": ( + "Déployer l'API au plus près des consommateurs (CDN, edge, " + "anycast multi-régions) pour réduire l'empreinte réseau." 
+ ), + }, + "AR03_unique_api": { + "id": "AR03", + "label": "Une seule API par besoin", + "max_pts": 3, + "category": "architecture", + "description": ( + "Éviter la duplication d'APIs servant le même besoin " + "(double infrastructure = double empreinte)." + ), + }, + "AR04_scalable_infra": { + "id": "AR04", + "label": "Infrastructure scalable", + "max_pts": 5, + "category": "infrastructure", + "description": ( + "Préférer une infrastructure auto-scalable (HPA, KEDA, autoscale, " + "serverless) pour éviter le sur-provisionnement." + ), + }, + "AR05_cloud_footprint": { + "id": "AR05", + "label": "Dashboard d'empreinte du cloud provider", + "max_pts": 2, + "category": "infrastructure", + "description": ( + "Suivre l'empreinte carbone via le dashboard natif du provider " + "(AWS Customer Carbon Footprint Tool, Azure Emissions Impact " + "Dashboard, GCP Carbon Footprint…)." + ), + }, +} + + +# ─── Helpers ─────────────────────────────────────────────────────────────── + +# AsyncAPI discovery paths (similar in spirit to SWAGGER_DISCOVERY_PATHS). +ASYNCAPI_DISCOVERY_PATHS = [ + "/asyncapi.json", + "/asyncapi.yaml", + "/asyncapi", + "/v3/asyncapi", + "/.well-known/asyncapi", +] + +# Headers that strongly indicate a CDN / edge proxy / cloud provider. +# Matching is case-insensitive — header dicts in the analyzer are lowered. 
+CDN_HEADER_PATTERNS: list[tuple[str, str, str]] = [ + # (provider, header_name_lower, regex_value_or_"*") + ("cloudflare", "cf-ray", r".+"), + ("cloudflare", "cf-cache-status", r".+"), + ("cloudflare", "server", r"cloudflare"), + ("aws-cloudfront", "x-amz-cf-id", r".+"), + ("aws-cloudfront", "x-amz-cf-pop", r".+"), + ("aws-cloudfront", "via", r"cloudfront"), + ("aws", "x-amzn-requestid", r".+"), + ("aws", "x-amz-request-id", r".+"), + ("azure-frontdoor", "x-azure-ref", r".+"), + ("azure-frontdoor", "x-azure-fdid", r".+"), + ("azure", "x-ms-request-id", r".+"), + ("azure", "x-msedge-ref", r".+"), + ("gcp", "x-goog-trace", r".+"), + ("gcp", "x-cloud-trace-context", r".+"), + ("gcp-loadbalancer", "via", r"google"), + ("akamai", "x-akamai-request-id", r".+"), + ("akamai", "x-akamai-staging", r".+"), + ("fastly", "x-served-by", r"cache-"), + ("fastly", "x-fastly-request-id", r".+"), + ("fastly", "fastly-debug-digest", r".+"), + ("varnish-edge", "x-varnish", r".+"), +] + + +def _http_head_only(url: str, headers: dict | None = None, timeout: int = 5) -> dict: + """HEAD probe returning lowercase response headers (or {} on failure).""" + req = urllib.request.Request(url, method="HEAD", headers=headers or {}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return {k.lower(): v for k, v in resp.getheaders()} + except urllib.error.HTTPError as e: + try: + return {k.lower(): v for k, v in e.headers.items()} + except Exception: + return {} + except Exception: + return {} + + +def _http_get_bytes(url: str, headers: dict | None = None, timeout: int = 6) -> tuple[int, bytes, dict]: + req = urllib.request.Request(url, headers=headers or {}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return resp.status, resp.read(), {k.lower(): v for k, v in resp.getheaders()} + except urllib.error.HTTPError as e: + body = e.read() if hasattr(e, "read") else b"" + return e.code, body, {} + except Exception: + return 0, b"", {} + + +# 
═══════════════════════════════════════════════════════════════════════════ +# AR01 — Event-Driven Architecture +# ═══════════════════════════════════════════════════════════════════════════ + +# Poll-ish path tokens that suggest an endpoint exists *because* the API has no +# events. Used by the EDA Migration Advisor. +_POLLING_PATH_RE = re.compile( + r"/(changes|since|events|notifications|updates|polls?|tail|delta)\b", + flags=re.IGNORECASE, +) +_LONGPOLL_QUERY_NAMES = {"wait", "timeout", "polltimeout", "poll_timeout", + "longpoll", "long_poll", "since", "after"} + + +def _collect_callbacks(spec: dict) -> list[dict]: + """OAS 3.x callbacks declared on operations. + + A *callback* is a strong proof that the API publishes events to consumers. + """ + out = [] + for path, ops in (spec.get("paths") or {}).items(): + if not isinstance(ops, dict): + continue + for method, op in ops.items(): + if not isinstance(op, dict): + continue + cb = op.get("callbacks") + if isinstance(cb, dict) and cb: + out.append({ + "path": path, "method": method.upper(), + "callbacks": list(cb.keys()), + }) + return out + + +def _collect_webhooks(spec: dict) -> list[str]: + """OAS 3.1 top-level ``webhooks`` keys.""" + wh = spec.get("webhooks") + return list(wh.keys()) if isinstance(wh, dict) else [] + + +def _detect_streaming_endpoints(spec: dict, measurements: dict) -> list[dict]: + """Cross-validation spec + runtime: endpoints declaring text/event-stream + in OAS *and* confirmed by the runtime Content-Type observed during the + measurement step. 
Strong signal for SSE.""" + declared = [] + for path, ops in (spec.get("paths") or {}).items(): + if not isinstance(ops, dict): + continue + for method, op in ops.items(): + if not isinstance(op, dict): + continue + for resp in (op.get("responses") or {}).values(): + if not isinstance(resp, dict): + continue + for media_type in (resp.get("content") or {}).keys(): + if "event-stream" in media_type.lower(): + declared.append({ + "path": path, "method": method.lower(), + "media_type": media_type, + }) + break + confirmed = [] + for d in declared: + key = f"{d['method']}:{d['path']}" + m = measurements.get(key) or {} + ct = (m.get("response_headers") or {}).get("content-type", "") + if "event-stream" in ct.lower(): + confirmed.append({**d, "runtime_content_type": ct}) + return confirmed + + +def _discover_asyncapi(base_urls: list[str]) -> list[dict]: + """Probe every base_url for an AsyncAPI document. + + AsyncAPI is the canonical proof of a documented event-driven contract. + """ + out = [] + for base in base_urls: + if not base: + continue + for p in ASYNCAPI_DISCOVERY_PATHS: + url = base.rstrip("/") + p + code, body, hdrs = _http_get_bytes(url, timeout=4) + if code != 200 or not body: + continue + txt = body.decode("utf-8", errors="replace") + # Heuristic: must mention "asyncapi" version key in JSON or YAML + if re.search(r'(?i)["\']?asyncapi["\']?\s*[:=]\s*["\']?\d', txt): + out.append({"base_url": base, "asyncapi_url": url}) + break + return out + + +def _eda_migration_advisor(spec: dict, endpoints: list[dict], + measurements: dict) -> list[dict]: + """Produce per-endpoint suggestions to migrate to event/stream patterns. + + Each suggestion is anchored to a concrete *condition* with evidence so + the dashboard can render it with traceability. 
+ """ + advice: list[dict] = [] + + for ep in endpoints: + path = ep.get("path", "") + method = ep.get("method", "").lower() + params = ep.get("parameters") or [] + key = f"{method}:{path}" + m = measurements.get(key) or {} + m_headers = m.get("response_headers") or {} + + # Condition 1 — polling-flavoured path (changes/events/notifications/…) + if _POLLING_PATH_RE.search(path): + advice.append({ + "endpoint": {"method": method.upper(), "path": path}, + "condition": "polling-path-token", + "evidence": f"Path matches /(changes|since|events|notifications|updates|polls)/", + "suggestion": ( + "Remplacer le polling par un flux d'événements: exposer le " + "même besoin via SSE (text/event-stream) ou un sujet " + "AsyncAPI/Kafka pour pousser les changements aux abonnés." + ), + "target_pattern": "SSE or AsyncAPI subscription", + }) + + # Condition 2 — long-polling query parameters + param_names = {(p or {}).get("name", "").lower() for p in params} + if param_names & _LONGPOLL_QUERY_NAMES and method == "get": + present = sorted(param_names & _LONGPOLL_QUERY_NAMES) + advice.append({ + "endpoint": {"method": method.upper(), "path": path}, + "condition": "long-polling-query", + "evidence": f"Query params suggesting long-poll: {', '.join(present)}", + "suggestion": ( + "Long-polling détecté → migrer vers WebSocket ou SSE. " + "Le client ouvre une seule connexion et reçoit les " + "événements push, divisant les RTT/CPU par 10 à 100×." + ), + "target_pattern": "WebSocket or SSE", + }) + + # Condition 3 — Retry-After observed at runtime (rate-limited) + ra = m_headers.get("retry-after") or m_headers.get("x-ratelimit-reset") + if ra: + advice.append({ + "endpoint": {"method": method.upper(), "path": path}, + "condition": "rate-limited", + "evidence": f"Server returned Retry-After/X-RateLimit-Reset = {ra}", + "suggestion": ( + "L'endpoint est rate-limité, signe d'une charge en polling. 
" + "Publier un événement domaine et laisser les consommateurs " + "s'abonner réduira la pression et les rejets." + ), + "target_pattern": "Domain Event / AsyncAPI", + }) + + # Condition 4 — x-poll-interval extension (explicit polling contract) + # We look at the per-operation node by reading from the spec. + try: + op = ((spec.get("paths") or {}).get(path) or {}).get(method) or {} + except Exception: + op = {} + if isinstance(op, dict): + for ext_key in op.keys(): + if isinstance(ext_key, str) and ext_key.lower() in ( + "x-poll-interval", "x-polling-interval", "x-polling" + ): + advice.append({ + "endpoint": {"method": method.upper(), "path": path}, + "condition": "explicit-polling-extension", + "evidence": f"OpenAPI extension {ext_key} present on the operation", + "suggestion": ( + "Une extension explicite de polling indique un cas " + "d'usage parfait pour AsyncAPI/EventGrid/EventBridge." + ), + "target_pattern": "AsyncAPI subscription", + }) + + # Condition 5 — mutating endpoint without callbacks/webhooks + if method in ("post", "put", "patch", "delete") and isinstance(op, dict): + has_cb = isinstance(op.get("callbacks"), dict) and op["callbacks"] + if not has_cb: + advice.append({ + "endpoint": {"method": method.upper(), "path": path}, + "condition": "mutating-without-callback", + "evidence": "Mutation declared but no OAS callbacks/webhooks", + "suggestion": ( + "Publier un événement domaine après mutation " + "(Kafka/RabbitMQ/Azure Service Bus/EventBridge) " + "pour découpler les consommateurs. Documenter via " + "callbacks (OAS 3.x) ou un AsyncAPI dédié." 
+ ), + "target_pattern": "Domain Event publication", + }) + + # Dedupe (same endpoint, same condition) + seen = set() + deduped = [] + for a in advice: + k = (a["endpoint"]["method"], a["endpoint"]["path"], a["condition"]) + if k in seen: + continue + seen.add(k) + deduped.append(a) + return deduped + + +def evaluate_AR01(spec: dict, base_urls: list[str], endpoints: list[dict], + measurements: dict) -> dict: + """AR01 — Event-Driven Architecture. + + Strong signals (Phase 1, spec + runtime only): + • OAS callbacks on at least 1 operation + • OAS 3.1 webhooks + • AsyncAPI document discovered on any base URL + • SSE: declared in spec ``content`` AND confirmed by runtime Content-Type + """ + callbacks = _collect_callbacks(spec) + webhooks = _collect_webhooks(spec) + asyncapi_docs = _discover_asyncapi(base_urls) + sse_endpoints = _detect_streaming_endpoints(spec, measurements) + + evidence: list[dict] = [] + candidates: list[dict] = [] + for cb in callbacks: + evidence.append({"kind": "spec", "where": f"{cb['method']} {cb['path']}", + "value": f"callbacks: {', '.join(cb['callbacks'])}"}) + candidates.append({"method": cb["method"], "path": cb["path"], + "matched": True, "reason": "OAS callbacks declared"}) + for w in webhooks: + evidence.append({"kind": "spec", "where": "webhooks", + "value": f"webhook: {w}"}) + candidates.append({"method": "POST", "path": f"webhook:{w}", + "matched": True, "reason": "OAS 3.1 webhook declared"}) + for a in asyncapi_docs: + evidence.append({"kind": "asyncapi", "where": a["base_url"], + "value": a["asyncapi_url"]}) + for s in sse_endpoints: + evidence.append({"kind": "runtime+spec", "where": f"{s['method'].upper()} {s['path']}", + "value": f"SSE confirmed (Content-Type: {s['runtime_content_type']})"}) + candidates.append({"method": s["method"].upper(), "path": s["path"], + "matched": True, "reason": "SSE declared & confirmed at runtime"}) + + matched = bool(callbacks or webhooks or asyncapi_docs or sse_endpoints) + max_pts = 
ARCH_RULES["AR01_event_driven"]["max_pts"] + score = max_pts if matched else 0 + + advice = _eda_migration_advisor(spec, endpoints, measurements) + recommendations: list[str] = [] + if not matched: + if advice: + recommendations.append( + f"Aucun signal EDA détecté mais {len(advice)} opportunité(s) " + "de migration vers SSE/AsyncAPI/WebSocket trouvées (cf. EDA Advisor)." + ) + else: + recommendations.append( + "Aucun signal EDA détecté. Documentez vos flux d'événements via " + "AsyncAPI ou ajoutez des callbacks/webhooks dans votre OpenAPI." + ) + else: + recommendations.append( + "Architecture événementielle détectée. Vérifiez que la documentation " + "(AsyncAPI/callbacks) couvre tous les flux asynchrones." + ) + + return { + "rule_id": "AR01", + "score": score, + "max_pts": max_pts, + "matched": matched, + "category": "architecture", + "candidates": candidates, + "evidence": evidence, + "recommendations": recommendations, + "migration_advice": advice, + } + + +# ═══════════════════════════════════════════════════════════════════════════ +# AR03 — Ensure only one API fits the same need +# ═══════════════════════════════════════════════════════════════════════════ + +_VERSION_PREFIX_RE = re.compile(r"^/v\d+(?=/)", flags=re.IGNORECASE) + + +def _normalise_path(path: str) -> str: + """Replace ``{name}`` placeholders by ``{}`` and strip a leading version + segment so that ``/v1/books/{id}`` and ``/v2/books/{id}`` look the same. 
+ """ + p = _VERSION_PREFIX_RE.sub("", path or "") + p = re.sub(r"\{[^}]+\}", "{}", p) + return p + + +def _operation_signature(method: str, path: str, op: dict) -> tuple: + norm_path = _normalise_path(path) + params = op.get("parameters") or [] + required_params = sorted( + (p.get("name", "") for p in params if isinstance(p, dict) and p.get("required")) + ) + responses = sorted((op.get("responses") or {}).keys()) + return (method.upper(), norm_path, tuple(required_params), tuple(responses)) + + +def _all_operation_signatures(spec: dict) -> set[tuple]: + sigs = set() + for path, ops in (spec.get("paths") or {}).items(): + if not isinstance(ops, dict): + continue + for method, op in ops.items(): + if not isinstance(op, dict): + continue + if method not in ("get", "post", "put", "patch", "delete", "head"): + continue + sigs.add(_operation_signature(method, path, op)) + return sigs + + +def _all_tags(spec: dict) -> set[str]: + tags = set() + for ops in (spec.get("paths") or {}).values(): + if not isinstance(ops, dict): + continue + for op in ops.values(): + if isinstance(op, dict): + for t in op.get("tags") or []: + if isinstance(t, str): + tags.add(t.lower()) + return tags + + +def _summary_tokens(spec: dict) -> dict[str, int]: + """Tiny TF-light bag of words from operation summaries (for cosine-ish).""" + bow: dict[str, int] = {} + for ops in (spec.get("paths") or {}).values(): + if not isinstance(ops, dict): + continue + for op in ops.values(): + if not isinstance(op, dict): + continue + text = (op.get("summary") or "") + " " + (op.get("description") or "") + for tok in re.findall(r"[A-Za-z]{3,}", text.lower()): + bow[tok] = bow.get(tok, 0) + 1 + return bow + + +def _cosine_bow(a: dict[str, int], b: dict[str, int]) -> float: + if not a or not b: + return 0.0 + common = set(a) & set(b) + if not common: + return 0.0 + dot = sum(a[k] * b[k] for k in common) + na = sum(v * v for v in a.values()) ** 0.5 + nb = sum(v * v for v in b.values()) ** 0.5 + return dot / (na * 
nb) if na and nb else 0.0 + + +def _jaccard(a: set, b: set) -> float: + if not a and not b: + return 0.0 + union = a | b + return len(a & b) / len(union) if union else 0.0 + + +def evaluate_AR03(specs_per_target: list[tuple[str, dict]], + thresholds: dict | None = None) -> dict: + """AR03 — Ensure only one API fits the same need. + + Compares every pair of *targets* on three orthogonal signals: + 1. Jaccard on operation signatures (≥ T1) + 2. Jaccard on tags (≥ T2) + 3. Cosine on summary BoW (≥ T3) + + A pair triggers a duplication warning ONLY when the three thresholds are + crossed simultaneously (the "triplet" rule we agreed on). + Versioned duplicates (``/v1/...`` vs ``/v2/...``) are exempt: the + signature normaliser strips ``/vN`` so they collapse to the same path, + but they remain a *legitimate* form of duplication and we downgrade the + severity to a warning instead of failing the rule. + """ + th = thresholds or {} + T_SIG = th.get("AR03_jaccard_threshold", 0.30) + T_TAGS = th.get("AR03_tags_overlap_threshold", 0.50) + T_COS = th.get("AR03_summary_cosine_threshold", 0.40) + + max_pts = ARCH_RULES["AR03_unique_api"]["max_pts"] + + if len(specs_per_target) < 2: + return { + "rule_id": "AR03", + "score": max_pts, + "max_pts": max_pts, + "matched": True, + "category": "architecture", + "candidates": [], + "evidence": [{"kind": "n/a", "where": "targets", + "value": f"Only {len(specs_per_target)} target — duplication not applicable"}], + "recommendations": ["Une seule cible analysée — comparez plusieurs APIs pour activer AR03."], + "duplicates": [], + } + + # Pre-compute features per target + feats = [] + for target, spec in specs_per_target: + feats.append({ + "target": target, + "sigs": _all_operation_signatures(spec), + "tags": _all_tags(spec), + "bow": _summary_tokens(spec), + }) + + duplicates: list[dict] = [] + evidence: list[dict] = [] + for i in range(len(feats)): + for j in range(i + 1, len(feats)): + a, b = feats[i], feats[j] + j_sig = 
_jaccard(a["sigs"], b["sigs"]) + j_tag = _jaccard(a["tags"], b["tags"]) + cos = _cosine_bow(a["bow"], b["bow"]) + if j_sig >= T_SIG and j_tag >= T_TAGS and cos >= T_COS: + duplicates.append({ + "target_a": a["target"], "target_b": b["target"], + "jaccard_signatures": round(j_sig, 3), + "jaccard_tags": round(j_tag, 3), + "cosine_summaries": round(cos, 3), + }) + evidence.append({ + "kind": "duplication", + "where": f"{a['target']} ⇄ {b['target']}", + "value": (f"sig={j_sig:.2f} (≥{T_SIG}), " + f"tags={j_tag:.2f} (≥{T_TAGS}), " + f"summary_cos={cos:.2f} (≥{T_COS})"), + }) + + # Score: penalty proportional to number of duplicate pairs (capped to 0). + n_pairs = len(feats) * (len(feats) - 1) // 2 + dup_ratio = len(duplicates) / n_pairs if n_pairs else 0.0 + score = round(max_pts * (1.0 - dup_ratio)) + matched = len(duplicates) == 0 + + recommendations: list[str] = [] + if duplicates: + for d in duplicates: + recommendations.append( + f"Doublon probable entre {d['target_a']} et {d['target_b']} — " + f"fusionner ou marquer l'une comme dépréciée." + ) + else: + recommendations.append( + "Aucune duplication détectée entre les cibles analysées." + ) + + return { + "rule_id": "AR03", + "score": score, + "max_pts": max_pts, + "matched": matched, + "category": "architecture", + "candidates": [], + "evidence": evidence, + "recommendations": recommendations, + "duplicates": duplicates, + } + + +# ═══════════════════════════════════════════════════════════════════════════ +# AR05 — Cloud Footprint Dashboard +# ═══════════════════════════════════════════════════════════════════════════ + +# Substrings that prove minimal observability is in place — required for the +# carbon dashboard to receive data. These are detected in the discovered base +# URLs (the analyzer already probes /actuator/health for readiness). 
_OBSERVABILITY_PROBE_PATHS = [
    "/actuator/metrics",
    "/actuator/prometheus",
    "/metrics",
    "/q/metrics",  # Quarkus
]


def _detect_cloud_providers(measurements: dict) -> dict[str, list[str]]:
    """Return ``{provider: [evidence_strings]}`` based on response headers.

    Iterates over every measurement we already collected so we don't issue
    extra HTTP probes. A provider can be hit multiple times — we keep the
    list of evidences for the report.
    """
    found: dict[str, list[str]] = {}
    for ep_key, m in measurements.items():
        headers = (m or {}).get("response_headers") or {}
        # NOTE(review): this lookup is case-sensitive — confirm that
        # CDN_HEADER_PATTERNS header names match the exact casing stored
        # in `response_headers` by the measurement step.
        for provider, hname, value_re in CDN_HEADER_PATTERNS:
            v = headers.get(hname)
            if not v:
                continue
            # r".+" patterns mean "header presence is enough"; otherwise
            # the value must match the provider-specific regex.
            if value_re == r".+" or re.search(value_re, str(v), flags=re.IGNORECASE):
                ev = f"{ep_key} → {hname}: {v}"
                found.setdefault(provider, []).append(ev)
    return found


def _probe_observability(base_urls: list[str], auth_headers: dict | None) -> list[dict]:
    """Probe well-known observability endpoints. Returns list of hits."""
    hits = []
    for base in base_urls:
        if not base:
            continue
        for p in _OBSERVABILITY_PROBE_PATHS:
            url = base.rstrip("/") + p
            # Short timeout: these are best-effort probes, not measurements.
            code, body, _ = _http_get_bytes(url, headers=auth_headers, timeout=4)
            if code == 200 and body:
                hits.append({"base_url": base, "url": url})
                break  # one hit per target is enough
    return hits


def evaluate_AR05(measurements: dict, base_urls: list[str],
                  auth_headers: dict | None,
                  cloud_dashboards: dict, footprint_confirmed: bool) -> dict:
    """AR05 — Cloud Footprint Dashboard.

    Score = max_pts only when:
      • a cloud provider is detected (by edge headers), AND
      • observability is exposed (actuator/prometheus reachable), AND
      • the operator confirmed the dashboard is being used
        (``--cloud-footprint-confirmed`` CLI flag).

    Otherwise the rule is *informational*: 0 points but rendered with the
    deep-link to the provider-native dashboard so teams can act on it.
    """
    max_pts = ARCH_RULES["AR05_cloud_footprint"]["max_pts"]

    providers = _detect_cloud_providers(measurements)
    obs_hits = _probe_observability(base_urls, auth_headers)

    # Resolve one canonical provider for the recommendation. Priority based on
    # specificity (frontdoor/cloudfront wins over generic aws/azure markers).
    canonical = None
    for pref in ("aws-cloudfront", "azure-frontdoor", "gcp-loadbalancer",
                 "akamai", "fastly", "cloudflare", "varnish-edge",
                 "aws", "azure", "gcp"):
        if pref in providers:
            # NOTE(review): "varnish-edge" maps to "varnish" here — verify
            # that `cloud_dashboards` has keys for non-hyperscaler providers,
            # otherwise the recommendation below is silently skipped.
            canonical = pref.split("-")[0]  # "aws-cloudfront" → "aws"
            break

    evidence: list[dict] = []
    for prov, evs in providers.items():
        for ev in evs[:3]:  # cap to avoid noise
            evidence.append({"kind": "header", "where": prov, "value": ev})
    for h in obs_hits:
        evidence.append({"kind": "observability", "where": h["base_url"],
                         "value": h["url"]})

    # All-or-nothing: the three conditions of the docstring must hold.
    matched = bool(canonical and obs_hits and footprint_confirmed)
    score = max_pts if matched else 0

    recommendations: list[str] = []
    if canonical:
        # NOTE(review): when the provider is detected but no dashboard URL is
        # configured, no recommendation is emitted for this branch — confirm
        # this silence is intentional.
        url = (cloud_dashboards or {}).get(canonical)
        if url:
            recommendations.append(
                f"Cloud détecté: **{canonical.upper()}**. "
                f"Activez et consultez régulièrement le dashboard d'empreinte: {url}"
            )
    else:
        recommendations.append(
            "Aucun cloud provider détecté via les en-têtes HTTP. "
            "Si l'API est hébergée sur AWS/Azure/GCP/OVH, vérifiez l'exposition "
            "des en-têtes edge ou confirmez l'usage du dashboard manuellement."
        )
    if not obs_hits:
        recommendations.append(
            "Aucune télémétrie standard détectée (/actuator/metrics, /metrics, "
            "/q/metrics). Exposez les métriques pour alimenter le dashboard."
        )
    if canonical and obs_hits and not footprint_confirmed:
        recommendations.append(
            "Confirmez l'usage actif du dashboard d'empreinte avec "
            "``--cloud-footprint-confirmed`` pour valider AR05."
        )

    return {
        "rule_id": "AR05",
        "score": score,
        "max_pts": max_pts,
        "matched": matched,
        "category": "infrastructure",
        "candidates": [],
        "evidence": evidence,
        "recommendations": recommendations,
        "detected_provider": canonical,
        "providers_raw": providers,
    }


# ═══════════════════════════════════════════════════════════════════════════
# Public entry point used by the analyzer
# ═══════════════════════════════════════════════════════════════════════════

def evaluate_architecture_rules(
    *,
    spec: dict,
    sources: list[tuple],  # (base_url, spec_dict, source_label) triples
    endpoints: list[dict],
    measurements: dict,
    auth_headers: dict | None = None,
    thresholds: dict | None = None,
    cloud_dashboards: dict | None = None,
    footprint_confirmed: bool = False,
    enable_phase2: bool = False,  # source-dir/IaC/deps scan — wired in P2
    source_dir: str | None = None,
) -> dict[str, dict]:
    """Run every Architecture/Infrastructure rule and return:

        { rule_key: RuleResult }

    where ``rule_key`` matches ``ARCH_RULES`` keys. Rules not covered by the
    current phase return ``score=0, matched=False, category="…", evidence=[],
    recommendations=["Pending Phase 2/3 implementation"]`` so the dashboard
    can render placeholders without breaking layout.
    """
    # NOTE(review): ``enable_phase2`` and ``source_dir`` are accepted but not
    # consumed anywhere in this body yet (reserved for the Phase 2 IaC scan).
    base_urls = [b for (b, _spec, _src) in (sources or []) if b]
    specs_per_target = [(b, sp) for (b, sp, _src) in (sources or []) if sp]

    out: dict[str, dict] = {}

    out["AR01_event_driven"] = evaluate_AR01(spec, base_urls, endpoints, measurements)

    # AR02 — Phase 3 placeholder (factual signals will land in a later patch)
    out["AR02_runtime_close"] = {
        "rule_id": "AR02",
        "score": 0,
        "max_pts": ARCH_RULES["AR02_runtime_close"]["max_pts"],
        "matched": False,
        "category": "architecture",
        "candidates": [],
        "evidence": [],
        "recommendations": [
            "AR02 sera évalué via headers edge/CDN, ASN anycast et latence TLS "
            "(activable avec --enable-geoip --consumer-region ) — "
            "implémentation Phase 3."
        ],
    }

    out["AR03_unique_api"] = evaluate_AR03(specs_per_target, thresholds)

    # AR04 — Phase 2 placeholder (needs IaC + deps scan)
    out["AR04_scalable_infra"] = {
        "rule_id": "AR04",
        "score": 0,
        "max_pts": ARCH_RULES["AR04_scalable_infra"]["max_pts"],
        "matched": False,
        "category": "infrastructure",
        "candidates": [],
        "evidence": [],
        "recommendations": [
            "AR04 sera évalué via scan IaC (HPA, KEDA, autoscale Terraform/Bicep) "
            "et marqueurs serverless dans pom.xml/*.csproj/package.json — "
            "implémentation Phase 2 (--source-dir requis)."
        ],
    }

    out["AR05_cloud_footprint"] = evaluate_AR05(
        measurements, base_urls, auth_headers,
        cloud_dashboards or {}, footprint_confirmed,
    )

    return out


# ─── Self-test (manual) ────────────────────────────────────────────────────
if __name__ == "__main__":  # pragma: no cover
    import sys
    if len(sys.argv) < 2:
        # NOTE(review): the usage string looks truncated (the spec-file
        # placeholder appears to have been lost, likely to markup stripping)
        # — confirm and restore e.g. "<openapi.json>".
        print("Usage: architecture_rules.py ")
        sys.exit(1)
    with open(sys.argv[1], "r", encoding="utf-8") as f:
        s = json.load(f)
    res = evaluate_architecture_rules(
        spec=s, sources=[("http://localhost", s, sys.argv[1])],
        endpoints=[], measurements={}, footprint_confirmed=False,
    )
    print(json.dumps(res, indent=2, ensure_ascii=False))

diff --git a/scripts/green-api-auto-discover.py b/scripts/green-api-auto-discover.py
index 86cb3b5..33e8220 100644
--- a/scripts/green-api-auto-discover.py
+++ b/scripts/green-api-auto-discover.py
@@ -35,6 +35,18 @@
 from datetime import datetime, timezone
 from pathlib import Path
 
+# Local module: Architecture/Infrastructure rules (AR01..AR05)
+sys.path.insert(0, str(Path(__file__).parent))
+try:
+    from architecture_rules import (
+        evaluate_architecture_rules,
+        ARCH_RULES as _ARCH_RULES_CATALOGUE,
+    )
+    _ARCH_RULES_AVAILABLE = True
+except Exception as _arch_imp_err:  # pragma: no cover
+    _ARCH_RULES_AVAILABLE = False
+    _ARCH_RULES_IMPORT_ERROR = str(_arch_imp_err)
+
 # ── Fix Windows console encoding (CP1252 → UTF-8) ──────────────────────────
# On Windows, the default stdout/stderr encoding is often CP1252 which cannot # handle emojis (✅, ❌, 🟢, …). Force UTF-8 with replace fallback so that @@ -73,46 +85,87 @@ "id": "DE11", "label": "Pagination", "max_pts": 15, "check": "collection_has_pagination_params", "description": "Les endpoints de collection doivent supporter la pagination (page/size ou limit/offset).", + "category": "data-efficiency", }, "DE08_fields": { "id": "DE08", "label": "Filtrage de champs", "max_pts": 15, "check": "has_fields_param", "description": "Supporter un parametre 'fields' pour reduire le payload.", + "category": "data-efficiency", }, "DE01_compression": { "id": "DE01", "label": "Compression", "max_pts": 15, "check": "server_supports_gzip", "description": "Le serveur doit supporter Accept-Encoding: gzip.", + "category": "data-efficiency", }, "DE02_DE03_cache": { "id": "DE02/DE03", "label": "Cache ETag/304", "max_pts": 15, "check": "supports_etag_304", "description": "Les ressources unitaires doivent supporter ETag + If-None-Match -> 304.", + "category": "data-efficiency", }, "DE06_delta": { "id": "DE06", "label": "Delta / Changes", "max_pts": 10, "check": "has_delta_endpoint", "description": "Un endpoint /changes?since= ou equivalent doit exister.", + "category": "data-efficiency", }, "range_206": { "id": "206", "label": "Range / Partial Content", "max_pts": 10, "check": "supports_range_206", "description": "Supporter le header Range pour les gros payloads.", + "category": "data-efficiency", }, "AR02_format_cbor": { - "id": "AR02", "label": "Format binaire (CBOR)", "max_pts": 10, + # NOTE: rule id renamed BIN01 (was "AR02") to free the official + # APIGreenScore AR02 (= "API runtime close to consumer"). The internal + # rule key is kept unchanged so legacy reports remain readable. + "id": "BIN01", "label": "Format binaire (CBOR)", "max_pts": 10, "check": "has_binary_format", "description": "Un endpoint en format binaire (CBOR, protobuf...) 
doit exister.", + "category": "data-efficiency", }, "LO01_observability": { "id": "LO01", "label": "Observabilite", "max_pts": 5, "check": "has_actuator", "description": "Actuator / health / metrics doit etre expose.", + "category": "logs-observability", }, "US07_rate_limit": { "id": "US07", "label": "Rate Limiting", "max_pts": 5, "check": "assumed_if_running", "description": "Un mecanisme de rate limiting doit etre present.", + "category": "usage", + }, + + # ── Architecture & Infrastructure (APIGreenScore official AR rules) ── + # Weights confirmed by the user (Option A): AR01=6, AR02=7, AR03=3, + # AR04=5, AR05=2 → +23 pts on top of the 100-pts legacy budget. + "AR01_event_driven": { + "id": "AR01", "label": "Architecture événementielle", "max_pts": 6, + "check": "external_arch_rules", "category": "architecture", + "description": "Utiliser une architecture événementielle (callbacks, webhooks, AsyncAPI, SSE, WebSocket, broker) pour éviter le polling.", + }, + "AR02_runtime_close": { + "id": "AR02", "label": "Runtime proche du consommateur", "max_pts": 7, + "check": "external_arch_rules", "category": "architecture", + "description": "Déployer l'API au plus près des consommateurs (CDN, edge, anycast multi-régions).", + }, + "AR03_unique_api": { + "id": "AR03", "label": "Une seule API par besoin", "max_pts": 3, + "check": "external_arch_rules", "category": "architecture", + "description": "Éviter la duplication d'APIs servant le même besoin (double infrastructure = double empreinte).", + }, + "AR04_scalable_infra": { + "id": "AR04", "label": "Infrastructure scalable", "max_pts": 5, + "check": "external_arch_rules", "category": "infrastructure", + "description": "Préférer une infrastructure auto-scalable (HPA, KEDA, autoscale, serverless).", + }, + "AR05_cloud_footprint": { + "id": "AR05", "label": "Dashboard d'empreinte cloud", "max_pts": 2, + "check": "external_arch_rules", "category": "infrastructure", + "description": "Suivre l'empreinte carbone via le dashboard 
natif du provider cloud.", }, } @@ -1755,6 +1808,15 @@ def main(): parser.add_argument("--output-dir", default="", help="Output directory for reports (default: reports/)") parser.add_argument("--skip-spectral", action="store_true", help="Skip Spectral linting") parser.add_argument("--skip-dashboard", action="store_true", help="Skip dashboard generation") + # Architecture rules (AR01–AR05) controls + parser.add_argument("--cloud-footprint-confirmed", action="store_true", + help="Confirm that the cloud provider's carbon dashboard is actively used (required to validate AR05).") + parser.add_argument("--consumer-region", default="", + help="ISO-3166 alpha-2 region of the API consumers (e.g. FR, US). Used by AR02 distance scoring.") + parser.add_argument("--enable-geoip", action="store_true", + help="Enable optional GeoIP lookup (ipinfo.io) to compute consumer<->API distance for AR02.") + parser.add_argument("--source-dir", default="", + help="Path to the API source code (Java/.NET/Node) for AR01/AR04 deps & IaC scan (Phase 2).") args = parser.parse_args() # ── Resolve paths ── @@ -2051,6 +2113,105 @@ def _split_csv(items): green_score = analyze_green_rules(spec, endpoints, base_url, measurements, auth_headers) log(f" GREEN SCORE: {green_score['total']}/{green_score['max']} Grade: {green_score['grade']}", "OK") + # ── Step 4a: Architecture & Infrastructure rules (AR01..AR05) ── + if _ARCH_RULES_AVAILABLE: + # Load thresholds + cloud-dashboard URLs from green-score-threshold.json + thr_path = root_dir / "green-score-threshold.json" + thr_cfg: dict = {} + if thr_path.is_file(): + try: + thr_cfg = json.loads(thr_path.read_text(encoding="utf-8")) or {} + except Exception as e: + log(f" threshold config unreadable ({e}); falling back to defaults", "WARN") + arch_thresholds = thr_cfg.get("architecture", {}) + cloud_dashboards = thr_cfg.get("cloud_footprint_dashboards", {}) + + # Re-extract per-target measurements for context + try: + arch_results = 
evaluate_architecture_rules( + spec=spec, + sources=sources, + endpoints=endpoints, + measurements=measurements.get("auto_discovery", {}).get("all_measurements", {}) or {}, + auth_headers=auth_headers, + thresholds=arch_thresholds, + cloud_dashboards=cloud_dashboards, + footprint_confirmed=bool(args.cloud_footprint_confirmed), + enable_phase2=bool(args.source_dir), + source_dir=args.source_dir or None, + ) + except Exception as e: + log(f" Architecture rules evaluation failed: {e}", "WARN") + arch_results = {} + + # Merge architecture results into green_score (mapping/breakdown/total/max) + rule_mapping_full = green_score.get("rule_resource_mapping", {}) or {} + scores_full = green_score.get("breakdown", {}) or {} + endpoint_rules_full = green_score.get("endpoint_rules", {}) or {} + details_full = green_score.get("details", {}) or {} + + for arch_key, arch_res in arch_results.items(): + meta = GREEN_RULES.get(arch_key, {}) + rule_mapping_full[arch_key] = { + "id": meta.get("id", arch_res.get("rule_id", arch_key)), + "label": meta.get("label", arch_key), + "description": meta.get("description", ""), + "max_pts": arch_res.get("max_pts", meta.get("max_pts", 0)), + "category": arch_res.get("category", meta.get("category", "architecture")), + "validated": bool(arch_res.get("matched")), + "score": int(arch_res.get("score", 0)), + "matched_count": 1 if arch_res.get("matched") else 0, + "candidate_count": max(1, len(arch_res.get("candidates", []))), + "candidates": arch_res.get("candidates", []), + "evidence": arch_res.get("evidence", []), + "recommendations": arch_res.get("recommendations", []), + # AR01-only — visible by the dashboard if present + "migration_advice": arch_res.get("migration_advice", []), + # AR03/AR05 extras + "duplicates": arch_res.get("duplicates", []), + "detected_provider": arch_res.get("detected_provider"), + } + scores_full[arch_key] = int(arch_res.get("score", 0)) + details_full[arch_key] = { + "note": " | ".join(arch_res.get("recommendations", 
[]))[:300] + } + + # Refresh totals & grade with the new 123-pts budget. + legacy_total = sum(v for k, v in scores_full.items() if k not in arch_results) + arch_total = sum(int(v) for k, v in scores_full.items() if k in arch_results) + legacy_max = sum(GREEN_RULES[k]["max_pts"] for k in scores_full + if k in GREEN_RULES and k not in arch_results) + arch_max = sum(GREEN_RULES[k]["max_pts"] for k in arch_results if k in GREEN_RULES) + + total_raw = legacy_total + arch_total + max_raw = legacy_max + arch_max + # Normalised score on 100 for backward compatibility with badge/CI gates + score_normalised = round(100 * total_raw / max_raw) if max_raw else 0 + grade = ("A+" if score_normalised >= 90 else "A" if score_normalised >= 80 + else "B" if score_normalised >= 65 else "C" if score_normalised >= 50 + else "D" if score_normalised >= 30 else "E") + + green_score["rule_resource_mapping"] = rule_mapping_full + green_score["breakdown"] = scores_full + green_score["details"] = details_full + green_score["endpoint_rules"] = endpoint_rules_full + green_score["total"] = total_raw + green_score["max"] = max_raw + green_score["grade"] = grade + green_score["score_normalised_100"] = score_normalised + green_score["legacy_total"] = legacy_total + green_score["legacy_max"] = legacy_max + green_score["architecture_total"] = arch_total + green_score["architecture_max"] = arch_max + + log(f" ARCHITECTURE: {arch_total}/{arch_max} " + f"(AR01..AR05 → +23 pts budget)", "OK") + log(f" GREEN SCORE (normalised /100): {score_normalised} " + f"Grade: {grade}", "OK") + else: + log(f" architecture_rules module unavailable: {_ARCH_RULES_IMPORT_ERROR}", + "WARN") + # ── Step 4b: Display Rule ↔ Resource Mapping ── rule_mapping = green_score.get("rule_resource_mapping", {}) endpoint_rules = green_score.get("endpoint_rules", {}) From 4746b2751ba25ab7d4b13c83c0687a13a706c764 Mon Sep 17 00:00:00 2001 From: Thierno DIALLO Date: Wed, 29 Apr 2026 21:43:51 +0200 Subject: [PATCH 2/7] update architecture 
and infra rules --- .github/workflows/pr-green-api.yml | 54 +- dashboard/index.html | 45 +- dashboard/index.save.html | 36 +- dashboard/interactive.html | 85 +- scripts/architecture_rules.py | 762 +++++++++++++++--- scripts/green-api-auto-discover.py | 2 + scripts/green-score-analyzer_withdiscovery.sh | 17 + scripts/greenapianalyzer-server.py | 11 + scripts/start.sh | 47 ++ 9 files changed, 892 insertions(+), 167 deletions(-) diff --git a/.github/workflows/pr-green-api.yml b/.github/workflows/pr-green-api.yml index 28a113f..5e91c23 100644 --- a/.github/workflows/pr-green-api.yml +++ b/.github/workflows/pr-green-api.yml @@ -23,7 +23,7 @@ on: appname: description: "Override the application name in reports (default: repo name)" required: false - default: "" + default: "oprtimapi" bearer_token: description: >- Optional Bearer token for authenticated API endpoints. @@ -68,6 +68,26 @@ on: type: boolean required: false default: false + consumer_region: + description: >- + ISO-3166 alpha-2 region of the API consumers (e.g. FR, US, DE). + Drives AR02 (runtime close to consumer) distance scoring. + required: false + default: "FR" + enable_geoip: + description: >- + Enable optional GeoIP lookup (ipinfo.io) to compute API-vs-consumer + location and anycast/ASN cross-validation for AR02. + type: boolean + required: false + default: true + cloud_footprint_confirmed: + description: >- + Confirm that the cloud provider's carbon footprint dashboard is + actively used (required to validate AR05). + type: boolean + required: false + default: true env: JAVA_VERSION: '21' @@ -264,6 +284,10 @@ jobs: DEBUG: ${{ github.event.inputs.debug || 'false' }} # Bearer: prefer secret, fallback to manual input (logs may show value). 
BEARER_TOKEN: ${{ secrets.BEARER_TOKEN || github.event.inputs.bearer_token }} + # Architecture rules (AR02 Phase 3 + AR05) — wired to green-api-auto-discover.py + CONSUMER_REGION: ${{ github.event.inputs.consumer_region || '' }} + ENABLE_GEOIP: ${{ github.event.inputs.enable_geoip || 'false' }} + CLOUD_FOOTPRINT_CONFIRMED: ${{ github.event.inputs.cloud_footprint_confirmed || 'false' }} steps: - uses: actions/checkout@v4 - name: Setup JDK ${{ env.JAVA_VERSION }} @@ -403,6 +427,19 @@ jobs: if [ "${DEBUG:-false}" = "true" ]; then ARGS+=(--debug) fi + # Architecture rules (AR02 Phase 3 + AR04 Phase 2 + AR05) + # SOURCE_DIR points to the checked-out repo root so AR04 can scan IaC + # and AR01 can cross-validate broker dependencies. + ARGS+=(--source-dir "$GITHUB_WORKSPACE") + if [ -n "${CONSUMER_REGION:-}" ]; then + ARGS+=(--consumer-region "$CONSUMER_REGION") + fi + if [ "${ENABLE_GEOIP:-false}" = "true" ]; then + ARGS+=(--enable-geoip) + fi + if [ "${CLOUD_FOOTPRINT_CONFIRMED:-false}" = "true" ]; then + ARGS+=(--cloud-footprint-confirmed) + fi echo "🌿 Analyzing ${#T_ARR[@]} target(s): $TARGETS (appname=$APPNAME)" python3 scripts/green-api-auto-discover.py \ "${ARGS[@]}" \ @@ -736,6 +773,10 @@ jobs: APPNAME: ${{ github.event.inputs.appname || 'appoptim' }} DEBUG: ${{ github.event.inputs.debug || 'false' }} BEARER_TOKEN: ${{ secrets.BEARER_TOKEN || github.event.inputs.bearer_token }} + # Architecture rules (AR02 / AR04 / AR05) + CONSUMER_REGION: ${{ github.event.inputs.consumer_region || '' }} + ENABLE_GEOIP: ${{ github.event.inputs.enable_geoip || 'false' }} + CLOUD_FOOTPRINT_CONFIRMED: ${{ github.event.inputs.cloud_footprint_confirmed || 'false' }} steps: - uses: actions/checkout@v4 - name: Setup JDK ${{ env.JAVA_VERSION }} @@ -809,6 +850,17 @@ jobs: if [ "${DEBUG:-false}" = "true" ]; then ARGS+=(--debug) fi + # Architecture rules (AR02 / AR04 / AR05) + ARGS+=(--source-dir "$GITHUB_WORKSPACE") + if [ -n "${CONSUMER_REGION:-}" ]; then + ARGS+=(--consumer-region 
"$CONSUMER_REGION") + fi + if [ "${ENABLE_GEOIP:-false}" = "true" ]; then + ARGS+=(--enable-geoip) + fi + if [ "${CLOUD_FOOTPRINT_CONFIRMED:-false}" = "true" ]; then + ARGS+=(--cloud-footprint-confirmed) + fi python3 scripts/green-api-auto-discover.py \ "${ARGS[@]}" \ --repeat 3 \ diff --git a/dashboard/index.html b/dashboard/index.html index 882ac4c..4e6951c 100644 --- a/dashboard/index.html +++ b/dashboard/index.html @@ -2113,15 +2113,27 @@

🎛️ Actions

rulesList.innerHTML = ''; const ruleLabels = { - 'DE11_pagination': { name: 'DE11 — Pagination', defaultMax: 15, icon: '📄' }, - 'DE08_fields': { name: 'DE08 — Filtrage champs', defaultMax: 15, icon: '🔍' }, - 'DE01_compression': { name: 'DE01 — Compression Gzip', defaultMax: 15, icon: '🗜️' }, - 'DE02_DE03_cache': { name: 'DE02/03 — Cache ETag/304', defaultMax: 15, icon: '💾' }, - 'DE06_delta': { name: 'DE06 — Delta changes', defaultMax: 10, icon: '🔄' }, - 'range_206': { name: '206 — Partial Content', defaultMax: 10, icon: '✂️' }, - 'LO01_observability': { name: 'LO01 — Observabilité', defaultMax: 5, icon: '👁️' }, - 'US07_rate_limit': { name: 'US07 — Rate Limiting', defaultMax: 5, icon: '🚦' }, - 'AR02_format_cbor': { name: 'AR02 — Format CBOR', defaultMax: 10, icon: '📦' }, + // ── Data efficiency + 'DE11_pagination': { name: 'DE11 — Pagination', defaultMax: 15, icon: '📄' }, + 'DE08_fields': { name: 'DE08 — Filtrage champs', defaultMax: 15, icon: '🔍' }, + 'DE01_compression': { name: 'DE01 — Compression Gzip', defaultMax: 15, icon: '🗜️' }, + 'DE02_DE03_cache': { name: 'DE02/03 — Cache ETag/304', defaultMax: 15, icon: '💾' }, + 'DE06_delta': { name: 'DE06 — Delta changes', defaultMax: 10, icon: '🔄' }, + 'range_206': { name: '206 — Partial Content', defaultMax: 10, icon: '✂️' }, + // NB: rule key kept for backward-compat with historical reports; + // displayed id renamed BIN01 to free the official AR02 ("runtime close"). 
+ 'AR02_format_cbor': { name: 'BIN01 — Format binaire', defaultMax: 10, icon: '📦' }, + // ── Logs & Observability + 'LO01_observability': { name: 'LO01 — Observabilité', defaultMax: 5, icon: '👁️' }, + // ── Usage + 'US07_rate_limit': { name: 'US07 — Rate Limiting', defaultMax: 5, icon: '🚦' }, + // ── Architecture (APIGreenScore official AR rules — +23 pts) + 'AR01_event_driven': { name: 'AR01 — Event-driven', defaultMax: 6, icon: '📡' }, + 'AR02_runtime_close': { name: 'AR02 — Runtime proche', defaultMax: 7, icon: '🌍' }, + 'AR03_unique_api': { name: 'AR03 — Une API par besoin', defaultMax: 3, icon: '🎯' }, + // ── Infrastructure + 'AR04_scalable_infra':{ name: 'AR04 — Infra scalable', defaultMax: 5, icon: '📈' }, + 'AR05_cloud_footprint':{name: 'AR05 — Empreinte cloud', defaultMax: 2, icon: '☁️' }, }; const rrm = gs.rule_resource_mapping || {}; @@ -2773,14 +2785,23 @@

🎛️ Actions

const ruleIcons = { 'DE11_pagination': '📄', 'DE08_fields': '🔍', 'DE01_compression': '🗜️', 'DE02_DE03_cache': '💾', 'DE06_delta': '🔄', 'range_206': '✂️', - 'AR02_format_cbor': '📦', 'LO01_observability': '👁️', 'US07_rate_limit': '🚦' + 'AR02_format_cbor': '📦', 'LO01_observability': '👁️', 'US07_rate_limit': '🚦', + // APIGreenScore official AR rules (+23 pts) + 'AR01_event_driven': '📡', 'AR02_runtime_close': '🌍', 'AR03_unique_api': '🎯', + 'AR04_scalable_infra': '📈', 'AR05_cloud_footprint': '☁️', }; const ruleNames = { 'DE11_pagination': 'DE11 — Pagination', 'DE08_fields': 'DE08 — Filtrage de champs', 'DE01_compression': 'DE01 — Compression Gzip', 'DE02_DE03_cache': 'DE02/03 — Cache ETag/304', 'DE06_delta': 'DE06 — Delta / Changes', 'range_206': '206 — Partial Content / Range', - 'AR02_format_cbor': 'AR02 — Format binaire (CBOR)', 'LO01_observability': 'LO01 — Observabilité', - 'US07_rate_limit': 'US07 — Rate Limiting' + // Display id renamed BIN01 (rule key kept for backward-compat). + 'AR02_format_cbor': 'BIN01 — Format binaire (CBOR/protobuf)', 'LO01_observability': 'LO01 — Observabilité', + 'US07_rate_limit': 'US07 — Rate Limiting', + 'AR01_event_driven': 'AR01 — Architecture événementielle', + 'AR02_runtime_close': 'AR02 — Runtime proche du consommateur', + 'AR03_unique_api': 'AR03 — Une seule API par besoin', + 'AR04_scalable_infra': 'AR04 — Infrastructure scalable', + 'AR05_cloud_footprint': 'AR05 — Dashboard d\'empreinte cloud', }; let allSuggestions = []; diff --git a/dashboard/index.save.html b/dashboard/index.save.html index ee94f5b..249fbf6 100644 --- a/dashboard/index.save.html +++ b/dashboard/index.save.html @@ -2113,15 +2113,20 @@

🎛️ Actions

rulesList.innerHTML = ''; const ruleLabels = { - 'DE11_pagination': { name: 'DE11 — Pagination', defaultMax: 15, icon: '📄' }, - 'DE08_fields': { name: 'DE08 — Filtrage champs', defaultMax: 15, icon: '🔍' }, - 'DE01_compression': { name: 'DE01 — Compression Gzip', defaultMax: 15, icon: '🗜️' }, - 'DE02_DE03_cache': { name: 'DE02/03 — Cache ETag/304', defaultMax: 15, icon: '💾' }, - 'DE06_delta': { name: 'DE06 — Delta changes', defaultMax: 10, icon: '🔄' }, - 'range_206': { name: '206 — Partial Content', defaultMax: 10, icon: '✂️' }, - 'LO01_observability': { name: 'LO01 — Observabilité', defaultMax: 5, icon: '👁️' }, - 'US07_rate_limit': { name: 'US07 — Rate Limiting', defaultMax: 5, icon: '🚦' }, - 'AR02_format_cbor': { name: 'AR02 — Format CBOR', defaultMax: 10, icon: '📦' }, + 'DE11_pagination': { name: 'DE11 — Pagination', defaultMax: 15, icon: '📄' }, + 'DE08_fields': { name: 'DE08 — Filtrage champs', defaultMax: 15, icon: '🔍' }, + 'DE01_compression': { name: 'DE01 — Compression Gzip', defaultMax: 15, icon: '🗜️' }, + 'DE02_DE03_cache': { name: 'DE02/03 — Cache ETag/304', defaultMax: 15, icon: '💾' }, + 'DE06_delta': { name: 'DE06 — Delta changes', defaultMax: 10, icon: '🔄' }, + 'range_206': { name: '206 — Partial Content', defaultMax: 10, icon: '✂️' }, + 'AR02_format_cbor': { name: 'BIN01 — Format binaire', defaultMax: 10, icon: '📦' }, + 'LO01_observability': { name: 'LO01 — Observabilité', defaultMax: 5, icon: '👁️' }, + 'US07_rate_limit': { name: 'US07 — Rate Limiting', defaultMax: 5, icon: '🚦' }, + 'AR01_event_driven': { name: 'AR01 — Event-driven', defaultMax: 6, icon: '📡' }, + 'AR02_runtime_close': { name: 'AR02 — Runtime proche', defaultMax: 7, icon: '🌍' }, + 'AR03_unique_api': { name: 'AR03 — Une API par besoin', defaultMax: 3, icon: '🎯' }, + 'AR04_scalable_infra': { name: 'AR04 — Infra scalable', defaultMax: 5, icon: '📈' }, + 'AR05_cloud_footprint':{ name: 'AR05 — Empreinte cloud', defaultMax: 2, icon: '☁️' }, }; const rrm = gs.rule_resource_mapping || {}; @@ 
-2773,14 +2778,21 @@

🎛️ Actions

const ruleIcons = { 'DE11_pagination': '📄', 'DE08_fields': '🔍', 'DE01_compression': '🗜️', 'DE02_DE03_cache': '💾', 'DE06_delta': '🔄', 'range_206': '✂️', - 'AR02_format_cbor': '📦', 'LO01_observability': '👁️', 'US07_rate_limit': '🚦' + 'AR02_format_cbor': '📦', 'LO01_observability': '👁️', 'US07_rate_limit': '🚦', + 'AR01_event_driven': '📡', 'AR02_runtime_close': '🌍', 'AR03_unique_api': '🎯', + 'AR04_scalable_infra': '📈', 'AR05_cloud_footprint': '☁️' }; const ruleNames = { 'DE11_pagination': 'DE11 — Pagination', 'DE08_fields': 'DE08 — Filtrage de champs', 'DE01_compression': 'DE01 — Compression Gzip', 'DE02_DE03_cache': 'DE02/03 — Cache ETag/304', 'DE06_delta': 'DE06 — Delta / Changes', 'range_206': '206 — Partial Content / Range', - 'AR02_format_cbor': 'AR02 — Format binaire (CBOR)', 'LO01_observability': 'LO01 — Observabilité', - 'US07_rate_limit': 'US07 — Rate Limiting' + 'AR02_format_cbor': 'BIN01 — Format binaire (CBOR)', 'LO01_observability': 'LO01 — Observabilité', + 'US07_rate_limit': 'US07 — Rate Limiting', + 'AR01_event_driven': 'AR01 — Architecture événementielle', + 'AR02_runtime_close': 'AR02 — Runtime proche du consommateur', + 'AR03_unique_api': 'AR03 — Une seule API par besoin', + 'AR04_scalable_infra': 'AR04 — Infrastructure scalable', + 'AR05_cloud_footprint': 'AR05 — Dashboard d\'empreinte cloud' }; let allSuggestions = []; diff --git a/dashboard/interactive.html b/dashboard/interactive.html index 7eec3e3..fb3e971 100644 --- a/dashboard/interactive.html +++ b/dashboard/interactive.html @@ -1052,11 +1052,60 @@

// Mirrors the look & behavior of the full dashboard (index.html). // Reused by both Remote and Local Green Score result tabs. // ════════════════════════════════════════════════════════════════ -const _SG_RULE_ICONS = { - DE11_pagination:"📄", DE08_fields:"🔍", DE01_compression:"🗜️", - DE02_DE03_cache:"💾", DE06_delta:"🔄", range_206:"✂️", - AR02_format_cbor:"📦", LO01_observability:"👁️", US07_rate_limit:"🚦", + +// ── Shared rule registry ──────────────────────────────────────── +// Single source of truth for the 14 Green API rules currently scored +// by the backend (scripts/green-api-auto-discover.py → GREEN_RULES). +// +// Keys MUST match the keys emitted in `green_score.breakdown` / +// `green_score.rule_resource_mapping` by the analyzer. If the backend +// adds a new rule, append it here and both the Remote and Local +// renderers will pick it up automatically. +// +// Note: the rule key `AR02_format_cbor` is kept for backward-compat +// with historical reports, but its DISPLAY id was renamed `BIN01` in +// GREEN_RULES so it doesn't collide with the new official AR02 +// ("API runtime close to consumer"). The label below reflects that. 
+const RULE_REGISTRY = { + // Data efficiency + DE11_pagination: { label: "DE11 · Pagination", max: 15, icon: "📄", category: "data-efficiency" }, + DE08_fields: { label: "DE08 · Filtrage de champs", max: 15, icon: "🔍", category: "data-efficiency" }, + DE01_compression: { label: "DE01 · Compression", max: 15, icon: "🗜️", category: "data-efficiency" }, + DE02_DE03_cache: { label: "DE02/03 · Cache ETag/304", max: 15, icon: "💾", category: "data-efficiency" }, + DE06_delta: { label: "DE06 · Delta / changes", max: 10, icon: "🔄", category: "data-efficiency" }, + range_206: { label: "206 · Range / Partial", max: 10, icon: "✂️", category: "data-efficiency" }, + AR02_format_cbor: { label: "BIN01 · Format binaire", max: 10, icon: "📦", category: "data-efficiency" }, + // Logs & observability + LO01_observability: { label: "LO01 · Observabilité", max: 5, icon: "👁️", category: "logs-observability" }, + // Usage + US07_rate_limit: { label: "US07 · Rate limit", max: 5, icon: "🚦", category: "usage" }, + // Architecture (APIGreenScore official AR rules — +23 pts on top of legacy 100 pts) + AR01_event_driven: { label: "AR01 · Event-driven", max: 6, icon: "📡", category: "architecture" }, + AR02_runtime_close: { label: "AR02 · Runtime proche", max: 7, icon: "🌍", category: "architecture" }, + AR03_unique_api: { label: "AR03 · Une API par besoin", max: 3, icon: "🎯", category: "architecture" }, + // Infrastructure + AR04_scalable_infra:{ label: "AR04 · Infra scalable", max: 5, icon: "📈", category: "infrastructure" }, + AR05_cloud_footprint:{label: "AR05 · Empreinte cloud", max: 2, icon: "☁️", category: "infrastructure" }, }; + +// Convenience views (kept as separate consts so the rest of the +// renderers stays readable; recomputed once at load time). 
+const RULE_LABELS = Object.fromEntries( + Object.entries(RULE_REGISTRY).map(([k, v]) => [k, v.label]) +); +const RULE_MAX = Object.fromEntries( + Object.entries(RULE_REGISTRY).map(([k, v]) => [k, v.max]) +); +const RULE_ICONS = Object.fromEntries( + Object.entries(RULE_REGISTRY).map(([k, v]) => [k, v.icon]) +); +// Total budget (for sanity checks / progress bar fallback). The backend +// is authoritative — if a project ships a custom green-score-threshold.json +// the `gs.max` field overrides this value. +const RULE_TOTAL_MAX = Object.values(RULE_REGISTRY).reduce((s, v) => s + v.max, 0); + +// Backward-compat alias for any code that still references the old name. +const _SG_RULE_ICONS = RULE_ICONS; // Per-panel state (key = panelId, value = { rrm, ruleLabels, ruleMax, gs, filter }) const _sgPanels = {}; @@ -1281,21 +1330,8 @@

const pct = max > 0 ? Math.round((+total / max) * 100) : 0; const color = pct >= 80 ? "#22c55e" : pct >= 60 ? "#86efac" : pct >= 40 ? "#facc15" : pct >= 20 ? "#f97316" : "#ef4444"; - const ruleLabels = { - DE11_pagination: "DE11 · Pagination", - DE08_fields: "DE08 · Filtrage de champs", - DE01_compression: "DE01 · Compression", - DE02_DE03_cache: "DE02/03 · Cache", - DE06_delta: "DE06 · Delta / changes", - range_206: "Range 206", - LO01_observability: "LO01 · Observabilité", - US07_rate_limit: "US07 · Rate limit", - AR02_format_cbor: "AR02 · Format CBOR", - }; - const ruleMax = { - DE11_pagination:15, DE08_fields:15, DE01_compression:15, DE02_DE03_cache:15, - DE06_delta:10, range_206:10, LO01_observability:5, US07_rate_limit:5, AR02_format_cbor:10, - }; + const ruleLabels = RULE_LABELS; + const ruleMax = RULE_MAX; const breakdownHtml = Object.keys(ruleLabels).map(k => { const score = breakdown[k] ?? 0; @@ -1652,15 +1688,8 @@

if (creedengo && creedengo.report && creedengo.report.creedengo_score) { creedengo = creedengo.report; } - const ruleLabels = { - DE11_pagination:"DE11 · Pagination", DE08_fields:"DE08 · Filtrage de champs", - DE01_compression:"DE01 · Compression", DE02_DE03_cache:"DE02/03 · Cache", - DE06_delta:"DE06 · Delta / changes", range_206:"Range 206", - LO01_observability:"LO01 · Observabilité", US07_rate_limit:"US07 · Rate limit", - AR02_format_cbor:"AR02 · Format CBOR", - }; - const ruleMax = {DE11_pagination:15, DE08_fields:15, DE01_compression:15, DE02_DE03_cache:15, - DE06_delta:10, range_206:10, LO01_observability:5, US07_rate_limit:5, AR02_format_cbor:10}; + const ruleLabels = RULE_LABELS; + const ruleMax = RULE_MAX; // ── Green Score card ───────────────────────────────────────── let gsCardHtml = ""; diff --git a/scripts/architecture_rules.py b/scripts/architecture_rules.py index ff50e75..a626808 100644 --- a/scripts/architecture_rules.py +++ b/scripts/architecture_rules.py @@ -8,8 +8,11 @@ so the dashboard can render them with the same code path. Phase 1 in this module: AR01, AR03, AR05. -Phase 2 (AR04 + AR01 source/deps signals + AR02 TLS/anycast/GeoIP) is reachable -through the same evaluator signature and will be added in follow-ups. +Phase 2 in this module: AR04 (IaC + serverless deps scan via ``--source-dir``) +and AR01 enrichment with messaging-broker dependency signals. +Phase 3 in this module: AR02 (TLS handshake latency, CDN edge headers, +multi-region spec, anycast ASN via optional ``--enable-geoip`` and +consumer distance via ``--consumer-region``). 
Design rules — strict: • stdlib-only (no requests/numpy/yaml unless already optional in the parent) @@ -25,8 +28,13 @@ import json import os import re +import socket +import ssl +import time import urllib.error +import urllib.parse import urllib.request +from pathlib import Path from typing import Any @@ -522,104 +530,303 @@ def _jaccard(a: set, b: set) -> float: return len(a & b) / len(union) if union else 0.0 -def evaluate_AR03(specs_per_target: list[tuple[str, dict]], - thresholds: dict | None = None) -> dict: - """AR03 — Ensure only one API fits the same need. +# ═══════════════════════════════════════════════════════════════════════════ +# AR02 — Runtime close to the consumer (Phase 3) +# ═══════════════════════════════════════════════════════════════════════════ - Compares every pair of *targets* on three orthogonal signals: - 1. Jaccard on operation signatures (≥ T1) - 2. Jaccard on tags (≥ T2) - 3. Cosine on summary BoW (≥ T3) +# Tokens commonly embedded in regional hostnames / server URLs. +_REGION_TOKEN_RE = re.compile( + r"\b(" + r"us-?(?:east|west|central|north|south)(?:-\d)?|" + r"eu-?(?:west|central|north|south)(?:-\d)?|" + r"ap-?(?:south|southeast|northeast|east)(?:-\d)?|" + r"ca-?central(?:-\d)?|sa-?east(?:-\d)?|af-?south(?:-\d)?|me-?(?:south|central)(?:-\d)?|" + r"westeurope|northeurope|eastus2?|westus[23]?|centralus|southcentralus|" + r"francecentral|germanywestcentral|uksouth|ukwest|" + r"asia-?(?:east|southeast|south|northeast)(?:\d)?|" + r"europe-?(?:west|north|central)(?:\d)?" + r")\b", + flags=re.IGNORECASE, +) - A pair triggers a duplication warning ONLY when the three thresholds are - crossed simultaneously (the "triplet" rule we agreed on). - Versioned duplicates (``/v1/...`` vs ``/v2/...``) are exempt: the - signature normaliser strips ``/vN`` so they collapse to the same path, - but they remain a *legitimate* form of duplication and we downgrade the - severity to a warning instead of failing the rule. 
+# ASN / org tokens that strongly imply an anycast / global edge network. +_ANYCAST_ORG_TOKENS = ( + "cloudflare", "fastly", "akamai", "google", "amazon", "microsoft", + "azure", "cloudfront", "cdnetworks", "stackpath", "edgecast", + "incapsula", "imperva", "bunny", "keycdn", +) + + +def _tls_handshake_seconds(host: str, port: int = 443, timeout: float = 5.0, + samples: int = 3) -> dict: + """Measure the TLS handshake duration (median over `samples` runs). + + Returns ``{"median_ms": float, "samples_ms": [...], "ok": bool}``. + A failed probe returns ``{"ok": False, "error": "..."}``. """ - th = thresholds or {} - T_SIG = th.get("AR03_jaccard_threshold", 0.30) - T_TAGS = th.get("AR03_tags_overlap_threshold", 0.50) - T_COS = th.get("AR03_summary_cosine_threshold", 0.40) + timings: list[float] = [] + err = None + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE # latency probe only — no PKI assertion + for _ in range(max(1, samples)): + try: + t0 = time.perf_counter() + with socket.create_connection((host, port), timeout=timeout) as raw: + with ctx.wrap_socket(raw, server_hostname=host) as _ssock: + pass + timings.append((time.perf_counter() - t0) * 1000.0) + except Exception as e: # noqa: BLE001 + err = str(e) + break + if not timings: + return {"ok": False, "error": err or "no samples"} + timings.sort() + median = timings[len(timings) // 2] + return {"ok": True, "median_ms": round(median, 1), + "samples_ms": [round(t, 1) for t in timings]} - max_pts = ARCH_RULES["AR03_unique_api"]["max_pts"] - if len(specs_per_target) < 2: - return { - "rule_id": "AR03", - "score": max_pts, - "max_pts": max_pts, - "matched": True, - "category": "architecture", - "candidates": [], - "evidence": [{"kind": "n/a", "where": "targets", - "value": f"Only {len(specs_per_target)} target — duplication not applicable"}], - "recommendations": ["Une seule cible analysée — comparez plusieurs APIs pour activer AR03."], - "duplicates": [], - } +def 
_resolve_ips(host: str) -> list[str]: + """Best-effort DNS A/AAAA resolution. Returns deduped IP strings.""" + ips: set[str] = set() + try: + for fam, _stype, _proto, _cn, sa in socket.getaddrinfo(host, None): + if fam in (socket.AF_INET, socket.AF_INET6) and sa and sa[0]: + ips.add(sa[0]) + except Exception: + return [] + return sorted(ips) + - # Pre-compute features per target - feats = [] - for target, spec in specs_per_target: - feats.append({ - "target": target, - "sigs": _all_operation_signatures(spec), - "tags": _all_tags(spec), - "bow": _summary_tokens(spec), - }) - - duplicates: list[dict] = [] +def _ipinfo_lookup(ip: str, timeout: int = 4) -> dict: + """Optional GeoIP lookup via ipinfo.io (no API key — anonymous tier). + + Returns ``{country, region, city, org}`` (best effort; empty on failure). + Only called when the user opts in with ``--enable-geoip``. + """ + if not ip: + return {} + url = f"https://ipinfo.io/{ip}/json" + code, body, _ = _http_get_bytes(url, timeout=timeout) + if code != 200 or not body: + return {} + try: + return json.loads(body.decode("utf-8", errors="replace")) or {} + except Exception: + return {} + + +def _spec_servers(spec: dict) -> list[str]: + """Return raw `servers[].url` from an OpenAPI spec (deduped, order-stable).""" + out: list[str] = [] + seen: set[str] = set() + for s in (spec.get("servers") or []): + if not isinstance(s, dict): + continue + u = (s.get("url") or "").strip() + if u and u not in seen: + seen.add(u) + out.append(u) + return out + + +def evaluate_AR02(spec: dict, base_urls: list[str], measurements: dict, + *, consumer_region: str = "", + enable_geoip: bool = False) -> dict: + """AR02 — Runtime close to the consumer. + + Cross-validated signals (max 7 pts): + • CDN/edge headers (2 pts) — confirmed by ≥1 measurement AND + a fresh HEAD probe to the base URL. + • Multi-region servers (2 pts) — OAS `servers` lists ≥2 hostnames + AND distinct DNS resolution OR + recognised regional tokens. 
+ • TLS handshake latency (2 pts) — median over 3 samples vs base host. + <150 ms → 2 pts, <300 ms → 1 pt. + • Anycast ASN (1 pt, optional) — only when ``enable_geoip`` is set; + ipinfo.io org/ASN matches a known + anycast/CDN provider AND the same + provider was already seen in + edge headers. + """ + max_pts = ARCH_RULES["AR02_runtime_close"]["max_pts"] + score = 0 evidence: list[dict] = [] - for i in range(len(feats)): - for j in range(i + 1, len(feats)): - a, b = feats[i], feats[j] - j_sig = _jaccard(a["sigs"], b["sigs"]) - j_tag = _jaccard(a["tags"], b["tags"]) - cos = _cosine_bow(a["bow"], b["bow"]) - if j_sig >= T_SIG and j_tag >= T_TAGS and cos >= T_COS: - duplicates.append({ - "target_a": a["target"], "target_b": b["target"], - "jaccard_signatures": round(j_sig, 3), - "jaccard_tags": round(j_tag, 3), - "cosine_summaries": round(cos, 3), - }) - evidence.append({ - "kind": "duplication", - "where": f"{a['target']} ⇄ {b['target']}", - "value": (f"sig={j_sig:.2f} (≥{T_SIG}), " - f"tags={j_tag:.2f} (≥{T_TAGS}), " - f"summary_cos={cos:.2f} (≥{T_COS})"), - }) + recs: list[str] = [] + candidates: list[str] = [] - # Score: penalty proportional to number of duplicate pairs (capped to 0). 
- n_pairs = len(feats) * (len(feats) - 1) // 2 - dup_ratio = len(duplicates) / n_pairs if n_pairs else 0.0 - score = round(max_pts * (1.0 - dup_ratio)) - matched = len(duplicates) == 0 + # ── 1) CDN / edge headers ────────────────────────────────────────────── + runtime_providers = _detect_cloud_providers(measurements) + head_providers: dict[str, list[str]] = {} + for base in base_urls: + if not base: + continue + h = _http_head_only(base, timeout=4) + if not h: + continue + for provider, hname, value_re in CDN_HEADER_PATTERNS: + v = h.get(hname) + if not v: + continue + if value_re == r".+" or re.search(value_re, str(v), flags=re.IGNORECASE): + head_providers.setdefault(provider, []).append( + f"HEAD {base} → {hname}: {v}" + ) + confirmed_providers = sorted(set(runtime_providers) & set(head_providers)) + if confirmed_providers: + score += 2 + candidates.extend(confirmed_providers) + for p in confirmed_providers: + for ev in (runtime_providers.get(p, [])[:2] + head_providers.get(p, [])[:1]): + evidence.append({"kind": "cdn_header", "where": p, "value": ev}) + else: + # informational evidence (mono-side hit) — no points + for p, evs in (runtime_providers or head_providers).items(): + evidence.append({"kind": "cdn_header_uncorroborated", + "where": p, "value": evs[:1]}) + recs.append( + "Aucun signal d'edge/CDN cross-validé (runtime + HEAD). Mettre l'API " + "derrière un edge/CDN multi-régions (Cloudflare, CloudFront, " + "Front Door, Fastly, Akamai…) pour rapprocher le runtime des consommateurs." + ) - recommendations: list[str] = [] - if duplicates: - for d in duplicates: - recommendations.append( - f"Doublon probable entre {d['target_a']} et {d['target_b']} — " - f"fusionner ou marquer l'une comme dépréciée." 
+ # ── 2) Multi-region servers (spec + DNS) ─────────────────────────────── + servers = _spec_servers(spec) + server_hosts: list[str] = [] + for u in servers: + try: + h = urllib.parse.urlparse(u).hostname + if h: + server_hosts.append(h) + except Exception: + continue + distinct_hosts = sorted(set(server_hosts)) + region_tokens = sorted({m.group(1).lower() + for u in servers + for m in [_REGION_TOKEN_RE.search(u)] if m}) + distinct_ips: set[str] = set() + for h in distinct_hosts: + for ip in _resolve_ips(h): + distinct_ips.add(ip) + if len(distinct_hosts) >= 2 and (len(distinct_ips) >= 2 or len(region_tokens) >= 2): + score += 2 + evidence.append({"kind": "multi_region_servers", "where": "openapi.servers", + "value": {"hosts": distinct_hosts, + "regions": region_tokens, + "ip_count": len(distinct_ips)}}) + elif len(distinct_hosts) <= 1: + recs.append( + "La spec OpenAPI ne déclare qu'une seule URL de serveur. Ajouter " + "plusieurs entrées `servers[]` régionales (ex: eu-west, us-east) " + "pour documenter un déploiement multi-régions." + ) + else: + evidence.append({"kind": "single_region_dns", "where": "dns", + "value": {"hosts": distinct_hosts, + "ip_count": len(distinct_ips), + "regions": region_tokens}}) + recs.append( + "Plusieurs URLs `servers[]` déclarées mais elles résolvent vers la " + "même région DNS. Vérifier que chaque URL pointe bien vers un " + "déploiement régional distinct (anycast ou DNS GSLB)." 
+ ) + + # ── 3) TLS handshake latency ─────────────────────────────────────────── + tls_target = next((b for b in base_urls if b and b.lower().startswith("https://")), None) + tls_result: dict = {} + if tls_target: + parsed = urllib.parse.urlparse(tls_target) + host = parsed.hostname or "" + port = parsed.port or 443 + if host: + tls_result = _tls_handshake_seconds(host, port=port) + if tls_result.get("ok"): + ms = tls_result["median_ms"] + if ms < 150: + score += 2 + tier = "<150ms" + elif ms < 300: + score += 1 + tier = "<300ms" + else: + tier = ">=300ms" + evidence.append({"kind": "tls_handshake_latency", + "where": tls_target, + "value": {"median_ms": ms, "tier": tier, + "samples_ms": tls_result["samples_ms"], + "consumer_region": consumer_region or None}}) + if ms >= 300: + recs.append( + f"Latence TLS médiane élevée ({ms} ms) depuis l'environnement " + f"d'analyse. Activer un edge/CDN ou rapprocher le runtime " + f"de la zone consommateur ({consumer_region or 'à préciser'})." ) else: - recommendations.append( - "Aucune duplication détectée entre les cibles analysées." + if not tls_target: + evidence.append({"kind": "tls_skip", "where": "n/a", + "value": "no HTTPS target available"}) + recs.append( + "Activer HTTPS sur la cible pour permettre la mesure de " + "latence TLS et bénéficier d'un edge/CDN moderne." 
+ ) + elif tls_result: + evidence.append({"kind": "tls_handshake_failed", "where": tls_target, + "value": tls_result.get("error", "unknown")}) + + # ── 4) Optional GeoIP / anycast cross-check ──────────────────────────── + if enable_geoip: + for h in distinct_hosts[:3]: # cap external calls + ips = _resolve_ips(h) + if not ips: + continue + info = _ipinfo_lookup(ips[0]) + org = (info.get("org") or "").lower() + if not org: + continue + evidence.append({"kind": "geoip_lookup", + "where": f"{h} ({ips[0]})", + "value": {"country": info.get("country"), + "region": info.get("region"), + "city": info.get("city"), + "org": info.get("org")}}) + anycast_match = next((tok for tok in _ANYCAST_ORG_TOKENS if tok in org), None) + if anycast_match and anycast_match in {p for p in confirmed_providers}: + # cross-validation: provider seen in headers AND in ASN + if not any(e.get("kind") == "anycast_asn" for e in evidence): + score += 1 + evidence.append({"kind": "anycast_asn", "where": h, + "value": {"org": info.get("org"), + "matches_edge_provider": anycast_match}}) + elif consumer_region and info.get("country") and \ + info.get("country", "").upper() != consumer_region.upper(): + recs.append( + f"Cible {h} hébergée en {info.get('country')} alors que les " + f"consommateurs sont en {consumer_region.upper()} — envisager " + "un déploiement régional plus proche." + ) + elif consumer_region: + evidence.append({"kind": "consumer_region_declared", + "where": "cli", "value": consumer_region.upper()}) + recs.append( + "Activer --enable-geoip pour corréler la région des consommateurs " + f"({consumer_region.upper()}) avec la localisation IP de l'API." 
) + # Cap score + score = min(score, max_pts) + matched = score >= max_pts + return { - "rule_id": "AR03", + "rule_id": "AR02", "score": score, "max_pts": max_pts, "matched": matched, "category": "architecture", - "candidates": [], - "evidence": evidence, - "recommendations": recommendations, - "duplicates": duplicates, + "candidates": candidates, + "evidence": evidence[:50], + "recommendations": recs, + "signal_kinds": sorted({e["kind"] for e in evidence}), } @@ -752,6 +959,293 @@ def evaluate_AR05(measurements: dict, base_urls: list[str], } +# ═══════════════════════════════════════════════════════════════════════════ +# Phase 2 — Source-dir & IaC scanner (stdlib-only) +# ═══════════════════════════════════════════════════════════════════════════ +# +# Walks an optional ``--source-dir`` once, returns ``(rel_path, content)`` +# tuples for every relevant build/IaC/deps file (capped to keep CI fast). +# Used by ``evaluate_AR04`` (auto-scaling/serverless) and ``evaluate_AR01`` +# (messaging-broker dependency cross-validation). 
+ +_PHASE2_MAX_FILES = 2000 +_PHASE2_MAX_FILE_BYTES = 512 * 1024 # 512 KB per file +_PHASE2_SKIP_DIRS = { + "node_modules", "dist", "build", "target", "out", ".gradle", ".mvn", + "venv", ".venv", "__pycache__", "bin", "obj", ".terraform", + ".next", ".nuxt", "coverage", "vendor", +} +_PHASE2_RELEVANT_EXTS = { + ".yaml", ".yml", ".tf", ".tfvars", ".bicep", ".json", ".xml", + ".gradle", ".kts", ".csproj", ".fsproj", ".vbproj", ".props", + ".toml", ".txt", +} +_PHASE2_RELEVANT_NAMES = { + "Dockerfile", "Chart.yaml", "values.yaml", "values.yml", + "pom.xml", "build.gradle", "build.gradle.kts", "package.json", + "requirements.txt", "pyproject.toml", "Pipfile", + "serverless.yml", "serverless.yaml", "template.yaml", "template.yml", + "host.json", "function.json", "samconfig.toml", "azure.yaml", +} + +# AR04 — auto-scaling / serverless signal regexes +_AR04_HPA_RE = re.compile( + r"^\s*kind:\s*HorizontalPodAutoscaler\s*$", + re.MULTILINE | re.IGNORECASE) +_AR04_KEDA_RE = re.compile( + r"^\s*kind:\s*Scaled(Object|Job)\s*$", + re.MULTILINE | re.IGNORECASE) +_AR04_HELM_AUTOSCALE_RE = re.compile( + r"autoscaling[\s\S]{0,300}?enabled\s*:\s*true", + re.IGNORECASE) +_AR04_TF_AUTOSCALE_RE = re.compile( + r'resource\s+"(aws_autoscaling_group|aws_appautoscaling_target|' + r"azurerm_monitor_autoscale_setting|azurerm_container_app|" + r"google_compute_autoscaler|google_compute_region_autoscaler|" + r"kubernetes_horizontal_pod_autoscaler[_v0-9]*|" + r"aws_lambda_function|google_cloudfunctions2?_function|" + r'azurerm_function_app|azurerm_linux_function_app)"', + re.IGNORECASE) +_AR04_BICEP_AUTOSCALE_RE = re.compile( + r"(microsoft\.insights/autoscalesettings|" + r"autoscaleEnabled\s*[:=]\s*true|" + r"properties\.scale\.minReplicas|" + r"Microsoft\.Web/serverfarms[\s\S]{0,200}ElasticPremium)", + re.IGNORECASE) +_AR04_SERVERLESS_FILES = { + "serverless.yml", "serverless.yaml", "template.yaml", "template.yml", + "host.json", "function.json", "samconfig.toml", +} +_AR04_FAAS_DEPS_RE = 
re.compile( + r"(azure-functions-maven-plugin|aws-lambda-java-\w+|spring-cloud-function|" + r"Microsoft\.Azure\.Functions[\w\.]*|Amazon\.Lambda\.[\w\.]*|" + r'"serverless"\s*:|"aws-lambda"\s*:|@azure/functions|' + r"firebase-functions|google-cloud-functions-framework|" + r"chalice|zappa|aws-sam-cli)", + re.IGNORECASE) + +# AR01 — messaging/broker dependency regex (cross-validation only) +_AR01_BROKER_DEPS_RE = re.compile( + r"(spring-kafka|spring-cloud-stream|spring-rabbit|spring-amqp|" + r"activemq|pulsar-client|nats-streaming|" + r"kafkajs|amqplib|@nestjs/microservices|node-rdkafka|" + r'"mqtt"\s*:|"bull"\s*:|' + r"Confluent\.Kafka|RabbitMQ\.Client|MassTransit[\w\.]*|" + r"Azure\.Messaging\.(EventHubs|ServiceBus|EventGrid)|" + r"Amazon\.SimpleNotificationService|AWSSDK\.SQS|" + r"kafka-python|aiokafka|pika|celery|nats-py)", + re.IGNORECASE) + + +def _phase2_walk(source_dir: str) -> list[tuple[str, str]]: + """Return ``[(rel_path, content), ...]`` for every relevant file. + + Strict caps: ``_PHASE2_MAX_FILES`` files max, ``_PHASE2_MAX_FILE_BYTES`` + per file, hidden dirs and well-known build outputs skipped. 
+ """ + base_p = Path(source_dir).expanduser().resolve() + if not base_p.is_dir(): + return [] + out: list[tuple[str, str]] = [] + count = 0 + for root, dirs, files in os.walk(base_p): + # prune in-place: skip hidden dirs + known build/dep outputs + dirs[:] = [d for d in dirs + if d not in _PHASE2_SKIP_DIRS and not d.startswith(".")] + for fn in files: + if count >= _PHASE2_MAX_FILES: + return out + ext = os.path.splitext(fn)[1].lower() + if (fn in _PHASE2_RELEVANT_NAMES + or ext in _PHASE2_RELEVANT_EXTS + or fn.endswith((".csproj", ".fsproj", ".vbproj"))): + fp = Path(root) / fn + try: + if fp.stat().st_size > _PHASE2_MAX_FILE_BYTES: + continue + text = fp.read_text(encoding="utf-8", errors="replace") + except Exception: + continue + try: + rel = str(fp.relative_to(base_p)) + except ValueError: + rel = str(fp) + out.append((rel, text)) + count += 1 + return out + + +def _scan_broker_deps(scanned: list[tuple[str, str]]) -> list[dict]: + """Return AR01 supplementary evidence: messaging-broker deps in build files.""" + if not scanned: + return [] + deps_files = ( + "pom.xml", "package.json", "build.gradle", "build.gradle.kts", + "requirements.txt", "pyproject.toml", "Pipfile", + ) + out: list[dict] = [] + seen: set[tuple[str, str]] = set() + for rel, text in scanned: + fn = os.path.basename(rel) + if not (fn in deps_files or fn.endswith((".csproj", ".fsproj", ".vbproj"))): + continue + for m in _AR01_BROKER_DEPS_RE.finditer(text): + tok = m.group(1).lower() + key = (rel, tok) + if key in seen: + continue + seen.add(key) + out.append({ + "kind": "deps", + "where": rel, + "value": f"Messaging/broker dep: {m.group(1)}", + }) + if len(out) >= 30: + return out + return out + + +# ═══════════════════════════════════════════════════════════════════════════ +# AR04 — Scalable infrastructure (Phase 2) +# ═══════════════════════════════════════════════════════════════════════════ + +def evaluate_AR04(source_dir: str | None, + scanned: list[tuple[str, str]] | None = None) -> 
dict: + """Detect auto-scaling / serverless markers from IaC + build files. + + Scoring (max 5 pts): + • ``score = 5`` when **≥ 2 distinct signal kinds** are found + (cross-validation, e.g. HPA + KEDA, or Terraform autoscale + FaaS deps) + • ``score = 3`` (60%) when exactly **1** signal kind is found + • ``score = 0`` otherwise (or no ``--source-dir`` provided) + + Signal kinds (cross-validation buckets): + ``hpa``, ``keda``, ``helm-autoscale``, ``terraform-autoscale``, + ``bicep-autoscale``, ``serverless-config``, ``faas-deps``. + """ + max_pts = ARCH_RULES["AR04_scalable_infra"]["max_pts"] + if not source_dir: + return { + "rule_id": "AR04", "score": 0, "max_pts": max_pts, + "matched": False, "category": "infrastructure", + "candidates": [], "evidence": [], + "recommendations": [ + "AR04 nécessite --source-dir pour scanner IaC (HPA, KEDA, " + "autoscale Terraform/Bicep) et marqueurs serverless " + "(pom.xml, *.csproj, package.json)." + ], + "signal_kinds": [], + } + + scanned = scanned if scanned is not None else _phase2_walk(source_dir) + if not scanned: + return { + "rule_id": "AR04", "score": 0, "max_pts": max_pts, + "matched": False, "category": "infrastructure", + "candidates": [], "evidence": [], + "recommendations": [ + f"--source-dir='{source_dir}' introuvable, vide ou aucun " + "fichier IaC/build pertinent détecté." 
+ ], + "signal_kinds": [], + } + + evidence: list[dict] = [] + signals: set[str] = set() + + for rel, text in scanned: + fn = os.path.basename(rel) + is_yaml = rel.endswith((".yaml", ".yml")) + + # K8s HPA + if is_yaml and _AR04_HPA_RE.search(text): + signals.add("hpa") + evidence.append({"kind": "iac", "where": rel, + "value": "Kubernetes HorizontalPodAutoscaler"}) + # KEDA + if is_yaml and _AR04_KEDA_RE.search(text): + signals.add("keda") + evidence.append({"kind": "iac", "where": rel, + "value": "KEDA ScaledObject/ScaledJob"}) + # Helm values.yaml — autoscaling.enabled: true + if fn in ("values.yaml", "values.yml") and _AR04_HELM_AUTOSCALE_RE.search(text): + signals.add("helm-autoscale") + evidence.append({"kind": "iac", "where": rel, + "value": "Helm autoscaling.enabled=true"}) + # Terraform + if rel.endswith((".tf", ".tfvars")): + m = _AR04_TF_AUTOSCALE_RE.search(text) + if m: + signals.add("terraform-autoscale") + evidence.append({"kind": "iac", "where": rel, + "value": f"Terraform: {m.group(1)}"}) + # Bicep / ARM JSON templates + if rel.endswith(".bicep") or (rel.endswith(".json") and "Microsoft." 
in text): + if _AR04_BICEP_AUTOSCALE_RE.search(text): + signals.add("bicep-autoscale") + evidence.append({"kind": "iac", "where": rel, + "value": "Bicep/ARM autoscale settings"}) + # Serverless framework / SAM / Azure Functions / GCF config files + if fn in _AR04_SERVERLESS_FILES: + signals.add("serverless-config") + evidence.append({"kind": "iac", "where": rel, + "value": f"Serverless config: {fn}"}) + # FaaS deps in build files + if (fn in ("pom.xml", "package.json", "build.gradle", + "build.gradle.kts", "requirements.txt", "pyproject.toml") + or fn.endswith((".csproj", ".fsproj", ".vbproj"))): + m = _AR04_FAAS_DEPS_RE.search(text) + if m: + signals.add("faas-deps") + evidence.append({"kind": "deps", "where": rel, + "value": f"Serverless/FaaS dep: {m.group(1)}"}) + + n = len(signals) + matched = n >= 1 + if n >= 2: + score = max_pts # 5/5 — cross-validated + elif n == 1: + score = round(max_pts * 0.6) # 3/5 — partial + else: + score = 0 + + if n >= 2: + recs = [ + f"Auto-scaling/serverless validé ({n} types de signaux): " + f"{', '.join(sorted(signals))}.", + ] + elif n == 1: + recs = [ + f"Signal partiel ({next(iter(signals))}). Pour valider 5/5, " + "ajouter un second signal indépendant (ex. HPA + KEDA, " + "Terraform autoscale + FaaS deps)." 
+ ] + else: + recs = [ + "Aucun signal d'auto-scaling détecté dans les fichiers IaC/build.", + "Activer HPA/KEDA (Kubernetes), autoscale Terraform/Bicep, " + "ou déployer en serverless (Azure Functions, AWS Lambda, Cloud Run).", + ] + + candidates = [ + {"method": "IaC", "path": ev["where"], "matched": True, "reason": ev["value"]} + for ev in evidence[:10] + ] + + return { + "rule_id": "AR04", + "score": int(score), + "max_pts": max_pts, + "matched": matched, + "category": "infrastructure", + "candidates": candidates, + "evidence": evidence[:50], + "recommendations": recs, + "signal_kinds": sorted(signals), + "scanned_files": len(scanned), + } + + # ═══════════════════════════════════════════════════════════════════════════ # Public entry point used by the analyzer # ═══════════════════════════════════════════════════════════════════════════ @@ -768,6 +1262,8 @@ def evaluate_architecture_rules( footprint_confirmed: bool = False, enable_phase2: bool = False, # source-dir/IaC/deps scan — wired in P2 source_dir: str | None = None, + consumer_region: str = "", # AR02 Phase 3 + enable_geoip: bool = False, # AR02 Phase 3 ) -> dict[str, dict]: """Run every Architecture/Infrastructure rule and return: @@ -781,43 +1277,81 @@ def evaluate_architecture_rules( base_urls = [b for (b, _spec, _src) in (sources or []) if b] specs_per_target = [(b, sp) for (b, sp, _src) in (sources or []) if sp] - out: dict[str, dict] = {} + # Phase 2 — single source-dir walk shared by AR01 (broker deps) and AR04 + scanned_p2: list[tuple[str, str]] = [] + if enable_phase2 and source_dir: + try: + scanned_p2 = _phase2_walk(source_dir) + except Exception: + scanned_p2 = [] - out["AR01_event_driven"] = evaluate_AR01(spec, base_urls, endpoints, measurements) + out: dict[str, dict] = {} - # AR02 — Phase 3 placeholder (factual signals will land in a later patch) - out["AR02_runtime_close"] = { - "rule_id": "AR02", - "score": 0, - "max_pts": ARCH_RULES["AR02_runtime_close"]["max_pts"], - "matched": False, 
- "category": "architecture", - "candidates": [], - "evidence": [], - "recommendations": [ - "AR02 sera évalué via headers edge/CDN, ASN anycast et latence TLS " - "(activable avec --enable-geoip --consumer-region ) — " - "implémentation Phase 3." - ], - } + # ── AR01 — Event-Driven (spec/runtime + optional broker-deps evidence) ── + ar01 = evaluate_AR01(spec, base_urls, endpoints, measurements) + if scanned_p2: + broker_ev = _scan_broker_deps(scanned_p2) + if broker_ev: + ar01.setdefault("evidence", []).extend(broker_ev) + if not ar01.get("matched"): + ar01.setdefault("recommendations", []).append( + f"{len(broker_ev)} dépendance(s) de broker détectée(s) dans le " + "source-dir mais aucun signal AsyncAPI/callbacks/SSE dans la " + "spec → documentez vos flux d'événements pour valider AR01." + ) + else: + ar01.setdefault("recommendations", []).append( + f"Cross-validation: {len(broker_ev)} dépendance(s) broker " + "trouvée(s) dans le code source — cohérent avec les signaux EDA." + ) + out["AR01_event_driven"] = ar01 + + # ── AR02 — Runtime close to consumer (Phase 3) ── + try: + out["AR02_runtime_close"] = evaluate_AR02( + spec, base_urls, + measurements, + consumer_region=consumer_region or "", + enable_geoip=bool(enable_geoip), + ) + except Exception as e: # noqa: BLE001 + out["AR02_runtime_close"] = { + "rule_id": "AR02", + "score": 0, + "max_pts": ARCH_RULES["AR02_runtime_close"]["max_pts"], + "matched": False, + "category": "architecture", + "candidates": [], + "evidence": [{"kind": "error", "where": "evaluate_AR02", + "value": str(e)}], + "recommendations": [ + "Évaluation AR02 en erreur — vérifier la connectivité réseau " + "et les flags --enable-geoip / --consumer-region." 
+ ], + } out["AR03_unique_api"] = evaluate_AR03(specs_per_target, thresholds) - # AR04 — Phase 2 placeholder (needs IaC + deps scan) - out["AR04_scalable_infra"] = { - "rule_id": "AR04", - "score": 0, - "max_pts": ARCH_RULES["AR04_scalable_infra"]["max_pts"], - "matched": False, - "category": "infrastructure", - "candidates": [], - "evidence": [], - "recommendations": [ - "AR04 sera évalué via scan IaC (HPA, KEDA, autoscale Terraform/Bicep) " - "et marqueurs serverless dans pom.xml/*.csproj/package.json — " - "implémentation Phase 2 (--source-dir requis)." - ], - } + # ── AR04 — Scalable infrastructure (Phase 2: IaC + serverless deps) ── + if enable_phase2 and source_dir: + out["AR04_scalable_infra"] = evaluate_AR04(source_dir, scanned=scanned_p2) + else: + out["AR04_scalable_infra"] = { + "rule_id": "AR04", + "score": 0, + "max_pts": ARCH_RULES["AR04_scalable_infra"]["max_pts"], + "matched": False, + "category": "infrastructure", + "candidates": [], + "evidence": [], + "recommendations": [ + "AR04 sera évalué via scan IaC (HPA, KEDA, autoscale " + "Terraform/Bicep) et marqueurs serverless dans " + "pom.xml/*.csproj/package.json — passer --source-dir " + " pour activer le scan Phase 2." 
+ ], + "signal_kinds": [], + } out["AR05_cloud_footprint"] = evaluate_AR05( measurements, base_urls, auth_headers, diff --git a/scripts/green-api-auto-discover.py b/scripts/green-api-auto-discover.py index 33e8220..4783416 100644 --- a/scripts/green-api-auto-discover.py +++ b/scripts/green-api-auto-discover.py @@ -2139,6 +2139,8 @@ def _split_csv(items): footprint_confirmed=bool(args.cloud_footprint_confirmed), enable_phase2=bool(args.source_dir), source_dir=args.source_dir or None, + consumer_region=(args.consumer_region or "").strip(), + enable_geoip=bool(args.enable_geoip), ) except Exception as e: log(f" Architecture rules evaluation failed: {e}", "WARN") diff --git a/scripts/green-score-analyzer_withdiscovery.sh b/scripts/green-score-analyzer_withdiscovery.sh index 28abd98..621582a 100644 --- a/scripts/green-score-analyzer_withdiscovery.sh +++ b/scripts/green-score-analyzer_withdiscovery.sh @@ -187,6 +187,23 @@ if [ -n "$BEARER_TOKEN" ]; then CMD+=(--bearer "$BEARER_TOKEN") fi +# AR02 / AR04 / AR05 forwarding (driven by start.sh exports or CI env). 
+# SOURCE_DIR → Phase 2 (AR04 IaC + AR01 broker-deps) +# CONSUMER_REGION + ENABLE_GEOIP → AR02 Phase 3 (anycast / distance) +# CLOUD_FOOTPRINT_CONFIRMED → AR05 dashboard validation +if [ -n "${SOURCE_DIR:-}" ]; then + CMD+=(--source-dir "$SOURCE_DIR") +fi +if [ -n "${CONSUMER_REGION:-}" ]; then + CMD+=(--consumer-region "$CONSUMER_REGION") +fi +if [ "${ENABLE_GEOIP:-false}" = "true" ]; then + CMD+=(--enable-geoip) +fi +if [ "${CLOUD_FOOTPRINT_CONFIRMED:-false}" = "true" ]; then + CMD+=(--cloud-footprint-confirmed) +fi + if [ "$SKIP_SPECTRAL" = true ]; then CMD+=(--skip-spectral) fi diff --git a/scripts/greenapianalyzer-server.py b/scripts/greenapianalyzer-server.py index 7e0729b..7a6200f 100755 --- a/scripts/greenapianalyzer-server.py +++ b/scripts/greenapianalyzer-server.py @@ -743,6 +743,10 @@ def _handle_local_analyze(self, payload): stack = (payload.get("stack") or "auto").strip().lower() source_dir = (payload.get("sourceDir") or "").strip() build_and_run = bool(payload.get("buildAndRun")) + # Architecture rules (AR02 Phase 3 / AR05) — optional from UI + consumer_region = (payload.get("consumerRegion") or "").strip() + enable_geoip = bool(payload.get("enableGeoip")) + cloud_footprint_confirmed = bool(payload.get("cloudFootprintConfirmed")) if stack not in ("auto", "java", "dotnet"): return self._send_json(400, {"error": f"invalid stack: {stack!r} (expected auto|java|dotnet)"}) @@ -792,6 +796,13 @@ def _handle_local_analyze(self, payload): cmd += ["--source-dir", source_dir] if build_and_run: cmd.append("--build-and-run") + # AR02 / AR05 forwarding + if consumer_region: + cmd += ["--consumer-region", consumer_region] + if enable_geoip: + cmd.append("--enable-geoip") + if cloud_footprint_confirmed: + cmd.append("--cloud-footprint-confirmed") REPORTS.mkdir(exist_ok=True) latest = REPORTS / "latest-report.json" diff --git a/scripts/start.sh b/scripts/start.sh index 4f5e347..603afef 100644 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -23,6 +23,13 @@ # Repeat or 
use --swaggers . # --swaggers Comma-separated list of OpenAPI specs. # +# --consumer-region ISO-3166 alpha-2 region of API consumers +# (e.g. FR, US). Drives AR02 distance scoring. +# --enable-geoip Enable optional GeoIP lookup (ipinfo.io) for +# AR02 anycast/ASN cross-validation. +# --cloud-footprint-confirmed Confirm that the cloud provider's carbon +# dashboard is actively used (validates AR05). +# # All discovered swaggers are merged into a single discovery resource and # analyzed in one run. # @@ -51,6 +58,11 @@ BUILD_AND_RUN=false # if true: build + start the API locally before health-che APP_PID="" # PID of the locally-launched app (when --build-and-run) APP_LOG="" # log file of the locally-launched app +# AR02 Phase 3 / AR04 Phase 2 controls (forwarded to green-api-auto-discover.py) +CONSUMER_REGION="${CONSUMER_REGION:-}" # ISO-3166 alpha-2 (e.g. FR, US) — AR02 +ENABLE_GEOIP="${ENABLE_GEOIP:-false}" # true → ipinfo.io lookup for AR02 +CLOUD_FOOTPRINT_CONFIRMED="${CLOUD_FOOTPRINT_CONFIRMED:-false}" # AR05 confirmation + # Git checkout (when --git-repo is provided): we clone the repo here in start.sh # so the same working copy can drive --build-and-run AND be analyzed by Creedengo # (no double clone, no local source needed on the user's machine). 
@@ -111,6 +123,19 @@ while [ $i -lt ${#args[@]} ]; do --source-dir=*) SOURCE_DIR="${args[$i]#--source-dir=}" ;; + --consumer-region) + i=$((i + 1)) + CONSUMER_REGION="${args[$i]:-}" + ;; + --consumer-region=*) + CONSUMER_REGION="${args[$i]#--consumer-region=}" + ;; + --enable-geoip) + ENABLE_GEOIP=true + ;; + --cloud-footprint-confirmed) + CLOUD_FOOTPRINT_CONFIRMED=true + ;; --build-and-run) BUILD_AND_RUN=true ;; @@ -1093,6 +1118,28 @@ if [ ${#SWAGGERS[@]} -gt 0 ]; then export SWAGGER_URL="$SWAGGER_URL_JOINED" echo "📜 Swaggers fournis (${#SWAGGERS[@]}): $SWAGGER_URL" fi + +# ── AR02 / AR04 / AR05 propagation ── +# SOURCE_DIR is consumed by the wrapper to enable Phase 2 (AR04 IaC scan +# + AR01 broker-deps cross-validation) and is auto-derived from --git-repo +# clones. CONSUMER_REGION / ENABLE_GEOIP drive AR02 Phase 3 (anycast + +# distance-aware TLS latency). CLOUD_FOOTPRINT_CONFIRMED validates AR05. +if [ -n "${SOURCE_DIR:-}" ]; then + export SOURCE_DIR + echo "📂 Source dir (AR04/AR01 deps scan): $SOURCE_DIR" +fi +if [ -n "${CONSUMER_REGION:-}" ]; then + export CONSUMER_REGION + echo "🌍 Région consommateur (AR02): $CONSUMER_REGION" +fi +if [ "${ENABLE_GEOIP:-false}" = "true" ]; then + export ENABLE_GEOIP=true + echo "🛰️ GeoIP activé (AR02 anycast/ASN cross-check via ipinfo.io)" +fi +if [ "${CLOUD_FOOTPRINT_CONFIRMED:-false}" = "true" ]; then + export CLOUD_FOOTPRINT_CONFIRMED=true + echo "✅ Cloud footprint dashboard confirmé (AR05)" +fi echo "" echo "Running Green Score analyzer..." 
From 80832f5d11a3688c012811070e7207949e2c2f17 Mon Sep 17 00:00:00 2001 From: Thierno DIALLO Date: Wed, 29 Apr 2026 22:52:45 +0200 Subject: [PATCH 3/7] update architecture and infra rules --- badges/green-score.svg | 6 +- dashboard/index.html | 69 ++- dashboard/index.md | 49 +- dashboard/index.save.html | 22 +- dashboard/interactive.html | 44 +- reports/analysis/latest-summary.json | 516 ++++++++++++++++---- reports/analysis/rule-resource-mapping.json | 337 ++++++++++++- reports/creedengo-report.json | 242 +++++---- reports/discovered-openapi.json | 2 +- reports/interactive-local-config.json | 17 +- reports/last-report.json | 510 +++++++++++++++---- reports/latest-report.json | 510 +++++++++++++++---- scripts/architecture_rules.py | 161 +++++- scripts/green-api-auto-discover.py | 29 +- 14 files changed, 1993 insertions(+), 521 deletions(-) diff --git a/badges/green-score.svg b/badges/green-score.svg index bc30f72..7f5986a 100644 --- a/badges/green-score.svg +++ b/badges/green-score.svg @@ -1,5 +1,5 @@ - - Green Score: 34/100 (D) + + Green Score: 39/100 (D) @@ -9,6 +9,6 @@ Green Score - 34/100 (D) + 39/100 (D) diff --git a/dashboard/index.html b/dashboard/index.html index 4e6951c..c270c2b 100644 --- a/dashboard/index.html +++ b/dashboard/index.html @@ -1189,8 +1189,8 @@

🎛️ Actions