From 0eb62dc2e9047da1f125df6929cd9bf9b761a23a Mon Sep 17 00:00:00 2001 From: Felipe Chaves Date: Sun, 10 May 2026 19:18:00 -0300 Subject: [PATCH 1/4] fix: stabilize local model review flow --- Makefile | 4 ++- adlint/policy.py | 2 +- adlint/rules/engine.py | 2 +- adlint/static/app.js | 56 ++++++++++++++++++++++++++++++++++------ adlint/static/index.html | 20 +++++++------- tests/test_api.py | 2 +- tests/test_policy.py | 16 ++++++++++++ tests/test_ui_static.py | 31 +++++++++++++++++----- 8 files changed, 105 insertions(+), 28 deletions(-) diff --git a/Makefile b/Makefile index 5c4ebc2..33bb361 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,8 @@ VENV := .venv BIN := $(VENV)/bin STAMP := $(VENV)/.installed MODEL_EVAL_FLAGS ?= --ollama-model gpt-oss-safeguard:20b +ADLINT_OLLAMA_TIMEOUT ?= 180 +ADLINT_OLLAMA_NUM_PREDICT ?= 512 .PHONY: api dev scan eval benchmark benchmark-data policy-coverage policy-coverage-validate rewrite-quality model-benchmark model-smoke model-usefulness pr-preflight real-cases real-cases-ci real-cases-hybrid real-cases-model-quality real-cases-validate real-world-blind-candidates real-world-blind-ci real-world-blind-validate real-world-blind real-world-blind-model-quality research-summary test install @@ -17,7 +19,7 @@ dev: $(STAMP) $(BIN)/python -m adlint scan examples/high_risk_tiktok_health.json --output-dir reports api: $(STAMP) - $(BIN)/uvicorn adlint.api:app --reload + ADLINT_OLLAMA_TIMEOUT=$(ADLINT_OLLAMA_TIMEOUT) ADLINT_OLLAMA_NUM_PREDICT=$(ADLINT_OLLAMA_NUM_PREDICT) $(BIN)/uvicorn adlint.api:app --reload scan: $(STAMP) $(BIN)/python -m adlint scan examples/needs_review_google_wellness.json diff --git a/adlint/policy.py b/adlint/policy.py index 12d38a7..2c98bcf 100644 --- a/adlint/policy.py +++ b/adlint/policy.py @@ -49,7 +49,7 @@ def filter_policies(policies: Iterable[Policy], submission: Submission) -> list[ for policy in policies: if policy.modules and not enabled_modules.intersection(policy.modules): continue - if policy.platforms and submission.platform not in policy.platforms: + if policy.platforms and submission.platform != "all" and submission.platform not in policy.platforms: continue if policy.industries and submission.industry not in policy.industries: continue diff --git a/adlint/rules/engine.py b/adlint/rules/engine.py index ae73e4f..0c139c2 100644 --- a/adlint/rules/engine.py +++ b/adlint/rules/engine.py @@ -151,7 +151,7 @@ def _derived_linkedin_professional_claim_hits( policies: list[Policy], existing_hits: list[PolicyHit], ) -> list[PolicyHit]: - if submission.platform != "linkedin": + if submission.platform not in {"linkedin", "all"}: return [] if any(hit.policy_id == "linkedin_professional_claim_review" for hit in existing_hits): return [] diff --git a/adlint/static/app.js b/adlint/static/app.js index eb19773..9ae57d2 100644 --- a/adlint/static/app.js +++ b/adlint/static/app.js @@ -13,9 +13,17 @@ const exportMarkdownButton = document.querySelector("#export-markdown"); const modelEnabledInput = document.querySelector("#model_enabled"); const modelAffectsScoreInput = document.querySelector("#model_affects_score"); const ollamaModelInput = document.querySelector("#ollama_model"); -const ollamaModelOptions = document.querySelector("#ollama-model-options"); const DEFAULT_OLLAMA_MODEL = "gpt-oss-safeguard:20b"; +const FALLBACK_OLLAMA_MODELS = [ + DEFAULT_OLLAMA_MODEL, + "gpt-oss:20b", + "qwen3-coder:30b", + "qwen3.5:35b-a3b", + "gemma4:26b", +]; +const RULE_ONLY_TIMEOUT_MS = 30000; +const LOCAL_MODEL_TIMEOUT_MS = 210000; const MODEL_STATUSES = ["disabled", "unavailable", "invalid_response", "ok"]; const ANALYSIS_STEPS = [ ["intake", "Input normalized", "Copy, campaign context, modules, and optional landing inputs are prepared for review."], @@ -44,11 +52,11 @@ form.addEventListener("submit", async (event) => { setSubmitting(true); try { - const response = await fetch("/analyze", { + const response = await fetchWithTimeout("/analyze", { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify(payload), - }); + }, requestTimeoutMs(payload)); if (!response.ok) { const detail = await response.text(); @@ -76,6 +84,26 @@ form.addEventListener("submit", async (event) => { } }); +async function fetchWithTimeout(url, options, timeoutMs) { + const controller = new AbortController(); + const timer = window.setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(url, { ...options, signal: controller.signal }); + } catch (error) { + if (error && error.name === "AbortError") { + const seconds = Math.round(timeoutMs / 1000); + throw new Error(`Review timed out after ${seconds}s. Try a smaller local model or run again after the model has warmed up.`); + } + throw error; + } finally { + window.clearTimeout(timer); + } +} + +function requestTimeoutMs(payload) { + return payload.model_enabled ? LOCAL_MODEL_TIMEOUT_MS : RULE_ONLY_TIMEOUT_MS; +} + form.addEventListener( "invalid", () => { @@ -137,19 +165,19 @@ async function discoverModels() { const payload = await response.json(); const models = normalizeModelList(payload); const defaultModel = modelName(payload?.default_model) || DEFAULT_OLLAMA_MODEL; - populateModelOptions(models.length > 0 ? [defaultModel, ...models] : [defaultModel]); + populateModelOptions([defaultModel, ...models, ...FALLBACK_OLLAMA_MODELS]); if (!ollamaModelInput.value.trim() || ollamaModelInput.value === DEFAULT_OLLAMA_MODEL) { ollamaModelInput.value = defaultModel; } } catch { - populateModelOptions([DEFAULT_OLLAMA_MODEL]); + populateModelOptions(FALLBACK_OLLAMA_MODELS); if (!ollamaModelInput.value.trim()) ollamaModelInput.value = DEFAULT_OLLAMA_MODEL; } } function normalizeModelList(payload) { const source = Array.isArray(payload) ? payload : payload && Array.isArray(payload.models) ? payload.models : []; - return [...new Set(source.map(modelName).filter(Boolean))]; + return [...new Set(source.map(modelName).filter(isReviewModelOption))]; } function modelName(item) { @@ -160,14 +188,26 @@ function modelName(item) { return ""; } +function isReviewModelOption(value) { + if (!value) return false; + const normalized = value.toLowerCase(); + return !normalized.includes("embed") && !normalized.startsWith("bge-"); +} + function populateModelOptions(models) { const values = uniqueModelOptions(models); - ollamaModelOptions.innerHTML = values.map((model) => ``).join(""); + const currentValue = ollamaModelInput.value.trim(); + ollamaModelInput.innerHTML = values.map((model) => ``).join(""); + if (currentValue && values.includes(currentValue)) { + ollamaModelInput.value = currentValue; + } else { + ollamaModelInput.value = values.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : values[0] || ""; + } } function uniqueModelOptions(models) { const values = []; - for (const model of [...models, DEFAULT_OLLAMA_MODEL]) { + for (const model of [...models, ...FALLBACK_OLLAMA_MODELS]) { const value = modelName(model); if (value && !values.includes(value)) values.push(value); } diff --git a/adlint/static/index.html b/adlint/static/index.html index dc058b5..1facde3 100644 --- a/adlint/static/index.html +++ b/adlint/static/index.html @@ -49,7 +49,8 @@

Draft ad

- - -
diff --git a/tests/test_api.py b/tests/test_api.py index fe4f5b8..1cdeee9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -201,7 +201,7 @@ def test_ui_assets_are_served() -> None: assert js_response.status_code == 200 assert css_response.status_code == 200 - assert 'fetch("/analyze"' in js_response.text + assert 'fetchWithTimeout("/analyze"' in js_response.text assert "logging_enabled: true" not in js_response.text assert ".result-panel" in css_response.text diff --git a/tests/test_policy.py b/tests/test_policy.py index 7ef7cba..3d3f3b6 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -74,6 +74,22 @@ def test_filter_policies_applies_platform_and_industry_filters(tmp_path) -> None assert filter_policies(policies, wrong_industry) == [] +def test_filter_policies_all_platform_includes_platform_scoped_policies(tmp_path) -> None: + policy_path = tmp_path / "custom.yml" + policy_path.write_text(CUSTOM_POLICY, encoding="utf-8") + policies = load_policies([policy_path]) + all_platforms = Submission.from_dict( + { + "platform": "all", + "industry": "health", + "headline": "Clinical guarantee", + "policy_modules": ["health_claims"], + } + ) + + assert [policy.id for policy in filter_policies(policies, all_platforms)] == ["custom_health_claim"] + + def test_bundled_meta_ads_policy_module_is_platform_scoped() -> None: meta_policy_ids = { "meta_personal_attributes_health", diff --git a/tests/test_ui_static.py b/tests/test_ui_static.py index d991b13..9425c1f 100644 --- a/tests/test_ui_static.py +++ b/tests/test_ui_static.py @@ -18,9 +18,11 @@ def test_local_model_controls_are_present_and_default_off() -> None: assert 'name="model_affects_score"' in INDEX_HTML assert 'id="ollama_model"' in INDEX_HTML assert 'name="ollama_model"' in INDEX_HTML - assert 'list="ollama-model-options"' in INDEX_HTML - assert 'value="gpt-oss-safeguard:20b"' in INDEX_HTML - assert 'id="ollama-model-options"' in INDEX_HTML + assert '' in INDEX_HTML + assert '' in INDEX_HTML + assert '' in INDEX_HTML + assert '' in INDEX_HTML + assert '' in INDEX_HTML def test_copy_fields_are_required_so_placeholders_do_not_submit() -> None: @@ -42,12 +44,16 @@ def test_page_starts_with_glp1_sample_context() -> None: def test_model_discovery_fetches_models_and_keeps_fallback_option() -> None: assert 'const DEFAULT_OLLAMA_MODEL = "gpt-oss-safeguard:20b";' in APP_JS + assert "const FALLBACK_OLLAMA_MODELS = [" in APP_JS assert 'fetch("/models")' in APP_JS assert "normalizeModelList(payload)" in APP_JS assert "modelName(payload?.default_model)" in APP_JS - assert "populateModelOptions([DEFAULT_OLLAMA_MODEL])" in APP_JS + assert "populateModelOptions(FALLBACK_OLLAMA_MODELS)" in APP_JS + assert "function isReviewModelOption(value)" in APP_JS + assert '!normalized.includes("embed")' in APP_JS + assert '!normalized.startsWith("bge-")' in APP_JS assert "function uniqueModelOptions(models)" in APP_JS - assert "for (const model of [...models, DEFAULT_OLLAMA_MODEL])" in APP_JS + assert "for (const model of [...models, ...FALLBACK_OLLAMA_MODELS])" in APP_JS assert "if (value && !values.includes(value)) values.push(value)" in APP_JS @@ -68,7 +74,18 @@ def test_analyze_payload_includes_model_keys_when_enabled() -> None: assert "if (modelEnabled)" in APP_JS assert "payload.ollama_model" in APP_JS assert "payload.model_affects_score" in APP_JS - assert 'fetch("/analyze"' in APP_JS + assert 'fetchWithTimeout("/analyze"' in APP_JS + + +def test_analyze_fetch_has_timeout_recovery_for_stuck_model_runs() -> None: + assert "const RULE_ONLY_TIMEOUT_MS = 30000;" in APP_JS + assert "const LOCAL_MODEL_TIMEOUT_MS = 210000;" in APP_JS + assert "fetchWithTimeout(\"/analyze\"" in APP_JS + assert "new AbortController()" in APP_JS + assert "controller.abort()" in APP_JS + assert "requestTimeoutMs(payload)" in APP_JS + assert "Review timed out after" in APP_JS + assert "Try a smaller local model" in APP_JS def test_results_and_markdown_expose_model_status() -> None: @@ -163,4 +180,6 @@ def test_geist_style_system_font_and_restrained_surfaces_are_preserved() -> None def test_platform_select_includes_meta_ads() -> None: + assert '' in INDEX_HTML + assert '' in INDEX_HTML assert '' in INDEX_HTML From 9f2206331384bd547fd69b2e2a973164e2c07cb7 Mon Sep 17 00:00:00 2001 From: Felipe Chaves Date: Sun, 10 May 2026 19:30:07 -0300 Subject: [PATCH 2/4] fix: support thinking local model outputs --- Makefile | 2 +- adlint/classifiers/ollama.py | 19 +++++++++++++++++++ tests/test_ollama.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 33bb361..98c9643 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ BIN := $(VENV)/bin STAMP := $(VENV)/.installed MODEL_EVAL_FLAGS ?= --ollama-model gpt-oss-safeguard:20b ADLINT_OLLAMA_TIMEOUT ?= 180 -ADLINT_OLLAMA_NUM_PREDICT ?= 512 +ADLINT_OLLAMA_NUM_PREDICT ?= 1024 .PHONY: api dev scan eval benchmark benchmark-data policy-coverage policy-coverage-validate rewrite-quality model-benchmark model-smoke model-usefulness pr-preflight real-cases real-cases-ci real-cases-hybrid real-cases-model-quality real-cases-validate real-world-blind-candidates real-world-blind-ci real-world-blind-validate real-world-blind real-world-blind-model-quality research-summary test install diff --git a/adlint/classifiers/ollama.py b/adlint/classifiers/ollama.py index e83197a..7673c27 100644 --- a/adlint/classifiers/ollama.py +++ b/adlint/classifiers/ollama.py @@ -148,6 +148,7 @@ def _generation_payload(endpoint: str, model: str, prompt: str) -> dict[str, Any "model": model, "stream": False, "format": "json", + "think": False, "options": options, } if urllib.parse.urlparse(endpoint).path.endswith("/api/generate"): @@ -280,6 +281,7 @@ def _clip(value: str, *, max_chars: int) -> str: def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str | None]: + response_text = _json_response_candidate(response_text) try: parsed = json.loads(response_text) except json.JSONDecodeError: @@ -299,6 +301,23 @@ def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str return parsed, True, None +def _json_response_candidate(response_text: str) -> str: + text = response_text.strip() + if text.startswith("```"): + lines = text.splitlines() + if lines: + lines = lines[1:] + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + text = "\n".join(lines).strip() + + start = text.find("{") + end = text.rfind("}") + if start != -1 and end != -1 and start < end: + return text[start : end + 1] + return text + + def _is_string_list(value: Any) -> bool: return isinstance(value, list) and all(isinstance(item, str) for item in value) diff --git a/tests/test_ollama.py b/tests/test_ollama.py index 6f71f34..2c31b96 100644 --- a/tests/test_ollama.py +++ b/tests/test_ollama.py @@ -98,6 +98,7 @@ def fake_urlopen(request, timeout): assert seen["payload"]["model"] == "llama3.2:latest" assert seen["payload"]["stream"] is False assert seen["payload"]["format"] == "json" + assert seen["payload"]["think"] is False assert seen["payload"]["options"] == {"temperature": 0} assert seen["payload"]["messages"][0]["role"] == "user" @@ -218,6 +219,35 @@ def fake_urlopen(request, timeout): assert "valid JSON" in info["validation_error"] +def test_classify_with_ollama_accepts_fenced_json_response(monkeypatch) -> None: + def fake_urlopen(request, timeout): + return FakeResponse( + { + "message": { + "content": """```json +{ + "decision": "needs_review", + "categories": ["platform"], + "evidence": ["review claim"], + "recommended_action": "Route for platform review." +} +```""" + } + } + ) + + monkeypatch.setattr("urllib.request.urlopen", fake_urlopen) + + hits, info = classify_with_ollama( + Submission(platform="google", country="US", industry="general"), + endpoint="http://localhost:11434/api/chat", + ) + + assert info["status"] == "ok" + assert info["raw_decision"] == "needs_review" + assert [hit.policy_id for hit in hits] == ["model_policy_review"] + + def test_classify_with_ollama_rejects_unknown_decision_without_hits(monkeypatch) -> None: def fake_urlopen(request, timeout): return FakeResponse({"message": {"content": '{"decision": "banana", "evidence": []}'}}) From 75309a285ec43f6b588188d4c9afd3686e7c28ba Mon Sep 17 00:00:00 2001 From: Felipe Chaves Date: Sun, 10 May 2026 19:46:30 -0300 Subject: [PATCH 3/4] fix: prevent dev server reload churn --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 98c9643..6d3202b 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ dev: $(STAMP) $(BIN)/python -m adlint scan examples/high_risk_tiktok_health.json --output-dir reports api: $(STAMP) - ADLINT_OLLAMA_TIMEOUT=$(ADLINT_OLLAMA_TIMEOUT) ADLINT_OLLAMA_NUM_PREDICT=$(ADLINT_OLLAMA_NUM_PREDICT) $(BIN)/uvicorn adlint.api:app --reload + ADLINT_OLLAMA_TIMEOUT=$(ADLINT_OLLAMA_TIMEOUT) ADLINT_OLLAMA_NUM_PREDICT=$(ADLINT_OLLAMA_NUM_PREDICT) $(BIN)/uvicorn adlint.api:app --reload --reload-dir adlint scan: $(STAMP) $(BIN)/python -m adlint scan examples/needs_review_google_wellness.json From 13fe8d58d91e048eecba4cd8774411c53ec08913 Mon Sep 17 00:00:00 2001 From: Felipe Chaves Date: Sun, 10 May 2026 19:59:52 -0300 Subject: [PATCH 4/4] refactor: tighten local model review code paths - Promote "all" platform sentinel to ALL_PLATFORMS constant in models. - Promote embedding-model exclusion list to EMBEDDING_MODEL_MARKERS. - Drop unreachable branch in fenced-JSON candidate extractor. - Cache escapeHtml output per option in populateModelOptions. Co-Authored-By: Claude Opus 4.7 (1M context) --- adlint/classifiers/ollama.py | 4 +--- adlint/models.py | 2 ++ adlint/policy.py | 4 ++-- adlint/rules/engine.py | 4 ++-- adlint/static/app.js | 10 ++++++++-- tests/test_ui_static.py | 4 ++-- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/adlint/classifiers/ollama.py b/adlint/classifiers/ollama.py index 7673c27..8074ac2 100644 --- a/adlint/classifiers/ollama.py +++ b/adlint/classifiers/ollama.py @@ -304,9 +304,7 @@ def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str def _json_response_candidate(response_text: str) -> str: text = response_text.strip() if text.startswith("```"): - lines = text.splitlines() - if lines: - lines = lines[1:] + lines = text.splitlines()[1:] if lines and lines[-1].strip() == "```": lines = lines[:-1] text = "\n".join(lines).strip() diff --git a/adlint/models.py b/adlint/models.py index af7922c..e5ce701 100644 --- a/adlint/models.py +++ b/adlint/models.py @@ -7,6 +7,8 @@ Decision = str Severity = str +ALL_PLATFORMS = "all" + @dataclass(frozen=True) class Evidence: diff --git a/adlint/policy.py b/adlint/policy.py index 2c98bcf..6049711 100644 --- a/adlint/policy.py +++ b/adlint/policy.py @@ -6,7 +6,7 @@ import yaml -from adlint.models import Policy, Submission +from adlint.models import ALL_PLATFORMS, Policy, Submission DEFAULT_MODULES = ( @@ -49,7 +49,7 @@ def filter_policies(policies: Iterable[Policy], submission: Submission) -> list[ for policy in policies: if policy.modules and not enabled_modules.intersection(policy.modules): continue - if policy.platforms and submission.platform != "all" and submission.platform not in policy.platforms: + if policy.platforms and submission.platform != ALL_PLATFORMS and submission.platform not in policy.platforms: continue if policy.industries and submission.industry not in policy.industries: continue diff --git a/adlint/rules/engine.py b/adlint/rules/engine.py index 0c139c2..e25de26 100644 --- a/adlint/rules/engine.py +++ b/adlint/rules/engine.py @@ -3,7 +3,7 @@ import re from collections import defaultdict -from adlint.models import Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission +from adlint.models import ALL_PLATFORMS, Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission MAX_EVIDENCE_PER_POLICY = 5 @@ -151,7 +151,7 @@ def _derived_linkedin_professional_claim_hits( policies: list[Policy], existing_hits: list[PolicyHit], ) -> list[PolicyHit]: - if submission.platform not in {"linkedin", "all"}: + if submission.platform not in {"linkedin", ALL_PLATFORMS}: return [] if any(hit.policy_id == "linkedin_professional_claim_review" for hit in existing_hits): return [] diff --git a/adlint/static/app.js b/adlint/static/app.js index 9ae57d2..0f9ccad 100644 --- a/adlint/static/app.js +++ b/adlint/static/app.js @@ -24,6 +24,7 @@ const FALLBACK_OLLAMA_MODELS = [ ]; const RULE_ONLY_TIMEOUT_MS = 30000; const LOCAL_MODEL_TIMEOUT_MS = 210000; +const EMBEDDING_MODEL_MARKERS = ["embed", "bge-"]; const MODEL_STATUSES = ["disabled", "unavailable", "invalid_response", "ok"]; const ANALYSIS_STEPS = [ ["intake", "Input normalized", "Copy, campaign context, modules, and optional landing inputs are prepared for review."], @@ -191,13 +192,18 @@ function modelName(item) { function isReviewModelOption(value) { if (!value) return false; const normalized = value.toLowerCase(); - return !normalized.includes("embed") && !normalized.startsWith("bge-"); + return !EMBEDDING_MODEL_MARKERS.some((marker) => normalized.includes(marker)); } function populateModelOptions(models) { const values = uniqueModelOptions(models); const currentValue = ollamaModelInput.value.trim(); - ollamaModelInput.innerHTML = values.map((model) => ``).join(""); + ollamaModelInput.innerHTML = values + .map((model) => { + const safe = escapeHtml(model); + return ``; + }) + .join(""); if (currentValue && values.includes(currentValue)) { ollamaModelInput.value = currentValue; } else { diff --git a/tests/test_ui_static.py b/tests/test_ui_static.py index 9425c1f..36b12c3 100644 --- a/tests/test_ui_static.py +++ b/tests/test_ui_static.py @@ -50,8 +50,8 @@ def test_model_discovery_fetches_models_and_keeps_fallback_option() -> None: assert "modelName(payload?.default_model)" in APP_JS assert "populateModelOptions(FALLBACK_OLLAMA_MODELS)" in APP_JS assert "function isReviewModelOption(value)" in APP_JS - assert '!normalized.includes("embed")' in APP_JS - assert '!normalized.startsWith("bge-")' in APP_JS + assert 'const EMBEDDING_MODEL_MARKERS = ["embed", "bge-"];' in APP_JS + assert "EMBEDDING_MODEL_MARKERS.some((marker) => normalized.includes(marker))" in APP_JS assert "function uniqueModelOptions(models)" in APP_JS assert "for (const model of [...models, ...FALLBACK_OLLAMA_MODELS])" in APP_JS assert "if (value && !values.includes(value)) values.push(value)" in APP_JS