# ── Quality Scoring (Bounty #1) ───────────────────────────────────────────

class QualityScoreRequest(BaseModel):
    """Request body for POST /api/quality/score."""

    submission: str = Field(..., min_length=1, description="Submission content: JSON, markdown, code, or text")


class QualityBenchmarkRequest(BaseModel):
    """Request body for POST /api/quality/benchmark."""

    submissions: list[str] = Field(..., min_length=1, description="Batch submissions for benchmark")


# One shared scorer instance for the process; scoring is stateless per call,
# so reuse across requests is safe.
quality_scorer = QualityScorer()


@app.post("/api/quality/score")
async def quality_score(req: QualityScoreRequest):
    """Score a single submission and return its scorecard dict."""
    return quality_scorer.score(req.submission)


@app.post("/api/quality/benchmark")
async def quality_benchmark(req: QualityBenchmarkRequest):
    """Score a batch of submissions and include wall-clock timing."""
    return quality_scorer.benchmark(req.submissions)
+ ], + "pass_threshold": false + } +] \ No newline at end of file diff --git a/scoring.py b/scoring.py new file mode 100644 index 0000000..c70dfeb --- /dev/null +++ b/scoring.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +import json +import re +import time +from dataclasses import dataclass +from typing import Any, Dict, List, Tuple + +WEIGHTS: Dict[str, float] = { + "completeness": 0.30, + "format_compliance": 0.20, + "coverage": 0.25, + "clarity": 0.15, + "validity": 0.10, +} + + +@dataclass +class ScoreResult: + weighted_score: float + quality_rating: str + scores: Dict[str, float] + feedback: List[str] + pass_threshold: bool + + def as_dict(self) -> Dict[str, Any]: + return { + "weighted_score": round(self.weighted_score, 4), + "quality_rating": self.quality_rating, + "scores": {k: round(v, 4) for k, v in self.scores.items()}, + "feedback": self.feedback, + "pass_threshold": self.pass_threshold, + } + + +class QualityScorer: + def __init__(self, pass_threshold: float = 0.7): + self.pass_threshold = pass_threshold + + def score(self, submission: str) -> Dict[str, Any]: + submission = submission or "" + fmt = self._detect_format(submission) + sections = self._extract_sections(submission, fmt) + + scores = { + "completeness": self._score_completeness(submission, sections), + "format_compliance": self._score_format_compliance(submission, fmt), + "coverage": self._score_coverage(submission, sections), + "clarity": self._score_clarity(submission), + "validity": self._score_validity(submission, fmt), + } + + weighted = sum(scores[k] * WEIGHTS[k] for k in WEIGHTS) + feedback = self._feedback(scores, fmt) + + return ScoreResult( + weighted_score=weighted, + quality_rating=self._rating(weighted), + scores=scores, + feedback=feedback, + pass_threshold=weighted >= self.pass_threshold, + ).as_dict() + + def benchmark(self, submissions: List[str]) -> Dict[str, Any]: + start = time.perf_counter() + scorecards = [self.score(s) for s in submissions] + elapsed = 
time.perf_counter() - start + return { + "count": len(submissions), + "elapsed_seconds": round(elapsed, 4), + "scorecards": scorecards, + } + + def _detect_format(self, text: str) -> str: + stripped = text.strip() + if not stripped: + return "text" + if stripped.startswith("{") or stripped.startswith("["): + try: + json.loads(stripped) + return "json" + except Exception: + pass + if "```" in text or re.search(r"\b(def|class|function|const|let)\b", text): + return "code" + if re.search(r"^#{1,6}\s+", text, flags=re.M) or re.search(r"^[-*]\s+", text, flags=re.M): + return "markdown" + return "text" + + def _extract_sections(self, text: str, fmt: str) -> List[str]: + if fmt == "json": + try: + obj = json.loads(text) + if isinstance(obj, dict): + return list(obj.keys()) + return ["list"] + except Exception: + return [] + return re.findall(r"^#{1,6}\s+(.+)$", text, flags=re.M) + + def _score_completeness(self, text: str, sections: List[str]) -> float: + length = len(text.split()) + section_bonus = min(len(sections) / 5.0, 1.0) * 0.3 + length_score = min(length / 220.0, 1.0) * 0.7 + return min(length_score + section_bonus, 1.0) + + def _score_format_compliance(self, text: str, fmt: str) -> float: + if fmt == "json": + try: + obj = json.loads(text) + return 1.0 if isinstance(obj, (dict, list)) else 0.7 + except Exception: + return 0.2 + if fmt == "code": + return 1.0 if "```" in text else 0.75 + if fmt == "markdown": + has_heading = bool(re.search(r"^#{1,6}\s+", text, flags=re.M)) + has_list = bool(re.search(r"^[-*]\s+", text, flags=re.M)) + return 0.6 + 0.2 * has_heading + 0.2 * has_list + return 0.85 + + def _score_coverage(self, text: str, sections: List[str]) -> float: + key_terms = ["goal", "approach", "result", "risk", "timeline", "test"] + txt = text.lower() + hits = sum(1 for k in key_terms if k in txt) + section_factor = min(len(sections) / 4.0, 1.0) + return min((hits / len(key_terms)) * 0.7 + section_factor * 0.3, 1.0) + + def _score_clarity(self, text: str) -> 
"""Smoke tests for the weighted quality scoring engine."""

import json
import unittest

from scoring import QualityScorer


class TestQualityScorer(unittest.TestCase):
    def setUp(self):
        self.scorer = QualityScorer(pass_threshold=0.7)

    def test_json_submission(self):
        """A well-formed JSON proposal scores high on format compliance."""
        submission = json.dumps(
            {
                "goal": "Build scoring",
                "approach": "Weighted rubric",
                "result": "0-1 output",
                "risk": "edge formats",
                "timeline": "2 days",
                "test": "20-case benchmark",
            }
        )
        out = self.scorer.score(submission)
        self.assertIn("weighted_score", out)
        self.assertIn("scores", out)
        self.assertGreaterEqual(out["scores"]["format_compliance"], 0.95)

    def test_markdown_submission(self):
        """Markdown proposals yield a known rating and list-typed feedback."""
        submission = """# Proposal\n- goal: build engine\n- approach: rubric scoring\n- result: reliable scores\n- risk: malformed input\n- timeline: 2d\n- test: benchmark suite\n"""
        out = self.scorer.score(submission)
        # assertIn / assertIsInstance instead of the old
        # assertEqual(x in {...}, True) and assertTrue(isinstance(...))
        # anti-patterns: failures now report the offending value.
        self.assertIn(out["quality_rating"], {"good", "excellent", "fair", "poor"})
        self.assertIsInstance(out["feedback"], list)

    def test_empty_submission(self):
        """Empty input must not crash and must fail the pass threshold."""
        out = self.scorer.score("")
        self.assertFalse(out["pass_threshold"])
        self.assertIsInstance(out["feedback"], list)

    def test_benchmark_100_under_10s(self):
        """A 100-sample batch finishes well inside the 10-second budget."""
        samples = [
            f"goal approach result risk timeline test sample {i}" for i in range(100)
        ]
        result = self.scorer.benchmark(samples)
        self.assertEqual(result["count"], 100)
        self.assertLess(result["elapsed_seconds"], 10.0)


if __name__ == "__main__":
    unittest.main()