Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ curl -X POST http://localhost:8080/api/repurpose \
curl http://localhost:8080/api/usage -H "X-API-Key: cs_your_key"
```

### Quality Scoring (Bounty #1)
```bash
curl -X POST http://localhost:8080/api/quality/score \
-H "Content-Type: application/json" \
-d '{"submission":"# Proposal\n- goal: ...\n- approach: ...\n- result: ...\n- risk: ...\n- timeline: ...\n- test: ..."}'
```

Returns:
`{weighted_score, quality_rating, scores:{...}, feedback:[...], pass_threshold}`

## 🎯 Platforms

| Platform | Output |
Expand Down
22 changes: 22 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pydantic import BaseModel, Field

from middleware import validate_api_key, track_usage, get_usage_stats, get_or_create_key, PLANS
from scoring import QualityScorer

app = FastAPI(
title="ContentSplit",
Expand Down Expand Up @@ -57,6 +58,14 @@ class RepurposeResponse(BaseModel):
created_at: str


class QualityScoreRequest(BaseModel):
    """Request body for POST /api/quality/score: one submission to be scored."""

    submission: str = Field(..., min_length=1, description="Submission content: JSON, markdown, code, or text")


class QualityBenchmarkRequest(BaseModel):
    """Request body for POST /api/quality/benchmark: a batch of submissions."""

    submissions: list[str] = Field(..., min_length=1, description="Batch submissions for benchmark")


# ── Content Generation (using prompts, model-agnostic) ────────────────────

PLATFORM_PROMPTS = {
Expand Down Expand Up @@ -386,6 +395,19 @@ async def list_platforms():
}


# Single shared scorer for the quality endpoints; it holds only its pass
# threshold and its score() method does not mutate state, so reuse across
# requests is safe.
quality_scorer = QualityScorer()


@app.post("/api/quality/score")
async def quality_score(req: QualityScoreRequest):
    """Score a single submission against the quality rubric and return its scorecard."""
    submission_text = req.submission
    return quality_scorer.score(submission_text)


@app.post("/api/quality/benchmark")
async def quality_benchmark(req: QualityBenchmarkRequest):
    """Batch-score submissions, returning per-item scorecards plus timing info."""
    batch = req.submissions
    return quality_scorer.benchmark(batch)


@app.get("/", response_class=HTMLResponse)
async def landing_page():
"""Landing page."""
Expand Down
51 changes: 51 additions & 0 deletions sample_scorecards.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
[
{
"weighted_score": 0.6391,
"quality_rating": "fair",
"scores": {
"completeness": 0.1427,
"format_compliance": 1.0,
"coverage": 0.775,
"clarity": 0.75,
"validity": 0.9
},
"feedback": [
"Detected format: markdown.",
"Improve completeness (current 0.14)."
],
"pass_threshold": false
},
{
"weighted_score": 0.7188,
"quality_rating": "fair",
"scores": {
"completeness": 0.3127,
"format_compliance": 1.0,
"coverage": 1.0,
"clarity": 0.5,
"validity": 1.0
},
"feedback": [
"Detected format: json.",
"Improve completeness (current 0.31).",
"Improve clarity (current 0.50)."
],
"pass_threshold": true
},
{
"weighted_score": 0.5965,
"quality_rating": "poor",
"scores": {
"completeness": 0.0382,
"format_compliance": 0.85,
"coverage": 0.7,
"clarity": 1.0,
"validity": 0.9
},
"feedback": [
"Detected format: text.",
"Improve completeness (current 0.04)."
],
"pass_threshold": false
}
]
179 changes: 179 additions & 0 deletions scoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from __future__ import annotations

import json
import re
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple

# Relative importance of each rubric dimension in the final weighted score.
# The values sum to 1.0, so the weighted score stays within [0, 1].
WEIGHTS: Dict[str, float] = {
    "completeness": 0.30,
    "format_compliance": 0.20,
    "coverage": 0.25,
    "clarity": 0.15,
    "validity": 0.10,
}


@dataclass
class ScoreResult:
    """One submission's scorecard, serializable via as_dict()."""

    # Overall 0-1 score after applying the rubric weights.
    weighted_score: float
    # Rating bucket derived from the weighted score.
    quality_rating: str
    # Raw per-dimension scores, each in [0, 1].
    scores: Dict[str, float]
    # Reviewer-style messages highlighting weak dimensions.
    feedback: List[str]
    # Whether the weighted score met the scorer's pass threshold.
    pass_threshold: bool

    def as_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict, rounding floats to 4 places."""
        payload: Dict[str, Any] = {
            "weighted_score": round(self.weighted_score, 4),
            "quality_rating": self.quality_rating,
            "scores": {dim: round(val, 4) for dim, val in self.scores.items()},
            "feedback": self.feedback,
            "pass_threshold": self.pass_threshold,
        }
        return payload


class QualityScorer:
    """Rubric-based scorer: detects a submission's format, scores five
    dimensions in [0, 1], and combines them with WEIGHTS into a scorecard."""

    def __init__(self, pass_threshold: float = 0.7):
        # Minimum weighted score a submission needs to pass.
        self.pass_threshold = pass_threshold

    def score(self, submission: str) -> Dict[str, Any]:
        """Score one submission and return its scorecard as a plain dict."""
        text = submission or ""
        fmt = self._detect_format(text)
        sections = self._extract_sections(text, fmt)

        scores = {
            "completeness": self._score_completeness(text, sections),
            "format_compliance": self._score_format_compliance(text, fmt),
            "coverage": self._score_coverage(text, sections),
            "clarity": self._score_clarity(text),
            "validity": self._score_validity(text, fmt),
        }

        weighted = sum(WEIGHTS[dim] * scores[dim] for dim in WEIGHTS)
        result = ScoreResult(
            weighted_score=weighted,
            quality_rating=self._rating(weighted),
            scores=scores,
            feedback=self._feedback(scores, fmt),
            pass_threshold=weighted >= self.pass_threshold,
        )
        return result.as_dict()

    def benchmark(self, submissions: List[str]) -> Dict[str, Any]:
        """Score a batch of submissions and report wall-clock time taken."""
        started = time.perf_counter()
        cards = []
        for item in submissions:
            cards.append(self.score(item))
        return {
            "count": len(submissions),
            "elapsed_seconds": round(time.perf_counter() - started, 4),
            "scorecards": cards,
        }

    def _detect_format(self, text: str) -> str:
        """Classify input as json / code / markdown / text (in that priority)."""
        body = text.strip()
        if not body:
            return "text"
        if body.startswith(("{", "[")):
            try:
                json.loads(body)
            except Exception:
                pass  # looks like JSON but is not parseable; fall through
            else:
                return "json"
        if "```" in text or re.search(r"\b(def|class|function|const|let)\b", text):
            return "code"
        looks_markdown = re.search(r"^#{1,6}\s+", text, flags=re.M) or re.search(
            r"^[-*]\s+", text, flags=re.M
        )
        return "markdown" if looks_markdown else "text"

    def _extract_sections(self, text: str, fmt: str) -> List[str]:
        """Return top-level JSON keys, or markdown headings for everything else."""
        if fmt != "json":
            return re.findall(r"^#{1,6}\s+(.+)$", text, flags=re.M)
        try:
            parsed = json.loads(text)
        except Exception:
            return []
        return list(parsed.keys()) if isinstance(parsed, dict) else ["list"]

    def _score_completeness(self, text: str, sections: List[str]) -> float:
        """Blend word count (70%, saturating at 220 words) with section count."""
        word_count = len(text.split())
        from_length = min(word_count / 220.0, 1.0) * 0.7
        from_sections = min(len(sections) / 5.0, 1.0) * 0.3
        return min(from_length + from_sections, 1.0)

    def _score_format_compliance(self, text: str, fmt: str) -> float:
        """How well the content follows its detected format's conventions."""
        if fmt == "json":
            try:
                parsed = json.loads(text)
            except Exception:
                return 0.2
            return 1.0 if isinstance(parsed, (dict, list)) else 0.7
        if fmt == "code":
            return 1.0 if "```" in text else 0.75
        if fmt != "markdown":
            return 0.85  # plain text: no structure to check, modest default
        has_heading = bool(re.search(r"^#{1,6}\s+", text, flags=re.M))
        has_bullets = bool(re.search(r"^[-*]\s+", text, flags=re.M))
        return 0.6 + 0.2 * has_heading + 0.2 * has_bullets

    def _score_coverage(self, text: str, sections: List[str]) -> float:
        """Fraction of expected proposal terms present, plus a section factor."""
        expected = ("goal", "approach", "result", "risk", "timeline", "test")
        lowered = text.lower()
        hits = sum(term in lowered for term in expected)
        section_factor = min(len(sections) / 4.0, 1.0)
        return min((hits / len(expected)) * 0.7 + section_factor * 0.3, 1.0)

    def _score_clarity(self, text: str) -> float:
        """Score readability from average sentence length and punctuation use."""
        pieces = [p.strip() for p in re.split(r"[.!?]+", text)]
        sentences = [p for p in pieces if p]
        if not sentences:
            return 0.0
        avg_words = sum(len(s.split()) for s in sentences) / len(sentences)
        # Sweet spot is roughly 6-24 words per sentence.
        if avg_words < 6:
            base = 0.5
        elif avg_words <= 24:
            base = 1.0
        elif avg_words <= 36:
            base = 0.75
        else:
            base = 0.5
        factor = 1.0 if re.search(r"[,;:]", text) else 0.85
        return min(base * factor, 1.0)

    def _score_validity(self, text: str, fmt: str) -> float:
        """Cheap sanity checks: parseable JSON, balanced brackets, no filler."""
        if fmt == "json":
            try:
                json.loads(text)
            except Exception:
                return 0.1
            return 1.0
        if fmt == "code":
            opening = sum(text.count(ch) for ch in "{([")
            closing = sum(text.count(ch) for ch in "})]")
            return 1.0 if opening == closing else 0.6
        # Plain text / markdown: penalize placeholder filler markers.
        lowered = text.lower()
        penalty = sum(marker in lowered for marker in ("lorem ipsum", "todo", "tbd")) * 0.1
        return max(0.9 - penalty, 0.4)

    def _rating(self, weighted: float) -> str:
        """Map a weighted score onto its rating bucket."""
        for floor, label in ((0.9, "excellent"), (0.75, "good"), (0.6, "fair")):
            if weighted >= floor:
                return label
        return "poor"

    def _feedback(self, scores: Dict[str, float], fmt: str) -> List[str]:
        """Build reviewer messages: detected format plus any dimension below 0.6."""
        messages = [f"Detected format: {fmt}."]
        messages.extend(
            f"Improve {dim.replace('_', ' ')} (current {val:.2f})."
            for dim, val in scores.items()
            if val < 0.6
        )
        if len(messages) == 1:
            messages.append("Strong submission across all dimensions.")
        return messages
43 changes: 43 additions & 0 deletions test_scoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import json
import unittest

from scoring import QualityScorer


class TestQualityScorer(unittest.TestCase):
    """Tests for QualityScorer covering JSON, markdown, and batch scoring."""

    def setUp(self):
        self.scorer = QualityScorer(pass_threshold=0.7)

    def test_json_submission(self):
        """A valid JSON object covering all rubric terms scores full format compliance."""
        submission = json.dumps(
            {
                "goal": "Build scoring",
                "approach": "Weighted rubric",
                "result": "0-1 output",
                "risk": "edge formats",
                "timeline": "2 days",
                "test": "20-case benchmark",
            }
        )
        out = self.scorer.score(submission)
        self.assertIn("weighted_score", out)
        self.assertIn("scores", out)
        self.assertGreaterEqual(out["scores"]["format_compliance"], 0.95)

    def test_markdown_submission(self):
        """A markdown proposal yields one of the known ratings and list feedback."""
        submission = """# Proposal\n- goal: build engine\n- approach: rubric scoring\n- result: reliable scores\n- risk: malformed input\n- timeline: 2d\n- test: benchmark suite\n"""
        out = self.scorer.score(submission)
        # assertIn / assertIsInstance report the offending value on failure,
        # unlike comparing a membership expression against True.
        self.assertIn(out["quality_rating"], {"excellent", "good", "fair", "poor"})
        self.assertIsInstance(out["feedback"], list)

    def test_benchmark_100_under_10s(self):
        """Benchmarking 100 submissions reports count, scorecards, and fast timing."""
        samples = [
            f"goal approach result risk timeline test sample {i}" for i in range(100)
        ]
        result = self.scorer.benchmark(samples)
        self.assertEqual(result["count"], 100)
        self.assertEqual(len(result["scorecards"]), 100)
        self.assertLess(result["elapsed_seconds"], 10.0)


if __name__ == "__main__":
    # Allow running the suite directly: python test_scoring.py
    unittest.main()