From cf9f5bad9d38b51c7e47a06eef3bd99922ea1923 Mon Sep 17 00:00:00 2001 From: ildyria Date: Tue, 16 Jun 2026 08:10:18 +0200 Subject: [PATCH] Improve documentation --- README.md | 18 +++++++++++++- docs/0-overview/README.md | 36 ++++++++++++++++------------ docs/1-concepts/README.md | 49 +++++++++++++++++++++++++-------------- tests/test_classify.py | 42 ++++++++++++++++++++++++++++++++- 4 files changed, 110 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 6d8d95d..0939cd0 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,7 @@ Interactive docs: `http://localhost:8000/docs` The endpoint returns **`202 Accepted`** immediately. Results arrive via callback. -### Callback payload (POSTed to Lychee) +### Callback payload (POSTed to `{VISION_NSFW_LYCHEE_API_URL}/api/v2/NsfwDetection/results`) ```json { @@ -191,6 +191,22 @@ The endpoint returns **`202 Accepted`** immediately. Results arrive via callback "should_block": true, "should_review": false, "is_sensitive": true, + "all_detected": [ + { + "label": "FEMALE_GENITALIA_EXPOSED", + "confidence": 0.91, + "bbox": {"x": 120, "y": 200, "width": 300, "height": 280}, + "area_pixels": 84000, + "area_ratio": 0.175 + }, + { + "label": "FEMALE_BREAST_COVERED", + "confidence": 0.74, + "bbox": {"x": 50, "y": 80, "width": 150, "height": 140}, + "area_pixels": 21000, + "area_ratio": 0.044 + } + ], "block_detected": [ { "label": "FEMALE_GENITALIA_EXPOSED", diff --git a/docs/0-overview/README.md b/docs/0-overview/README.md index ded7abe..b2f3f7f 100644 --- a/docs/0-overview/README.md +++ b/docs/0-overview/README.md @@ -18,37 +18,43 @@ Lychee itself has no built-in content moderation. This service fills that gap: w Lychee NSFW Service │ │ │ POST /api/nsfw/detect │ - │ { photo_id, image_url } │ + │ { photo_id, photo_path } │ │ ──────────────────────────► │ - │ │ 1. Fetch image + │ 202 Accepted │ + │ ◄────────────────────────── │ + │ │ 1. Read image from shared volume │ │ 2. Run NudeNet inference │ │ 3. 
Apply classification logic - │ │ 4. Build response - │ 200 OK │ - │ { is_safe, detections } │ + │ │ 4. POST callback to Lychee + │ │ + │ POST /api/v2/NsfwDetection/results + │ { photo_id, should_block, … }│ │ ◄────────────────────────── │ ``` -Detection is **synchronous**: Lychee sends a request and waits for the result. The classification logic runs entirely inside the single HTTP round-trip — there is no callback or polling model. +Detection is **asynchronous**: Lychee sends a request and receives `202 Accepted` immediately. The service enqueues the job and a background worker runs NudeNet inference. Results are POSTed back to Lychee's callback endpoint (`/api/v2/NsfwDetection/results`) once detection completes. --- ## Key design decisions -### Synchronous detection +### Asynchronous detection with callback + +The service uses a job queue and callback flow. Lychee submits a job and returns immediately; the result arrives asynchronously. This decouples upload latency from inference latency and allows the queue to absorb bursts. -Unlike some AI sidecars that use a job queue and callback flow, this service responds inline. This keeps the integration surface minimal (one endpoint, one request, one response) and is appropriate because NudeNet inference is fast: typically 100–300 ms per image on CPU. +### Queue-backed processing -### Stateless +The service maintains a job queue (in-memory or database-backed) that bounds concurrency and provides back-pressure via `429 Too Many Requests` when full. Queue depth and position are queryable via `/api/nsfw/queue`. -The service holds no persistent state. Every request is independent. There is no embedding store, job queue, or database. This simplifies deployment and makes horizontal scaling trivial — run as many replicas as needed behind a load balancer. 
+### Three-tier classification -### Two-stage classification +Raw NudeNet detections are classified into three independent tiers, each with its own label set and thresholds: -Raw NudeNet detections do not map directly to an `is_safe` verdict. The service applies two independent safety tests: +- **block** — hide the photo entirely (`should_block: true`). +- **review** — queue for human moderation (`should_review: true`). +- **sensitive** — mark the photo but keep it visible (`is_sensitive: true`). -1. **Always-block categories** — certain body parts are never acceptable regardless of image area covered (e.g. exposed male or anal genitalia). A single detection above the banned threshold marks the image unsafe. -2. **Area-based threshold** — other sensitive categories (e.g. exposed female genitalia) are flagged only when their total detected area exceeds a configurable fraction of the image. This reduces false positives from incidental framing. +A photo can match multiple tiers simultaneously. Every detection that passes `VISION_NSFW_CONFIDENCE_THRESHOLD` (regardless of tier) is included in `all_detected` in the callback payload, which is useful for Lychee-side filtering and threshold tuning. See [concepts](../1-concepts/README.md) for the full classification logic. @@ -80,4 +86,4 @@ All endpoints except `GET /health` require a shared-secret `X-API-Key` header. W --- -*Last updated: June 15, 2026* +*Last updated: June 16, 2026* diff --git a/docs/1-concepts/README.md b/docs/1-concepts/README.md index ae4c6d3..25c7782 100644 --- a/docs/1-concepts/README.md +++ b/docs/1-concepts/README.md @@ -44,25 +44,22 @@ NudeNet's full label set: Raw NudeNet detections are classified into three independent tiers. Each tier has its own configurable set of labels and thresholds.
``` -NudeNet detections - │ - ▼ -┌─────────────┐ should_block ──► block_detected[ ] -│ block │ -└─────────────┘ - │ - ▼ -┌─────────────┐ should_review ──► review_detected[ ] -│ review │ -└─────────────┘ - │ - ▼ -┌─────────────┐ is_sensitive ──► sensitive_detected[ ] -│ sensitive │ -└─────────────┘ + ┌───────────────────────────────────► all_detected[ ] + │ + │ ┌─────────────┐ + ├─►│ block │── should_block ──► block_detected[ ] + │ └─────────────┘ +NudeNet detections ── each ───┤ + detection │ ┌─────────────┐ + ├─►│ review │── should_review ─► review_detected[ ] + │ └─────────────┘ + │ + │ ┌─────────────┐ + └─►│ sensitive │── is_sensitive ──► sensitive_detected[ ] + └─────────────┘ ``` -All three tiers evaluate every detection independently — a single detection can appear in more than one tier if its label is listed in multiple sets. +Every detection is evaluated against all three tiers independently — tiers are **not** mutually exclusive. A single detection can appear in `block_detected`, `review_detected`, and `sensitive_detected` simultaneously if its label is listed in multiple tier configurations. `all_detected` always contains every detection that passed `VISION_NSFW_CONFIDENCE_THRESHOLD`, regardless of tier membership, and is useful for Lychee-side filtering and threshold tuning. ### Tier meanings @@ -144,6 +141,22 @@ The result is POSTed to `{VISION_NSFW_LYCHEE_API_URL}/api/v2/NsfwDetection/resul "should_block": false, "should_review": true, "is_sensitive": true, + "all_detected": [ + { + "label": "FEMALE_BREAST_EXPOSED", + "confidence": 0.83, + "bbox": {"x": 50, "y": 100, "width": 200, "height": 180}, + "area_pixels": 36000, + "area_ratio": 0.075 + }, + { + "label": "FEMALE_BREAST_COVERED", + "confidence": 0.71, + "bbox": {"x": 260, "y": 110, "width": 180, "height": 160}, + "area_pixels": 28800, + "area_ratio": 0.060 + } + ], "block_detected": [], "review_detected": [ { @@ -201,4 +214,4 @@ NudeNet inference is CPU-bound. 
The `NudeDetector` is loaded lazily on the first --- -*Last updated: June 15, 2026* +*Last updated: June 16, 2026* diff --git a/tests/test_classify.py b/tests/test_classify.py index 77711dd..c7dc69c 100644 --- a/tests/test_classify.py +++ b/tests/test_classify.py @@ -182,7 +182,7 @@ def test_classify_label_not_in_any_set_ignored() -> None: assert result["is_sensitive"] is False -def test_classify_label_in_multiple_sets() -> None: +def test_classify_label_in_block_and_review() -> None: raw = [_make_raw("ANUS_EXPOSED", 0.9, [0, 0, 50, 50])] result = classify( raw, @@ -199,6 +199,46 @@ def test_classify_label_in_multiple_sets() -> None: assert len(result["review_detected"]) == 1 +def test_classify_single_detection_in_all_three_tiers() -> None: + """Tiers are independent: one detection can trigger block, review, and sensitive simultaneously.""" + raw = [_make_raw("ANUS_EXPOSED", 0.9, [0, 0, 50, 50])] + result = classify( + raw, + 800, + 600, + _make_settings( + block=_make_set(["ANUS_EXPOSED"]), + review=_make_set(["ANUS_EXPOSED"]), + sensitive=_make_set(["ANUS_EXPOSED"]), + ), + ) + assert result["should_block"] is True + assert result["should_review"] is True + assert result["is_sensitive"] is True + assert len(result["block_detected"]) == 1 + assert len(result["review_detected"]) == 1 + assert len(result["sensitive_detected"]) == 1 + assert result["block_detected"][0] is result["review_detected"][0] is result["sensitive_detected"][0] + + +def test_classify_block_does_not_prevent_review_or_sensitive() -> None: + """Triggering block must not suppress review or sensitive checks on the same detection.""" + raw = [_make_raw("FEMALE_GENITALIA_EXPOSED", 0.95, [0, 0, 200, 200])] + result = classify( + raw, + 800, + 600, + _make_settings( + block=_make_set(["FEMALE_GENITALIA_EXPOSED"]), + review=_make_set(["FEMALE_GENITALIA_EXPOSED"]), + sensitive=_make_set(["FEMALE_GENITALIA_EXPOSED"]), + ), + ) + assert result["should_block"] is True + assert result["should_review"] is 
True + assert result["is_sensitive"] is True + + def test_classify_area_pixels_and_ratio_computed() -> None: raw = [_make_raw("FEMALE_BREAST_EXPOSED", 0.8, [0, 0, 80, 60])] result = classify(raw, 800, 600, _make_settings())