# visual_integrity_api/main.py (develop branch) - TrueLensLK/visual_integrity_api
"""
AI Image Detection System
Multi-Layer Forensic Analysis Engine (Full Spectrum Edition)

SYSTEM ARCHITECTURE:
Layer 0:   C2PA Content Credentials (Cryptographic Truth)
Layer 1:   Quick Forensic Triage (File Validation)
Layer 2:   Metadata Analysis (EXIF/AI Signatures)
Layer 3:   Physics Analysis (ELA/Noise)
Layer 3.5: Face Consistency (Face vs Background)
Layer 4:   Neural Network Ensemble (SDXL-Detector + ViT + Ateeqq + ConvNeXt + Swin + TTA)
Layer 5:   Master Judge (Weighted Multi-Layer Consensus)
Layer 6:   Spectrum Analysis (FFT Frequency Domain)
Layer 7:   Eye Reflection Physics (Optical Consistency)
Layer 8:   Watermark Detection (Visible, Stego, Hash, SynthID)
Layer 8.5: PRNU Sensor Fingerprint (Wavelet + Reference DB)
Layer 9:   Contextual Provenance (Reverse Image Search)
Layer 10:  Shadow Convergence Analysis (Light Source Consistency)
Layer 11:  Physical Continuity (Geometry)
Layer 12:  GAN/Diffusion Artifacts

FINAL BOSS (LLM + Adversarial Debate):
  - Gray zone (score 35-65)     → Single LLM call (Gemini/OpenRouter/Groq)
  - Contradiction detected       → Adversarial Debate:
      Prosecution (Gemini Vision) vs Defense (OpenRouter Vision)
      judged by Convergence Detector (Groq text), max 3 rounds
"""

import io
import os
import sys
import shutil
import uuid
import json
import numpy as np
from typing import Dict, Tuple, Optional, Any
from dataclasses import dataclass, asdict
from datetime import datetime
from urllib.parse import urlparse

from dotenv import load_dotenv
load_dotenv()  # Load .env file for API keys

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, HttpUrl, field_validator
import httpx
import requests

# --- PATH CONFIGURATION ---
# Add layers directory to Python path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "Universal_Detector", "src"))

# --- IMPORTS ---
try:
       from Universal_Detector.src.layers.layer_0_c2pa import verify_c2pa
       from Universal_Detector.src.layers.layer_1_triage import quick_check
       from Universal_Detector.src.layers.layer_2_metadata import analyze_metadata
       from Universal_Detector.src.layers.layer_3_physics import analyze_physics
       from Universal_Detector.src.layers.layer_3_5_face import analyze_face_consistency
       from Universal_Detector.src.layers.layer_4_visual import predict_visuals_detailed
       from Universal_Detector.src.layers.layer_5_judge import calculate_integrity, humanize_verdict_description
       from Universal_Detector.src.layers.layer_6_spectrum import analyze_spectrum
       from Universal_Detector.src.layers.layer_7_eyes import analyze_eyes
       from Universal_Detector.src.layers.layer_8_watermark import detect_watermarks
       from Universal_Detector.src.layers.layer_8_5_The_Sensor_Fingerprint import analyze_prnu
       from Universal_Detector.src.layers.layer_9_context import analyze_context
       from Universal_Detector.src.layers.layer_10_Shadow_Convergence import get_shadow_score
       from Universal_Detector.src.layers.layer_11_physical_continuity import get_physical_continuity_score
       from Universal_Detector.src.layers.layer_12_artifacts import analyze_artifacts
       from Universal_Detector.src.utils.origin_classifier import classify_origin
except ImportError as e:
    print(f"CRITICAL: Missing forensic layer modules. {e}")
    sys.exit(1)

# Import the Case Builder and Modular Judge.
# If these modules are missing the service still starts in a degraded,
# rule-based-only mode, so every name the rest of this file references must
# be bound here; otherwise the first analysis would die with a NameError.
try:
    from Universal_Detector.src.layers.forensic_case_builder import compile_case_file
    from Universal_Detector.src.layers.llm_judge import HybridJudge, LLMVerdict, generate_user_description
except ImportError as e:
    print(f"CRITICAL: Missing core system modules (builder/judge/debate). {e}")
    # None placeholders: calling them raises TypeError, which the detector's
    # constructor already catches and treats as "LLM judge disabled".
    compile_case_file = None
    HybridJudge = None
    LLMVerdict = None

    def generate_user_description(verdict, score, technical_description, **_unused):
        """Fallback used when the LLM judge module is unavailable.

        Accepts the same keyword arguments as the real helper and returns the
        technical description unchanged, so results still carry a description.
        """
        return technical_description

# The debate model list is optional and imported separately, so that a
# missing debate package cannot take the judge imports above down with it.
try:
    from Universal_Detector.src.layers.debate.models import OPENROUTER_VISION_MODELS
except ImportError as e:
    print(f"WARNING: Debate models unavailable, adversarial debate model list is empty. {e}")
    OPENROUTER_VISION_MODELS = []

@dataclass
class DetectionResult:
    """Standardized output format for detection results.

    Field order matters: ``AIImageDetector._create_error_result`` builds
    instances positionally, so new fields must be appended (with defaults)
    rather than inserted.
    """
    # 0-100 scale; the early-exit AI path uses 5 and scores above 50 are
    # treated as leaning "REAL" when confidence is computed.
    final_score: int  # 0-100 scale
    # Also "ERROR" when file validation fails.
    verdict: str  # "REAL", "AI-GENERATED", "AI-ENHANCED", or "EDITED"
    # Also "N/A" on the error result.
    confidence: str  # "HIGH", "MEDIUM", "LOW"
    # Judge / forensic wording of the verdict.
    technical_description: str
    # End-user wording of the verdict.
    user_description: str
    # Per-layer impact scores keyed by layer name (e.g. "metadata", "prnu").
    layer_scores: Dict[str, float]
    # Per-layer human-readable findings, same keys as layer_scores plus
    # extras such as "c2pa", "origin" or "error".
    layer_details: Dict[str, str]
    # Wall-clock duration of the analysis in milliseconds.
    processing_time_ms: int
    # Non-fatal problems collected during the run.
    warnings: list
    # ISO-8601 string from datetime.now().isoformat().
    timestamp: str
    # Reasoning text from the LLM / debate judge, when one was consulted.
    llm_reasoning: Optional[str] = None
    # One of "rule-based", "llm", "debate", "hybrid (rule-enforced)",
    # "early-exit-c2pa".
    judge_source: str = "rule-based"
    # Keys: "rounds_taken", "debate_summary", "debate_history".
    debate_data: Optional[Dict] = None  # Populated when adversarial debate is used


class AIImageDetector:
    """
    Main orchestrator for the AI image detection system.
    Runs all layers in sequence and produces a final verdict.
    """

    def __init__(self, enable_gpu: bool = True, verbose: bool = True, enable_llm_judge: bool = True):
        self.enable_gpu = enable_gpu
        self.verbose = verbose
        self.warnings = []
        self.enable_llm_judge = enable_llm_judge

        # Initialize Hybrid Judge
        self.hybrid_judge = None
        if enable_llm_judge:
            try:
                self.hybrid_judge = HybridJudge(
                    enable_llm=True,
                    gemini_api_key=os.getenv("GOOGLE_AI_API_KEY") or os.getenv("GEMINI_API_KEY"),
                    groq_api_key=os.getenv("GROQ_API_KEY"),
                    openrouter_api_key=os.getenv("OPENROUTER_API_KEY")
                )
                self.log("LLM Final Boss enabled - Ready to review ambiguous cases", "INFO")
            except Exception as e:
                self.log(f"LLM Judge disabled: {e}", "WARN")
                self.hybrid_judge = None

    def log(self, message: str, level: str = "INFO"):
        """Internal logging function"""
        if self.verbose:
            timestamp = datetime.now().strftime("%H:%M:%S")
            prefix = {"INFO": "[INFO]", "WARN": "[WARN]", "ERROR": "[ERROR]", "SUCCESS": "[OK]"}.get(level, "[INFO]")
            print(f"[{timestamp}] {prefix} {message}")

    def analyze_image(self, image_path: str) -> DetectionResult:
        """Run the full forensic pipeline on one image and return the verdict.

        Flow, in execution order:
          1. Layer 1 triage (``quick_check``); a ``"FAIL"`` status returns an
             ``"ERROR"`` result immediately.
          2. Layer 0 C2PA; when the credentials flag the image as AI the
             pipeline short-circuits with a fixed score of 5 / "AI-GENERATED".
          3. Layers 2-12; each runs in its own ``try`` so a failing layer
             scores 0, is logged as a warning, and does not abort the run.
          4. Origin classification, which zeroes selected layer scores for
             web-sourced images.
          5. Layer 5 rule-based judge (``calculate_integrity``).
          6. Optional hybrid LLM judge, which may override the rule-based
             verdict unless the rule-based description reports a kill switch.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A populated ``DetectionResult``.

        Raises:
            Exception: Errors from ``quick_check``, ``verify_c2pa``,
                ``calculate_integrity`` and ``generate_user_description`` are
                not caught here and propagate to the caller.
        """
        start_time = datetime.now()
        self.warnings = []

        self.log(f"Starting analysis of: {os.path.basename(image_path)}", "INFO")
        self.log("=" * 60)

        # --- LAYER 1: TRIAGE ---
        triage_result = quick_check(image_path)
        if triage_result["status"] == "FAIL":
            return self._create_error_result(f"File validation failed: {triage_result['reason']}", start_time)

        # Global flag. Despite the name it is also set for WebP.
        # The `or` guards tolerate a triage result whose details/format is None.
        triage_format = (triage_result.get("details") or {}).get("format") or ""
        is_jpeg = str(triage_format).upper() in ("JPEG", "WEBP")
        if image_path.lower().endswith((".jpg", ".jpeg", ".webp")):
            is_jpeg = True

        layer_scores = {}
        layer_details = {}

        # --- LAYER 0: C2PA ---
        self.log("Layer 0: C2PA Content Credentials")
        c2pa_result = verify_c2pa(image_path)
        layer_details["c2pa"] = c2pa_result.get("message", "No credentials")

        # ====================================================================
        # === THE EARLY EXIT (SHORT-CIRCUIT) ===
        # ====================================================================
        if c2pa_result.get("is_ai_flagged") is True:
            # str() + `or []` keep the join safe for non-string / missing matches.
            reasons = ", ".join(str(m) for m in (c2pa_result.get("forensic_matches") or []))
            self.log(f"C2PA hard-signature found! {reasons}", "ERROR")
            self.log("SHORT-CIRCUITING PIPELINE: Bypassing heavy layers to save compute.", "SUCCESS")

            processing_time = int((datetime.now() - start_time).total_seconds() * 1000)

            technical_desc = f"Deterministic proof: C2PA Content Credentials confirm this is AI. ({reasons})"
            user_desc = generate_user_description(
                verdict="AI-GENERATED",
                score=5,
                technical_description=technical_desc,
                judge_source="early-exit-c2pa",
                is_web_sourced=False,
                face_detected=False,
                groq_client=None
            )

            return DetectionResult(
                final_score=5,  # 5/100 sits at the "fake" end of the scale
                verdict="AI-GENERATED",
                confidence="HIGH",
                technical_description=technical_desc,
                user_description=user_desc,
                layer_scores={"c2pa": -100.0},  # Massive penalty score
                # Reuse the value stored above so a missing message becomes
                # "No credentials" instead of None.
                layer_details={"c2pa": layer_details["c2pa"]},
                processing_time_ms=processing_time,
                warnings=self.warnings + ["C2PA Early Exit triggered."],
                timestamp=datetime.now().isoformat(),
                llm_reasoning=None,
                judge_source="early-exit-c2pa"
            )

        self.log("C2PA clear. Proceeding to forensic layers.", "INFO")
        # ====================================================================

        # --- LAYER 2: METADATA ---
        self.log("Layer 2: Metadata")
        try:
            m_score, m_det = analyze_metadata(image_path)
            layer_scores["metadata"] = m_score
            layer_details["metadata"] = str(m_det)
        except Exception as e:
            self.log(f"Layer 2 (metadata) failed: {e}", "WARN")
            layer_scores["metadata"] = 0
            layer_details["metadata"] = f"Error: {e}"

        # --- LAYER 3: PHYSICS ---
        self.log("Layer 3: Physics")
        has_bayer_pattern = False
        try:
            p_res = analyze_physics(image_path)
            layer_scores["physics"] = p_res["impact"] if isinstance(p_res, dict) else p_res
            findings = p_res.get("findings", []) if isinstance(p_res, dict) else []
            layer_details["physics"] = f"Findings: {'; '.join(findings)}" if findings else "Normal"
            # Extract Bayer pattern flag from physics details
            if isinstance(p_res, dict):
                details = p_res.get("details", {})
                has_bayer_pattern = bool(details.get("has_bayer_pattern", False))
        except Exception as e:
            self.log(f"Layer 3 (physics) failed: {e}", "WARN")
            layer_scores["physics"] = 0
            layer_details["physics"] = "Error"

        # --- LAYER 3.5: FACES ---
        self.log("Layer 3.5: Face Consistency")
        face_count = 0
        try:
            f_res = analyze_face_consistency(image_path)
            layer_scores["face_consistency"] = f_res["impact"] if isinstance(f_res, dict) else f_res
            if isinstance(f_res, dict):
                face_count = f_res.get('face_count', 0)
            layer_details["face_consistency"] = f"Faces: {face_count}"
        except Exception as e:
            self.log(f"Layer 3.5 (face consistency) failed: {e}", "WARN")
            layer_scores["face_consistency"] = 0

        # --- LAYER 4: VISUAL ENSEMBLE ---
        self.log("Layer 4: Neural Ensemble")
        visual_confidence = 1.0
        model_consensus = 0.0
        model_real_votes, model_ai_votes = 0, 0
        model_breakdown = {}
        try:
            v_det = predict_visuals_detailed(image_path, face_count=face_count)
            visual_score = v_det.get("impact", 0)
            visual_confidence = v_det.get("confidence", 1.0)
            model_real_votes = v_det.get("real_votes", 0)
            model_ai_votes = v_det.get("ai_votes", 0)
            model_consensus = v_det.get("model_consensus", 0.0)
            model_breakdown = v_det.get("model_breakdown", {})  # Per-model breakdown, forwarded to the judges

            layer_scores["neural_network"] = visual_score
            layer_details["neural_network"] = f"Votes: Real {model_real_votes} / AI {model_ai_votes}"
        except Exception as e:
            self.log(f"Layer 4 (neural ensemble) failed: {e}", "WARN")
            layer_scores["neural_network"] = 0
            self.warnings.append("Neural models unavailable")

        # --- LAYER 6: SPECTRUM ---
        self.log("Layer 6: Spectrum")
        try:
            s_score, s_desc = analyze_spectrum(image_path)
            layer_scores["spectrum"] = s_score
            layer_details["spectrum"] = s_desc
        except Exception as e:
            self.log(f"Layer 6 (spectrum) failed: {e}", "WARN")
            layer_scores["spectrum"] = 0

        # --- LAYER 7: EYES ---
        self.log("Layer 7: Eye Physics")
        try:
            e_score, e_desc = analyze_eyes(image_path)
            layer_scores["eye_physics"] = e_score
            layer_details["eye_physics"] = e_desc
        except Exception as e:
            self.log(f"Layer 7 (eye physics) failed: {e}", "WARN")
            layer_scores["eye_physics"] = 0

        # --- LAYER 8: WATERMARK ---
        self.log("Layer 8: Watermarks")
        try:
            w_score, w_desc = detect_watermarks(image_path, is_jpeg=is_jpeg)
            layer_scores["watermark"] = w_score
            layer_details["watermark"] = w_desc
        except Exception as e:
            self.log(f"Layer 8 (watermark) failed: {e}", "WARN")
            layer_scores["watermark"] = 0

        # --- LAYER 8.5: PRNU ---
        prnu_details_dict = {}
        self.log("Layer 8.5: PRNU Sensor Fingerprint")
        try:
            prnu_score, prnu_desc, prnu_details_dict = analyze_prnu(image_path, is_jpeg_hint=is_jpeg)
            layer_scores["prnu"] = prnu_score
            layer_details["prnu"] = prnu_desc
        except Exception as e:
            self.log(f"Layer 8.5 (PRNU) failed: {e}", "WARN")
            layer_scores["prnu"] = 0

        # --- LAYER 9: CONTEXT ---
        context_data_dict = {}
        self.log("Layer 9: Context")
        try:
            c_score, context_data_dict = analyze_context(image_path)
            layer_scores["context"] = c_score
            layer_details["context"] = context_data_dict.get("note", "")
        except Exception as e:
            self.log(f"Layer 9 (context) failed: {e}", "WARN")
            layer_scores["context"] = 0

        # --- LAYER 10: SHADOW ---
        self.log("Layer 10: Shadow Convergence")
        try:
            sh_score = get_shadow_score(image_path)
            layer_scores["shadow"] = sh_score
            layer_details["shadow"] = f"Score: {sh_score}"
        except Exception as e:
            self.log(f"Layer 10 (shadow) failed: {e}", "WARN")
            layer_scores["shadow"] = 0

        # --- LAYER 11: GEOMETRY ---
        self.log("Layer 11: Geometry")
        try:
            pc_score, pc_desc = get_physical_continuity_score(image_path)
            layer_scores["physical_continuity"] = pc_score
            layer_details["physical_continuity"] = pc_desc
        except Exception as e:
            self.log(f"Layer 11 (geometry) failed: {e}", "WARN")
            layer_scores["physical_continuity"] = 0

        # --- LAYER 12: ARTIFACTS ---
        self.log("Layer 12: Artifacts")
        try:
            a_res = analyze_artifacts(image_path, is_jpeg=is_jpeg)
            layer_scores["artifacts"] = a_res["score"]
            layer_details["artifacts"] = a_res["description"]
        except Exception as e:
            self.log(f"Layer 12 (artifacts) failed: {e}", "WARN")
            layer_scores["artifacts"] = 0

        # ========================================
        # FIX 5: ORIGIN CLASSIFICATION
        # ========================================
        self.log("Fix 5: Origin Classification")
        origin_classification = "CAMERA_ORIGINAL"
        is_web_sourced = False
        try:
            # Gather signals
            from PIL import Image
            try:
                with Image.open(image_path) as img:
                    dims = img.size
            except Exception:
                # Dimensions are optional input for the classifier.
                dims = None

            origin_res = classify_origin(
                file_path=image_path,
                meta_score=layer_scores.get("metadata", 0),
                spectrum_details=layer_details.get("spectrum", ""),
                image_dims=dims
            )

            origin_classification = origin_res["classification"]
            is_web_sourced = origin_res["is_web_sourced"]

            self.log(f"Origin: {origin_classification} (Web: {is_web_sourced})")
            if origin_res["reasoning"]:
                self.log(f"Reasoning: {', '.join(origin_res['reasoning'])}")

            # Zero out unreliable layers (ONLY if strictly WEB_SOURCED)
            if origin_classification == "WEB_SOURCED":
                # Fix A & Fix B: Zero specific layers and hide details
                # List of layers that are unreliable on web images (compression destroys signature)
                always_zero_layers = [
                    "prnu", "spectrum", "watermark", "metadata",
                    "eye_physics", "shadow", "physical_continuity", "context"
                ]

                for layer in always_zero_layers:
                    if layer in layer_scores:
                        self.log(f"  -> Zeroing {layer} (unreliable on web image)")
                        layer_scores[layer] = 0.0
                        # Fix B: Suppress detail string so Defense doesn't cite invalid evidence
                        layer_details[layer] = (
                            "DISABLED | Signal zeroed — web-sourced image. "
                            "High-frequency signatures are unreliable due to compression pipeline. "
                            "Do not cite this as evidence."
                        )

                # Fix A (Part 2): Conditional zeroing for face_consistency
                if "face_consistency" in layer_scores:
                    fc_score = layer_scores["face_consistency"]
                    # Range -35 to -5 is the "danger zone" where compression artifacts mimic face anomalies
                    if -35 <= fc_score <= -5:
                        self.log(f"  -> Zeroing face_consistency ({fc_score}) - likely compression artifact")
                        layer_scores["face_consistency"] = 0.0
                        layer_details["face_consistency"] = (
                            "DISABLED | Score zeroed — moderate face anomaly likely caused by "
                            "web compression (mozjpeg). Do not cite."
                        )
                    else:
                        self.log(f"  -> Keeping face_consistency ({fc_score}) - severe anomaly survives compression")

            # Fix 2: LIKELY_WEB_SOURCED Zeroing (Watermark & Face)
            elif origin_classification == "LIKELY_WEB_SOURCED":
                # A strongly negative watermark score (<= -45) is zeroed on
                # likely-web images.
                if "watermark" in layer_scores and layer_scores["watermark"] <= -45:
                    self.log(f"  -> Zeroing watermark ({layer_scores['watermark']}) - unreliable on LIKELY_WEB")
                    layer_scores["watermark"] = 0.0
                    layer_details["watermark"] = (
                        "DISABLED | Score zeroed — strong watermark signal dampened by likely web compression. "
                        "Unreliable evidence."
                    )

                # Face Consistency in compression artifact range (-35 to -5)
                if "face_consistency" in layer_scores:
                    fc_score = layer_scores["face_consistency"]
                    if -35 <= fc_score <= -5:
                        self.log(f"  -> Zeroing face_consistency ({fc_score}) - compression artifact risk")
                        layer_scores["face_consistency"] = 0.0
                        layer_details["face_consistency"] = (
                            "DISABLED | Score zeroed — moderate face anomaly overlaps with "
                            "compression artifacts on likely web image."
                        )

            layer_details["origin"] = origin_classification

        except Exception as e:
            self.log(f"Origin classification error: {e}", "WARN")

        # ========================================
        # LAYER 5: MASTER JUDGE (Rule-Based)
        # ========================================
        self.log("Layer 5: Master Judge (Rule-Based)")
        final_score, verdict, description, effective_scores = calculate_integrity(
            c2pa_res=c2pa_result,
            meta_score=layer_scores.get("metadata", 0),
            physics_score=layer_scores.get("physics", 0),
            face_score=layer_scores.get("face_consistency", 0),
            visual_score=layer_scores.get("neural_network", 0),
            spectrum_score=layer_scores.get("spectrum", 0),
            eye_score=layer_scores.get("eye_physics", 0),
            watermark_score=layer_scores.get("watermark", 0),
            watermark_desc=layer_details.get("watermark", ""),
            prnu_score=layer_scores.get("prnu", 0),
            prnu_details=prnu_details_dict,
            context_score=layer_scores.get("context", 0),
            context_details=context_data_dict,
            shadow_score=layer_scores.get("shadow", 0),
            shadow_desc=layer_details.get("shadow", ""),
            artifact_score=layer_scores.get("artifacts", 0),
            physical_continuity_score=layer_scores.get("physical_continuity", 0),
            visual_confidence=visual_confidence,
            is_jpeg=is_jpeg,
            visual_uncertain=False,
            model_real_votes=model_real_votes,
            model_ai_votes=model_ai_votes,
            model_count=5,
            model_consensus=model_consensus,
            has_bayer_pattern=has_bayer_pattern,
            face_count=face_count,
            model_breakdown=model_breakdown,
            origin_classification=origin_classification,  # Fix 5
            is_web_sourced=is_web_sourced  # Fix 5
        )

        rule_based_verdict = verdict
        rule_based_score = final_score
        rule_based_description = description
        judge_source = "rule-based"
        llm_reasoning = None
        llm_obj = None  # Track LLM/Debate result for debate_data extraction

        # Initial user description (Rule-based)
        user_description = generate_user_description(
            verdict=rule_based_verdict,
            score=rule_based_score,
            technical_description=rule_based_description,
            judge_source="rule-based",
            is_web_sourced=is_web_sourced,
            face_detected=face_count > 0,
            groq_client=None
        )

        # ========================================
        # LLM FINAL BOSS
        # ========================================
        if self.hybrid_judge is not None:
            try:
                # 1. Compile Case File (Categorized)
                case_file = compile_case_file(
                    image_path=image_path,
                    layer_scores=layer_scores,
                    layer_details=layer_details,
                    rule_based_verdict=rule_based_verdict,
                    rule_based_score=rule_based_score,
                    rule_based_description=rule_based_description,
                    c2pa_result=c2pa_result,
                    image_description=None,
                    is_jpeg=is_jpeg,
                    visual_confidence=visual_confidence,
                    model_consensus=model_consensus,
                    model_real_votes=model_real_votes,
                    model_ai_votes=model_ai_votes,
                    warnings=self.warnings,
                    effective_scores=effective_scores,
                    model_breakdown=model_breakdown  # Added for fallback logic
                )

                # 2. Consult Hybrid Judge
                self.log("Consulting LLM Judge for second opinion...", "INFO")
                final_verdict, f_score, f_desc, llm_obj, f_user_desc = self.hybrid_judge.judge(
                    case_file=case_file,
                    rule_based_verdict=rule_based_verdict,
                    rule_based_score=rule_based_score,
                    rule_based_description=rule_based_description,
                    image_path=image_path
                )

                # 3. Apply Override if LLM intervened
                if llm_obj:
                    # Do not allow LLM to override a Rule-Based Kill Switch
                    if "KILL SWITCH ACTIVATED" in rule_based_description and llm_obj.verdict != rule_based_verdict:
                        self.log(f"LLM attempted {llm_obj.verdict} override, but KILL SWITCH takes priority. Denied.", "WARN")
                        # Revert back to the mathematical rule-based scores
                        verdict = rule_based_verdict
                        final_score = rule_based_score
                        # Keep technical description from rule based but append note
                        description = rule_based_description + f" [LLM override to {llm_obj.verdict} denied by Layer 5 Kill Switch]"
                        # Keep rule-based user_description (already set)
                        judge_source = "hybrid (rule-enforced)"
                        llm_reasoning = llm_obj.reasoning
                    else:
                        # Allow normal override for ambiguous cases
                        # Use the pre-calculated tuple values (not llm_obj attributes)
                        verdict = final_verdict
                        final_score = f_score
                        description = f_desc
                        user_description = f_user_desc  # Update user description

                        is_debate = getattr(llm_obj, 'method', '') == 'adversarial_debate'
                        judge_source = "debate" if is_debate else "llm"
                        llm_reasoning = llm_obj.reasoning
                        label = "DEBATE" if is_debate else "LLM"
                        self.log(f"{label} OVERRIDE: {verdict}", "SUCCESS")
                else:
                    self.log("LLM agrees with Rule-Based verdict.", "INFO")

            except Exception as e:
                self.log(f"LLM Judge process failed: {e}", "WARN")

        # Final Cleanup
        confidence = self._calculate_confidence(final_score, layer_scores, c2pa_result)
        processing_time = int((datetime.now() - start_time).total_seconds() * 1000)

        # Extract debate data if adversarial debate was used.
        # This runs outside any try block, so read the attributes defensively:
        # a missing field must not discard an otherwise finished analysis.
        debate_data = None
        if llm_obj and getattr(llm_obj, 'method', '') == 'adversarial_debate':
            debate_data = {
                "rounds_taken": getattr(llm_obj, "rounds_taken", None),
                "debate_summary": getattr(llm_obj, "debate_summary", None),
                "debate_history": getattr(llm_obj, "debate_history", None)
            }

        self.log(f"FINAL: {verdict} ({final_score}/100) - {judge_source}", "SUCCESS")

        return DetectionResult(
            final_score=final_score,
            verdict=verdict,
            confidence=confidence,
            technical_description=description,
            user_description=user_description,
            layer_scores=layer_scores,
            layer_details=layer_details,
            processing_time_ms=processing_time,
            warnings=self.warnings.copy(),
            timestamp=datetime.now().isoformat(),
            llm_reasoning=llm_reasoning,
            judge_source=judge_source,
            debate_data=debate_data
        )

    def _calculate_confidence(self, final_score: int, layer_scores: Dict, c2pa_result: Dict) -> str:
        if c2pa_result.get("status") == "valid": return "HIGH"
        agreement_count = 0
        target = "REAL" if final_score > 50 else "FAKE"
        for s in layer_scores.values():
            if target == "REAL" and s > 10: agreement_count += 1
            if target == "FAKE" and s < -10: agreement_count += 1

        ratio = agreement_count / max(1, len(layer_scores))
        if ratio > 0.6: return "HIGH"
        if ratio > 0.4: return "MEDIUM"
        return "LOW"

    def _create_error_result(self, msg: str, start_time: datetime) -> DetectionResult:
        """Build the ``"ERROR"`` result returned when analysis cannot proceed.

        ``msg`` is used for both descriptions, the single warning and the
        ``"error"`` detail entry; the elapsed time is measured from
        ``start_time``.
        """
        elapsed_ms = int((datetime.now() - start_time).total_seconds() * 1000)
        return DetectionResult(
            final_score=0,
            verdict="ERROR",
            confidence="N/A",
            technical_description=msg,
            user_description=msg,
            layer_scores={},
            layer_details={"error": msg},
            processing_time_ms=elapsed_ms,
            warnings=[msg],
            timestamp=datetime.now().isoformat(),
        )


# ========================================
# CLI & API SETUP
# ========================================
def _sanitize(obj):
    """Recursively convert NumPy values into plain, JSON-serializable Python.

    Conversions:
      * dict   -> dict with sanitized values (NumPy scalar keys become
                  their Python equivalents)
      * list / tuple -> list with sanitized items
      * bool / np.bool_ -> bool (checked before int, because ``bool`` is a
                  subclass of ``int`` and would otherwise turn into 0 / 1)
      * int / np.integer -> int
      * float / np.floating -> float
      * np.ndarray -> nested list

    Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            (k.item() if isinstance(k, np.generic) else k): _sanitize(v)
            for k, v in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_sanitize(v) for v in obj]
    # Must precede the integer check: isinstance(True, int) is True.
    if isinstance(obj, (bool, np.bool_)):
        return bool(obj)
    if isinstance(obj, (np.integer, int)):
        return int(obj)
    if isinstance(obj, (np.floating, float)):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj

# --- CLI Execution ---
def main():
    """Command-line entry point: analyse one image and print a short report.

    The image path is taken from the first command-line argument.  When it is
    missing, a usage line is printed and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python main.py <image_path>")
        sys.exit(1)

    report = AIImageDetector().analyze_image(cli_args[0])

    print(f"\n=== REPORT: {report.verdict} ===")
    print(f"Score: {report.final_score}")
    print(f"Source: {report.judge_source}")
    reasoning = report.llm_reasoning
    if reasoning:
        print(f"LLM Reasoning: {reasoning}")
    print("-" * 30)
    # One row per layer: name padded to 20 columns, score with one decimal.
    for layer_name, layer_score in report.layer_scores.items():
        print(f"{layer_name:20}: {layer_score:>6.1f}")

if __name__ == "__main__":
    # Run the CLI only when at least one argument was given and the program
    # name (argv[0]) does not contain "uvicorn".
    if sys.argv[1:] and "uvicorn" not in sys.argv[0]:
        main()

# ===========================================================================
# /extract-phash bootstrap  –  must be defined before FastAPI app creation
# ===========================================================================
from contextlib import asynccontextmanager

from Universal_Detector.src.layers.phash_extractor import (
    extract_phash as _run_extract_phash,
    PhashResult,
    PHASH_MAX_BYTES as _PHASH_MAX_BYTES,
)

# Module-level HTTP client singleton.
# Re-using a single AsyncClient across requests avoids per-request TCP
# handshake and TLS negotiation overhead — critical for a fast-path endpoint.
# It is None at import time: _lifespan() assigns the client on startup and
# resets this name to None after closing it, so request handlers must check
# for None before using it.
_http_client: httpx.AsyncClient | None = None


async def _print_startup_health(client: "httpx.AsyncClient") -> None:
    """Print which LLM API keys are configured and probe OpenRouter once."""
    print("\n[Startup] Checking Model Health...")

    gemini_key = os.getenv("GOOGLE_AI_API_KEY") or os.getenv("GEMINI_API_KEY")
    if gemini_key:
        print(f"Gemini API Key found: {gemini_key[:5]}...")
    else:
        print("Gemini API Key MISSING")

    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        print(f"OpenRouter API Key found: {openrouter_key[:5]}...")
        print(f"   Configured Fallback Chain ({len(OPENROUTER_VISION_MODELS)} models):")
        for i, model in enumerate(OPENROUTER_VISION_MODELS):
            print(f"   {i+1}. {model}")

        print("   Checking OpenRouter connectivity...")
        try:
            # Short per-request timeout so a slow upstream cannot stall startup.
            resp = await client.get(
                "https://openrouter.ai/api/v1/auth/key",
                headers={"Authorization": f"Bearer {openrouter_key}"},
                timeout=2.0,
            )
            if resp.status_code == 200:
                print("   OpenRouter Connectivity: OK")
            else:
                print(f"   OpenRouter Connectivity Check Failed: {resp.status_code}")
        except Exception as e:
            # Deliberately best-effort: a failed probe is reported, not fatal.
            print(f"   ⚠️ OpenRouter Connectivity Check Error: {e}")
    else:
        print("OpenRouter API Key MISSING - Debate/Defense agents will fail.")

    print("[Startup] Health check complete.\n")


@asynccontextmanager
async def _lifespan(application: FastAPI):
    """Open / close the shared httpx client around the app lifetime.

    On startup the module-level ``_http_client`` is created and the startup
    health check is printed.  On shutdown the client is closed and the global
    is reset to ``None``.  The close runs in a ``finally`` block so the
    connection pool is released even when the health check raises or an
    exception/cancellation is thrown into the generator at ``yield``.
    """
    global _http_client

    # Configure timeouts explicitly for different phases:
    # - connect: time to establish TCP connection (including DNS)
    # - read: time to receive data chunks
    # - write: time to send request
    # - pool: time to acquire a connection from pool
    timeout_config = httpx.Timeout(
        connect=10.0,    # 10s for DNS + TCP handshake
        read=30.0,       # 30s for reading response (large images)
        write=10.0,      # 10s for sending request
        pool=5.0,        # 5s to acquire connection from pool
    )

    # Connection limits tuned for high-throughput:
    # - max_connections: total concurrent connections
    # - max_keepalive_connections: kept warm for reuse
    # - keepalive_expiry: how long to keep idle connections
    limits_config = httpx.Limits(
        max_connections=200,
        max_keepalive_connections=50,
        keepalive_expiry=30.0,
    )

    client = httpx.AsyncClient(
        follow_redirects=True,
        timeout=timeout_config,
        limits=limits_config,
        http2=True,  # Enable HTTP/2 for better multiplexing
    )
    _http_client = client

    try:
        # Startup health check lives here because @app.on_event handlers are
        # ignored when lifespan= is used.
        await _print_startup_health(client)
        yield
    finally:
        # Close via the local reference so cleanup does not depend on the
        # global still pointing at this client.
        await client.aclose()
        _http_client = None


# --- FastAPI App ---
app = FastAPI(title="AI Image Detection v6.0", lifespan=_lifespan)

# Wide-open CORS: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single detector instance used by the analysis endpoints.  The LLM judge is
# enabled unless ENABLE_LLM_JUDGE is set to something other than "true"
# (compared case-insensitively).
api_detector = AIImageDetector(
    enable_llm_judge=os.getenv("ENABLE_LLM_JUDGE", "true").lower() == "true",
)

@app.post("/analyze")
async def api_analyze(file: UploadFile = File(...)):
    """
    Run the multi-layer forensic analysis on an uploaded image.

    The upload is written to a uniquely named file under ``temp_uploads``,
    analysed, and the temporary file is removed whether or not analysis
    succeeds.  Responds with HTTP 400 when the upload is not declared as an
    image.
    """
    # content_type is None when the multipart part carries no Content-Type
    # header; treat that as "not an image" instead of crashing with a 500.
    declared_type = file.content_type or ""
    if not declared_type.startswith("image/"):
        raise HTTPException(400, "Invalid file type")

    # filename may also be None; fall back to no extension in that case.
    extension = os.path.splitext(file.filename or "")[1]
    temp_path = os.path.join("temp_uploads", f"{uuid.uuid4().hex}{extension}")
    os.makedirs("temp_uploads", exist_ok=True)

    try:
        with open(temp_path, "wb") as f:
            shutil.copyfileobj(file.file, f)
        result = api_detector.analyze_image(temp_path)
        return _sanitize(asdict(result))
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)

@app.get("/health")
def health():
    # Liveness probe: always answers with a static OK payload.
    return {"status": "ok"}


# ---------------------------------------------------------------------------
# Request model for the URL-based analysis endpoint
# ---------------------------------------------------------------------------
_ALLOWED_IMAGE_CONTENT_TYPES = {
    "image/jpeg", "image/png",
}
_MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024  # 20 MB hard limit
_DOWNLOAD_TIMEOUT_SECONDS = 15


class AnalyzeUrlRequest(BaseModel):
    # Request body for POST /analyze-url.
    # The field is named 's3_url' (not 'url') for consistency with the
    # /extract-phash payload.
    s3_url: HttpUrl

    @field_validator("s3_url")
    @classmethod
    def must_be_http_or_https(cls, v: HttpUrl) -> HttpUrl:
        """Accept the URL only when its scheme is http or https."""
        if v.scheme in ("http", "https"):
            return v
        raise ValueError("Only http/https URLs are supported.")


def _download_image_to_path(url_str: str, dest_path: str) -> int:
    """Stream *url_str* into *dest_path* and return the number of bytes written.

    Blocking helper built on ``requests``; intended to run in a worker thread
    so the event loop stays responsive during the download.

    Raises:
        HTTPException: 400 for a non-200 upstream status, 415 when the
            Content-Type is not in ``_ALLOWED_IMAGE_CONTENT_TYPES``, 413 when
            the body exceeds ``_MAX_IMAGE_SIZE_BYTES``.
        requests.exceptions.RequestException: on network-level failures.
    """
    # Stream the download so we can validate headers before buffering the body
    with requests.get(url_str, stream=True, timeout=_DOWNLOAD_TIMEOUT_SECONDS,
                      allow_redirects=True) as response:

        if response.status_code != 200:
            raise HTTPException(
                status_code=400,
                detail=f"Failed to download image: HTTP {response.status_code} from remote server."
            )

        # Validate Content-Type header (parameters such as charset are ignored)
        content_type = response.headers.get("Content-Type", "").split(";")[0].strip().lower()
        if content_type not in _ALLOWED_IMAGE_CONTENT_TYPES:
            raise HTTPException(
                status_code=415,
                detail=f"Remote URL does not point to a supported image. "
                       f"Content-Type received: '{content_type}'."
            )

        # Stream to disk while enforcing the size limit
        downloaded = 0
        with open(dest_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=65536):
                downloaded += len(chunk)
                if downloaded > _MAX_IMAGE_SIZE_BYTES:
                    raise HTTPException(
                        status_code=413,
                        detail=f"Remote image exceeds the maximum allowed size of "
                               f"{_MAX_IMAGE_SIZE_BYTES // (1024 * 1024)} MB."
                    )
                f.write(chunk)

    return downloaded


@app.post("/analyze-url")
async def api_analyze_url(body: AnalyzeUrlRequest):
    """
    Download an image from the provided URL and run the same multi-layer
    forensic analysis as the /analyze endpoint.

    - Validates that the URL is http/https.
    - Streams the response to check Content-Type and enforce a size limit
      before writing to disk (avoids downloading huge/non-image payloads).
    - Cleans up the temporary file regardless of success or failure.
    """
    import asyncio

    # SECURITY NOTE(review): the URL is caller-supplied and fetched server-side
    # with redirects followed; no host allow-list is visible in this handler.
    # Confirm that SSRF protections exist elsewhere.
    url_str = str(body.s3_url)

    # Derive a safe file extension from the URL path (fallback to .jpg)
    url_path = urlparse(url_str).path
    _, ext = os.path.splitext(url_path)
    ext = ext.lower() if ext.lower() in (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif", ".tiff") else ".jpg"

    temp_path = os.path.join("temp_uploads", f"{uuid.uuid4().hex}{ext}")
    os.makedirs("temp_uploads", exist_ok=True)

    try:
        # The requests-based download is blocking; run it in a worker thread
        # so other requests are not stalled while it is in flight.
        downloaded = await asyncio.to_thread(_download_image_to_path, url_str, temp_path)

        if downloaded == 0:
            raise HTTPException(status_code=400, detail="Downloaded file is empty.")

        # Analysis stays inline: whether the shared detector is safe to call
        # from several threads at once cannot be confirmed from this handler.
        result = api_detector.analyze_image(temp_path)
        return _sanitize(asdict(result))

    except HTTPException:
        raise  # Re-raise FastAPI HTTP exceptions as-is
    except requests.exceptions.Timeout:
        raise HTTPException(
            status_code=504,
            detail=f"Request timed out while downloading image from the provided URL "
                   f"(limit: {_DOWNLOAD_TIMEOUT_SECONDS}s)."
        )
    except requests.exceptions.ConnectionError as e:
        raise HTTPException(status_code=502, detail=f"Could not connect to remote server: {e}")
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=400, detail=f"Error downloading image: {e}")
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)


# ===========================================================================
# /extract-phash  –  High-throughput perceptual hash endpoint
# ===========================================================================


# ---------------------------------------------------------------------------
# Request / response models
# ---------------------------------------------------------------------------
class ExtractPhashRequest(BaseModel):
    """Payload for the /extract-phash endpoint."""

    # Pre-signed or public S3 URL; must be absolute and use http or https.
    s3_url: str

    @field_validator("s3_url")
    @classmethod
    def _validate_url_scheme(cls, v: str) -> str:
        """Reject values that are not fully qualified http(s) URLs."""
        # The first two components of the parse result are scheme and netloc.
        scheme, netloc = urlparse(v)[:2]
        if scheme not in ("http", "https"):
            raise ValueError("s3_url must use http or https scheme.")
        if not netloc:
            raise ValueError("s3_url must be a fully qualified URL.")
        return v


class ExtractPhashResponse(BaseModel):
    """Hashes for the original and the horizontally mirrored image."""

    # Response schema of POST /extract-phash (declared as its response_model).
    # NOTE(review): the field comments below describe binary ("0"/"1")
    # strings, while the endpoint docstring speaks of hex strings — confirm
    # which encoding the extractor actually emits.
    original_hash: str    # Binary string, e.g. "10110010…"
    mirrored_hash: str    # Binary string of the FLIP_LEFT_RIGHT variant
    hash_algorithm: str   # "pdq" | "phash"
    hash_bits: int        # Length of each binary string
    border_stripped: bool # Whether a uniform border was detected and removed


# ---------------------------------------------------------------------------
# Endpoint
# ---------------------------------------------------------------------------
@app.post("/extract-phash", response_model=ExtractPhashResponse)
async def extract_phash_endpoint(body: ExtractPhashRequest) -> ExtractPhashResponse:
    """
    High-throughput perceptual hash extraction endpoint.

    Downloads an image from *s3_url* entirely in memory (no disk I/O),
    applies forensic pre-processing mitigations (border stripping, mirror defense),
    and returns PDQ/pHash hex strings.

    Optimizations:
    - Connection pooling with HTTP/2 multiplexing
    - Streaming download with size limits
    - CPU-bound hashing offloaded to thread pool
    - Comprehensive error handling with retry hints

    Returns:
        - **original_hash** – hash of the border-stripped image (hex)
        - **mirrored_hash** – hash of the horizontally flipped variant (hex)
    """
    import asyncio
    import logging

    logger = logging.getLogger("extract-phash")

    # Validate client is ready
    if _http_client is None:
        logger.error("HTTP client not initialized - lifespan event may have failed")
        raise HTTPException(
            status_code=503,
            detail="Service temporarily unavailable: HTTP client not initialized. Please retry.",
        )

    url_str = str(body.s3_url)

    # ------------------------------------------------------------------
    # 1. Async stream download into memory (no disk I/O)
    # ------------------------------------------------------------------
    buf = io.BytesIO()
    downloaded = 0

    # Retry configuration for transient failures
    max_retries = 2
    retry_delay = 0.5  # seconds
    last_error: Exception | None = None

    for attempt in range(max_retries + 1):
        try:
            buf.seek(0)
            buf.truncate(0)
            downloaded = 0

            async with _http_client.stream("GET", url_str) as response:
                # Check HTTP status
                if response.status_code == 404:
                    raise HTTPException(
                        status_code=404,
                        detail="Image not found at the provided URL.",
                    )
                if response.status_code == 403:
                    raise HTTPException(
                        status_code=403,
                        detail="Access denied. The signed URL may have expired.",
                    )
                if response.status_code >= 500:
                    # Server error - worth retrying
                    raise httpx.HTTPStatusError(
                        f"Upstream server error: {response.status_code}",
                        request=response.request,
                        response=response,
                    )
                if response.status_code != 200:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Failed to download image: HTTP {response.status_code}.",
                    )

                # Validate content type (but be lenient - S3 sometimes returns generic types)
                content_type = (
                    response.headers.get("content-type", "")
                    .split(";")[0]
                    .strip()
                    .lower()
                )
                # Allow: image/*, application/octet-stream, or empty (S3 pre-signed URLs)
                if content_type and not (
                    content_type.startswith("image/") or
                    content_type == "application/octet-stream" or
                    content_type == "binary/octet-stream"
                ):
                    raise HTTPException(
                        status_code=415,
                        detail=f"URL does not point to a supported image. "
                               f"Content-Type received: '{content_type}'.",
                    )

                # Stream chunks with size limit
                async for chunk in response.aiter_bytes(chunk_size=65536):
                    downloaded += len(chunk)
                    if downloaded > _PHASH_MAX_BYTES:
                        raise HTTPException(
                            status_code=413,
                            detail=f"Image exceeds the "
                                   f"{_PHASH_MAX_BYTES // (1024 * 1024)} MB limit.",
                        )
                    buf.write(chunk)

            # Success - break out of retry loop
            last_error = None
            break

        except HTTPException:
            # Don't retry client errors (4xx)
            raise
        except httpx.TimeoutException as exc:
            last_error = exc
            logger.warning(f"Timeout on attempt {attempt + 1}/{max_retries + 1}: {exc}")
            if attempt < max_retries:
                await asyncio.sleep(retry_delay * (attempt + 1))
                continue
        except httpx.ConnectError as exc:
            # DNS resolution failures, connection refused, etc.
            last_error = exc
            error_msg = str(exc)