OpenAgriNet · Abhishek-Kumar-Rai5 · May 16, 2026
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,38 @@
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Python cache
+__pycache__/
+*.py[cod]
+
+# Pytest
+.pytest_cache/
+
+# Coverage
+.coverage
+htmlcov/
+
+# Jupyter
+.ipynb_checkpoints/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# IDE/editor
+.vscode/
+.idea/
+
+# Build artifacts
+build/
+dist/
+*.egg-info/
+
+# Local test outputs
+test_results.txt
+
+# Temporary files
+*.tmp
+*.log
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/fixtures.py b/tests/fixtures.py
@@ -0,0 +1,235 @@
+"""
+fixtures.py
+-----------
+Realistic test fixtures for multilingual and trajectory tests.
+"""
+
+from __future__ import annotations
+
+from typing import List
+
+from training_setup_logs.trajectory.models import (
+    MessageRole,
+    ToolCall,
+    ToolCallStatus,
+    Trajectory,
+    TurnMessage,
+)
+
+
+# ---------------------------------------------------------------------------
+# Multilingual query fixtures
+# ---------------------------------------------------------------------------
+
+
+# Mixed pool of ten queries: the first seven are weather questions written in
+# different scripts/styles, the last three are off-topic negatives.
+WEATHER_QUERIES_MULTILINGUAL: List[str] = [
+    # Hindi written in Devanagari script
+    "कल मौसम कैसा रहेगा",
+    # Romanized (transliterated) spelling variants
+    "kal mausam kaisa rahega",
+    "kal mosam kaisa rahega",
+    "kal mousam kaisa hoga",
+    # Code-switched (Hindi and English mixed in one sentence)
+    "kal weather kaisa hai",
+    "मौसम tomorrow kaisa hoga",
+    # English
+    "what will the weather be like tomorrow",
+    # Unrelated (should NOT cluster with weather queries)
+    "मुझे एक अच्छा रेस्टोरेंट बताओ",
+    "recommend a restaurant near me",
+    "find me a good book to read",
+]
+
+# The four lists below hold three queries each, one list per writing style.
+# NOTE(review): entries look index-aligned across the lists (same intent at the
+# same position) — confirm before relying on that in a test.
+
+# Devanagari-script Hindi.
+HINDI_QUERIES: List[str] = [
+    "आज का तापमान क्या है",
+    "कल बारिश होगी क्या",
+    "मुझे दिल्ली का मौसम बताओ",
+]
+
+# Hindi written in Latin script.
+TRANSLITERATED_QUERIES: List[str] = [
+    "aaj ka tapmaan kya hai",
+    "kal barish hogi kya",
+    "mujhe delhi ka mausam batao",
+]
+
+# Hindi and English mixed within a single query.
+CODE_SWITCHED_QUERIES: List[str] = [
+    "aaj temperature kitna hai",
+    "kal rain hogi kya Delhi mein",
+    "मुझे weather update do",
+]
+
+# Plain English.
+ENGLISH_QUERIES: List[str] = [
+    "what is the temperature today",
+    "will it rain tomorrow",
+    "give me the weather update for Delhi",
+]
+
+
+# ---------------------------------------------------------------------------
+# Trajectory fixtures
+# ---------------------------------------------------------------------------
+
+
+def make_clean_trajectory(tid: str = "traj_clean_001") -> Trajectory:
+    """Build the happy-path fixture: one successful tool call, no retries.
+
+    The conversation is one English user question followed by two assistant
+    turns; the single ``weather_api`` call is marked ``SUCCESS``.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+    conversation = [
+        TurnMessage(
+            role=MessageRole.USER,
+            content="What is the weather in Delhi tomorrow?",
+            language="en",
+        ),
+        TurnMessage(
+            role=MessageRole.ASSISTANT,
+            content="Let me check the weather for you.",
+            language="en",
+        ),
+        TurnMessage(
+            role=MessageRole.ASSISTANT,
+            content="Tomorrow in Delhi: 32°C, partly cloudy.",
+            language="en",
+        ),
+    ]
+    weather_lookup = ToolCall(
+        tool_name="weather_api",
+        arguments={"city": "Delhi", "date": "tomorrow"},
+        return_value={"temp": 32, "condition": "partly cloudy"},
+        status=ToolCallStatus.SUCCESS,
+        latency_ms=230.0,
+    )
+    return Trajectory(
+        trajectory_id=tid,
+        turns=conversation,
+        tool_calls=[weather_lookup],
+    )
+
+
+def make_retry_trajectory(tid: str = "traj_retry_001") -> Trajectory:
+    """Build a fixture whose first tool call fails and whose retry succeeds.
+
+    Both calls hit ``weather_api`` with the same arguments; the second one
+    carries ``retry_of=0`` and is marked ``SUCCESS``.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+    conversation = [
+        TurnMessage(
+            role=MessageRole.USER,
+            content="kal mausam kaisa rahega",
+            language="hi-latn",
+        ),
+        TurnMessage(
+            role=MessageRole.ASSISTANT,
+            content="Let me check. One moment.",
+            language="en",
+        ),
+        TurnMessage(
+            role=MessageRole.ASSISTANT,
+            content="kal Delhi mein 30°C hoga.",
+            language="hi-latn",
+        ),
+    ]
+
+    # First attempt: fails quickly and returns nothing.
+    failed_attempt = ToolCall(
+        tool_name="weather_api",
+        arguments={"city": "Delhi", "date": "tomorrow"},
+        return_value=None,
+        status=ToolCallStatus.FAILURE,
+        latency_ms=40.0,
+    )
+    # Second attempt: same request, flagged as a retry of the call at index 0.
+    successful_retry = ToolCall(
+        tool_name="weather_api",
+        arguments={"city": "Delhi", "date": "tomorrow"},
+        return_value={"temp": 30, "condition": "sunny"},
+        status=ToolCallStatus.SUCCESS,
+        latency_ms=280.0,
+        retry_of=0,
+    )
+
+    return Trajectory(
+        trajectory_id=tid,
+        turns=conversation,
+        tool_calls=[failed_attempt, successful_retry],
+    )
+
+
+def make_redundant_trajectory(tid: str = "traj_redundant_001") -> Trajectory:
+    """Build a fixture that issues the same successful tool call twice.
+
+    The two ``weather_api`` calls share tool name, arguments, return value and
+    status; only their recorded latency differs.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+
+    def mumbai_lookup(latency: float) -> ToolCall:
+        # Each invocation creates a distinct ToolCall with its own dicts.
+        return ToolCall(
+            tool_name="weather_api",
+            arguments={"city": "Mumbai"},
+            return_value={"temp": 28},
+            status=ToolCallStatus.SUCCESS,
+            latency_ms=latency,
+        )
+
+    conversation = [
+        TurnMessage(role=MessageRole.USER, content="Weather in Mumbai?", language="en"),
+        TurnMessage(role=MessageRole.ASSISTANT, content="Mumbai: 28°C, humid.", language="en"),
+    ]
+    # The second entry repeats the first request verbatim — that is the redundancy.
+    duplicated_calls = [mumbai_lookup(200.0), mumbai_lookup(190.0)]
+
+    return Trajectory(
+        trajectory_id=tid,
+        turns=conversation,
+        tool_calls=duplicated_calls,
+    )
+
+
+def make_incomplete_trajectory(tid: str = "traj_incomplete_001") -> Trajectory:
+    """Build a fixture whose conversation stops on an unanswered user turn.
+
+    The only tool call has no return value and is marked ``MISSING_RETURN``.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+    opening_question = TurnMessage(
+        role=MessageRole.USER,
+        content="कल बारिश होगी क्या?",
+        language="hi",
+    )
+    stalling_reply = TurnMessage(
+        role=MessageRole.ASSISTANT,
+        content="Let me check...",
+        language="en",
+    )
+    # Final turn belongs to the user, so the assistant never answers.
+    unanswered_follow_up = TurnMessage(
+        role=MessageRole.USER,
+        content="जल्दी बताओ",
+        language="hi",
+    )
+    dangling_call = ToolCall(
+        tool_name="weather_api",
+        arguments={"city": "unknown"},
+        return_value=None,
+        status=ToolCallStatus.MISSING_RETURN,
+        latency_ms=5000.0,
+    )
+    return Trajectory(
+        trajectory_id=tid,
+        turns=[opening_question, stalling_reply, unanswered_follow_up],
+        tool_calls=[dangling_call],
+    )
+
+
+def make_multilingual_recovery_trajectory(tid: str = "traj_ml_recovery_001") -> Trajectory:
+    """Build a harder multilingual fixture that recovers from a tool failure.
+
+    It combines:
+    - a code-switched opening query from the user,
+    - an assistant clarification question and the user's answer,
+    - a failed ``rainfall_api`` call,
+    - a successful ``weather_api`` call flagged ``is_fallback=True``.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+    # (role, content, language) for each turn, in conversation order.
+    dialogue = (
+        (MessageRole.USER, "kal rain hogi kya Delhi mein?", "hi-en-mixed"),
+        (MessageRole.ASSISTANT, "Which area of Delhi?", "en"),
+        (MessageRole.USER, "South Delhi", "en"),
+        (MessageRole.ASSISTANT, "South Delhi: moderate rain expected.", "en"),
+    )
+    conversation = [
+        TurnMessage(role=speaker, content=text, language=lang)
+        for speaker, text, lang in dialogue
+    ]
+
+    primary_failure = ToolCall(
+        tool_name="rainfall_api",
+        arguments={"city": "Delhi"},
+        return_value=None,
+        status=ToolCallStatus.FAILURE,
+        latency_ms=45.0,
+    )
+    fallback_success = ToolCall(
+        tool_name="weather_api",
+        arguments={"city": "South Delhi", "date": "tomorrow"},
+        return_value={"rain_prob": 0.75},
+        status=ToolCallStatus.SUCCESS,
+        latency_ms=310.0,
+        is_fallback=True,
+    )
+
+    return Trajectory(
+        trajectory_id=tid,
+        turns=conversation,
+        tool_calls=[primary_failure, fallback_success],
+    )
+
+
+def make_hallucinated_args_trajectory(tid: str = "traj_halluc_001") -> Trajectory:
+    """Build a fixture whose tool arguments contain unfilled placeholders.
+
+    The ``flight_booking`` call carries ``"<FILL_DATE>"`` and ``"TODO"`` as
+    argument values, has no return value and is marked ``HALLUCINATED``.
+
+    Args:
+        tid: Value stored as the trajectory's ``trajectory_id``.
+
+    Returns:
+        A freshly constructed ``Trajectory``.
+    """
+    conversation = [
+        TurnMessage(
+            role=MessageRole.USER,
+            content="Book me a flight to Goa",
+            language="en",
+        ),
+        TurnMessage(
+            role=MessageRole.ASSISTANT,
+            content="I've booked a flight for you.",
+            language="en",
+        ),
+    ]
+    # Only the destination is a real value; the other two are placeholders.
+    placeholder_args = {"destination": "Goa", "departure": "<FILL_DATE>", "passenger": "TODO"}
+    bogus_booking = ToolCall(
+        tool_name="flight_booking",
+        arguments=placeholder_args,
+        return_value=None,
+        status=ToolCallStatus.HALLUCINATED,
+        latency_ms=120.0,
+    )
+    return Trajectory(
+        trajectory_id=tid,
+        turns=conversation,
+        tool_calls=[bogus_booking],
+    )
+
+
+# One default-id instance from each trajectory factory in this module, in
+# definition order.
+# NOTE: these objects are built once when the module is imported, so every
+# importer shares the same instances; call the factory functions directly
+# when a test needs a trajectory it can modify.
+ALL_TRAJECTORIES: List[Trajectory] = [
+    make_clean_trajectory(),
+    make_retry_trajectory(),
+    make_redundant_trajectory(),
+    make_incomplete_trajectory(),
+    make_multilingual_recovery_trajectory(),
+    make_hallucinated_args_trajectory(),
+]
diff --git a/tests/test_leakage.py b/tests/test_leakage.py
@@ -0,0 +1,77 @@
+"""
+test_leakage.py
+---------------
+Tests for train/eval split leakage detection.
+"""
+
+import pytest
+
+from training_setup_logs.multilingual.leakage_detector import (
+    LeakageReport,
+    detect_leakage,
+)
+
+
+class TestLeakageDetector:
+    """Checks of ``detect_leakage`` on small hand-written train/eval splits."""
+
+    def test_no_leakage_on_distinct_splits(self):
+        """Unrelated splits report zero leaks and a zero leak rate."""
+        train_queries = [
+            "how do I apply for a passport",
+            "best restaurants in Kolkata",
+            "Python list comprehension tutorial",
+        ]
+        eval_queries = [
+            "how to file income tax return",
+            "train schedule from Delhi to Mumbai",
+            "machine learning overfitting explained",
+        ]
+
+        result = detect_leakage(train_queries, eval_queries, skip_semantic=True)
+
+        assert result.total_leaks == 0
+        assert result.leak_rate == 0.0
+
+    def test_exact_leak_detected(self):
+        """A query present verbatim in both splits shows up in ``exact_leaks``."""
+        shared_query = "कल मौसम कैसा रहेगा"
+        train_queries = [shared_query, "best hotel in Goa"]
+        eval_queries = [shared_query, "completely different query"]
+
+        result = detect_leakage(train_queries, eval_queries, skip_semantic=True)
+
+        exact_hits = result.exact_leaks
+        assert len(exact_hits) >= 1
+
+    def test_transliteration_leak_detected(self):
+        """Two romanized spellings of one query are reported as a leak."""
+        train_queries = ["kal mausam kaisa rahega"]
+        # Same sentence with "mausam" spelled "mosam".
+        eval_queries = ["kal mosam kaisa rahega"]
+
+        result = detect_leakage(train_queries, eval_queries, skip_semantic=True)
+
+        # The two spellings are expected to match once canonicalized.
+        assert result.total_leaks >= 1
+
+    def test_empty_splits(self):
+        """Two empty splits yield a report with no leaks."""
+        result = detect_leakage([], [])
+        assert result.total_leaks == 0
+
+    def test_empty_train(self):
+        """An empty train split yields no leaks for a non-empty eval split."""
+        result = detect_leakage([], ["some query"])
+        assert result.total_leaks == 0
+
+    def test_report_structure(self):
+        """``to_dict()`` exposes the expected keys and a list of leaks."""
+        train_queries = ["hello world"]
+        eval_queries = ["hello world"]
+
+        result = detect_leakage(train_queries, eval_queries, skip_semantic=True)
+        payload = result.to_dict()
+
+        for expected_key in ("train_size", "eval_size", "leak_rate", "cross_split_leaks"):
+            assert expected_key in payload
+        assert isinstance(payload["cross_split_leaks"], list)
+
+    def test_leak_rate_bounded(self):
+        """The leak rate of a partially overlapping split lies within [0, 1]."""
+        train_queries = ["q1", "q2", "q3"]
+        eval_queries = ["q1", "q2", "q4"]
+
+        result = detect_leakage(train_queries, eval_queries, skip_semantic=True)
+
+        rate = result.leak_rate
+        assert 0.0 <= rate <= 1.0
+
+    def test_full_leak_detected_semantic(self):
+        """
+        Exercise the semantic (embedding-based) detection path.
+
+        The eval query is the train query plus one extra word, so TF-IDF
+        char-ngram similarity will also catch it.
+        """
+        train_queries = ["delhi weather tomorrow forecast rain"]
+        # Superset of the train query.
+        eval_queries = ["delhi weather tomorrow forecast rain sunny"]
+
+        result = detect_leakage(
+            train_queries,
+            eval_queries,
+            semantic_threshold=0.50,
+            skip_semantic=False,
+        )
+
+        assert result.total_leaks >= 1