Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Reference Docs
plan.md
PRD.md

# Dev Scripts
verify.py

# Data Protection
# Ignore raw/production logs by default
data/**
# But allow samples and gold sets
!data/
!data/sample_logs/
!data/sample_logs/*
!data/gold/
!data/gold/*

# Python Environment & Cache
.venv/
env/
venv/
ENV/
__pycache__/
*.py[cod]
*$py.class

# Test & Lint Cache
.pytest_cache/
.ruff_cache/
.coverage
htmlcov/
.mypy_cache/

# OS Specific
.DS_Store
Thumbs.db
5 changes: 5 additions & 0 deletions data/gold/dpo_candidates.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"prompt": [{"role": "system", "content": "You are an agricultural assistant. Always respond in the farmer's language. Use tools efficiently."}, {"role": "user", "content": "मेरे खेत का मौसम बताओ"}], "chosen": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\"]}"}, "id": "call_c1"}]}, {"role": "tool", "content": "Location: Pune, Maharashtra (18.52, 73.85)", "tool_call_id": "call_c1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 18.52, \"lon\": 73.85, \"days\": 5}"}, "id": "call_c2"}]}, {"role": "tool", "content": "Sunny, 28-34C, no rain expected.", "tool_call_id": "call_c2"}, {"role": "assistant", "content": "आपके पुणे के खेत में अगले 5 दिन धूप रहेगी, तापमान 28-34°C।"}], "rejected": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\"]}"}, "id": "call_r1"}]}, {"role": "tool", "content": "Location: Pune, Maharashtra (18.52, 73.85)", "tool_call_id": "call_r1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\"]}"}, "id": "call_r2"}]}, {"role": "tool", "content": "Location: Pune, Maharashtra (18.52, 73.85)", "tool_call_id": "call_r2"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 18.52, \"lon\": 73.85, \"days\": 5}"}, "id": "call_r3"}]}, {"role": "tool", "content": "Sunny, 28-34C, no rain expected.", "tool_call_id": "call_r3"}, {"role": "assistant", "content": "आपके पुणे के खेत में अगले 5 दिन धूप रहेगी।"}], "pair_type": "redundant_tool_call", "metadata": {"session_id": "dpo_001", "complexity_tier": "low", "hard_negative_kind": "redundant_tool_call"}}
{"prompt": [{"role": "system", "content": "You are an agricultural assistant. Always respond in the farmer's language. Use tools efficiently."}, {"role": "user", "content": "मेरी फसल की जानकारी दो"}], "chosen": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"crop\"]}"}, "id": "call_c1"}]}, {"role": "tool", "content": "error: farmer record not found", "tool_call_id": "call_c1"}, {"role": "assistant", "content": "माफ करें, आपकी फसल की जानकारी अभी उपलब्ध नहीं है। कृपया अपना किसान ID जांचें।"}], "rejected": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"crop\"]}"}, "id": "call_r1"}]}, {"role": "tool", "content": "error: farmer record not found", "tool_call_id": "call_r1"}, {"role": "assistant", "content": "Your crop is wheat."}], "pair_type": "tool_failure_no_recovery", "metadata": {"session_id": "dpo_002", "complexity_tier": "medium", "hard_negative_kind": "hallucinated_response_on_failure"}}
{"prompt": [{"role": "system", "content": "You are an agricultural assistant. Always respond in the farmer's language. Use tools efficiently."}, {"role": "user", "content": "क्या मुझे अभी सिंचाई करनी चाहिए?"}], "chosen": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\", \"crop\"]}"}, "id": "call_c1"}]}, {"role": "tool", "content": "Location: Nashik (20.0, 73.78). Crop: Grapes.", "tool_call_id": "call_c1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 20.0, \"lon\": 73.78, \"days\": 3}"}, "id": "call_c2"}]}, {"role": "tool", "content": "Rain expected tomorrow: 15mm.", "tool_call_id": "call_c2"}, {"role": "assistant", "content": "कल बारिश होने वाली है (15mm), इसलिए आज सिंचाई न करें।"}], "rejected": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\"]}"}, "id": "call_r1"}]}, {"role": "tool", "content": "Location: Nashik (20.0, 73.78).", "tool_call_id": "call_r1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"crop\"]}"}, "id": "call_r2"}]}, {"role": "tool", "content": "Crop: Grapes.", "tool_call_id": "call_r2"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 20.0, \"lon\": 73.78, \"days\": 3}"}, "id": "call_r3"}]}, {"role": "tool", "content": "Rain expected tomorrow: 15mm.", "tool_call_id": "call_r3"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "irrigation_advisory", "arguments": "{\"lat\": 20.0, \"lon\": 73.78, \"crop\": \"Grapes\"}"}, "id": "call_r4"}]}, {"role": "tool", "content": "Standard irrigation: every 3 days.", "tool_call_id": "call_r4"}, {"role": "assistant", "content": "सिंचाई हर 3 दिन में करें।"}], "pair_type": "suboptimal_path", "metadata": {"session_id": "dpo_003", "complexity_tier": "medium", "hard_negative_kind": "extra_tool_call"}}
{"prompt": [{"role": "system", "content": "You are an agricultural assistant. Always respond in the farmer's language. Use tools efficiently."}, {"role": "user", "content": "मेरे खेत की मिट्टी कैसी है?"}], "chosen": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"soil_health\"]}"}, "id": "call_c1"}]}, {"role": "tool", "content": "Soil pH: 6.5. Nitrogen: medium. Organic matter: 1.8%.", "tool_call_id": "call_c1"}, {"role": "assistant", "content": "आपकी मिट्टी का pH 6.5 है, नाइट्रोजन मध्यम स्तर पर है और जैविक पदार्थ 1.8% है। गेहूं के लिए यह अच्छी स्थिति है।"}], "rejected": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"soil_health\"]}"}, "id": "call_r1"}]}, {"role": "tool", "content": "Soil pH: 6.5. Nitrogen: medium. Organic matter: 1.8%.", "tool_call_id": "call_r1"}, {"role": "assistant", "content": "Your soil pH is 6.5, nitrogen is at medium levels, and organic matter is 1.8%. This is suitable for wheat cultivation."}], "pair_type": "persona_violation", "metadata": {"session_id": "dpo_004", "complexity_tier": "low", "hard_negative_kind": "persona_leakage"}}
{"prompt": [{"role": "system", "content": "You are an agricultural assistant. Always respond in the farmer's language. Use tools efficiently."}, {"role": "user", "content": "अगले हफ्ते मेरे खेत में क्या करना चाहिए?"}], "chosen": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\", \"crop\", \"soil_health\"]}"}, "id": "call_c1"}]}, {"role": "tool", "content": "Location: Aurangabad (19.87, 75.34). Crop: Cotton. Soil pH: 7.1.", "tool_call_id": "call_c1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 19.87, \"lon\": 75.34, \"days\": 7}"}, "id": "call_c2"}]}, {"role": "tool", "content": "Dry week ahead, max 36C.", "tool_call_id": "call_c2"}, {"role": "assistant", "content": "अगले हफ्ते गर्मी रहेगी (36°C), कपास की फसल के लिए सिंचाई बढ़ाएं और सुबह जल्दी करें।"}], "rejected": [{"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "weather_forecast", "arguments": "{\"lat\": 19.87, \"lon\": 75.34, \"days\": 7}"}, "id": "call_r1"}]}, {"role": "tool", "content": "Dry week ahead, max 36C.", "tool_call_id": "call_r1"}, {"role": "assistant", "content": null, "tool_calls": [{"type": "function", "function": {"name": "fetch_agristack_data", "arguments": "{\"farmer_id\": \"[GOVT_ID_1]\", \"fields\": [\"location\", \"crop\"]}"}, "id": "call_r2"}]}, {"role": "tool", "content": "Location: Aurangabad (19.87, 75.34). Crop: Cotton.", "tool_call_id": "call_r2"}, {"role": "assistant", "content": "Next week will be hot. You should irrigate your crops."}], "pair_type": "wrong_tool_order", "metadata": {"session_id": "dpo_005", "complexity_tier": "low", "hard_negative_kind": "wrong_tool_order"}}
56 changes: 56 additions & 0 deletions data/gold/gold_session.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[
{
"session_id": "gold_001",
"user_question": "मेरे खेत में इस हफ्ते बारिश होगी क्या?",
"agent_turns": [
{
"parts": [
{
"part_kind": "tool-call",
"tool_name": "fetch_agristack_data",
"args": {
"farmer_id": "FARM_GOLD_01",
"fields": ["location", "crop"]
},
"tool_call_id": "call_g001"
},
{
"part_kind": "tool-return",
"tool_name": "fetch_agristack_data",
"content": "Location: Loha, Maharashtra (19.066, 77.174). Crop: Soybean. Season: Kharif.",
"tool_call_id": "call_g001"
},
{
"part_kind": "tool-call",
"tool_name": "weather_forecast",
"args": {
"lat": 19.066,
"lon": 77.174,
"days": 7
},
"tool_call_id": "call_g002"
},
{
"part_kind": "tool-return",
"tool_name": "weather_forecast",
"content": "Days 1-3: Partly cloudy, no rain. Day 4: 60% chance of light rain (8mm). Days 5-7: Clear skies.",
"tool_call_id": "call_g002"
}
],
"model_name": "gemini-2.0-flash",
"timestamp": "2024-11-15T08:00:00Z",
"kind": "response",
"finish_reason": "stop",
"run_id": "run_gold_001",
"usage": {
"input_tokens": 280,
"output_tokens": 120,
"cache_write_tokens": 0,
"cache_read_tokens": 0
},
"metadata": null
}
],
"bot_response": "आपके लोहा, महाराष्ट्र के खेत में इस हफ्ते चौथे दिन हल्की बारिश (8mm) की संभावना है। बाकी दिन साफ मौसम रहेगा। सोयाबीन की फसल के लिए यह सामान्य है।"
}
]
100 changes: 100 additions & 0 deletions data/sample_logs/sample_session.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
[
{
"session_id": "sess_001",
"user_question": "मेरे खेत का मौसम और फसल डेटा बताओ। मेरा नंबर 9876543210 है।",
"agent_turns": [
{
"parts": [
{
"part_kind": "tool-call",
"tool_name": "fetch_agristack_data",
"args": {
"farmer_id": "FARM12345",
"fields": ["crop", "land_area"]
},
"tool_call_id": "call_001",
"timestamp": "2024-11-15T10:23:40Z"
},
{
"part_kind": "tool-return",
"tool_name": "fetch_agristack_data",
"content": "Name: Ramesh Kumar\nMobile: 9876543210\nLand: 2.5 acres\nCrop: Wheat\nSeason: Rabi",
"tool_call_id": "call_001",
"timestamp": "2024-11-15T10:23:43Z"
},
{
"part_kind": "tool-call",
"tool_name": "weather_forecast",
"args": {
"lat": 28.6139,
"lon": 77.209,
"days": 7
},
"tool_call_id": "call_002",
"timestamp": "2024-11-15T10:23:44Z"
},
{
"part_kind": "tool-return",
"tool_name": "weather_forecast",
"content": "Next 7 days: sunny with occasional clouds. Min 18°C Max 32°C. No rain expected.",
"tool_call_id": "call_002",
"timestamp": "2024-11-15T10:23:47Z"
}
],
"usage": {
"input_tokens": 320,
"output_tokens": 180,
"cache_write_tokens": 0,
"cache_read_tokens": 0
},
"model_name": "gemini-2.0-flash",
"timestamp": "2024-11-15T10:23:45Z",
"kind": "response",
"finish_reason": "stop",
"run_id": "run_abc123",
"metadata": null
}
],
"bot_response": "आपके खेत का डेटा मिल गया। आपकी 2.5 एकड़ जमीन पर गेहूं की फसल है। अगले 7 दिन धूप रहेगी, तापमान 18-32°C के बीच रहेगा। बारिश की संभावना नहीं है।"
},
{
"session_id": "sess_002",
"user_question": "What is the soil health score for farm FARM12345?",
"agent_turns": [
{
"parts": [
{
"part_kind": "tool-call",
"tool_name": "fetch_agristack_data",
"args": {
"farmer_id": "FARM12345",
"fields": ["soil_health"]
},
"tool_call_id": "call_003",
"timestamp": "2024-11-15T10:25:10Z"
},
{
"part_kind": "tool-return",
"tool_name": "fetch_agristack_data",
"content": "Soil health score: 72/100. pH: 6.8. Organic matter: 2.1%. Nitrogen: medium.",
"tool_call_id": "call_003",
"timestamp": "2024-11-15T10:25:13Z"
}
],
"usage": {
"input_tokens": 180,
"output_tokens": 95,
"cache_write_tokens": 0,
"cache_read_tokens": 0
},
"model_name": "gemini-2.0-flash",
"timestamp": "2024-11-15T10:25:12Z",
"kind": "response",
"finish_reason": "stop",
"run_id": "run_def456",
"metadata": null
}
],
"bot_response": "Your farm's soil health score is 72 out of 100. The pH is 6.8 and organic matter is at 2.1%, which is good for wheat cultivation. Nitrogen levels are medium."
}
]
Loading