SQL_Agent/main.py at main · HamCraft/SQL_Agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# import pathlib
# import requests
# from dotenv import load_dotenv
# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.middleware.cors import CORSMiddleware

# from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain_community.utilities import SQLDatabase
# from langchain_community.agent_toolkits import SQLDatabaseToolkit
# from langchain.agents import create_agent

# # Load environment variables
# load_dotenv()

# # Download database if it doesn't exist
# url = "https://storage.googleapis.com/benchmarks-artifacts/chinook/Chinook.db"
# local_path = pathlib.Path("Chinook.db")
# if not local_path.exists():
#     response = requests.get(url)
#     if response.status_code == 200:
#         local_path.write_bytes(response.content)
#         print(f"Database downloaded: {local_path}")
#     else:
#         raise Exception(f"Failed to download database (status {response.status_code})")

# # Initialize model and database
# model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)
# db = SQLDatabase.from_uri("sqlite:///Chinook.db")
# toolkit = SQLDatabaseToolkit(db=db, llm=model)
# tools = toolkit.get_tools()

# # Updated system prompt to prevent exposing DB internals
# system_prompt = f"""

# You are a merchant agent designed to answer questions about the data in a SQL database.

# Do NOT reveal table names, column names, database schema, or any internal database details.

# You can only provide answers to user questions using the data content.

# Always limit query results to at most 5 rows.

# Do NOT run any DML statements (INSERT, UPDATE, DELETE, DROP, etc.).

# If a question asks for internal structure, respond politely that you cannot reveal it.

# """

# # Create agent
# agent = create_agent(model, tools, system_prompt=system_prompt)

# # FastAPI app
# app = FastAPI(title="SQL Agent API")

# # Enable CORS if needed
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],
#     allow_methods=["*"],
#     allow_headers=["*"],
# )

# # Request model
# class QueryRequest(BaseModel):
#     question: str

# # API endpoint
# @app.post("/ask")
# async def ask_question(request: QueryRequest):
#     question = request.question.strip()

#     # Simple sanitization: block questions explicitly asking for schema or table names
#     forbidden_keywords = ["schema", "table", "tables", "columns", "database structure"]
#     if any(word in question.lower() for word in forbidden_keywords):
#         return {"answer": "I'm sorry, I cannot provide internal database structure details."}

#     final_answer = ""
#     for step in agent.stream(
#         {"messages": [{"role": "user", "content": question}]},
#         stream_mode="values",
#     ):
#         final_answer = step["messages"][-1].text
#     return {"answer": final_answer}


import os
import re

from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langchain.agents import create_agent
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
from langchain_deepseek import ChatDeepSeek
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel


# Populate the process environment from a local .env file, if one exists.
load_dotenv()

# Credential handed to the chat model below; None when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# Chat model client pointed at the OpenRouter base URL, deterministic sampling
# (temperature 0) and the provider's "reasoning" option switched on.
model = ChatDeepSeek(
    temperature=0,
    model="x-ai/grok-4.1-fast:free",
    api_base="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
    extra_body={"reasoning": {"enabled": True}},
)

# The full connection string must come from the environment; abort start-up
# when it is missing or empty.
DATABASE_URL = os.environ.get("DATABASE_URL")
if DATABASE_URL is None or DATABASE_URL == "":
    raise Exception("Please set DATABASE_URL in your .env file")

# Alternative model backend, kept disabled for reference:
# try:
#     model = ChatGoogleGenerativeAI(
#         model="gemini-2.5-flash", temperature=0, thinking_budget=1024, include_thoughts=True)
# except Exception as e:
#     raise Exception(f"Failed to initialize model: {str(e)}")

# Database handle, the SQL toolkit bound to the model, and the tools it exposes.
db = SQLDatabase.from_uri(DATABASE_URL)
toolkit = SQLDatabaseToolkit(llm=model, db=db)
tools = toolkit.get_tools()

# System prompt to prevent exposing DB internals
# system_prompt = f"""
# You are a Merchant agent.
# DO Not talk about irrelevant stuff.
# When greeted, just say "Hello, How may i help you"
# Don't mention to users about SQL or other technical terms
# DO Not talk about irrelevant stuff  that doesn't help with our data.
# Answer questions about the data and forcasting .
# You need to do prediction analyst of data when asked to
# Do NOT reveal table names, column names, or schema.
# Limit query results to 5 rows max.
# Do NOT run INSERT, UPDATE, DELETE, DROP, or other DML.
# If asked about structure, politely refuse.
# Answer concisely.
# DO Not talk about irrelevant stuff.
# """

# System prompt handed to the agent. It restricts the assistant to business and
# sales topics, forbids exposing SQL or schema details, and caps results at 5 rows.
# This is a plain string on purpose: the text has no placeholders, and an f-string
# would turn any literal brace added later into a format field.
system_prompt = """
You are a Merchant Agent connected to a SQL database.
You operate in STRICT MODE and ONLY assist with business- and sales-related queries.

=====================
ALLOWED REQUESTS
=====================
You MUST answer ONLY if the user asks about:
- Sales data, KPIs, product performance
- Demand patterns, seasonal trends, market behavior
- Forecasting or prediction related to products, regions, time periods, or events
- Product recommendations based on sales insights (e.g., Ramadan, holidays, promotions)
- Business insights or commercial opportunities supported by sales data

Examples of ALLOWED questions:
- "Which product is suited for Ramadan 2025 in Pakistan?"
- "Show me the top-selling products last month."
- "Forecast sales for next quarter."
- "Which region has declining demand?"

=====================
NOT ALLOWED (Refuse)
=====================
If the request is **NOT** related to business, commerce, products, sales, or forecasting,
you MUST refuse.

Examples to refuse:
- Travel information
- Cooking, health, entertainment, movies
- Coding help (Python, SQL, JS, etc.)
- Historical facts, geography, trivia
- Personal questions or chit-chat
- Anything not connected to business or sales insights

Use this refusal template:
"I'm sorry, but I can only assist with sales, product insights, business data, or forecasting."

=====================
SQL SAFETY RULES
=====================
- NEVER reveal SQL queries, table names, column names, schema, or technical details.
- NEVER execute or suggest INSERT, UPDATE, DELETE, DROP, ALTER, or any destructive SQL.
- You may only perform safe, read-only analytical queries internally.
- Limit all returned data to a maximum of 5 rows.
- If asked about the data structure, politely refuse.

=====================
BEHAVIOR RULES
=====================
- When greeted, respond exactly: "Hello, how may I help you?"
- All output must be concise, business-focused, and free of irrelevant content.
- Stay strictly within your domain: sales data, business insights, and forecasting.
- If user intent is unclear, ask a clarifying question ONLY if it relates to business.

=====================
END OF SPECIFICATION
=====================
"""


# Assemble the agent from the chat model, the SQL toolkit's tools and the system prompt.
agent = create_agent(model, tools, system_prompt=system_prompt)

# HTTP application through which the agent is exposed.
app = FastAPI(title="Postgres SQL Agent API")

# CORS is left fully open: any origin, any method and any header is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Request model: the JSON body accepted by the POST /ask endpoint.
# Described with comments rather than a docstring so the class body stays unchanged.
class QueryRequest(BaseModel):
    # Free-text question from the user; the /ask handler strips surrounding
    # whitespace before using it.
    question: str

# Keywords that signal a question about the database's internal structure.
# They are matched as whole words only: plain substring matching rejected
# ordinary sales questions, because "id" occurs inside "holidays"/"provide"
# and "table" inside "profitable"/"vegetables".
_FORBIDDEN_PATTERN = re.compile(
    r"\b(?:schemas?|tables?|columns?|database\s+structure|ids?)\b",
    re.IGNORECASE,
)

# Canned reply returned when a question trips the keyword filter.
_BLOCKED_ANSWER = "Apologies, I can't provide details on that. Is there anything else I can assist you with or any other questions you have?"


def _asks_about_structure(question: str) -> bool:
    """Return True when *question* contains a forbidden keyword as a whole word."""
    return _FORBIDDEN_PATTERN.search(question) is not None


# API endpoint
@app.post("/ask")
async def ask_question(request: QueryRequest):
    """Answer a user question with the SQL agent.

    Args:
        request: Request body carrying the user's ``question``.

    Returns:
        A dict of the form ``{"answer": <text>}``. The text is a canned reply
        when the question is empty or mentions a forbidden keyword, otherwise
        the text of the last message produced by the agent.
    """
    question = request.question.strip()

    # Nothing to ask: reply directly instead of sending an empty message to the model.
    if not question:
        return {"answer": "Please enter a question."}

    # Simple sanitization: block questions explicitly asking for schema or table names
    if _asks_about_structure(question):
        return {"answer": _BLOCKED_ANSWER}

    final_answer = ""
    # Use the async stream so the event loop is not blocked while the agent
    # waits on the model and the database. With stream_mode="values" each step
    # is the full state, so the last message of the last step is the final reply.
    async for step in agent.astream(
        {"messages": [{"role": "user", "content": question}]},
        stream_mode="values",
    ):
        final_answer = step["messages"][-1].text
    return {"answer": final_answer}

# Script entry point: serve the app with uvicorn on all interfaces, port 8000,
# when this file is executed directly. (The original note says this matters
# for Vercel; that is not verifiable from this file.)
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)