-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
81 lines (66 loc) · 2.11 KB
/
server.py
File metadata and controls
81 lines (66 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os

import httpx
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from pydantic import BaseModel
# Load variables from a .env file (if any) before configuration is read below,
# so OPENROUTER_API_KEY can come from either the real environment or .env.
load_dotenv()

app = FastAPI()

# Upstream text-completion endpoint that /complete forwards to.
OPENROUTER_URL = "https://openrouter.ai/api/v1/completions"
# Bearer token for OpenRouter; None when the variable is not set.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
# Request body accepted by POST /complete. Only `prompt` is required; every
# other field has a default and is forwarded to the upstream completion API.
class CompletionRequest(BaseModel):
    # Text the model should continue.
    prompt: str
    # Length and sampling controls. The handler copies each of these values
    # unchanged into the upstream payload under the same key.
    max_tokens: int = 256
    temperature: float = 1.0
    top_p: float = 1.0
    top_k: int = 0
    repetition_penalty: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    # Stop sequences. The handler only adds a "stop" key to the upstream
    # payload when this list is non-empty.
    stop: list[str] = []
    # When true, the handler relays the upstream response as a
    # text/event-stream instead of returning a single JSON document.
    stream: bool = False
# Seconds to wait on the upstream API before giving up.
_UPSTREAM_TIMEOUT_SECONDS = 120.0


def _build_upstream_payload(req: CompletionRequest) -> dict[str, object]:
    """Translate a validated client request into the upstream JSON body."""
    payload: dict[str, object] = {
        "model": "meta-llama/llama-3.1-405b",
        "prompt": req.prompt,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "top_p": req.top_p,
        "top_k": req.top_k,
        "repetition_penalty": req.repetition_penalty,
        "presence_penalty": req.presence_penalty,
        "frequency_penalty": req.frequency_penalty,
        "stream": req.stream,
    }
    # Forward stop sequences only when the caller actually supplied some.
    if req.stop:
        payload["stop"] = req.stop
    return payload


async def _relay_upstream_stream(headers: dict[str, str], payload: dict[str, object]):
    """Yield the upstream response body as text chunks while it arrives."""
    async with httpx.AsyncClient() as client:
        async with client.stream(
            "POST",
            OPENROUTER_URL,
            headers=headers,
            json=payload,
            timeout=_UPSTREAM_TIMEOUT_SECONDS,
        ) as upstream:
            async for chunk in upstream.aiter_text():
                yield chunk


@app.post("/complete")
async def complete(req: CompletionRequest):
    """Proxy a text-completion request to OpenRouter.

    Args:
        req: Validated completion parameters supplied by the client.

    Returns:
        A ``text/event-stream`` response relaying upstream chunks when
        ``req.stream`` is true; otherwise the upstream JSON body, returned
        with the upstream HTTP status code.

    Raises:
        HTTPException: 500 when no API key is configured; 504 when the
            upstream request times out; 502 when the upstream request fails
            at the transport level or its body is not valid JSON.
    """
    # Fail fast instead of sending "Bearer None" upstream.
    if not OPENROUTER_API_KEY:
        raise HTTPException(
            status_code=500,
            detail="OPENROUTER_API_KEY is not configured",
        )

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = _build_upstream_payload(req)

    if req.stream:
        # NOTE(review): the upstream call only starts once the response body is
        # being iterated, so an upstream error is still relayed in the body
        # under HTTP 200. Propagating the status would require opening the
        # upstream stream before constructing the response.
        return StreamingResponse(
            _relay_upstream_stream(headers, payload),
            media_type="text/event-stream",
        )

    try:
        async with httpx.AsyncClient() as client:
            upstream = await client.post(
                OPENROUTER_URL,
                headers=headers,
                json=payload,
                timeout=_UPSTREAM_TIMEOUT_SECONDS,
            )
    except httpx.TimeoutException as exc:
        # Must precede HTTPError: TimeoutException is one of its subclasses.
        raise HTTPException(
            status_code=504,
            detail="Upstream completion request timed out",
        ) from exc
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=502,
            detail="Upstream completion request failed",
        ) from exc

    try:
        body = upstream.json()
    except ValueError as exc:
        # Upstream (or an intermediary) answered with something that is not JSON.
        raise HTTPException(
            status_code=502,
            detail="Upstream returned a non-JSON response",
        ) from exc

    # Preserve the upstream status so clients can tell failures from successes.
    return JSONResponse(content=body, status_code=upstream.status_code)
# Serve the front-end page for GET /.
# NOTE(review): "index.html" is a relative path, so it is presumably resolved
# against the process working directory — confirm how the server is launched.
@app.get("/")
async def root():
    index_page = FileResponse("index.html")
    return index_page