-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_openai_chat.py
More file actions
107 lines (91 loc) · 3.03 KB
/
test_openai_chat.py
File metadata and controls
107 lines (91 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""Simple helper to hit the OpenAI-compatible /v1/chat/completions endpoint.
Usage:
    python test_openai_chat.py --api-key sk-... \\
        [--base-url http://127.0.0.1:8000] \\
        [--model qwen2.5-vl-7b-instruct] \\
        [--temperature 0.2] \\
        [--max-tokens 128] \\
        [--timeout 180]

The script prints the JSON response to stdout and exits with a non-zero code on error.
"""
from __future__ import annotations
import argparse
import json
import sys
from typing import Optional
import requests
def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the chat tester.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave exactly as before.

    Returns:
        Namespace carrying ``api_key``, ``base_url``, ``model``,
        ``temperature``, ``max_tokens`` and ``timeout``.

    Raises:
        SystemExit: Raised by argparse when ``--api-key`` is missing or an
            option value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser(description="QueryLake OpenAI-compatible chat tester")
    parser.add_argument(
        "--api-key",
        required=True,
        help="Bearer token (QueryLake API key)",
    )
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:8000",
        help="Base URL for the QueryLake server (default: %(default)s)",
    )
    parser.add_argument(
        "--model",
        default="qwen2.5-vl-7b-instruct",
        help="Model identifier to request (default: %(default)s)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.2,
        help="Sampling temperature (default: %(default)s)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=128,
        help="Maximum output tokens (default: %(default)s)",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=180.0,
        help="Request timeout in seconds (default: %(default)s)",
    )
    # Forwarding argv lets callers and tests supply arguments explicitly
    # instead of mutating the process-wide sys.argv.
    return parser.parse_args(argv)
def build_payload(model: str, temperature: float, max_tokens: int) -> dict:
    """Assemble the JSON-serialisable body of the chat-completions request.

    The conversation is fixed: a system turn followed by one user turn.

    Args:
        model: Value stored under the ``"model"`` key.
        temperature: Value stored under the ``"temperature"`` key.
        max_tokens: Value stored under the ``"max_tokens"`` key.

    Returns:
        A new dict with keys ``model``, ``messages``, ``temperature`` and
        ``max_tokens``, in that order.
    """
    system_turn = {"role": "system", "content": "You are a concise assistant."}
    user_turn = {
        "role": "user",
        "content": "Give me three bullet points about the benefits of solar power.",
    }
    # Keys are inserted in the same order the request body has always used.
    body = {"model": model}
    body["messages"] = [system_turn, user_turn]
    body["temperature"] = temperature
    body["max_tokens"] = max_tokens
    return body
def run_request(base_url: str, api_key: str, payload: dict, timeout: float) -> dict:
    """POST ``payload`` to ``<base_url>/v1/chat/completions`` and decode the reply.

    Args:
        base_url: Server root; trailing slashes are stripped before the
            route is appended.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        payload: Request body; serialised with ``json.dumps``.
        timeout: Passed through as the ``timeout`` of ``requests.post``.

    Returns:
        Whatever ``response.json()`` yields for the reply body.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` on an error status.
        requests.RequestException: Any other failure raised by ``requests``.
    """
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    endpoint = f"{base_url.rstrip('/')}/v1/chat/completions"
    reply = requests.post(
        endpoint,
        headers=auth_headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Turn 4xx/5xx answers into exceptions before attempting to decode.
    reply.raise_for_status()
    return reply.json()
def main() -> int:
    """Send one chat-completions request and print the decoded reply.

    Parses the command line, builds the fixed payload, performs the request
    and writes the response as indented JSON to stdout. Failures are reported
    on stderr.

    Returns:
        Process exit status: ``0`` on success, ``2`` on timeout, ``3`` when
        the server answers with an HTTP error status, ``4`` for any other
        ``requests`` failure, ``5`` when decoding the body raises a plain
        ``ValueError``.
    """
    args = parse_args()
    payload = build_payload(args.model, args.temperature, args.max_tokens)
    try:
        data = run_request(args.base_url, args.api_key, payload, args.timeout)
    except requests.Timeout:
        # Must precede RequestException: Timeout is one of its subclasses.
        print("Request timed out.", file=sys.stderr)
        return 2
    except requests.HTTPError as exc:
        print(f"HTTP error {exc.response.status_code}: {exc.response.text}", file=sys.stderr)
        return 3
    except requests.RequestException as exc:
        print(f"Request failed: {exc}", file=sys.stderr)
        return 4
    except ValueError as exc:
        # A successful status with a non-JSON body makes response.json()
        # raise. Newer requests releases raise a RequestException subclass
        # (handled above); older ones raise a bare ValueError, which would
        # otherwise escape as a traceback.
        print(f"Could not decode response as JSON: {exc}", file=sys.stderr)
        return 5
    json.dump(data, sys.stdout, indent=2)
    print()  # json.dump does not emit a trailing newline
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())