QueryLake/test_openai_chat.py at main · kmccleary3301/QueryLake · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""Simple helper to hit the OpenAI-compatible /v1/chat/completions endpoint.

Usage:
    python test_openai_chat.py --api-key sk-... \\
        [--base-url http://127.0.0.1:8000] \\
        [--model qwen2.5-vl-7b-instruct] \\
        [--temperature 0.2] \\
        [--max-tokens 128] \\
        [--timeout 180]

The script prints the JSON response to stdout. On failure it writes an error
message to stderr and exits with a non-zero code.
"""
from __future__ import annotations

import argparse
import json
import sys
from typing import Optional

import requests


def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse the command-line options for the chat tester.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing zero-argument callers behave exactly as before.

    Returns:
        Namespace with ``api_key``, ``base_url``, ``model``, ``temperature``,
        ``max_tokens`` and ``timeout`` attributes.

    Raises:
        SystemExit: Raised by argparse when ``--api-key`` is missing, a value
            cannot be converted to its declared type, or ``--help`` is given.
    """
    parser = argparse.ArgumentParser(description="QueryLake OpenAI-compatible chat tester")
    parser.add_argument(
        "--api-key",
        required=True,
        help="Bearer token (QueryLake API key)",
    )
    parser.add_argument(
        "--base-url",
        default="http://127.0.0.1:8000",
        help="Base URL for the QueryLake server (default: %(default)s)",
    )
    parser.add_argument(
        "--model",
        default="qwen2.5-vl-7b-instruct",
        help="Model identifier to request (default: %(default)s)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.2,
        help="Sampling temperature (default: %(default)s)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=128,
        help="Maximum output tokens (default: %(default)s)",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=180.0,
        help="Request timeout in seconds (default: %(default)s)",
    )
    # Passing None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)


def build_payload(model: str, temperature: float, max_tokens: int) -> dict:
    """Assemble the JSON body for a chat-completions request.

    The conversation is fixed: one system message followed by one user
    message. Only the model name and the two sampling settings vary.

    Args:
        model: Model identifier placed under the ``"model"`` key.
        temperature: Value placed under the ``"temperature"`` key.
        max_tokens: Value placed under the ``"max_tokens"`` key.

    Returns:
        A dict with the keys ``model``, ``messages``, ``temperature`` and
        ``max_tokens``, in that order.
    """
    system_message = {"role": "system", "content": "You are a concise assistant."}
    user_message = {
        "role": "user",
        "content": "Give me three bullet points about the benefits of solar power.",
    }
    body = dict(model=model, messages=[system_message, user_message])
    body["temperature"] = temperature
    body["max_tokens"] = max_tokens
    return body


def run_request(base_url: str, api_key: str, payload: dict, timeout: float) -> dict:
    """POST *payload* to the chat-completions endpoint and return the decoded reply.

    Args:
        base_url: Server root; trailing slashes are stripped before the
            ``/v1/chat/completions`` path is appended.
        api_key: Token sent in the ``Authorization: Bearer`` header.
        payload: Request body, serialized with ``json.dumps``.
        timeout: Passed through to ``requests.post`` as its ``timeout``.

    Returns:
        The result of ``response.json()`` for the server's reply.

    Raises:
        requests.RequestException: Propagated from ``requests.post`` or from
            ``raise_for_status()``.
    """
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    endpoint = f"{base_url.rstrip('/')}/v1/chat/completions"
    reply = requests.post(
        endpoint,
        headers=auth_headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Raise on an error status before attempting to decode the body.
    reply.raise_for_status()
    return reply.json()


def main() -> int:
    """Send one chat-completion request and print the JSON reply to stdout.

    Error messages go to stderr; nothing is written to stdout on failure.

    Returns:
        Process exit status:
        0 on success,
        2 when the request timed out,
        3 when ``run_request`` raised ``requests.HTTPError``,
        4 on any other ``requests`` failure,
        5 when the response body could not be decoded as JSON.
    """
    args = parse_args()
    payload = build_payload(args.model, args.temperature, args.max_tokens)

    try:
        data = run_request(args.base_url, args.api_key, payload, args.timeout)
    except requests.Timeout:
        print("Request timed out.", file=sys.stderr)
        return 2
    except requests.HTTPError as exc:
        print(f"HTTP error {exc.response.status_code}: {exc.response.text}", file=sys.stderr)
        return 3
    except requests.RequestException as exc:
        print(f"Request failed: {exc}", file=sys.stderr)
        return 4
    except ValueError as exc:
        # Older requests releases raise a plain ValueError (not a
        # RequestException) from response.json() when the body is not JSON;
        # report it cleanly instead of crashing with a traceback.
        print(f"Response was not valid JSON: {exc}", file=sys.stderr)
        return 5

    json.dump(data, sys.stdout, indent=2)
    print()  # json.dump does not emit a trailing newline
    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())