math_tutor/test_mlx.py at main · omkarchandra/math_tutor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Quick test of MLX model"""
from mlx_lm import load, generate
import time

# Settings for the smoke test, named once so the prompt and the printed
# question cannot drift apart.
MODEL_ID = "mlx-community/Qwen2.5-3B-Instruct-4bit"
QUESTION = "What is 5 + 3?"
MAX_TOKENS = 100


def strip_prompt_echo(response: str, prompt: str) -> str:
    """Return *response* without a leading copy of *prompt*.

    The prompt is removed (and the remainder whitespace-stripped) only when
    the response actually starts with it.  A membership test (``prompt in
    response``) is not enough: if the prompt text showed up later in the
    response, slicing ``len(prompt)`` characters off the front would cut
    away the beginning of the real answer.

    Args:
        response: Text returned by the generation call.
        prompt: The exact prompt string that was passed to the model.

    Returns:
        The response with the echoed prompt removed, or the response
        unchanged when it does not begin with the prompt.
    """
    if response.startswith(prompt):
        return response[len(prompt):].strip()
    return response


def main() -> None:
    """Load the MLX model, ask one arithmetic question, and print the reply.

    Prints progress messages, the model's response, and how long the
    generation call took.
    """
    print("Testing MLX model...")
    print("Loading model (will download ~2GB on first run)...\n")

    # Load model
    model, tokenizer = load(MODEL_ID)
    print("✓ Model loaded!\n")

    # Build the chat-formatted prompt as a plain string (tokenize=False).
    messages = [
        {"role": "system", "content": "You are a helpful math tutor."},
        {"role": "user", "content": QUESTION},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    print(f"Test question: {QUESTION}")
    print("Generating response...\n")

    # perf_counter is monotonic, so the measured duration is not affected
    # by system clock adjustments the way time.time() differences are.
    started = time.perf_counter()
    response = generate(
        model,
        tokenizer,
        prompt=prompt,
        max_tokens=MAX_TOKENS,
        verbose=False,
    )
    elapsed = time.perf_counter() - started

    # Clean response: drop the prompt if the output begins by echoing it.
    response = strip_prompt_echo(response, prompt)

    print("=" * 60)
    print(f"Response: {response}")
    print("=" * 60)
    print(f"\nGeneration time: {elapsed:.2f} seconds")
    print("✓ MLX is working correctly!\n")


# Guarded so that importing this module (e.g. by a test collector) does not
# download the model or run generation; `python <this file>` still does.
if __name__ == "__main__":
    main()