-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_mlx.py
More file actions
41 lines (33 loc) · 1006 Bytes
/
test_mlx.py
File metadata and controls
41 lines (33 loc) · 1006 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Quick test of MLX model"""
from mlx_lm import load, generate
import time
# Smoke test: load the model, run a single chat-style generation, print the
# reply, and report how long the generation call took.
print("Testing MLX model...")
print("Loading model (will download ~2GB on first run)...\n")

# Load model
model, tokenizer = load("mlx-community/Qwen2.5-3B-Instruct-4bit")
print("✓ Model loaded!\n")

# Test generation: build the chat messages and render them to a prompt string
# (tokenize=False asks the tokenizer for text rather than token ids).
question = "What is 5 + 3?"
messages = [
    {"role": "system", "content": "You are a helpful math tutor."},
    {"role": "user", "content": question},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

print(f"Test question: {question}")
print("Generating response...\n")

# perf_counter() is a monotonic clock, so the measured interval cannot be
# skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
response = generate(
    model,
    tokenizer,
    prompt=prompt,
    max_tokens=100,
    verbose=False,
)
end_time = time.perf_counter()

# Clean response: drop the prompt only when it is actually a leading prefix.
# A plain substring test followed by slicing from the front would cut off
# unrelated text whenever the prompt appeared somewhere other than position 0.
if response.startswith(prompt):
    response = response[len(prompt):].strip()

print("=" * 60)
print(f"Response: {response}")
print("=" * 60)
print(f"\nGeneration time: {end_time - start_time:.2f} seconds")
print("✓ MLX is working correctly!\n")