-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: test_model_fixed.py
More file actions
67 lines (55 loc) · 1.63 KB
/
test_model_fixed.py
File metadata and controls
67 lines (55 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Test the Qwen2.5-Omni model with corrected conversation format
"""
from transformers import Qwen2_5OmniForConditionalGeneration, AutoTokenizer
import torch
# Model checkpoint under test; parameterized below so other sizes can be tried.
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"
BANNER = "=" * 60  # console divider reused throughout the output


def _load_model(model_name):
    """Load the Omni conditional-generation model and its tokenizer.

    `torch_dtype="auto"` honors the checkpoint's native precision and
    `device_map="auto"` places weights on GPU when one is available.

    Returns:
        (model, tokenizer) tuple ready for generation.
    """
    print("\nLoading model...")
    model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print(f"✓ Model loaded on device: {model.device}\n")
    return model, tokenizer


def _generate_reply(model, tokenizer, messages, max_new_tokens=100):
    """Greedy-decode a reply for `messages` and return the new text only.

    Args:
        model: loaded Qwen2.5-Omni model.
        tokenizer: matching tokenizer (supplies the chat template).
        messages: list of {"role": ..., "content": ...} chat dicts.
        max_new_tokens: generation budget (default 100, as in the original).

    Returns:
        The decoded response string, with the prompt tokens stripped.
    """
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )
    # Omni checkpoints may return a (text_ids, audio) tuple; keep the token ids.
    generated_ids = output[0] if isinstance(output, tuple) else output
    # Slice off the prompt so only newly generated tokens are decoded.
    return tokenizer.batch_decode(
        generated_ids[:, inputs['input_ids'].shape[1]:],
        skip_special_tokens=True,
    )[0]


def main(model_name=MODEL_NAME, question="What is 5 + 3?"):
    """Smoke-test the model with one math question and print the reply.

    Args:
        model_name: HF checkpoint id to load (default: the 3B Omni model).
        question: user prompt to send (default matches the original script).
    """
    print(BANNER)
    print("Testing Qwen2.5-Omni-3B Model")
    print(BANNER)
    model, tokenizer = _load_model(model_name)
    messages = [
        {"role": "system", "content": "You are a helpful math tutor."},
        {"role": "user", "content": question},
    ]
    print(f"Test Question: {question}")
    print("Generating response...\n")
    response = _generate_reply(model, tokenizer, messages)
    print(BANNER)
    print("Model Response:")
    print(BANNER)
    print(response)
    print("\n" + BANNER)
    print("✓ Model is working correctly!")
    print(BANNER)
    print("\nYou can now run the web app with:")
    print(" ~/anaconda3/envs/tutor/bin/python app.py")
    print("\nThen open: http://localhost:5000")


# Guard so importing this file does not trigger a multi-GB model load.
if __name__ == "__main__":
    main()