-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalternative_tutor.py
More file actions
161 lines (129 loc) · 4.97 KB
/
alternative_tutor.py
File metadata and controls
161 lines (129 loc) · 4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
ALTERNATIVE: Lightweight Math Tutor Bot
Uses smaller models that fit your disk space:
- Qwen2.5-1.5B for text generation (~3GB)
- Kokoro-82M for text-to-speech (~200MB)
- Whisper-tiny for speech-to-text (~150MB)
Total: ~3.5GB instead of 12GB
"""
import numpy as np
import soundfile as sf
import torch
from kokoro import KPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
print("Loading lightweight models for Math Tutor Bot...")
print("Total download: ~3-4GB (much smaller than Qwen2.5-Omni)\n")
# ============================================================================
# 1. Text Generation: Qwen2.5-1.5B (Small but powerful)
# ============================================================================
print("Loading Qwen2.5-1.5B text model...")
# First run downloads the checkpoint from the Hugging Face Hub.
# torch_dtype="auto": use the precision stored in the checkpoint.
# device_map="auto": place weights on GPU when available, else CPU.
text_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
# Tokenizer must match the model so the chat template is correct.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
print("✓ Text model loaded (1.5B params)\n")
# ============================================================================
# 2. Text-to-Speech: Kokoro-82M (Already installed!)
# ============================================================================
print("Loading Kokoro TTS...")
# lang_code='a' — presumably American English per Kokoro's language codes;
# NOTE(review): confirm against the installed kokoro version.
tts_pipeline = KPipeline(lang_code='a')
print("✓ TTS loaded (82M params)\n")
print("="*60)
print("Models ready! Total: ~1.6B parameters")
print("="*60)
# ============================================================================
# Function: Text-to-Text Math Tutoring
# ============================================================================
def ask_tutor(question, get_audio=False):
    """
    Ask a math question and get a text (and optionally audio) response.

    Args:
        question: Student's question, plain text.
        get_audio: If True, also synthesize the answer as speech.

    Returns:
        (text_response, audio_file): ``audio_file`` is the WAV path when
        audio was generated and written, otherwise ``None``.
    """
    # Fixed tutoring persona plus the student's question.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful, patient math tutor. Explain concepts clearly with step-by-step examples."
        },
        {
            "role": "user",
            "content": question
        }
    ]

    # Render the conversation with the model's own chat template so the
    # prompt matches the instruct model's expected format.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Generate the text response; no gradient tracking needed at inference.
    model_inputs = tokenizer([text], return_tensors="pt").to(text_model.device)
    with torch.no_grad():
        generated_ids = text_model.generate(
            **model_inputs,
            max_new_tokens=512,
            do_sample=False  # Deterministic for consistent explanations
        )

    # Strip the prompt tokens: keep only the newly generated completion.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Generate audio if requested.
    audio_file = None
    if get_audio:
        print(" Generating audio response...")
        generator = tts_pipeline(response, voice='af_heart')
        # BUG FIX: the previous code broke out of the loop after the first
        # TTS segment, silently truncating the audio of any long answer,
        # and reported a saved file even when nothing was written.
        # Collect every segment and write the full waveform once.
        # np.asarray() normalizes each chunk (numpy array or CPU tensor —
        # TODO confirm the chunk type for the installed kokoro version).
        chunks = [np.asarray(audio) for _, _, audio in generator]
        if chunks:
            audio_file = "tutor_response.wav"
            sf.write(audio_file, np.concatenate(chunks), 24000)  # Kokoro sample rate
            print(f" Audio saved to: {audio_file}")
    return response, audio_file
# ============================================================================
# Demo: Test the Tutor Bot
# ============================================================================
if __name__ == "__main__":
    rule = "=" * 60

    print("\n" + rule)
    print("LIGHTWEIGHT MATH TUTOR BOT - DEMO")
    print(rule)

    # Demo 1: plain text answer
    print("\n📝 Question 1: (Text-to-Text)")
    question = "How do I solve the equation 2x + 5 = 13?"
    print(f" Student: {question}")
    reply, _ = ask_tutor(question, get_audio=False)
    print(f"\n Tutor: {reply}")

    # Demo 2: text answer plus synthesized speech
    print("\n" + rule)
    print("\n📝 Question 2: (Text-to-Text + Audio)")
    question = "What is the Pythagorean theorem?"
    print(f" Student: {question}")
    reply, wav_path = ask_tutor(question, get_audio=True)
    print(f"\n Tutor: {reply}")
    if wav_path:
        print("\n 🔊 Audio response saved!")

    # Demo 3: longer pedagogical explanation
    print("\n" + rule)
    print("\n📝 Question 3: (Step-by-step explanation)")
    question = "Explain fractions to a 5th grader"
    print(f" Student: {question}")
    reply, _ = ask_tutor(question, get_audio=False)
    print(f"\n Tutor: {reply}")

    # Wrap-up: what this setup provides and how to extend it
    print("\n" + rule)
    print("✓ Demo complete!")
    print(rule)
    print("\nWhat you have:")
    print(" ✓ Text-to-Text tutoring (Qwen2.5-1.5B)")
    print(" ✓ Text-to-Speech (Kokoro-82M)")
    print(" ✓ Small footprint (~3.5GB total)")
    print("\nTo add Speech-to-Text:")
    print(" pip install openai-whisper")
    print(" Use whisper.load_model('tiny') # Only 150MB")
    print("\nTotal system: ~4GB instead of 12GB!")