-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalternative_tutor.py
More file actions
161 lines (129 loc) · 4.97 KB
/
alternative_tutor.py
File metadata and controls
161 lines (129 loc) · 4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
ALTERNATIVE: Lightweight Math Tutor Bot
Uses smaller models that fit your disk space:
- Qwen2.5-1.5B for text generation (~3GB)
- Kokoro-82M for text-to-speech (~200MB)
- Whisper-tiny for speech-to-text (~150MB)
Total: ~3.5GB instead of 12GB
"""
import numpy as np
import soundfile as sf
import torch
from kokoro import KPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
print("Loading lightweight models for Math Tutor Bot...")
print("Total download: ~3-4GB (much smaller than Qwen2.5-Omni)\n")
# ============================================================================
# 1. Text Generation: Qwen2.5-1.5B (Small but powerful)
# ============================================================================
print("Loading Qwen2.5-1.5B text model...")
# First run downloads the checkpoint from the Hugging Face Hub.
# torch_dtype="auto": use the precision stored in the checkpoint.
# device_map="auto": place weights on GPU when available, else CPU.
text_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
# Tokenizer must match the model so the chat template is correct.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
print("✓ Text model loaded (1.5B params)\n")
# ============================================================================
# 2. Text-to-Speech: Kokoro-82M (Already installed!)
# ============================================================================
print("Loading Kokoro TTS...")
# lang_code='a' — presumably American English per Kokoro's language codes;
# NOTE(review): confirm against the installed kokoro version.
tts_pipeline = KPipeline(lang_code='a')
print("✓ TTS loaded (82M params)\n")
print("="*60)
print("Models ready! Total: ~1.6B parameters")
print("="*60)
# ============================================================================
# Function: Text-to-Text Math Tutoring
# ============================================================================
def ask_tutor(question, get_audio=False):
    """
    Ask a math question and get a text (and optionally audio) response.

    Args:
        question: Student's question, plain text.
        get_audio: If True, also synthesize the answer as speech.

    Returns:
        (text_response, audio_file): ``audio_file`` is the WAV path when
        audio was generated and written, otherwise ``None``.
    """
    # Fixed tutoring persona plus the student's question.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful, patient math tutor. Explain concepts clearly with step-by-step examples."
        },
        {
            "role": "user",
            "content": question
        }
    ]

    # Render the conversation with the model's own chat template so the
    # prompt matches the instruct model's expected format.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Generate the text response; no gradient tracking needed at inference.
    model_inputs = tokenizer([text], return_tensors="pt").to(text_model.device)
    with torch.no_grad():
        generated_ids = text_model.generate(
            **model_inputs,
            max_new_tokens=512,
            do_sample=False  # Deterministic for consistent explanations
        )

    # Strip the prompt tokens: keep only the newly generated completion.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Generate audio if requested.
    audio_file = None
    if get_audio:
        print(" Generating audio response...")
        generator = tts_pipeline(response, voice='af_heart')
        # BUG FIX: the previous code broke out of the loop after the first
        # TTS segment, silently truncating the audio of any long answer,
        # and reported a saved file even when nothing was written.
        # Collect every segment and write the full waveform once.
        # np.asarray() normalizes each chunk (numpy array or CPU tensor —
        # TODO confirm the chunk type for the installed kokoro version).
        chunks = [np.asarray(audio) for _, _, audio in generator]
        if chunks:
            audio_file = "tutor_response.wav"
            sf.write(audio_file, np.concatenate(chunks), 24000)  # Kokoro sample rate
            print(f" Audio saved to: {audio_file}")
    return response, audio_file
# ============================================================================
# Demo: Test the Tutor Bot
# ============================================================================
if __name__ == "__main__":
    rule = "=" * 60

    print("\n" + rule)
    print("LIGHTWEIGHT MATH TUTOR BOT - DEMO")
    print(rule)

    # Demo 1: plain text answer
    print("\n📝 Question 1: (Text-to-Text)")
    question = "How do I solve the equation 2x + 5 = 13?"
    print(f" Student: {question}")
    reply, _ = ask_tutor(question, get_audio=False)
    print(f"\n Tutor: {reply}")

    # Demo 2: text answer plus synthesized speech
    print("\n" + rule)
    print("\n📝 Question 2: (Text-to-Text + Audio)")
    question = "What is the Pythagorean theorem?"
    print(f" Student: {question}")
    reply, wav_path = ask_tutor(question, get_audio=True)
    print(f"\n Tutor: {reply}")
    if wav_path:
        print("\n 🔊 Audio response saved!")

    # Demo 3: longer pedagogical explanation
    print("\n" + rule)
    print("\n📝 Question 3: (Step-by-step explanation)")
    question = "Explain fractions to a 5th grader"
    print(f" Student: {question}")
    reply, _ = ask_tutor(question, get_audio=False)
    print(f"\n Tutor: {reply}")

    # Wrap-up: what this setup provides and how to extend it
    print("\n" + rule)
    print("✓ Demo complete!")
    print(rule)
    print("\nWhat you have:")
    print(" ✓ Text-to-Text tutoring (Qwen2.5-1.5B)")
    print(" ✓ Text-to-Speech (Kokoro-82M)")
    print(" ✓ Small footprint (~3.5GB total)")
    print("\nTo add Speech-to-Text:")
    print(" pip install openai-whisper")
    print(" Use whisper.load_model('tiny') # Only 150MB")
    print("\nTotal system: ~4GB instead of 12GB!")