KittenML · iamgroot42 · Aug 5, 2025 · Aug 5, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+*.egg-info/
diff --git a/README.md b/README.md
@@ -6,16 +6,13 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 
 [Join our discord](https://discord.gg/upcyF5s6)
 
-
 ## ✨ Features
 
 - **Ultra-lightweight**: Model size less than 25MB
 - **CPU-optimized**: Runs without GPU on any device
 - **High-quality voices**: Several premium voice options available
 - **Fast inference**: Optimized for real-time speech synthesis
 
-
-
 ## 🚀 Quick Start
 
 ### Installation
@@ -24,15 +21,13 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
-
-
- ### Basic Usage 
+### Basic Usage
 
 ```
 from kittentts import KittenTTS
 m = KittenTTS("KittenML/kitten-tts-nano-0.1")
 
-audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f' )
+audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f')
 
 # available_voices : [  'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',  'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ]
 
@@ -42,20 +37,13 @@ sf.write('output.wav', audio, 24000)
 
 ```
 
-
-
-
-
 ## 💻 System Requirements
 
 Works literally everywhere
 
-
-
 ## Checklist 
 
 - [x] Release a preview model
 - [ ] Release the fully trained model weights
 - [ ] Release mobile SDK 
 - [ ] Release web version 
-
diff --git a/kittentts/.gitignore b/kittentts/.gitignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/kittentts/get_model.py b/kittentts/get_model.py
@@ -1,5 +1,6 @@
 import json
 import os
+import numpy as np
 from huggingface_hub import hf_hub_download
 from .onnx_model import KittenTTS_1_Onnx
 
@@ -22,8 +23,8 @@ def __init__(self, model_name="KittenML/kitten-tts-nano-0.1", cache_dir=None):
             repo_id = model_name
 
         self.model = download_from_huggingface(repo_id=repo_id, cache_dir=cache_dir)
-    
-    def generate(self, text, voice="expr-voice-5-m", speed=1.0):
+
+    def generate(self, text: str, voice: str = "expr-voice-5-m", speed: float = 1.0) -> np.ndarray:
         """Generate audio from text.
 
         Args:
@@ -34,9 +35,11 @@ def generate(self, text, voice="expr-voice-5-m", speed=1.0):
         Returns:
             Audio data as numpy array
         """
+        if not text:
+            raise ValueError("Input text cannot be empty.")
         return self.model.generate(text, voice=voice, speed=speed)
-    
-    def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, sample_rate=24000):
+
+    def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice-5-m", speed: float = 1.0, sample_rate: int = 24000) -> None:
         """Generate audio from text and save to file.
 
         Args:
@@ -46,15 +49,15 @@ def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0,
             speed: Speech speed (1.0 = normal)
             sample_rate: Audio sample rate
         """
-        return self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
+        self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate)
 
     @property
     def available_voices(self):
         """Get list of available voices."""
         return self.model.available_voices
 
 
-def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def download_from_huggingface(repo_id: str = "KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS_1_Onnx:
     """Download model files from Hugging Face repository.
 
     Args:
@@ -97,6 +100,6 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=
     return model
 
 
-def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
+def get_model(repo_id: str = "KittenML/kitten-tts-nano-0.1", cache_dir=None) -> KittenTTS:
     """Get a KittenTTS model (legacy function for backward compatibility)."""
     return KittenTTS(repo_id, cache_dir)
diff --git a/kittentts/onnx_model.py b/kittentts/onnx_model.py
@@ -5,7 +5,7 @@
 import onnxruntime as ort
 
 
-def basic_english_tokenize(text):
+def basic_english_tokenize(text: str) -> list:
     """Basic English tokenizer that splits on whitespace and punctuation."""
     import re
     tokens = re.findall(r"\w+|[^\w\s]", text)
@@ -27,14 +27,9 @@ def __init__(self, dummy=None):
 
         self.word_index_dictionary = dicts
 
-    def __call__(self, text):
-        indexes = []
-        for char in text:
-            try:
-                indexes.append(self.word_index_dictionary[char])
-            except KeyError:
-                pass
-        return indexes
+    def __call__(self, text: str) -> list:
+        dicts = self.word_index_dictionary
+        return [dicts[char] for char in text if char in dicts]
 
 
 class KittenTTS_1_Onnx:
@@ -48,7 +43,6 @@ def __init__(self, model_path="kitten_tts_nano_preview.onnx", voices_path="voice
         self.model_path = model_path
         self.voices = np.load(voices_path)
         self.session = ort.InferenceSession(model_path)
-
         self.phonemizer = phonemizer.backend.EspeakBackend(
             language="en-us", preserve_punctuation=True, with_stress=True
         )
@@ -124,10 +118,10 @@ def generate_to_file(self, text: str, output_path: str, voice: str = "expr-voice
 
 # Example usage
 if __name__ == "__main__":
-    tts = KittenTTS()
+    tts = KittenTTS_1_Onnx()
 
     text = """
     It begins with an "Ugh!" Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.
     """
 
-    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")
+    tts.generate_to_file(text, "inference_output25.wav", voice="expr-voice-5-m")