diff --git a/README.md b/README.md
index 40c1429..3dea499 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@ This Python project focuses on detecting and recognizing Tibetan text in images.
 3. **Inference**: Detect Tibetan text blocks in new images, including support for Staatsbibliothek zu Berlin digital collections
 4. **OCR**: Apply Tesseract OCR to the detected text blocks to extract the actual text content
 
-![Validation results](res/results_val_1.png)
+## Example of synthetic data
+![generated synthetic data](res/results_val_1.jpg)
 
 ## Quick Start Guide
 
@@ -17,8 +18,8 @@
 
 ```bash
 # Clone the repository
-git clone https://github.com/nih23/Tibetan-NLP.git
-cd Tibetan-NLP
+git clone https://github.com/CodexAITeam/PechaBridge
+cd PechaBridge
 
 # Install dependencies
 pip install -r requirements.txt
@@ -33,7 +34,37 @@
 
 ```bash
 # 1. Generate dataset
+python generate_training_data.py --train_samples 10 --val_samples 10 --font_path_tibetan ext/Microsoft\ Himalaya.ttf --font_path_chinese ext/simkai.ttf --dataset_name tibetan-yolo
-python generate_training_data.py --train_samples 1000 --val_samples 200 --image_size 1024
+
+# 1.5 Inspect and validate the dataset with Label Studio (optional)
+# Install Label Studio if not already installed:
+# pip install label-studio label-studio-converter
+
+# Set up environment variables for local file serving
+export LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED=true
+export LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT=$(pwd)/datasets/tibetan-yolo
+
+# Create classes.txt for Label Studio compatibility (printf is used because plain echo does not expand \n)
+printf "tibetan_no\ntext_body\nchinese_no\n" > datasets/tibetan-yolo/train/classes.txt
+printf "tibetan_no\ntext_body\nchinese_no\n" > datasets/tibetan-yolo/val/classes.txt
+
+# Convert YOLO annotations to Label Studio format
+label-studio-converter import yolo -i datasets/tibetan-yolo/train -o ls-tasks.json --image-ext ".png" --image-root-url "/data/local-files/?d=train/images"
+
+# Start the Label Studio web interface (opens at http://localhost:8080)
+label-studio
+
+# In Label Studio:
+# 1. Create a new project:
+# 1.1 Go to the project settings and select Cloud Storage.
+# 1.2 Click Add Source Storage and select Local files from the Storage Type options.
+# 1.3 Set the Absolute local path to `$(pwd)/datasets/tibetan-yolo` (you need to resolve `$(pwd)` to the actual absolute path).
+# 1.4 Click Add Storage.
+# 2. Import the generated ls-tasks.json file
+# 3. Review and validate the generated annotations
+# 4. Export corrections if needed
+
+# [1] https://github.com/HumanSignal/label-studio-sdk/tree/master/src/label_studio_sdk/converter#tutorial-importing-yolo-pre-annotated-images-to-label-studio-using-local-storage
 
 # 2. Train model
 python train_model.py --epochs 100 --export
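Each label file written in step 1 holds one `class cx cy w h` row per box, with all coordinates normalized to the image size (the same format as the sample annotation files further down in this diff). As a quick sanity check before importing into Label Studio, a sketch along these lines can convert the boxes back to pixels; the label file name is hypothetical, and the 1024x361 page size matches the sample annotations:

```python
from pathlib import Path

# Hypothetical label file inside the generated dataset; point this at an actual sample.
LABELS = Path("datasets/tibetan-yolo/train/labels/example.txt")
IMG_W, IMG_H = 1024, 361  # page format used by the sample annotations in this diff

for line in LABELS.read_text().splitlines():
    cls, cx, cy, w, h = line.split()
    cx, cy, w, h = (float(v) for v in (cx, cy, w, h))
    x = (cx - w / 2) * IMG_W  # top-left x in pixels
    y = (cy - h / 2) * IMG_H  # top-left y in pixels
    print(cls, round(x), round(y), round(w * IMG_W), round(h * IMG_H))
```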
diff --git a/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.csv b/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.csv
new file mode 100644
index 0000000..2c91139
--- /dev/null
+++ b/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.csv
@@ -0,0 +1,6 @@
+yolo_label,label_name,bbox_x,bbox_y,bbox_width,bbox_height,image_name,image_width,image_height
+0,tibetan_no,18,40,54,275,bg_PPN337138764X_00000005.png,1024,361
+1,illustration_left,73,40,211,276,bg_PPN337138764X_00000005.png,1024,361
+2,text_body,286,40,442,278,bg_PPN337138764X_00000005.png,1024,361
+3,illustration_right,731,40,224,277,bg_PPN337138764X_00000005.png,1024,361
+4,chinese_no,956,41,52,279,bg_PPN337138764X_00000005.png,1024,361
\ No newline at end of file
diff --git a/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.txt b/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.txt
new file mode 100644
index 0000000..f0ea684
--- /dev/null
+++ b/data/tibetan numbers/annotations/complete_layout/bg_PPN337138764X_00000005.txt
@@ -0,0 +1,5 @@
+0 0.044237 0.491942 0.052760 0.762097
+1 0.174513 0.493093 0.206169 0.764399
+2 0.495130 0.496546 0.431818 0.771306
+3 0.823052 0.494244 0.219156 0.766701
+4 0.958604 0.500000 0.050325 0.773609
\ No newline at end of file
diff --git a/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.csv b/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.csv
new file mode 100644
index 0000000..51de394
--- /dev/null
+++ b/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.csv
@@ -0,0 +1,6 @@
+yolo_label,label_name,bbox_x,bbox_y,bbox_width,bbox_height,image_name,image_width,image_height
+0,tibetan_no,24,28,52,304,bg_PPN337138764X_00000005.png,1024,361
+1,illustration_left,80,28,263,303,bg_PPN337138764X_00000005.png,1024,361
+2,illustration_centered,348,28,313,304,bg_PPN337138764X_00000005.png,1024,361
+3,illustration_right,668,28,267,304,bg_PPN337138764X_00000005.png,1024,361
+4,chinese_no,940,28,57,305,bg_PPN337138764X_00000005.png,1024,361
\ No newline at end of file
diff --git a/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.txt b/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.txt
new file mode 100644
index 0000000..c26f063
--- /dev/null
+++ b/data/tibetan numbers/annotations/illustrations/bg_PPN337138764X_00000005.txt
@@ -0,0 +1,5 @@
+0 0.049107 0.497698 0.051136 0.842681
+1 0.206575 0.496546 0.257305 0.840378
+2 0.493101 0.500000 0.306006 0.842681
+3 0.782873 0.497698 0.260552 0.842681
+4 0.945617 0.498849 0.055195 0.844983
\ No newline at end of file
diff --git a/data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.csv b/data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.csv
new file mode 100644
index 0000000..df7d615
--- /dev/null
+++ b/data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.csv
@@ -0,0 +1,4 @@
+yolo_label,label_name,bbox_x,bbox_y,bbox_width,bbox_height,image_name,image_width,image_height
+0,tibetan_no,14,46,113,283,bg_PPN337138764X_00000005.png,1024,361
+1,text_body,130,45,772,284,bg_PPN337138764X_00000005.png,1024,361
+2,chinese_no,906,45,107,284,bg_PPN337138764X_00000005.png,1024,361
\ No newline at end of file
diff --git a/data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.txt b/data/tibetan
numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.txt new file mode 100644 index 0000000..16bc6fc --- /dev/null +++ b/data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.txt @@ -0,0 +1,3 @@ +0 0.068994 0.519570 0.110390 0.785121 +1 0.504464 0.518419 0.754058 0.787423 +2 0.937094 0.518419 0.104708 0.787423 \ No newline at end of file diff --git a/data/tibetan numbers/backgrounds/bg_IMG_5086.jpg b/data/tibetan numbers/backgrounds/bg_IMG_5086.jpg new file mode 100644 index 0000000..911d5c9 Binary files /dev/null and b/data/tibetan numbers/backgrounds/bg_IMG_5086.jpg differ diff --git a/data/tibetan numbers/backgrounds/bg_PPN3371387534_00000007.jpg b/data/tibetan numbers/backgrounds/bg_PPN3371387534_00000007.jpg new file mode 100644 index 0000000..e2c3501 Binary files /dev/null and b/data/tibetan numbers/backgrounds/bg_PPN3371387534_00000007.jpg differ diff --git a/data/tibetan numbers/backgrounds/bg_PPN337138764X_00000005.jpg b/data/tibetan numbers/backgrounds/bg_PPN337138764X_00000005.jpg new file mode 100644 index 0000000..6eb6675 Binary files /dev/null and b/data/tibetan numbers/backgrounds/bg_PPN337138764X_00000005.jpg differ diff --git a/data/tibetan numbers/backgrounds/bg_PPN3371388603_00000004.jpg b/data/tibetan numbers/backgrounds/bg_PPN3371388603_00000004.jpg new file mode 100644 index 0000000..d4bb148 Binary files /dev/null and b/data/tibetan numbers/backgrounds/bg_PPN3371388603_00000004.jpg differ diff --git a/data/tibetan numbers/backgrounds/bg_PPN3371389286_00000003.jpg b/data/tibetan numbers/backgrounds/bg_PPN3371389286_00000003.jpg new file mode 100644 index 0000000..d38e582 Binary files /dev/null and b/data/tibetan numbers/backgrounds/bg_PPN3371389286_00000003.jpg differ diff --git a/data/tibetan numbers/bg_train/Dalle_1.jpg b/data/tibetan numbers/bg_train/Dalle_1.jpg new file mode 100644 index 0000000..d23ab00 Binary files /dev/null and b/data/tibetan numbers/bg_train/Dalle_1.jpg differ diff --git a/data/tibetan numbers/bg_train/Dalle_2.jpg b/data/tibetan numbers/bg_train/Dalle_2.jpg new file mode 100644 index 0000000..831fcf6 Binary files /dev/null and b/data/tibetan numbers/bg_train/Dalle_2.jpg differ diff --git a/data/tibetan numbers/bg_val/Dalle2.jpg b/data/tibetan numbers/bg_val/Dalle2.jpg new file mode 100644 index 0000000..6ec8966 Binary files /dev/null and b/data/tibetan numbers/bg_val/Dalle2.jpg differ diff --git a/data/tibetan numbers/bg_val/Dalle3.jpg b/data/tibetan numbers/bg_val/Dalle3.jpg new file mode 100644 index 0000000..cce4abe Binary files /dev/null and b/data/tibetan numbers/bg_val/Dalle3.jpg differ diff --git a/data/tibetan numbers/bg_val/Dalle4.jpg b/data/tibetan numbers/bg_val/Dalle4.jpg new file mode 100644 index 0000000..e2c314d Binary files /dev/null and b/data/tibetan numbers/bg_val/Dalle4.jpg differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_01.png b/data/tibetan numbers/buddha_illustrations/buddha_01.png new file mode 100644 index 0000000..27d5c81 Binary files /dev/null and b/data/tibetan numbers/buddha_illustrations/buddha_01.png differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_02.png b/data/tibetan numbers/buddha_illustrations/buddha_02.png new file mode 100644 index 0000000..58041d9 Binary files /dev/null and b/data/tibetan numbers/buddha_illustrations/buddha_02.png differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_03.png b/data/tibetan numbers/buddha_illustrations/buddha_03.png new file mode 100644 index 0000000..57723f4 Binary files /dev/null 
and b/data/tibetan numbers/buddha_illustrations/buddha_03.png differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_05.png b/data/tibetan numbers/buddha_illustrations/buddha_05.png new file mode 100644 index 0000000..11d4e0d Binary files /dev/null and b/data/tibetan numbers/buddha_illustrations/buddha_05.png differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_06.png b/data/tibetan numbers/buddha_illustrations/buddha_06.png new file mode 100644 index 0000000..ea39d8e Binary files /dev/null and b/data/tibetan numbers/buddha_illustrations/buddha_06.png differ diff --git a/data/tibetan numbers/buddha_illustrations/buddha_07.png b/data/tibetan numbers/buddha_illustrations/buddha_07.png new file mode 100644 index 0000000..9c72c1b Binary files /dev/null and b/data/tibetan numbers/buddha_illustrations/buddha_07.png differ diff --git a/data/tibetan numbers/corpora/tib_no_0001.txt b/data/tibetan numbers/corpora/tib_no_0001.txt new file mode 100644 index 0000000..822f14d --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0001.txt @@ -0,0 +1 @@ +གཅིག་ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0002.txt b/data/tibetan numbers/corpora/tib_no_0002.txt new file mode 100644 index 0000000..c4e945d --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0002.txt @@ -0,0 +1 @@ +གཉིས \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0003.txt b/data/tibetan numbers/corpora/tib_no_0003.txt new file mode 100644 index 0000000..71882c5 --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0003.txt @@ -0,0 +1 @@ +གསུམ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0004.txt b/data/tibetan numbers/corpora/tib_no_0004.txt new file mode 100644 index 0000000..f69087e --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0004.txt @@ -0,0 +1 @@ +བཞི \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0005.txt b/data/tibetan numbers/corpora/tib_no_0005.txt new file mode 100644 index 0000000..aa5711f --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0005.txt @@ -0,0 +1 @@ +ལྔ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0006.txt b/data/tibetan numbers/corpora/tib_no_0006.txt new file mode 100644 index 0000000..f50b901 --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0006.txt @@ -0,0 +1 @@ +དྲུག་ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0007.txt b/data/tibetan numbers/corpora/tib_no_0007.txt new file mode 100644 index 0000000..11ee7bc --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0007.txt @@ -0,0 +1 @@ +བདུན་ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0008.txt b/data/tibetan numbers/corpora/tib_no_0008.txt new file mode 100644 index 0000000..32cceb1 --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0008.txt @@ -0,0 +1 @@ +བརྒྱད \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0009.txt b/data/tibetan numbers/corpora/tib_no_0009.txt new file mode 100644 index 0000000..f958559 --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0009.txt @@ -0,0 +1 @@ +དགུ \ No newline at end of file diff --git a/data/tibetan numbers/corpora/tib_no_0010.txt b/data/tibetan numbers/corpora/tib_no_0010.txt new file mode 100644 index 0000000..26946c7 --- /dev/null +++ b/data/tibetan numbers/corpora/tib_no_0010.txt @@ -0,0 +1 @@ +བཅུ \ No newline at end of file diff --git a/generate_training_data.py b/generate_training_data.py index 
40d9851..ea52593 100644
--- a/generate_training_data.py
+++ b/generate_training_data.py
@@ -1,12 +1,20 @@
 #!/usr/bin/env python3
 """
-Script for generating training data for Tibetan OCR.
-Creates synthetic images with Tibetan text for YOLO training.
+Script for generating multi-class training data for Tibetan OCR.
+Creates synthetic images with Tibetan text, Chinese numbers and general text for YOLO training.
+
+Supports 3 classes:
+- Class 0: tibetan_number_word (Tibetan numbers)
+- Class 1: tibetan_text (general Tibetan text)
+- Class 2: chinese_number_word (Chinese numbers)
 """
 from pathlib import Path
 from collections import OrderedDict
-from ultralytics.data.utils import DATASETS_DIR
+try:
+    from ultralytics.data.utils import DATASETS_DIR
+except ImportError:
+    DATASETS_DIR = "./datasets"  # Fallback if ultralytics not installed
 from tibetanDataGenerator.dataset_generator import generate_dataset
 
 # Import functions from the tibetan_utils library
@@ -15,38 +23,56 @@
 
 def main():
-    # Parse arguments
+    # Parse arguments (multi-class support)
     parser = create_generate_dataset_parser()
     args = parser.parse_args()
 
     # Set dataset path
-    datasets_dir = Path(DATASETS_DIR)
-    path = str(datasets_dir / args.dataset_name)
-    args.dataset_name = path
-    print(f"Generating YOLO dataset {args.dataset_name}...")
-
-    # Generate training dataset
-    train_dataset_dict = generate_dataset(args, validation=False)
-
-    # Generate validation dataset
-    val_dataset_dict = generate_dataset(args, validation=True)
-
-    # Combine train and val dataset information
-    dataset_dict = {
-        'path': args.dataset_name,
-        'train': 'train/images',
-        'val': 'val/images',
-        'nc': train_dataset_dict['nc'],
-        'names': train_dataset_dict['names']
-    }
-
-    # Save dataset configuration
-    yaml_path = f"{args.dataset_name}/data.yml"
-    save_yaml(dataset_dict, yaml_path)
-
-    print("Dataset generation finished.")
+    full_dataset_path = Path(args.output_dir) / args.dataset_name
+    original_dataset_name = args.dataset_name
+    args.dataset_name = str(full_dataset_path)
+
+    print(f"Generating multi-class YOLO dataset in {args.dataset_name}...")
+    print("The storage location can be changed via `yolo settings`.")
+    print("Supported classes:")
+    print("  - Class 0: tibetan_number_word (Tibetan numbers)")
+    print("  - Class 1: tibetan_text (general Tibetan text)")
+    print("  - Class 2: chinese_number_word (Chinese numbers)")
+
+    # Generate training dataset (multi-class)
+    train_dataset_info = generate_dataset(args, validation=False)
+
+    # Generate validation dataset (multi-class)
+    val_dataset_info = generate_dataset(args, validation=True)
+
+    # Multi-class YAML configuration
+    yaml_content = OrderedDict()
+    yaml_content['path'] = original_dataset_name
+    yaml_content['train'] = 'train/images'
+    yaml_content['val'] = 'val/images'
+    yaml_content['test'] = ''
+
+    if 'nc' not in train_dataset_info or 'names' not in train_dataset_info:
+        raise ValueError("generate_dataset did not return 'nc' or 'names' in its info dictionary.")
+    yaml_content['nc'] = train_dataset_info['nc']
+    yaml_content['names'] = train_dataset_info['names']
+
+    # Save the YAML configuration
+    yaml_file_path = Path(args.output_dir) / f"{original_dataset_name}.yaml"
+
+    # Register an OrderedDict representer so PyYAML preserves the key order
+    import yaml
+    def represent_ordereddict(dumper, data):
+        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())
+
+    yaml.add_representer(OrderedDict, represent_ordereddict)
+
+    with open(yaml_file_path, 'w', encoding='utf-8') as f:
+        yaml.dump(dict(yaml_content), f, sort_keys=False, allow_unicode=True)
+
+    print(f"\nMulti-class dataset generation finished. YAML configuration saved: {yaml_file_path}")
     print("Training can be started with the following command:\n")
-    print(f"yolo detect train data={yaml_path} epochs=100 imgsz=1024 model=yolov8n.pt")
+    print(f"yolo detect train data={yaml_file_path} epochs=100 imgsz=[{args.image_height},{args.image_width}] model=yolov8n.pt")
 
 
 if __name__ == "__main__":
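The script above serializes the dataset configuration as ordered YAML with the keys path, train, val, test, nc, and names. A minimal sketch for loading and checking the result, assuming the default `datasets` output directory and the `tibetan-yolo` dataset name from the Quick Start:

```python
import yaml

# Assumed output location: <output_dir>/<dataset_name>.yaml as written by generate_training_data.py
with open("datasets/tibetan-yolo.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# nc is the class count; names maps class ids to class names
assert cfg["nc"] == len(cfg["names"]), "nc must match the number of class names"
print(cfg["path"], cfg["train"], cfg["val"], cfg["nc"])
```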
diff --git a/res/results_val_1.jpg b/res/results_val_1.jpg
new file mode 100644
index 0000000..cd5463f
Binary files /dev/null and b/res/results_val_1.jpg differ
diff --git a/res/results_val_1.png b/res/results_val_1.png
deleted file mode 100644
index 9bd4798..0000000
Binary files a/res/results_val_1.png and /dev/null differ
diff --git a/res/results_val_2.png b/res/results_val_2.png
deleted file mode 100644
index 89b0ade..0000000
Binary files a/res/results_val_2.png and /dev/null differ
diff --git a/tibetanDataGenerator/README.md b/tibetanDataGenerator/README.md
new file mode 100644
index 0000000..9044cdb
--- /dev/null
+++ b/tibetanDataGenerator/README.md
@@ -0,0 +1,70 @@
+# Tibetan Text Detection Dataset Generator
+
+A tool for generating synthetic YOLO-formatted datasets for detecting Tibetan text, numbers, and their Chinese number counterparts in document images.
+
+## Features
+- Generates synthetic document images with Tibetan text, numbers, and Chinese numbers
+- Creates corresponding YOLO-format annotations
+- Maintains consistent numbering between Tibetan and Chinese number representations
+- Supports multiple text corpora with intelligent text placement
+- Includes data augmentation options (rotation, noise)
+
+## New Options
+python main.py \
+    --corpora_tibetan_numbers_path ./data/corpora/Tibetan\ Number\ Words/ \
+    --corpora_tibetan_text_path ./data/corpora/UVA\ Tibetan\ Spoken\ Corpus/ \
+    --corpora_chinese_numbers_path ./data/corpora/Chinese\ Number\ Words/ \
+    --font_path_tibetan ./fonts/Microsoft\ Himalaya.ttf \
+    --font_path_chinese ./fonts/simkai.ttf \
+    --image_width 1024 \
+    --image_height 361 \
+    --annotations_file_path ./data/annotations/tibetan_chinese_no.txt
+
+## Example Usage
+python path/to/main.py \
+    --corpora_tibetan_numbers_path "path/to/data/corpora/Tibetan Number Words" \
+    --corpora_tibetan_text_path "path/to/data/corpora/UVA Tibetan Spoken Corpus" \
+    --corpora_chinese_numbers_path "path/to/data/corpora/Chinese Number Words" \
+    --background_train "path/to/data/background_images_train" \
+    --background_val "path/to/data/background_images_val" \
+    --annotations_file_path "path/to/data/annotations/tibetan_chinese_no/bg_example_0001.txt" \
+    --font_path_tibetan "path/to/fonts/Microsoft Himalaya.ttf" \
+    --font_path_chinese "path/to/fonts/simkai.ttf" \
+    --train_samples 2 \
+    --val_samples 2
+
+## List of altered scripts
+- main.py (for correct use, move the script to the [initial project directory](https://github.com/CodexAITeam/TibetanOCR/tree/synthetic_generation_tib_chi_no))
+- dataset_generator.py => altered to dataset_generator_tib_chi_no.py
+- text_renderer.py => altered to text_renderer_img_size.py
+
+## Script Details
+The script maps the corpus path inputs from main.py to the bounding boxes of their corresponding ann_class_id (YOLO class ID) in order to render different texts with generate_dataset_tib_chi_no.py.
+The ann_class_id values are parsed from a preconfigured annotation template named bg_PPN337138764X_00000005.txt, which is located in the Tibetan Layout Analyzer project. See our [Tibetan Numbers Dataset Folder](https://github.com/CodexAITeam/TibetanLayoutAnalyzer/tree/main/data/tibetan%20numbers) for sample files. Furthermore, the script uses different background images from that project in the format 1024x361
+because this reflects the original historical data format. The argparse input font_path_tibetan is used to display generated Tibetan text, while font_path_chinese is used for Chinese text.
+
+Here is the table of the label mapping:
+
+| Class Name            | Corpus Path                     | Planned Label ID Range* | ann_class_id / YOLO Class ID |
+|-----------------------|---------------------------------|-------------------------|------------------------------|
+| Tibetan Number Words  | `corpora_tibetan_numbers_path`  | 000-009                 | 0                            |
+| Tibetan Text Body     | `corpora_tibetan_text_path`     | 101-110                 | 1                            |
+| Chinese Number Words  | `corpora_chinese_numbers_path`  | 201-210                 | 2                            |
+
+\* see Limitations
+
+The different text inputs are given by:
+- Tibetan Numbers: tib_no_0001.txt to tib_no_0010.txt: Randomly selected
+- Tibetan Text: uvrip*.txt: Randomly selected
+- Chinese Numbers: chi_no_0001.txt to chi_no_0010.txt: Selected simultaneously (for instance, chi_no_0001.txt is selected when tib_no_0001.txt is selected)
+
+See our [Corpora Folder](https://github.com/CodexAITeam/TibetanOCR/tree/synthetic_generation_tib_chi_no/data/corpora) for sample files.
+
+## Generated synthetic image sample
+- generated_sample.png
+
+## Limitations and Outline for future development
+- label_dict does not yet produce the correct Planned Label ID Ranges because it only uses the Tibetan number file labels so far.
+- Augmentations are still very limited and will be expanded.
+
+## License
+This project is licensed under the MIT License - see the [LICENSE](https://github.com/CodexAITeam/TibetanOCR/blob/synthetic_generation_tib_chi_no/LICENSE) file for details.
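The simultaneous selection described above (the Chinese number file follows the randomly chosen Tibetan number file) mirrors the pairing logic in dataset_generator.py, which extracts the numeric suffix with a regex and builds the matching Chinese file name. A minimal, self-contained sketch of the idea, with illustrative file names:

```python
import random
import re

# Illustrative sketch of the paired corpus selection; file names follow the corpora layout above.
tibetan_files = [f"tib_no_{i:04d}.txt" for i in range(1, 11)]

tib_choice = random.choice(tibetan_files)              # Tibetan number word: randomly selected
number = re.search(r"tib_no_(\d+)", tib_choice).group(1)
chi_choice = f"chi_no_{number}.txt"                    # Chinese counterpart: same index, selected simultaneously
print(tib_choice, "->", chi_choice)
```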
diff --git a/tibetanDataGenerator/data/text_renderer.py b/tibetanDataGenerator/data/text_renderer.py
index 4cdddc4..895f0de 100644
--- a/tibetanDataGenerator/data/text_renderer.py
+++ b/tibetanDataGenerator/data/text_renderer.py
@@ -31,9 +31,15 @@ def set_font(self, font_path, font_size=24):
             print("Warning: Default font used.")
         return self
 
-    def add_text(self, text, position, box_size):
+    def add_text(self, text, position, box_size, rotation=0):
         """
         Adds text to the image at a specific position with automatic bounding.
+
+        Args:
+            text: Text to render
+            position: (x, y) position
+            box_size: (width, height) of text box
+            rotation: Rotation angle in degrees (0, 90, 180, 270)
         """
         if not self.font:
             raise ValueError("Font not set. Use set_font() before adding text.")
@@ -42,26 +48,64 @@ def add_text(self, text, position, box_size):
         box_w, box_h = box_size
         max_y = box_y + box_h
 
-        wrapped_text = []
-        for line in text.split('\n'):
-            while line:
-                for i in range(1, len(line) + 1):
-                    if self.draw.textlength(line[:i], font=self.font) > box_w:
-                        break
-                else:
-                    i = len(line)
-
-                wrapped_text.append(line[:i])
-                line = line[i:]
-
-        y_offset = 0
-        for line in wrapped_text:
-            left, top, right, bottom = self.font.getbbox(line)
-            line_height = bottom - top
-            if box_y + y_offset + line_height > max_y:
-                break
-            self.draw.text((box_x, box_y + y_offset), line, font=self.font, fill=(0, 0, 0))
-            y_offset += line_height
+        if rotation == 0:
+            # Standard horizontal text rendering
+            wrapped_text = []
+            for line in text.split('\n'):
+                while line:
+                    for i in range(1, len(line) + 1):
+                        if self.draw.textlength(line[:i], font=self.font) > box_w:
+                            break
+                    else:
+                        i = len(line)
+
+                    wrapped_text.append(line[:i])
+                    line = line[i:]
+
+            y_offset = 0
+            for line in wrapped_text:
+                left, top, right, bottom = self.font.getbbox(line)
+                line_height = bottom - top
+                if box_y + y_offset + line_height > max_y:
+                    break
+                self.draw.text((box_x, box_y + y_offset), line, font=self.font, fill=(0, 0, 0))
+                y_offset += line_height
+
+        elif rotation == 90:
+            # Vertical text rendering (90 degrees clockwise)
+            # Create a temporary image for the rotated text
+            temp_img = Image.new('RGBA', (box_h, box_w), (255, 255, 255, 0))
+            temp_draw = ImageDraw.Draw(temp_img)
+
+            # Render text on the temporary image
+            wrapped_text = []
+            for line in text.split('\n'):
+                while line:
+                    for i in range(1, len(line) + 1):
+                        if temp_draw.textlength(line[:i], font=self.font) > box_h:
+                            break
+                    else:
+                        i = len(line)
+                    wrapped_text.append(line[:i])
+                    line = line[i:]
+
+            y_offset = 0
+            for line in wrapped_text:
+                left, top, right, bottom = self.font.getbbox(line)
+                line_height = bottom - top
+                if y_offset + line_height > box_w:
+                    break
+                temp_draw.text((0, y_offset), line, font=self.font, fill=(0, 0, 0))
+                y_offset += line_height
+
+            # Rotate the temporary image and paste it
+            rotated = temp_img.rotate(-90, expand=True)
+            self.image.paste(rotated, (box_x, box_y), rotated)
+
+        else:
+            # For other rotations, fall back to standard rendering
+            print(f"Warning: Rotation {rotation}° not fully supported, using 0°")
+            return self.add_text(text, position, box_size, rotation=0)
 
         return self
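The 90° branch above renders the text onto a transparent scratch image, rotates it, and pastes it back with the alpha channel as the mask. A minimal standalone sketch of that PIL technique (the default font stands in for the project's Tibetan/Chinese TTF fonts):

```python
from PIL import Image, ImageDraw, ImageFont

base = Image.new("RGB", (400, 200), "white")
font = ImageFont.load_default()  # stand-in for the project's TTF fonts

# Draw onto a fully transparent RGBA scratch image
temp = Image.new("RGBA", (120, 40), (255, 255, 255, 0))
ImageDraw.Draw(temp).text((0, 0), "sample", font=font, fill=(0, 0, 0))

rotated = temp.rotate(-90, expand=True)  # -90° rotates 90° clockwise, as in the diff
base.paste(rotated, (20, 20), rotated)   # third argument: use the alpha channel as mask
base.save("rotated_text_demo.png")
```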
+ """ + if not os.path.exists(background_path): + raise FileNotFoundError(f"Background image {background_path} not found.") + self.image = Image.open(background_path).resize(self.image.size, Image.Resampling.LANCZOS) + self.draw = ImageDraw.Draw(self.image) + return self + + def set_font(self, font_path, font_size=24): + """ + Lade eine Schriftart für das Rendern von Text. + """ + try: + self.font = ImageFont.truetype(font_path, font_size) + except IOError: + self.font = ImageFont.load_default() + print("Warning: Default font used.") + return self + + def add_text(self, text, position, box_size): + """ + Fügt Text auf dem Bild an einer bestimmten Position mit automatischer Begrenzung hinzu. + """ + if not self.font: + raise ValueError("Font not set. Use set_font() before adding text.") + + box_x, box_y = position + box_w, box_h = box_size + max_y = box_y + box_h + + wrapped_text = [] + for line in text.split('\n'): + while line: + for i in range(1, len(line) + 1): + if self.draw.textlength(line[:i], font=self.font) > box_w: + break + else: + i = len(line) + + wrapped_text.append(line[:i]) + line = line[i:] + + y_offset = 0 + for line in wrapped_text: + left, top, right, bottom = self.font.getbbox(line) + line_height = bottom - top + if box_y + y_offset + line_height > max_y: + break + self.draw.text((box_x, box_y + y_offset), line, font=self.font, fill=(0, 0, 0)) + y_offset += line_height + + return self + + def add_bounding_box(self, position, size, color=(255, 0, 0)): + """ + Zeichne eine Bounding Box auf dem Bild. + """ + x, y = position + w, h = size + self.draw.rectangle([x, y, x + w, y + h], outline=color, width=2) + return self + + def apply_augmentation(self, augmentation_strategy): + """ + Apply an augmentation strategy to the current image. + + :param augmentation_strategy: An instance of AugmentationStrategy + :return: self for method chaining + """ + if not isinstance(augmentation_strategy, AugmentationStrategy): + raise ValueError("augmentation_strategy must be an instance of AugmentationStrategy") + + self.image = augmentation_strategy.apply(self.image) + self.draw = ImageDraw.Draw(self.image) + return self + + + def save(self, output_path): + """ + Speichert das fertige Bild. + """ + os.makedirs(os.path.dirname(output_path), exist_ok=True) + self.image.save(output_path) + return self + + def show(self): + """ + Zeigt das Bild zur Vorschau an. 
+ """ + self.image.show() + return self diff --git a/tibetanDataGenerator/dataset_generator.py b/tibetanDataGenerator/dataset_generator.py index 0cda876..7af2a70 100644 --- a/tibetanDataGenerator/dataset_generator.py +++ b/tibetanDataGenerator/dataset_generator.py @@ -1,253 +1,798 @@ -import argparse -import multiprocessing -import random -import re -import os -from typing import Tuple, Dict, List - -import yaml -from pathlib import Path -from collections import OrderedDict -from ultralytics.data.utils import DATASETS_DIR -from tibetanDataGenerator.utils.data_loader import TextFactory -from tibetanDataGenerator.data.text_renderer import ImageBuilder -from tibetanDataGenerator.data.augmentation import RotateAugmentation, NoiseAugmentation -from tibetanDataGenerator.utils.bounding_box import BoundingBoxCalculator -from tibetanDataGenerator.utils.identifier import hash_current_time - -# Define a dictionary of augmentation strategies -augmentation_strategies = { - 'rotate': RotateAugmentation(), - 'noise': NoiseAugmentation() -} - - -def generate_dataset(args: argparse.Namespace, validation: bool = False) -> Dict: - """ - Generate a dataset for training or validation. - - Args: - args (argparse.Namespace): Command-line arguments. - validation (bool): Whether to generate validation dataset. Defaults to False. - - Returns: - Dict: A dictionary containing dataset information. - """ - dataset_info = _setup_dataset_info(args, validation) - label_dict = _create_label_dict(args) - background_images = _load_background_images(dataset_info['background_folder']) - - generation_args = _prepare_generation_args(args, dataset_info, label_dict, background_images) - - results = _generate_images_in_parallel(generation_args, dataset_info['no_samples']) - - return _create_dataset_dict(dataset_info['folder'], label_dict) - - -def generate_synthetic_image( - images: List[str], - label_dict: Dict[str, int], - folder_with_background: str, - folder_with_corpora: str, - folder_for_train_data: str, - debug: bool = True, - font_path: str = 'res/Microsoft Himalaya.ttf', - single_label: bool = False, - image_size: int = 1024, - augmentation: str = "noise" -) -> Tuple[str, str]: - # Constants - FONT_SIZE = 24 - BORDER_OFFSET_RATIO = 0.05 - - ctr = hash_current_time() - border_offset = int(BORDER_OFFSET_RATIO * image_size) - - # Image setup - image_path = _select_random_background(folder_with_background, images) - builder = _setup_image_builder(image_path, image_size, font_path, FONT_SIZE) - - # Text generation and positioning - text, file_name = _generate_text(folder_with_corpora) - text_position, box_position, fitted_box_size = _calculate_text_position( - text, image_size, border_offset, font_path, FONT_SIZE - ) - - # Add text and bounding box - builder.add_text(text, text_position, fitted_box_size) - if debug == True: - builder.add_bounding_box(box_position, fitted_box_size) - - # Apply augmentation - _apply_augmentation(builder, augmentation) - - # Prepare and save image and label - image_filename, label_filename = _save_image_and_label( - builder, text, ctr, folder_for_train_data, label_dict, - single_label, file_name, box_position, fitted_box_size, - image_size, debug - ) - - return image_filename, label_filename - - -def _select_random_background(folder: str, images: List[str]) -> str: - return os.path.join(folder, random.choice(images)) - - -def _setup_image_builder(image_path: str, image_size: int, font_path: str, font_size: int) -> ImageBuilder: - builder = ImageBuilder((image_size, image_size)) - 
builder.set_background(image_path) - builder.set_font(font_path, font_size=font_size) - return builder - - -def _generate_text(folder_with_corpora: str) -> Tuple[str, str]: - text_generator = TextFactory.create_text_source("corpus", folder_with_corpora) - return text_generator.generate_text() - - -def _calculate_text_position( - text: str, - image_size: int, - border_offset: int, - font_path: str, - font_size: int -) -> Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int]]: - max_box_size_w = random.randint(100, image_size) - max_box_size = (max_box_size_w, 400) - - fitted_box_size = BoundingBoxCalculator.fit(text, max_box_size, font_size=font_size, font_path=font_path) - - text_pos_x = random.randint(border_offset, image_size - (fitted_box_size[0] + border_offset)) - text_pos_y = random.randint(border_offset, image_size - (fitted_box_size[1] + border_offset)) - - text_position = (text_pos_x, text_pos_y) - box_position = (text_pos_x + int(fitted_box_size[0] / 2), text_pos_y - int(fitted_box_size[1] / 2)) - - return text_position, box_position, fitted_box_size - - -def _apply_augmentation(builder: ImageBuilder, augmentation: str): - augmentation_strategy = augmentation_strategies[augmentation.lower()] - builder.apply_augmentation(augmentation_strategy) - - -def _save_image_and_label( - builder: ImageBuilder, - text: str, - ctr: str, - folder_for_train_data: str, - label_dict: Dict[str, int], - single_label: bool, - file_name: str, - box_position: Tuple[int, int], - fitted_box_size: Tuple[int, int], - image_size: int, - debug: bool -) -> Tuple[str, str]: - label = next(iter(label_dict.keys())) if single_label else os.path.splitext(file_name)[0] - label_id = label_dict[label] - - image_filename = f"{label}_{ctr}.png" - image_path = os.path.join(folder_for_train_data, 'images', image_filename) - builder.save(image_path) - - bbox_str = _create_bbox_string(label_id, box_position, fitted_box_size, image_size) - - labels_dir = os.path.join(folder_for_train_data, 'labels') - os.makedirs(labels_dir, exist_ok=True) - - label_filename = f"{label}_{ctr}.txt" - label_path = os.path.join(labels_dir, label_filename) - with open(label_path, 'w') as f: - f.write(bbox_str) - - if debug == True: - print(f"Generated sample: {image_filename}") - print(f"Bounding boxes:\n{bbox_str}") - - return image_filename, label_filename - - -def _create_bbox_string(label_id: int, box_position: Tuple[int, int], box_size: Tuple[int, int], image_size: int) -> str: - x, y = box_position - w, h = box_size - return f"{label_id} {x / image_size} {y / image_size} {w / image_size} {h / image_size}\n" - - -def _fill_label_dict(folder_path): - label_dict = {} - label_id = 0 - - # Get all txt files - files = [f for f in os.listdir(folder_path) if f.endswith(".txt")] - - # Sort files based on the numeric part - sorted_files = sorted(files, key=lambda x: int(re.findall(r'\d+', x)[-1])) - - for filename in sorted_files: - label = os.path.splitext(filename)[0] - if label not in label_dict: - label_dict[label] = label_id - label_id += 1 - - return label_dict - - -def _setup_dataset_info(args: argparse.Namespace, validation: bool) -> Dict: - """Set up basic dataset information based on validation flag.""" - if validation: - return { - 'background_folder': args.background_val, - 'folder': f'{args.dataset_name}/val/', - 'no_samples': args.val_samples - } - else: - return { - 'background_folder': args.background_train, - 'folder': f'{args.dataset_name}/train/', - 'no_samples': args.train_samples - } - - -def _create_label_dict(args: 
argparse.Namespace) -> Dict[str, int]: - """Create a dictionary of labels based on single_label flag.""" - if args.single_label: - return {'tibetan': 0} - else: - return _fill_label_dict(args.corpora_folder) - - -def _load_background_images(folder: str) -> List[str]: - """Load background image filenames from the specified folder.""" - return [file for file in os.listdir(folder) if file.lower().endswith(('.jpg', '.png'))] - - -def _prepare_generation_args(args: argparse.Namespace, dataset_info: Dict, label_dict: Dict, - images: List[str]) -> Tuple: - """Prepare arguments for image generation.""" - return ( - images, label_dict, dataset_info['background_folder'], args.corpora_folder, - dataset_info['folder'], args.debug, args.font_path, args.single_label, - args.image_size, args.augmentation - ) - - -def _generate_images_in_parallel(generation_args: Tuple, no_samples: int) -> List: - """Generate images in parallel using multiprocessing.""" - max_parallel_calls = os.cpu_count() - with multiprocessing.Pool(max_parallel_calls) as pool: - return pool.starmap(generate_synthetic_image, [generation_args] * no_samples) - - -def _create_dataset_dict(folder: str, label_dict: Dict[str, int]) -> OrderedDict: - """Create a dictionary containing dataset information.""" - label_dict_swap = {v: k for k, v in label_dict.items()} - return OrderedDict([ - ('path', f"../{folder}"), - ('train', 'train/images'), - ('val', 'val/images'), - ('nc', len(label_dict_swap)), - ('names', label_dict_swap) - ]) \ No newline at end of file +import argparse +import multiprocessing +import random +import re +import os +import csv +import time +import traceback +from typing import Tuple, Dict, List, Optional # Added Optional + +import yaml +from pathlib import Path +from collections import OrderedDict +try: + from ultralytics.data.utils import DATASETS_DIR +except ImportError: + DATASETS_DIR = "./datasets" # Fallback if ultralytics not installed +from tibetanDataGenerator.utils.data_loader import TextFactory +from tibetanDataGenerator.data.text_renderer import ImageBuilder +from tibetanDataGenerator.data.augmentation import RotateAugmentation, NoiseAugmentation, \ + AugmentationStrategy +from tibetan_utils.image_utils import BoundingBoxCalculator +from tibetan_utils.io_utils import hash_current_time + +# Define a dictionary of augmentation strategies +augmentation_strategies: Dict[str, AugmentationStrategy] = { + 'rotate': RotateAugmentation(), + 'noise': NoiseAugmentation() +} + +def _parse_yolo_annotations(file_path: str) -> List[Tuple[int, float, float, float, float]]: + """ + Parses a YOLO annotation file. + Each line is expected to be: class_id center_x center_y width height + Returns a list of tuples (class_id, x_center, y_center, width, height). + """ + annotations = [] + if not file_path: # If file_path is None or empty string + return annotations + + if not os.path.exists(file_path): + print(f"Warning: Annotation file '{file_path}' not found. 
No annotations will be loaded from this file.") + return annotations + + try: + with open(file_path, 'r', encoding='utf-8') as f: + for i, line in enumerate(f): + line_strip = line.strip() + if not line_strip: # Skip empty lines + continue + parts = line_strip.split() + if len(parts) == 5: + try: + class_id = int(parts[0]) + x_center = float(parts[1]) + y_center = float(parts[2]) + width = float(parts[3]) + height = float(parts[4]) + + # Basic validation for YOLO coordinates (normalized) + if not (0.0 <= x_center <= 1.0 and \ + 0.0 <= y_center <= 1.0 and \ + 0.0 <= width <= 1.0 and \ + 0.0 <= height <= 1.0): + # This warning can be made conditional on debug flag if too verbose + # print(f"Debug: Annotation values out of [0,1] range in {file_path}, line {i+1}: {line_strip}") + pass + + # Ensure width and height are positive for valid bounding box + if width <= 0 or height <= 0: + print( + f"Warning: Non-positive width/height in annotation file {file_path}, line {i + 1}: {line_strip}. Skipping this annotation.") + continue + + annotations.append((class_id, x_center, y_center, width, height)) + except ValueError: + print( + f"Warning: Malformed line (numeric conversion) in annotation file {file_path}, line {i + 1}: {line_strip}") + else: # Incorrect number of parts + print( + f"Warning: Incorrect number of parts in line in annotation file {file_path}, line {i + 1}: {line_strip}") + except Exception as e: + print(f"Error reading or parsing annotation file {file_path}: {e}") + return annotations + + +def generate_dataset(args: argparse.Namespace, validation: bool = False) -> Dict: + """ + Generate a dataset for training or validation. + + Args: + args (argparse.Namespace): Command-line arguments. + validation (bool): Whether to generate validation dataset. Defaults to False. + + Returns: + Dict: A dictionary containing dataset information. + """ + print(f"Starting dataset generation (validation={validation})...") + start_time = time.time() + + dataset_info = _setup_dataset_info(args, validation) + print(f"Dataset info setup completed. Target samples: {dataset_info['no_samples']}") + + label_dict = _create_label_dict(args) + print(f"Label dictionary created with {len(label_dict)} labels: {list(label_dict.keys())}") + + background_images = _load_background_images(dataset_info['background_folder']) + print(f"Loaded {len(background_images)} background images from {dataset_info['background_folder']}") + + # _prepare_generation_args now gets annotations_file_path from args + generation_args_tuple = _prepare_generation_args(args, dataset_info, label_dict, background_images) + print("Generation arguments prepared") + + results = _generate_images_in_parallel(generation_args_tuple, dataset_info['no_samples']) + + elapsed = time.time() - start_time + successful_results = [r for r in results if r[0] and r[1]] # Filter out failed generations + print(f"Dataset generation completed in {elapsed:.1f}s. 
Success rate: {len(successful_results)}/{len(results)}") + + return _create_dataset_dict(str(dataset_info['folder']), label_dict) + + +def generate_synthetic_image( + images: List[str], + label_dict: Dict[str, int], + folder_with_background: str, + corpora_tibetan_numbers_path: str, + corpora_tibetan_text_path: str, + corpora_chinese_numbers_path: str, + folder_for_train_data: str, + debug: bool = True, + font_path_tibetan: str = 'res/Microsoft Himalaya.ttf', + font_path_chinese: str = 'res/simkai.ttf', + single_label: bool = False, + image_width: int = 1024, + image_height: int = 361, + augmentation: str = "noise", + annotations_file_path: Optional[str] = None +) -> Tuple[str, str]: + """ + Generate a synthetic image with improved error handling and resource management. + """ + try: + return _generate_synthetic_image_impl( + images, label_dict, folder_with_background, + corpora_tibetan_numbers_path, corpora_tibetan_text_path, corpora_chinese_numbers_path, + folder_for_train_data, debug, font_path_tibetan, font_path_chinese, + single_label, image_width, image_height, augmentation, annotations_file_path + ) + except Exception as e: + # Log the error and return empty paths to indicate failure + print(f"Error in generate_synthetic_image: {e}") + if debug: + import traceback + traceback.print_exc() + return "", "" + + +def _generate_synthetic_image_impl( + images: List[str], + label_dict: Dict[str, int], + folder_with_background: str, + corpora_tibetan_numbers_path: str, + corpora_tibetan_text_path: str, + corpora_chinese_numbers_path: str, + folder_for_train_data: str, + debug: bool = True, + font_path_tibetan: str = 'res/Microsoft Himalaya.ttf', + font_path_chinese: str = 'res/simkai.ttf', + single_label: bool = False, + image_width: int = 1024, + image_height: int = 361, + augmentation: str = "noise", + annotations_file_path: Optional[str] = None +) -> Tuple[str, str]: + # Font configuration + BORDER_OFFSET_RATIO = 0.05 + font_size_class1 = None + font_size_0_2 = None + + ctr = hash_current_time() + + border_offset_x = int(BORDER_OFFSET_RATIO * image_width) + border_offset_y = int(BORDER_OFFSET_RATIO * image_height) + + image_path_bg = _select_random_background(folder_with_background, images) + # Determine which font to use based on annotation class + current_font_path = font_path_tibetan # Default to Tibetan font + if annotations_file_path: + parsed_annotations = _parse_yolo_annotations(annotations_file_path) + if parsed_annotations and parsed_annotations[0][0] == 2: # Check first annotation's class_id + current_font_path = font_path_chinese + + builder = _setup_image_builder(image_path_bg, image_width, image_height, current_font_path, 24) # Default font size only used if no annotations + + bbox_str_list = [] # Collect bounding box strings for all text instances + tibetan_number_match = None # Store the matching number if we find a Tibetan number file + + # ---- Start: Draw bounding boxes from YOLO annotation file ---- + if annotations_file_path: + parsed_annotations = _parse_yolo_annotations(annotations_file_path) + for ann_class_id, norm_cx, norm_cy, norm_w, norm_h in parsed_annotations: + # Convert YOLO normalized coordinates to pixel coordinates for drawing + x_center_pixel = norm_cx * image_width + y_center_pixel = norm_cy * image_height + pixel_w = norm_w * image_width + pixel_h = norm_h * image_height + + # Calculate top-left corner for add_bounding_box + tl_x = x_center_pixel - (pixel_w / 2) + tl_y = y_center_pixel - (pixel_h / 2) + + draw_tl_pos = (int(round(tl_x)), 
int(round(tl_y))) + draw_box_size = (int(round(pixel_w)), int(round(pixel_h))) + + # Draw only if width and height are positive + if draw_box_size[0] > 0 and draw_box_size[1] > 0: + # Select the text corpus based on ann_class_id + if ann_class_id == 0: # Tibetan numbers + text, file_name_from_corpus = _generate_text(corpora_tibetan_numbers_path) + # Calculate font size for class 1 with bounding box constraints + text_for_sizing = text if text else "default" + max_font = BoundingBoxCalculator.find_max_font( + text_for_sizing, + (draw_box_size[0], draw_box_size[1]), + font_path_tibetan, + max_size=100, + debug=debug + ) + font_size_class1 = random.randint(24, max(24, min(100, max_font))) + + # Set sibling classes to be ±1-2 sizes different + delta = random.choice([-2, -1, 1, 2]) + font_size_0_2 = max(1, min(100, font_size_class1 + delta)) + + builder.set_font(font_path_tibetan, font_size_class1) + # Extract the number part from the Tibetan filename + try: + tibetan_number_match = re.search(r'tib_no_(\d+)', file_name_from_corpus) + if tibetan_number_match: + tibetan_number_match = tibetan_number_match.group(1) + except: + tibetan_number_match = None + elif ann_class_id == 1: # Tibetan text + text, file_name_from_corpus = _generate_text(corpora_tibetan_text_path) + # Calculate font size for class 1 with bounding box constraints + text_for_sizing = text if text else "default" + max_font = BoundingBoxCalculator.find_max_font( + text_for_sizing, + (draw_box_size[0], draw_box_size[1]), + font_path_tibetan, + max_size=100, + debug=debug + ) + font_size_class1 = random.randint(24, max(24, min(100, max_font))) + builder.set_font(font_path_tibetan, font_size_class1) + elif ann_class_id == 2: # Chinese numbers + # Use the same number as Tibetan if available + chinese_number = f"chi_no_{tibetan_number_match}" if tibetan_number_match else None + text, file_name_from_corpus = _generate_text(corpora_chinese_numbers_path, chinese_number) + builder.set_font(font_path_chinese, font_size_0_2) + else: + if debug: + print(f"Debug: Unknown ann_class_id {ann_class_id}. 
Skipping this annotation box.") + continue + + # Ensure the text fits within the bounding box + # Calculate actual text dimensions and centered position + actual_text_box_size = BoundingBoxCalculator.fit( + text, + draw_box_size, + font_size=font_size_class1 if ann_class_id == 1 else font_size_0_2, + font_path=current_font_path, + debug=debug + ) + # Calculate random offset based on class ID + def get_offset(box_dim, percentage): + max_offset = box_dim * percentage / 100 + return random.uniform(-max_offset, max_offset) + + # Apply different variation based on class ID + if ann_class_id in [0, 2]: # Tibetan and Chinese numbers + x_offset = get_offset(draw_box_size[0], 10) + y_offset = get_offset(draw_box_size[1], 10) + else: # Tibetan text (class 1) + x_offset = get_offset(draw_box_size[0], 10) + y_offset = get_offset(draw_box_size[1], 10) + + # Calculate centered position with random offset + base_x = draw_tl_pos[0] + (draw_box_size[0] - actual_text_box_size[0]) // 2 + base_y = draw_tl_pos[1] + (draw_box_size[1] - actual_text_box_size[1]) // 2 + + # Apply offsets and clamp to stay within bounding box + text_tl_x = int(base_x + x_offset) + text_tl_y = int(base_y + y_offset) + + # Ensure text stays within bounding box + text_tl_x = max(draw_tl_pos[0], min(text_tl_x, draw_tl_pos[0] + draw_box_size[0] - actual_text_box_size[0])) + text_tl_y = max(draw_tl_pos[1], min(text_tl_y, draw_tl_pos[1] + draw_box_size[1] - actual_text_box_size[1])) + text_render_top_left_pos = (text_tl_x, text_tl_y) + yolo_box_center_pos = (int(round(x_center_pixel)), int(round(y_center_pixel))) + + # Apply rotation for Tibetan numbers (class 0) + rotation_angle = 90 if ann_class_id == 0 else 0 + builder.add_text(text, text_render_top_left_pos, actual_text_box_size, rotation=rotation_angle) + # Get the base filename without extension + label_key = os.path.splitext(file_name_from_corpus)[0] + + # For Tibetan numbers (class 0), ensure we use the tib_no_ prefix + if ann_class_id == 0: + if not label_key.startswith('tib_no_'): + # Extract the number from the filename if it exists + try: + num_part = re.search(r'\d+', label_key).group() + label_key = f'tib_no_{num_part.zfill(4)}' # Format as tib_no_0001 + except AttributeError: + label_key = 'tib_no_0001' # Default fallback + + # For ann_class_id 0, always use 0 as the label_id + # For other classes, get label ID from dictionary or use class ID as fallback + if ann_class_id == 0: + label_id = 0 + else: + label_id = label_dict.get(label_key, ann_class_id) + if label_key not in label_dict and debug: + print(f"Debug: Label '{label_key}' not found in label_dict. Using class_id {ann_class_id}") + + bbox_str = _create_bbox_string( + label_id, + yolo_box_center_pos, + actual_text_box_size, + image_width, + image_height + ) + bbox_str_list.append(bbox_str) + + if debug: + builder.add_bounding_box(text_render_top_left_pos, actual_text_box_size, color=(0, 255, 0)) # Green + builder.add_bounding_box(draw_tl_pos, draw_box_size, color=(255, 0, 0)) # Red + + else: + if debug: + print( + f"Debug: Skipping drawing annotation box from file (class {ann_class_id}) due to non-positive dimensions: size {draw_box_size}") + + if augmentation.lower() != 'none' and augmentation.lower() in augmentation_strategies: + _apply_augmentation(builder, augmentation) + elif augmentation.lower() != 'none': + print(f"Warning: Augmentation strategy '{augmentation}' not found. 
Skipping augmentation.") + + image_filename_saved = f"{ctr}.png" + image_full_path = os.path.join(folder_for_train_data, 'images', image_filename_saved) + os.makedirs(os.path.dirname(image_full_path), exist_ok=True) + builder.save(image_full_path) + + labels_dir = os.path.join(folder_for_train_data, 'labels') + os.makedirs(labels_dir, exist_ok=True) + + label_filename_saved = f"{ctr}.txt" + label_full_path = os.path.join(labels_dir, label_filename_saved) + with open(label_full_path, 'w', encoding='utf-8') as f: + f.writelines(bbox_str_list) # Write all bounding box strings into the file + + if debug: + print(f"Generated sample: {image_full_path}") + print(f"Label file: {label_full_path}") + print(f"Bounding boxes (YOLO format for synthetic text):\n{''.join(bbox_str_list).strip()}") + + return image_full_path, label_full_path + + +def _select_random_background(folder: str, images: List[str]) -> str: + if not images: + raise ValueError(f"No images found in background folder: {folder}. Cannot select a random background.") + return os.path.join(folder, random.choice(images)) + + +def _setup_image_builder(image_path_bg: str, image_width: int, image_height: int, + font_path: str, font_size: int) -> ImageBuilder: + builder = ImageBuilder(image_size=(image_width, image_height)) + try: + if image_path_bg and os.path.exists(image_path_bg): + builder.set_background(image_path_bg) + else: + if image_path_bg: + print(f"Warning: Background image {image_path_bg} not found. Using default white background.") + except FileNotFoundError: + print(f"Warning: Background image {image_path_bg} not found during set_background. Using default white background.") + except Exception as e: + print(f"Error setting background {image_path_bg}: {e}. Using default white background.") + + # Font will be set separately during text rendering + return builder + + +def _generate_text(folder_with_corpora: str, matching_number: str = None) -> Tuple[str, str]: + text_generator = TextFactory.create_text_source("corpus", folder_with_corpora) + if matching_number: + # If a matching number is specified, try to find the exact file + matching_file = f"{matching_number}.txt" + file_path = os.path.join(folder_with_corpora, matching_file) + if os.path.exists(file_path): + with open(file_path, 'r', encoding='utf-8') as f: + text = f.read().strip() + return text, matching_file + # Fall back to random selection if no matching number or file not found + return text_generator.generate_text() + + +def _calculate_text_layout( + text: str, + image_width: int, + image_height: int, + border_offset_x: int, + border_offset_y: int, + font_path: str, + font_size: int +) -> Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int]]: + min_text_box_width = font_size * 2 + min_text_box_height = int(font_size * 1.2) + + max_width_for_text_area = image_width - 2 * border_offset_x + max_height_for_text_area = image_height - 2 * border_offset_y + + if max_width_for_text_area < min_text_box_width or max_height_for_text_area < min_text_box_height: + # Fallback: try to use at least minimal dimensions if text is very short. + # This might not be ideal if text is truly too large for the area. + # A more robust solution might involve text wrapping or scaling, but that's complex. + print( + f"Warning: Text area ({max_width_for_text_area}x{max_height_for_text_area}) might be too small for text. 
Attempting to fit.") + max_width_for_text_area = max(max_width_for_text_area, min_text_box_width) + max_height_for_text_area = max(max_height_for_text_area, min_text_box_height) + + conceptual_box_w = random.randint(min_text_box_width, max_width_for_text_area) + conceptual_box_h = random.randint(min_text_box_height, max_height_for_text_area) + max_placement_box = (conceptual_box_w, conceptual_box_h) + + actual_text_box_size = BoundingBoxCalculator.fit(text, max_placement_box, font_size=font_size, font_path=font_path, debug=False) + actual_w, actual_h = actual_text_box_size + + if actual_w <= 0 or actual_h <= 0: + print( + f"Warning: BoundingBoxCalculator.fit returned non-positive dimensions ({actual_w}x{actual_h}) for text: '{text[:50]}...'. Defaulting to minimal.") + actual_w = max(actual_w, font_size // 2 if text else 1) + actual_h = max(actual_h, font_size // 2 if text else 1) + actual_text_box_size = (actual_w, actual_h) + + pos_x_upper_bound = image_width - border_offset_x - actual_w + pos_y_upper_bound = image_height - border_offset_y - actual_h + + # Ensure random range is valid: lower_bound <= upper_bound + # If upper bound is less than lower, it means the box is too large. + # We should place it at the border_offset in such cases. + tl_pos_x = random.randint(border_offset_x, max(border_offset_x, + pos_x_upper_bound)) if pos_x_upper_bound >= border_offset_x else border_offset_x + tl_pos_y = random.randint(border_offset_y, max(border_offset_y, + pos_y_upper_bound)) if pos_y_upper_bound >= border_offset_y else border_offset_y + + text_render_top_left_pos = (tl_pos_x, tl_pos_y) + + center_x = tl_pos_x + actual_w // 2 + center_y = tl_pos_y + actual_h // 2 + yolo_box_center_pos = (center_x, center_y) + + return text_render_top_left_pos, yolo_box_center_pos, actual_text_box_size + + +def _apply_augmentation(builder: ImageBuilder, augmentation_name: str): + augmentation_strategy = augmentation_strategies[augmentation_name.lower()] + builder.apply_augmentation(augmentation_strategy) + + +def _save_image_and_label( + builder: ImageBuilder, + text_content: str, + ctr: str, + folder_for_train_data: str, + label_dict: Dict[str, int], + single_label: bool, + file_name_from_corpus: str, + yolo_box_center_pos: Tuple[int, int], + actual_text_box_size: Tuple[int, int], + image_width: int, + image_height: int, + debug: bool +) -> Tuple[str, str]: + label_str = next(iter(label_dict.keys())) if single_label else os.path.splitext(file_name_from_corpus)[0] + if label_str not in label_dict: + print( + f"Warning: Label '{label_str}' from corpus file '{file_name_from_corpus}' not found in label_dict. Defaulting to first available label.") + if not label_dict: + raise ValueError("Label dictionary is empty. 
Cannot determine a label.") + label_str = next(iter(label_dict.keys())) + label_id = label_dict[label_str] + + image_base_filename = f"{label_str}_{ctr}.png" + image_full_path = os.path.join(folder_for_train_data, 'images', image_base_filename) + builder.save(image_full_path) + + bbox_str = _create_bbox_string( + label_id, yolo_box_center_pos, actual_text_box_size, image_width, image_height + ) + + labels_dir = os.path.join(folder_for_train_data, 'labels') + os.makedirs(labels_dir, exist_ok=True) + + label_base_filename = f"{label_str}_{ctr}.txt" + label_full_path = os.path.join(labels_dir, label_base_filename) + with open(label_full_path, 'w', encoding='utf-8') as f: + f.write(bbox_str) + + if debug: + print(f"Generated sample: {image_full_path}") + print(f"Label file: {label_full_path}") + print(f"Bounding box (YOLO format for synthetic text):\n{bbox_str.strip()}") + + return image_full_path, label_full_path + + +def _create_bbox_string( + label_id: int, + box_center_xy: Tuple[int, int], + box_wh: Tuple[int, int], + image_width: int = 1024, + image_height: int = 361 +) -> str: + center_x, center_y = box_center_xy + box_w, box_h = box_wh + + if image_width == 0: raise ValueError("image_width cannot be zero.") + if image_height == 0: raise ValueError("image_height cannot be zero.") + + norm_center_x = max(0.0, min(1.0, center_x / image_width)) + norm_center_y = max(0.0, min(1.0, center_y / image_height)) + norm_w = max(0.0, min(1.0, box_w / image_width)) + norm_h = max(0.0, min(1.0, box_h / image_height)) + + return f"{label_id} {norm_center_x:.6f} {norm_center_y:.6f} {norm_w:.6f} {norm_h:.6f}\n" + + +def _fill_label_dict(folder_path: str) -> Dict[str, int]: + label_dict = OrderedDict() + label_id_counter = 0 + + if not os.path.isdir(folder_path): + print(f"Warning: Corpora folder '{folder_path}' not found. Returning empty label dict.") + return label_dict + + # Get all .txt files and sort them numerically by their suffix + files = [f for f in os.listdir(folder_path) if f.endswith(".txt") and f.startswith("tib_no_")] + + try: + # Sort files by their numeric suffix (tib_no_0001.txt -> 1) + sorted_files = sorted( + files, + key=lambda x: int(x.split("_")[-1].split(".")[0]) + ) + except (ValueError, IndexError): + print("Warning: Could not sort corpus files numerically. Using simple alphabetical sort.") + sorted_files = sorted(files) + + for filename in sorted_files: + label_name = os.path.splitext(filename)[0] # Gets 'tib_no_0001' from 'tib_no_0001.txt' + if label_name not in label_dict: + label_dict[label_name] = label_id_counter + label_id_counter += 1 + + if not label_dict: + print(f"Warning: No valid .txt files found in corpora folder '{folder_path}'. Label dictionary is empty.") + return label_dict + + +def _setup_dataset_info(args: argparse.Namespace, validation: bool) -> Dict: + base_output_folder = Path(args.dataset_name) + + if validation: + folder_path = base_output_folder / 'val' + num_samples = args.val_samples + bg_folder = args.background_val + else: + folder_path = base_output_folder / 'train' + num_samples = args.train_samples + bg_folder = args.background_train + + os.makedirs(folder_path / 'images', exist_ok=True) + os.makedirs(folder_path / 'labels', exist_ok=True) + + return { + 'background_folder': bg_folder, + 'folder': folder_path, + 'no_samples': num_samples + } + + +def _read_labels_from_csv(csv_path: str) -> Dict[str, int]: + """ + Read label names from a CSV file. + The CSV file should have columns 'yolo_label' and 'label_name'. 
+ Returns a dictionary mapping label names to their corresponding class IDs. + """ + label_dict = OrderedDict() + + try: + with open(csv_path, 'r', encoding='utf-8') as f: + reader = csv.DictReader(f) + for row in reader: + if 'yolo_label' in row and 'label_name' in row: + class_id = int(row['yolo_label']) + label_name = row['label_name'] + label_dict[label_name] = class_id + except Exception as e: + print(f"Error reading CSV file {csv_path}: {e}") + + if not label_dict: + print(f"Warning: No valid labels found in CSV file '{csv_path}'. Label dictionary is empty.") + + return label_dict + +def _create_label_dict(args: argparse.Namespace) -> Dict[str, int]: + if args.single_label: + return {'tibetan': 0} + else: + # Check if annotations_file_path is provided and has a corresponding CSV file + if args.annotations_file_path and os.path.exists(args.annotations_file_path): + # Try to find the corresponding CSV file + csv_path = args.annotations_file_path.replace('.txt', '.csv') + if os.path.exists(csv_path): + return _read_labels_from_csv(csv_path) + + # Fallback to the original method if CSV doesn't exist + return _fill_label_dict(args.corpora_tibetan_numbers_path) + + +def _load_background_images(folder: str) -> List[str]: + if not os.path.isdir(folder): + print(f"Warning: Background folder '{folder}' not found. No background images will be loaded.") + return [] + return [file for file in os.listdir(folder) if file.lower().endswith(('.jpg', '.jpeg', '.png'))] + + +def _prepare_generation_args(args: argparse.Namespace, dataset_info: Dict, label_dict: Dict, + images_bg_list: List[str]) -> Tuple: + """Prepare arguments for each call to generate_synthetic_image.""" + return ( + images_bg_list, + label_dict, + dataset_info['background_folder'], + args.corpora_tibetan_numbers_path, + args.corpora_tibetan_text_path, + args.corpora_chinese_numbers_path, + dataset_info['folder'], + args.debug, + args.font_path_tibetan, + args.font_path_chinese, + args.single_label, + args.image_width, + args.image_height, + args.augmentation, + args.annotations_file_path + ) + + +def _generate_images_in_parallel(generation_args_tuple: Tuple, no_samples: int) -> List: + if no_samples <= 0: + return [] + + list_of_generation_args = [generation_args_tuple] * no_samples + # Ensure os.cpu_count() returns a valid number or default to 1 + num_cpus = os.cpu_count() + # Reduce parallel processes to avoid resource conflicts + max_parallel_calls = min((num_cpus // 2) if num_cpus and num_cpus > 2 else 1, no_samples, 4) + + if max_parallel_calls == 0: + max_parallel_calls = 1 # Ensure at least one process + + print(f"Generating {no_samples} images using {max_parallel_calls} parallel processes...") + + results = [] + pool = None + + try: + # Use spawn method to avoid potential issues with fork on some systems + ctx = multiprocessing.get_context('spawn') + pool = ctx.Pool(processes=max_parallel_calls) + + # Add timeout and progress tracking + import time + start_time = time.time() + timeout_seconds = 300 # 5 minutes timeout + + # Use starmap_async for better control + async_result = pool.starmap_async(generate_synthetic_image, list_of_generation_args) + + # Wait with timeout and progress updates + while not async_result.ready(): + elapsed = time.time() - start_time + if elapsed > timeout_seconds: + print(f"Timeout after {timeout_seconds} seconds. 
Terminating processes...") + pool.terminate() + pool.join() + raise TimeoutError(f"Image generation timed out after {timeout_seconds} seconds") + + # Show progress every 10 seconds + if int(elapsed) % 10 == 0 and elapsed > 0: + print(f"Still generating... ({elapsed:.0f}s elapsed)") + + time.sleep(1) + + results = async_result.get() + elapsed = time.time() - start_time + print(f"Successfully generated {len(results)} images in {elapsed:.1f} seconds") + + except Exception as e: + print(f"Error during parallel image generation: {e}") + if pool: + try: + pool.terminate() # Forcefully terminate worker processes + pool.join(timeout=10) # Wait max 10 seconds for cleanup + except Exception as cleanup_error: + print(f"Error during pool cleanup: {cleanup_error}") + + # Fallback to sequential processing + print("Falling back to sequential processing...") + results = _generate_images_sequentially(generation_args_tuple, no_samples) + + finally: + if pool: + try: + pool.close() + pool.join() + except Exception: + pass # Ignore cleanup errors + + return results + + +def _generate_images_sequentially(generation_args_tuple: Tuple, no_samples: int) -> List: + """Fallback sequential image generation when parallel processing fails.""" + print(f"Generating {no_samples} images sequentially...") + results = [] + start_time = time.time() + + for i in range(no_samples): + try: + if i % 10 == 0 and i > 0: + elapsed = time.time() - start_time + rate = i / elapsed if elapsed > 0 else 0 + eta = (no_samples - i) / rate if rate > 0 else 0 + print(f"Generated {i}/{no_samples} images... ({rate:.1f} img/s, ETA: {eta:.0f}s)") + + img_start = time.time() + result = generate_synthetic_image(*generation_args_tuple) + img_time = time.time() - img_start + + if result[0] and result[1]: # Check if generation was successful + results.append(result) + else: + print(f"Warning: Image {i+1} generation failed (took {img_time:.2f}s)") + + except Exception as e: + print(f"Error generating image {i+1}: {e}") + if generation_args_tuple[7]: # debug flag + traceback.print_exc() + continue + + elapsed = time.time() - start_time + success_rate = len(results) / no_samples * 100 if no_samples > 0 else 0 + print(f"Sequential generation completed: {len(results)}/{no_samples} images ({success_rate:.1f}% success) in {elapsed:.1f}s") + return results + + +def _create_dataset_dict(output_folder_str: str, label_dict: Dict[str, int]) -> OrderedDict: + # Create a mapping from class IDs to label names + # If the label_dict contains entries like {'tibetan_no': 0, 'text_body': 1, 'chinese_no': 2}, + # then class_names will be {0: 'tibetan_no', 1: 'text_body', 2: 'chinese_no'} + class_names = {} + + # First, create a reverse mapping from class IDs to label names + for label_name, class_id in label_dict.items(): + class_names[class_id] = label_name + + # Ensure we have entries for class IDs 0, 1, and 2 if they're not in the dictionary + if 0 not in class_names: + class_names[0] = 'tibetan_no' + if 1 not in class_names: + class_names[1] = 'text_body' + if 2 not in class_names: + class_names[2] = 'chinese_no' + + dataset_name_part = Path(output_folder_str).parent.name + split_name = Path(output_folder_str).name + + return OrderedDict([ + ('path', f"../{dataset_name_part}"), + (split_name, f'{split_name}/images'), + ('nc', len(class_names)), + ('names', class_names) + ]) diff --git a/tibetanDataGenerator/dataset_generator_tib_no.py b/tibetanDataGenerator/dataset_generator_tib_no.py new file mode 100644 index 0000000..4e927cd --- /dev/null +++ 
b/tibetanDataGenerator/dataset_generator_tib_no.py @@ -0,0 +1,517 @@ +import argparse +import multiprocessing +import random +import re +import os +from typing import Tuple, Dict, List, Optional # Added Optional + +import yaml +from pathlib import Path +from collections import OrderedDict +from ultralytics.data.utils import DATASETS_DIR +from tibetanDataGenerator.utils.data_loader import TextFactory +from tibetanDataGenerator.data.text_renderer_img_size import ImageBuilder +from tibetanDataGenerator.data.augmentation import RotateAugmentation, NoiseAugmentation, \ + AugmentationStrategy +from tibetanDataGenerator.utils.bounding_box import BoundingBoxCalculator +from tibetanDataGenerator.utils.identifier import hash_current_time + +# Define a dictionary of augmentation strategies +augmentation_strategies: Dict[str, AugmentationStrategy] = { + 'rotate': RotateAugmentation(), + 'noise': NoiseAugmentation() +} + +def _parse_yolo_annotations(file_path: str) -> List[Tuple[int, float, float, float, float]]: + """ + Parses a YOLO annotation file. + Each line is expected to be: class_id center_x center_y width height + Returns a list of tuples (class_id, x_center, y_center, width, height). + """ + annotations = [] + if not file_path: # If file_path is None or empty string + return annotations + + if not os.path.exists(file_path): + print(f"Warning: Annotation file '{file_path}' not found. No annotations will be loaded from this file.") + return annotations + + try: + with open(file_path, 'r', encoding='utf-8') as f: + for i, line in enumerate(f): + line_strip = line.strip() + if not line_strip: # Skip empty lines + continue + parts = line_strip.split() + if len(parts) == 5: + try: + class_id = int(parts[0]) + x_center = float(parts[1]) + y_center = float(parts[2]) + width = float(parts[3]) + height = float(parts[4]) + + # Basic validation for YOLO coordinates (normalized) + if not (0.0 <= x_center <= 1.0 and \ + 0.0 <= y_center <= 1.0 and \ + 0.0 <= width <= 1.0 and \ + 0.0 <= height <= 1.0): + # This warning can be made conditional on debug flag if too verbose + # print(f"Debug: Annotation values out of [0,1] range in {file_path}, line {i+1}: {line_strip}") + pass + + # Ensure width and height are positive for valid bounding box + if width <= 0 or height <= 0: + print( + f"Warning: Non-positive width/height in annotation file {file_path}, line {i + 1}: {line_strip}. Skipping this annotation.") + continue + + annotations.append((class_id, x_center, y_center, width, height)) + except ValueError: + print( + f"Warning: Malformed line (numeric conversion) in annotation file {file_path}, line {i + 1}: {line_strip}") + else: # Incorrect number of parts + print( + f"Warning: Incorrect number of parts in line in annotation file {file_path}, line {i + 1}: {line_strip}") + except Exception as e: + print(f"Error reading or parsing annotation file {file_path}: {e}") + return annotations + + +def generate_dataset(args: argparse.Namespace, validation: bool = False) -> Dict: + """ + Generate a dataset for training or validation. + + Args: + args (argparse.Namespace): Command-line arguments. + validation (bool): Whether to generate validation dataset. Defaults to False. + + Returns: + Dict: A dictionary containing dataset information. 
+ """ + dataset_info = _setup_dataset_info(args, validation) + label_dict = _create_label_dict(args) + background_images = _load_background_images(dataset_info['background_folder']) + + # _prepare_generation_args now gets annotations_file_path from args + generation_args_tuple = _prepare_generation_args(args, dataset_info, label_dict, background_images) + + results = _generate_images_in_parallel(generation_args_tuple, dataset_info['no_samples']) + + return _create_dataset_dict(str(dataset_info['folder']), label_dict) + + +def generate_synthetic_image( + images: List[str], + label_dict: Dict[str, int], + folder_with_background: str, + corpora_tibetan_numbers_path: str, + corpora_tibetan_text_path: str, + corpora_chinese_numbers_path: str, + folder_for_train_data: str, + debug: bool = True, + font_path: str = 'res/Microsoft Himalaya.ttf', + single_label: bool = False, + image_width: int = 1024, + image_height: int = 361, + augmentation: str = "noise", + annotations_file_path: Optional[str] = None # <<< NEW ARGUMENT +) -> Tuple[str, str]: + # Constants + FONT_SIZE = 24 + BORDER_OFFSET_RATIO = 0.05 + + ctr = hash_current_time() + + border_offset_x = int(BORDER_OFFSET_RATIO * image_width) + border_offset_y = int(BORDER_OFFSET_RATIO * image_height) + + image_path_bg = _select_random_background(folder_with_background, images) + builder = _setup_image_builder(image_path_bg, image_width, image_height, font_path, FONT_SIZE) + + bbox_str_list = [] # Collect bounding box strings for all text instances + + # ---- Start: Draw bounding boxes from YOLO annotation file ---- + if annotations_file_path: + parsed_annotations = _parse_yolo_annotations(annotations_file_path) + for ann_class_id, norm_cx, norm_cy, norm_w, norm_h in parsed_annotations: + # Convert YOLO normalized coordinates to pixel coordinates for drawing + x_center_pixel = norm_cx * image_width + y_center_pixel = norm_cy * image_height + pixel_w = norm_w * image_width + pixel_h = norm_h * image_height + + # Calculate top-left corner for add_bounding_box + tl_x = x_center_pixel - (pixel_w / 2) + tl_y = y_center_pixel - (pixel_h / 2) + + draw_tl_pos = (int(round(tl_x)), int(round(tl_y))) + draw_box_size = (int(round(pixel_w)), int(round(pixel_h))) + + # Draw only if width and height are positive + if draw_box_size[0] > 0 and draw_box_size[1] > 0: + # Select the text corpus based on ann_class_id + if ann_class_id == 0: + text, file_name_from_corpus = _generate_text(corpora_tibetan_numbers_path) + elif ann_class_id == 1: + text, file_name_from_corpus = _generate_text(corpora_tibetan_text_path) + elif ann_class_id == 2: + text, file_name_from_corpus = _generate_text(corpora_chinese_numbers_path) + else: + if debug: + print(f"Debug: Unknown ann_class_id {ann_class_id}. 
Skipping this annotation box.") + continue + + # Ensure the text fits within the bounding box + text_render_top_left_pos = (draw_tl_pos[0], draw_tl_pos[1]) + yolo_box_center_pos = (int(round(x_center_pixel)), int(round(y_center_pixel))) + actual_text_box_size = (draw_box_size[0], draw_box_size[1]) + + builder.add_text(text, text_render_top_left_pos, actual_text_box_size) + # Get the base filename without extension + label_key = os.path.splitext(file_name_from_corpus)[0] + + # For Tibetan numbers (class 0), ensure we use the tib_no_ prefix + if ann_class_id == 0: + if not label_key.startswith('tib_no_'): + # Extract the number from the filename if it exists + try: + num_part = re.search(r'\d+', label_key).group() + label_key = f'tib_no_{num_part.zfill(4)}' # Format as tib_no_0001 + except AttributeError: + label_key = 'tib_no_0001' # Default fallback + + # Get label ID from dictionary or use class ID as fallback + label_id = label_dict.get(label_key, ann_class_id) + if label_key not in label_dict and debug: + print(f"Debug: Label '{label_key}' not found in label_dict. Using class_id {ann_class_id}") + + bbox_str = _create_bbox_string( + label_id, + yolo_box_center_pos, + actual_text_box_size, + image_width, + image_height + ) + bbox_str_list.append(bbox_str) + + if debug: + builder.add_bounding_box(text_render_top_left_pos, actual_text_box_size, color=(0, 255, 0)) # Green + builder.add_bounding_box(draw_tl_pos, draw_box_size, color=(255, 0, 0)) # Red + + else: + if debug: + print( + f"Debug: Skipping drawing annotation box from file (class {ann_class_id}) due to non-positive dimensions: size {draw_box_size}") + + if augmentation.lower() != 'none' and augmentation.lower() in augmentation_strategies: + _apply_augmentation(builder, augmentation) + elif augmentation.lower() != 'none': + print(f"Warning: Augmentation strategy '{augmentation}' not found. Skipping augmentation.") + + image_filename_saved = f"{ctr}.png" + image_full_path = os.path.join(folder_for_train_data, 'images', image_filename_saved) + os.makedirs(os.path.dirname(image_full_path), exist_ok=True) + builder.save(image_full_path) + + labels_dir = os.path.join(folder_for_train_data, 'labels') + os.makedirs(labels_dir, exist_ok=True) + + label_filename_saved = f"{ctr}.txt" + label_full_path = os.path.join(labels_dir, label_filename_saved) + with open(label_full_path, 'w', encoding='utf-8') as f: + f.writelines(bbox_str_list) # Write all bounding box strings into the file + + if debug: + print(f"Generated sample: {image_full_path}") + print(f"Label file: {label_full_path}") + print(f"Bounding boxes (YOLO format for synthetic text):\n{''.join(bbox_str_list).strip()}") + + return image_full_path, label_full_path + + +def _select_random_background(folder: str, images: List[str]) -> str: + if not images: + raise ValueError(f"No images found in background folder: {folder}. Cannot select a random background.") + return os.path.join(folder, random.choice(images)) + + +def _setup_image_builder(image_path_bg: str, image_width: int, image_height: int, font_path: str, + font_size: int) -> ImageBuilder: + builder = ImageBuilder(image_size=(image_width, image_height)) + try: + if image_path_bg and os.path.exists(image_path_bg): + builder.set_background(image_path_bg) + else: + if image_path_bg: + print(f"Warning: Background image {image_path_bg} not found. Using default white background.") + except FileNotFoundError: + print( + f"Warning: Background image {image_path_bg} not found during set_background. 
Using default white background.") + except Exception as e: + print(f"Error setting background {image_path_bg}: {e}. Using default white background.") + + builder.set_font(font_path, font_size=font_size) + return builder + + +def _generate_text(folder_with_corpora: str) -> Tuple[str, str]: + text_generator = TextFactory.create_text_source("corpus", folder_with_corpora) + return text_generator.generate_text() + + +def _calculate_text_layout( + text: str, + image_width: int, + image_height: int, + border_offset_x: int, + border_offset_y: int, + font_path: str, + font_size: int +) -> Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int]]: + min_text_box_width = font_size * 2 + min_text_box_height = int(font_size * 1.2) + + max_width_for_text_area = image_width - 2 * border_offset_x + max_height_for_text_area = image_height - 2 * border_offset_y + + if max_width_for_text_area < min_text_box_width or max_height_for_text_area < min_text_box_height: + # Fallback: try to use at least minimal dimensions if text is very short. + # This might not be ideal if text is truly too large for the area. + # A more robust solution might involve text wrapping or scaling, but that's complex. + print( + f"Warning: Text area ({max_width_for_text_area}x{max_height_for_text_area}) might be too small for text. Attempting to fit.") + max_width_for_text_area = max(max_width_for_text_area, min_text_box_width) + max_height_for_text_area = max(max_height_for_text_area, min_text_box_height) + + conceptual_box_w = random.randint(min_text_box_width, max_width_for_text_area) + conceptual_box_h = random.randint(min_text_box_height, max_height_for_text_area) + max_placement_box = (conceptual_box_w, conceptual_box_h) + + actual_text_box_size = BoundingBoxCalculator.fit(text, max_placement_box, font_size=font_size, font_path=font_path) + actual_w, actual_h = actual_text_box_size + + if actual_w <= 0 or actual_h <= 0: + print( + f"Warning: BoundingBoxCalculator.fit returned non-positive dimensions ({actual_w}x{actual_h}) for text: '{text[:50]}...'. Defaulting to minimal.") + actual_w = max(actual_w, font_size // 2 if text else 1) + actual_h = max(actual_h, font_size // 2 if text else 1) + actual_text_box_size = (actual_w, actual_h) + + pos_x_upper_bound = image_width - border_offset_x - actual_w + pos_y_upper_bound = image_height - border_offset_y - actual_h + + # Ensure random range is valid: lower_bound <= upper_bound + # If upper bound is less than lower, it means the box is too large. + # We should place it at the border_offset in such cases. 
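+ # Example with the defaults: the caller passes border_offset_x = int(0.05 * 1024) = 51; + # a fitted text width of 980 px then gives pos_x_upper_bound = 1024 - 51 - 980 = -7, + # so randint would see an empty range and the box is pinned to the border offset instead.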
+ tl_pos_x = random.randint(border_offset_x, max(border_offset_x, + pos_x_upper_bound)) if pos_x_upper_bound >= border_offset_x else border_offset_x + tl_pos_y = random.randint(border_offset_y, max(border_offset_y, + pos_y_upper_bound)) if pos_y_upper_bound >= border_offset_y else border_offset_y + + text_render_top_left_pos = (tl_pos_x, tl_pos_y) + + center_x = tl_pos_x + actual_w // 2 + center_y = tl_pos_y + actual_h // 2 + yolo_box_center_pos = (center_x, center_y) + + return text_render_top_left_pos, yolo_box_center_pos, actual_text_box_size + + +def _apply_augmentation(builder: ImageBuilder, augmentation_name: str): + augmentation_strategy = augmentation_strategies[augmentation_name.lower()] + builder.apply_augmentation(augmentation_strategy) + + +def _save_image_and_label( + builder: ImageBuilder, + text_content: str, + ctr: str, + folder_for_train_data: str, + label_dict: Dict[str, int], + single_label: bool, + file_name_from_corpus: str, + yolo_box_center_pos: Tuple[int, int], + actual_text_box_size: Tuple[int, int], + image_width: int, + image_height: int, + debug: bool +) -> Tuple[str, str]: + label_str = next(iter(label_dict.keys())) if single_label else os.path.splitext(file_name_from_corpus)[0] + if label_str not in label_dict: + print( + f"Warning: Label '{label_str}' from corpus file '{file_name_from_corpus}' not found in label_dict. Defaulting to first available label.") + if not label_dict: + raise ValueError("Label dictionary is empty. Cannot determine a label.") + label_str = next(iter(label_dict.keys())) + label_id = label_dict[label_str] + + image_base_filename = f"{label_str}_{ctr}.png" + image_full_path = os.path.join(folder_for_train_data, 'images', image_base_filename) + builder.save(image_full_path) + + bbox_str = _create_bbox_string( + label_id, yolo_box_center_pos, actual_text_box_size, image_width, image_height + ) + + labels_dir = os.path.join(folder_for_train_data, 'labels') + os.makedirs(labels_dir, exist_ok=True) + + label_base_filename = f"{label_str}_{ctr}.txt" + label_full_path = os.path.join(labels_dir, label_base_filename) + with open(label_full_path, 'w', encoding='utf-8') as f: + f.write(bbox_str) + + if debug: + print(f"Generated sample: {image_full_path}") + print(f"Label file: {label_full_path}") + print(f"Bounding box (YOLO format for synthetic text):\n{bbox_str.strip()}") + + return image_full_path, label_full_path + + +def _create_bbox_string( + label_id: int, + box_center_xy: Tuple[int, int], + box_wh: Tuple[int, int], + image_width: int = 1024, + image_height: int = 361 +) -> str: + center_x, center_y = box_center_xy + box_w, box_h = box_wh + + if image_width == 0: raise ValueError("image_width cannot be zero.") + if image_height == 0: raise ValueError("image_height cannot be zero.") + + norm_center_x = max(0.0, min(1.0, center_x / image_width)) + norm_center_y = max(0.0, min(1.0, center_y / image_height)) + norm_w = max(0.0, min(1.0, box_w / image_width)) + norm_h = max(0.0, min(1.0, box_h / image_height)) + + return f"{label_id} {norm_center_x:.6f} {norm_center_y:.6f} {norm_w:.6f} {norm_h:.6f}\n" + + +def _fill_label_dict(folder_path: str) -> Dict[str, int]: + label_dict = OrderedDict() + label_id_counter = 0 + + if not os.path.isdir(folder_path): + print(f"Warning: Corpora folder '{folder_path}' not found. 
Returning empty label dict.") + return label_dict + + # Get all .txt files and sort them numerically by their suffix + files = [f for f in os.listdir(folder_path) if f.endswith(".txt") and f.startswith("tib_no_")] + + try: + # Sort files by their numeric suffix (tib_no_0001.txt -> 1) + sorted_files = sorted( + files, + key=lambda x: int(x.split("_")[-1].split(".")[0]) + ) + except (ValueError, IndexError): + print("Warning: Could not sort corpus files numerically. Using simple alphabetical sort.") + sorted_files = sorted(files) + + for filename in sorted_files: + label_name = os.path.splitext(filename)[0] # Gets 'tib_no_0001' from 'tib_no_0001.txt' + if label_name not in label_dict: + label_dict[label_name] = label_id_counter + label_id_counter += 1 + + if not label_dict: + print(f"Warning: No valid .txt files found in corpora folder '{folder_path}'. Label dictionary is empty.") + return label_dict + + +def _setup_dataset_info(args: argparse.Namespace, validation: bool) -> Dict: + base_output_folder = Path(args.dataset_name) + + if validation: + folder_path = base_output_folder / 'val' + num_samples = args.val_samples + bg_folder = args.background_val + else: + folder_path = base_output_folder / 'train' + num_samples = args.train_samples + bg_folder = args.background_train + + os.makedirs(folder_path / 'images', exist_ok=True) + os.makedirs(folder_path / 'labels', exist_ok=True) + + return { + 'background_folder': bg_folder, + 'folder': folder_path, + 'no_samples': num_samples + } + + +def _create_label_dict(args: argparse.Namespace) -> Dict[str, int]: + if args.single_label: + return {'tibetan': 0} + else: + return _fill_label_dict(args.corpora_tibetan_numbers_path) + + +def _load_background_images(folder: str) -> List[str]: + if not os.path.isdir(folder): + print(f"Warning: Background folder '{folder}' not found. No background images will be loaded.") + return [] + return [file for file in os.listdir(folder) if file.lower().endswith(('.jpg', '.jpeg', '.png'))] + + +def _prepare_generation_args(args: argparse.Namespace, dataset_info: Dict, label_dict: Dict, + images_bg_list: List[str]) -> Tuple: + """Prepare arguments for each call to generate_synthetic_image.""" + return ( + images_bg_list, + label_dict, + dataset_info['background_folder'], + args.corpora_tibetan_numbers_path, + args.corpora_tibetan_text_path, + args.corpora_chinese_numbers_path, + dataset_info['folder'], + args.debug, + args.font_path, + args.single_label, + args.image_width, + args.image_height, + args.augmentation, + args.annotations_file_path # <<< NEW ARGUMENT ADDED HERE + ) + + +def _generate_images_in_parallel(generation_args_tuple: Tuple, no_samples: int) -> List: + if no_samples <= 0: + return [] + list_of_generation_args = [generation_args_tuple] * no_samples + # Ensure os.cpu_count() returns a valid number or default to 1 + num_cpus = os.cpu_count() + max_parallel_calls = min(num_cpus if num_cpus else 1, no_samples) + + results = [] + # Use try-finally for pool shutdown if issues arise, but starmap should handle clean exit. + # Consider reducing max_parallel_calls if memory is an issue for large images/many processes. 
+ if max_parallel_calls == 0: max_parallel_calls = 1 # Ensure at least one process + + with multiprocessing.Pool(processes=max_parallel_calls) as pool: + try: + results = pool.starmap(generate_synthetic_image, list_of_generation_args) + except Exception as e: + print(f"Error during parallel image generation: {e}") + pool.terminate() # Forcefully terminate worker processes + pool.join() # Wait for worker processes to exit + raise # Re-raise the exception to make the error visible + return results + + +def _create_dataset_dict(output_folder_str: str, label_dict: Dict[str, int]) -> OrderedDict: + class_names = {int(v): str(k) for k, v in label_dict.items()} + dataset_name_part = Path(output_folder_str).parent.name + split_name = Path(output_folder_str).name + + return OrderedDict([ + ('path', f"../{dataset_name_part}"), + (split_name, f'{split_name}/images'), + ('nc', len(class_names)), + ('names', class_names) + ]) diff --git a/tibetanDataGenerator/generated_sample.png b/tibetanDataGenerator/generated_sample.png new file mode 100644 index 0000000..60e3dde Binary files /dev/null and b/tibetanDataGenerator/generated_sample.png differ diff --git a/tibetanDataGenerator/main.py b/tibetanDataGenerator/main.py new file mode 100644 index 0000000..4ff8945 --- /dev/null +++ b/tibetanDataGenerator/main.py @@ -0,0 +1,92 @@ +import argparse +from pathlib import Path +import yaml +from collections import OrderedDict +from ultralytics.data.utils import DATASETS_DIR +from tibetanDataGenerator.dataset_generator_tib_no import generate_dataset + + +def main(): + parser = argparse.ArgumentParser(description="Generate YOLO dataset for Tibetan text detection") + + parser.add_argument('--background_train', type=str, default='./data/background_images_train/', + help='Folder with background images for training') + parser.add_argument('--background_val', type=str, default='./data/background_images_val/', + help='Folder with background images for validation') + parser.add_argument('--output_dir', type=str, default=str(Path(DATASETS_DIR)), + help='Base directory to save the generated dataset. 
(Default: Ultralytics DATASETS_DIR)') + parser.add_argument('--dataset_name', type=str, default='yolo_tibetan_dataset', + help='Name for the generated dataset folder.') + parser.add_argument('--corpora_tibetan_numbers_path', type=str, default='./data/corpora/Tibetan Number Words/', + help='Folder with Tibetan number words (maps to class_id 0: "tibetan_number_word").') + parser.add_argument('--corpora_tibetan_text_path', type=str, default='./data/corpora/UVA Tibetan Spoken Corpus/', + help='Folder with general Tibetan text (maps to class_id 1: "tibetan_text").') + parser.add_argument('--corpora_chinese_numbers_path', type=str, default='./data/corpora/Chinese Number Words/', + help='Folder with Chinese number words (maps to class_id 2: "chinese_number_word").') + parser.add_argument('--train_samples', type=int, default=100, + help='Number of training samples to generate') + parser.add_argument('--val_samples', type=int, default=20, + help='Number of validation samples to generate') + parser.add_argument('--font_path_tibetan', type=str, default='ext/Microsoft Himalaya.ttf', + help='Path to a font file that supports Tibetan characters') + parser.add_argument('--font_path_chinese', type=str, default='ext/simkai.ttf', + help='Path to a font file that supports Chinese characters') + parser.add_argument('--single_label', action='store_true', + help='Use a single label "tibetan" for all files instead of using filenames as labels') + parser.add_argument('--debug', action='store_true', + help='More verbose output with debug information about the image generation process.') + parser.add_argument('--image_width', type=int, default=1024, + help='Width (pixels) of each generated image.') + parser.add_argument('--image_height', type=int, default=361, + help='Height (pixels) of each generated image.') + parser.add_argument("--augmentation", choices=['rotate', 'noise', 'none'], default='noise', + help="Type of augmentation to apply") + parser.add_argument('--annotations_file_path', type=str, + default='./data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.txt', + help='Path to a YOLO annotation file to load and draw bounding boxes from.') + + + args = parser.parse_args() + + full_dataset_path = Path(args.output_dir) / args.dataset_name + original_dataset_name = args.dataset_name + args.dataset_name = str(full_dataset_path) + + print(f"Generating YOLO dataset in {args.dataset_name}...") + + # Generate training dataset + # args object (containing args.annotations_file_path) is passed to generate_dataset + train_dataset_info = generate_dataset(args, validation=False) + + # Generate validation dataset + val_dataset_info = generate_dataset(args, validation=True) + + yaml_content = OrderedDict() + yaml_content['path'] = original_dataset_name + yaml_content['train'] = 'train/images' + yaml_content['val'] = 'val/images' + yaml_content['test'] = '' + + if 'nc' not in train_dataset_info or 'names' not in train_dataset_info: + raise ValueError("generate_dataset did not return 'nc' or 'names' in its info dictionary.") + yaml_content['nc'] = train_dataset_info['nc'] + yaml_content['names'] = train_dataset_info['names'] + + def represent_ordereddict(dumper, data): + return dumper.represent_mapping('tag:yaml.org,2002:map', data.items()) + + yaml.add_representer(OrderedDict, represent_ordereddict) + + yaml_file_path = Path(args.output_dir) / f"{original_dataset_name}.yaml" + + with open(yaml_file_path, 'w', encoding='utf-8') as f: + yaml.dump(dict(yaml_content), f, 
sort_keys=False, allow_unicode=True) + + print(f"\nDataset generation completed. YAML configuration saved to: {yaml_file_path}") + print("Training can be started with a command like:\n") + print( + f"yolo detect train data={yaml_file_path} epochs=100 imgsz=[{args.image_height},{args.image_width}] model=yolov8n.pt") + + +if __name__ == "__main__": + main() diff --git a/tibetan_utils/arg_utils.py b/tibetan_utils/arg_utils.py index 14d8116..dbb5baa 100644 --- a/tibetan_utils/arg_utils.py +++ b/tibetan_utils/arg_utils.py @@ -1,251 +1,207 @@ """ -Command-line argument utilities for the TibetanOCR project. +Argument parsing utilities for the TibetanOCR project. +Multi-class support with Tibetan numbers, Tibetan text, and Chinese numbers. """ import argparse +from pathlib import Path +try: + from ultralytics.data.utils import DATASETS_DIR +except ImportError: + DATASETS_DIR = "./datasets" # Fallback if ultralytics not installed + from .config import ( - DEFAULT_MODEL_PATH, DEFAULT_IMAGE_SIZE, DEFAULT_CONFIDENCE, - DEFAULT_OUTPUT_DIR, DEFAULT_DATASET_DIR, DEFAULT_OCR_LANG, - DEFAULT_TRAIN_SAMPLES, DEFAULT_VAL_SAMPLES, DEFAULT_AUGMENTATION, - DEFAULT_FONT_PATH, DEFAULT_SBB_OUTPUT, DEFAULT_OCR_OUTPUT + DEFAULT_BACKGROUND_TRAIN_PATH, + DEFAULT_BACKGROUND_VAL_PATH, + DEFAULT_CORPORA_PATH, + DEFAULT_FONT_PATH, + DEFAULT_IMAGE_SIZE, + DEFAULT_BATCH_SIZE, + DEFAULT_EPOCHS, + DEFAULT_WORKERS, + DEFAULT_TRAIN_SAMPLES, + DEFAULT_VAL_SAMPLES, + DEFAULT_AUGMENTATION, + DEFAULT_ANNOTATION_FILE_PATH ) def add_model_arguments(parser): - """ - Add model-related arguments to an ArgumentParser. - - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('Model Options') - group.add_argument('--model', type=str, default=DEFAULT_MODEL_PATH, - help='Path to the model (e.g., yolov8n.pt, best.pt)') - group.add_argument('--imgsz', type=int, default=DEFAULT_IMAGE_SIZE, - help='Image size for inference/training') - group.add_argument('--conf', type=float, default=DEFAULT_CONFIDENCE, - help='Confidence threshold for detections') - group.add_argument('--device', type=str, default='', - help='Device for inference/training (e.g., cpu, 0, 0,1,2,3)') - return parser + """Add model-related arguments.""" + parser.add_argument('--model', type=str, default='yolov8n.pt', + help='Path to the model file') + parser.add_argument('--imgsz', type=int, default=DEFAULT_IMAGE_SIZE, + help='Image size for inference') + parser.add_argument('--conf', type=float, default=0.25, + help='Confidence threshold for detections') def add_output_arguments(parser): - """ - Add output-related arguments to an ArgumentParser. 
- - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('Output Options') - group.add_argument('--project', '--output', type=str, default=DEFAULT_OUTPUT_DIR, - help='Directory for output') - group.add_argument('--name', type=str, default='exp', - help='Experiment name') - group.add_argument('--save', action='store_true', default=True, - help='Save results') - group.add_argument('--save-txt', action='store_true', - help='Save results as .txt files') - group.add_argument('--save-conf', action='store_true', - help='Save confidence values in .txt files') - return parser + """Add output-related arguments.""" + parser.add_argument('--output', type=str, default='output', + help='Output directory') + parser.add_argument('--save-crops', action='store_true', + help='Save cropped text regions') + parser.add_argument('--debug', action='store_true', + help='Enable debug mode with verbose output') def add_dataset_generation_arguments(parser): - """ - Add dataset generation arguments to an ArgumentParser. + """Add dataset generation arguments for multi-class support.""" + parser.add_argument('--background_train', type=str, default=DEFAULT_BACKGROUND_TRAIN_PATH, + help='Folder with background images for training') + parser.add_argument('--background_val', type=str, default=DEFAULT_BACKGROUND_VAL_PATH, + help='Folder with background images for validation') + parser.add_argument('--output_dir', type=str, default=str(Path(DATASETS_DIR)), + help='Base directory to save the generated dataset. (Default: Ultralytics DATASETS_DIR)') + parser.add_argument('--dataset_name', type=str, default='yolo_tibetan_dataset', + help='Name for the generated dataset folder.') - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('Dataset Generation Options') - group.add_argument('--background_train', type=str, default='./data/background_images_train/', - help='Folder with background images for training') - group.add_argument('--background_val', type=str, default='./data/background_images_val/', - help='Folder with background images for validation') - group.add_argument('--dataset_name', type=str, default=DEFAULT_DATASET_DIR, - help='Folder for the generated YOLO dataset') - group.add_argument('--corpora_folder', type=str, default='./data/corpora/Tibetan Number Words/', - help='Folder with Tibetan corpora') - group.add_argument('--train_samples', type=int, default=DEFAULT_TRAIN_SAMPLES, - help='Number of training samples to generate') - group.add_argument('--val_samples', type=int, default=DEFAULT_VAL_SAMPLES, - help='Number of validation samples to generate') - group.add_argument('--no_cols', type=int, default=1, - help='Number of text columns to generate [1-5]') - group.add_argument('--font_path', type=str, default=DEFAULT_FONT_PATH, - help='Path to a Tibetan font file') - group.add_argument('--single_label', action='store_true', - help='Use a single label "tibetan" for all files') - group.add_argument('--debug', action='store_true', - help='Enable debug mode for verbose output') - group.add_argument('--image_size', type=int, default=DEFAULT_IMAGE_SIZE, - help='Size of generated images in pixels') - group.add_argument('--augmentation', choices=['rotate', 'noise'], default=DEFAULT_AUGMENTATION, - help='Type of augmentation to apply') - return parser + # Multi-class corpora paths + parser.add_argument('--corpora_tibetan_numbers_path', type=str, + default='./data/corpora/Tibetan 
Number Words/', + help='Folder with Tibetan number words (maps to class_id 0: "tibetan_number_word").') + parser.add_argument('--corpora_tibetan_text_path', type=str, + default='./data/corpora/UVA Tibetan Spoken Corpus/', + help='Folder with general Tibetan text (maps to class_id 1: "tibetan_text").') + parser.add_argument('--corpora_chinese_numbers_path', type=str, + default='./data/corpora/Chinese Number Words/', + help='Folder with Chinese number words (maps to class_id 2: "chinese_number_word").') + + # Sample counts + parser.add_argument('--train_samples', type=int, default=DEFAULT_TRAIN_SAMPLES, + help='Number of training samples to generate') + parser.add_argument('--val_samples', type=int, default=DEFAULT_VAL_SAMPLES, + help='Number of validation samples to generate') + + # Multi-font support + parser.add_argument('--font_path_tibetan', type=str, + default='ext/Microsoft Himalaya.ttf', + help='Path to a font file that supports Tibetan characters') + parser.add_argument('--font_path_chinese', type=str, + default='ext/simkai.ttf', + help='Path to a font file that supports Chinese characters') + + # Image dimensions + parser.add_argument('--image_width', type=int, default=1024, + help='Width (pixels) of each generated image.') + parser.add_argument('--image_height', type=int, default=361, + help='Height (pixels) of each generated image.') + + # Labels and augmentation + parser.add_argument('--single_label', action='store_true', + help='Use a single label "tibetan" for all files instead of using filenames as labels') + parser.add_argument("--augmentation", choices=['rotate', 'noise', 'none'], default=DEFAULT_AUGMENTATION, + help="Type of augmentation to apply") + + # YOLO annotations support + parser.add_argument('--annotations_file_path', type=str, + default=DEFAULT_ANNOTATION_FILE_PATH, + help='Path to a YOLO annotation file to load and draw bounding boxes from.') def add_training_arguments(parser): - """ - Add training-related arguments to an ArgumentParser. 
- - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('Training Options') - group.add_argument('--dataset', type=str, default=DEFAULT_DATASET_DIR, - help='Name of the dataset folder') - group.add_argument('--epochs', type=int, default=100, - help='Number of training epochs') - group.add_argument('--batch', type=int, default=16, - help='Batch size for training') - group.add_argument('--workers', type=int, default=8, - help='Number of workers for data loading') - group.add_argument('--patience', type=int, default=50, - help='EarlyStopping patience in epochs') - group.add_argument('--export', action='store_true', - help='Export the model after training as TorchScript') - return parser + """Add training-related arguments.""" + parser.add_argument('--dataset', type=str, default='yolo_tibetan/', + help='Path to dataset YAML file') + parser.add_argument('--epochs', type=int, default=DEFAULT_EPOCHS, + help='Number of training epochs') + parser.add_argument('--batch', type=int, default=DEFAULT_BATCH_SIZE, + help='Batch size') + parser.add_argument('--workers', type=int, default=DEFAULT_WORKERS, + help='Number of worker threads') + parser.add_argument('--device', type=str, default='', + help='Device to use for training') + parser.add_argument('--project', type=str, default='runs/detect', + help='Project directory') + parser.add_argument('--name', type=str, default='train', + help='Experiment name') + parser.add_argument('--export', action='store_true', + help='Export model after training') + parser.add_argument('--patience', type=int, default=50, + help='EarlyStopping patience') def add_wandb_arguments(parser): - """ - Add Weights & Biases related arguments to an ArgumentParser. - - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('Weights & Biases Options') - group.add_argument('--wandb', action='store_true', - help='Enable Weights & Biases logging') - group.add_argument('--wandb-project', type=str, default='TibetanOCR', - help='Weights & Biases project name') - group.add_argument('--wandb-entity', type=str, default=None, - help='Weights & Biases entity (team or username)') - group.add_argument('--wandb-tags', type=str, default=None, - help='Comma-separated tags for the experiment (e.g., "yolov8,tibetan")') - group.add_argument('--wandb-name', type=str, default=None, - help='Name of the experiment in wandb (default: same as --name)') - return parser + """Add Weights & Biases arguments.""" + parser.add_argument('--wandb', action='store_true', + help='Enable Weights & Biases logging') + parser.add_argument('--wandb-project', type=str, default='PechaBridge', + help='W&B project name') + parser.add_argument('--wandb-entity', type=str, + help='W&B entity (team or username)') + parser.add_argument('--wandb-tags', type=str, + help='Comma-separated tags for the experiment') + parser.add_argument('--wandb-name', type=str, + help='Name of the experiment in wandb') def add_sbb_arguments(parser): - """ - Add Staatsbibliothek zu Berlin related arguments to an ArgumentParser. 
- - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('SBB Options') - group.add_argument('--ppn', type=str, - help='PPN (Pica Production Number) of the document in the Staatsbibliothek zu Berlin') - group.add_argument('--download', action='store_true', - help='Download images instead of processing them directly') - group.add_argument('--no-ssl-verify', action='store_true', - help='Disable SSL certificate verification (not recommended for production environments)') - group.add_argument('--max-images', type=int, default=0, - help='Maximum number of images for inference (0 = all)') - group.add_argument('--output', type=str, default=DEFAULT_SBB_OUTPUT, - help='Directory for saving downloaded images') - return parser + """Add Staatsbibliothek zu Berlin arguments.""" + parser.add_argument('--ppn', type=str, required=True, + help='PPN (Pica Production Number) of the document') + parser.add_argument('--download', action='store_true', + help='Download images instead of processing them directly') + parser.add_argument('--max-images', type=int, default=0, + help='Maximum number of images to process (0 = all)') + parser.add_argument('--no-ssl-verify', action='store_true', + help='Disable SSL certificate verification') def add_ocr_arguments(parser): - """ - Add OCR-related arguments to an ArgumentParser. - - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ - group = parser.add_argument_group('OCR Options') - group.add_argument('--lang', type=str, default=DEFAULT_OCR_LANG, - help='Language for Tesseract OCR (e.g., eng, deu, eng+deu, bod for Tibetan)') - group.add_argument('--tesseract-config', type=str, default='', - help='Additional Tesseract configuration') - group.add_argument('--save-crops', action='store_true', - help='Save cropped text blocks as images') - group.add_argument('--output', type=str, default=DEFAULT_OCR_OUTPUT, - help='Directory for saving OCR results') - return parser + """Add OCR-related arguments.""" + parser.add_argument('--lang', type=str, default='eng+deu', + help='Language for Tesseract OCR') + parser.add_argument('--tesseract-config', type=str, default='', + help='Additional Tesseract configuration') def add_source_argument(parser): - """ - Add source argument to an ArgumentParser. 
- - Args: - parser: ArgumentParser instance - - Returns: - ArgumentParser: Updated parser - """ + """Add source argument for input files.""" parser.add_argument('--source', type=str, - help='Path to image or directory for inference') - return parser + help='Path to image file or directory') def create_generate_dataset_parser(): - """Create an ArgumentParser for dataset generation.""" + """Create parser for multi-class dataset generation.""" parser = argparse.ArgumentParser(description="Generate YOLO dataset for Tibetan text detection") - parser = add_dataset_generation_arguments(parser) + add_dataset_generation_arguments(parser) + add_output_arguments(parser) return parser def create_train_parser(): - """Create an ArgumentParser for model training.""" - parser = argparse.ArgumentParser(description="Train a YOLO model with Tibetan OCR data") - parser = add_model_arguments(parser) - parser = add_training_arguments(parser) - parser = add_output_arguments(parser) - parser = add_wandb_arguments(parser) + """Create parser for model training.""" + parser = argparse.ArgumentParser(description="Train YOLO model for Tibetan text detection") + add_training_arguments(parser) + add_wandb_arguments(parser) return parser def create_inference_parser(): - """Create an ArgumentParser for inference.""" - parser = argparse.ArgumentParser(description="Run inference with a trained YOLO model") - parser = add_model_arguments(parser) - parser = add_output_arguments(parser) - parser = add_source_argument(parser) + """Create parser for inference.""" + parser = argparse.ArgumentParser(description="Run inference on images") + add_model_arguments(parser) + add_source_argument(parser) + add_output_arguments(parser) return parser def create_sbb_inference_parser(): - """Create an ArgumentParser for SBB inference.""" - parser = argparse.ArgumentParser(description="Run inference on Staatsbibliothek zu Berlin data") - parser = add_model_arguments(parser) - parser = add_output_arguments(parser) - parser = add_sbb_arguments(parser) + """Create parser for SBB inference.""" + parser = argparse.ArgumentParser(description="Run inference on SBB data") + add_model_arguments(parser) + add_sbb_arguments(parser) + add_output_arguments(parser) return parser def create_ocr_parser(): - """Create an ArgumentParser for OCR on detected text blocks.""" + """Create parser for OCR on detections.""" parser = argparse.ArgumentParser(description="Apply OCR to detected text blocks") - parser = add_model_arguments(parser) - parser = add_source_argument(parser) - parser = add_sbb_arguments(parser) - parser = add_ocr_arguments(parser) + add_model_arguments(parser) + add_source_argument(parser) + add_sbb_arguments(parser) + add_ocr_arguments(parser) + add_output_arguments(parser) return parser diff --git a/tibetan_utils/config.py b/tibetan_utils/config.py index ad08967..2015237 100644 --- a/tibetan_utils/config.py +++ b/tibetan_utils/config.py @@ -11,6 +11,15 @@ DEFAULT_OUTPUT_DIR = 'runs/detect' DEFAULT_DATASET_DIR = 'yolo_tibetan/' +# Additional default constants for multi-class support +DEFAULT_ANNOTATION_FILE_PATH = './data/tibetan numbers/annotations/tibetan_chinese_no/bg_PPN337138764X_00000005.txt' +DEFAULT_BACKGROUND_TRAIN_PATH = './data/tibetan numbers/backgrounds/' +DEFAULT_BACKGROUND_VAL_PATH = './data/tibetan numbers/backgrounds/' +DEFAULT_CORPORA_PATH = './data/corpora/' +DEFAULT_BATCH_SIZE = 16 +DEFAULT_EPOCHS = 100 +DEFAULT_WORKERS = 8 + # Default model settings DEFAULT_MODEL_PATH = 'yolov8n.pt' DEFAULT_IMAGE_SIZE = 1024 diff --git 
a/tibetan_utils/image_utils.py b/tibetan_utils/image_utils.py index ef670c9..713303f 100644 --- a/tibetan_utils/image_utils.py +++ b/tibetan_utils/image_utils.py @@ -4,7 +4,7 @@ import cv2 import numpy as np -from PIL import Image +from PIL import Image, ImageDraw, ImageFont import io from typing import Tuple, List, Union, Dict, Any @@ -217,3 +217,202 @@ def denormalize_box(box: List[float], image_size: Tuple[int, int]) -> Tuple[int, y_max = int((y + h/2) * height) return (x_min, y_min, x_max, y_max) + + +class BoundingBoxCalculator: + """ + Utility class for calculating bounding boxes and font sizes for text rendering. + """ + + @staticmethod + def fit(text: str, box_size: Tuple[int, int], font_size: int = 24, font_path: str = 'ext/Microsoft Himalaya.ttf', debug: bool = False) -> Tuple[int, int]: + """ + Calculate the true bounding box size for the specified text when it is wrapped and terminated to fit a given box size. + Enhanced with timeout protection and iteration limits. + + Args: + text: Text to be measured + box_size: Tuple (width, height) specifying the size of the box to fit the text + font_size: Size of the font + font_path: Path to the font file + debug: Enable debug output + + Returns: + Tuple (width, height) representing the actual bounding box size of the wrapped and terminated text + """ + import time + start_time = time.time() + timeout_seconds = 5 # 5 second timeout for fit operation + max_lines = 100 # Maximum lines to process + max_chars_per_line = 1000 # Maximum characters per line to prevent infinite loops + + # Validate inputs + if not text or not text.strip(): + return (0, 0) + + if box_size[0] <= 0 or box_size[1] <= 0: + if debug: + print(f"Warning: Invalid box size {box_size}") + return (0, 0) + + # Create a dummy image to get a drawing context + dummy_image = Image.new('RGB', (1, 1)) + draw = ImageDraw.Draw(dummy_image) + + # Define the font + try: + font = ImageFont.truetype(font_path, font_size) + except IOError: + font = ImageFont.load_default() + if debug: + print("Warning: Default font used, may not accurately measure text.") + + box_w, box_h = box_size + actual_text_width, actual_text_height = 0, 0 + y_offset = 0 + lines_processed = 0 + + # Process each line with safety limits + for line in text.split('\n'): + if lines_processed >= max_lines: + if debug: + print(f"Warning: Reached maximum line limit ({max_lines})") + break + + # Check timeout + if time.time() - start_time > timeout_seconds: + if debug: + print(f"Warning: fit() timed out after {timeout_seconds}s") + break + + char_iterations = 0 + while line and char_iterations < max_chars_per_line: + char_iterations += 1 + + # Find the breakpoint for wrapping with safety limit + i = 0 + try: + for i in range(min(len(line), max_chars_per_line)): + if draw.textlength(line[:i + 1], font=font) > box_w: + break + else: + i = len(line) + except Exception as e: + if debug: + print(f"Error in textlength calculation: {e}") + i = min(10, len(line)) # Fallback to small chunk + + # Ensure we make progress + if i == 0: + i = 1 # Take at least one character to avoid infinite loop + + # Add the line to wrapped text + wrapped_line = line[:i] + + try: + left, top, right, bottom = font.getbbox(wrapped_line) + line_width, line_height = right - left, bottom - top + except Exception as e: + if debug: + print(f"Error in getbbox calculation: {e}") + # Fallback estimation + line_width = len(wrapped_line) * font_size // 2 + line_height = font_size + + actual_text_width = max(actual_text_width, line_width) + y_offset += line_height 
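+ # Running totals: actual_text_width tracks the widest wrapped line measured so far, + # while y_offset accumulates line heights top-to-bottom until box_h is exhausted.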
+ + # Check if the next line exceeds the box height + if y_offset > box_h: + y_offset -= line_height # Remove the last line's height if it exceeds + break + + line = line[i:] + + lines_processed += 1 + if y_offset > box_h: + break + + elapsed = time.time() - start_time + if debug and elapsed > 1.0: + print(f"fit() took {elapsed:.2f}s for text length {len(text)}, font size {font_size}") + + return actual_text_width, y_offset + 10 + + @staticmethod + def find_max_font(text: str, box_size: Tuple[int, int], font_path: str, max_size: int = 100, debug: bool = False) -> int: + """ + Find maximum font size where text fits in box using binary search with timeout protection. + + Args: + text: Text to fit + box_size: Target box size (width, height) + font_path: Path to font file + max_size: Maximum font size to try + debug: Enable debug output + + Returns: + int: Maximum font size that fits + """ + import time + start_time = time.time() + timeout_seconds = 10 # 10 second timeout + max_iterations = 50 # Maximum iterations to prevent infinite loops + + # Validate inputs + if not text or not text.strip(): + if debug: + print("Warning: Empty text provided to find_max_font, returning minimum font size") + return 1 + + if box_size[0] <= 0 or box_size[1] <= 0: + if debug: + print(f"Warning: Invalid box size {box_size}, returning minimum font size") + return 1 + + low, high = 1, min(max_size, 200) # Cap maximum size to prevent extreme values + best = 1 + iterations = 0 + + if debug: + print(f"Starting font size search for text: '{text[:50]}...' in box {box_size}") + + while low <= high and iterations < max_iterations: + # Check timeout + if time.time() - start_time > timeout_seconds: + if debug: + print(f"find_max_font timed out after {timeout_seconds}s, returning best so far: {best}") + break + + iterations += 1 + mid = (low + high) // 2 + + try: + fit_start = time.time() + w, h = BoundingBoxCalculator.fit(text, box_size, mid, font_path) + fit_time = time.time() - fit_start + + if debug and fit_time > 1.0: # Log slow fit operations + print(f"Slow fit operation: {fit_time:.2f}s for font size {mid}") + + if w <= box_size[0] and h <= box_size[1]: + best = mid + low = mid + 1 + if debug: + print(f"Font size {mid} fits ({w}x{h} <= {box_size})") + else: + high = mid - 1 + if debug: + print(f"Font size {mid} too large ({w}x{h} > {box_size})") + + except Exception as e: + if debug: + print(f"Error in fit calculation for font size {mid}: {e}") + # If fit fails, assume font is too large + high = mid - 1 + + elapsed = time.time() - start_time + if debug: + print(f"find_max_font completed in {elapsed:.2f}s after {iterations} iterations, best font size: {best}") + + return best diff --git a/tibetan_utils/io_utils.py b/tibetan_utils/io_utils.py index 67b72ac..7935e09 100644 --- a/tibetan_utils/io_utils.py +++ b/tibetan_utils/io_utils.py @@ -6,6 +6,8 @@ import re import json import yaml +import hashlib +import time from pathlib import Path from typing import Dict, List, Union, Any @@ -155,3 +157,28 @@ def get_output_path(base_dir: str, name: str, filename: str, create_dir: bool = ensure_dir(output_dir) return os.path.join(output_dir, filename) + + +def hash_current_time() -> str: + """ + Generate a hash based on current time for unique identifiers. 
+ + Returns: + str: SHA256 hex digest of the current time in nanoseconds + """ + # Hashing the nanosecond timestamp yields effectively unique identifiers, + # since concurrent calls are very unlikely to share the same time_ns() value. + return hashlib.sha256(str(time.time_ns()).encode()).hexdigest()
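
As a sanity check on the label format used throughout this diff, here is a minimal, self-contained sketch of the round-trip that `_create_bbox_string` and `_parse_yolo_annotations` implement: a pixel box is normalized to a `class cx cy w h` line on write, and de-normalized back to a top-left corner and size before drawing. The helper names `to_yolo_line` and `to_pixel_box` are illustrative only; the 1024×361 canvas and the `text_body` pixel box are taken from the defaults above and the shipped `bg_PPN337138764X_00000005.csv`.

```python
IMG_W, IMG_H = 1024, 361  # default canvas size used throughout this PR


def to_yolo_line(class_id, center, size):
    """Mirror of _create_bbox_string: normalize a pixel box to one YOLO label line."""
    def norm(value, dim):
        # Clamp to [0, 1] like _create_bbox_string, so boxes never leave the image
        return max(0.0, min(1.0, value / dim))
    (cx, cy), (w, h) = center, size
    return (f"{class_id} {norm(cx, IMG_W):.6f} {norm(cy, IMG_H):.6f} "
            f"{norm(w, IMG_W):.6f} {norm(h, IMG_H):.6f}")


def to_pixel_box(line):
    """Mirror of the drawing path in generate_synthetic_image: YOLO line -> (tl_x, tl_y, w, h)."""
    _cls, cx, cy, w, h = line.split()
    pw, ph = float(w) * IMG_W, float(h) * IMG_H
    tl_x = float(cx) * IMG_W - pw / 2
    tl_y = float(cy) * IMG_H - ph / 2
    return round(tl_x), round(tl_y), round(pw), round(ph)


# text_body row of the complete_layout CSV: top-left (286, 40), size 442x278,
# i.e. center (507, 179)
line = to_yolo_line(2, (507, 179), (442, 278))
print(line)                # 2 0.495117 0.495845 0.431641 0.770083
print(to_pixel_box(line))  # (286, 40, 442, 278)
```

The shipped `.txt` annotations differ from these values around the third or fourth decimal place, which is consistent with the boxes having been normalized under a slightly different center convention; the round-trip above only demonstrates that writing and re-reading a box in this format is lossless at pixel resolution.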