diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2806bdc..c5f5134 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -5,14 +5,18 @@ on:
     branches:
       - main
     paths:
-      - "src/modules/model.py"  # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py"  # Trigger only if this file is modified
       - "src/modules/data_processor.py"  # Trigger only if this file is modified
+      - "src/modules/transformer_components.py"
+      - "src/translation_french_english.py"
   pull_request:
     branches:
       - main
     paths:
-      - "src/modules/model.py"  # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py"  # Trigger only if this file is modified
       - "src/modules/data_processor.py"  # Trigger only if this file is modified
+      - "src/modules/transformer_components.py"
+      - "src/translation_french_english.py"
 
 jobs:
   test:
diff --git a/src/modules/optuna_transformer.py b/src/modules/optuna_transformer.py
new file mode 100644
index 0000000..8a68f61
--- /dev/null
+++ b/src/modules/optuna_transformer.py
@@ -0,0 +1,160 @@
+import optuna
+import tensorflow as tf
+from modules.data_processor import DatasetProcessor, TextPreprocessor
+from modules.transformer_components import (
+    PositionalEmbedding,
+    TransformerEncoder,
+    TransformerDecoder,
+    evaluate_bleu,
+)
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+import logging
+import json
+import os
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+
+def build_transformer_model(trial, preprocessor):
+    """
+    Build a Transformer model with hyperparameters suggested by Optuna.
+
+    Args:
+        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
+        preprocessor (TextPreprocessor): Preprocessor object containing sequence length and vocabulary size.
+
+    Returns:
+        tf.keras.Model: The compiled Transformer model.
+    """
+    # Hyperparameters to optimize
+    embed_dim = trial.suggest_categorical("embed_dim", [64, 128])
+    dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512)
+    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
+    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
+
+    sequence_length = preprocessor.sequence_length
+    vocab_size = preprocessor.vocab_size
+
+    # Build the Transformer model
+    encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
+    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        encoder_inputs
+    )
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
+        encoder_embeddings
+    )
+
+    decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
+    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        decoder_inputs
+    )
+    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
+        decoder_embeddings, encoder_outputs
+    )
+    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
+    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
+        dropout_outputs
+    )
+
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)
+
+    # Compile the model
+    transformer.compile(
+        optimizer=tf.keras.optimizers.Adam(),
+        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+        metrics=["accuracy"],
+    )
+
+    return transformer
+
+
+def objective(trial):
+    """
+    Objective function for Optuna to optimize the Transformer model using BLEU score.
+
+    Args:
+        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
+
+    Returns:
+        float: BLEU score of the model on the validation dataset.
+    """
+    # Release Keras state left behind by earlier trials so it does not pile up per trial
+    tf.keras.backend.clear_session()
+
+    # Load and preprocess the dataset
+    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
+    processor.load_data()
+    processor.process_data()
+    data_splits = processor.shuffle_and_split()
+    train_df, val_df = data_splits["train"], data_splits["validation"]
+
+    preprocessor = TextPreprocessor()
+    preprocessor.adapt(train_df)
+
+    train_ds = preprocessor.make_dataset(train_df)
+    val_ds = preprocessor.make_dataset(val_df)
+
+    # Build the model
+    model = build_transformer_model(trial, preprocessor)
+
+    # Define callbacks
+    callbacks = [
+        EarlyStopping(
+            monitor="val_loss",
+            patience=2,
+            mode="min",
+            verbose=1,
+            restore_best_weights=True,
+        ),
+        ReduceLROnPlateau(
+            monitor="val_loss",
+            factor=0.5,
+            patience=1,  # must stay below EarlyStopping's patience, or it can never fire
+            mode="min",
+            verbose=1,
+        ),
+    ]
+
+    # Train the model
+    device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
+    with tf.device(device):
+        model.fit(
+            train_ds,
+            validation_data=val_ds,
+            epochs=3,  # Use fewer epochs for faster optimization
+            verbose=1,
+            callbacks=callbacks,
+        )
+
+    # Calculate BLEU score on the validation dataset
+    bleu_score = evaluate_bleu(model, val_ds, preprocessor)
+    return bleu_score
+
+
+def main():
+    """
+    Run the Optuna study, log the best trial and save its hyperparameters as JSON.
+    """
+    study = optuna.create_study(direction="maximize")
+    study.optimize(objective, n_trials=5)
+
+    logging.info("Best trial:")
+    logging.info(f"Value (BLEU Score): {study.best_trial.value}")
+    logging.info("Params:")
+    for key, value in study.best_trial.params.items():
+        logging.info(f" {key}: {value}")
+
+    # Save the best hyperparameters
+    best_params = study.best_trial.params
+    output_path = "src/models/optuna_transformer_best_params.json"
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)  # open() will not create it
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(best_params, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/translation_french_english.py b/src/translation_french_english.py
index a19f451..9c65b3b 100644
--- a/src/translation_french_english.py
+++ b/src/translation_french_english.py
@@ -34,6 +34,7 @@ def transformer_model(
     provided training and validation datasets.
 
     Args:
+        transformer_model_path (str): Path to the saved Transformer model.
         preprocessor (TextPreprocessor): Preprocessor object containing sequence length
             and vocabulary size information.
         train_ds (tf.data.Dataset): Training dataset.
@@ -46,32 +47,44 @@ def transformer_model(
         # Load the saved model
         logging.info("Loading the saved Transformer model.")
         return tf.keras.models.load_model(
-            "src/models/transformer_best_model.keras",
+            transformer_model_path,
             custom_objects={
                 "PositionalEmbedding": PositionalEmbedding,
                 "TransformerEncoder": TransformerEncoder,
                 "TransformerDecoder": TransformerDecoder,
             },
         )
+
     # Define model parameters
-    embed_dim = 128
-    dense_dim = 2048
-    num_heads = 8
+    embed_dim = 64
+    dense_dim = 1536
+    num_heads = 2
+    dropout_rate = 0.4
     sequence_length = preprocessor.sequence_length
     vocab_size = preprocessor.vocab_size
 
     # Build the Transformer model
     encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
-    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
+    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        encoder_inputs
+    )
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
+        encoder_embeddings
+    )
 
     decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
-    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
-    x = tf.keras.layers.Dropout(0.5)(x)
-    decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)
+    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        decoder_inputs
+    )
+    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
+        decoder_embeddings, encoder_outputs
+    )
+    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
+    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
+        dropout_outputs
+    )
 
-    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)
 
     # Compile the model
     transformer.compile(
diff --git a/tests/test_transformer_model.py b/tests/test_transformer_model.py
new file mode 100644
index 0000000..363f42d
--- /dev/null
+++ b/tests/test_transformer_model.py
@@ -0,0 +1,147 @@
+import pytest
+import tensorflow as tf
+from modules.data_processor import DatasetProcessor, TextPreprocessor
+from modules.transformer_components import (
+    PositionalEmbedding,
+    TransformerEncoder,
+    TransformerDecoder,
+    evaluate_bleu,
+)
+from translation_french_english import transformer_model
+from modules.utils import ModelPaths
+import os
+
+
+@pytest.fixture
+def setup_data():
+    """
+    Fixture to set up a mocked dataset and preprocessor for testing.
+    """
+    import pandas as pd
+
+    # Create a small mock dataset
+    mock_data = {
+        "en": ["hello", "how are you", "good morning", "thank you", "goodbye"],
+        "fr": ["bonjour", "comment ça va", "bon matin", "merci", "au revoir"],
+    }
+    mock_df = pd.DataFrame(mock_data)
+
+    # Split the mock dataset
+    train_df = mock_df.sample(frac=0.6, random_state=42)
+    val_df = mock_df.drop(train_df.index).sample(frac=0.5, random_state=42)
+    test_df = mock_df.drop(train_df.index).drop(val_df.index)
+
+    # Initialize the preprocessor
+    preprocessor = TextPreprocessor()
+    preprocessor.adapt(train_df)
+
+    # Create TensorFlow datasets
+    train_ds = preprocessor.make_dataset(train_df)
+    val_ds = preprocessor.make_dataset(val_df)
+    test_ds = preprocessor.make_dataset(test_df)
+
+    return preprocessor, train_ds, val_ds, test_ds
+
+
+@pytest.fixture
+def transformer_model_path(tmp_path):
+    """
+    Provide a fresh per-test model path so no test reuses a file saved by another.
+    """
+    return str(tmp_path / "test_transformer_model.keras")
+
+
+def test_transformer_model_build(setup_data, transformer_model_path):
+    """
+    Test if the Transformer model is built correctly.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Check if the model is compiled
+    assert model.optimizer is not None, "Model is not compiled."
+    assert model.loss is not None, "Loss function is not defined."
+    assert model.metrics is not None, "Metrics are not defined."
+
+
+def test_transformer_model_training(setup_data, transformer_model_path):
+    """
+    Test if the Transformer model can be trained without errors.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Train the model for 1 epoch
+    history = model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=1,
+        verbose=0,
+    )
+
+    # Check if training history is returned
+    assert "loss" in history.history, "Training loss is not recorded."
+    assert "val_loss" in history.history, "Validation loss is not recorded."
+
+
+def test_transformer_model_evaluation(setup_data, transformer_model_path):
+    """
+    Test if the Transformer model can be evaluated without errors.
+    """
+    preprocessor, train_ds, val_ds, test_ds = setup_data
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Evaluate the model
+    results = model.evaluate(test_ds, verbose=0)
+
+    # Check if evaluation results are returned
+    assert len(results) == 2, "Evaluation did not return loss and accuracy."
+    assert results[0] >= 0, "Test loss is invalid."
+    assert 0 <= results[1] <= 1, "Test accuracy is invalid."
+
+
+def test_transformer_model_bleu_score(setup_data, transformer_model_path):
+    """
+    Test if the BLEU score can be calculated for the Transformer model.
+    """
+    preprocessor, train_ds, val_ds, test_ds = setup_data
+
+    # Build the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+
+    # Calculate BLEU score
+    bleu_score = evaluate_bleu(model, test_ds, preprocessor)
+
+    # Check if BLEU score is valid
+    assert 0 <= bleu_score <= 1, "BLEU score is invalid."
+
+
+def test_transformer_model_loading(setup_data, transformer_model_path):
+    """
+    Test if the Transformer model can be loaded from a saved file.
+    """
+    preprocessor, train_ds, val_ds, _ = setup_data
+
+    # Build and save the model
+    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
+    model.save(transformer_model_path)
+
+    # Load the model
+    loaded_model = tf.keras.models.load_model(
+        transformer_model_path,
+        custom_objects={
+            "PositionalEmbedding": PositionalEmbedding,
+            "TransformerEncoder": TransformerEncoder,
+            "TransformerDecoder": TransformerDecoder,
+        },
+    )
+
+    # Check if the loaded model is valid
+    assert loaded_model is not None, "Failed to load the Transformer model."
+    assert loaded_model.optimizer is not None, "Loaded model is not compiled."