Merged
8 changes: 6 additions & 2 deletions .github/workflows/test.yaml
@@ -5,14 +5,18 @@ on:
     branches:
       - main
     paths:
-      - "src/modules/model.py" # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
+      - "src/modules/transformer_components.py"
+      - "src/translation_french_english.py"
   pull_request:
     branches:
       - main
     paths:
-      - "src/modules/model.py" # Trigger only if this file is modified
+      - "src/modules/model_sentiment_analysis.py" # Trigger only if this file is modified
       - "src/modules/data_processor.py" # Trigger only if this file is modified
+      - "src/modules/transformer_components.py"
+      - "src/translation_french_english.py"
 
 jobs:
   test:
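Since the push and pull_request paths filters are evaluated independently, the workflow now triggers in both events whenever the sentiment-analysis model, the data processor, the shared transformer components, or the translation script changes.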
154 changes: 154 additions & 0 deletions src/modules/optuna_transformer.py
@@ -0,0 +1,154 @@
import optuna
import tensorflow as tf
from modules.data_processor import DatasetProcessor, TextPreprocessor
from modules.transformer_components import (
    PositionalEmbedding,
    TransformerEncoder,
    TransformerDecoder,
    evaluate_bleu,
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import logging
import json

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def build_transformer_model(trial, preprocessor):
    """
    Build a Transformer model with hyperparameters suggested by Optuna.

    Args:
        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.
        preprocessor (TextPreprocessor): Preprocessor object containing sequence length and vocabulary size.

    Returns:
        tf.keras.Model: The compiled Transformer model.
    """
    # Hyperparameters to optimize
    embed_dim = trial.suggest_categorical("embed_dim", [64, 128])
    dense_dim = trial.suggest_int("dense_dim", 512, 2048, step=512)
    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)

    sequence_length = preprocessor.sequence_length
    vocab_size = preprocessor.vocab_size

    # Build the Transformer model
    encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
        encoder_inputs
    )
    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
        encoder_embeddings
    )

    decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
        decoder_inputs
    )
    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
        decoder_embeddings, encoder_outputs
    )
    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
        dropout_outputs
    )

    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)

    # Compile the model
    transformer.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    return transformer


def objective(trial):
    """
    Objective function for Optuna to optimize the Transformer model using BLEU score.

    Args:
        trial (optuna.trial.Trial): The trial object for hyperparameter optimization.

    Returns:
        float: BLEU score of the model on the validation dataset.
    """
    # Load and preprocess the dataset
    processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
    processor.load_data()
    processor.process_data()
    data_splits = processor.shuffle_and_split()
    train_df, val_df = data_splits["train"], data_splits["validation"]

    preprocessor = TextPreprocessor()
    preprocessor.adapt(train_df)

    train_ds = preprocessor.make_dataset(train_df)
    val_ds = preprocessor.make_dataset(val_df)

    # Build the model
    model = build_transformer_model(trial, preprocessor)

    # Define callbacks
    callbacks = [
        EarlyStopping(
            monitor="val_loss",
            patience=2,
            mode="min",
            verbose=1,
            restore_best_weights=True,
        ),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=3,
            mode="min",
            verbose=1,
        ),
    ]

    # Train the model
    device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
    with tf.device(device):
        model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=3,  # Use fewer epochs for faster optimization
            verbose=1,
            callbacks=callbacks,
        )

    # Calculate BLEU score on the validation dataset
    bleu_score = evaluate_bleu(model, val_ds, preprocessor)
    return bleu_score


def main():
    """
    Main function to run the Optuna optimization.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=5)

    logging.info("Best trial:")
    logging.info(f"Value (BLEU Score): {study.best_trial.value}")
    logging.info("Params:")
    for key, value in study.best_trial.params.items():
        logging.info(f" {key}: {value}")

    # Save the best hyperparameters
    best_params = study.best_trial.params
    with open("src/models/optuna_transformer_best_params.json", "w") as f:
        json.dump(best_params, f, indent=4)


if __name__ == "__main__":
    main()
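As a follow-on sketch (not part of this PR), the saved JSON could be replayed through the same builder via optuna.trial.FixedTrial, which feeds stored values back through the suggest_* calls; the preprocessing here simply mirrors objective() above and assumes the same data layout and helper APIs.

# Sketch: rebuild the best model from the saved hyperparameters.
# Assumes the same dataset path and helper APIs used in objective().
import json

import optuna

from modules.data_processor import DatasetProcessor, TextPreprocessor
from modules.optuna_transformer import build_transformer_model

with open("src/models/optuna_transformer_best_params.json") as f:
    best_params = json.load(f)

processor = DatasetProcessor(file_path="src/data/en-fr.parquet")
processor.load_data()
processor.process_data()
train_df = processor.shuffle_and_split()["train"]

preprocessor = TextPreprocessor()
preprocessor.adapt(train_df)

# FixedTrial replays the stored values through the suggest_* calls.
model = build_transformer_model(optuna.trial.FixedTrial(best_params), preprocessor)
model.summary()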
35 changes: 24 additions & 11 deletions src/translation_french_english.py
@@ -34,6 +34,7 @@ def transformer_model(
     provided training and validation datasets.
 
     Args:
+        transformer_model_path (str): Path to the saved Transformer model.
         preprocessor (TextPreprocessor): Preprocessor object containing sequence length
             and vocabulary size information.
         train_ds (tf.data.Dataset): Training dataset.
@@ -46,32 +47,44 @@
     # Load the saved model
     logging.info("Loading the saved Transformer model.")
     return tf.keras.models.load_model(
-        "src/models/transformer_best_model.keras",
+        transformer_model_path,
         custom_objects={
             "PositionalEmbedding": PositionalEmbedding,
             "TransformerEncoder": TransformerEncoder,
             "TransformerDecoder": TransformerDecoder,
         },
     )
 
     # Define model parameters
-    embed_dim = 128
-    dense_dim = 2048
-    num_heads = 8
+    embed_dim = 64
+    dense_dim = 1536
+    num_heads = 2
Copilot AI commented on lines 58 to 61 (May 4, 2025):
    [nitpick] Consider adding a comment or updating the docstring to explain the rationale behind the updated hyperparameter values (e.g., embed_dim, dense_dim, and num_heads) for improved clarity and maintainability.
+    dropout_rate = 0.4
     sequence_length = preprocessor.sequence_length
     vocab_size = preprocessor.vocab_size
 
     # Build the Transformer model
     encoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="english")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
-    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
+    encoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        encoder_inputs
+    )
+    encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(
+        encoder_embeddings
+    )
 
     decoder_inputs = tf.keras.Input(shape=(None,), dtype="int32", name="french")
-    x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
-    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
-    x = tf.keras.layers.Dropout(0.5)(x)
-    decoder_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)
+    decoder_embeddings = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(
+        decoder_inputs
+    )
+    decoder_outputs = TransformerDecoder(embed_dim, dense_dim, num_heads)(
+        decoder_embeddings, encoder_outputs
+    )
+    dropout_outputs = tf.keras.layers.Dropout(dropout_rate)(decoder_outputs)
+    final_outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(
+        dropout_outputs
+    )
 
-    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+    transformer = tf.keras.Model([encoder_inputs, decoder_inputs], final_outputs)
 
     # Compile the model
     transformer.compile(
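One way to address the Copilot nitpick above (hedged, since the PR itself does not say where the new values came from) would be to annotate the hyperparameter block with its assumed provenance:

# Define model parameters.
# Assumed provenance: the best trial of the Optuna search in
# src/modules/optuna_transformer.py (saved to
# src/models/optuna_transformer_best_params.json).
embed_dim = 64
dense_dim = 1536
num_heads = 2
dropout_rate = 0.4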
144 changes: 144 additions & 0 deletions tests/test_transformer_model.py
@@ -0,0 +1,144 @@
import pytest
import tensorflow as tf
from modules.data_processor import DatasetProcessor, TextPreprocessor
from modules.transformer_components import (
    PositionalEmbedding,
    TransformerEncoder,
    TransformerDecoder,
    evaluate_bleu,
)
from translation_french_english import transformer_model
from modules.utils import ModelPaths
import os


@pytest.fixture
def setup_data():
    """
    Fixture to set up a mocked dataset and preprocessor for testing.
    """
    import pandas as pd

    # Create a small mock dataset
    mock_data = {
        "en": ["hello", "how are you", "good morning", "thank you", "goodbye"],
        "fr": ["bonjour", "comment ça va", "bon matin", "merci", "au revoir"],
    }
    mock_df = pd.DataFrame(mock_data)

    # Split the mock dataset
    train_df = mock_df.sample(frac=0.6, random_state=42)
    val_df = mock_df.drop(train_df.index).sample(frac=0.5, random_state=42)
    test_df = mock_df.drop(train_df.index).drop(val_df.index)

    # Initialize the preprocessor
    preprocessor = TextPreprocessor()
    preprocessor.adapt(train_df)

    # Create TensorFlow datasets
    train_ds = preprocessor.make_dataset(train_df)
    val_ds = preprocessor.make_dataset(val_df)
    test_ds = preprocessor.make_dataset(test_df)

    return preprocessor, train_ds, val_ds, test_ds


def test_transformer_model_build(setup_data):
    """
    Test if the Transformer model is built correctly.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Check if the model is compiled
    assert model.optimizer is not None, "Model is not compiled."
    assert model.loss is not None, "Loss function is not defined."
    assert model.metrics is not None, "Metrics are not defined."


def test_transformer_model_training(setup_data):
    """
    Test if the Transformer model can be trained without errors.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Train the model for 1 epoch
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=1,
        verbose=0,
    )

    # Check if training history is returned
    assert "loss" in history.history, "Training loss is not recorded."
    assert "val_loss" in history.history, "Validation loss is not recorded."


def test_transformer_model_evaluation(setup_data):
    """
    Test if the Transformer model can be evaluated without errors.
    """
    preprocessor, train_ds, val_ds, test_ds = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Evaluate the model
    results = model.evaluate(test_ds, verbose=0)

    # Check if evaluation results are returned
    assert len(results) == 2, "Evaluation did not return loss and accuracy."
    assert results[0] >= 0, "Test loss is invalid."
    assert 0 <= results[1] <= 1, "Test accuracy is invalid."


def test_transformer_model_bleu_score(setup_data):
    """
    Test if the BLEU score can be calculated for the Transformer model.
    """
    preprocessor, train_ds, val_ds, test_ds = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)

    # Calculate BLEU score
    bleu_score = evaluate_bleu(model, test_ds, preprocessor)

    # Check if BLEU score is valid
    assert 0 <= bleu_score <= 1, "BLEU score is invalid."


def test_transformer_model_loading(setup_data):
    """
    Test if the Transformer model can be loaded from a saved file.
    """
    preprocessor, train_ds, val_ds, _ = setup_data
    transformer_model_path = "src/models/test_transformer_model.keras"

    # Build and save the model
    model = transformer_model(transformer_model_path, preprocessor, train_ds, val_ds)
    model.save(transformer_model_path)

    # Load the model
    loaded_model = tf.keras.models.load_model(
        transformer_model_path,
        custom_objects={
            "PositionalEmbedding": PositionalEmbedding,
            "TransformerEncoder": TransformerEncoder,
            "TransformerDecoder": TransformerDecoder,
        },
    )

    # Check if the loaded model is valid
    assert loaded_model is not None, "Failed to load the Transformer model."
    assert loaded_model.optimizer is not None, "Loaded model is not compiled."
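A usage note, assuming the packages live under src/ as the imports above suggest: the suite can then be run from the repository root with PYTHONPATH=src pytest tests/test_transformer_model.py -v, so that modules.* and translation_french_english resolve without installing the project.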