-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhyperopt_search.py
More file actions
100 lines (80 loc) · 3.39 KB
/
hyperopt_search.py
File metadata and controls
100 lines (80 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 12 15:11:04 2024
@author: Mels
"""
from hyperopt import fmin, tpe, hp
from functools import partial
import pickle
import torch
import mlflow
import numpy as np
# Define the objective function to minimize
def objective(params: dict):
    """Run one hyperopt trial: train a model, log it to MLflow, return its loss.

    Relies on the module-level names ``train_model``, ``train_data``,
    ``val_data``, ``dictionary`` and ``fixed_params`` being defined before
    the search starts (the ``__main__`` section of this file sets them up).

    Args:
        params: Hyperparameters sampled by hyperopt for this trial. The
            dict is updated in place so that the integer-valued entries
            hold ``int`` instead of the floats produced by ``hp.quniform``.
            All entries are forwarded to ``train_model`` as keyword
            arguments.

    Returns:
        The validation loss of the trained model, which hyperopt minimizes.
    """
    import os  # local import keeps this block self-contained

    # hp.quniform samples floats (e.g. 12.0); cast the integer-valued ones
    for name in ('batch_size', 'block_size', 'n_embd', 'n_heads', 'n_layer'):
        params[name] = int(params[name])

    with mlflow.start_run():  # Log hyperparameters and evaluation results
        # Train the model with the current set of hyperparameters
        model, losses = train_model(
            train_data, val_data, dictionary.vocab_size, **params, show_fig=False
        )
        val_loss = losses['val']
        if isinstance(val_loss, torch.Tensor):
            val_loss = val_loss.item()

        # np.save does not create missing directories
        os.makedirs("temp", exist_ok=True)
        np.save("temp/train_data_hyperopt.npy", train_data.numpy())
        np.save("temp/val_data_hyperopt.npy", val_data.numpy())

        # Log the real values. Casting everything with int() would record
        # float hyperparameters such as learning_rate and dropout as 0.
        # Merging with fixed_params logs each key exactly once.
        mlflow.log_params({**fixed_params, **params})
        mlflow.log_artifact("temp/train_data_hyperopt.npy", artifact_path="data")
        mlflow.log_artifact("temp/val_data_hyperopt.npy", artifact_path="data")
        mlflow.pytorch.log_model(model, 'Test Model in HyperOpt Run')
        mlflow.log_metric("val_loss", val_loss)
        mlflow.set_tag("model_name", "HyperOpt Run")

    return val_loss
#%%
# Training settings held constant across every trial (not searched over)
fixed_params = dict(
    eval_interval=500,
    max_iters=500,
    eval_iters=200,
    patience=5,
)

# Search space handed to hyperopt. The q=1 quniform entries are
# whole-number valued but sampled as floats; objective() casts them to int.
# The constants are unpacked last so each trial receives them unchanged
# alongside the sampled values.
space = {
    'batch_size': hp.quniform('batch_size', 10, 20, q=1),
    'block_size': hp.quniform('block_size', 16, 64, q=1),
    'learning_rate': hp.loguniform('learning_rate', -5, -1),
    'n_embd': hp.quniform('n_embd', 64, 256, q=1),
    'n_heads': hp.quniform('n_heads', 2, 16, q=1),
    'n_layer': hp.quniform('n_layer', 2, 8, q=1),
    'dropout': hp.uniform('dropout', 0.0, 0.5),
    **fixed_params,
}
if __name__ == "__main__":
    # Script entry point: load and encode the dataset, run the hyperopt
    # search against the MLflow tracking server, then save the best result.
    import os

    from Train import train_model
    from Dictionary import load_dict

    dictionary = load_dict()

    # Load the dataset back from the pickle file.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('Dataset/Dataset.pickle', 'rb') as f:
        Dataset = pickle.load(f)

    data = torch.tensor(dictionary.encode(Dataset), dtype=torch.long)
    print('vocab_size equals',dictionary.vocab_size)
    print("The data is encoded in", data.shape, ",",data.dtype)

    # Only a subset of the data is used for the search:
    # the first 25% is the training split, the next 5% the validation split.
    N1 = int(0.25*len(data))
    N2 = int(0.3*len(data))
    train_data = data[:N1]
    val_data = data[N1:N2]

    # initialise MLFlow
    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment("HyperOpt Run")

    # Run hyperparameter search. The constant settings are already merged
    # into `space`, so `objective` is passed directly without a wrapper.
    best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=4)
    # NOTE: hp.quniform yields floats, so integer-like entries are floats here.
    print("Best hyperparameters:", best_params)

    #%%
    # Save the best hyperparameters to a pickle file
    os.makedirs('Results', exist_ok=True)  # open() does not create directories
    with open('Results/best_hyperparameters.pickle', 'wb') as f:
        pickle.dump(best_params, f)