This repository was archived by the owner on Sep 22, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptuna_optimization.py
More file actions
80 lines (60 loc) · 2.94 KB
/
optuna_optimization.py
File metadata and controls
80 lines (60 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import optuna
import pandas as pd
from base_model import Evaluator
from crf_model import CRFModel
from crf_pos_we_model import CRFWithPoSAndEmbeddingsModel
from data_processing import PreProcessData, train_val_split
class SaveResults(object):
    """Optuna callback that tabulates the hyperparameters and score of each trial.

    An instance is meant to be passed in ``callbacks=[...]`` to
    ``Study.optimize``; Optuna then calls it as ``callback(study, trial)``
    after every trial.

    Attributes:
        results_df: ``pandas.DataFrame`` with one row per recorded trial. The
            ``"result"`` column holds the first objective value and there is
            one additional column per hyperparameter name.
    """

    def __init__(self):
        # One dict per recorded trial. The frame is rebuilt from this list so
        # that hyperparameter columns appear as soon as a trial introduces
        # them; writing a dict through ``.loc`` on a frame that only declares
        # "result" does not create those columns.
        self._rows = []
        self.results_df = pd.DataFrame(columns=["result"])

    def __call__(self, optuna_study, optuna_trial):
        """Record one trial.

        Args:
            optuna_study: The study that ran the trial (unused).
            optuna_trial: The trial to record; its ``params`` mapping and
                ``values`` sequence are read.
        """
        values = optuna_trial.values
        if not values:
            # No objective value to store (``values`` is None or empty), so
            # there is nothing meaningful to put in the "result" column.
            return

        # "result" is inserted first so it stays the leading column, and it is
        # assigned last so the score wins over a hyperparameter that happens
        # to be named "result".
        row = {"result": None, **optuna_trial.params}
        row["result"] = values[0]

        self._rows.append(row)
        self.results_df = pd.DataFrame(self._rows)
def objective_function(model_type, optuna_trial, train_data, val_data):
    """Train one model with trial-suggested hyperparameters and score it.

    Args:
        model_type: ``"CRF"`` to train a ``CRFModel`` (100 iterations max) or
            ``"CRF_POS_EM"`` to train a ``CRFWithPoSAndEmbeddingsModel``
            (200 iterations max).
        optuna_trial: Optuna trial used to sample ``c1`` and ``c2``, each
            uniformly from ``[0.0, 1.0]``.
        train_data: Data handed to the model constructor.
        val_data: Data handed to ``Evaluator`` for scoring.

    Returns:
        The ``"f1"`` entry of the evaluation results.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Resolve the model before sampling anything, so an unsupported type
    # fails immediately instead of surfacing later as an unbound ``model``.
    if model_type == "CRF":
        model_cls, max_iterations = CRFModel, 100
    elif model_type == "CRF_POS_EM":
        model_cls, max_iterations = CRFWithPoSAndEmbeddingsModel, 200
    else:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'CRF' or 'CRF_POS_EM'"
        )

    # c1 / c2 are presumably the CRF regularisation coefficients; confirm
    # against the model classes.
    model = model_cls(
        train_data,
        c1=optuna_trial.suggest_float("c1", 0.0, 1.0),
        c2=optuna_trial.suggest_float("c2", 0.0, 1.0),
        max_iterations=max_iterations,
    )
    model.train_model(save_model=False)

    evaluator_validation = Evaluator(val_data)
    results = evaluator_validation.evaluate_model(model, verbose=True)
    return results["f1"]
def optuna_study(model_type, n_trials=50, data_path='data/negacio_train_v2024.json'):
    """Run an Optuna search that maximises validation F1 for one model type.

    The dataset is loaded, split into train/validation indices, and each
    trial trains a fresh model through ``objective_function``. A summary of
    the study is printed when the search ends.

    Args:
        model_type: Model identifier forwarded to ``objective_function``.
        n_trials: Number of trials to run.
        data_path: Path of the dataset file passed to ``PreProcessData``.

    Returns:
        The ``optuna`` study object after optimisation, so callers can
        inspect the trials beyond the printed summary.
    """
    data = PreProcessData(data_path)
    train_idx, val_idx = train_val_split(data.text)

    train_data = PreProcessData.from_existing(
        text=[data.text[i] for i in train_idx],
        results=[data.results[i] for i in train_idx],
        # NOTE(review): the bounds filter silently drops indices beyond the
        # word lists, which would leave them shorter than ``text``; confirm
        # that ``neg_words`` / ``unc_words`` are meant to be index-aligned.
        neg_words=[data.neg_words[i] for i in train_idx if i < len(data.neg_words)],
        unc_words=[data.unc_words[i] for i in train_idx if i < len(data.unc_words)],
    )
    # Validation data is built without neg_words / unc_words, as before.
    val_data = PreProcessData.from_existing(
        text=[data.text[i] for i in val_idx],
        results=[data.results[i] for i in val_idx],
    )

    # Named ``study`` so the local does not shadow this function's own name.
    study = optuna.create_study(direction="maximize")
    save_results = SaveResults()
    study.optimize(
        lambda trial: objective_function(model_type, trial, train_data, val_data),
        callbacks=[save_results],
        n_trials=n_trials,
    )

    pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    print("Study statistics: ")
    print(" Number of finished trials: ", len(study.trials))
    print(" Number of pruned trials: ", len(pruned_trials))
    print(" Number of complete trials: ", len(complete_trials))

    # ``best_trial`` raises when no trial has completed (e.g. n_trials=0),
    # so only report it when there is something to report.
    if complete_trials:
        best_trial = study.best_trial
        print("Best trial:")
        print(" Value Validation: ", best_trial.value)
        print(" Params:")
        for key, value in best_trial.params.items():
            print(f" {key} = {value}")
    else:
        print("No completed trials; no best trial to report.")

    return study
if __name__ == "__main__":
    # Script entry point: run a 15-trial search for the "CRF_POS_EM" model type.
    optuna_study(model_type="CRF_POS_EM", n_trials=15)