-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathbase_estimator.py
More file actions
78 lines (57 loc) · 2.49 KB
/
base_estimator.py
File metadata and controls
78 lines (57 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np
# from sklearn.metrics import roc_auc_score, log_loss
from metrics import logloss as log_loss, auc as roc_auc_score
from tqdm import tqdm
import logging
class BaseEstimator:
    """
    Skeleton of an estimator that is trained and evaluated epoch by epoch.

    Batches are pulled from the data source handed to the constructor.
    Subclasses provide the model itself by overriding ``train_batch`` and
    ``predict``; both raise ``NotImplementedError`` here.
    """

    def __init__(self, data_source):
        """
        :param data_source: object whose ``train_batches_per_epoch()`` and
            ``test_batches_per_epoch()`` methods return iterables of
            ``(features, labels)`` pairs
        """
        self._data_source = data_source

    def get_metrics(self, scores, labels, prefix):
        """
        Compute log-loss, AUC and accuracy for a set of predictions.

        :param scores: array-like of predicted probabilities
        :param labels: array-like of ground-truth labels, aligned with ``scores``
        :param prefix: string prepended to every metric name (e.g. 'train')
        :return: dict with keys ``<prefix>_logloss``, ``<prefix>_auc`` and
            ``<prefix>_accuracy``
        """
        y_score = np.asarray(scores)
        y_true = np.asarray(labels)

        result = {}
        result['{}_logloss'.format(prefix)] = log_loss(y_true=y_true, y_pred=y_score)
        result['{}_auc'.format(prefix)] = roc_auc_score(y_true=y_true, y_score=y_score)

        # Accuracy: a score strictly above 0.5 counts as a positive prediction.
        hard_predictions = (y_score > 0.5).astype(int)
        n_correct = np.sum(hard_predictions == y_true)
        result['{}_accuracy'.format(prefix)] = n_correct / len(y_true)
        return result

    def train_batch(self, features, labels):
        """
        Run one training step on a single batch; must be overridden.

        :param features: dict, field_name ==> dense matrix or SparseInput
        :param labels: [batch_size] ndarray
        :return: [batch_size] ndarray of predicted probabilities in that batch
        """
        raise NotImplementedError()

    def predict(self, features):
        """
        Score a single batch without training; must be overridden.

        :param features: dict, field_name ==> dense matrix or SparseInput
        :return: [batch_size] ndarray of predicted probabilities in that batch
        """
        raise NotImplementedError()

    def _train_epoch(self):
        """
        Train on every batch of one training epoch.

        :return: metrics dict (see ``get_metrics``) with the 'train' prefix,
            computed over the predictions returned by ``train_batch``
        """
        all_scores, all_labels = [], []
        batches = self._data_source.train_batches_per_epoch()
        # tqdm wraps the stream to show a progress bar while iterating.
        for features, targets in tqdm(batches):
            batch_scores = self.train_batch(features, targets)
            all_scores.extend(batch_scores)
            all_labels.extend(targets)
        return self.get_metrics(scores=all_scores, labels=all_labels, prefix='train')

    def _eval_epoch(self):
        """
        Score every batch of one test epoch.

        :return: metrics dict (see ``get_metrics``) with the 'test' prefix
        """
        all_scores, all_labels = [], []
        batches = self._data_source.test_batches_per_epoch()
        for features, targets in tqdm(batches):
            batch_scores = self.predict(features)
            all_scores.extend(batch_scores)
            all_labels.extend(targets)
        return self.get_metrics(scores=all_scores, labels=all_labels, prefix='test')

    def train(self, n_epochs):
        """
        Alternate one training pass and one evaluation pass for each epoch.

        :param n_epochs: number of epochs to run
        :return: list with one dict per epoch holding both the train and the
            test metrics of that epoch
        """
        history = []
        for epoch_number in range(1, n_epochs + 1):
            logging.info("\n=============== {}-th EPOCH".format(epoch_number))

            # Train first, then evaluate; the two prefixes keep the keys apart.
            epoch_metrics = dict(self._train_epoch())
            epoch_metrics.update(self._eval_epoch())

            logging.info(epoch_metrics)
            history.append(epoch_metrics)
        return history