forked from ajtulloch/adpredictor
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadpredictor.py
More file actions
99 lines (79 loc) · 3.65 KB
/
adpredictor.py
File metadata and controls
99 lines (79 loc) · 3.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from scipy.stats import norm
import logging
import numpy as np
import protobufs.adpredictor_pb2 as pb
import util
from collections import namedtuple
# Module-level logger, named after this module via __name__; used by
# AdPredictor for its info/debug traces.
logger = logging.getLogger(__name__)
class AdPredictor(object):
    """Predictor that keeps a Gaussian belief (mean, variance) per feature.

    Weights live in a dict keyed by ``util.serialize_feature(feature)``;
    features that have never been set fall back to ``util.prior_weight()``.
    ``train`` updates the belief of every feature in one labelled example,
    ``predict`` turns the summed beliefs into a probability via the
    standard normal CDF.
    """

    # Hyper-parameters read by this class:
    #   beta              - beta ** 2 is added to the summed feature variances
    #   prior_probability - forwarded to util.prior_bias_weight (bias weight)
    #   epsilon           - mixing factor used in _apply_dynamics
    #   num_features      - exact number of features each example must carry
    Config = namedtuple(
        'Config',
        ['beta', 'prior_probability', 'epsilon', 'num_features'])

    def __init__(self, config):
        """Store ``config`` and seed the weight table with the bias weight.

        Args:
            config: an ``AdPredictor.Config`` (or any object exposing the
                same four attributes).
        """
        self._config = config
        self._weights = {}

        # Initial bias weight
        bias_weight = util.prior_bias_weight(
            config.prior_probability,
            config.beta,
            config.num_features)
        self._set_weight(util.bias_feature(), bias_weight)

    def predict(self, features):
        """Return the predicted probability for one example.

        Args:
            features: sequence of exactly ``config.num_features`` features.

        Returns:
            ``norm.cdf`` of the summed weight means divided by the total
            standard deviation.

        Raises:
            AssertionError: if the number of features is not
                ``config.num_features``.
        """
        logger.info("Predicting: %s features", len(features))

        assert len(features) == self._config.num_features
        total_mean, total_variance = self._active_mean_variance(features)
        # The probit argument must be normalised by the standard deviation,
        # not the variance -- this matches the normalisation used in train().
        return norm.cdf(total_mean / np.sqrt(total_variance))

    def train(self, features, label):
        """Update the belief of every feature in one labelled example.

        Args:
            features: sequence of exactly ``config.num_features`` features.
            label: converted to a float through ``util.label_to_float``
                (presumably +1 / -1 -- confirm against util).

        Raises:
            AssertionError: if the number of features is not
                ``config.num_features``, or if an updated weight fails the
                sanity checks in ``_set_weight``.
        """
        logger.info("Training: %s, %s features", label, len(features))
        assert len(features) == self._config.num_features

        y = util.label_to_float(label)
        total_mean, total_variance = self._active_mean_variance(features)
        # Loop-invariant: computed once instead of once per feature.
        total_std = np.sqrt(total_variance)
        v, w = util.gaussian_corrections(y * total_mean / total_std)

        for feature in features:
            weight = self._get_weight(feature)
            mean_delta = y * weight.variance / total_std * v
            variance_multiplier = 1.0 - weight.variance / total_variance * w
            updated = pb.Gaussian(
                mean=weight.mean + mean_delta,
                variance=weight.variance * variance_multiplier)

            self._set_weight(feature, self._apply_dynamics(updated))

    def _active_mean_variance(self, features):
        """Return ``(sum of means, sum of variances + beta ** 2)``.

        Each feature's weight is looked up exactly once.
        """
        weights = [self._get_weight(f) for f in features]
        total_mean = sum(w.mean for w in weights)
        total_variance = (
            sum(w.variance for w in weights) + self._config.beta ** 2)
        return total_mean, total_variance

    def _get_weight(self, feature):
        """Return the stored weight of ``feature`` or the prior weight."""
        return self._weights.get(
            util.serialize_feature(feature),
            util.prior_weight())

    def _set_weight(self, feature, weight):
        """Store ``weight`` for ``feature`` after basic sanity checks.

        Raises:
            AssertionError: if the mean is NaN or the variance is negative.
        """
        logger.debug("Setting feature: %s frow weight: %s to weight: %s",
                     util.pp(feature),
                     util.pp(self._get_weight(feature)),
                     util.pp(weight))
        assert not np.isnan(weight.mean)
        assert weight.variance >= 0.0

        self._weights[util.serialize_feature(feature)] = weight

    @property
    def weights(self):
        """List of ``(deserialized feature, weight)`` pairs currently stored."""
        # dict.items() exists on both Python 2 and 3; iteritems() is
        # Python 2 only.
        return [(util.deserialize_feature(f), w)
                for (f, w) in self._weights.items()]

    def _apply_dynamics(self, weight):
        """Return ``weight`` blended with the prior according to epsilon.

        With ``epsilon == 0`` the formulas below reduce to the input mean
        and variance; larger values move the result towards
        ``util.prior_weight()``.
        """
        prior = util.prior_weight()
        epsilon = self._config.epsilon
        adjusted_variance = weight.variance * prior.variance / \
            ((1.0 - epsilon) * prior.variance +
             epsilon * weight.variance)
        adjusted_mean = adjusted_variance * (
            (1.0 - epsilon) * weight.mean / weight.variance +
            epsilon * prior.mean / prior.variance)

        adjusted = pb.Gaussian(mean=adjusted_mean, variance=adjusted_variance)
        logger.debug("Adjusting weight %s to %s",
                     util.pp(weight), util.pp(adjusted))
        return adjusted

    def _importance(self, feature):
        """Return the KL divergence between two predictions.

        The first prediction uses the bias feature plus default
        ``pb.Feature()`` placeholders only; the second replaces the last
        placeholder with ``feature``.  The pair is passed to
        ``util.kl_divergence`` as (with-feature, prior).
        """
        num_features = self._config.num_features
        prior_prediction = self.predict(
            [util.bias_feature()] +
            [pb.Feature()] * (num_features - 1))
        with_weight_prediction = self.predict(
            [util.bias_feature()] +
            [pb.Feature()] * (num_features - 2) + [feature])
        return util.kl_divergence(with_weight_prediction, prior_prediction)