-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path model.py
More file actions
144 lines (116 loc) · 6.35 KB
/
model.py
File metadata and controls
144 lines (116 loc) · 6.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torch.utils.data import DataLoader
from sklearn.metrics import *
from tqdm import tqdm
import time
from layers import *
from utils import *
from basemodel import Linear, BaseModel
def compute_input_dim(feature_columns, include_sparse=True, include_dense=True, feature_group=False):
    """Return the flattened input width implied by *feature_columns*.

    :param feature_columns: iterable of feature-column descriptors
        (``SparseFeat`` / ``VarLenSparseFeat`` / ``DenseFeat``).
    :param include_sparse: bool, count the sparse/varlen columns.
    :param include_dense: bool, count the dense columns.
    :param feature_group: bool, if True count each sparse field as 1
        instead of its embedding dimension.
    :return: int, total input dimension.
    """
    sparse_cols = [fc for fc in feature_columns if isinstance(fc, (SparseFeat, VarLenSparseFeat))]
    dense_cols = [fc for fc in feature_columns if isinstance(fc, DenseFeat)]

    # Sparse part: either one slot per field (feature_group) or the sum of
    # the fields' embedding sizes.
    if feature_group:
        sparse_dim = len(sparse_cols)
    else:
        sparse_dim = sum(fc.embedding_dim for fc in sparse_cols)
    dense_dim = sum(fc.dimension for fc in dense_cols)

    total = 0
    if include_sparse:
        total += sparse_dim
    if include_dense:
        total += dense_dim
    return total
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    """Flatten sparse embeddings and dense values into one 2-D DNN input.

    :param sparse_embedding_list: list of sparse-embedding tensors.
    :param dense_value_list: list of dense-value tensors.
    :return: tensor of shape ``(batch, total_features)``.
    :raises NotImplementedError: if both lists are empty.
    """
    has_sparse = len(sparse_embedding_list) > 0
    has_dense = len(dense_value_list) > 0

    if has_sparse and has_dense:
        sparse_flat = torch.flatten(torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
        dense_flat = torch.flatten(torch.cat(dense_value_list, dim=-1), start_dim=1)
        return concat_fun([sparse_flat, dense_flat])
    if has_sparse:
        return torch.flatten(torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
    if has_dense:
        return torch.flatten(torch.cat(dense_value_list, dim=-1), start_dim=1)
    raise NotImplementedError
class PNN(BaseModel):
    """Instantiates the Product-based Neural Network architecture.

    The flattened field embeddings (the "linear signal") are concatenated
    with optional inner- and/or outer-product pairwise interaction terms,
    then fed through a DNN whose final bias-free linear layer produces the
    prediction logit.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param use_inner: bool,whether use inner-product or not.
    :param use_outter: bool,whether use outter-product or not.
    :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'``
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param device: str, ``"cpu"`` or ``"cuda:0"``
    :param gpus: list of int or torch.device for multiple gpus. If None, run on `device`. `gpus[0]` should be the same gpu with `device`.
    :return: A PyTorch model instance.
    """

    def __init__(self, dnn_feature_columns, dnn_hidden_units=(128, 128), l2_reg_embedding=1e-5, l2_reg_dnn=0,
                 init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False,
                 kernel_type='mat', task='binary', device='cpu', gpus=None):
        # PNN has no wide/linear part, so the linear feature columns are empty.
        super(PNN, self).__init__([], dnn_feature_columns, l2_reg_linear=0, l2_reg_embedding=l2_reg_embedding,
                                  init_std=init_std, seed=seed, task=task, device=device, gpus=gpus)

        # Reject unsupported outer-product kernels before building any layers.
        if kernel_type not in ['mat', 'vec', 'num']:
            raise ValueError("kernel_type must be mat,vec or num")

        self.use_inner = use_inner
        self.use_outter = use_outter
        self.kernel_type = kernel_type
        self.task = task

        # With feature_group=True each sparse field counts as 1, so this is
        # the number of embedded fields; every unordered field pair yields
        # one interaction term.
        field_count = self.compute_input_dim(dnn_feature_columns, include_dense=False, feature_group=True)
        pair_count = field_count * (field_count - 1) // 2

        product_out_dim = 0
        if self.use_inner:
            product_out_dim += pair_count
            self.innerproduct = InnerProductLayer(device=device)
        if self.use_outter:
            product_out_dim += pair_count
            self.outterproduct = OutterProductLayer(
                field_count, self.embedding_size, kernel_type=kernel_type, device=device)

        # DNN consumes the product terms plus the full (sparse + dense) input.
        self.dnn = DNN(product_out_dim + self.compute_input_dim(dnn_feature_columns), dnn_hidden_units,
                       activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, use_bn=False,
                       init_std=init_std, device=device)
        self.dnn_linear = nn.Linear(dnn_hidden_units[-1], 1, bias=False).to(device)

        # L2-regularize DNN weights (batch-norm params excluded) and the
        # final projection.
        self.add_regularization_weight(
            filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
        self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_dnn)

        self.to(device)

    def forward(self, X):
        sparse_embedding_list, dense_value_list = self.input_from_feature_columns(X, self.dnn_feature_columns,
                                                                                  self.embedding_dict)

        # First-order signal: all field embeddings flattened side by side.
        linear_signal = torch.flatten(
            concat_fun(sparse_embedding_list), start_dim=1)

        # Collect the enabled product terms after the linear signal; the
        # concatenation order (linear, inner, outer) matches the dimension
        # bookkeeping done in __init__.
        signals = [linear_signal]
        if self.use_inner:
            signals.append(torch.flatten(
                self.innerproduct(sparse_embedding_list), start_dim=1))
        if self.use_outter:
            signals.append(self.outterproduct(sparse_embedding_list))
        product_layer = signals[0] if len(signals) == 1 else torch.cat(signals, dim=1)

        dnn_input = combined_dnn_input([product_layer], dense_value_list)
        dnn_logit = self.dnn_linear(self.dnn(dnn_input))
        y_pred = self.out(dnn_logit)
        return y_pred