-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
118 lines (94 loc) · 3.34 KB
/
utils.py
File metadata and controls
118 lines (94 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import numpy as np
import pandas as pd
import statsmodels.api as sm
# custom metric function
def my_scorer(clf, X, y_true, lamb=None):
    """Score a fitted model by the odds ratio of its predictions, per lambda.

    For every lambda a 2x2 table of predicted class against true class is
    counted:

    * ``hn`` - true label 0, predicted 0
    * ``he`` - true label 0, predicted 1
    * ``dn`` - true label 1, predicted 0
    * ``de`` - true label 1, predicted 1

    and the score is ``(de / dn) / (he / hn)``.  When any of the four cells
    is empty the ratio is undefined, so the neutral value ``1`` is reported
    for that lambda instead.

    Samples whose prediction or true label is neither 0 nor 1 are left out of
    the table and reported with one ``"scorer error"`` line each.

    Args:
        clf: Fitted estimator exposing ``predict(X, lamb=...)`` and, when
            *lamb* is not given, a ``lambda_path_`` attribute.
        X: Feature matrix forwarded unchanged to ``clf.predict``.
        y_true: True labels, one per row of ``X``; any array-like (list,
            ndarray, pandas Series) is accepted and read positionally.
        lamb: Optional lambda value(s) to score.  Defaults to
            ``clf.lambda_path_``.

    Returns:
        numpy.ndarray: One odds ratio per lambda, shape ``(n_lambda,)``.
    """
    print("The scorer is called")

    lambda_list = np.atleast_1d(
        np.asarray(lamb if lamb is not None else clf.lambda_path_)
    )

    predictions = np.asarray(clf.predict(X, lamb=lambda_list))
    if predictions.ndim == 1:
        # A single lambda may come back without the trailing lambda axis;
        # restore it so the column indexing below is uniform.
        predictions = predictions.reshape(-1, 1)
    print(predictions.shape)

    odds_ratio = np.zeros(lambda_list.shape)
    print(odds_ratio.shape)

    # Column vector of labels so it broadcasts against (n_samples, n_lambda).
    # np.asarray makes the lookup positional even for a pandas Series whose
    # index does not start at 0.
    labels = np.asarray(y_true).ravel()[:, None]
    n_samples = labels.shape[0]
    true_zero = labels == 0
    true_one = labels == 1
    pred_zero = predictions == 0
    pred_one = predictions == 1

    # Per-lambda cell counts of the 2x2 table, computed in one pass each.
    hn_counts = (pred_zero & true_zero).sum(axis=0)
    dn_counts = (pred_zero & true_one).sum(axis=0)
    he_counts = (pred_one & true_zero).sum(axis=0)
    de_counts = (pred_one & true_one).sum(axis=0)

    for i in range(lambda_list.shape[0]):
        hn = int(hn_counts[i])
        dn = int(dn_counts[i])
        he = int(he_counts[i])
        de = int(de_counts[i])

        # Every sample that landed in no cell had an unexpected value.
        for _ in range(n_samples - (hn + dn + he + de)):
            print("scorer error")

        if de == 0 or dn == 0 or he == 0 or hn == 0:
            odds_ratio[i] = 1
        else:
            odds_ratio[i] = (de / dn) / (he / hn)
        print(i, de, dn, he, hn, odds_ratio[i])

    return odds_ratio
def _binary_auc(y_true, scores):
    """Return the area under the ROC curve for binary labels and scores.

    Uses the rank (Mann-Whitney U) formulation: the AUC is the probability
    that a randomly chosen positive sample receives a higher score than a
    randomly chosen negative one, with ties counted as one half.  The larger
    of the two label values is treated as the positive class.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two distinct
            labels, in which case the AUC is undefined.
    """
    labels = np.asarray(y_true).ravel()
    values = np.asarray(scores, dtype=float).ravel()

    classes = np.unique(labels)
    if classes.size != 2:
        raise ValueError(
            "AUC is only defined when y_true contains exactly two classes; "
            "got {0}".format(classes.size)
        )

    positive = labels == classes[1]
    n_pos = int(positive.sum())
    n_neg = labels.size - n_pos

    # Average 1-based rank of every distinct score, so ties share a rank.
    _, inverse, counts = np.unique(
        values, return_inverse=True, return_counts=True
    )
    average_rank = np.cumsum(counts) - (counts - 1) / 2.0
    ranks = average_rank[inverse]

    u_statistic = ranks[positive].sum() - n_pos * (n_pos + 1) / 2.0
    return u_statistic / (n_pos * n_neg)


# custom metric function
def auc_scorer(clf, X, y_true, lamb=None):
    """Score a fitted model by ROC AUC of its class-1 probability, per lambda.

    Args:
        clf: Fitted estimator exposing ``predict(X, lamb=...)``,
            ``predict_proba(X, lamb=...)`` and, when *lamb* is not given, a
            ``lambda_path_`` attribute.  ``predict_proba`` is indexed as
            ``[:, 1, i]``, i.e. sample x class x lambda.
        X: Feature matrix forwarded unchanged to the estimator.
        y_true: True binary labels, one per row of ``X``.
        lamb: Optional array-like of lambda values to score.  Defaults to
            ``clf.lambda_path_``.

    Returns:
        numpy.ndarray: One AUC value per lambda, same shape as the lambda
        list.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two classes.
    """
    print("The scorer is called")

    if lamb is not None:
        lambda_list = np.asarray(lamb)
    else:
        lambda_list = clf.lambda_path_

    # Hard predictions are only used for the shape diagnostic below.
    predictions = clf.predict(X, lamb=lambda_list)
    prediction_proba = clf.predict_proba(X, lamb=lambda_list)
    print(predictions.shape)

    auc_curve = np.zeros(lambda_list.shape)
    print(auc_curve.shape)
    for i in range(len(lambda_list)):
        # The original called an undefined name here and raised NameError;
        # the AUC is now computed locally with NumPy.
        auc_curve[i] = _binary_auc(y_true, prediction_proba[:, 1, i])
        print(auc_curve[i])
    return auc_curve
def ca_test(X, y):
    """Run an ordinal association test of every column of ``X`` against ``y``.

    For each column a contingency table of the column's values versus ``y``
    is built with ``pandas.crosstab`` and tested with statsmodels'
    ``Table.test_ordinal_association``.  Progress is printed every 5000
    columns, any column with a p-value below 1e-5 is printed as it is found,
    and the 20 smallest p-values are printed at the end.

    Args:
        X: 2-D array indexed as ``X[:, i]``; one column per variable
            (presumably SNP genotypes, going by the log messages).
        y: Outcome values, one per row of ``X``.

    Returns:
        tuple: ``(p_values, order)`` where ``p_values`` holds one p-value per
        column and ``order`` is ``numpy.argsort(p_values)``, i.e. column
        indices from most to least significant.
    """
    n_columns = X.shape[1]
    p_values = np.ones(n_columns)

    for col in range(n_columns):
        if col % 5000 == 0:
            print("Processed {0} SNPs".format(col))

        contingency = sm.stats.Table(pd.crosstab(X[:, col], y))
        p_values[col] = contingency.test_ordinal_association().pvalue

        if p_values[col] < 1.0e-5:
            print("{0}:{1}".format(col, p_values[col]))

    order = np.argsort(p_values)

    print("Top 20 snps: ")
    for col in order[:20]:
        print("{0}: {1}".format(col, p_values[col]))

    return p_values, order
def set_random_seed(random_seed):
    """Seed NumPy's global random number generator.

    Args:
        random_seed: Seed passed to ``numpy.random.seed``.  When ``None``,
            the fixed fallback seed 4242 is used so runs stay reproducible.
    """
    seed = 4242 if random_seed is None else random_seed
    np.random.seed(seed)