Network-Inference-Workspace/WeightedConsensus.py at master · jmlingeman/Network-Inference-Workspace · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# coding: utf-8
import math, itertools, random
from scipy import linspace, polyval, polyfit, sqrt, stats, randn
import scipy
import itertools
from AnalyzeResults import *

class GradientDescent:

    def run(self, jobs, goldnets):
        # The idea is to run a gradient descent to create a new model
        # for each gene across the jobs, solving for weights a_n for the
        # model target_gene = a1A + a2B + a3C + a4AC + a5AB + a6BC +
        # a7ABC ...

        # Are A B C etc weights or expvals? I'll try weights first

        # Just try this for one job first
        for job in jobs:
            gene_list = job.alg.gene_list

            combination_list = []
            for L in range(len(gene_list)+1):
                combination_list.append( itertools.combinations(gene_list), L )

            for target_gene in gene_list:
                # We want to build a model for target_gene from a
                # combination of all of the other genes in the network
                # (most of which will be 0, but include them... those
                # terms will just be 0) using those genes in-weights to
                # target_gene

                # Initialize the weights for this gene between -1 and 1
                a_weights = {}
                for c in combination_list:
                    a_weights[c] = random.random() * 2 - 1


# TODO
class LinearRegression:

    def run(self, jobs, goldnet=None):
        return 0


class SimulatedAnnealing:

  def run(self, jobs, goldnets, starting_temp=10, seed=None, max_iter=3000, alpha=0.9992, restart=True, transition_prob=0.3):

    self.jobs = jobs
    self.goldnets = goldnets
    self.starting_temp = starting_temp

    # Set seed, if defined
    if seed != None:
      random.seed(seed)

    best_e = 9999999
    e = 9999999
    best_weights = []
    best_net = None
    best_iter = 0
    iter = 0
    self.temp = starting_temp

    # Initialize weights
    weights = []
    for job in jobs[0]:
      # TODO Make this rand? Option?
      weights.append(random.random() * 10)

    while iter < max_iter and best_e > 0 and self.temp > 0 :
      new_weights = self.select_neighbor(weights, self.temp)
      new_e, new_net = self.calculate_energy(jobs, new_weights, goldnets)

      if abs(e - new_e) < 0.01:
        reset_count += 1

      print self.P(e, new_e, self.temp)
      if self.P(e, new_e, self.temp) > random.random():
        weights = new_weights
        e = new_e
        net = new_net

      if new_e < best_e:
        best_weights = weights
        best_e = e
        best_iter = iter
        best_net = new_net
        reset_count = 0

      if restart and reset_count > 100:
        weights = []
        for job in jobs[0]:
          weights.append(random.random() * 10)
        reset_count = 0


      iter += 1
      self.update_temp(alpha)

      print "Iter: {6}\tTemp: {7}\tBestE: {0}\t\tBestW: {1}\t\tCurE: {2}\t\tCurW: {3}\t\tNewE: {4}\t\tNewW: {5}".format(best_e, best_weights, e, weights, new_e, new_weights, iter, self.temp)

    self.best_e = best_e
    self.best_iter = best_iter
    self.best_weights = best_weights
    self.best_net = best_net
    return best_weights, best_e

# Try this first by adjusting weights in the matrix, then sum the matrices, then try to adjust matrix output with weight
  def calculate_energy(self, jobs, weights, goldnets):
    for i in range(len(jobs)):
      jobs[i] = sorted(jobs[i], key=lambda job: job.alg.name)

    batch_nets = []
    for batch in jobs:
      nets = []
      for i,job in enumerate(batch):
        net = job.alg.network.copy()
        for gene1 in net.network:
          for gene2 in net.network[gene1]:
            net.network[gene1][gene2] *= weights[i]
        nets.append(net)
      batch_nets.append(nets)

    sum_roc = 0
    for i,batch in enumerate(batch_nets):
      compiled_net = batch[0].copy()
      for net in batch[1:]:
        for gene1 in net.network:
          for gene2 in net.network[gene1]:
            compiled_net.network[gene1][gene2] += net.network[gene1][gene2]

      tpr, fpr, roc_auc = GenerateROC(compiled_net, goldnets[i])
      sum_roc += roc_auc

    return (len(jobs)-sum_roc) * 1000, compiled_net


  def update_temp(self, alpha):
    self.temp *= alpha

  def select_neighbor(self, weights, temp):
    new_weights = weights[:]
    for i in range(len(new_weights)):
      new_weights[i] += (random.random() * 2 - 1) * ((2.0 / self.starting_temp) * self.temp)
      #if new_weights[i] < 0:
        #new_weights[i] = 0
      #if new_weights[i] > 10:
        #new_weights[i] = 10
    return new_weights


  def P(self, e, new_e, temp):
    #In the formulation of the method by Kirkpatrick et al., the acceptance
    #probability function P(e,e',T) was defined as 1 if e' < e, and exp((e − e') / T)
    #otherwise
    if abs(new_e - e) < 0.0001:
      return 0.1
    elif new_e < e:
      return 1
    else:
      return math.exp((e - new_e) / temp)

  def test(self, jobs, goldnet, settings):
    baseweights = []
    jobs = sorted(jobs, key=lambda job: job.alg.name)
    print "New jobs:"
    for job in jobs:
      print job.alg.name
      if "mcz" in job.alg.name.lower():
        basejob = job
        baseweights.append(1)
      else:
        baseweights.append(0)
    print "Old jobs:"
    for batch in self.jobs:
      for job in batch:
        print job.alg.name

    baseline_roc, baseline_network = self.calculate_energy([jobs], baseweights, [goldnet])
    roc, network = self.calculate_energy([jobs], self.best_weights, [goldnet])
    print network.calculateAccuracy(goldnet)
    print "Test network AUROC: {0}".format(1 - (roc / 1000.0))
    print self.best_weights
    print "Test baseline: {0}".format(1 - (baseline_roc / 1000.0))
    return roc, baseline_roc, network, baseline_network, basejob