-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFounder_Model.py
More file actions
226 lines (188 loc) · 12.3 KB
/
Founder_Model.py
File metadata and controls
226 lines (188 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import numpy as np
import copy
################################### Parameters #############################################
N = 300 # population size
Nbud = 250 # budding size
seed = 50 # seed size
r = 0.2 # growth rate
k = 50 # number of village
mu = 0.0249 # mutation rate
m = 0.135 # migration rate
p = 0.1 # 1 - heritability
sigma = 0.8 # reproductive advantage
delta = 0.00 # fraction of dominance (set to zero for nondominance island model)
nd = int(round(N*delta)) # number of dominant individuals
ndseed = int(round(seed*delta))
T = 100 # number of generation
ns = 60 # sampling size
Nens = 200 # number of realizations
##############################################################################################
''' The origin (where the first village is drawn from) is simulated with neutral model '''
populationO = np.ones(N, dtype=np.int) # single haplotype
probO = np.ones(N, dtype=float) # reproduction probability
probO = probO / float(N) # normalized the sum to one
t = 0
pop0 = copy.copy(populationO) # current generation
pop1 = copy.copy(populationO) # next generation
HO = 1 # number which had been used to represent the haplo
while t < T:
numoffspringO = np.random.multinomial(N, probO, size=1)
# produce N number of offsprings, prob0: prob for each (of N) father to be chosen, size: do the drawing 1 time
zS = 0 # record index of son
for ii in range(N): # go over all father
if numoffspringO[0,ii] > 0: # if father ii reproduce
sonO = np.arange(zS, zS + numoffspringO[0, ii]) # assign index to the son
pop1[sonO] = pop0[ii] # son's haplotype follow father
zS = zS + numoffspringO[0, ii]
mup = np.random.random((N, 1)) # mutation probability
mutationO = [x for x in xrange(N) if mup[x, 0] < mu]
if len(mutationO) >= 1:
pop1[mutationO] = np.arange(1, len(mutationO)+1) + HO # create new haplotypes
HO += len(mutationO)
pop0 = copy.copy(pop1)
t += 1
US, countsS = np.unique(pop0, return_counts=True)
print len(US)
##############################################################################################
HetS = np.zeros((Nens, k-5), dtype=np.float) # heterozygosity of haplotype
############################### Founder Model ###############################
for ens in range(Nens):
print ens
done = False
########################## Initialization #############################
G0S = np.zeros((N,k), dtype=np.int) # current generation
G0S[0:seed, 0] = np.random.choice(pop0, seed, replace=False) # first village
G1S = copy.copy(G0S) # updated generation
HS = np.max(pop0) # max number used to represent the haplotype
M = np.zeros(k, dtype=np.int) # population size of each village
M[0] = seed # population size of the first village
nv = 1 # number of active village
probS = np.zeros((N,k), dtype=float) # probability to reproduce
probS[0:seed, 0] = 1.0
if nd > 0: # for dominance model
dominanceS = 999 * np.ones((nd,k), dtype=np.int) # indexes for dominant individuals
dominanceS[0:ndseed, 0] = np.random.choice(seed, ndseed, replace=False) # random sample nd dominant individuals
probS[dominanceS[0:ndseed, 0], 0] = 1.0 + sigma
probS[:, 0] = probS[:, 0] / ( seed + ndseed * sigma ) # normalization
####################### Simulate until k villages are formed ####################
while nv <= 50 and not done:
if nv == 50:
done = True
########################### Migration #######################
if nv > 1:
G1S = copy.copy(G0S)
for v in range(k):
if M[v] > 0:
pp = M[v] # number of ppl in village
mp = np.random.random((pp, 1)) # migration probability
migrationS = [x for x in xrange(pp) if mp[x, 0] < m] # index of ppl to be replaced by migrants
if len(migrationS) >= 1:
nei = [x for x in xrange(k) if x != v and M[x] > 0] # index of neighbor villages with size>0
pool = G0S[:, nei]
pools0 = pool.ravel()
pools = [value for value in pools0 if value != 0]
G1S[migrationS, v] = np.random.choice(pools, len(migrationS), replace=False)
G0S = copy.copy(G1S) # activate migration
G1S = np.zeros((N,k), dtype=np.int)
########################## Reproduction ######################
for v in range(k):
if M[v] > 0:
pp1 = M[v] # population size for current generation
pp2 = int( round( M[v] * (1 + r * ( 1 - M[v]/float(N) ) ) ) ) # population size of the next generation
###### choose father from previous generation ######
if nd > 0:
DoS = [] # dominant son
while len(DoS) < int(round(pp2*delta)): # redo if number of son of dominant less than pp2*delta
# record the number of offsprings each (of the N) father produced
numoffspringS = np.random.multinomial(pp2, probS[0:pp1, v], size=1) # pp1 father --> pp2 son
zS = 0
DoS = []
for ii in range(pp1):
if numoffspringS[0, ii] > 0:
sonS = np.arange(zS, zS + numoffspringS[0, ii])
if ii in dominanceS[:,v]: # if father is dominant
DoS = np.concatenate((DoS, sonS))
G1S[sonS, v] = G0S[ii, v]
zS = zS + numoffspringS[0, ii]
nd_son = int(round(pp2*delta))
dominanceS[:nd_son,v] = np.random.choice(DoS, nd_son, replace=False) # choose from DoS
dominanceS[nd_son:,v] = 999
nonDS = [x for x in xrange(pp2) if not x in dominanceS[:nd_son,v]]
rp = np.random.random((nd_son,1)) # replace probability
replaceS = [x for x in xrange(nd_son) if rp[x, 0] < p] # replace based on heritability
if len(replaceS) >= 1:
dominanceS[replaceS,v] = np.random.choice(nonDS, len(replaceS), replace=False)
probS[:pp2,v] = np.ones((pp2), dtype=float)
probS[pp2:,v] = 0.0
probS[dominanceS[0:nd_son, v], v] = 1.0 + sigma
probS[:,v] = probS[:,v] / float((pp2 + round(pp2*delta)*sigma))
else: # non-dominance model
numoffspringS = np.random.multinomial(pp2, probS[0:pp1, v], size=1)
# produce pp2 number of offsprings, probS: prob for each (of pp1) father to be chosen, size: do the drawing 1 time
zS = 0 # record index of son
for ii in range(pp1): # go over all father
if numoffspringS[0,ii] > 0: # if father ii reproduce
sonS = np.arange(zS, zS + numoffspringS[0, ii])
G1S[sonS, v] = G0S[ii, v] # son haplotype follow father
zS = zS + numoffspringS[0, ii]
probS[:pp2,v] = np.ones(pp2, dtype=float) / float(pp2)
probS[pp2:,v] = 0.0
M[v] = pp2
########################### Mutation #######################
for v in range(k):
if M[v] > 0:
mup = np.random.random((M[v], 1)) # mutation probability
mutationS = [x for x in xrange(M[v]) if mup[x, 0] < mu]
if len(mutationS) >= 1:
G1S[mutationS, v] = np.arange(1, len(mutationS)+1) + HS # create new haplotypes
HS += len(mutationS) # record total number of ever existed haplotype
########################### Founders #######################
budding = np.random.permutation(k) # randomize the budding order
for v in budding:
if M[v] >= Nbud:
space = [x for x in range(k) if M[x]==0] # empty space
if len(space) > 0:
new = space[0]
villagers = G1S[0:M[v],v]
founder = np.random.choice(M[v], seed, replace=False) # random choose seed ppl
G1S[0:seed,new] = villagers[founder] # add founders to new village
index = [x for x in range(M[v]) if not x in founder]
G1S[0:M[v]-seed,v] = villagers[index] # remaining villagers
G1S[M[v]-seed:,v] = 0
if nd > 0:
Od = dominanceS[:,v]
Ld = [val for val in Od if val in index] # remaining villagers who are dominance
ind2 = [dd for dd in range(len(index)) if index[dd] in Ld]
dominanceS[:,v] = 999
dominanceS[0:len(Ld),v] = ind2
probS[:,v] = 0.0
probS[0:M[v]-seed,v] = 1.0
probS[ind2,v] = 1.0 + sigma
probS[:,v] = probS[:,v] / np.sum(probS[:,v])
Nd_new = int(round(seed*delta))
dominanceS[0:Nd_new,new] = np.random.choice(seed, Nd_new, replace=False)
probS[0:seed,new] = 1.0
probS[dominanceS[0:Nd_new,new],new] = 1.0 + sigma
probS[:,new] = probS[:,new] / np.sum(probS[:,new])
else:
probS[:,v] = 0.0
probS[0:M[v]-seed,v] = 1.0 / float(M[v]-seed)
probS[:,new] = 0.0
probS[0:seed,new] = 1.0 / float(seed)
nv += 1
M[new] = seed
M[v] = M[v] - seed
G0S = copy.copy(G1S)
for v in range(k-5):
villageS = G0S[0:M[v], v]
sampS = np.random.choice(villageS, ns, replace=False)
US, countsS = np.unique(sampS, return_counts=True)
tyS = len(US)
freqS = countsS / float(ns)
sqS = np.power(freqS, 2)
HetS[ens, v] = 1.0 - np.sum(sqS)
mhetS_v = np.mean(HetS, axis=1) # mean heterozygosity over villages
mhetS = np.mean(mhetS_v, axis=0) # mean heterozygosity over ensembles
print mhetS
#np.savetxt('FM_het_patri_mtDNA_mu0018.txt', HetS, fmt='%f')
np.savetxt('FM_het_matri_Y_mu0024.txt', HetS, fmt='%f')