-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathValidation_2_simulations.py
More file actions
90 lines (65 loc) · 2.66 KB
/
Validation_2_simulations.py
File metadata and controls
90 lines (65 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
import ete3
import obdp
import nt
import os
def simulateOneDataset(dirname):
os.mkdir(dirname)
os.chdir(dirname)
##### SIMULATION OF PARAMETERS ######
# tor prior: uniform on (tmin, tmax)
tmin = 1
tmax = 5
tor = tmin + np.random.random()*(tmax-tmin)
# div_mean = np.log( np.log(5) / tor )
# div_sd = 0.2
# lambMinusMu = np.random.lognormal(mean=div_mean, sigma=div_sd)
# mu prior: exponential distribution with mean 1
mu = np.random.exponential(1.)
# lambda prior: knowing mu, it's mu + exponential distribution with mean 0.01 (i.e. rate 100)
lambMinusMu = np.random.exponential(0.01)
lamb = mu + lambMinusMu
# psi and omega priors are exponential distributions with mean 0.2 (i.e. rate 5)
psi = np.random.exponential(0.2)
omega = np.random.exponential(0.2)
# r is uniformly chosen on (0,1)
r = np.random.random()
# rho is uniformly distributed on (0.5, 1)
rho = 0.8 + np.random.random()*0.2
# mutation rate prior: exponential distribution with mean 0.05 (i.e. rate 20)
alpha = np.random.exponential(0.05)
# we record the simulated parameters
params_obdp = lamb, mu, rho, psi, r, omega
print(params_obdp)
params_nt = [alpha]
allParams = [lamb, mu, rho, psi, r, omega, alpha]
allParamsNames = ['lamb', 'mu', 'rho', 'psi', 'r', 'omega', 'alpha']
obdp.exportParams(allParams, allParamsNames)
#print(params_obdp)
##### SIMULATION OF THE TREE ######
# We simulate trees conditioned on having one leaf reaching the present
t, obs, ttruth = obdp.simTOconditionedOnSurvival(params_obdp, tor)
#t.show()
print(len(obs[0]), len(obs[1]), len(obs[2]), len(obs[3]), len(obs[4]), len(obs[5]))
# record the reconstructed tree
t.write(format=1, outfile="tree_reconstructed.nw")
# record the full tree (with extinct branches)
ttruth.write(format=1, outfile="tree_full.nw")
# record the list of occurrences
obdp.exportOccurrences( obs )
# and finally, the list of psi and rho sampled individuals
obdp.exportTaxa( t )
##### SIMULATION OF SEQUENCES ######
# number of nucleotides in the sequence
m_nt = 1000
# simulation of genetic sequences along the reconstructed tree, under a strict clock, with model JC69
tree_seq = nt.simSeqAlongTree(params_nt, t, m_nt)
# we export sequences for psi and rho sampled individuals (i.e. "epidemiology-style")
nt.writeNexus( tree_seq, "extant-extinct" )
os.chdir("..")
return 'done'
# uncomment the following line to get a deterministic output
np.random.seed(1)
for i in range(0, 1000):
dirname = "dataset"+str(i)
simulateOneDataset(dirname)