-
Notifications
You must be signed in to change notification settings - Fork 69
Expand file tree
/
Copy pathPRScs.py
More file actions
executable file
·120 lines (90 loc) · 4.92 KB
/
PRScs.py
File metadata and controls
executable file
·120 lines (90 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
"""
PRS-CS: a polygenic prediction method that infers posterior SNP effect sizes under continuous shrinkage (CS) priors
using GWAS summary statistics and an external LD reference panel.
Reference: T Ge, CY Chen, Y Ni, YCA Feng, JW Smoller. Polygenic Prediction via Bayesian Regression and Continuous Shrinkage Priors.
Nature Communications, 10:1776, 2019.
Usage:
python PRScs.py --ref_dir=PATH_TO_REFERENCE --bim_prefix=VALIDATION_BIM_PREFIX --sst_file=SUM_STATS_FILE --n_gwas=GWAS_SAMPLE_SIZE --out_dir=OUTPUT_DIR
[--a=PARAM_A --b=PARAM_B --phi=PARAM_PHI --n_iter=MCMC_ITERATIONS --n_burnin=MCMC_BURNIN --thin=MCMC_THINNING_FACTOR
--chrom=CHROM --write_psi=WRITE_PSI --write_pst=WRITE_POSTERIOR_SAMPLES --seed=SEED]
"""
import os
import sys
import getopt
import parse_genet
import mcmc_gtb
import gigrnd
def parse_param():
long_opts_list = ['ref_dir=', 'bim_prefix=', 'sst_file=', 'a=', 'b=', 'phi=', 'n_gwas=',
'n_iter=', 'n_burnin=', 'thin=', 'out_dir=', 'chrom=', 'beta_std=', 'write_psi=', 'write_pst=', 'seed=', 'help']
param_dict = {'ref_dir': None, 'bim_prefix': None, 'sst_file': None, 'a': 1, 'b': 0.5, 'phi': None, 'n_gwas': None,
'n_iter': 1000, 'n_burnin': 500, 'thin': 5, 'out_dir': None, 'chrom': range(1,23),
'beta_std': 'FALSE', 'write_psi': 'FALSE', 'write_pst': 'FALSE', 'seed': None}
print('\n')
if len(sys.argv) > 1:
try:
opts, args = getopt.getopt(sys.argv[1:], "h", long_opts_list)
except:
print('Option not recognized.')
print('Use --help for usage information.\n')
sys.exit(2)
for opt, arg in opts:
if opt == "-h" or opt == "--help":
print(__doc__)
sys.exit(0)
elif opt == "--ref_dir": param_dict['ref_dir'] = arg
elif opt == "--bim_prefix": param_dict['bim_prefix'] = arg
elif opt == "--sst_file": param_dict['sst_file'] = arg
elif opt == "--a": param_dict['a'] = float(arg)
elif opt == "--b": param_dict['b'] = float(arg)
elif opt == "--phi": param_dict['phi'] = float(arg)
elif opt == "--n_gwas": param_dict['n_gwas'] = int(arg)
elif opt == "--n_iter": param_dict['n_iter'] = int(arg)
elif opt == "--n_burnin": param_dict['n_burnin'] = int(arg)
elif opt == "--thin": param_dict['thin'] = int(arg)
elif opt == "--out_dir": param_dict['out_dir'] = arg
elif opt == "--chrom": param_dict['chrom'] = arg.split(',')
elif opt == "--beta_std": param_dict['beta_std'] = arg.upper()
elif opt == "--write_psi": param_dict['write_psi'] = arg.upper()
elif opt == "--write_pst": param_dict['write_pst'] = arg.upper()
elif opt == "--seed": param_dict['seed'] = int(arg)
else:
print(__doc__)
sys.exit(0)
if param_dict['ref_dir'] == None:
print('* Please specify the directory to the reference panel using --ref_dir\n')
sys.exit(2)
elif param_dict['bim_prefix'] == None:
print('* Please specify the directory and prefix of the bim file for the target dataset using --bim_prefix\n')
sys.exit(2)
elif param_dict['sst_file'] == None:
print('* Please specify the summary statistics file using --sst_file\n')
sys.exit(2)
elif param_dict['n_gwas'] == None:
print('* Please specify the sample size of the GWAS using --n_gwas\n')
sys.exit(2)
elif param_dict['out_dir'] == None:
print('* Please specify the output directory using --out_dir\n')
sys.exit(2)
for key in param_dict:
print('--%s=%s' % (key, param_dict[key]))
print('\n')
return param_dict
def main():
param_dict = parse_param()
for chrom in param_dict['chrom']:
print('##### process chromosome %d #####' % int(chrom))
if '1kg' in os.path.basename(param_dict['ref_dir']):
ref_dict = parse_genet.parse_ref(param_dict['ref_dir'] + '/snpinfo_1kg_hm3', int(chrom))
elif 'ukbb' in os.path.basename(param_dict['ref_dir']):
ref_dict = parse_genet.parse_ref(param_dict['ref_dir'] + '/snpinfo_ukbb_hm3', int(chrom))
vld_dict = parse_genet.parse_bim(param_dict['bim_prefix'], int(chrom))
sst_dict = parse_genet.parse_sumstats(ref_dict, vld_dict, param_dict['sst_file'], param_dict['n_gwas'])
ld_blk, blk_size = parse_genet.parse_ldblk(param_dict['ref_dir'], sst_dict, int(chrom))
mcmc_gtb.mcmc(param_dict['a'], param_dict['b'], param_dict['phi'], sst_dict, param_dict['n_gwas'], ld_blk, blk_size,
param_dict['n_iter'], param_dict['n_burnin'], param_dict['thin'], int(chrom), param_dict['out_dir'], param_dict['beta_std'],
param_dict['write_psi'], param_dict['write_pst'], param_dict['seed'])
print('\n')
if __name__ == '__main__':
main()