-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcalcPVT.py
More file actions
76 lines (63 loc) · 3.06 KB
/
calcPVT.py
File metadata and controls
76 lines (63 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import argparse
import numpy as np
#import effectsize
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
def calcPVT(VCFFile, begin, end, outFolder, ancList):
    """Estimate a Bonferroni p-value threshold from per-sample effective sizes.

    For every ancestry label in ``ancList`` and every chromosome from
    ``begin`` to ``end`` (inclusive), the matching VCF is read and each
    sample's two allele values are summed into one dosage per record.  R's
    ``coda::effectiveSize`` is then applied to each sample's dosage series.
    Per ancestry, a tab-separated table of the effective sizes is written to
    ``{outFolder}/ESS_{POP}_PerSampleAndChrom.txt``, and the mean over samples
    of the per-sample totals is printed together with ``0.05 / mean``.

    Args:
        VCFFile: Path template; ``#`` is replaced by the ancestry label and
            ``*`` by the chromosome number.
        begin: First chromosome number (inclusive).
        end: Last chromosome number (inclusive).
        outFolder: Directory the report files are written into; it is not
            created here.
        ancList: Iterable of ancestry labels (strings).

    Raises:
        OSError: If a VCF cannot be opened or a report cannot be written.
        ValueError: If a sample field is not of the form ``<int>|<int>``.
    """
    # Load the R package before any file is touched, so a missing package
    # fails before work is done.
    coda = importr("coda")
    chromosomes = range(begin, end + 1)

    for POP in ancList:
        # Start from an empty table for each ancestry so that samples seen
        # only in another ancestry's VCFs never end up in this ancestry's
        # report or mean.
        dictEffectiveSize = {}
        for chrom in chromosomes:
            VCF = VCFFile.replace("#", POP).replace("*", f"{chrom}")
            for sampleID, dosages in _read_sample_dosages(VCF).items():
                ess = coda.effectiveSize(ro.FloatVector(dosages))
                dictEffectiveSize.setdefault(sampleID, {})[chrom] = ess[0]

        listESS = []
        with open(f"{outFolder}/ESS_{POP}_PerSampleAndChrom.txt", "w") as ESS:
            ESS.write("ID")
            for chrom in chromosomes:
                ESS.write(f"\tchrom{chrom}")
            ESS.write("\n")
            for sampleID, perChrom in dictEffectiveSize.items():
                # Accumulate left to right (rather than calling sum()) so the
                # floating-point total does not depend on the interpreter's
                # summation algorithm.
                total = 0
                ESS.write(f"{sampleID}")
                for value in perChrom.values():
                    ESS.write(f"\t{value}")
                    total = total + value
                ESS.write("\n")
                listESS.append(total)

        meanESS = np.mean(listESS)
        print(f"Mean number of tests ({POP}): {meanESS}")
        print(f"PVT (BonFerroni) ({POP}): {0.05/meanESS}")


def _read_sample_dosages(vcf_path):
    """Return ``{sampleID: [dosage, ...]}`` for one VCF, in record order.

    Lines up to and including the ``#CHROM`` header line are skipped; sample
    IDs are taken from that header line, starting at column index 9.  Each
    sample field must be ``<int>|<int>``; the dosage is the sum of the two.
    """
    dosages = {}
    header_fields = None
    with open(vcf_path) as vcf:
        for line in vcf:
            if header_fields is None:
                if line.startswith("#CHROM"):
                    header_fields = line.strip().split()
                continue
            fields = line.strip().split()
            for column, genotype in enumerate(fields[9:], start=9):
                # Indexing the header (not zipping) keeps a record with more
                # columns than the header an error instead of silent data loss.
                sample_id = header_fields[column]
                allele_1, allele_2 = genotype.split("|")
                dosages.setdefault(sample_id, []).append(
                    int(allele_1) + int(allele_2)
                )
    return dosages
if __name__ == '__main__':
    # Command-line entry point: parse the options and run calcPVT on them.
    parser = argparse.ArgumentParser(description='Calcule p-value threshold using R')

    requiredGeneral = parser.add_argument_group("Required arguments for all steps")
    # The two help fragments are joined by implicit string concatenation, so
    # the first one must end with its own separator.
    requiredGeneral.add_argument(
        '-v', '--vcf',
        help='VCF generated by GENESIS_AM.py with ancestry ID replaced by # and chrom replaced by *. '
             'Example: Phase2_All_#_chrom*.vcf',
        required=True,
    )
    requiredGeneral.add_argument('-O', '--outputFolder', help='Name of output folder', required=True)
    requiredGeneral.add_argument('-b', '--begin', help='First chromosome (default = 1)', default=1, type=int)
    requiredGeneral.add_argument('-e', '--end', help='Last chromosome (default = 22)', default=22, type=int)
    requiredGeneral.add_argument('-a', '--anc', help='List of ancestry', required=True, nargs="+")

    args = parser.parse_args()
    calcPVT(args.vcf, args.begin, args.end, args.outputFolder, args.anc)