-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile
More file actions
97 lines (72 loc) · 2.65 KB
/
Snakefile
File metadata and controls
97 lines (72 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
shell.executable("bash")
import os
from snakemake.utils import min_version
import multiprocessing
min_version("8.10.7")
configfile: "config.yaml"
log_dir = config['output_dir'] + "/logs"
working_dir = config['output_dir'] + "/working"
ref_basename=os.path.basename(config['reference_genome'])
ref_name=os.path.splitext(os.path.basename(config['reference_genome']))[0]
sample_files = snakemake.utils.listfiles(config["bam_dir"]+"/{sample}.bam")
samples = dict((y[0], x) for x, y in sample_files)
#singularity: "docker://broadinstitute/gatk"
def process_intervals(file_path, max_size=500000):
result = []
small_group = []
small_total = 0
with open(file_path, 'r') as f:
for line in f:
parts = line.strip().split()
if len(parts) != 3:
continue # skip lines
chrom, start, end = parts
start, end = int(start), int(end)
size = end - start
if size >= max_size:
# Split into chunks of size max_size
while start < end:
chunk_end = min(start + max_size, end)
result.append([[chrom, start, chunk_end]])
start = chunk_end
else:
# Add to small group for merging
small_group.append([chrom, start, end])
small_total += size
if small_total >= max_size:
# Flush the current group
result.append(small_group)
small_group = []
small_total = 0
# Add any remaining small intervals
if small_group:
result.append(small_group)
return result
INTERVALS= process_intervals(config['bed_file'])
IDX=list(range(len(INTERVALS)))
print(IDX)
"""
INTERVALS = []
for n in result:
l = []
for i in n:
chrom, start, end = i
l.append(f" -L {chrom}:{start+1}-{end}") # Format string
INTERVALS.append(l)
INTERVALS = [f"-L {name}:{start+1}-{end}" for name, length in gd.items() for start, end in length]
big_stuff={"2L","2R","3L","3R","X","Y","4"}
bigints=[interv for interv in INTERVALS if interv.split(":")[0].split(" ")[1] in big_stuff]
smallints=[interv for interv in INTERVALS if interv.split(":")[0].split(" ")[1] not in big_stuff]
bigints.append(" ".join(smallints))
print(len(INTERVALS),len(bigints),len(smallints))
INTERVALS=bigints
realints=[x for x in INTERVALS]
"""
rule all:
input:
working_dir + "/JointCallSNPs/all_samples.vcf.gz"
include: "rules/genome_prepare.smk"
include: "rules/BQSR.smk"
include: "rules/HaplotypeCaller.smk"
include: "rules/JointCallSNPs.smk"
#include: "rules/VQSR.smk"