This repository was archived by the owner on Mar 11, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsarek_cli.py
More file actions
98 lines (74 loc) · 2.76 KB
/
sarek_cli.py
File metadata and controls
98 lines (74 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
import os
import subprocess
import argparse
from shutil import copyfile
from sample_tsv_builder import SarekSampleFile
# Marker inside the job-script template that is replaced by the run directory.
_PLACEHOLDER = "#DUMMY-STRING#"


def _build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser()

    # input options
    input_group = parser.add_argument_group("Input Options")
    input_group_me = input_group.add_mutually_exclusive_group(required=True)
    input_group_me.add_argument(
        "-d", "--samples_directory",
        help="Directory containing ngs raw samples",
    )
    input_group.add_argument(
        "-s", "--ngs_script",
        help="Script that is applied to each sample",
        required=True,
    )

    # output options
    output_group = parser.add_argument_group("Output Options")
    output_group.add_argument(
        "-o", "--output_directory",
        default="ngs-workspace",
        help="Base directory where further working-directories for "
             "ngs-analysis runs will be set up",
    )
    return parser


def _list_sample_dirs(samples_directory):
    """Return absolute paths of the sub-directories of *samples_directory*."""
    samples_dir = os.path.abspath(samples_directory)
    candidates = (os.path.join(samples_dir, entry)
                  for entry in os.listdir(samples_dir))
    # only keep directories
    return [path for path in candidates if os.path.isdir(path)]


def _submit_job(script_path):
    """Submit *script_path* to the ``long`` queue via ``qsub``.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handed to ``qsub`` unchanged.  Failures
    are reported on stdout but do not abort the remaining submissions.
    """
    command = ["qsub", "-q", "long", script_path]
    print("submitting script via {}".format(" ".join(command)))
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # e.g. qsub is not installed / not on PATH
        print("[!] Could not run qsub: {}".format(err))
        return
    if completed.returncode != 0:
        print("[!] qsub exited with status {}".format(completed.returncode))


def main(argv=None):
    """Set up one run directory per sample directory and submit a job for it.

    For every sub-directory of the samples directory a ``SarekSampleFile`` is
    built.  If it is valid, a run directory of the same name is created below
    the output directory, ``samples.tsv`` is written into it, a copy of the
    job script with the placeholder replaced by the run directory is stored as
    ``sarek-germline-analysis.sh``, and that copy is submitted with ``qsub``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 after all sample directories have been processed.

    Raises:
        SystemExit: With code 1 if the script file or the samples directory
            is missing; argparse exits on its own for invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    if not os.path.isfile(args.ngs_script):
        print("[!] Script not found!")
        raise SystemExit(1)
    with open(os.path.abspath(args.ngs_script), "r") as file:
        script = file.read()

    if not os.path.isdir(args.samples_directory):
        print("[!] Invalid Samples Directory")
        raise SystemExit(1)
    # resolve sample paths before the working directory changes below
    samples = _list_sample_dirs(args.samples_directory)

    base_dir = os.path.abspath(args.output_directory or "ngs-workspace")
    # create workdir (including missing parents) if not existent
    os.makedirs(base_dir, exist_ok=True)
    os.chdir(base_dir)

    for ngs_sample_dir in samples:
        print("Collecting samples in {}".format(ngs_sample_dir))
        samples_tsv = SarekSampleFile(ngs_sample_dir)
        # skip sample directories for which no valid sample file was built
        if not (samples_tsv and samples_tsv.is_valid):
            continue

        run_path = os.path.join(base_dir, os.path.basename(ngs_sample_dir))
        os.makedirs(run_path, exist_ok=True)
        # the job is submitted from inside its run directory
        os.chdir(run_path)
        try:
            samples_tsv.write_tsv(os.path.join(run_path, "samples.tsv"))

            # place a modified version of the script in the run directory
            script_dest = os.path.join(run_path, "sarek-germline-analysis.sh")
            with open(script_dest, "w") as file:
                # write location information into the script
                file.write(script.replace(_PLACEHOLDER, run_path))

            _submit_job(script_dest)
        finally:
            # always change back to the base directory
            os.chdir(base_dir)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())