Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion atacseq.interface.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: "The interface for the ATAC-seq pipeline"
path: "pipelines/atacseq.py"
input_schema: "pipelines/atacseq.input_schema.yaml"
# output_schema: pipelines/atacseq.output_schema.yaml
command_template: "{pipeline.path} --sample-yaml {looper.output_dir}/submission/{sample.sample_name}_sample.yaml --output-parent {looper.sample_output_folder}"
command_template: "{pipeline.path} --sample-yaml {looper.output_dir}/submission/{sample.sample_name}.yaml --output-parent {looper.sample_output_folder}"
compute:
size_dependent_variables: "pipelines/atacseq.resources-sample.tsv"
pre_submit:
Expand Down
23 changes: 8 additions & 15 deletions pipelines/atacseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
class ATACseqSample:
"""
Class to model ATAC-seq samples based on the ChIPseqSample class.

:param series: Pandas `Series` object.
:type series: pandas.Series
"""
Expand All @@ -48,7 +47,6 @@ def __init__(self, series):
# Use pd.Series object to have all sample attributes
if not isinstance(series, pd.Series):
raise TypeError("Provided object is not a pandas Series.")
# super(ATACseqSample, self).__init__(series)

self.tagmented = True
for k, v in series.items():
Expand All @@ -57,12 +55,12 @@ def __init__(self, series):
def __repr__(self):
return "ATAC-seq sample '%s'" % self.sample_name

def set_file_paths(self, project):
def set_file_paths(self):
"""
Sets the paths of all files for this sample.
"""
# Inherit paths from Sample by running Sample's set_file_paths()
super(ATACseqSample, self) # .set_file_paths(project)
super(ATACseqSample, self)

# Files in the root of the sample dir
prefix = pjoin(self.sample_root, self.sample_name)
Expand Down Expand Up @@ -134,7 +132,6 @@ def set_file_paths(self, project):
class DNaseSample(ATACseqSample):
"""
Class to model DNase-seq samples based on the ChIPseqSample class.

:param series: Pandas `Series` object.
:type series: pandas.Series
"""
Expand All @@ -151,8 +148,8 @@ def __init__(self, series):
def __repr__(self):
return "DNase-seq sample '%s'" % self.sample_name

def set_file_paths(self, project):
super(DNaseSample, self).set_file_paths(project)
def set_file_paths(self):
super(DNaseSample, self).set_file_paths()


def main():
Expand Down Expand Up @@ -185,7 +182,6 @@ def main():
sample.merged = True
else:
sample.merged = False
sample.prj = AttributeDict(sample.prj)
sample.paths = AttributeDict(sample.__dict__)

# Check read type if not provided
Expand All @@ -201,7 +197,7 @@ def main():
sample.paired = False

# Set file paths
sample.set_file_paths(sample.prj)
sample.set_file_paths()

# Start Pypiper object
# Best practice is to name the pipeline with the name of the script;
Expand Down Expand Up @@ -431,12 +427,12 @@ def process(sample, pipe_manager, args):
# Call peaks
pipe_manager.timestamp("Calling peaks with MACS2")
# make dir for output (macs fails if it does not exist)
if not os.path.exists(os.dirname(sample.peaks)):
os.makedirs(os.dirname(sample.peaks))
if not os.path.exists(os.path.dirname(sample.peaks)):
os.makedirs(os.path.dirname(sample.peaks))

cmd = tk.macs2_call_peaks_atacseq(
treatment_bam=sample.filtered,
output_dir=sample.peaks,
output_dir=sample.peaks_dir,
sample_name=sample.sample_name,
genome=sample.genome,
)
Expand Down Expand Up @@ -899,7 +895,6 @@ def parse_mapping_stats(stats_file, prefix="", paired_end=True):
def parse_duplicate_stats(stats_file, prefix=""):
"""
Parses sambamba markdup output, returns series with values.

:param stats_file: sambamba output file with duplicate statistics.
:type stats_file: str
:param prefix: A string to be used as prefix to the output dictionary keys.
Expand Down Expand Up @@ -972,7 +967,6 @@ def calculate_frip(input_bam, input_bed, output, cpus=4):
def parse_frip(frip_file, total_reads, prefix=""):
"""
Calculates the fraction of reads in peaks for a given sample.

:param frip_file: A string path to a file with the FRiP output.
:type frip_file: str
:param total_reads: A Sample object with the "peaks" attribute.
Expand All @@ -998,7 +992,6 @@ def parse_frip(frip_file, total_reads, prefix=""):
def parse_nsc_rsc(nsc_rsc_file):
"""
Parses the values of NSC and RSC from a stats file.

:param nsc_rsc_file: A string path to a file with the NSC and RSC output (generally a TSV file).
:type nsc_rsc_file: str
"""
Expand Down
3 changes: 2 additions & 1 deletion pipelines/atacseq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ tools:
macs2: macs2
# optional:
Rscript: Rscript
spp: # You can find this here: https://raw.githubusercontent.com/crazyhottommy/phantompeakqualtools/master/run_spp.R
# spp: # You can find this here: https://raw.githubusercontent.com/crazyhottommy/phantompeakqualtools/master/run_spp.R
spp: /home/fzhao/workspace/run_spp.R

# The following section parameters used to control pipeline behaviour
parameters:
Expand Down