72 changes: 63 additions & 9 deletions pipelines/pipeline_scrnaseq.py
@@ -934,22 +934,57 @@ def featureCounts(infiles, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(featureCounts,
"featureCounts.dir/featurecounts.load")
def loadFeatureCounts(infiles, outfile):
"featureCounts.dir/featurecounts.txt.gz")
def concatenateFeatureCounts(infiles, outfile):
'''
Combine count data in the project database.
'''

infiles = " ".join(infiles)

statement = '''python -m cgatcore.tables
--cat=track
--missing-value=na
--regex-filename='.*/(.*).counts.gz'
--no-titles
%(infiles)s
| gzip -c
> %(outfile)s
'''

P.run(statement, job_memory=PARAMS["sql_himem"])
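For orientation, a minimal sketch of what the concatenated table produced by this step should look like, assuming each per-sample *.counts.gz file holds gene_id/count columns, that --cat=track adds the sample name captured by --regex-filename as a leading column, and that --no-titles means the output carries no header line (the column names below follow the track,gene_id,counts header used when the table is loaded further down; the inspection itself is illustrative, not part of the pipeline):

import pandas as pd

# Read the headerless concatenated counts: one row per (sample, gene).
df = pd.read_csv("featureCounts.dir/featurecounts.txt.gz",
                 sep="\t", compression="gzip", header=None,
                 names=["track", "gene_id", "counts"])

# e.g. how many genes were quantified for each sample (track)
print(df.groupby("track")["gene_id"].nunique())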


@transform(concatenateFeatureCounts,
regex(r"featureCounts.dir/(.*).txt.gz"),
r"featureCounts.dir/\1.load")
def loadFeatureCounts(infile, outfile):
'''
Load the concatenated count data into the project database.
'''

P.concatenate_and_load(infiles, outfile,
regex_filename=".*/(.*).counts.gz",
has_titles=False,
cat="track",
header="track,gene_id,counts",
options='-i "gene_id"',
job_memory=PARAMS["sql_himem"])
# table name is derived from the .load target (here: featurecounts)
tablename = outfile.split("/")[-1].replace(".load", "")

database_url = PARAMS["database"]["url"]

statement = '''zcat %(infile)s
| python -m cgatcore.csv2db
--retry
--database-url=%(database_url)s
--table=%(tablename)s
--header-names=track,gene_id,counts
--add-index=track
--add-index=gene_id
> %(outfile)s
'''

P.run(statement, to_cluster=False)
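As a quick sanity check on the load step, a hedged sketch of reading the featurecounts table back through the same database URL (assuming the default sqlite:///./csvdb from pipeline.yml; pandas and sqlalchemy are used purely for illustration and are not requirements introduced by this change):

import pandas as pd
import sqlalchemy

# Connect with the URL the pipeline loads into (PARAMS["database"]["url"]).
engine = sqlalchemy.create_engine("sqlite:///./csvdb")

# Pull the long-format counts and pivot to a genes x samples matrix.
counts = pd.read_sql("SELECT track, gene_id, counts FROM featurecounts", engine)
matrix = counts.pivot(index="gene_id", columns="track", values="counts")
print(matrix.shape)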


@active_if(PARAMS["sql_load_concatenated_table"])
@files(loadFeatureCounts,
"featureCounts.dir/featurecounts_counts.txt")
def featurecountsGeneCounts(infile, outfile):
@@ -970,6 +1005,7 @@ def featurecountsGeneCounts(infile, outfile):
df.to_csv(outfile, sep="\t", index=True, index_label="gene_id")


@active_if(PARAMS["sql_load_concatenated_table"])
@transform(featurecountsGeneCounts,
suffix(".txt"),
".load")
@@ -1034,6 +1070,7 @@ def salmon(infiles, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(fastqMode)
@merge(salmon, "salmon.dir/salmon.transcripts.load")
def loadSalmonTranscriptQuant(infiles, outfile):
@@ -1050,6 +1087,7 @@ def loadSalmonTranscriptQuant(infiles, outfile):
job_memory=PARAMS["sql_himem"])


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(fastqMode)
@merge(salmon, "salmon.dir/salmon.genes.load")
def loadSalmonGeneQuant(infiles, outfile):
@@ -1303,6 +1341,7 @@ def loadCuffNormUQ(infile, outfile):
run_copy_number_estimation = False


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(run_copy_number_estimation)
@follows(mkdir("copy.number.dir"), loadSalmonTPMs)
@files("salmon.dir/salmon.genes.tpms.txt",
@@ -1324,6 +1363,7 @@ def estimateCopyNumber(infile, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(run_copy_number_estimation)
@transform(estimateCopyNumber,
suffix(".txt"),
@@ -1408,6 +1448,7 @@ def collectRnaSeqMetrics(infiles, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(collectRnaSeqMetrics,
"qc.dir/qc_rnaseq_metrics.load")
def loadCollectRnaSeqMetrics(infiles, outfile):
@@ -1448,6 +1489,7 @@ def threePrimeBias(infile, outfile):
out_file.write("%.2f\n" % bias)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(threePrimeBias,
"qc.dir/qc_three_prime_bias.load")
def loadThreePrimeBias(infiles, outfile):
@@ -1498,6 +1540,7 @@ def estimateLibraryComplexity(infile, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(PAIRED)
@merge(estimateLibraryComplexity,
"qc.dir/qc_library_complexity.load")
@@ -1553,6 +1596,7 @@ def alignmentSummaryMetrics(infile, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(alignmentSummaryMetrics,
"qc.dir/qc_alignment_summary_metrics.load")
def loadAlignmentSummaryMetrics(infiles, outfile):
@@ -1624,6 +1668,7 @@ def insertSizeMetricsAndHistograms(infile, outfiles):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(insertSizeMetricsAndHistograms,
"qc.dir/qc_insert_size_metrics.load")
def loadInsertSizeMetrics(infiles, outfile):
@@ -1647,6 +1692,7 @@ def loadInsertSizeMetrics(infiles, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(insertSizeMetricsAndHistograms,
"qc.dir/qc_insert_size_histogram.load")
def loadInsertSizeHistograms(infiles, outfile):
@@ -1702,6 +1748,7 @@ def spikeVsGenome(infile, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(spikeVsGenome,
"qc.dir/qc_spike_vs_genome.load")
def loadSpikeVsGenome(infiles, outfile):
@@ -1718,6 +1765,7 @@ def loadSpikeVsGenome(infiles, outfile):

# ------------------------- No. genes detected ------------------------------ #

@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(fastqMode)
@follows(mkdir("qc.dir/"), loadSalmonTPMs, loadEnsemblAnnotations)
@files("salmon.dir/salmon.genes.tpms.load",
@@ -1753,6 +1801,7 @@ def numberGenesDetectedSalmon(infile, outfile):
count_df.to_csv(outfile, index=False, sep="\t")


@active_if(PARAMS["sql_load_concatenated_table"])
@active_if(fastqMode)
@follows(annotations)
@files(numberGenesDetectedSalmon,
@@ -1766,6 +1815,7 @@ def loadNumberGenesDetectedSalmon(infile, outfile):
options='-i "sample_id"')


@active_if(PARAMS["sql_load_concatenated_table"])
@follows(annotations)
@files(loadFeatureCounts,
"qc.dir/number.genes.detected.featurecounts")
@@ -1799,6 +1849,7 @@ def numberGenesDetectedFeatureCounts(infile, outfile):
count_df.to_csv(outfile, index=False, sep="\t")


@active_if(PARAMS["sql_load_concatenated_table"])
@files(numberGenesDetectedFeatureCounts,
"qc.dir/qc_no_genes_featurecounts.load")
def loadNumberGenesDetectedFeatureCounts(infile, outfile):
@@ -1836,6 +1887,7 @@ def fractionReadsSpliced(infile, outfile):
P.run(statement)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge(fractionReadsSpliced,
"qc.dir/qc_fraction_spliced.load")
def loadFractionReadsSpliced(infiles, outfile):
@@ -1873,6 +1925,7 @@ def loadSampleInformation(infile, outfile):
P.load(infile, outfile)


@active_if(PARAMS["sql_load_concatenated_table"])
@merge([loadSampleInformation,
loadCollectRnaSeqMetrics,
loadThreePrimeBias,
@@ -1968,6 +2021,7 @@ def qcSummary(infiles, outfile):
df.to_csv(outfile, sep="\t", index=False)


@active_if(PARAMS["sql_load_concatenated_table"])
@transform(qcSummary,
suffix(".txt"),
".load")
3 changes: 3 additions & 0 deletions pipelines/pipeline_scrnaseq/pipeline.yml
@@ -77,6 +77,7 @@ strandedness: none
# location of the local sqlite3 database
database:
file: csvdb
url: sqlite:///./csvdb


# Spike-in options
@@ -287,3 +288,5 @@ sql:

# RAM required for high memory operations (e.g. 5000M)
himem: 10000M

load_concatenated_table: True
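For context, this new option is the flag that the @active_if guards above read as PARAMS["sql_load_concatenated_table"]: options under the sql: section of pipeline.yml are exposed under underscore-joined keys, as with sql_himem. A minimal, hypothetical sketch of the gating pattern, not code from this pipeline:

from ruffus import active_if, merge
from cgatcore import pipeline as P

# PARAMS is built from pipeline.yml; "sql: load_concatenated_table" is
# available under the flattened key used by the guards in this PR.
PARAMS = P.get_parameters(["pipeline.yml"])

@active_if(PARAMS["sql_load_concatenated_table"])
@merge(["a.tsv.gz", "b.tsv.gz"], "example.load")   # hypothetical inputs/output
def exampleLoadTask(infiles, outfile):
    # Treated as inactive by ruffus when the option is False in pipeline.yml.
    P.load(infiles[0], outfile)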