diff --git a/mGAP/resources/views/releaseNotes.html b/mGAP/resources/views/releaseNotes.html
index afb2ea2a..03bfa8b1 100644
--- a/mGAP/resources/views/releaseNotes.html
+++ b/mGAP/resources/views/releaseNotes.html
@@ -1,3 +1,9 @@
+
Release 3.0:
+
+ - This release involves a major change in the processing of variants. All prior releases omitted variants in complex or repetitive regions; we originally excluded these because their genotypes can be less accurate. However, because some repetitive regions overlap coding regions and can contain valuable information, these variants are now included. This also results in a significant increase in the total number of variants.
+ - This is the first release to include a second species. The dataset now contains both Rhesus macaques and Japanese macaques (which are provided as a separate track).
+
+
Release 2.5:
- This release includes a major change in how we perform variant annotation, described in more detail here.
diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
index 2854ce3c..95f1eb4f 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
@@ -31,6 +31,7 @@
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
+import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.api.writer.PrintWriters;
@@ -327,8 +328,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
File liftoverRejects = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftoverReject" + grch37Genome.getGenomeId() + ".vcf.gz");
if (!indexExists(liftoverRejects) || !indexExists(liftedToGRCh37))
{
- LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(getPipelineCtx().getLogger());
- liftoverVcfRunner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);
+ LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(getPipelineCtx().getLogger());
+ liftoverVcfRunner.doLiftover(currentVcf, chainFile, genome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37);
}
else
{
diff --git a/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java b/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java
deleted file mode 100644
index 17be1002..00000000
--- a/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package org.labkey.mgap.pipeline;
-
-import org.apache.logging.log4j.Logger;
-import org.jetbrains.annotations.Nullable;
-import org.labkey.api.pipeline.PipelineJobException;
-import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
-import org.labkey.api.sequenceanalysis.run.PicardWrapper;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
-public class LiftoverVcfRunner extends PicardWrapper
-{
- public LiftoverVcfRunner(Logger log)
- {
- super(log);
- }
-
- @Override
- protected String getToolName()
- {
- return "LiftoverVcf";
- }
-
- public void doLiftover(File inputVcf, File chainFile, File referenceFasta, @Nullable File rejectVcf, File outputVcf, double minPctMatch) throws PipelineJobException
- {
- getLogger().info("Liftover VCF: " + inputVcf.getPath());
-
- List<String> params = getBaseArgs();
- params.add("--INPUT");
- params.add(inputVcf.getPath());
-
- params.add("--OUTPUT");
- params.add(outputVcf.getPath());
-
- params.add("--CHAIN");
- params.add(chainFile.getPath());
-
- params.add("--REFERENCE_SEQUENCE");
- params.add(referenceFasta.getPath());
-
- params.add("--WRITE_ORIGINAL_POSITION");
- params.add("true");
-
- params.add("--WRITE_ORIGINAL_ALLELES");
- params.add("true");
-
- params.add("--LOG_FAILED_INTERVALS");
- params.add("false");
-
- params.add("--LIFTOVER_MIN_MATCH");
- params.add(String.valueOf(minPctMatch));
-
- if (rejectVcf != null)
- {
- params.add("--REJECT");
- params.add(rejectVcf.getPath());
- }
-
- execute(params);
-
- if (!outputVcf.exists())
- {
- throw new PipelineJobException("Output file could not be found: " + outputVcf.getPath());
- }
-
- if (rejectVcf != null && rejectVcf.exists())
- {
- try
- {
- SequenceAnalysisService.get().ensureVcfIndex(rejectVcf, getLogger());
- }
- catch (IOException e)
- {
- throw new PipelineJobException(e);
- }
- }
- }
-}
diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
index 6c353305..a6a03642 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
@@ -31,7 +31,6 @@
import org.labkey.api.data.TableSelector;
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExperimentService;
-import org.labkey.api.jbrowse.JBrowseService;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
@@ -56,6 +55,7 @@
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.GeneToNameTranslator;
+import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
@@ -77,7 +77,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -723,7 +722,11 @@ private Double parseCadd(String cadd)
{
if (cadd.contains("|"))
{
- return Arrays.stream(cadd.split("\\|")).map(Double::parseDouble).max(Double::compare).get();
+ return Arrays.stream(cadd.split("\\|")).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(null);
+ }
+ else if (".".equals(cadd))
+ {
+ return null;
}
return Double.parseDouble(cadd);
@@ -1020,7 +1023,7 @@ public void processFilesRemote(List inputFiles, JobContext c
File sitesOnlyVcf = getSitesOnlyVcf(ctx, primaryTrackVcf, genome);
- File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, grch37Genome);
+ File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, genome, grch37Genome);
SequenceOutputFile output3 = new SequenceOutputFile();
output3.setFile(lifted);
output3.setName("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human");
@@ -1091,7 +1094,7 @@ private File getSitesOnlyVcf(JobContext ctx, File primaryTrackVcf, ReferenceGeno
return noGenotypes;
}
- private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome grch37Genome) throws PipelineJobException
+ private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome sourceGenome, ReferenceGenome grch37Genome) throws PipelineJobException
{
//lift to target genome
Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class);
@@ -1104,8 +1107,8 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes,
File liftoverRejects = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".liftoverRejectGRCh37.vcf.gz");
if (!indexExists(liftoverRejects))
{
- LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(ctx.getLogger());
- liftoverVcfRunner.doLiftover(noGenotypes, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);
+ LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(ctx.getLogger());
+ liftoverVcfRunner.doLiftover(noGenotypes, chainFile, sourceGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37);
}
else
{
@@ -1388,12 +1391,12 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
try
{
String as = StringUtils.trimToNull(polyphenScores.get(alleleIdx));
- if (as == null)
+ if (as == null || ".".equals(as))
{
continue;
}
- Double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).map(Double::parseDouble).max(Double::compare).orElse(-1.0);
+ double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(-1.0);
if (maxScore == 0.0)
{
ctx.getLogger().error("Suspicious values for Polyphen2_HVAR_S: " + maxScore + ", at position: " + vc.toStringWithoutGenotypes());
@@ -1581,7 +1584,7 @@ else if (af != null)
cadd = Arrays.stream(String.valueOf(cadd).split("\\|")).map(x -> {
try
{
- double y = Double.parseDouble(x);
+ Double.parseDouble(x);
}
catch (Exception e)
{
diff --git a/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java b/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java
index f8c732dd..055e25e9 100644
--- a/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java
+++ b/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java
@@ -83,7 +83,7 @@ public SequenceOutputProcessor getProcessor()
return new Processor();
}
- public class Processor implements SequenceOutputProcessor
+ public static class Processor implements SequenceOutputProcessor
{
@Override
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
diff --git a/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java b/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java
index 5ad679d2..d7d0bce2 100644
--- a/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java
+++ b/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java
@@ -172,7 +172,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
Map<String, SequenceOutputFile> fileMap = new HashMap<>();
for (SequenceOutputFile f : inputFiles)
{
- action.addInputIfNotPresent(f.getFile(), "Methlylation Rates");
+ action.addInputIfNotPresent(f.getFile(), "Methylation Rates");
fileMap.put(f.getRowid().toString(), f);
}
diff --git a/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java b/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java
index b35cc621..cbeddc19 100644
--- a/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java
+++ b/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java
@@ -26,6 +26,7 @@
import org.labkey.api.pipeline.RemoteExecutionEngine;
import org.labkey.api.query.FieldKey;
import org.labkey.api.security.UserManager;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.Job;
@@ -84,7 +85,7 @@ public void run(Logger log)
jobGuids.addAll(ts2.getArrayList(String.class));
JobRunner jr = JobRunner.getDefault();
- for (RemoteExecutionEngine engine : PipelineJobService.get().getRemoteExecutionEngines())
+ for (RemoteExecutionEngine<?> engine : PipelineJobService.get().getRemoteExecutionEngines())
{
log.info("Starting maintenance task for: " + engine.getType());
@@ -158,6 +159,8 @@ public void run(Logger log)
//hacky, but this is only planned to be used by us
inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/workDir/"));
inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/cachedData/"));
+
+ runDockerPrune(log);
}
private void deleteDirectory(File child, Logger log)
@@ -230,6 +233,23 @@ private void inspectFolder(Logger log, File workDirBase)
}
+ private static void runDockerPrune(Logger log)
+ {
+ try
+ {
+ new SimpleScriptWrapper(log).execute(Arrays.asList(
+ SequencePipelineService.get().getDockerCommand(),
+ "system",
+ "prune",
+ "-f"
+ ));
+ }
+ catch (PipelineJobException e)
+ {
+ log.error("Error running docker prune", e);
+ }
+ }
+
public static class TestCase extends Assert
{
@Test