From 0dd271ec08d9a15d8473374b40f073405b0895c3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 27 Feb 2025 13:22:38 -0800 Subject: [PATCH 1/4] Add checks for missing data --- .../labkey/mgap/pipeline/mGapReleaseGenerator.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java index 6c353305..0d6a1c57 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java +++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java @@ -723,7 +723,11 @@ private Double parseCadd(String cadd) { if (cadd.contains("|")) { - return Arrays.stream(cadd.split("\\|")).map(Double::parseDouble).max(Double::compare).get(); + return Arrays.stream(cadd.split("\\|")).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(null); + } + else if (".".equals(cadd)) + { + return null; } return Double.parseDouble(cadd); @@ -1388,12 +1392,12 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra try { String as = StringUtils.trimToNull(polyphenScores.get(alleleIdx)); - if (as == null) + if (as == null || ".".equals(as)) { continue; } - Double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).map(Double::parseDouble).max(Double::compare).orElse(-1.0); + double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(-1.0); if (maxScore == 0.0) { ctx.getLogger().error("Suspicious values for Polyphen2_HVAR_S: " + maxScore + ", at position: " + vc.toStringWithoutGenotypes()); @@ -1581,7 +1585,7 @@ else if (af != null) cadd = Arrays.stream(String.valueOf(cadd).split("\\|")).map(x -> { try { - double y = Double.parseDouble(x); + Double.parseDouble(x); } catch (Exception e) { From f0f425fa15d64eb3cd1cdee5e5c974905e7d6642 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 28 Feb 2025 14:34:41 -0800 Subject: [PATCH 2/4] Add docker prune to maintenance --- .../pipeline/ClusterMaintenanceTask.java | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java b/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java index b35cc621..cbeddc19 100644 --- a/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java +++ b/primeseq/src/org/labkey/primeseq/pipeline/ClusterMaintenanceTask.java @@ -26,6 +26,7 @@ import org.labkey.api.pipeline.RemoteExecutionEngine; import org.labkey.api.query.FieldKey; import org.labkey.api.security.UserManager; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.util.Job; @@ -84,7 +85,7 @@ public void run(Logger log) jobGuids.addAll(ts2.getArrayList(String.class)); JobRunner jr = JobRunner.getDefault(); - for (RemoteExecutionEngine engine : PipelineJobService.get().getRemoteExecutionEngines()) + for (RemoteExecutionEngine engine : PipelineJobService.get().getRemoteExecutionEngines()) { log.info("Starting maintenance task for: " + engine.getType()); @@ -158,6 +159,8 @@ public void run(Logger log) //hacky, but this is only planned to be used by us inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/workDir/")); inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/cachedData/")); + + runDockerPrune(log); } private void deleteDirectory(File child, Logger log) @@ -230,6 +233,23 @@ private void inspectFolder(Logger log, File workDirBase) } + private static void runDockerPrune(Logger log) + { + try + { + new SimpleScriptWrapper(log).execute(Arrays.asList( + SequencePipelineService.get().getDockerCommand(), + "system", + "prune", + "-f" + )); + } + catch (PipelineJobException e) + { + _log.error("Error running docker prune", e); + } + } + public static class TestCase extends Assert { @Test From c1964d4b0a69a661c34789152e3aff4d0ebba019 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Mar 2025 08:52:32 -0800 Subject: [PATCH 3/4] Prefer bcftools/liftover --- .../labkey/mgap/pipeline/AnnotationStep.java | 5 +- .../mgap/pipeline/LiftoverVcfRunner.java | 80 ------------------- .../mgap/pipeline/mGapReleaseGenerator.java | 11 ++- .../CombineMethylationRatesHandler.java | 2 +- .../analysis/MethylationRateComparison.java | 2 +- 5 files changed, 10 insertions(+), 90 deletions(-) delete mode 100644 mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java index 2854ce3c..95f1eb4f 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java @@ -31,6 +31,7 @@ import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.writer.PrintWriters; @@ -327,8 +328,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno File liftoverRejects = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftoverReject" + grch37Genome.getGenomeId() + ".vcf.gz"); if (!indexExists(liftoverRejects) || !indexExists(liftedToGRCh37)) { - LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(getPipelineCtx().getLogger()); - liftoverVcfRunner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95); + LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(getPipelineCtx().getLogger()); + liftoverVcfRunner.doLiftover(currentVcf, chainFile, genome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37); } else { diff --git a/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java b/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java deleted file mode 100644 index 17be1002..00000000 --- a/mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.labkey.mgap.pipeline; - -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; -import org.labkey.api.pipeline.PipelineJobException; -import org.labkey.api.sequenceanalysis.SequenceAnalysisService; -import org.labkey.api.sequenceanalysis.run.PicardWrapper; - -import java.io.File; -import java.io.IOException; -import java.util.List; - -public class LiftoverVcfRunner extends PicardWrapper -{ - public LiftoverVcfRunner(Logger log) - { - super(log); - } - - @Override - protected String getToolName() - { - return "LiftoverVcf"; - } - - public void doLiftover(File inputVcf, File chainFile, File referenceFasta, @Nullable File rejectVcf, File outputVcf, double minPctMatch) throws PipelineJobException - { - getLogger().info("Liftover VCF: " + inputVcf.getPath()); - - List params = getBaseArgs(); - params.add("--INPUT"); - params.add(inputVcf.getPath()); - - params.add("--OUTPUT"); - params.add(outputVcf.getPath()); - - params.add("--CHAIN"); - params.add(chainFile.getPath()); - - params.add("--REFERENCE_SEQUENCE"); - params.add(referenceFasta.getPath()); - - params.add("--WRITE_ORIGINAL_POSITION"); - params.add("true"); - - params.add("--WRITE_ORIGINAL_ALLELES"); - params.add("true"); - - params.add("--LOG_FAILED_INTERVALS"); - params.add("false"); - - params.add("--LIFTOVER_MIN_MATCH"); - params.add(String.valueOf(minPctMatch)); - - if (rejectVcf != null) - { - params.add("--REJECT"); - params.add(rejectVcf.getPath()); - } - - execute(params); - - if (!outputVcf.exists()) - { - throw new PipelineJobException("Output file could not be found: " + outputVcf.getPath()); - } - - if (rejectVcf != null && rejectVcf.exists()) - { - try - { - SequenceAnalysisService.get().ensureVcfIndex(rejectVcf, getLogger()); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - } -} diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java index 0d6a1c57..a6a03642 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java +++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java @@ -31,7 +31,6 @@ import org.labkey.api.data.TableSelector; import org.labkey.api.exp.api.ExpData; import org.labkey.api.exp.api.ExperimentService; -import org.labkey.api.jbrowse.JBrowseService; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; @@ -56,6 +55,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.GeneToNameTranslator; +import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.util.FileType; import org.labkey.api.util.FileUtil; @@ -77,7 +77,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -1024,7 +1023,7 @@ public void processFilesRemote(List inputFiles, JobContext c File sitesOnlyVcf = getSitesOnlyVcf(ctx, primaryTrackVcf, genome); - File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, grch37Genome); + File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, genome, grch37Genome); SequenceOutputFile output3 = new SequenceOutputFile(); output3.setFile(lifted); output3.setName("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human"); @@ -1095,7 +1094,7 @@ private File getSitesOnlyVcf(JobContext ctx, File primaryTrackVcf, ReferenceGeno return noGenotypes; } - private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome grch37Genome) throws PipelineJobException + private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome sourceGenome, ReferenceGenome grch37Genome) throws PipelineJobException { //lift to target genome Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class); @@ -1108,8 +1107,8 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, File liftoverRejects = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".liftoverRejectGRCh37.vcf.gz"); if (!indexExists(liftoverRejects)) { - LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(ctx.getLogger()); - liftoverVcfRunner.doLiftover(noGenotypes, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95); + LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(ctx.getLogger()); + liftoverVcfRunner.doLiftover(noGenotypes, chainFile, sourceGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37); } else { diff --git a/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java b/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java index f8c732dd..055e25e9 100644 --- a/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java +++ b/primeseq/src/org/labkey/primeseq/analysis/CombineMethylationRatesHandler.java @@ -83,7 +83,7 @@ public SequenceOutputProcessor getProcessor() return new Processor(); } - public class Processor implements SequenceOutputProcessor + public static class Processor implements SequenceOutputProcessor { @Override public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException diff --git a/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java b/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java index 5ad679d2..d7d0bce2 100644 --- a/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java +++ b/primeseq/src/org/labkey/primeseq/analysis/MethylationRateComparison.java @@ -172,7 +172,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport Map fileMap = new HashMap<>(); for (SequenceOutputFile f : inputFiles) { - action.addInputIfNotPresent(f.getFile(), "Methlylation Rates"); + action.addInputIfNotPresent(f.getFile(), "Methylation Rates"); fileMap.put(f.getRowid().toString(), f); } From 5346c7ccec74e864c2a7108d047a379033c28537 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Mar 2025 16:18:16 -0800 Subject: [PATCH 4/4] Support tool to write 10x barcodes to file --- mGAP/resources/views/releaseNotes.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mGAP/resources/views/releaseNotes.html b/mGAP/resources/views/releaseNotes.html index afb2ea2a..03bfa8b1 100644 --- a/mGAP/resources/views/releaseNotes.html +++ b/mGAP/resources/views/releaseNotes.html @@ -1,3 +1,9 @@ +

Release 3.0:

+
    +
  • This release involves a major change in the processing of variants. All prior releases omitted variants in complex or repetitive regions. We originally excluded these because genotypes can be less accurate; however, we made this change because some repetitive regions overlap coding regions and can contain valuable information. This also results in a significant increase in the total number of variants.
  • +
  • This is the first release to include a second species. The dataset now contains both Rhesus macaques and Japanese macaques (which are separated into a separate track).
  • +
+

Release 2.5: