From f7538d08ac48d0322ad158703fbcc25a509079b6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 29 Jul 2025 06:07:49 -0700 Subject: [PATCH 1/7] Update HTSJDK and picard versions --- SequenceAnalysis/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/build.gradle b/SequenceAnalysis/build.gradle index df62b2ff0..eea51be25 100644 --- a/SequenceAnalysis/build.gradle +++ b/SequenceAnalysis/build.gradle @@ -124,7 +124,7 @@ dependencies { BuildUtils.addExternalDependency( project, new ExternalDependency( - "com.github.broadinstitute:picard:3.1.0", + "com.github.broadinstitute:picard:3.4.0", "Picard Tools Lib", "PicardTools", "https://github.com/broadinstitute/picard", From 7a5242453aaa4b86be02ad87ee5563f0f3c5ddb5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 30 Jul 2025 06:15:42 -0700 Subject: [PATCH 2/7] Make FastqcRunner more tolerant to HTSJDK version bumps --- .../run/util/FastqcRunner.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java index 8252c7876..3a2c67599 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java @@ -368,7 +368,7 @@ private List getBaseParams() throws FileNotFoundException throw new RuntimeException("Not found: " + jbzip2.getPath()); } - File htsjdkJar = new File(libDir, "htsjdk-4.0.0.jar"); + File htsjdkJar = findJar(libDir, "htsjdk-"); if (!htsjdkJar.exists()) { throw new RuntimeException("Not found: " + htsjdkJar.getPath()); @@ -403,6 +403,27 @@ private List getBaseParams() throws FileNotFoundException return params; } + private File findJar(final File libDir, final String prefix) + { + if (!libDir.exists()) + { + throw new RuntimeException("Missing directory: " + libDir); + } + + List 
jarNames = Arrays.stream(libDir.list()).filter(fn -> fn.startsWith(prefix)).sorted().toList(); + if (jarNames.isEmpty()) + { + throw new RuntimeException("Unable to find JAR with prefix: " + prefix); + } + + if (jarNames.size() > 1) + { + _logger.info("More than one JAR found with prefix: " + prefix); + } + + return new File(libDir, jarNames.get(jarNames.size() - 1)); + } + private int getThreads() { return _threads; From 58842c3f1e329582c4ae6a3683ea2989824a5b87 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 4 Aug 2025 06:43:42 -0700 Subject: [PATCH 3/7] Test fix and support VCF 4.4 --- .../labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java index 91959921b..29ac8b8ee 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java @@ -91,6 +91,7 @@ public List getBaseArgs(@Nullable String toolName) args.add(SequencePipelineService.get().getJavaFilepath()); args.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride)); args.add("-DGATK_STACKTRACE_ON_USER_EXCEPTION=true"); + args.add("-Dsamjdk.optimistic_vcf_4_4=true"); args.add("-jar"); args.add(getJAR().getPath()); From 20db4a4d1f8eb5e2ae3d6ef78fd08fc49363cca7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 4 Aug 2025 09:45:57 -0700 Subject: [PATCH 4/7] Ignore half-called GTs --- .../org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java index 1750c4c4e..b30cf598c 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java @@ -227,6 +227,10 @@ private void runBatch(File inputVCF, File outputDirectory, VariantProcessingStep args.add(String.valueOf(maxRam)); } + // NOTE: tools like sawfish can report half-called genotypes, like 0/.. For now, be most conservative in PCA: + args.add("--vcf-half-call"); + args.add("missing"); + args.addAll(getClientCommandArgs()); getWrapper().execute(args); From ea3d4f59d0ccf580d7c44d421b5c49ce57c1b880 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 4 Aug 2025 13:58:51 -0700 Subject: [PATCH 5/7] Ignore half-called GTs in KING --- .../sequenceanalysis/run/variant/KingInferenceStep.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 9a21691b5..66e61f3a9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -115,6 +115,10 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno plinkArgs.add("--max-alleles"); plinkArgs.add("2"); + // NOTE: tools like sawfish can report half-called genotypes, like 0/.. 
For now, be most conservative for KING: + plinkArgs.add("--vcf-half-call"); + plinkArgs.add("missing"); + Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); if (threads != null) { From 34e803d04ca2038e24e47ad16e19394619c15f3d Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 5 Aug 2025 11:00:20 -0700 Subject: [PATCH 6/7] Add ApplyKnownClonotypicData step --- .../run/util/FastqcRunner.java | 5 +-- .../chunks/ApplyKnownClontypicData.R | 20 ++++++++++ .../labkey/singlecell/SingleCellModule.java | 2 + .../singlecell/ApplyKnownClonotypicData.java | 39 +++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 singlecell/resources/chunks/ApplyKnownClontypicData.R create mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java index 3a2c67599..58f6be91a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java @@ -17,16 +17,16 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.junit.Assert; import org.junit.Test; import org.labkey.api.module.Module; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJobService; -import org.labkey.api.resource.FileResource; import org.labkey.api.resource.DirectoryResource; +import org.labkey.api.resource.FileResource; import org.labkey.api.resource.Resource; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.settings.AppProps; @@ -52,7 +52,6 @@ import java.util.List;
import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; /** diff --git a/singlecell/resources/chunks/ApplyKnownClontypicData.R b/singlecell/resources/chunks/ApplyKnownClontypicData.R new file mode 100644 index 000000000..d0bef4d07 --- /dev/null +++ b/singlecell/resources/chunks/ApplyKnownClontypicData.R @@ -0,0 +1,20 @@ +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) +} + +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc, CONNECTTIMEOUT = 10)) +Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) + +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + seuratObj <- ApplyKnownClontypicData(seuratObj, groupFields = groupFields, addMetadata = addMetadata) + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 5559f897f..e41e4980c 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -42,6 +42,7 @@ import org.labkey.singlecell.pipeline.singlecell.AppendNimble; import org.labkey.singlecell.pipeline.singlecell.AppendSaturation; import org.labkey.singlecell.pipeline.singlecell.AppendTcr; +import org.labkey.singlecell.pipeline.singlecell.ApplyKnownClonotypicData; import org.labkey.singlecell.pipeline.singlecell.AvgExpression; import org.labkey.singlecell.pipeline.singlecell.CalculateGeneComponentScores; import org.labkey.singlecell.pipeline.singlecell.CalculateUCellScores; @@ -299,6 +300,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new
PerformDefaultNimbleAppend.Provider()); SequencePipelineService.get().registerPipelineStep(new PerformMhcDimRedux.Provider()); SequencePipelineService.get().registerPipelineStep(new RunTricycle.Provider()); + SequencePipelineService.get().registerPipelineStep(new ApplyKnownClonotypicData.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java new file mode 100644 index 000000000..1f6dbf45b --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -0,0 +1,39 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +import java.util.List; + +public class ApplyKnownClonotypicData extends AbstractRDiscvrStep +{ + public ApplyKnownClonotypicData(PipelineContext ctx, ApplyKnownClonotypicData.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("ApplyKnownClonotypicData", "Append Known Clonotype/Antigen Data", "RDiscvr", "This will query the clone_responses table and append a column tagging each cell for matching antigens (based on clonotype)", List.of( + + ), null, null); + } + + + @Override + public ApplyKnownClonotypicData create(PipelineContext ctx) + { + return new ApplyKnownClonotypicData(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "ctd"; + } +} + From 8d630fd64118d7033c36cfc44839281919443219 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 5 Aug 2025 11:05:50 -0700 Subject: [PATCH 7/7] Update FastqcRunner.java --- 
.../src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java index 2020f874d..58f6be91a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java @@ -46,6 +46,7 @@ import java.io.InputStreamReader; import java.io.StringWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.LinkedList; import java.util.List;