diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java index 9d7b53536..d477e68d1 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java @@ -34,6 +34,7 @@ abstract public class AbstractAlignmentStepProvider getParamList(List getProvider(); default String getAlignmentDescription() { - return "Aligner: " + getProvider().getName(); + ToolParameterDescriptor cramArchivalParam = getProvider().getParameterByName(AbstractAlignmentStepProvider.CRAM_ARCHIVAL_MODE); + boolean doArchival = cramArchivalParam != null && cramArchivalParam.extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + String ret = "Aligner: " + getProvider().getName(); + if (doArchival) + { + ret = ret + "\nCRAM Archival Mode"; + } + + return ret; } interface AlignmentOutput extends PipelineStepOutput diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java index f85bbe41e..174ac5fe2 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java @@ -18,25 +18,44 @@ public SamtoolsCramConverter(Logger log) super(log); } - public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads) throws PipelineJobException + public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads, boolean archivalMode) throws PipelineJobException { getLogger().info("Converting SAM/BAM to CRAM: " + inputBam.getPath()); + if (inputBam.equals(outputCram)) + { + throw new PipelineJobException("Input/output files are the same"); + } List params = new ArrayList<>(); params.add(getSamtoolsPath().getPath()); params.add("view"); - params.add("-C"); + params.add("--output-fmt"); + params.add("cram,version=3.0" + (archivalMode ? ",lossy_names=1" : "")); params.add("-o"); params.add(outputCram.getPath()); + // CRAM does, however, have an optional archive settings mode (samtools view ...) + // which is a lossy compression, doing things like removing read names, removing additional accessory fields, and additional compression of quality scores. 
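+ // For example, with archivalMode=true the command assembled here is roughly:
+ //   samtools view --output-fmt cram,version=3.0,lossy_names=1 -o out.cram --output-fmt-option archive -T ref.fasta.gz --write-index -@ 4 in.bam
+ // (file names and thread count are illustrative only, not part of this change)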
+ // In all cases, the base sequence of the reads is preserved: https://www.htslib.org/doc/samtools.html + if (archivalMode) + { + params.add("--output-fmt-option"); + params.add("archive"); + } + params.add("-T"); params.add(gzippedFasta.getPath()); + if (doIndex) + { + params.add("--write-index"); + } + if (threads != null) { - params.add("--threads"); + params.add("-@"); params.add(String.valueOf(threads)); } @@ -49,11 +68,6 @@ public File convert(File inputBam, File outputCram, File gzippedFasta, boolean d throw new PipelineJobException("Missing output: " + outputCram.getPath()); } - if (doIndex) - { - doIndex(outputCram, threads); - } - return outputCram; } diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 823aa7dcd..e3cce3c55 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -305,3 +305,17 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/bbmap || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/bbmap + + wget https://sourceforge.net/projects/bbmap/files/BBMap_39.25.tar.gz + tar -xf BBMap_39.25.tar.gz + + mv bbmap $LKTOOLS_DIR/ + ln -s $LKTOOLS_DIR/bbmap/bbmap.sh $LKTOOLS_DIR/bbmap.sh +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index 3ca197e72..c7f842e4b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -5183,4 +5183,62 @@ public void setDoNotRequireSra(boolean doNotRequireSra) _doNotRequireSra = doNotRequireSra; } } + + @RequiresSiteAdmin + public static class CreateExpDataForFileAction extends ConfirmAction + { + @Override + public void validateCommand(CreateExpDataForFileForm form, Errors errors) + { + + } + + @Override + public URLHelper getSuccessURL(CreateExpDataForFileForm form) + { + return getContainer().getStartURL(getUser()); + } + + @Override + public ModelAndView getConfirmView(CreateExpDataForFileForm form, BindException errors) throws Exception + { + return new HtmlView(HtmlString.unsafe("This will create a new ExpData with a DataFileUrl pointing to the provided URI. This should be a full URI, such as file:///my/path/myFile.txt." + + "
<br><br>" + + "<input name=\"dataFileUrl\" size=\"60\">
")); + } + + @Override + public boolean handlePost(CreateExpDataForFileForm form, BindException errors) throws Exception + { + URI newUri = URI.create(form.getDataFileUrl()); + File f = new File(newUri); + if (!f.exists()) + { + throw new PipelineJobException("Missing file: " + form.getDataFileUrl()); + } + + DataType dataType = new DataType("File"); + + ExpData d = ExperimentService.get().createData(getContainer(), dataType, f.getName()); + d.setDataFileURI(newUri); + d.save(getUser()); + + return true; + } + } + + public static class CreateExpDataForFileForm + { + private String _dataFileUrl; + + public String getDataFileUrl() + { + return _dataFileUrl; + } + + public void setDataFileUrl(String dataFileUrl) + { + _dataFileUrl = dataFileUrl; + } + } } \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index df804a5aa..94f2bde2e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -91,6 +91,7 @@ import org.labkey.sequenceanalysis.query.SequenceAnalysisUserSchema; import org.labkey.sequenceanalysis.query.SequenceTriggerHelper; import org.labkey.sequenceanalysis.run.RestoreSraDataHandler; +import org.labkey.sequenceanalysis.run.alignment.BBMapWrapper; import org.labkey.sequenceanalysis.run.alignment.BWAMem2Wrapper; import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper; import org.labkey.sequenceanalysis.run.alignment.BWASWWrapper; @@ -157,8 +158,6 @@ import org.labkey.sequenceanalysis.run.reference.VirusReferenceLibraryStep; import org.labkey.sequenceanalysis.run.util.CombineGVCFsHandler; import org.labkey.sequenceanalysis.run.util.FastqcRunner; -import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler; -import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler; import org.labkey.sequenceanalysis.run.util.SVAnnotateStep; import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; import org.labkey.sequenceanalysis.run.variant.GenotypeConcordanceStep; @@ -309,6 +308,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new StarWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new Pbmm2Wrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new VulcanWrapper.Provider()); + SequencePipelineService.get().registerPipelineStep(new BBMapWrapper.Provider()); //de novo assembly SequencePipelineService.get().registerPipelineStep(new TrinityRunner.Provider()); @@ -395,8 +395,6 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new RecalculateSequenceMetricsHandler()); SequenceAnalysisService.get().registerFileHandler(new ListVcfSamplesHandler()); SequenceAnalysisService.get().registerFileHandler(new MultiQCBamHandler()); - SequenceAnalysisService.get().registerFileHandler(new GenomicsDBImportHandler()); - SequenceAnalysisService.get().registerFileHandler(new GenomicsDBAppendHandler()); SequenceAnalysisService.get().registerFileHandler(new MergeLoFreqVcfHandler()); SequenceAnalysisService.get().registerFileHandler(new PangolinHandler()); SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java index 020974609..a797a327a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java @@ -81,7 +81,7 @@ private void initializeCigar() int i = 0; for (char el : _explodedCigar) { - CigarOperator op = CigarOperator.valueOf(Character.toString(el)); + CigarOperator op = CigarOperator.characterToEnum(el); if (op.consumesReadBases()) { _readPositions[i] = readPos; @@ -155,7 +155,7 @@ public PositionInfo(SAMRecord record, int pos, char[] ops, Integer[] readPos, In { _record = record; _pos = pos; - _op = CigarOperator.valueOf(Character.toString(ops[pos])); + _op = CigarOperator.characterToEnum(ops[pos]); _readPos = readPos[pos]; _refPos = refPos[pos]; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index 6fdb4e338..a7906eaa6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -1,5 +1,6 @@ package org.labkey.sequenceanalysis.pipeline; +import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.Container; @@ -27,6 +28,7 @@ import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; +import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -39,20 +41,23 @@ public class ConvertToCramHandler extends AbstractParameterizedOutputHandler inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { boolean replaceOriginal = ctx.getParams().optBoolean("replaceOriginal", false); + boolean doCramArchivalMode = ctx.getParams().optBoolean("doCramArchivalMode", false); ctx.getLogger().info("Replace input BAM: " + replaceOriginal); Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); for (SequenceOutputFile so : inputFiles) { ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); - File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); - File cramIdx = SamtoolsCramConverter.getExpectedCramIndex(cram); + File outputFile = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram"); if (!so.getFile().exists()) { - if (replaceOriginal && cramIdx.exists()) + File inputAsCram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); + File inputAsCramIdx = SamtoolsCramConverter.getExpectedCramIndex(inputAsCram); + if (replaceOriginal && SequenceUtil.FILETYPE.bam.getFileType().isType(so.getFile()) && inputAsCram.exists() && inputAsCramIdx.exists()) { ctx.getLogger().debug("BAM does not exist, but CRAM index does. 
Proceeding on the assumption this is a resume of a failed job."); } else { - throw new PipelineJobException("Unable to find BAM: " + so.getFile().getPath()); + throw new PipelineJobException("Unable to find input CRAM/BAM: " + so.getFile().getPath()); } } else { - new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), cram, genome.getWorkingFastaFileGzipped(), true, threads); + new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), outputFile, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); } - checkCramAndIndex(so); + if (!outputFile.exists()) + { + throw new PipelineJobException("Missing CRAM: " + outputFile.getPath()); + } if (replaceOriginal) { - ctx.getLogger().info("Deleting original BAM: " + so.getFile().getPath()); - if (so.getFile().exists()) + ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath()); + if (SequenceUtil.FILETYPE.bam.getFileType().isType(so.getFile())) { - SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); - so.getFile().delete(); + if (so.getFile().exists()) + { + SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); + so.getFile().delete(); + } + else + { + ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?"); + } + + ctx.getLogger().debug("Moving CRAM to replace original BAM file: " + so.getFile().getPath()); + try + { + File targetCram = new File(so.getFile().getParentFile(), outputFile.getName()); + if (targetCram.exists()) + { + ctx.getLogger().debug("Deleting file: " + targetCram.getPath()); + targetCram.delete(); + } + + File targetCramIdx = new File(so.getFile().getParentFile(), outputFile.getName() + ".crai"); + if (targetCramIdx.exists()) + { + ctx.getLogger().debug("Deleting file: " + targetCramIdx.getPath()); + targetCramIdx.delete(); + } + + FileUtils.moveFile(outputFile, targetCram); + FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), targetCramIdx); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) + { + try + { + if (!so.getFile().exists()) + { + throw new PipelineJobException("Unable to find input CRAM/BAM: " + so.getFile().getPath()); + } + + SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); + so.getFile().delete(); + + ctx.getLogger().debug("Replacing original file: " + so.getFile().getPath()); + FileUtils.moveFile(outputFile, so.getFile()); + FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), new File(so.getFile() + ".crai")); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } } else { - ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?"); + throw new PipelineJobException("Unknown file type: " + so.getFile().getPath()); } } - } - } - - private void checkCramAndIndex(SequenceOutputFile so) throws PipelineJobException - { - File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); - if (!cram.exists()) - { - throw new PipelineJobException("Unable to find file: " + cram.getPath()); - } - - File cramIdx = new File(cram.getPath() + ".crai"); - if (!cramIdx.exists()) - { - throw new PipelineJobException("Unable to find file: " + cramIdx.getPath()); + else + { + String description = (so.getDescription() == null ? 
"" : so.getDescription() + "\n") + "CRAM Archival Mode"; + ctx.getFileManager().addSequenceOutput(outputFile, so.getName(), so.getCategory(), so.getReadset(), null, so.getLibrary_id(), description); + } } } @@ -170,37 +223,57 @@ public void complete(JobContext ctx, List inputs, List row = new CaseInsensitiveHashMap<>(); row.put("rowid", so.getRowid()); row.put("container", so.getContainer()); - row.put("name", so.getName().replaceAll("\\.bam", "\\.cram")); - row.put("description", (so.getDescription() == null ? "" : so.getDescription() + "\n") + "Converted from BAM to CRAM"); - toUpdate.add(row); + boolean doUpdate = false; + String description = so.getDescription(); + if (so.getName().contains(".bam")) + { + row.put("name", so.getName().replaceAll("\\.bam", "\\.cram")); + description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM"; + row.put("description", description); + doUpdate = true; + } + + if (doCramArchivalMode) + { + description = (description == null ? "" : description + "\n") + "CRAM Archival Mode"; + row.put("description", description); + doUpdate = true; + } + + if (doUpdate) + { + toUpdate.add(row); + } } } - try - { - Container target = ctx.getJob().getContainer().isWorkbook() ? ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer(); - QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null); - } - catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e) + if (!toUpdate.isEmpty()) { - throw new PipelineJobException(e); - + try + { + Container target = ctx.getJob().getContainer().isWorkbook() ? 
ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer(); + QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null); + } + catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e) + { + throw new PipelineJobException(e); + } } } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 69f1bef17..61c87a9e3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -48,6 +48,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -55,7 +56,11 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; + +import static org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper.SHARED_SUBFOLDER_NAME; public class OrphanFilePipelineJob extends PipelineJob { @@ -373,6 +378,16 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile unexpectedPipelineDirs.add(subdir); } + File sharedDir = new File(subdir, SHARED_SUBFOLDER_NAME); + if (sharedDir.exists()) + { + long size = FileUtils.sizeOfDirectory(sharedDir); + if (size > 1e6) + { + getJob().getLogger().warn("Large Shared folder: " + sharedDir.getPath()); + } + } + getOrphanFilesForDirectory(knownExpDatas, dataMap, subdir, orphanFiles, orphanIndexes); } } @@ -410,8 +425,7 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile @Override public boolean accept(File pathname) { - //50mb - return (pathname.length() >= 5e7); + return (pathname.length() >= 5e3); } }); @@ -439,7 +453,16 @@ public boolean accept(File pathname) } } - for (Container child : ContainerManager.getChildren(c)) + List children = ContainerManager.getChildren(c); + + // Check for unexpected subfolders: + Set allowableSubfolders = children.stream().map(Container::getName).collect(Collectors.toSet()); + Set unknownFolders = Arrays.stream(Objects.requireNonNull(root.getRootPath().getParentFile().listFiles())).filter(fn -> !fn.getName().startsWith("@") & !allowableSubfolders.contains(fn.getName())).collect(Collectors.toSet()); + if (!unknownFolders.isEmpty()) { + unknownFolders.forEach(x -> getJob().getLogger().warn("Folder does not match expected child: " + x.getPath())); + } + + for (Container child : children) { if (child.isWorkbook()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 19ea891a7..5cecca0da 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -209,10 +209,6 @@ public WorkDirectory createWorkDirectory(String jobGUID, FileAnalysisJobSupport @Override public boolean isJobComplete(PipelineJob job) { - FileAnalysisJobSupport support = (FileAnalysisJobSupport) job; - String baseName = 
support.getBaseName(); - File dirAnalysis = support.getAnalysisDirectory(); - return false; } } @@ -469,6 +465,8 @@ else if (doCopy) actions.add(action); referenceGenome.setWorkingFasta(new File(targetDir, refFasta.getName())); + + getTaskFileManagerImpl().addIntermediateFile(targetDir); } catch (IOException e) { @@ -1145,7 +1143,9 @@ else if (step.expectToCreateNewBam()) // optional convert to CRAM: ToolParameterDescriptor cramParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.CONVERT_TO_CRAM); + ToolParameterDescriptor cramArchivalParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.CRAM_ARCHIVAL_MODE); boolean doCramConvert = cramParam != null && cramParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + boolean doArchival = cramArchivalParam != null && cramArchivalParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); if (doCramConvert) { getJob().getLogger().info("BAM will be converted to CRAM"); @@ -1154,7 +1154,7 @@ else if (step.expectToCreateNewBam()) Integer threads = SequenceTaskHelper.getMaxThreads(getJob()); if (!cramFileIdx.exists()) { - new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads); + new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads, doArchival); } else { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java new file mode 100644 index 000000000..3ad0b00ef --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -0,0 +1,321 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.IndexOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * User: bimber + * Date: 12/14/12 + * Time: 7:40 AM + */ +public class BBMapWrapper extends AbstractCommandWrapper +{ + public BBMapWrapper(@Nullable Logger logger) + { + super(logger); + } + + public static class BBMapAlignmentStep extends 
AbstractAlignmentPipelineStep implements AlignmentStep + { + public BBMapAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BBMapWrapper(ctx.getLogger())); + } + + @Override + public boolean supportsGzipFastqs() + { + return true; + } + + @Override + public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException + { + IndexOutputImpl output = new IndexOutputImpl(referenceGenome); + + File indexDir = new File(outputDir, getProvider().getName()); + boolean hasCachedIndex = AlignerIndexUtil.hasCachedIndex(this.getPipelineCtx(), getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); + if (!hasCachedIndex) + { + getWrapper().buildIndex(referenceGenome.getWorkingFastaFile(), indexDir); + } + + AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getProvider().getName(), referenceGenome); + + return output; + } + + @Override + public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException + { + File inputFastq1 = assertSingleFile(inputFastqs1); + File inputFastq2 = assertSingleFile(inputFastqs2); + + AlignmentOutputImpl output = new AlignmentOutputImpl(); + AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getProvider().getName(), getProvider().getName(), referenceGenome, true); + File localIdx = new File(getPipelineCtx().getWorkingDirectory(), "Shared/" + getProvider().getName()); + if (!localIdx.exists()) + { + throw new PipelineJobException("Index not copied: " + localIdx); + } + output.addIntermediateFile(new File(getPipelineCtx().getWorkingDirectory(), "Shared")); + + // NOTE: bbmap only supports the location ./ref for the index: + localIdx = new File(localIdx, "ref"); + if (!localIdx.exists()) + { + throw new PipelineJobException("ref dir not found: " + localIdx); + } + + File refDir = new File(outputDirectory, "ref"); + try + { + if (refDir.exists()) + { + getPipelineCtx().getLogger().debug("Deleting existing ref dir: " + refDir); + FileUtils.deleteDirectory(refDir); + } + + FileUtils.moveDirectory(localIdx, refDir); + output.addIntermediateFile(refDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + getWrapper().setOutputDir(outputDirectory); + + List params = new ArrayList<>(); + + String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambiguous").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); + if (ambig != null) + { + params.add("ambig=" + ambig); + if ("all".equals(ambig)) + { + params.add("xmtag=t"); + } + } + + for (String paramName : Arrays.asList("local", "semiperfectmode")) + { + if (getProvider().getParameterByName(paramName).hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) + { + boolean val = getProvider().getParameterByName(paramName).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + params.add(paramName + "=" + (val ? 
"t" : "f")); + } + } + + if (getProvider().getParameterByName("minid").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) + { + Double val = getProvider().getParameterByName("minid").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); + params.add("minid=" + val); + } + + boolean retainUnmapped = getProvider().getParameterByName("retainUnmapped").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params, retainUnmapped); + if (!bam.exists()) + { + throw new PipelineJobException("BAM not created, expected: " + bam.getPath()); + } + + output.setBAM(bam); + output.addCommandsExecuted(getWrapper().getCommandsExecuted()); + + return output; + } + + @Override + public boolean doAddReadGroups() + { + return true; + } + + @Override + public boolean doSortIndexBam() + { + return true; + } + + @Override + public boolean alwaysCopyIndexToWorkingDir() + { + return false; + } + } + + public static class Provider extends AbstractAlignmentStepProvider + { + public Provider() + { + super("BBMap", "BBMap is suitable for longer reads and has the option to retain multiple hits per read. The only downside is that it can be slower. When this pipeline was first written, this aligner was preferred for sequence-based genotyping and similar applications which require retaining multiple hits.", Arrays.asList( + ToolParameterDescriptor.create("ambiguous", "Ambiguous Handing", "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", "ldk-simplecombo", new JSONObject() + {{ + put("storeValues", "all;best;toss;random"); + put("delimiter", ";"); + put("multiSelect", false); + }}, "all"), + ToolParameterDescriptor.create("local", "Local Alignment", "Set to true to use local, rather than global, alignments. This will soft-clip ugly ends of poor alignments", "checkbox", new JSONObject() + {{ + put("checked", true); + }}, true), + ToolParameterDescriptor.create("semiperfectmode", "Semi-perfectmode", "Allow only perfect and semiperfect (perfect except for N's in the reference) mappings", "checkbox", new JSONObject() + {{ + put("checked", false); + }}, false), + ToolParameterDescriptor.create("minid", "Minimum Identity", "Approximate minimum alignment identity to look for. 
Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() + {{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0.95), + ToolParameterDescriptor.create("retainUnmapped", "Retain Unmapped", "If checked, unmapped reads are written to a separate BAM file", "checkbox", new JSONObject() + {{ + put("checked", false); + }}, false) + ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, false); + } + + @Override + public BBMapAlignmentStep create(PipelineContext context) + { + return new BBMapAlignmentStep(this, context); + } + } + + protected File getExe() + { + return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh"); + } + + public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options, boolean retainUnmapped) throws PipelineJobException + { + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("-in=" + inputFastq1.getPath()); + if (inputFastq2 != null) + { + args.add("-in2=" + inputFastq2.getPath()); + } + + args.add("-eoom"); + + args.add("mdtag=t"); + args.add("nhtag=t"); + args.add("amtag=t"); + args.add("nmtag=t"); + args.add("printunmappedcount=t"); + args.add("overwrite=t"); + + // Maximum number of total alignments to print per read. Only relevant when secondary=t. + args.add("maxsites=50"); + + // Only print secondary alignments for ambiguously-mapped reads. + args.add("secondary=t"); + args.add("ssao=t"); + + // NOTE: this will increase BAM size. Consider whether really needed: + args.add("saa=f"); + + File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); + if (outputSam.exists()) + { + outputSam.delete(); + } + + args.add((retainUnmapped ? "out=" : "outm=") + outputSam.getPath()); + + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + args.add("-Xmx" + maxRam + "g"); + } + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + args.add(maxThreads == null ? 
"threads=1" : "threads=" + maxThreads); + + args.addAll(options); + + setWorkingDir(outputDirectory); + execute(args); + + if (!outputSam.exists()) + { + throw new PipelineJobException("File not found: " + outputSam.getPath()); + } + + File outputBam = new File(outputDirectory, basename + ".bbmap.bam"); + if (outputBam.exists()) + { + outputBam.delete(); + } + + SamtoolsRunner samtoolsRunner = new SamtoolsRunner(getLogger()); + List stArgs = new ArrayList<>(); + stArgs.add(samtoolsRunner.getSamtoolsPath().getPath()); + stArgs.add("view"); + stArgs.add("-o"); + stArgs.add(outputBam.getPath()); + stArgs.add(outputSam.getPath()); + samtoolsRunner.execute(stArgs); + + if (!outputBam.exists()) + { + throw new PipelineJobException("File not found: " + outputBam.getPath()); + } + + outputSam.delete(); + + return outputBam; + } + + public File buildIndex(File inputFasta, File outDir) throws PipelineJobException + { + if (!outDir.exists()) + { + outDir.mkdirs(); + } + + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("k=7"); + args.add("path=" + outDir.getPath()); + args.add("ref=" + inputFasta.getPath()); + + setWorkingDir(outDir); + execute(args); + + File output = new File(outDir, "ref"); + if (!output.exists()) + { + throw new PipelineJobException("Unable to find file: " + output); + } + + return output; + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java index 378707865..5565d03ac 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java @@ -58,7 +58,7 @@ public MosaikWrapper(@Nullable Logger logger) public static class MosaikAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep { - public MosaikAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + public MosaikAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new MosaikWrapper(ctx.getLogger())); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java index 6e138c513..482626fbc 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java @@ -149,7 +149,15 @@ public void iterateReads(String refName, int start, int stop) throws IOException if (r.getAlignmentEnd() < start || r.getAlignmentStart() > stop) continue; - processAlignment(r, indexedRef); + try + { + processAlignment(r, indexedRef); + } + catch (Exception e) + { + _logger.error("Unable to parse alignment: " + r.toString() + " / " + r.getCigarString()); + throw e; + } if (i % 10000 == 0) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java index 4b281369c..59f9b945a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java @@ -644,7 +644,7 @@ else if (genomeIds.isEmpty()) wrapper.execute(genome, vcfsToProcess, workingDestinationWorkspaceFolder, intervals, options, _append); - if 
(ctx.getParams().optBoolean("consolidate", true)) + if (ctx.getParams().optBoolean("consolidate", false)) { ctx.getLogger().info("Will consolidate the workspace using consolidate_genomicsdb_array"); doConsolidate(ctx, workingDestinationWorkspaceFolder, genome); diff --git a/Studies/api-src/org/labkey/api/studies/StudiesService.java b/Studies/api-src/org/labkey/api/studies/StudiesService.java index 6ddb36fc0..249137680 100644 --- a/Studies/api-src/org/labkey/api/studies/StudiesService.java +++ b/Studies/api-src/org/labkey/api/studies/StudiesService.java @@ -2,6 +2,7 @@ import org.labkey.api.data.Container; import org.labkey.api.module.Module; +import org.labkey.api.resource.Resource; import org.labkey.api.security.User; import org.labkey.api.util.Path; @@ -25,4 +26,6 @@ static public void setInstance(StudiesService instance) } abstract public void importFolderDefinition(Container container, User user, Module m, Path sourceFolderDirPath) throws IOException; + + abstract public void loadTsv(Resource tsv, String schemaName, User u, Container c); } diff --git a/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java new file mode 100644 index 000000000..719944c7a --- /dev/null +++ b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2016-2019 LabKey Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.labkey.api.studies.security; + +import org.labkey.api.security.permissions.AbstractPermission; + +/** + * User: bimber + * Date: 1/17/13 + * Time: 7:49 PM + */ +public class StudiesDataAdminPermission extends AbstractPermission +{ + public StudiesDataAdminPermission() + { + super("StudiesDataAdminPermission", "This is the base permission used control which users can manage administrative data in the EHR, such as assignments"); + } +} diff --git a/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java new file mode 100644 index 000000000..3b3b0ea13 --- /dev/null +++ b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java @@ -0,0 +1,22 @@ +package org.labkey.api.studies.security; + +import org.jetbrains.annotations.NotNull; +import org.labkey.api.security.permissions.DeletePermission; +import org.labkey.api.security.permissions.InsertPermission; +import org.labkey.api.security.permissions.ReadPermission; +import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.api.security.roles.AbstractRole; + +public class StudiesDataAdminRole extends AbstractRole +{ + public StudiesDataAdminRole() + { + super("StudiesDataAdmin", "These users can administer data from the studies module", ReadPermission.class, InsertPermission.class, UpdatePermission.class, DeletePermission.class, StudiesDataAdminPermission.class); + } + + @Override + public @NotNull String getDisplayName() + { + return "Studies Data Admin"; + } +} diff --git a/Studies/resources/module.xml b/Studies/resources/module.xml new file mode 100644 index 000000000..5a8029b41 --- /dev/null +++ b/Studies/resources/module.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql b/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql new file mode 100644 index 000000000..14f33187b --- /dev/null +++ b/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql @@ -0,0 +1,34 @@ +CREATE TABLE studies.lookup_sets ( + rowid serial, + setname varchar(100), + label varchar(500), + description varchar(4000), + keyField varchar(4000), + titleColumn varchar(4000), + container entityid, + created timestamp, + createdby int, + modified timestamp, + modifiedby int, + + CONSTRAINT PK_lookup_sets PRIMARY KEY (rowid) +); + +CREATE TABLE studies.lookups ( + rowid serial, + setname varchar(100), + value varchar(4000), + title varchar(4000), + category varchar(4000), + description varchar(4000), + sort_order int, + date_disabled timestamp, + objectid varchar(4000), + container entityid, + created timestamp, + createdby int, + modified timestamp, + modifiedby int, + + CONSTRAINT PK_lookups PRIMARY KEY (rowid) +); \ No newline at end of file diff --git a/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql new file mode 100644 index 000000000..fe59ea113 --- /dev/null +++ b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql @@ -0,0 +1,34 @@ +CREATE TABLE studies.lookup_sets ( + rowid int identity(1,1), + setname nvarchar(100), + label nvarchar(500), + description nvarchar(MAX), + keyField nvarchar(MAX), + titleColumn nvarchar(MAX), + container entityid, + created datetime, + createdby int, + modified datetime, + modifiedby int, + + CONSTRAINT PK_lookup_sets PRIMARY KEY (rowid) +); + +CREATE TABLE studies.lookups ( + 
rowid int identity(1,1), + setname nvarchar(100), + value nvarchar(MAX), + title nvarchar(MAX), + category nvarchar(MAX), + description nvarchar(MAX), + sort_order int, + date_disabled datetime, + objectid nvarchar(100), + container entityid, + created datetime, + createdby int, + modified datetime, + modifiedby int, + + CONSTRAINT PK_lookups PRIMARY KEY (rowid) +); \ No newline at end of file diff --git a/Studies/resources/schemas/studies.xml b/Studies/resources/schemas/studies.xml index 7918b339b..054cd9393 100644 --- a/Studies/resources/schemas/studies.xml +++ b/Studies/resources/schemas/studies.xml @@ -1,3 +1,127 @@ \ No newline at end of file + xmlns="http://labkey.org/data/xml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + + Each lookup set is exposed as a separate virtual table in the studies schema, making it easy to add simple value tables without having to create discrete tables in the underlying database + Lookup Sets + DETAILED + rowid + + + + + + + + + + Set Name + + + Label + + + Description + + + Key Field + + + Title Column + + + + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + +
+ + Lookup Values + DETAILED + + + + + + + true + false + false + true + + + Set Name + + + Value + + + Title + + + Category + + + Description + + + Sort Order + + + Date Disabled + + + Key + false + false + false + true + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + +
+
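Together, studies.lookup_sets and studies.lookups implement the data-defined table pattern described in the table XML above: each lookup_sets row surfaces as its own virtual table in the studies schema, backed by the lookups rows sharing its setname (wired up in StudiesUserSchema.createTable further down in this diff). A sketch of how calling code could resolve and read one of these virtual tables; the set name "species" and the user/container arguments are illustrative assumptions:

```java
import org.labkey.api.data.Container;
import org.labkey.api.data.TableInfo;
import org.labkey.api.data.TableSelector;
import org.labkey.api.query.QueryService;
import org.labkey.api.query.UserSchema;
import org.labkey.api.security.User;

public class LookupSetExample
{
    // Assumes a studies.lookup_sets row with setname = "species" exists in this container
    public static void printLookupValues(User user, Container container)
    {
        UserSchema studies = QueryService.get().getUserSchema(user, container, "studies");
        TableInfo species = studies.getTable("species"); // resolved via getPropertySetNames(), served by LookupSetTable
        if (species == null)
            throw new IllegalStateException("No lookup set named: species");

        // Each row returned is a studies.lookups record with setname = 'species'
        new TableSelector(species).forEachMap(row ->
                System.out.println(row.get("value") + " -> " + row.get("title")));
    }
}
```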
\ No newline at end of file diff --git a/Studies/src/org/labkey/studies/StudiesController.java b/Studies/src/org/labkey/studies/StudiesController.java index 4e1176393..58d6b51dd 100644 --- a/Studies/src/org/labkey/studies/StudiesController.java +++ b/Studies/src/org/labkey/studies/StudiesController.java @@ -1,12 +1,46 @@ package org.labkey.studies; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.labkey.api.action.ConfirmAction; import org.labkey.api.action.SpringActionController; +import org.labkey.api.data.TableInfo; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineUrls; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.reader.DataLoader; +import org.labkey.api.reader.TabLoader; +import org.labkey.api.resource.Resource; +import org.labkey.api.security.RequiresPermission; +import org.labkey.api.security.permissions.AdminPermission; +import org.labkey.api.studies.StudiesService; +import org.labkey.api.util.FileUtil; +import org.labkey.api.util.HtmlString; +import org.labkey.api.util.PageFlowUtil; +import org.labkey.api.util.Path; +import org.labkey.api.util.URLHelper; +import org.labkey.api.util.logging.LogHelper; +import org.labkey.api.view.HtmlView; +import org.springframework.validation.BindException; +import org.springframework.validation.Errors; +import org.springframework.web.servlet.ModelAndView; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; public class StudiesController extends SpringActionController { private static final DefaultActionResolver _actionResolver = new DefaultActionResolver(StudiesController.class); public static final String NAME = "studies"; + private static final Logger _log = LogHelper.getLogger(StudiesController.class, "Messages from StudiesController"); + public StudiesController() { setActionResolver(_actionResolver); diff --git a/Studies/src/org/labkey/studies/StudiesModule.java b/Studies/src/org/labkey/studies/StudiesModule.java index 9b944c1a6..abb00167c 100644 --- a/Studies/src/org/labkey/studies/StudiesModule.java +++ b/Studies/src/org/labkey/studies/StudiesModule.java @@ -3,16 +3,21 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Container; -import org.labkey.api.module.DefaultModule; +import org.labkey.api.ldk.ExtendedSimpleModule; +import org.labkey.api.module.Module; import org.labkey.api.module.ModuleContext; +import org.labkey.api.query.DefaultSchema; +import org.labkey.api.query.QuerySchema; +import org.labkey.api.security.roles.RoleManager; import org.labkey.api.studies.StudiesService; -import org.labkey.api.view.WebPartFactory; +import org.labkey.studies.query.StudiesUserSchema; +import org.labkey.api.studies.security.StudiesDataAdminRole; import java.util.Collection; import java.util.Collections; import java.util.Set; -public class StudiesModule extends DefaultModule +public class StudiesModule extends ExtendedSimpleModule { public static final String NAME = "Studies"; @@ -25,20 +30,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 23.000; - } - - @Override - public boolean hasScripts() - { - return true; - } - - @Override - @NotNull - protected Collection 
createWebPartFactories() - { - return Collections.emptyList(); + return 23.001; } @Override @@ -47,10 +39,11 @@ protected void init() addController(StudiesController.NAME, StudiesController.class); StudiesService.setInstance(StudiesServiceImpl.get()); + RoleManager.registerRole(new StudiesDataAdminRole()); } @Override - public void doStartup(ModuleContext moduleContext) + public void doStartupAfterSpringConfig(ModuleContext moduleContext) { } @@ -68,4 +61,17 @@ public Set getSchemaNames() { return Collections.singleton(StudiesSchema.NAME); } + + @Override + public void registerSchemas() + { + DefaultSchema.registerProvider(StudiesSchema.NAME, new DefaultSchema.SchemaProvider(this) + { + @Override + public QuerySchema createSchema(final DefaultSchema schema, Module module) + { + return new StudiesUserSchema(schema.getUser(), schema.getContainer(), StudiesSchema.getInstance().getSchema()); + } + }); + } } \ No newline at end of file diff --git a/Studies/src/org/labkey/studies/StudiesServiceImpl.java b/Studies/src/org/labkey/studies/StudiesServiceImpl.java index 1768e2450..9edb5f9a9 100644 --- a/Studies/src/org/labkey/studies/StudiesServiceImpl.java +++ b/Studies/src/org/labkey/studies/StudiesServiceImpl.java @@ -3,9 +3,17 @@ import org.apache.logging.log4j.Logger; import org.labkey.api.admin.ImportOptions; import org.labkey.api.data.Container; +import org.labkey.api.data.TableInfo; import org.labkey.api.module.Module; import org.labkey.api.pipeline.PipeRoot; import org.labkey.api.pipeline.PipelineService; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.reader.DataLoader; +import org.labkey.api.reader.TabLoader; import org.labkey.api.resource.DirectoryResource; import org.labkey.api.resource.Resource; import org.labkey.api.security.User; @@ -19,6 +27,9 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; public class StudiesServiceImpl extends StudiesService { @@ -91,4 +102,32 @@ private void copyResourceToPath(Resource resource, java.nio.file.Path target) th } } } + + @Override + public void loadTsv(Resource tsv, String schemaName, User u, Container c) + { + try (DataLoader loader = DataLoader.get().createLoader(tsv, true, null, TabLoader.TSV_FILE_TYPE)) + { + TableInfo ti = QueryService.get().getUserSchema(u, c, schemaName).getTable(FileUtil.getBaseName(tsv.getName())); + if (ti == null) + { + throw new IllegalStateException("Missing table: " + tsv.getName()); + } + + List> rows = loader.load(); + + QueryUpdateService qus = ti.getUpdateService(); + qus.setBulkLoad(true); + + qus.truncateRows(u, c, null, null); + qus.insertRows(u, c, rows, new BatchValidationException(), null, null); + } + catch (IOException | SQLException | BatchValidationException | QueryUpdateServiceException | + DuplicateKeyException e) + { + _log.error("Error populating TSV", e); + + throw new RuntimeException(e); + } + } } diff --git a/Studies/src/org/labkey/studies/query/LookupSetTable.java b/Studies/src/org/labkey/studies/query/LookupSetTable.java new file mode 100644 index 000000000..b333008f3 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetTable.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013-2019 LabKey Corporation + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.labkey.studies.query; + +import org.labkey.api.data.ColumnInfo; +import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.SchemaTableInfo; +import org.labkey.api.ldk.LDKService; +import org.labkey.api.ldk.table.AbstractDataDefinedTable; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.SimpleUserSchema; + +import java.util.Map; + +/** + * User: bimber + * Date: 1/31/13 + * Time: 4:33 PM + */ +public class LookupSetTable extends AbstractDataDefinedTable +{ + private static final String CACHE_KEY = LookupSetTable.class.getName() + "||values"; + + private static final String FILTER_COL = "setname"; + private static final String VALUE_COL = "value"; + + private String _keyField; + + public static String getCacheKey(Container c) + { + return CACHE_KEY + "||" + c.getId(); + } + + public LookupSetTable(StudiesUserSchema schema, SchemaTableInfo table, ContainerFilter cf, String setName, Map map) + { + super(schema, table, cf, FILTER_COL, VALUE_COL, setName, setName); + + setTitleColumn(VALUE_COL); + + if (map.containsKey("label")) + setTitle((String)map.get("label")); + + if (map.containsKey("description")) + setDescription((String) map.get("description")); + + if (map.containsKey("keyField") && map.get("keyField") != null) + _keyField = (String)map.get("keyField"); + + if (map.containsKey("titleColumn") && map.get("titleColumn") != null) + _titleColumn = (String)map.get("titleColumn"); + else + _titleColumn = VALUE_COL; + } + + @Override + public LookupSetTable init() + { + super.init(); + + if (_keyField != null) + { + var keyCol = getMutableColumn(_keyField); + if (keyCol != null) + { + keyCol.setKeyField(true); + getMutableColumnOrThrow("rowid").setKeyField(false); + } + } + else + { + getMutableColumnOrThrow(VALUE_COL).setKeyField(false); + getMutableColumnOrThrow("rowid").setKeyField(true); + } + + if (_titleColumn != null) + { + ColumnInfo titleCol = getColumn(_titleColumn); + if (titleCol != null) + { + setTitleColumn(titleCol.getName()); + } + } + LDKService.get().getDefaultTableCustomizer().customize(this); + return this; + } + + @Override + public QueryUpdateService getUpdateService() + { + return new EHRLookupsUpdateService(this); + } + + protected class EHRLookupsUpdateService extends UpdateService + { + public EHRLookupsUpdateService(SimpleUserSchema.SimpleTable ti) + { + super(ti); + } + } +} + + diff --git a/Studies/src/org/labkey/studies/query/LookupSetsManager.java b/Studies/src/org/labkey/studies/query/LookupSetsManager.java new file mode 100644 index 000000000..6cb54f239 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetsManager.java @@ -0,0 +1,31 @@ +package org.labkey.studies.query; + +import org.apache.logging.log4j.Logger; +import org.labkey.api.cache.Cache; +import org.labkey.api.cache.CacheManager; +import org.labkey.api.util.logging.LogHelper; + +public class LookupSetsManager +{ + private static final LookupSetsManager _instance = new 
LookupSetsManager(); + private static final Logger _log = LogHelper.getLogger(LookupSetsManager.class, "Messages from the Studies LookupSetsManager"); + + public static final String TABLE_LOOKUPS = "lookups"; + public static final String TABLE_LOOKUP_SETS = "lookup_sets"; + private final Cache _cache; + + private LookupSetsManager() + { + _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "LookupSetsManagerCache"); + } + + public static LookupSetsManager get() + { + return _instance; + } + + public Cache getCache() + { + return _cache; + } +} diff --git a/Studies/src/org/labkey/studies/query/LookupSetsTable.java b/Studies/src/org/labkey/studies/query/LookupSetsTable.java new file mode 100644 index 000000000..4d6ff8230 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetsTable.java @@ -0,0 +1,60 @@ +package org.labkey.studies.query; + +import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.ldk.table.ContainerScopedTable; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.InvalidKeyException; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.query.SimpleUserSchema; +import org.labkey.api.query.UserSchema; +import org.labkey.api.security.User; + +import java.sql.SQLException; +import java.util.Map; + +public class LookupSetsTable extends ContainerScopedTable +{ + public LookupSetsTable(SchemaType schema, TableInfo st, ContainerFilter cf, String newPk) + { + super(schema, st, cf, newPk); + } + + @Override + public QueryUpdateService getUpdateService() + { + return new UpdateService(this); + } + + private class UpdateService extends ContainerScopedTable.UpdateService + { + public UpdateService(SimpleUserSchema.SimpleTable ti) + { + super(ti); + } + + @Override + protected void afterInsertUpdate(int count, BatchValidationException errors) + { + LookupSetsManager.get().getCache().clear(); + } + + @Override + protected Map deleteRow(User user, Container container, Map oldRowMap) throws QueryUpdateServiceException, SQLException, InvalidKeyException + { + Map row = super.deleteRow(user, container, oldRowMap); + LookupSetsManager.get().getCache().clear(); + return row; + } + + @Override + protected int truncateRows(User user, Container container) throws QueryUpdateServiceException, SQLException + { + int i = super.truncateRows(user, container); + LookupSetsManager.get().getCache().clear(); + return i; + } + } +} diff --git a/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java new file mode 100644 index 000000000..4fddcc0e3 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java @@ -0,0 +1,68 @@ +package org.labkey.studies.query; + +import org.apache.commons.lang3.StringUtils; +import org.labkey.api.data.ColumnInfo; +import org.labkey.api.data.DataColumn; +import org.labkey.api.data.RenderContext; +import org.labkey.api.query.FieldKey; + +import java.text.DecimalFormat; +import java.util.Set; + +public class ResultsOOODisplayColumn extends DataColumn +{ + public ResultsOOODisplayColumn(ColumnInfo col) + { + super(col); + } + + @Override + public Class getDisplayValueClass() + { + return String.class; + } + + @Override + public Object getDisplayValue(RenderContext ctx) + { + Object result = ctx.get(getBoundColumn().getFieldKey(), Double.class); + if (result == 
null) + { + return null; + } + + if (getBoundColumn().getFormat() != null) + { + DecimalFormat fmt = new DecimalFormat(getBoundColumn().getFormat()); + result = fmt.format(result); + } + + String oor = ctx.get(getOOR(), String.class); + if (StringUtils.isEmpty(oor)) + { + return result; + } + + return oor + result; + } + + private FieldKey getOOR() + { + FieldKey oor = FieldKey.fromString("resultOOOIndicator"); + if (getBoundColumn() != null) + { + return FieldKey.fromParts(getBoundColumn().getFieldKey().getParent(), oor); + } + else + { + return oor; + } + } + + @Override + public void addQueryFieldKeys(Set keys) + { + super.addQueryFieldKeys(keys); + keys.add(getOOR()); + } +} diff --git a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java new file mode 100644 index 000000000..2fb451c26 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java @@ -0,0 +1,125 @@ +package org.labkey.studies.query; + +import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.collections.CaseInsensitiveTreeSet; +import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.DbSchema; +import org.labkey.api.data.SchemaTableInfo; +import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.data.TableSelector; +import org.labkey.api.ldk.table.ContainerScopedTable; +import org.labkey.api.ldk.table.CustomPermissionsTable; +import org.labkey.api.query.FieldKey; +import org.labkey.api.query.SimpleUserSchema; +import org.labkey.api.security.User; +import org.labkey.api.security.permissions.DeletePermission; +import org.labkey.api.security.permissions.InsertPermission; +import org.labkey.api.security.permissions.ReadPermission; +import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.studies.StudiesSchema; +import org.labkey.api.studies.security.StudiesDataAdminPermission; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +import static org.labkey.studies.query.LookupSetsManager.TABLE_LOOKUPS; +import static org.labkey.studies.query.LookupSetsManager.TABLE_LOOKUP_SETS; + +public class StudiesUserSchema extends SimpleUserSchema +{ + public StudiesUserSchema(User user, Container container, DbSchema dbschema) + { + super(StudiesSchema.NAME, "", user, container, dbschema); + } + + @Override + public Set getTableNames() + { + Set available = new CaseInsensitiveTreeSet(super.getTableNames()); + available.addAll(getPropertySetNames().keySet()); + + return Collections.unmodifiableSet(available); + } + + @Override + public Set getVisibleTableNames() + { + return getTableNames(); + } + + private Container getTargetContainer() + { + return getContainer().isWorkbookOrTab() ? 
+
+    private Map<String, Map<String, Object>> getPropertySetNames()
+    {
+        Map<String, Map<String, Object>> nameMap = (Map<String, Map<String, Object>>) LookupSetsManager.get().getCache().get(LookupSetTable.getCacheKey(getTargetContainer()));
+        if (nameMap != null)
+        {
+            return nameMap;
+        }
+
+        nameMap = new CaseInsensitiveHashMap<>();
+
+        TableSelector ts = new TableSelector(_dbSchema.getTable(TABLE_LOOKUP_SETS), new SimpleFilter(FieldKey.fromString("container"), getTargetContainer().getId()), null);
+        Map<String, Object>[] rows = ts.getMapArray();
+        if (rows.length > 0)
+        {
+            Set<String> existing = super.getTableNames();
+            for (Map<String, Object> row : rows)
+            {
+                String setname = (String)row.get("setname");
+                if (setname != null && !existing.contains(setname))
+                    nameMap.put(setname, row);
+            }
+        }
+
+        nameMap = Collections.unmodifiableMap(nameMap);
+        LookupSetsManager.get().getCache().put(LookupSetTable.getCacheKey(getTargetContainer()), nameMap);
+
+        return nameMap;
+    }
+
+    @Override
+    public TableInfo createTable(String name, ContainerFilter cf)
+    {
+        if (TABLE_LOOKUP_SETS.equalsIgnoreCase(name))
+        {
+            ContainerScopedTable<StudiesUserSchema> ret = new LookupSetsTable<>(this, createSourceTable(name), cf, "setname");
+            ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class);
+            ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class);
+            ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class);
+            return ret.init();
+        }
+        else if (TABLE_LOOKUPS.equalsIgnoreCase(name))
+        {
+            CustomPermissionsTable<StudiesUserSchema> ret = new CustomPermissionsTable<>(this, createSourceTable(name), cf);
+            ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class);
+            ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class);
+            ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class);
+            ret.addPermissionMapping(ReadPermission.class, StudiesDataAdminPermission.class);
+            return ret.init();
+        }
+
+        //try to find it in propertySets
+        Map<String, Map<String, Object>> nameMap = getPropertySetNames();
+        if (nameMap.containsKey(name))
+            return createForPropertySet(this, cf, name, nameMap.get(name));
+
+        return super.createTable(name, cf);
+    }
+
+    private LookupSetTable createForPropertySet(StudiesUserSchema us, ContainerFilter cf, String setName, Map<String, Object> map)
+    {
+        SchemaTableInfo table = _dbSchema.getTable(TABLE_LOOKUPS);
+        LookupSetTable ret = new LookupSetTable(us, table, cf, setName, map);
+        ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class);
+        ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class);
+        ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class);
+        return ret.init();
+    }
+}
diff --git a/cluster/resources/schemas/cluster.xml b/cluster/resources/schemas/cluster.xml
index 34b3c06f5..e811450e4 100644
--- a/cluster/resources/schemas/cluster.xml
+++ b/cluster/resources/schemas/cluster.xml
@@ -59,6 +59,18 @@
         <formatString>yyyy-MM-dd HH:mm</formatString>
       </column>
+      <column columnName="clusterAccount">
+        <columnTitle>Cluster Account</columnTitle>
+      </column>
+      <column columnName="duration">
+        <columnTitle>Duration (seconds)</columnTitle>
+      </column>
+      <column columnName="cpuUsed">
+        <columnTitle>CPU Used</columnTitle>
+      </column>
+      <column columnName="gpuUsed">
+        <columnTitle>GPU Used</columnTitle>
+      </column>
diff --git a/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql
new file mode 100644
index 000000000..70e847645
--- /dev/null
+++ b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql
@@ -0,0 +1,4 @@
+ALTER TABLE cluster.clusterJobs ADD clusterAccount varchar(1000);
+ALTER TABLE cluster.clusterJobs ADD duration double precision;
+ALTER TABLE cluster.clusterJobs ADD cpuUsed int;
+ALTER TABLE cluster.clusterJobs
ADD gpuUsed int; diff --git a/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql new file mode 100644 index 000000000..0e742b307 --- /dev/null +++ b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql @@ -0,0 +1,4 @@ +ALTER TABLE cluster.clusterJobs ADD clusterAccount nvarchar(1000); +ALTER TABLE cluster.clusterJobs ADD duration double precision; +ALTER TABLE cluster.clusterJobs ADD cpuUsed int; +ALTER TABLE cluster.clusterJobs ADD gpuUsed int; diff --git a/cluster/src/org/labkey/cluster/ClusterModule.java b/cluster/src/org/labkey/cluster/ClusterModule.java index e7638ddcc..fa3f86a7b 100644 --- a/cluster/src/org/labkey/cluster/ClusterModule.java +++ b/cluster/src/org/labkey/cluster/ClusterModule.java @@ -66,7 +66,7 @@ public String getName() @Override public Double getSchemaVersion() { - return 15.24; + return 15.25; } @Override diff --git a/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java b/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java index dd8ed85f7..afe3c5024 100644 --- a/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java +++ b/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java @@ -22,6 +22,9 @@ public class ClusterJob private String _location; private String _activeTaskId; private String _clusterUser; + private Integer _duration; + private Integer _cpuUsed; + private Integer _gpuUsed; private String _hostname; private Date _logModified; @@ -195,4 +198,34 @@ public void setLogModified(Date logModified) { _logModified = logModified; } + + public Integer getDuration() + { + return _duration; + } + + public void setDuration(Integer duration) + { + _duration = duration; + } + + public Integer getCpuUsed() + { + return _cpuUsed; + } + + public void setCpuUsed(Integer cpuUsed) + { + _cpuUsed = cpuUsed; + } + + public Integer getGpuUsed() + { + return _gpuUsed; + } + + public void setGpuUsed(Integer gpuUsed) + { + _gpuUsed = gpuUsed; + } } diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index e13c918d3..980fad5ae 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -1,6 +1,7 @@ package org.labkey.cluster.pipeline; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -11,6 +12,7 @@ import org.labkey.api.collections.CaseInsensitiveHashSet; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; +import org.labkey.api.data.Table; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineService; @@ -19,6 +21,7 @@ import org.labkey.api.util.Pair; import org.labkey.api.writer.PrintWriters; import org.labkey.cluster.ClusterManager; +import org.labkey.cluster.ClusterSchema; import org.labkey.cluster.ClusterServiceImpl; import org.quartz.JobExecutionException; @@ -94,6 +97,7 @@ protected List submitJobToCluster(ClusterJob j, PipelineJob job) throws line = line.replaceFirst("^Submitted batch job", ""); line = line.trim(); j.setClusterId(line); + j.setClusterUser(ClusterServiceImpl.get().getClusterUser(job.getContainer())); break; } @@ -129,6 +133,7 @@ protected Set updateStatusForAllJobs() throws 
PipelineJobException
         int stateIdx = -1;
         int hostnameIdx = -1;
         int reasonIdx = -1;
+
         for (String line : ret)
         {
             line = StringUtils.trimToNull(line);
@@ -177,10 +182,13 @@
             }
             else
             {
+                Map<String, Object> propsToUpdate = new HashMap<>();
+
                 String hostname = hostnameIdx != -1 && tokens.length > hostnameIdx ? StringUtils.trimToNull(tokens[hostnameIdx]) : null;
                 if (hostname != null)
                 {
                     j.setHostname(hostname);
+                    propsToUpdate.put("hostname", hostname);
                 }

                 Pair<PipelineJob.TaskStatus, String> status = translateSlurmStatusToTaskStatus(StringUtils.trimToNull(tokens[stateIdx]));
@@ -199,6 +207,11 @@
                 }
             }

+            if (!propsToUpdate.isEmpty())
+            {
+                updateClusterSubmission(j, propsToUpdate);
+            }
+
             updateJobStatus(status == null ? null : status.first, j, status == null ? null : status.second);
             jobsUpdated.add(j.getClusterId());
         }
@@ -223,6 +236,42 @@
         return jobsUpdated;
     }

+    // parses AllocTRES, such as: cpu=4,gres/disk=1028,mem=20000M,node=1
+    private Integer findIntValue(String input, String key)
+    {
+        input = StringUtils.trimToNull(input);
+        if (input == null)
+        {
+            return null;
+        }
+
+        String[] tokens = input.split(",");
+        for (String token : tokens)
+        {
+            // TRES names can be bare (cpu=4) or gres-prefixed (gres/gpu=2); accept both,
+            // since a plain "gpu" key would otherwise never match the gres/gpu entry
+            if (token.startsWith(key + "=") || token.startsWith("gres/" + key + "="))
+            {
+                String val = token.split("=")[1];
+                if (!NumberUtils.isCreatable(val))
+                {
+                    _log.error("Non-numeric value for: " + key + ", input: " + input);
+                    return null;
+                }
+
+                return Integer.parseInt(val);
+            }
+        }
+
+        return null;
+    }
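+
+    // Worked example (values illustrative, not from a real cluster), given the gres-aware matching above:
+    //   findIntValue("cpu=4,gres/disk=1028,gres/gpu=2,mem=20000M,node=1", "cpu") -> 4
+    //   findIntValue("cpu=4,gres/disk=1028,gres/gpu=2,mem=20000M,node=1", "gpu") -> 2
+    //   findIntValue("cpu=4,mem=20000M", "gpu")                                  -> null
+    // A non-numeric value (e.g. mem=20000M, if that key were requested) fails the
+    // NumberUtils.isCreatable() check and also yields null.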
+
+    private void updateClusterSubmission(ClusterJob j, Map<String, Object> toUpdate)
+    {
+        _log.debug("Updating job: " + j.getJobId() + ", " + toUpdate.keySet().stream().map(x -> x + "=" + toUpdate.get(x)).collect(Collectors.joining(", ")));
+
+        toUpdate.put("rowid", j.getRowId());
+        Table.update(null, ClusterSchema.getInstance().getSchema().getTable(ClusterSchema.CLUSTER_JOBS), toUpdate, j.getRowId());
+    }
+
     @Override
     protected Pair<PipelineJob.TaskStatus, String> getStatusForJob(ClusterJob job, Container c)
     {
@@ -241,6 +290,7 @@
         //verify success
         boolean headerFound = false;
         boolean foundJobLine = false;
+        List<String> fieldWidths = new ArrayList<>();
         LinkedHashSet<String> statuses = new LinkedHashSet<>();
         List<String> header;
         int jobIdx = -1;
@@ -248,6 +298,9 @@
         int hostnameIdx = -1;
         int maxRssIdx = -1;
         int reqMemIdx = -1;
+        int elapsedIdx = -1;
+        int resourcesIdx = -1;
+        String reqMem = null;

         for (String line : ret)
         {
@@ -266,6 +319,8 @@
             hostnameIdx = header.indexOf("NODELIST");
             maxRssIdx = header.indexOf("MAXRSS");
             reqMemIdx = header.indexOf("REQMEM");
+            elapsedIdx = header.indexOf("ELAPSEDRAW");
+            resourcesIdx = header.indexOf("ALLOCTRES");

             if (stateIdx == -1)
             {
@@ -281,34 +336,37 @@
             }
             else if (foundJobLine && line.startsWith("------------"))
             {
+                // the ruler row encodes the fixed width of each sacct column
+                fieldWidths.addAll(Arrays.asList(line.split(" ")));
                 headerFound = true;
             }
             else if (headerFound)
             {
                 try
                 {
-                    String[] tokens = line.split("( )+");
-                    String id = StringUtils.trimToNull(tokens[jobIdx]);
-                    if (id.equals(job.getClusterId()))
+                    String id = extractField(line, fieldWidths, jobIdx);
+                    if (id != null && id.equals(job.getClusterId()))
                     {
-                        statuses.add(StringUtils.trimToNull(tokens[stateIdx]));
+                        statuses.add(extractField(line, fieldWidths, stateIdx));
                     }

+                    Map<String, Object> propsToUpdate = new HashMap<>();
+
                     if (hostnameIdx > -1)
                     {
-                        String hostname = tokens.length > hostnameIdx ? StringUtils.trimToNull(tokens[hostnameIdx]) : null;
+                        String hostname = extractField(line, fieldWidths, hostnameIdx);
                        if (hostname != null)
                        {
                            if (job.getHostname() == null || !job.getHostname().equals(hostname))
                            {
                                job.setHostname(hostname);
+                               propsToUpdate.put("hostname", hostname);
                            }
                        }
                    }

-                    if (reqMemIdx > -1 && reqMemIdx < tokens.length)
+                    if (reqMemIdx > -1)
                    {
-                        String val = StringUtils.trimToNull(tokens[reqMemIdx]);
+                        String val = extractField(line, fieldWidths, reqMemIdx);
                        if (val != null)
                        {
                            reqMem = val;
@@ -316,12 +374,45 @@
                    }

+                    if (resourcesIdx > -1)
+                    {
+                        String allocTres = extractField(line, fieldWidths, resourcesIdx);
+                        job.setCpuUsed(findIntValue(allocTres, "cpu"));
+                        if (job.getCpuUsed() != null)
+                        {
+                            propsToUpdate.put("cpuUsed", job.getCpuUsed());
+                        }
+
+                        job.setGpuUsed(findIntValue(allocTres, "gpu"));
+                        if (job.getGpuUsed() != null)
+                        {
+                            propsToUpdate.put("gpuUsed", job.getGpuUsed());
+                        }
+                    }
+
+                    if (elapsedIdx > -1)
+                    {
+                        String durationString = extractField(line, fieldWidths, elapsedIdx);
+                        // guard the parse; Integer.parseInt() never returns null, so no null-check afterward
+                        if (durationString != null && NumberUtils.isCreatable(durationString))
+                        {
+                            job.setDuration(Integer.parseInt(durationString));
+                            propsToUpdate.put("duration", job.getDuration());
+                        }
+                    }
+
+                    if (!propsToUpdate.isEmpty())
+                    {
+                        updateClusterSubmission(job, propsToUpdate);
+                    }
+
                    // NOTE: if the line has blank ending columns, trimmed lines might lack that value
-                    if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length)
+                    if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1)
                    {
                        try
                        {
-                            String maxRSS = StringUtils.trimToNull(tokens[maxRssIdx]);
+                            String maxRSS = extractField(line, fieldWidths, maxRssIdx);
                            if (maxRSS != null)
                            {
                                double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS);
@@ -360,7 +451,7 @@ else if (headerFound)
                 }
                 catch (Exception e)
                 {
-                    _log.error("Error parsing line: " + line, e);
+                    _log.error("Error parsing line: [" + line + "]", e);
                     throw e;
                 }
             }
@@ -396,6 +487,36 @@ else if (headerFound)
         return null;
     }

+    private String extractField(String line, List<String> fieldWidths, int idx)
+    {
+        // each sacct column occupies fieldWidths.get(i) characters, separated by a single space
+        int start = 0;
+        for (int i = 0; i < idx; i++)
+        {
+            start += fieldWidths.get(i).length() + 1;
+        }
+
+        int end = start + fieldWidths.get(idx).length();
+
+        try
+        {
+            if (end > line.length())
+            {
+                _log.debug("Encountered slurm line shorter than expected. Slurm field at idx: " + idx + ". Line: [" + line + "]");
+                _log.debug("fieldWidths: " + StringUtils.join(fieldWidths, "|"));
+
+                return null;
+            }
+
+            return StringUtils.trimToNull(line.substring(start, end));
+        }
+        catch (Exception e)
+        {
+            _log.error("Unable to parse slurm field at idx: " + idx + ". Line: [" + line + "]");
+            _log.error("fieldWidths: " + StringUtils.join(fieldWidths, "|"), e);
+            return null;
+        }
+    }
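+
+    // Worked example (the ruler content is assumed for illustration): for a sacct ruler of
+    //   "------------ ---------- --------"
+    // fieldWidths is ["------------", "----------", "--------"]. For idx = 1:
+    //   start = 12 + 1 = 13, end = 13 + 10 = 23, so the field is line.substring(13, 23),
+    // trimmed to null when blank. A line shorter than `end` (trailing blank columns) yields null.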
+
     @Override
     protected boolean removeJob(ClusterJob clusterJob)
     {
@@ -765,15 +886,23 @@ private Pair<PipelineJob.TaskStatus, String> getStatusFromQueue(ClusterJob job)
             String id = StringUtils.trimToNull(tokens[jobIdx]);
             if (job.getClusterId().equals(id))
             {
+                Map<String, Object> propsToUpdate = new HashMap<>();
+
                 if (hostnameIdx > -1)
                 {
                     String hostname = tokens.length > hostnameIdx ? StringUtils.trimToNull(tokens[hostnameIdx]) : null;
                     if (hostname != null)
                     {
                         job.setHostname(hostname);
+                        propsToUpdate.put("hostname", hostname);
                     }
                 }

+                if (!propsToUpdate.isEmpty())
+                {
+                    updateClusterSubmission(job, propsToUpdate);
+                }
+
                 return translateSlurmStatusToTaskStatus(StringUtils.trimToNull(tokens[stateIdx]));
             }
         }
diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json
index 3938e1886..a0f488c77 100644
--- a/jbrowse/package-lock.json
+++ b/jbrowse/package-lock.json
@@ -3091,9 +3091,9 @@
       }
     },
     "node_modules/@labkey/api": {
-      "version": "1.40.0",
-      "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.40.0.tgz",
-      "integrity": "sha512-ezCVNWtLkzbH5K/CoEb69gK5q6QSgQpG9FVJse3hPR1t5Bxpt6Mt0RbmKKdOfdJcNcA30IfpcbzEOc1gz7vhZQ=="
+      "version": "1.41.2",
+      "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.41.2.tgz",
+      "integrity": "sha512-ninfc/+Sj5+8Zla9bY2j/4fSy41OS27YAHKtDFPnu52QkC8WsOYh3JFI5PkU6Rn+xIp0In4P6d5Qn/yluJRC/w=="
     },
     "node_modules/@labkey/build": {
       "version": "8.5.0",
@@ -3133,12 +3133,12 @@
       }
     },
     "node_modules/@labkey/components": {
-      "version": "6.38.1",
-      "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.38.1.tgz",
-      "integrity": "sha512-fv47V+NL390BYMku9+rbW1UUtM4E6zWX/TkxK65lBwkyi+ZuM9FZHWLro1+tPS0M3Vc+QFVXTP6iRs1gnNrcCQ==",
+      "version": "6.45.0",
+      "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.45.0.tgz",
+      "integrity": "sha512-KT4C+NdlS6T54GF8jsde3cYm6Dt88AEZFw+dA39N83MGW3FiJ63s3DhF55dx8ImheCnchYlpK5xDF5/JI/Ux7A==",
       "dependencies": {
         "@hello-pangea/dnd": "18.0.1",
-        "@labkey/api": "1.40.0",
+        "@labkey/api": "1.41.2",
         "@testing-library/dom": "~10.4.0",
         "@testing-library/jest-dom": "~6.6.3",
         "@testing-library/react": "~16.3.0",
@@ -11349,9 +11349,9 @@
       }
     },
     "node_modules/tar-fs": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.2.tgz",
-      "integrity": "sha512-EsaAXwxmx8UB7FRKqeozqEPop69DXcmYwTQwXvyAPF352HJsPdkVhvTaDPYqfNgruveJIJy3TA2l+2zj8LJIJA==",
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.3.tgz",
+      "integrity": "sha512-090nwYJDmlhwFwEW3QQl+vaNnxsO2yVsd45eTKRBzSzu+hlb1w2K9inVq5b0ngXuLVqQ4ApvsUHHnu/zQNkWAg==",
       "dependencies": {
         "chownr": "^1.1.1",
         "mkdirp-classic": "^0.5.2",
diff --git a/jbrowse/resources/module.xml b/jbrowse/resources/module.xml
new file mode 100644
index 000000000..eeeb7de4a
--- /dev/null
+++ b/jbrowse/resources/module.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module xmlns="http://labkey.org/moduleProperties/xml/">
+    <propertyDescriptors>
+        <propertyDescriptor name="JBrowseLuceneCores">
+            <canSetPerContainer>false</canSetPerContainer>
+            <defaultValue>1</defaultValue>
+            <description>The number of cores to allow for lucene searches</description>
+            <editPermissions>
+                <permission>ADMIN</permission>
+            </editPermissions>
+        </propertyDescriptor>
+    </propertyDescriptors>
+</module>
diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
index 8b5b799b4..98f6ac0f9 100644
--- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
+++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx
@@ -68,7 +68,6 @@ const VariantTableWidget = observer(props => {
       return obj
     }))

-    setTotalHits(data.totalHits)
     setDataLoaded(true)
   }
@@ -76,6 +75,13 @@
     session.hideWidget(widget)
   }

+  function resetPaginationToFirstPage() {
+    setPageSizeModel(prev => ({
+      page: 0,
+      pageSize: prev.pageSize,
+    }));
+  }
+
   function handleQuery(passedFilters, pushToHistory, pageQueryModel = pageSizeModel, sortQueryModel = sortModel) {
     const { page =
pageSizeModel.page, pageSize = pageSizeModel.pageSize } = pageQueryModel; const { field = "genomicPosition", sort = false } = sortQueryModel[0] ?? {}; @@ -89,12 +95,35 @@ const VariantTableWidget = observer(props => { currentUrl.searchParams.set("sortDirection", sort.toString()); if (pushToHistory) { - window.history.pushState(null, "", currentUrl.toString()); + window.history.pushState(null, "", currentUrl.toString()); } setFilters(passedFilters); - setDataLoaded(false) - fetchLuceneQuery(passedFilters, sessionId, trackGUID, page, pageSize, field, sort, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)}); + setDataLoaded(false); + setFeatures([]); + + fetchLuceneQuery( + passedFilters, + sessionId, + trackGUID, + page, + pageSize, + field, + sort, + (row) => { + setFeatures(prev => { + row.id = prev.length; + row.trackId = trackId; + return [...prev, row]; + }); + }, + () => setDataLoaded(true), + (error) => { + console.error("Stream error:", error); + setError(error); + setDataLoaded(true); + } + ); } const handleExport = () => { @@ -267,7 +296,6 @@ const VariantTableWidget = observer(props => { const [filterModalOpen, setFilterModalOpen] = useState(false); const [filters, setFilters] = useState([]); - const [totalHits, setTotalHits] = useState(0); const [fieldTypeInfo, setFieldTypeInfo] = useState([]); const [allowedGroupNames, setAllowedGroupNames] = useState([]); const [promotedFilters, setPromotedFilters] = useState>(null); @@ -461,7 +489,7 @@ const VariantTableWidget = observer(props => { columnVisibilityModel={columnVisibilityModel} pageSizeOptions={[10,25,50,100]} paginationModel={ pageSizeModel } - rowCount={ totalHits } + rowCount={ -1 } paginationMode="server" onPaginationModelChange = {(newModel) => { setPageSizeModel(newModel) @@ -485,6 +513,7 @@ const VariantTableWidget = observer(props => { onSortModelChange={(newModel) => { setSortModel(newModel) handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize }, newModel); + resetPaginationToFirstPage() }} localeText={{ MuiTablePagination: { @@ -515,7 +544,10 @@ const VariantTableWidget = observer(props => { fieldTypeInfo: fieldTypeInfo, allowedGroupNames: allowedGroupNames, promotedFilters: promotedFilters, - handleQuery: (filters) => handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize}, sortModel) + handleQuery: (filters) => { + handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize}, sortModel) + resetPaginationToFirstPage() + } }} /> ); diff --git a/jbrowse/src/client/JBrowse/utils.ts b/jbrowse/src/client/JBrowse/utils.ts index 75fa6a195..ad1870022 100644 --- a/jbrowse/src/client/JBrowse/utils.ts +++ b/jbrowse/src/client/JBrowse/utils.ts @@ -328,23 +328,24 @@ export function serializeLocationToEncodedSearchString(contig, start, end) { return createEncodedFilterString(filters) } -export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, sortField, sortReverseString, successCallback, failureCallback) { +export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, sortField, sortReverseString, + handleRow, handleComplete, handleError) { if (!offset) { offset = 0 } if (!sessionId) { - failureCallback("There was an error: " + "Lucene query: no session ID") + handleError("There was an error: " + "Lucene query: no session ID") return } if (!trackGUID) { - failureCallback("There was an error: " + "Lucene query: no track ID") + handleError("There was an error: " + "Lucene query: no track ID") return } 
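
    // A note on the wire format this client assumes (field names below are illustrative,
    // not taken from a real response): luceneQuery.api streams NDJSON, one JSON document
    // per line, e.g.
    //   {"genomicPosition":12345,"ref":"A","alt":"G"}
    //   {"genomicPosition":12389,"ref":"C","alt":"T"}
    // The reader below splits the byte stream on '\n', hands each complete line to
    // handleRow, and calls handleComplete once the stream reports done.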
    if (!filters) {
-        failureCallback("There was an error: " + "Lucene query: no filters")
+        handleError("There was an error: " + "Lucene query: no filters")
        return
    }
@@ -358,26 +359,60 @@ export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, ...)
        sortReverse = false
    }

-    return Ajax.request({
-        url: ActionURL.buildURL('jbrowse', 'luceneQuery.api'),
-        method: 'GET',
-        success: async function(res){
-            let jsonRes = JSON.parse(res.response);
-            successCallback(jsonRes)
-        },
-        failure: function(res) {
-            failureCallback("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId)
-        },
-        params: {
-            "searchString": encoded,
-            "sessionId": sessionId,
-            "trackId": trackGUID,
-            "offset": offset,
-            "pageSize": pageSize,
-            "sortField": sortField ?? "genomicPosition",
-            "sortReverse": sortReverse
-        },
-    });
+    const params = {
+        searchString: encoded,
+        sessionId,
+        trackId: trackGUID,
+        offset: offset,
+        pageSize: pageSize,
+        sortField: sortField ?? "genomicPosition",
+        sortReverse: sortReverse,
+    };
+
+    try {
+        const url = ActionURL.buildURL('jbrowse', 'luceneQuery.api', null, params);
+        const response = await fetch(url);
+        if (!response.ok || !response.body) {
+            throw new Error(`HTTP error ${response.status}`);
+        }
+
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder("utf-8");
+        let buffer = '';
+
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+
+            buffer += decoder.decode(value, { stream: true });
+
+            // emit every complete newline-terminated JSON document currently in the buffer
+            let boundary;
+            while ((boundary = buffer.indexOf('\n')) >= 0) {
+                const line = buffer.slice(0, boundary).trim();
+                buffer = buffer.slice(boundary + 1);
+                if (line) {
+                    try {
+                        const parsed = JSON.parse(line);
+                        handleRow(parsed);
+                    } catch (err) {
+                        console.error('Failed to parse line:', line, err);
+                    }
+                }
+            }
+        }
+
+        // flush any bytes the decoder is still holding (e.g. a multi-byte char split across reads)
+        buffer += decoder.decode();
+        if (buffer.trim()) {
+            try {
+                handleRow(JSON.parse(buffer));
+            } catch (err) {
+                console.error('Final line parse error:', buffer, err);
+            }
+        }
+
+        handleComplete();
+    } catch (error) {
+        handleError(error.toString());
+    }
}

export class FieldModel {
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
index 6dc297b95..d7c75b483 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
@@ -28,6 +28,7 @@
 import org.json.JSONObject;
 import org.labkey.api.action.ApiResponse;
 import org.labkey.api.action.ApiSimpleResponse;
+import org.labkey.api.action.ExportAction;
 import org.labkey.api.action.MutatingApiAction;
 import org.labkey.api.action.ReadOnlyApiAction;
 import org.labkey.api.action.SimpleApiJsonForm;
@@ -944,10 +945,10 @@ else if (!isValidUUID(form.getTrackId()))
     }

     @RequiresPermission(ReadPermission.class)
-    public static class LuceneQueryAction extends ReadOnlyApiAction<LuceneQueryForm>
+    public static class LuceneQueryAction extends ExportAction<LuceneQueryForm>
     {
         @Override
-        public ApiResponse execute(LuceneQueryForm form, BindException errors)
+        public void export(LuceneQueryForm form, HttpServletResponse response, BindException errors) throws Exception
         {
             JBrowseLuceneSearch searcher;
             try
             {
@@ -957,30 +958,31 @@ public ApiResponse execute(LuceneQueryForm form, BindException errors)
             catch (IllegalArgumentException e)
             {
                 errors.reject(ERROR_MSG, e.getMessage());
-                return null;
+                return;
             }

             try
             {
-                return new ApiSimpleResponse(searcher.doSearchJSON(
+                response.setContentType("application/x-ndjson");
+                searcher.doSearchJSON(
                         getUser(),
PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getOffset(), form.getSortField(), - form.getSortReverse() - )); + form.getSortReverse(), + response + ); } catch (Exception e) { _log.error("Error in JBrowse lucene query", e); errors.reject(ERROR_MSG, e.getMessage()); - return null; } } @Override - public void validateForm(LuceneQueryForm form, Errors errors) + public void validate(LuceneQueryForm form, BindException errors) { if ((form.getSearchString() == null || form.getSessionId() == null || form.getTrackId() == null)) { diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index 532d76156..8f46f069c 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -1,6 +1,7 @@ package org.labkey.jbrowse; import jakarta.servlet.http.HttpServletResponse; +import org.apache.catalina.connector.Response; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; @@ -16,6 +17,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LRUQueryCache; import org.apache.lucene.search.MatchAllDocsQuery; @@ -24,6 +26,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.UsageTrackingQueryCachingPolicy; import org.apache.lucene.store.Directory; @@ -65,6 +68,9 @@ import java.util.Set; import java.util.StringTokenizer; import java.util.function.Predicate; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -75,10 +81,6 @@ public class JBrowseLuceneSearch { private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries"); - private final JBrowseSession _session; - private final JsonFile _jsonFile; - private final User _user; - private final String[] specialStartPatterns = {"*:* -", "+", "-"}; private static final String ALL_DOCS = "all"; private static final String GENOMIC_POSITION = "genomicPosition"; private static final int maxCachedQueries = 1000; @@ -86,6 +88,13 @@ public class JBrowseLuceneSearch private static final Cache _cache = new LuceneIndexCache(); + private static ExecutorService _executor = null; + + private final JBrowseSession _session; + private final JsonFile _jsonFile; + private final User _user; + private final String[] specialStartPatterns = {"*:* -", "+", "-"}; + private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u) { _session = session; @@ -93,6 +102,16 @@ private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFil _user = u; } + private static synchronized ExecutorService getSearchExecutor() + { + if (_executor == null) + { + _executor = Executors.newFixedThreadPool(JBrowseServiceImpl.get().getCoresForLuceneSearches()); + } + + return _executor; + } + private Container getContainer() { return ContainerManager.getForId(_session.getContainer()); @@ -108,6 +127,11 @@ public static 
JBrowseLuceneSearch create(String sessionId, String trackId, User private static synchronized CacheEntry getCacheEntryForSession(String trackObjectId, File indexPath) throws IOException { CacheEntry cacheEntry = _cache.get(trackObjectId); + if (getSearchExecutor().isShutdown() || getSearchExecutor().isTerminated()) + { + throw new IllegalStateException("The server is shutting down!"); + } + // Open directory of lucene path, get a directory reader, and create the index search manager if (cacheEntry == null) { @@ -116,7 +140,7 @@ private static synchronized CacheEntry getCacheEntryForSession(String trackObjec Directory indexDirectory = FSDirectory.open(indexPath.toPath()); LRUQueryCache queryCache = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed); IndexReader indexReader = DirectoryReader.open(indexDirectory); - IndexSearcher indexSearcher = new IndexSearcher(indexReader); + IndexSearcher indexSearcher = new IndexSearcher(indexReader, getSearchExecutor()); indexSearcher.setQueryCache(queryCache); indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy()); cacheEntry = new CacheEntry(queryCache, indexSearcher, indexPath); @@ -192,9 +216,9 @@ public String extractFieldName(String queryString) return parts.length > 0 ? parts[0].trim() : null; } - public JSONObject doSearchJSON(User u, String searchString, final int pageSize, final int offset, String sortField, boolean sortReverse) throws IOException, ParseException { + public void doSearchJSON(User u, String searchString, final int pageSize, final int offset, String sortField, boolean sortReverse, HttpServletResponse response) throws IOException, ParseException { SearchConfig searchConfig = createSearchConfig(u, searchString, pageSize, offset, sortField, sortReverse); - return paginateJSON(searchConfig); + paginateJSON(searchConfig, response); } public void doSearchCSV(User u, String searchString, String sortField, boolean sortReverse, HttpServletResponse response) throws IOException, ParseException { @@ -252,7 +276,7 @@ private SearchConfig createSearchConfig(User u, String searchString, final int p if (searchString.equals(ALL_DOCS)) { - booleanQueryBuilder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + booleanQueryBuilder.add(new ConstantScoreQuery(new MatchAllDocsQuery()), BooleanClause.Occur.MUST); } // Split input into tokens, 1 token per query separated by & @@ -320,71 +344,75 @@ else if (numericQueryParserFields.containsKey(fieldName)) return new SearchConfig(cacheEntry, query, pageSize, offset, sort, fieldsList); } - private JSONObject paginateJSON(SearchConfig c) throws IOException, ParseException { - // Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more. - // We then iterate over the range of documents we want based on the offset. This does grow in memory - // linearly with the number of documents, but my understanding is that these are just score,id pairs - // rather than full documents, so mem usage *should* still be pretty low. 
- // Perform the search with sorting - TopFieldDocs topDocs = c.cacheEntry.indexSearcher.search(c.query, c.pageSize * (c.offset + 1), c.sort); - JSONObject results = new JSONObject(); + private void paginateJSON(SearchConfig c, HttpServletResponse response) throws IOException, ParseException { + IndexSearcher searcher = c.cacheEntry.indexSearcher; + TopDocs topDocs; + PrintWriter writer = response.getWriter(); - // Iterate over the doc list, (either to the total end or until the page ends) grab the requested docs, - // and add to returned results - List data = new ArrayList<>(); - for (int i = c.pageSize * c.offset; i < Math.min(c.pageSize * (c.offset + 1), topDocs.scoreDocs.length); i++) - { - JSONObject elem = new JSONObject(); - Document doc = c.cacheEntry.indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc); + if (c.offset == 0) { + topDocs = searcher.search(c.query, c.pageSize, c.sort); + } else { + TopFieldDocs prev = searcher.search(c.query, c.pageSize * c.offset, c.sort); + ScoreDoc[] prevHits = prev.scoreDocs; - for (IndexableField field : doc.getFields()) + if (prevHits.length < c.pageSize * c.offset) { - String fieldName = field.name(); - String[] fieldValues = doc.getValues(fieldName); - if (fieldValues.length > 1) - { - elem.put(fieldName, fieldValues); - } - else - { - elem.put(fieldName, fieldValues[0]); - } + return; } - data.add(elem); + + ScoreDoc lastDoc = prevHits[c.pageSize * c.offset - 1]; + topDocs = searcher.searchAfter(lastDoc, c.query, c.pageSize, c.sort); } - results.put("data", data); - results.put("totalHits", topDocs.totalHits.value); + for (ScoreDoc sd : topDocs.scoreDocs) + { + Document doc = searcher.storedFields().document(sd.doc); + JSONObject elem = new JSONObject(); + for (IndexableField f : doc.getFields()) + { + String name = f.name(); + String[] vals = doc.getValues(name); + elem.put(name, vals.length > 1 ? Arrays.asList(vals) : vals[0]); + } + + writer.println(elem); + } - return results; + writer.flush(); } - private void exportCSV(SearchConfig c, HttpServletResponse response) throws IOException - { + private void exportCSV(SearchConfig c, HttpServletResponse response) throws IOException { PrintWriter writer = response.getWriter(); IndexSearcher searcher = c.cacheEntry.indexSearcher; - TopFieldDocs topDocs = searcher.search(c.query, Integer.MAX_VALUE, c.sort); - writer.println(String.join(",", c.fields)); - for (ScoreDoc scoreDoc : topDocs.scoreDocs) - { - Document doc = searcher.storedFields().document(scoreDoc.doc); - List rowValues = new ArrayList<>(); + ScoreDoc lastDoc = null; + int batchSize = 1000; - for (String fieldName : c.fields) - { - String[] values = doc.getValues(fieldName); - String value = values.length > 0 - ? String.join(",", values) - : ""; - - // Escape strings - value = "\"" + value.replace("\"", "\"\"") + "\""; - rowValues.add(value); + while (true) { + TopDocs topDocs = searcher.searchAfter(lastDoc, c.query, batchSize, c.sort); + ScoreDoc[] hits = topDocs.scoreDocs; + + if (hits.length == 0) { + break; } - writer.println(String.join(",", rowValues)); + for (ScoreDoc scoreDoc : hits) { + Document doc = searcher.storedFields().document(scoreDoc.doc); + List rowValues = new ArrayList<>(); + + for (String fieldName : c.fields) { + String[] values = doc.getValues(fieldName); + String value = values.length > 0 + ? 
String.join(",", values) + : ""; + value = "\"" + value.replace("\"", "\"\"") + "\""; + rowValues.add(value); + } + + writer.println(String.join(",", rowValues)); + } + lastDoc = hits[hits.length - 1]; } writer.flush(); @@ -633,8 +661,9 @@ public void cacheDefaultQuery() { try { + HttpServletResponse response = new Response(); JBrowseLuceneSearch.clearCache(_jsonFile.getObjectId()); - doSearchJSON(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false); + doSearchJSON(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false, response); } catch (ParseException | IOException e) { @@ -679,21 +708,28 @@ public String getName() return "JBrowse-Lucene Shutdown Listener"; } - @Override - public void shutdownPre() - { - - } - @Override public void shutdownStarted() { _log.info("Clearing all open JBrowse/Lucene cached readers"); JBrowseLuceneSearch.emptyCache(); + + try + { + if (_executor != null) + { + _executor.shutdown(); + } + } + catch (Exception e) + { + _log.error("Error shutting down SEARCH_EXECUTOR", e); + } } } - private static class SearchConfig { + private static class SearchConfig + { CacheEntry cacheEntry; Query query; int pageSize; @@ -701,7 +737,8 @@ private static class SearchConfig { Sort sort; List fields; - public SearchConfig(CacheEntry cacheEntry, Query query, int pageSize, int offset, Sort sort, List fields) { + public SearchConfig(CacheEntry cacheEntry, Query query, int pageSize, int offset, Sort sort, List fields) + { this.cacheEntry = cacheEntry; this.query = query; this.pageSize = pageSize; diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java index df8905483..8eed77f11 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java @@ -1,11 +1,13 @@ package org.labkey.jbrowse; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerManager; import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; @@ -17,7 +19,9 @@ import org.labkey.api.jbrowse.JBrowseFieldCustomizer; import org.labkey.api.jbrowse.JBrowseFieldDescriptor; import org.labkey.api.jbrowse.JBrowseService; +import org.labkey.api.module.Module; import org.labkey.api.module.ModuleLoader; +import org.labkey.api.module.ModuleProperty; import org.labkey.api.pipeline.PipeRoot; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineService; @@ -431,4 +435,24 @@ public boolean isAvailable(Container c) return c.getActiveModules().contains(ModuleLoader.getInstance().getModule(JBrowseModule.class)); } } + + private static final String JBrowseLuceneCoresProp = "JBrowseLuceneCores"; + + public int getCoresForLuceneSearches() { + Module m = ModuleLoader.getInstance().getModule(JBrowseModule.NAME); + ModuleProperty mp = m.getModuleProperties().get(JBrowseLuceneCoresProp); + String nCores = StringUtils.trimToNull(mp.getEffectiveValue(ContainerManager.getRoot())); + if (nCores == null) + { + return 1; + } + else if (!NumberUtils.isCreatable(nCores)) + { + _log.error("Improper value for " + JBrowseLuceneCoresProp + ": " + nCores); + } + + Number n = NumberUtils.createNumber(nCores); + + return n.intValue(); + } } diff --git 
}
diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
index d24f1abf1..6bfa58366 100644
--- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
+++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
@@ -478,9 +478,9 @@ protected Collection getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx)
         return Collections.emptySet();
     }

-    protected String printInputFile(SeuratObjectWrapper so)
+    private String printInputFile(SeuratObjectWrapper so)
     {
-        return "'" + so.getFile().getName() + "'";
+        return "'" + so.getFile().getPath() + "'";
     }

     protected Chunk createFinalChunk() throws PipelineJobException
diff --git a/singlecell/resources/chunks/CalculateUCellScores.R b/singlecell/resources/chunks/CalculateUCellScores.R
index 5199e13b5..c11640eea 100644
--- a/singlecell/resources/chunks/CalculateUCellScores.R
+++ b/singlecell/resources/chunks/CalculateUCellScores.R
@@ -1,9 +1,15 @@
+# SEURAT_MAX_THREADS is an environment variable, so Sys.getenv() returns a string;
+# coerce it to an integer before using it as a core count
+if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
+  nCores <- as.integer(Sys.getenv('SEURAT_MAX_THREADS'))
+} else {
+  nCores <- 1
+}
+
 for (datasetId in names(seuratObjects)) {
   printName(datasetId)
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
   message(paste0('Loading dataset ', datasetId, ', with total cells: ', ncol(seuratObj)))

-  seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate)
+  seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate, ncores = nCores)

   saveData(seuratObj, datasetId)
diff --git a/singlecell/resources/chunks/CustomUCell.R b/singlecell/resources/chunks/CustomUCell.R
index c5c9b0a02..d40a0d5bd 100644
--- a/singlecell/resources/chunks/CustomUCell.R
+++ b/singlecell/resources/chunks/CustomUCell.R
@@ -23,7 +23,13 @@ for (datasetId in names(seuratObjects)) {
     toCalculate[[vals[1]]] <- geneList
   }

-  seuratObj <- UCell::AddModuleScore_UCell(seuratObj, features = toCalculate, storeRanks = storeRanks, assay = assayName)
+  # as above, coerce the env var string to an integer core count
+  if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
+    nCores <- as.integer(Sys.getenv('SEURAT_MAX_THREADS'))
+  } else {
+    nCores <- 1
+  }
+
+  seuratObj <- UCell::AddModuleScore_UCell(seuratObj, features = toCalculate, storeRanks = storeRanks, assay = assayName, ncores = nCores)

   corData <- RIRA::PlotUcellCorrelation(seuratObj, toCalculate)
   for (n in names(toCalculate)) {
diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R
index af9c44251..960bbe631 100644
--- a/singlecell/resources/chunks/RunEscape.R
+++ b/singlecell/resources/chunks/RunEscape.R
@@ -1,8 +1,14 @@
+# SEURAT_MAX_THREADS is an environment variable, so Sys.getenv() returns a string;
+# coerce it to an integer before using it as a core count
+if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
+  nCores <- as.integer(Sys.getenv('SEURAT_MAX_THREADS'))
+} else {
+  nCores <- 1
+}
+
 for (datasetId in names(seuratObjects)) {
   printName(datasetId)
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

-  seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux)
+  seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, nCores = nCores)

   saveData(seuratObj, datasetId)
diff --git a/singlecell/resources/queries/singlecell/samples.js b/singlecell/resources/queries/singlecell/samples.js
index 0b78206ec..f66be9f90 100644
---
a/singlecell/resources/queries/singlecell/samples.js +++ b/singlecell/resources/queries/singlecell/samples.js @@ -23,6 +23,15 @@ function beforeUpsert(row, oldRow, errors){ else if (['No stim', 'No Stim'].indexOf(row.stim) !== -1){ row.stim = 'NoStim'; } + else if (['Infected cells: SIV+', 'Infected Cells: SIV+'].indexOf(row.stim) !== -1){ + row.stim = 'SIV-Infected CD4s'; + } + else if (['Infected cells: SIV-', 'Infected Cells: SIV-'].indexOf(row.stim) !== -1){ + row.stim = 'SIV-Infected CD4s / SIV-'; + } + else if (['Infected cells: Mock', 'Infected Cells: Mock'].indexOf(row.stim) !== -1){ + row.stim = 'Mock-Infected CD4s'; + } var lookupFields = ['stim']; for (var i=0;i inputFiles, JobContext c List currentFiles; Set originalInputs = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toSet()); - Map localCopyToOrig = new HashMap<>(); + Map inputFileMap = new HashMap<>(); if (_doProcessRawCounts) { currentFiles = processRawCounts(ctx, inputFiles, basename); } else { - try - { - Set distinctIds = new HashSet<>(); - Set copiedFiles = new HashSet<>(); + Set distinctIds = new HashSet<>(); - currentFiles = new ArrayList<>(); - for (SequenceOutputFile so : inputFiles) + currentFiles = new ArrayList<>(); + for (SequenceOutputFile so : inputFiles) + { + String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); + if (distinctIds.contains(datasetId)) { - String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); - if (distinctIds.contains(datasetId)) - { - throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId); - } - distinctIds.add(datasetId); - - //ensure local copy: - if (copiedFiles.contains(so.getFile().getName())) - { - throw new PipelineJobException("Duplicate files names in input data: " + so.getFile().getName()); - } - copiedFiles.add(so.getFile().getName()); - - File local = new File(ctx.getOutputDir(), so.getFile().getName()); - if (local.exists()) - { - local.delete(); - } - - FileUtils.copyFile(so.getFile(), local); - _resumer.getFileManager().addIntermediateFile(local); - - File cellBarcodes = CellHashingServiceImpl.get().getCellBarcodesFromSeurat(so.getFile(), false); - if (cellBarcodes.exists()) - { - ctx.getLogger().debug("Also making local copy of cellBarcodes TSV: " + cellBarcodes.getPath()); - File cellBarcodesLocal = new File(ctx.getOutputDir(), cellBarcodes.getName()); - if (cellBarcodesLocal.exists()) - { - cellBarcodesLocal.delete(); - } - - FileUtils.copyFile(cellBarcodes, cellBarcodesLocal); - _resumer.getFileManager().addIntermediateFile(cellBarcodesLocal); - } - else - { - ctx.getLogger().debug("cellBarcodes TSV not found, expected: " + cellBarcodes.getPath()); - } - - File metadataFile = CellHashingServiceImpl.get().getMetaTableFromSeurat(so.getFile(), false); - if (metadataFile.exists()) - { - ctx.getLogger().debug("Also making local copy of metadata TSV: " + metadataFile.getPath()); - File metadataFileLocal = new File(ctx.getOutputDir(), metadataFile.getName()); - if (metadataFileLocal.exists()) - { - metadataFileLocal.delete(); - } - - FileUtils.copyFile(metadataFile, metadataFileLocal); - _resumer.getFileManager().addIntermediateFile(metadataFileLocal); - } - else - { - ctx.getLogger().warn("metadataFile TSV not found, expected: " + metadataFile.getPath()); - } - - currentFiles.add(new 
SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, so.getFile(), so));
+                // key by file name, since the lookups below use output.getFile().getName()
+                inputFileMap.put(so.getFile().getName(), so.getFile());
+            }
         }
@@ -670,14 +610,14 @@ else if (inputFiles.size() == 1)
             //This indicates the job processed an input file, but did not create a new object (like running FindMarkers)
             boolean skipOutput = false;
-            if (localCopyToOrig.containsKey(output.getFile()))
+            if (inputFileMap.containsKey(output.getFile().getName()))
             {
                 try
                 {
-                    ctx.getLogger().debug("Comparing file context of output to determine if it matches input: "+ output.getFile().getName());
-                    ctx.getLogger().debug("Original file: " + localCopyToOrig.get(output.getFile()));
+                    ctx.getLogger().debug("Comparing file contents of output to determine if it matches input: " + output.getFile().getName());
+                    ctx.getLogger().debug("Original file: " + inputFileMap.get(output.getFile().getName()));
                     ctx.getLogger().debug("Pipeline output file: " + output.getFile());
-                    if (FileUtils.contentEquals(localCopyToOrig.get(output.getFile()), output.getFile()))
+                    if (FileUtils.contentEquals(inputFileMap.get(output.getFile().getName()), output.getFile()))
                     {
                         ctx.getLogger().info("Sequence output is the same as an input, will not re-create output for seurat object: " + output.getFile().getPath());
                         skipOutput = true;
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
index 5b4f8a98a..43be39118 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
@@ -24,7 +24,7 @@ public Provider()
             super("CalculateGeneComponentScores", "Calculate Gene Module Scores", "RIRA", "This will generate UCell scores for a set of pre-defined gene modules", Collections.singletonList(
                     SeuratToolParameter.create("savedComponent", "Saved Component(s)", "This is the name of the saved component (from RIRA) to apply", "ldk-simplecombo", new JSONObject()
                     {{
-                        put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;TCR_StimulationComponent1");
+                        put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;TCR_StimulationComponent1;PLS_Score_1;PLS_Score_2;PLS_Score_3;PLS_Score_4;PLS_Score_5;PLS_Score_6");
                         put("multiSelect", true);
                         put("allowBlank", false);
                         put("joinReturnValue", true);
diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java
index d2969cb63..c78b87d8c 100644
--- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java
+++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java
@@ -523,9 +523,26 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome referenceGenome)
             {
                 throw new PipelineJobException("Unable to find file: " + outputVloupe.getPath());
             }
+            else
+            {
+                if (isPrimaryDir)
+                {
+                    try
+                    {
+                        getPipelineCtx().getLogger().debug("Creating empty vloupe file as placeholder: " + outputVloupe.getPath());
+                        FileUtils.touch(outputVloupe);
+                    }
+                    catch (IOException e)
+                    {
+                        throw new
PipelineJobException(e); + } + } + } } + // NOTE: only tag the vloupe file for a/b: - else if (isPrimaryDir) + if (isPrimaryDir) { String versionString = "Version: " + getWrapper().getVersionString(); output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 5dd635db4..6afdbe366 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -529,16 +529,20 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt reportArgs.add("-i"); reportArgs.add(alignResultsGz.getPath()); + String resumeString = "nimble.report." + genomeId; + File doneFile = getNimbleDoneFile(ctx.getWorkingDirectory(), resumeString); + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt.gz"); - if (reportResultsGz.exists()) + if (reportResultsGz.exists() && !doneFile.exists()) { + ctx.getLogger().debug("Deleting existing result file: " + reportResultsGz.getPath()); reportResultsGz.delete(); } reportArgs.add("-o"); reportArgs.add(reportResultsGz.getPath()); - runUsingDocker(reportArgs, output, "nimble.report." + genomeId, ctx); + runUsingDocker(reportArgs, output, resumeString, ctx); if (!reportResultsGz.exists()) {