From 932e990d29665c468431087fd5b3b16f493dc495 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 6 May 2025 13:12:26 -0700 Subject: [PATCH 01/58] Pass nCores to UCell --- singlecell/resources/chunks/CalculateUCellScores.R | 8 +++++++- singlecell/resources/chunks/CustomUCell.R | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/chunks/CalculateUCellScores.R b/singlecell/resources/chunks/CalculateUCellScores.R index 5199e13b5..c11640eea 100644 --- a/singlecell/resources/chunks/CalculateUCellScores.R +++ b/singlecell/resources/chunks/CalculateUCellScores.R @@ -1,9 +1,15 @@ +if (Sys.getenv('SEURAT_MAX_THREADS') != '') { + nCores <- Sys.getenv('SEURAT_MAX_THREADS') +} else { + nCores <- 1 +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) message(paste0('Loading dataset ', datasetId, ', with total cells: ', ncol(seuratObj))) - seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate) + seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate, ncores = nCores) saveData(seuratObj, datasetId) diff --git a/singlecell/resources/chunks/CustomUCell.R b/singlecell/resources/chunks/CustomUCell.R index c5c9b0a02..d40a0d5bd 100644 --- a/singlecell/resources/chunks/CustomUCell.R +++ b/singlecell/resources/chunks/CustomUCell.R @@ -23,7 +23,13 @@ for (datasetId in names(seuratObjects)) { toCalculate[[vals[1]]] <- geneList } - seuratObj <- UCell::AddModuleScore_UCell(seuratObj, features = toCalculate, storeRanks = storeRanks, assay = assayName) + if (Sys.getenv('SEURAT_MAX_THREADS') != '') { + nCores <- Sys.getenv('SEURAT_MAX_THREADS') + } else { + nCores <- 1 + } + + seuratObj <- UCell::AddModuleScore_UCell(seuratObj, features = toCalculate, storeRanks = storeRanks, assay = assayName, ncores = nCores) corData <- 
RIRA::PlotUcellCorrelation(seuratObj, toCalculate) for (n in names(toCalculate)) { From 46bc04ad45630d6800a5edbbc43fe61ede0dd702 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 7 May 2025 06:32:54 -0700 Subject: [PATCH 02/58] Automatically strip non-numeric characters on import --- .../resources/web/singlecell/panel/PoolImportPanel.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js index 0d200e179..d713b1461 100644 --- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js @@ -334,7 +334,14 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }, expt: function(val, panel){ - return val || panel.EXPERIMENT; + val = val || panel.EXPERIMENT; + + // Remove leading characters: + if (val && !Ext4.isNumeric(val)) { + val = val.replace(/[^0-9]+/, ''); + } + + return val; }, celltype: function(val, panel){ From 1f45656cfa3f6bca7e341993ce516e75b33c9ec3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 9 May 2025 13:58:17 -0700 Subject: [PATCH 03/58] Improve nimble resume --- .../src/org/labkey/singlecell/run/NimbleHelper.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 5dd635db4..6afdbe366 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -529,16 +529,20 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt reportArgs.add("-i"); reportArgs.add(alignResultsGz.getPath()); + String resumeString = "nimble.report." + genomeId; + File doneFile = getNimbleDoneFile(ctx.getWorkingDirectory(), resumeString); + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." 
+ genomeId + ".txt.gz"); - if (reportResultsGz.exists()) + if (reportResultsGz.exists() && !doneFile.exists()) { + ctx.getLogger().debug("Deleting existing result file: " + reportResultsGz.getPath()); reportResultsGz.delete(); } reportArgs.add("-o"); reportArgs.add(reportResultsGz.getPath()); - runUsingDocker(reportArgs, output, "nimble.report." + genomeId, ctx); + runUsingDocker(reportArgs, output, resumeString, ctx); if (!reportResultsGz.exists()) { From 2fa67c8a16eac59ba9037f4de6e7153a60e8a741 Mon Sep 17 00:00:00 2001 From: hextraza Date: Tue, 13 May 2025 17:18:51 -0700 Subject: [PATCH 04/58] Add executor for multithreaded search (#329) * Add executor for multithreaded search * Update utils.ts with console.error * Add executor graceful shutdown --------- Co-authored-by: Sebastian Benjamin --- .../components/VariantTableWidget.tsx | 15 +++- jbrowse/src/client/JBrowse/utils.ts | 3 +- .../labkey/jbrowse/JBrowseLuceneSearch.java | 82 +++++++++++-------- .../labkey/jbrowse/JBrowseServiceImpl.java | 4 + 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx index 8b5b799b4..e50da12a9 100644 --- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx +++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx @@ -76,6 +76,13 @@ const VariantTableWidget = observer(props => { session.hideWidget(widget) } + function resetPaginationToFirstPage() { + setPageSizeModel(prev => ({ + page: 0, + pageSize: prev.pageSize, + })); + } + function handleQuery(passedFilters, pushToHistory, pageQueryModel = pageSizeModel, sortQueryModel = sortModel) { const { page = pageSizeModel.page, pageSize = pageSizeModel.pageSize } = pageQueryModel; const { field = "genomicPosition", sort = false } = sortQueryModel[0] ?? 
{}; @@ -461,7 +468,7 @@ const VariantTableWidget = observer(props => { columnVisibilityModel={columnVisibilityModel} pageSizeOptions={[10,25,50,100]} paginationModel={ pageSizeModel } - rowCount={ totalHits } + rowCount={ -1 } paginationMode="server" onPaginationModelChange = {(newModel) => { setPageSizeModel(newModel) @@ -485,6 +492,7 @@ const VariantTableWidget = observer(props => { onSortModelChange={(newModel) => { setSortModel(newModel) handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize }, newModel); + resetPaginationToFirstPage() }} localeText={{ MuiTablePagination: { @@ -515,7 +523,10 @@ const VariantTableWidget = observer(props => { fieldTypeInfo: fieldTypeInfo, allowedGroupNames: allowedGroupNames, promotedFilters: promotedFilters, - handleQuery: (filters) => handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize}, sortModel) + handleQuery: (filters) => { + handleQuery(filters, true, { page: 0, pageSize: pageSizeModel.pageSize}, sortModel) + resetPaginationToFirstPage() + } }} /> ); diff --git a/jbrowse/src/client/JBrowse/utils.ts b/jbrowse/src/client/JBrowse/utils.ts index 75fa6a195..0b51d8ef7 100644 --- a/jbrowse/src/client/JBrowse/utils.ts +++ b/jbrowse/src/client/JBrowse/utils.ts @@ -366,7 +366,8 @@ export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pa successCallback(jsonRes) }, failure: function(res) { - failureCallback("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId) + console.error("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId) + failureCallback("There was an error: status " + res.status) }, params: { "searchString": encoded, diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index df0adb665..e50875cb4 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ 
b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -16,6 +16,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LRUQueryCache; import org.apache.lucene.search.MatchAllDocsQuery; @@ -24,6 +25,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.UsageTrackingQueryCachingPolicy; import org.apache.lucene.store.Directory; @@ -65,6 +67,8 @@ import java.util.Map; import java.util.Set; import java.util.StringTokenizer; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -75,6 +79,7 @@ public class JBrowseLuceneSearch { private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries"); + private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(JBrowseServiceImpl.get().getCoresForLuceneSearches()); private final JBrowseSession _session; private final JsonFile _jsonFile; private final User _user; @@ -108,6 +113,11 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User private static synchronized CacheEntry getCacheEntryForSession(String trackObjectId, File indexPath) throws IOException { CacheEntry cacheEntry = _cache.get(trackObjectId); + if (SEARCH_EXECUTOR.isShutdown() || SEARCH_EXECUTOR.isTerminated()) + { + throw new IllegalStateException("The server is shutting down!"); + } + // Open directory of lucene path, get a directory reader, and create the index search manager if (cacheEntry == null) { 
@@ -116,7 +126,7 @@ private static synchronized CacheEntry getCacheEntryForSession(String trackObjec Directory indexDirectory = FSDirectory.open(indexPath.toPath()); LRUQueryCache queryCache = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed); IndexReader indexReader = DirectoryReader.open(indexDirectory); - IndexSearcher indexSearcher = new IndexSearcher(indexReader); + IndexSearcher indexSearcher = new IndexSearcher(indexReader, SEARCH_EXECUTOR); indexSearcher.setQueryCache(queryCache); indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy()); cacheEntry = new CacheEntry(queryCache, indexSearcher, indexPath); @@ -252,7 +262,7 @@ private SearchConfig createSearchConfig(User u, String searchString, final int p if (searchString.equals(ALL_DOCS)) { - booleanQueryBuilder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + booleanQueryBuilder.add(new ConstantScoreQuery(new MatchAllDocsQuery()), BooleanClause.Occur.MUST); } // Split input into tokens, 1 token per query separated by & @@ -321,41 +331,46 @@ else if (numericQueryParserFields.containsKey(fieldName)) } private JSONObject paginateJSON(SearchConfig c) throws IOException, ParseException { - // Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more. - // We then iterate over the range of documents we want based on the offset. This does grow in memory - // linearly with the number of documents, but my understanding is that these are just score,id pairs - // rather than full documents, so mem usage *should* still be pretty low. 
- // Perform the search with sorting - TopFieldDocs topDocs = c.cacheEntry.indexSearcher.search(c.query, c.pageSize * (c.offset + 1), c.sort); + IndexSearcher searcher = c.cacheEntry.indexSearcher; + TopDocs topDocs; + + if (c.offset == 0) { + topDocs = searcher.search(c.query, c.pageSize, c.sort); + } else { + TopFieldDocs prev = searcher.search(c.query, c.pageSize * c.offset, c.sort); + long totalHits = prev.totalHits.value; + ScoreDoc[] prevHits = prev.scoreDocs; + + if (prevHits.length < c.pageSize * c.offset) + { + JSONObject results = new JSONObject(); + results.put("data", Collections.emptyList()); + results.put("totalHits", totalHits); + return results; + } + + ScoreDoc lastDoc = prevHits[c.pageSize * c.offset - 1]; + topDocs = searcher.searchAfter(lastDoc, c.query, c.pageSize, c.sort); + } + JSONObject results = new JSONObject(); + List data = new ArrayList<>(topDocs.scoreDocs.length); - // Iterate over the doc list, (either to the total end or until the page ends) grab the requested docs, - // and add to returned results - List data = new ArrayList<>(); - for (int i = c.pageSize * c.offset; i < Math.min(c.pageSize * (c.offset + 1), topDocs.scoreDocs.length); i++) + for (ScoreDoc sd : topDocs.scoreDocs) { + Document doc = searcher.storedFields().document(sd.doc); JSONObject elem = new JSONObject(); - Document doc = c.cacheEntry.indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc); - - for (IndexableField field : doc.getFields()) + for (IndexableField f : doc.getFields()) { - String fieldName = field.name(); - String[] fieldValues = doc.getValues(fieldName); - if (fieldValues.length > 1) - { - elem.put(fieldName, fieldValues); - } - else - { - elem.put(fieldName, fieldValues[0]); - } + String name = f.name(); + String[] vals = doc.getValues(name); + elem.put(name, vals.length > 1 ? 
Arrays.asList(vals) : vals[0]); } data.add(elem); } results.put("data", data); results.put("totalHits", topDocs.totalHits.value); - return results; } @@ -679,17 +694,20 @@ public String getName() return "JBrowse-Lucene Shutdown Listener"; } - @Override - public void shutdownPre() - { - - } - @Override public void shutdownStarted() { _log.info("Clearing all open JBrowse/Lucene cached readers"); JBrowseLuceneSearch.emptyCache(); + + try + { + SEARCH_EXECUTOR.shutdown(); + } + catch (Exception e) + { + _log.error("Error shutting down SEARCH_EXECUTOR", e); + } } } diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java index 6ad23749a..db944ea98 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java @@ -435,4 +435,8 @@ public boolean isAvailable(Container c) return c.getActiveModules().contains(ModuleLoader.getInstance().getModule(JBrowseModule.class)); } } + + public int getCoresForLuceneSearches() { + return Runtime.getRuntime().availableProcessors(); + } } From b2e66bb9b6face2ceecd015a671a5094718bd3d2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 07:39:16 -0700 Subject: [PATCH 05/58] Bugfix to JBrowseSessionTransform --- .../pipeline_code/extra_tools_install.sh | 13 +++++++++++++ .../run/alignment/MosaikWrapper.java | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 823aa7dcd..af042906f 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -305,3 +305,16 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/bbmap || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/bbmap + + wget https://sourceforge.net/projects/bbmap/files/BBMap_39.25.tar.gz + tar -xf BBMap_39.25.tar.gz + + mv bbmap $LKTOOLS_DIR/ +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java index 378707865..5565d03ac 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/MosaikWrapper.java @@ -58,7 +58,7 @@ public MosaikWrapper(@Nullable Logger logger) public static class MosaikAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep { - public MosaikAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + public MosaikAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new MosaikWrapper(ctx.getLogger())); } From e7135411114b598668a6cd82f16ab95470d6586c Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 09:11:50 -0700 Subject: [PATCH 06/58] Initial support of bbmap --- .../pipeline_code/extra_tools_install.sh | 1 + .../SequenceAnalysisModule.java | 2 + .../run/alignment/BBMapWrapper.java | 303 ++++++++++++++++++ 3 files changed, 306 insertions(+) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index af042906f..e3cce3c55 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -315,6 +315,7 @@ then tar -xf BBMap_39.25.tar.gz mv bbmap $LKTOOLS_DIR/ + ln -s $LKTOOLS_DIR/bbmap/bbmap.sh $LKTOOLS_DIR/bbmap.sh else echo "Already installed" fi diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 2d7ea845f..f10c1c9ec 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -91,6 +91,7 @@ import org.labkey.sequenceanalysis.query.SequenceAnalysisUserSchema; import org.labkey.sequenceanalysis.query.SequenceTriggerHelper; import org.labkey.sequenceanalysis.run.RestoreSraDataHandler; +import org.labkey.sequenceanalysis.run.alignment.BBMapWrapper; import org.labkey.sequenceanalysis.run.alignment.BWAMem2Wrapper; import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper; import org.labkey.sequenceanalysis.run.alignment.BWASWWrapper; @@ -309,6 +310,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new StarWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new Pbmm2Wrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new VulcanWrapper.Provider()); + SequencePipelineService.get().registerPipelineStep(new BBMapWrapper.Provider()); //de novo assembly SequencePipelineService.get().registerPipelineStep(new TrinityRunner.Provider()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java new file mode 100644 index 000000000..b348cbe9c --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -0,0 +1,303 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import 
org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.IndexOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * User: bimber + * Date: 12/14/12 + * Time: 7:40 AM + */ +public class BBMapWrapper extends AbstractCommandWrapper +{ + public BBMapWrapper(@Nullable Logger logger) + { + super(logger); + } + + public static class BBMapAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep + { + public BBMapAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BBMapWrapper(ctx.getLogger())); + } + + @Override + public boolean supportsGzipFastqs() + { + return true; + } + + @Override + public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException + { + IndexOutputImpl output = new IndexOutputImpl(referenceGenome); + + File indexDir = new File(outputDir, getProvider().getName()); + boolean hasCachedIndex = AlignerIndexUtil.hasCachedIndex(this.getPipelineCtx(), 
getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); + if (!hasCachedIndex) + { + getWrapper().buildIndex(referenceGenome.getWorkingFastaFile(), indexDir); + } + + AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getProvider().getName(), referenceGenome); + + return output; + } + + @Override + public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException + { + File inputFastq1 = assertSingleFile(inputFastqs1); + File inputFastq2 = assertSingleFile(inputFastqs2); + + AlignmentOutputImpl output = new AlignmentOutputImpl(); + AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getProvider().getName(), getProvider().getName(), referenceGenome, true); + File localIdx = new File(getPipelineCtx().getWorkingDirectory(), "Shared/" + getProvider().getName()); + if (!localIdx.exists()) + { + throw new PipelineJobException("Index not copied: " + localIdx); + } + output.addIntermediateFile(new File(getPipelineCtx().getWorkingDirectory(), "Shared")); + + // NOTE: bbmap only supports the location ./ref for the index: + localIdx = new File(localIdx, "ref"); + if (!localIdx.exists()) + { + throw new PipelineJobException("ref dir not found: " + localIdx); + } + + File refDir = new File(getPipelineCtx().getWorkingDirectory(), "ref"); + try + { + FileUtils.moveDirectory(localIdx, refDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + getWrapper().setOutputDir(outputDirectory); + + List params = new ArrayList<>(); + + String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambig").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); + if (ambig != null) + { + params.add("ambig=" + ambig); + if ("all".equals(ambig)) + { + params.add("xmtag=t"); + } + } + + for (String 
paramName : Arrays.asList("local", "semiperfectmode")) + { + if (getProvider().getParameterByName(paramName).hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) + { + boolean val = getProvider().getParameterByName(paramName).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + params.add(paramName + "=" + (val ? "t" : "f")); + } + } + + if (getProvider().getParameterByName("midin").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) + { + Double val = getProvider().getParameterByName("midin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); + params.add("midin=" + val); + } + + File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params); + if (!bam.exists()) + { + throw new PipelineJobException("BAM not created, expected: " + bam.getPath()); + } + + output.setBAM(bam); + output.addCommandsExecuted(getWrapper().getCommandsExecuted()); + + return output; + } + + @Override + public boolean doAddReadGroups() + { + return true; + } + + @Override + public boolean doSortIndexBam() + { + return true; + } + + @Override + public boolean alwaysCopyIndexToWorkingDir() + { + return false; + } + } + + public static class Provider extends AbstractAlignmentStepProvider + { + public Provider() + { + super("BBMap", "BBMap is suitable for longer reads and has the option to retain multiple hits per read. The only downside is that it can be slower. 
When this pipeline was first written, this aligner was preferred for sequence-based genotyping and similar applications which require retaining multiple hits.", Arrays.asList( + ToolParameterDescriptor.create("ambiguous", "Ambiguous Handing", "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", "ldk-simplecombo", new JSONObject() + {{ + put("storeValues", "all;best;toss;random"); + put("delimiter", ";"); + put("multiSelect", false); + }}, "all"), + ToolParameterDescriptor.create("local", "Local Alignment", "Set to true to use local, rather than global, alignments. This will soft-clip ugly ends of poor alignments", "checkbox", new JSONObject() + {{ + put("checked", true); + }}, true), + ToolParameterDescriptor.create("semiperfectmode", "Semi-perfectmode", "Allow only perfect and semiperfect (perfect except for N's in the reference) mappings", "checkbox", new JSONObject() + {{ + put("checked", true); + }}, true), + ToolParameterDescriptor.create("midin", "Minimum Identity", "Approximate minimum alignment identity to look for. 
Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() + {{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0.95) + ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, true); + } + + @Override + public BBMapAlignmentStep create(PipelineContext context) + { + return new BBMapAlignmentStep(this, context); + } + } + + protected File getExe() + { + return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh"); + } + + public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options) throws PipelineJobException + { + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("-in=" + inputFastq1.getPath()); + if (inputFastq2 != null) + { + args.add("-in2=" + inputFastq2.getPath()); + } + + args.add("-eoom"); + + args.add("mdtag=t"); + args.add("nhtag=t"); + args.add("amtag=t"); + args.add("nmtag=t"); + args.add("printunmappedcount=t"); + args.add("overwrite=t"); + + // Maximum number of total alignments to print per read. Only relevant when secondary=t. + args.add("maxsites=-1"); + + // Only print secondary alignments for ambiguously-mapped reads. + args.add("secondary=t"); + args.add("ssao=t"); + + // CONSIDER: mappedonly=f If true, treats 'out' like 'outm' + // CONSIDER: outu= Write only unmapped reads to this file. Does not include unmapped paired reads with a mapped mate. + File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); + if (outputSam.exists()) + { + outputSam.delete(); + } + + args.add("outm=" + outputSam.getPath()); + + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + args.add("-Xmx=" + maxRam); + } + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + args.add(maxThreads == null ? 
"threads=1" : "threads=" + maxThreads); + + args.addAll(options); + + setWorkingDir(outputDirectory); + execute(args); + + if (!outputSam.exists()) + { + throw new PipelineJobException("File not found: " + outputSam.getPath()); + } + + File outputBam = new File(outputDirectory, basename + ".bbmap.bam"); + if (outputBam.exists()) + { + outputBam.delete(); + } + + SamtoolsRunner samtoolsRunner = new SamtoolsRunner(getLogger()); + List stArgs = new ArrayList<>(); + stArgs.add(samtoolsRunner.getSamtoolsPath().getPath()); + stArgs.add("view"); + stArgs.add("-o"); + stArgs.add(outputBam.getPath()); + stArgs.add(outputSam.getPath()); + samtoolsRunner.execute(stArgs); + + if (!outputBam.exists()) + { + throw new PipelineJobException("File not found: " + outputBam.getPath()); + } + + outputSam.delete(); + + return outputBam; + } + + public File buildIndex(File inputFasta, File outDir) throws PipelineJobException + { + List args = new ArrayList<>(); + args.add(getExe().getPath()); + args.add("k=7"); + args.add("path=" + outDir.getPath()); + args.add("ref=" + inputFasta.getPath()); + + setWorkingDir(outDir); + execute(args); + + File output = new File(outDir, "ref"); + if (!output.exists()) + { + throw new PipelineJobException("Unable to find file: " + output); + } + + return output; + } +} From 7a2739fd5a0e07d0bbe5c66fb2fa416c6dd72d86 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 09:22:52 -0700 Subject: [PATCH 07/58] Pass nCores to escape --- singlecell/resources/chunks/RunEscape.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R index af9c44251..960bbe631 100644 --- a/singlecell/resources/chunks/RunEscape.R +++ b/singlecell/resources/chunks/RunEscape.R @@ -1,8 +1,14 @@ +if (Sys.getenv('SEURAT_MAX_THREADS') != '') { + nCores <- Sys.getenv('SEURAT_MAX_THREADS') +} else { + nCores <- 1 +} + for (datasetId in names(seuratObjects)) { printName(datasetId) 
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux) + seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, nCores = nCores) saveData(seuratObj, datasetId) From 58c7fb852630311b142e499eabbd5e68390c6940 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 10:51:50 -0700 Subject: [PATCH 08/58] Bugfix to bbmap/index --- .../labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index b348cbe9c..89d0a2011 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -283,6 +283,11 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu public File buildIndex(File inputFasta, File outDir) throws PipelineJobException { + if (!outDir.exists()) + { + outDir.mkdirs(); + } + List args = new ArrayList<>(); args.add(getExe().getPath()); args.add("k=7"); From c4a59ea1fe5c572761cd3e5c150df6bf3f709310 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 12:36:25 -0700 Subject: [PATCH 09/58] Bugfix to bbmap --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 89d0a2011..4a0dbb21c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -105,7 +105,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu List params = new ArrayList<>(); - String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambig").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); + String ambig = StringUtils.trimToNull(getProvider().getParameterByName("ambiguous").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); if (ambig != null) { params.add("ambig=" + ambig); From 38b2b53405ddec1fc8ba76c63975eb76de99ef2e Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 17 May 2025 16:01:22 -0700 Subject: [PATCH 10/58] Bugfix to bbmap --- .../labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 4a0dbb21c..9259b1507 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -94,6 +94,12 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu File refDir = new File(getPipelineCtx().getWorkingDirectory(), "ref"); try { + if (refDir.exists()) + { + getPipelineCtx().getLogger().debug("Deleting existing ref dir: " + refDir); + FileUtils.deleteDirectory(refDir); + } + FileUtils.moveDirectory(localIdx, refDir); } catch (IOException e) From 2eda1ad27424074f878535774b2f60cc807f49a6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 18 May 2025 07:16:12 -0700 Subject: [PATCH 11/58] Bugfix to bbmap --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 9259b1507..426922a36 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -246,7 +246,7 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { - args.add("-Xmx=" + maxRam); + args.add("-Xmx=" + maxRam + "g"); } Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); From af5a272d070aa129f0188ef1393f5692aa98d89d Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 18 May 2025 07:57:30 -0700 Subject: [PATCH 12/58] Update download docs --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 426922a36..a77b46d2c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -227,7 +227,7 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu args.add("overwrite=t"); // Maximum number of total alignments to print per read. Only relevant when secondary=t. - args.add("maxsites=-1"); + args.add("maxsites=50"); // Only print secondary alignments for ambiguously-mapped reads. 
args.add("secondary=t"); From 988600e1907d478649db135e0073f5d84780a4a2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 18 May 2025 08:33:00 -0700 Subject: [PATCH 13/58] BBMap updates --- .../sequenceanalysis/run/alignment/BBMapWrapper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index a77b46d2c..6695f9c36 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -130,10 +130,10 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu } } - if (getProvider().getParameterByName("midin").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) + if (getProvider().getParameterByName("minid").hasValueInJson(getPipelineCtx().getJob(), getProvider(), getStepIdx())) { - Double val = getProvider().getParameterByName("midin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); - params.add("midin=" + val); + Double val = getProvider().getParameterByName("minid").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); + params.add("minid=" + val); } File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params); @@ -186,7 +186,7 @@ public Provider() {{ put("checked", true); }}, true), - ToolParameterDescriptor.create("midin", "Minimum Identity", "Approximate minimum alignment identity to look for. Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() + ToolParameterDescriptor.create("minid", "Minimum Identity", "Approximate minimum alignment identity to look for. 
Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() {{ put("minValue", 0); put("maxValue", 1); @@ -246,7 +246,7 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { - args.add("-Xmx=" + maxRam + "g"); + args.add("-Xmx" + maxRam + "g"); } Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); From bf6e784c29a25298db44e465d64ee370326b2b80 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 18 May 2025 09:35:08 -0700 Subject: [PATCH 14/58] Copy bbmap index to correct dir --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 6695f9c36..1525d6050 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -91,7 +91,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu throw new PipelineJobException("ref dir not found: " + localIdx); } - File refDir = new File(getPipelineCtx().getWorkingDirectory(), "ref"); + File refDir = new File(outputDirectory, "ref"); try { if (refDir.exists()) From 5d64be0c5d4c80a404762bbb1f7732b866812106 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 08:59:46 -0700 Subject: [PATCH 15/58] Support CRAM archival mode --- .../AbstractAlignmentStepProvider.java | 5 ++++ .../pipeline/SamtoolsCramConverter.java | 26 +++++++++++++------ .../pipeline/ConvertToCramHandler.java | 6 ++++- .../pipeline/SequenceAlignmentTask.java | 4 ++- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java 
b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java index 9d7b53536..d477e68d1 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java @@ -34,6 +34,7 @@ abstract public class AbstractAlignmentStepProvider getParamList(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { boolean replaceOriginal = ctx.getParams().optBoolean("replaceOriginal", false); + boolean doCramArchivalMode = ctx.getParams().optBoolean("doCramArchivalMode", false); ctx.getLogger().info("Replace input BAM: " + replaceOriginal); Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); @@ -124,7 +128,7 @@ public void processFilesRemote(List inputFiles, JobContext c } else { - new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), cram, genome.getWorkingFastaFileGzipped(), true, threads); + new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), cram, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); } checkCramAndIndex(so); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 19ea891a7..40116cc01 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1145,7 +1145,9 @@ else if (step.expectToCreateNewBam()) // optional convert to CRAM: ToolParameterDescriptor cramParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.CONVERT_TO_CRAM); + ToolParameterDescriptor cramArchivalParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.CRAM_ARCHIVAL_MODE); boolean 
doCramConvert = cramParam != null && cramParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + boolean doArchival = cramArchivalParam != null && cramArchivalParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); if (doCramConvert) { getJob().getLogger().info("BAM will be converted to CRAM"); @@ -1154,7 +1156,7 @@ else if (step.expectToCreateNewBam()) Integer threads = SequenceTaskHelper.getMaxThreads(getJob()); if (!cramFileIdx.exists()) { - new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads); + new SamtoolsCramConverter(getJob().getLogger()).convert(renamedBam, cramFile, referenceGenome.getWorkingFastaFileGzipped(), true, threads, doArchival); } else { From 53bfea1a0c51128b1865a9b02486984dc1023fd6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 09:51:11 -0700 Subject: [PATCH 16/58] Expand ConvertToCramHandler to support CRAM --- .../labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index bb18668a2..fa80ef5f8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -55,7 +55,7 @@ public ConvertToCramHandler() @Override public boolean canProcess(SequenceOutputFile o) { - return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bam.getFileType().isType(o.getFile()); + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); } @Override From 4517fce31e59d226f7db69699d94580c2c1f9a23 Mon Sep 17 00:00:00 2001 
From: bbimber Date: Mon, 19 May 2025 11:07:01 -0700 Subject: [PATCH 17/58] Bugfix to SamtoolsCramConverter --- .../pipeline/SamtoolsCramConverter.java | 4 ++ .../pipeline/ConvertToCramHandler.java | 55 +++++++++++++------ 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java index 2a4354a7d..ed698395e 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java @@ -21,6 +21,10 @@ public SamtoolsCramConverter(Logger log) public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads, boolean archivalMode) throws PipelineJobException { getLogger().info("Converting SAM/BAM to CRAM: " + inputBam.getPath()); + if (inputBam.equals(outputCram)) + { + throw new PipelineJobException("Input/output files are the same"); + } List params = new ArrayList<>(); params.add(getSamtoolsPath().getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index fa80ef5f8..fc625ef1a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -113,29 +113,30 @@ public void processFilesRemote(List inputFiles, JobContext c for (SequenceOutputFile so : inputFiles) { ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); - File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); - File cramIdx = SamtoolsCramConverter.getExpectedCramIndex(cram); + File outputFile = new 
File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram"); if (!so.getFile().exists()) { - if (replaceOriginal && cramIdx.exists()) + File inputAsCram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); + File inputAsCramIdx = SamtoolsCramConverter.getExpectedCramIndex(inputAsCram); + if (replaceOriginal && SequenceUtil.FILETYPE.bam.getFileType().isType(so.getFile()) && inputAsCram.exists() && inputAsCramIdx.exists()) { ctx.getLogger().debug("BAM does not exist, but CRAM index does. Proceeding on the assumption this is a resume of a failed job."); } else { - throw new PipelineJobException("Unable to find BAM: " + so.getFile().getPath()); + throw new PipelineJobException("Unable to find input CRAM/BAM: " + so.getFile().getPath()); } } else { - new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), cram, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); + new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), outputFile, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); } checkCramAndIndex(so); if (replaceOriginal) { - ctx.getLogger().info("Deleting original BAM: " + so.getFile().getPath()); + ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath()); if (so.getFile().exists()) { SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); @@ -174,6 +175,7 @@ public void complete(JobContext ctx, List inputs, List inputs, List row = new CaseInsensitiveHashMap<>(); + row.put("rowid", so.getRowid()); + row.put("container", so.getContainer()); + boolean doUpdate = false; + String description = so.getDescription(); if (so.getName().contains(".bam")) { - Map row = new CaseInsensitiveHashMap<>(); - row.put("rowid", so.getRowid()); - row.put("container", so.getContainer()); row.put("name", so.getName().replaceAll("\\.bam", "\\.cram")); - row.put("description", (so.getDescription() == null ? 
"" : so.getDescription() + "\n") + "Converted from BAM to CRAM"); + description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM"; + row.put("description", description); + doUpdate = true; + } + + if (doCramArchivalMode) + { + description = (description == null ? "" : description + "\n") + "CRAM Archival Mode"; + row.put("description", description); + doUpdate = true; + } + + if (doUpdate) + { toUpdate.add(row); } } - try - { - Container target = ctx.getJob().getContainer().isWorkbook() ? ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer(); - QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null); - } - catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e) + if (!toUpdate.isEmpty()) { - throw new PipelineJobException(e); - + try + { + Container target = ctx.getJob().getContainer().isWorkbook() ? 
ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer(); + QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null); + } + catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e) + { + throw new PipelineJobException(e); + } } } } From 150f4b68b1a9f7125ec4e9c63eea5bde129b9014 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 11:13:10 -0700 Subject: [PATCH 18/58] Include CRAM Archival Mode in description --- .../api/sequenceanalysis/pipeline/AlignmentStep.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignmentStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignmentStep.java index bef8780a3..9e6973d38 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignmentStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignmentStep.java @@ -82,11 +82,19 @@ default boolean supportsMetrics() boolean supportsGzipFastqs(); @Override - AlignmentStepProvider getProvider(); + AlignmentStepProvider getProvider(); default String getAlignmentDescription() { - return "Aligner: " + getProvider().getName(); + ToolParameterDescriptor cramArchivalParam = getProvider().getParameterByName(AbstractAlignmentStepProvider.CRAM_ARCHIVAL_MODE); + boolean doArchival = cramArchivalParam != null && cramArchivalParam.extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + String ret = "Aligner: " + getProvider().getName(); + if (doArchival) + { + ret = ret + "\nCRAM Archival Mode"; + } + + return ret; } interface AlignmentOutput extends PipelineStepOutput From be330ead20cec6b2c48cf53fb24962ab99c9684b Mon Sep 17 
00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 11:30:20 -0700 Subject: [PATCH 19/58] Allow bbmap to retain unmapped reads --- .../run/alignment/BBMapWrapper.java | 71 +++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 1525d6050..698d40a8c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -20,6 +20,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.FileUtil; import java.io.File; import java.io.IOException; @@ -136,7 +137,8 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu params.add("minid=" + val); } - File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params); + boolean retainUnmapped = getProvider().getParameterByName("retainUnmapped").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + File bam = getWrapper().doAlignment(inputFastq1, inputFastq2, outputDirectory, basename, params, retainUnmapped); if (!bam.exists()) { throw new PipelineJobException("BAM not created, expected: " + bam.getPath()); @@ -145,6 +147,17 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu output.setBAM(bam); output.addCommandsExecuted(getWrapper().getCommandsExecuted()); + if (retainUnmapped) + { + File unmappedBam = getWrapper().getUnmappedFilename(bam); + if (!unmappedBam.exists()) + { + throw new PipelineJobException("Unable to find file: " + unmappedBam.getPath()); + } + + output.addSequenceOutput(unmappedBam, rs.getName() + 
": BBmap unmapped reads", "Alignment", rs.getReadsetId(), null, referenceGenome.getGenomeId(), "BBMap Unmapped Reads"); + } + return output; } @@ -184,14 +197,18 @@ public Provider() }}, true), ToolParameterDescriptor.create("semiperfectmode", "Semi-perfectmode", "Allow only perfect and semiperfect (perfect except for N's in the reference) mappings", "checkbox", new JSONObject() {{ - put("checked", true); - }}, true), + put("checked", false); + }}, false), ToolParameterDescriptor.create("minid", "Minimum Identity", "Approximate minimum alignment identity to look for. Higher is faster and less sensitive", "ldk-numberfield", new JSONObject() {{ put("minValue", 0); put("maxValue", 1); put("decimalPrecision", 2); - }}, 0.95) + }}, 0.95), + ToolParameterDescriptor.create("retainUnmapped", "Retain Unmapped", "If checked, unmapped reads are written to a separate BAM file", "checkbox", new JSONObject() + {{ + put("checked", false); + }}, false) ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, true); } @@ -202,12 +219,18 @@ public BBMapAlignmentStep create(PipelineContext context) } } + protected File getUnmappedFilename(File mappedSamOrBam) + { + String fn = FileUtil.getBaseName(mappedSamOrBam.getName()) + ".unmapped." 
+ FileUtil.getExtension(mappedSamOrBam.getName()); + return new File(mappedSamOrBam.getParentFile(), fn); + } + protected File getExe() { return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh"); } - public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options) throws PipelineJobException + public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options, boolean retainUnmaped) throws PipelineJobException { List args = new ArrayList<>(); args.add(getExe().getPath()); @@ -243,6 +266,18 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu args.add("outm=" + outputSam.getPath()); + File outputUnmappedSam = null; + if (retainUnmaped) + { + outputUnmappedSam = getUnmappedFilename(outputSam); + if (outputUnmappedSam.exists()) + { + outputUnmappedSam.delete(); + } + + args.add("outu=" + outputUnmappedSam.getPath()); + } + Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { @@ -284,6 +319,32 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu outputSam.delete(); + // repeat for unmapped: + if (outputUnmappedSam != null) + { + File outputUnmappedBam = getUnmappedFilename(outputBam); + if (outputUnmappedBam != null && outputUnmappedBam.exists()) + { + outputUnmappedBam.delete(); + } + + samtoolsRunner = new SamtoolsRunner(getLogger()); + stArgs = new ArrayList<>(); + stArgs.add(samtoolsRunner.getSamtoolsPath().getPath()); + stArgs.add("view"); + stArgs.add("-o"); + stArgs.add(outputUnmappedBam.getPath()); + stArgs.add(outputUnmappedSam.getPath()); + samtoolsRunner.execute(stArgs); + + if (!outputUnmappedBam.exists()) + { + throw new PipelineJobException("File not found: " + outputBam.getPath()); + } + + outputUnmappedSam.delete(); + } + return outputBam; } From 1c53e3b1663b5a1e0738106322a9bca9e58eb6f0 Mon Sep 17 00:00:00 2001 From: bbimber Date: 
Mon, 19 May 2025 11:53:07 -0700 Subject: [PATCH 20/58] Add saa=t to bbmap --- .../labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 698d40a8c..ec9c3786c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -256,6 +256,9 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu args.add("secondary=t"); args.add("ssao=t"); + // NOTE: this will increase BAM size. Consider whether really needed: + args.add("saa=t"); + // CONSIDER: mappedonly=f If true, treats 'out' like 'outm' // CONSIDER: outu= Write only unmapped reads to this file. Does not include unmapped paired reads with a mapped mate. File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); From 2fb4b480021d4fecff24d7d7bcb0b9b58d96dc28 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 12:39:04 -0700 Subject: [PATCH 21/58] Simplify bbmap and unmapped reads --- .../run/alignment/BBMapWrapper.java | 64 +------------------ 1 file changed, 3 insertions(+), 61 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index ec9c3786c..12d314d97 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -20,7 +20,6 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; -import org.labkey.api.util.FileUtil; 
import java.io.File; import java.io.IOException; @@ -147,17 +146,6 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu output.setBAM(bam); output.addCommandsExecuted(getWrapper().getCommandsExecuted()); - if (retainUnmapped) - { - File unmappedBam = getWrapper().getUnmappedFilename(bam); - if (!unmappedBam.exists()) - { - throw new PipelineJobException("Unable to find file: " + unmappedBam.getPath()); - } - - output.addSequenceOutput(unmappedBam, rs.getName() + ": BBmap unmapped reads", "Alignment", rs.getReadsetId(), null, referenceGenome.getGenomeId(), "BBMap Unmapped Reads"); - } - return output; } @@ -209,7 +197,7 @@ public Provider() {{ put("checked", false); }}, false) - ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, true); + ), null, "https://prost.readthedocs.io/en/latest/bbmap.html", true, false); } @Override @@ -219,18 +207,12 @@ public BBMapAlignmentStep create(PipelineContext context) } } - protected File getUnmappedFilename(File mappedSamOrBam) - { - String fn = FileUtil.getBaseName(mappedSamOrBam.getName()) + ".unmapped." + FileUtil.getExtension(mappedSamOrBam.getName()); - return new File(mappedSamOrBam.getParentFile(), fn); - } - protected File getExe() { return SequencePipelineService.get().getExeForPackage("BBMAPPATH", "bbmap.sh"); } - public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options, boolean retainUnmaped) throws PipelineJobException + public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outputDirectory, String basename, List options, boolean retainUnmapped) throws PipelineJobException { List args = new ArrayList<>(); args.add(getExe().getPath()); @@ -259,27 +241,13 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu // NOTE: this will increase BAM size. 
Consider whether really needed: args.add("saa=t"); - // CONSIDER: mappedonly=f If true, treats 'out' like 'outm' - // CONSIDER: outu= Write only unmapped reads to this file. Does not include unmapped paired reads with a mapped mate. File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); if (outputSam.exists()) { outputSam.delete(); } - args.add("outm=" + outputSam.getPath()); - - File outputUnmappedSam = null; - if (retainUnmaped) - { - outputUnmappedSam = getUnmappedFilename(outputSam); - if (outputUnmappedSam.exists()) - { - outputUnmappedSam.delete(); - } - - args.add("outu=" + outputUnmappedSam.getPath()); - } + args.add((retainUnmapped ? "out=" : "outm=") + outputSam.getPath()); Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) @@ -322,32 +290,6 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu outputSam.delete(); - // repeat for unmapped: - if (outputUnmappedSam != null) - { - File outputUnmappedBam = getUnmappedFilename(outputBam); - if (outputUnmappedBam != null && outputUnmappedBam.exists()) - { - outputUnmappedBam.delete(); - } - - samtoolsRunner = new SamtoolsRunner(getLogger()); - stArgs = new ArrayList<>(); - stArgs.add(samtoolsRunner.getSamtoolsPath().getPath()); - stArgs.add("view"); - stArgs.add("-o"); - stArgs.add(outputUnmappedBam.getPath()); - stArgs.add(outputUnmappedSam.getPath()); - samtoolsRunner.execute(stArgs); - - if (!outputUnmappedBam.exists()) - { - throw new PipelineJobException("File not found: " + outputBam.getPath()); - } - - outputUnmappedSam.delete(); - } - return outputBam; } From 3ed09280fd5fb280de1a283d3a795b3cd7b147ed Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 12:50:27 -0700 Subject: [PATCH 22/58] Support lossy_names for SamtoolsCramConverter --- .../api/sequenceanalysis/pipeline/SamtoolsCramConverter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java index ed698395e..174ac5fe2 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java @@ -31,7 +31,7 @@ public File convert(File inputBam, File outputCram, File gzippedFasta, boolean d params.add("view"); params.add("--output-fmt"); - params.add("cram,version=3.0"); + params.add("cram,version=3.0" + (archivalMode ? ",lossy_names=1" : "")); params.add("-o"); params.add(outputCram.getPath()); From 39cc937fb6a9f09a6007566c8a403268a581ef95 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 13:35:23 -0700 Subject: [PATCH 23/58] Correct bbmap to use saa=f --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 12d314d97..7151c7718 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -239,7 +239,7 @@ public File doAlignment(File inputFastq1, @Nullable File inputFastq2, File outpu args.add("ssao=t"); // NOTE: this will increase BAM size. 
Consider whether really needed: - args.add("saa=t"); + args.add("saa=f"); File outputSam = new File(outputDirectory, basename + ".bbmap.sam"); if (outputSam.exists()) From 2fb3d8cb5c2a67960203a35a12be0c5705b152eb Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 15:30:57 -0700 Subject: [PATCH 24/58] Better resume for ConvertToCramHandler --- .../pipeline/ConvertToCramHandler.java | 120 +++++++++++------- .../run/analysis/BamIterator.java | 10 +- 2 files changed, 86 insertions(+), 44 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index fc625ef1a..204d9a52b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -1,5 +1,6 @@ package org.labkey.sequenceanalysis.pipeline; +import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.Container; @@ -27,6 +28,7 @@ import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; +import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -39,16 +41,16 @@ public class ConvertToCramHandler extends AbstractParameterizedOutputHandler inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException @@ -113,7 +115,7 @@ public void processFilesRemote(List inputFiles, JobContext c for (SequenceOutputFile so : inputFiles) { ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); - File outputFile = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram"); + File outputFile = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".cram"); 
if (!so.getFile().exists()) { File inputAsCram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); @@ -137,16 +139,44 @@ public void processFilesRemote(List inputFiles, JobContext c if (replaceOriginal) { ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath()); - if (so.getFile().exists()) + if (SequenceUtil.FILETYPE.bam.getFileType().isType(so.getFile())) { - SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); - so.getFile().delete(); + if (so.getFile().exists()) + { + SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); + so.getFile().delete(); + } + else + { + ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?"); + } } - else + else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) { - ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?"); + try + { + if (!so.getFile().exists()) + { + throw new PipelineJobException("Unable to find input CRAM/BAM: " + so.getFile().getPath()); + } + + SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); + so.getFile().delete(); + + FileUtils.moveFile(outputFile, so.getFile()); + FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), new File(so.getFile() + ".crai")); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } } } + else + { + String description = (so.getDescription() == null ? 
"" : so.getDescription() + "\n") + "CRAM Archival Mode"; + ctx.getFileManager().addSequenceOutput(outputFile, so.getName(), so.getCategory(), so.getReadset(), null, so.getLibrary_id(), description); + } } } @@ -175,41 +205,45 @@ public void complete(JobContext ctx, List inputs, List row = new CaseInsensitiveHashMap<>(); - row.put("rowid", so.getRowid()); - row.put("container", so.getContainer()); - boolean doUpdate = false; - String description = so.getDescription(); - if (so.getName().contains(".bam")) + if (replaceOriginal) { - row.put("name", so.getName().replaceAll("\\.bam", "\\.cram")); - description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM"; - row.put("description", description); - doUpdate = true; - } + ctx.getJob().getLogger().info("Updating ExpData record with new filepath: " + cram.getPath()); + ExpData d = so.getExpData(); + d.setDataFileURI(cram.toURI()); + d.setName(cram.getName()); + d.save(ctx.getJob().getUser()); - if (doCramArchivalMode) - { - description = (description == null ? "" : description + "\n") + "CRAM Archival Mode"; - row.put("description", description); - doUpdate = true; - } + Map row = new CaseInsensitiveHashMap<>(); + row.put("rowid", so.getRowid()); + row.put("container", so.getContainer()); + boolean doUpdate = false; + String description = so.getDescription(); + if (so.getName().contains(".bam")) + { + row.put("name", so.getName().replaceAll("\\.bam", "\\.cram")); + description = (description == null ? "" : description + "\n") + "Converted from BAM to CRAM"; + row.put("description", description); + doUpdate = true; + } - if (doUpdate) - { - toUpdate.add(row); + if (doCramArchivalMode) + { + description = (description == null ? 
"" : description + "\n") + "CRAM Archival Mode"; + row.put("description", description); + doUpdate = true; + } + + if (doUpdate) + { + toUpdate.add(row); + } } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java index 23991eefb..0630664ae 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BamIterator.java @@ -149,7 +149,15 @@ public void iterateReads(String refName, int start, int stop) throws IOException if (r.getAlignmentEnd() < start || r.getAlignmentStart() > stop) continue; - processAlignment(r, indexedRef); + try + { + processAlignment(r, indexedRef); + } + catch (Exception e) + { + _logger.error("Unable to parse alignment: " + r.toString() + " / " + r.getCigarString()); + throw e; + } if (i % 10000 == 0) { From 723d3d4dc81a0da90c0255cf833c6e4fd6c1c90f Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 15:44:15 -0700 Subject: [PATCH 25/58] More visibility into orphan aligner indexes --- .../pipeline/OrphanFilePipelineJob.java | 12 ++++++++++++ .../pipeline/SequenceAlignmentTask.java | 6 ++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index e6a657190..27fbd8dc3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -57,6 +57,8 @@ import java.util.Map; import java.util.Set; +import static org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper.SHARED_SUBFOLDER_NAME; + public class OrphanFilePipelineJob extends PipelineJob { // Default constructor for serialization @@ -373,6 +375,16 @@ public void 
getOrphanFilesForContainer(Container c, User u, Set orphanFile unexpectedPipelineDirs.add(subdir); } + File sharedDir = new File(subdir, SHARED_SUBFOLDER_NAME); + if (sharedDir.exists()) + { + long size = FileUtils.sizeOfDirectory(sharedDir); + if (size > 1e6) + { + getJob().getLogger().warn("Large Shared folder: " + sharedDir.getPath()); + } + } + getOrphanFilesForDirectory(knownExpDatas, dataMap, subdir, orphanFiles, orphanIndexes); } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 40116cc01..5cecca0da 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -209,10 +209,6 @@ public WorkDirectory createWorkDirectory(String jobGUID, FileAnalysisJobSupport @Override public boolean isJobComplete(PipelineJob job) { - FileAnalysisJobSupport support = (FileAnalysisJobSupport) job; - String baseName = support.getBaseName(); - File dirAnalysis = support.getAnalysisDirectory(); - return false; } } @@ -469,6 +465,8 @@ else if (doCopy) actions.add(action); referenceGenome.setWorkingFasta(new File(targetDir, refFasta.getName())); + + getTaskFileManagerImpl().addIntermediateFile(targetDir); } catch (IOException e) { From bc8f6173e8e79a8b53bb64e032caf8dbab66e1cb Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 May 2025 20:50:33 -0700 Subject: [PATCH 26/58] Bugfix to CigarPositionIterable when operator is equal sign --- .../sequenceanalysis/api/picard/CigarPositionIterable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java index a8abc065d..e78eea86a 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java @@ -81,7 +81,7 @@ private void initializeCigar() int i = 0; for (char el : _explodedCigar) { - CigarOperator op = CigarOperator.valueOf(Character.toString(el)); + CigarOperator op = CigarOperator.characterToEnum(el); if (op.consumesReadBases()) { _readPositions[i] = readPos; From 0a3827891566a5ae6e7b40a36da8aac286a3aa31 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 20 May 2025 06:10:57 -0700 Subject: [PATCH 27/58] Bugfix to CigarPositionIterable when operator is equal sign --- .../sequenceanalysis/api/picard/CigarPositionIterable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java index e78eea86a..bf5c30d8f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/api/picard/CigarPositionIterable.java @@ -155,7 +155,7 @@ public PositionInfo(SAMRecord record, int pos, char[] ops, Integer[] readPos, In { _record = record; _pos = pos; - _op = CigarOperator.valueOf(Character.toString(ops[pos])); + _op = CigarOperator.characterToEnum(ops[pos]); _readPos = readPos[pos]; _refPos = refPos[pos]; From 655c47aff786e06a5b070e8569df3f9d096a9ee9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 20 May 2025 13:02:20 -0700 Subject: [PATCH 28/58] Mark index folder for deletion --- .../org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java index 7151c7718..3ad0b00ef 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BBMapWrapper.java @@ -101,6 +101,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu } FileUtils.moveDirectory(localIdx, refDir); + output.addIntermediateFile(refDir); } catch (IOException e) { From c876525677714b283fa2a3104b2ce78cc33fa536 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 21 May 2025 05:55:37 -0700 Subject: [PATCH 29/58] Bugfix to ConvertToCramHandler --- .../pipeline/ConvertToCramHandler.java | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index 204d9a52b..40bc34b38 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -134,8 +134,6 @@ public void processFilesRemote(List inputFiles, JobContext c new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), outputFile, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); } - checkCramAndIndex(so); - if (replaceOriginal) { ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath()); @@ -180,21 +178,6 @@ else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) } } - private void checkCramAndIndex(SequenceOutputFile so) throws PipelineJobException - { - File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram"); - if (!cram.exists()) - { - throw new PipelineJobException("Unable to find file: " + cram.getPath()); - } - - File cramIdx = new File(cram.getPath() + ".crai"); - if (!cramIdx.exists()) - { - throw new PipelineJobException("Unable to find file: " + cramIdx.getPath()); - } - } - @Override public void 
complete(JobContext ctx, List inputs, List outputsCreated) throws PipelineJobException { @@ -210,8 +193,6 @@ public void complete(JobContext ctx, List inputs, List Date: Wed, 21 May 2025 09:41:46 -0700 Subject: [PATCH 30/58] Add action to manually create ExpData for file --- .../SequenceAnalysisController.java | 58 +++++++++++++++++++ .../singlecell/run/CellRangerVDJWrapper.java | 20 ++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index 9e2e7f686..d40d13342 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -5293,4 +5293,62 @@ public void setDoNotRequireSra(boolean doNotRequireSra) _doNotRequireSra = doNotRequireSra; } } + + @RequiresSiteAdmin + public static class CreateExpDataForFileAction extends ConfirmAction + { + @Override + public void validateCommand(CreateExpDataForFileForm form, Errors errors) + { + + } + + @Override + public URLHelper getSuccessURL(CreateExpDataForFileForm form) + { + return getContainer().getStartURL(getUser()); + } + + @Override + public ModelAndView getConfirmView(CreateExpDataForFileForm form, BindException errors) throws Exception + { + return new HtmlView(HtmlString.unsafe("This will create a new ExpData with a DataFileUrl pointing to the provided URI. This should be a full URI, such as file:///my/path/myFile.txt." + + "

" + + "
")); + } + + @Override + public boolean handlePost(CreateExpDataForFileForm form, BindException errors) throws Exception + { + URI newUri = URI.create(form.getDataFileUrl()); + File f = new File(newUri); + if (!f.exists()) + { + throw new PipelineJobException("Missing file: " + form.getDataFileUrl()); + } + + DataType dataType = new DataType("File"); + + ExpData d = ExperimentService.get().createData(getContainer(), dataType, f.getName()); + d.setDataFileURI(newUri); + d.save(getUser()); + + return true; + } + } + + public static class CreateExpDataForFileForm + { + private String _dataFileUrl; + + public String getDataFileUrl() + { + return _dataFileUrl; + } + + public void setDataFileUrl(String dataFileUrl) + { + _dataFileUrl = dataFileUrl; + } + } } \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 17972552c..c78b87d8c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -3,7 +3,6 @@ import au.com.bytecode.opencsv.CSVReader; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.stream.IntStreams; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; @@ -524,9 +523,26 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome { throw new PipelineJobException("Unable to find file: " + outputVloupe.getPath()); } + else + { + if (isPrimaryDir) + { + try + { + + getPipelineCtx().getLogger().debug("Creating empty vloupe file as placeholder: " + outputVloupe.getPath()); + FileUtils.touch(outputVloupe); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + } } + // NOTE: only tag the vloupe file for a/b: - else if (isPrimaryDir) + if (isPrimaryDir) { String versionString 
= "Version: " + getWrapper().getVersionString(); output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); From 1ac2cb4b0d53d457088cd023a9f9d04b41745dbc Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 21 May 2025 10:16:10 -0700 Subject: [PATCH 31/58] Add more logging --- .../sequenceanalysis/pipeline/ConvertToCramHandler.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index 40bc34b38..2be23a63d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -161,6 +161,7 @@ else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()).delete(); so.getFile().delete(); + ctx.getLogger().debug("Replacing original file: " + so.getFile().getPath()); FileUtils.moveFile(outputFile, so.getFile()); FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), new File(so.getFile() + ".crai")); } @@ -169,6 +170,10 @@ else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) throw new PipelineJobException(e); } } + else + { + throw new PipelineJobException("Unknown file type: " + so.getFile().getPath()); + } } else { From 9c6d1147298535761a203c4e4f31f2f2bff9b54f Mon Sep 17 00:00:00 2001 From: hextraza Date: Wed, 21 May 2025 12:46:12 -0700 Subject: [PATCH 32/58] Stream JSON results to client (#330) * Stream JSON results to client * Use ActionURL to build lucene queries * Switch to ExportAction --------- Co-authored-by: Sebastian Benjamin Co-authored-by: bbimber --- .../components/VariantTableWidget.tsx | 31 +++++-- jbrowse/src/client/JBrowse/utils.ts | 84 +++++++++++++------ 
.../org/labkey/jbrowse/JBrowseController.java | 18 ++-- .../labkey/jbrowse/JBrowseLuceneSearch.java | 27 +++--- 4 files changed, 106 insertions(+), 54 deletions(-) diff --git a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx index e50da12a9..98f6ac0f9 100644 --- a/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx +++ b/jbrowse/src/client/JBrowse/VariantSearch/components/VariantTableWidget.tsx @@ -68,7 +68,6 @@ const VariantTableWidget = observer(props => { return obj })) - setTotalHits(data.totalHits) setDataLoaded(true) } @@ -96,12 +95,35 @@ const VariantTableWidget = observer(props => { currentUrl.searchParams.set("sortDirection", sort.toString()); if (pushToHistory) { - window.history.pushState(null, "", currentUrl.toString()); + window.history.pushState(null, "", currentUrl.toString()); } setFilters(passedFilters); - setDataLoaded(false) - fetchLuceneQuery(passedFilters, sessionId, trackGUID, page, pageSize, field, sort, (json)=>{handleSearch(json)}, (error) => {setDataLoaded(true); setError(error)}); + setDataLoaded(false); + setFeatures([]); + + fetchLuceneQuery( + passedFilters, + sessionId, + trackGUID, + page, + pageSize, + field, + sort, + (row) => { + setFeatures(prev => { + row.id = prev.length; + row.trackId = trackId; + return [...prev, row]; + }); + }, + () => setDataLoaded(true), + (error) => { + console.error("Stream error:", error); + setError(error); + setDataLoaded(true); + } + ); } const handleExport = () => { @@ -274,7 +296,6 @@ const VariantTableWidget = observer(props => { const [filterModalOpen, setFilterModalOpen] = useState(false); const [filters, setFilters] = useState([]); - const [totalHits, setTotalHits] = useState(0); const [fieldTypeInfo, setFieldTypeInfo] = useState([]); const [allowedGroupNames, setAllowedGroupNames] = useState([]); const [promotedFilters, setPromotedFilters] = useState>(null); diff 
--git a/jbrowse/src/client/JBrowse/utils.ts b/jbrowse/src/client/JBrowse/utils.ts index 0b51d8ef7..ad1870022 100644 --- a/jbrowse/src/client/JBrowse/utils.ts +++ b/jbrowse/src/client/JBrowse/utils.ts @@ -328,23 +328,24 @@ export function serializeLocationToEncodedSearchString(contig, start, end) { return createEncodedFilterString(filters) } -export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, sortField, sortReverseString, successCallback, failureCallback) { +export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pageSize, sortField, sortReverseString, + handleRow, handleComplete, handleError) { if (!offset) { offset = 0 } if (!sessionId) { - failureCallback("There was an error: " + "Lucene query: no session ID") + handleError("There was an error: " + "Lucene query: no session ID") return } if (!trackGUID) { - failureCallback("There was an error: " + "Lucene query: no track ID") + handleError("There was an error: " + "Lucene query: no track ID") return } if (!filters) { - failureCallback("There was an error: " + "Lucene query: no filters") + handleError("There was an error: " + "Lucene query: no filters") return } @@ -358,27 +359,60 @@ export async function fetchLuceneQuery(filters, sessionId, trackGUID, offset, pa sortReverse = false } - return Ajax.request({ - url: ActionURL.buildURL('jbrowse', 'luceneQuery.api'), - method: 'GET', - success: async function(res){ - let jsonRes = JSON.parse(res.response); - successCallback(jsonRes) - }, - failure: function(res) { - console.error("There was an error: " + res.status + "\n Status Body: " + res.responseText + "\n Session ID:" + sessionId) - failureCallback("There was an error: status " + res.status) - }, - params: { - "searchString": encoded, - "sessionId": sessionId, - "trackId": trackGUID, - "offset": offset, - "pageSize": pageSize, - "sortField": sortField ?? 
"genomicPosition", - "sortReverse": sortReverse - }, - }); + const params = { + searchString: encoded, + sessionId, + trackId: trackGUID, + offset: offset, + pageSize: pageSize, + sortField: sortField ?? "genomicPosition", + sortReverse: sortReverse, + }; + + try { + const url = ActionURL.buildURL('jbrowse', 'luceneQuery.api', null, params); + const response = await fetch(url); + if (!response.ok || !response.body) { + throw new Error(`HTTP error ${response.status}`); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder("utf-8"); + let buffer = ''; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + + let boundary; + while ((boundary = buffer.indexOf('\n')) >= 0) { + const line = buffer.slice(0, boundary).trim(); + buffer = buffer.slice(boundary + 1); + if (line) { + try { + const parsed = JSON.parse(line); + handleRow(parsed); + } catch (err) { + console.error('Failed to parse line:', line, err); + } + } + } + } + + if (buffer.trim()) { + try { + handleRow(JSON.parse(buffer)); + } catch (err) { + console.error('Final line parse error:', buffer, err); + } + } + + handleComplete(); + } catch (error) { + handleError(error.toString()); + } } export class FieldModel { diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java index 6dc297b95..d7c75b483 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java @@ -28,6 +28,7 @@ import org.json.JSONObject; import org.labkey.api.action.ApiResponse; import org.labkey.api.action.ApiSimpleResponse; +import org.labkey.api.action.ExportAction; import org.labkey.api.action.MutatingApiAction; import org.labkey.api.action.ReadOnlyApiAction; import org.labkey.api.action.SimpleApiJsonForm; @@ -944,10 +945,10 @@ else if (!isValidUUID(form.getTrackId())) } 
@RequiresPermission(ReadPermission.class) - public static class LuceneQueryAction extends ReadOnlyApiAction + public static class LuceneQueryAction extends ExportAction { @Override - public ApiResponse execute(LuceneQueryForm form, BindException errors) + public void export(LuceneQueryForm form, HttpServletResponse response, BindException errors) throws Exception { JBrowseLuceneSearch searcher; try @@ -957,30 +958,31 @@ public ApiResponse execute(LuceneQueryForm form, BindException errors) catch (IllegalArgumentException e) { errors.reject(ERROR_MSG, e.getMessage()); - return null; + return; } try { - return new ApiSimpleResponse(searcher.doSearchJSON( + response.setContentType("application/x-ndjson"); + searcher.doSearchJSON( getUser(), PageFlowUtil.decode(form.getSearchString()), form.getPageSize(), form.getOffset(), form.getSortField(), - form.getSortReverse() - )); + form.getSortReverse(), + response + ); } catch (Exception e) { _log.error("Error in JBrowse lucene query", e); errors.reject(ERROR_MSG, e.getMessage()); - return null; } } @Override - public void validateForm(LuceneQueryForm form, Errors errors) + public void validate(LuceneQueryForm form, BindException errors) { if ((form.getSearchString() == null || form.getSessionId() == null || form.getTrackId() == null)) { diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index e50875cb4..34d8c82f5 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -1,6 +1,7 @@ package org.labkey.jbrowse; import jakarta.servlet.http.HttpServletResponse; +import org.apache.catalina.connector.Response; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; @@ -202,9 +203,9 @@ public String extractFieldName(String queryString) return parts.length > 0 ? 
parts[0].trim() : null; } - public JSONObject doSearchJSON(User u, String searchString, final int pageSize, final int offset, String sortField, boolean sortReverse) throws IOException, ParseException { + public void doSearchJSON(User u, String searchString, final int pageSize, final int offset, String sortField, boolean sortReverse, HttpServletResponse response) throws IOException, ParseException { SearchConfig searchConfig = createSearchConfig(u, searchString, pageSize, offset, sortField, sortReverse); - return paginateJSON(searchConfig); + paginateJSON(searchConfig, response); } public void doSearchCSV(User u, String searchString, String sortField, boolean sortReverse, HttpServletResponse response) throws IOException, ParseException { @@ -330,32 +331,26 @@ else if (numericQueryParserFields.containsKey(fieldName)) return new SearchConfig(cacheEntry, query, pageSize, offset, sort, fieldsList); } - private JSONObject paginateJSON(SearchConfig c) throws IOException, ParseException { + private void paginateJSON(SearchConfig c, HttpServletResponse response) throws IOException, ParseException { IndexSearcher searcher = c.cacheEntry.indexSearcher; TopDocs topDocs; + PrintWriter writer = response.getWriter(); if (c.offset == 0) { topDocs = searcher.search(c.query, c.pageSize, c.sort); } else { TopFieldDocs prev = searcher.search(c.query, c.pageSize * c.offset, c.sort); - long totalHits = prev.totalHits.value; ScoreDoc[] prevHits = prev.scoreDocs; if (prevHits.length < c.pageSize * c.offset) { - JSONObject results = new JSONObject(); - results.put("data", Collections.emptyList()); - results.put("totalHits", totalHits); - return results; + return; } ScoreDoc lastDoc = prevHits[c.pageSize * c.offset - 1]; topDocs = searcher.searchAfter(lastDoc, c.query, c.pageSize, c.sort); } - JSONObject results = new JSONObject(); - List data = new ArrayList<>(topDocs.scoreDocs.length); - for (ScoreDoc sd : topDocs.scoreDocs) { Document doc = searcher.storedFields().document(sd.doc); @@ 
-366,12 +361,11 @@ private JSONObject paginateJSON(SearchConfig c) throws IOException, ParseExcepti String[] vals = doc.getValues(name); elem.put(name, vals.length > 1 ? Arrays.asList(vals) : vals[0]); } - data.add(elem); + + writer.println(elem); } - results.put("data", data); - results.put("totalHits", topDocs.totalHits.value); - return results; + writer.flush(); } private void exportCSV(SearchConfig c, HttpServletResponse response) throws IOException @@ -648,8 +642,9 @@ public void cacheDefaultQuery() { try { + HttpServletResponse response = new Response(); JBrowseLuceneSearch.clearCache(_jsonFile.getObjectId()); - doSearchJSON(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false); + doSearchJSON(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false, response); } catch (ParseException | IOException e) { From 709e32a0c0de9464f7d09d99556d3aef381f0290 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 21 May 2025 14:49:35 -0700 Subject: [PATCH 33/58] Bugfix to CRAM handler for BAM inputs --- .../pipeline/ConvertToCramHandler.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java index 2be23a63d..a7906eaa6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ConvertToCramHandler.java @@ -134,6 +134,11 @@ public void processFilesRemote(List inputFiles, JobContext c new SamtoolsCramConverter(ctx.getLogger()).convert(so.getFile(), outputFile, genome.getWorkingFastaFileGzipped(), true, threads, doCramArchivalMode); } + if (!outputFile.exists()) + { + throw new PipelineJobException("Missing CRAM: " + outputFile.getPath()); + } + if (replaceOriginal) { ctx.getLogger().info("Deleting original BAM/CRAM: {}", so.getFile().getPath()); @@ -148,6 +153,31 @@ public void processFilesRemote(List 
inputFiles, JobContext c { ctx.getLogger().debug("Input BAM not found, possibly deleted in earlier job iteration?"); } + + ctx.getLogger().debug("Moving CRAM to replace original BAM file: " + so.getFile().getPath()); + try + { + File targetCram = new File(so.getFile().getParentFile(), outputFile.getName()); + if (targetCram.exists()) + { + ctx.getLogger().debug("Deleting file: " + targetCram.getPath()); + targetCram.delete(); + } + + File targetCramIdx = new File(so.getFile().getParentFile(), outputFile.getName() + ".crai"); + if (targetCramIdx.exists()) + { + ctx.getLogger().debug("Deleting file: " + targetCramIdx.getPath()); + targetCramIdx.delete(); + } + + FileUtils.moveFile(outputFile, targetCram); + FileUtils.moveFile(new File(outputFile.getPath() + ".crai"), targetCramIdx); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } } else if (SequenceUtil.FILETYPE.cram.getFileType().isType(so.getFile())) { From 2b312428ea8eb858eecc477f3a7e78fd78a3073e Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 22 May 2025 15:17:04 -0700 Subject: [PATCH 34/58] Support additional GeneComponentScore values --- .../pipeline/singlecell/CalculateGeneComponentScores.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java index 898e2e218..06c40161a 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java @@ -25,7 +25,7 @@ public Provider() super("CalculateGeneComponentScores", "Calculate Gene Module Scores", "RIRA", "This will generate UCell scores for a set of pre-defined gene modules", Collections.singletonList( SeuratToolParameter.create("savedComponent", "Saved Component(s)", "This is the name of the saved component (from 
RIRA) to apply", "ldk-simplecombo", new JSONObject() {{ - put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;TCR_StimulationComponent1"); + put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;TCR_StimulationComponent1;PLS_Score_1;PLS_Score_2;PLS_Score_3;PLS_Score_4;PLS_Score_5;PLS_Score_6"); put("multiSelect", true); put("allowBlank", false); put("joinReturnValue", true); From 45810583b1206d3c918665cb548f134c314fea9e Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 27 May 2025 16:12:34 -0700 Subject: [PATCH 35/58] Expand OrphanFilePipelineJob --- .../pipeline/OrphanFilePipelineJob.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 27fbd8dc3..a9a3eb796 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -48,6 +48,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -55,7 +56,9 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import static org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper.SHARED_SUBFOLDER_NAME; @@ -451,7 +454,16 @@ public boolean accept(File pathname) } } - for (Container child : ContainerManager.getChildren(c)) + List children = ContainerManager.getChildren(c); + + // Check for unexpected subfolders: + Set allowableSubfolders = children.stream().map(Container::getName).collect(Collectors.toSet()); + Set unknownFolders = 
Arrays.stream(Objects.requireNonNull(root.getRootPath().getParentFile().listFiles())).filter(fn -> !fn.getName().startsWith("@") & !allowableSubfolders.contains(fn.getName())).collect(Collectors.toSet()); + if (!unknownFolders.isEmpty()) { + unknownFolders.forEach(x -> getJob().getLogger().warn("Folder does not match expected child: " + x.getPath())); + } + + for (Container child : children) { if (child.isWorkbook()) { From d9ac32c2e0c9eb3ed322f80114fbe2aa65c293c1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 31 May 2025 08:10:33 -0700 Subject: [PATCH 36/58] Expand studies datasets --- Studies/resources/data/amount_units.tsv | 11 + .../data/calculated_status_codes.tsv | 6 + Studies/resources/data/conc_units.tsv | 12 + Studies/resources/data/dosage_units.tsv | 11 + Studies/resources/data/gender_codes.tsv | 4 + Studies/resources/data/geographic_origins.tsv | 4 + Studies/resources/data/lookup_sets.tsv | 9 + Studies/resources/data/reports.tsv | 12 + Studies/resources/data/routes.tsv | 24 ++ Studies/resources/data/species.tsv | 10 + Studies/resources/data/volume_units.tsv | 13 + .../folderTypes/Studies.folderType.xml | 78 +++++ Studies/resources/module.xml | 5 + .../query/study/demographics.query.xml | 34 ++ Studies/resources/query/study/flags.query.xml | 43 +++ .../resources/query/study/samples.query.xml | 11 + .../resources/query/study/studyData.query.xml | 13 + .../resources/query/study/weight.query.xml | 14 + Studies/resources/referenceStudy/folder.xml | 4 + .../study/datasets/Studies.dataset | 19 ++ .../study/datasets/datasets_manifest.xml | 35 ++ .../study/datasets/datasets_metadata.xml | 321 ++++++++++++++++++ .../resources/referenceStudy/study/study.xml | 9 + .../referenceStudy/study/studyPolicy.xml | 10 + .../postgresql/studies-23.000-23.001.sql | 34 ++ .../sqlserver/studies-23.000-23.001.sql | 34 ++ Studies/resources/schemas/studies.xml | 126 ++++++- Studies/resources/views/studiesAdmin.html | 33 ++ Studies/resources/views/studiesAdmin.view.xml | 8 + 
.../resources/views/studiesAdmin.webpart.xml | 6 + .../org/labkey/studies/StudiesController.java | 113 ++++++ .../src/org/labkey/studies/StudiesModule.java | 42 ++- .../labkey/studies/query/LookupSetTable.java | 116 +++++++ .../studies/query/LookupSetsManager.java | 31 ++ .../labkey/studies/query/LookupSetsTable.java | 60 ++++ .../studies/query/StudiesCustomizer.java | 22 ++ .../studies/query/StudiesUserSchema.java | 125 +++++++ .../security/StudiesDataAdminPermission.java | 31 ++ .../security/StudiesDataAdminRole.java | 22 ++ 39 files changed, 1496 insertions(+), 19 deletions(-) create mode 100644 Studies/resources/data/amount_units.tsv create mode 100644 Studies/resources/data/calculated_status_codes.tsv create mode 100644 Studies/resources/data/conc_units.tsv create mode 100644 Studies/resources/data/dosage_units.tsv create mode 100644 Studies/resources/data/gender_codes.tsv create mode 100644 Studies/resources/data/geographic_origins.tsv create mode 100644 Studies/resources/data/lookup_sets.tsv create mode 100644 Studies/resources/data/reports.tsv create mode 100644 Studies/resources/data/routes.tsv create mode 100644 Studies/resources/data/species.tsv create mode 100644 Studies/resources/data/volume_units.tsv create mode 100644 Studies/resources/folderTypes/Studies.folderType.xml create mode 100644 Studies/resources/module.xml create mode 100644 Studies/resources/query/study/demographics.query.xml create mode 100644 Studies/resources/query/study/flags.query.xml create mode 100644 Studies/resources/query/study/samples.query.xml create mode 100644 Studies/resources/query/study/studyData.query.xml create mode 100644 Studies/resources/query/study/weight.query.xml create mode 100644 Studies/resources/referenceStudy/folder.xml create mode 100644 Studies/resources/referenceStudy/study/datasets/Studies.dataset create mode 100644 Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml create mode 100644 
Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml create mode 100644 Studies/resources/referenceStudy/study/study.xml create mode 100644 Studies/resources/referenceStudy/study/studyPolicy.xml create mode 100644 Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql create mode 100644 Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql create mode 100644 Studies/resources/views/studiesAdmin.html create mode 100644 Studies/resources/views/studiesAdmin.view.xml create mode 100644 Studies/resources/views/studiesAdmin.webpart.xml create mode 100644 Studies/src/org/labkey/studies/query/LookupSetTable.java create mode 100644 Studies/src/org/labkey/studies/query/LookupSetsManager.java create mode 100644 Studies/src/org/labkey/studies/query/LookupSetsTable.java create mode 100644 Studies/src/org/labkey/studies/query/StudiesCustomizer.java create mode 100644 Studies/src/org/labkey/studies/query/StudiesUserSchema.java create mode 100644 Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java create mode 100644 Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java diff --git a/Studies/resources/data/amount_units.tsv b/Studies/resources/data/amount_units.tsv new file mode 100644 index 000000000..b4b010264 --- /dev/null +++ b/Studies/resources/data/amount_units.tsv @@ -0,0 +1,11 @@ +value +g +IU +U +mEq +mg +no units +ug +units +L/min +% \ No newline at end of file diff --git a/Studies/resources/data/calculated_status_codes.tsv b/Studies/resources/data/calculated_status_codes.tsv new file mode 100644 index 000000000..d2bf1dec4 --- /dev/null +++ b/Studies/resources/data/calculated_status_codes.tsv @@ -0,0 +1,6 @@ +value +Alive +Dead +No Record +Shipped +Unknown \ No newline at end of file diff --git a/Studies/resources/data/conc_units.tsv b/Studies/resources/data/conc_units.tsv new file mode 100644 index 000000000..d7d9b4d18 --- /dev/null +++ b/Studies/resources/data/conc_units.tsv @@ -0,0 +1,12 
@@ +value denominator numerator +g/ml mL g +g/tsp tsp g +IU/ml mL IU +mEq/ml mL mEq +mg/capsule capsule(s) mg +mg/ml mL mg +mg/piece piece(s) mg +mg/tablet tablet(s) mg +mg/tsp tsp mg +ug/ml mL ug +units/ml mL units \ No newline at end of file diff --git a/Studies/resources/data/dosage_units.tsv b/Studies/resources/data/dosage_units.tsv new file mode 100644 index 000000000..eb8de1491 --- /dev/null +++ b/Studies/resources/data/dosage_units.tsv @@ -0,0 +1,11 @@ +value numerator denominator +g/kg g kg +IU/kg IU kg +mEq/kg mEq kg +mg/animal mg +mg/kg mg kg +ml/kg ml kg +no units +ounces/kg ounces kg +ug/kg ug kg +units/kg units kg \ No newline at end of file diff --git a/Studies/resources/data/gender_codes.tsv b/Studies/resources/data/gender_codes.tsv new file mode 100644 index 000000000..07b681950 --- /dev/null +++ b/Studies/resources/data/gender_codes.tsv @@ -0,0 +1,4 @@ +v meaning origgender +f Female f +m Male m +u Unknown \ No newline at end of file diff --git a/Studies/resources/data/geographic_origins.tsv b/Studies/resources/data/geographic_origins.tsv new file mode 100644 index 000000000..0870abf80 --- /dev/null +++ b/Studies/resources/data/geographic_origins.tsv @@ -0,0 +1,4 @@ +value +Indian +Chinese +Hybrid \ No newline at end of file diff --git a/Studies/resources/data/lookup_sets.tsv b/Studies/resources/data/lookup_sets.tsv new file mode 100644 index 000000000..8e2b94822 --- /dev/null +++ b/Studies/resources/data/lookup_sets.tsv @@ -0,0 +1,9 @@ +setname label keyfield titleColumn +amount_units Amount Units unit +calculated_status_codes Calculated Status Codes code +conc_units Concentration Units unit +dosage_units Dosage Units unit +gender_codes Gender Codes +geographic_origins Geographic Origins origin +routes Routes route +volume_units Volume Units unit \ No newline at end of file diff --git a/Studies/resources/data/reports.tsv b/Studies/resources/data/reports.tsv new file mode 100644 index 000000000..4dd48af51 --- /dev/null +++ 
b/Studies/resources/data/reports.tsv @@ -0,0 +1,12 @@ +reportname category reporttype reporttitle visible containerpath schemaname queryname viewname report datefieldname todayonly queryhaslocation sort_order QCStateLabelFieldName description +activeAssignments Assignments and Groups query Active Assignments true study Assignment Active Assignments date false false qcstate/publicdata This report shows the active assignments for each animal +assignmentHistory Assignments and Groups query Assignment History true study Assignment date false false qcstate/publicdata This report shows all assignments records for the animals +activeGroups Assignments and Groups query Active Groups true study animal_group_members Active Members date false false qcstate/publicdata This report shows the active assignments for each animal +groupHistory Assignments and Groups query Group History true study animal_group_members date false false qcstate/publicdata This report shows all assignments records for the animals +microbiology Lab Results query Microbiology true study Microbiology Results date false false qcstate/publicdata +biochemistry Lab Results js Biochemistry true study bloodChemistry date false false Contains results of chemistry panels. 
Can be displayed either by panel, or showing reference ranges +clinPathRuns Lab Results query Lab Runs true study Clinpath Runs date false false qcstate/publicdata Contains all clinpath requests +iStat Lab Results js iStat true study iStat date false false qcstate/publicdata Contains iStat results +hematology Lab Results js Hematology true study hematology date false false Contains hematology data showing cell subsets +parasitology Lab Results query Parasitology true study Parasitology Results date false false qcstate/publicdata Contains results of parasitology testing +urinalysis Lab Results js Urinalysis true study urinalysisResults date false false Contains urinalysis results diff --git a/Studies/resources/data/routes.tsv b/Studies/resources/data/routes.tsv new file mode 100644 index 000000000..b0e1f0d80 --- /dev/null +++ b/Studies/resources/data/routes.tsv @@ -0,0 +1,24 @@ +value title +IM +intracardiac +intracarotid +intracorneal Intracorneal +intracranial +IP intraperitoneal +ID +INH +IT +IV +CRI +IVAG +oral +PO +rectal +Spillage +SQ +OU +OD +OS +topical +topical (eye) +topical (skin) \ No newline at end of file diff --git a/Studies/resources/data/species.tsv b/Studies/resources/data/species.tsv new file mode 100644 index 000000000..0e063c4cf --- /dev/null +++ b/Studies/resources/data/species.tsv @@ -0,0 +1,10 @@ +common scientific_name id_prefix mhc_prefix blood_per_kg max_draw_pct blood_draw_interval cites_code dateDisabled +Baboon 60 0.2 30 +Cotton-top Tamarin Saguinus oedipus so Saoe 60 0.2 30 +Cynomolgus Macaca fascicularis cy Mafa 60 0.2 30 +Marmoset Callithrix jacchus cj Caja 60 0.15 30 +Pigtail Macaca Nemestrina Mane 60 0.2 30 +Rhesus Macaca mulatta r|rh Mamu 60 0.2 30 +Sooty Mangabey Cercocebus atys Ceat 60 0.2 30 +Stump Tailed Macaca Arctoides Maar 60 0.2 30 +Vervet Chlorocebus sabaeus ag Chsa 60 0.2 30 diff --git a/Studies/resources/data/volume_units.tsv b/Studies/resources/data/volume_units.tsv new file mode 100644 index 000000000..fc2c76822 --- 
/dev/null +++ b/Studies/resources/data/volume_units.tsv @@ -0,0 +1,13 @@ +value +capsule(s) +cup +drop(s) +cube +mL +mL/hr +no units +ounce(s) +pan +piece(s) +tablet(s) +tsp \ No newline at end of file diff --git a/Studies/resources/folderTypes/Studies.folderType.xml b/Studies/resources/folderTypes/Studies.folderType.xml new file mode 100644 index 000000000..749c8d8f0 --- /dev/null +++ b/Studies/resources/folderTypes/Studies.folderType.xml @@ -0,0 +1,78 @@ + + Studies Overview + The default folder layout for Studies + + + + + + + + + + + + + + + + + datasets + Datasets + + + datasets + + + + + + + Datasets + body + + + + + + + + + + + + + + + + + + + + + + admin + Admin + + + + + + + + + Studies Admin + body + + + + + + + + + studies + + studies + true + \ No newline at end of file diff --git a/Studies/resources/module.xml b/Studies/resources/module.xml new file mode 100644 index 000000000..5a8029b41 --- /dev/null +++ b/Studies/resources/module.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/Studies/resources/query/study/demographics.query.xml b/Studies/resources/query/study/demographics.query.xml new file mode 100644 index 000000000..e1feec92e --- /dev/null +++ b/Studies/resources/query/study/demographics.query.xml @@ -0,0 +1,34 @@ + + + + + + + + + + true + + + Date + Birth + /query/executeQuery.view? + schemaName=study& + query.queryName=Birth& + query.Id~eq=${Id} + + + + Date + Death + /query/executeQuery.view? + schemaName=study& + query.queryName=Deaths& + query.Id~eq=${Id} + + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/query/study/flags.query.xml b/Studies/resources/query/study/flags.query.xml new file mode 100644 index 000000000..e8d0348bf --- /dev/null +++ b/Studies/resources/query/study/flags.query.xml @@ -0,0 +1,43 @@ + + + + + + + + Date Added + Date + + + Date Removed + false + Date + + + + + + + + + + + + + + + + + + + + + + Value + true + + +
+
+
+
diff --git a/Studies/resources/query/study/samples.query.xml b/Studies/resources/query/study/samples.query.xml new file mode 100644 index 000000000..b3f9fa380 --- /dev/null +++ b/Studies/resources/query/study/samples.query.xml @@ -0,0 +1,11 @@ + + + + + + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/query/study/studyData.query.xml b/Studies/resources/query/study/studyData.query.xml new file mode 100644 index 000000000..7d6c3cf7f --- /dev/null +++ b/Studies/resources/query/study/studyData.query.xml @@ -0,0 +1,13 @@ + + + + + + + + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/query/study/weight.query.xml b/Studies/resources/query/study/weight.query.xml new file mode 100644 index 000000000..929c141e3 --- /dev/null +++ b/Studies/resources/query/study/weight.query.xml @@ -0,0 +1,14 @@ + + + + + + + Weight (kg) + 0.#### + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/referenceStudy/folder.xml b/Studies/resources/referenceStudy/folder.xml new file mode 100644 index 000000000..e3acbb155 --- /dev/null +++ b/Studies/resources/referenceStudy/folder.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Studies/resources/referenceStudy/study/datasets/Studies.dataset b/Studies/resources/referenceStudy/study/datasets/Studies.dataset new file mode 100644 index 000000000..8e5970288 --- /dev/null +++ b/Studies/resources/referenceStudy/study/datasets/Studies.dataset @@ -0,0 +1,19 @@ +# default group can be used to avoid repeating definitions for each dataset +# +# action=[REPLACE,APPEND,DELETE] (default:REPLACE) +# deleteAfterImport=[TRUE|FALSE] (default:FALSE) + +default.action=REPLACE +default.deleteAfterImport=FALSE + +# map a source tsv column (right side) to a property name or full propertyURI (left) +# predefined properties: ParticipantId, SiteId, VisitId, Created +default.property.ParticipantId=ptid +default.property.Created=dfcreate + +# use to map from filename->datasetid +# NOTE: if there are NO explicit import definitions, we will try to import all files matching pattern +# NOTE: if there are ANY explicit mapping, we will only import listed datasets + +default.filePattern=dataset(\\d*).tsv +default.importAllMatches=TRUE diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml new file mode 100644 index 000000000..a3202bcb3 --- /dev/null +++ b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml new file mode 100644 index 000000000..1e2aecf5e --- /dev/null +++ 
b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml @@ -0,0 +1,321 @@ + + + + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + timestamp + + + varchar + + + Flags/Misc Information +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + timestamp + + + varchar + + + varchar + + + double + + + varchar + + + double + + + varchar + + + double + + + varchar + + + varchar + + + integer + + + double + + + varchar + + + varchar + + + Medications/Treatments +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + varchar + + + varchar + + + varchar + + + double + + + varchar + + + varchar + + + Immunizations +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + double + + + Weight +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + varchar + + + varchar + + + varchar + + + double + + + double + + + varchar + + + varchar + + + varchar + + + Viral Loads +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + varchar + + + double + + + varchar + + + varchar + + + varchar + + + varchar + + + Lab Results +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + false + + + + varchar + + + varchar + + + timestamp + + + timestamp + + + varchar + + + varchar + + + varchar + + + varchar + + + Demographics +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + timestamp + + + varchar + + + Project Assignment +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + + varchar + + + varchar + + + varchar + + + double + + + varchar + + + Samples +
+
diff --git a/Studies/resources/referenceStudy/study/study.xml b/Studies/resources/referenceStudy/study/study.xml new file mode 100644 index 000000000..0acc6d7dc --- /dev/null +++ b/Studies/resources/referenceStudy/study/study.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/Studies/resources/referenceStudy/study/studyPolicy.xml b/Studies/resources/referenceStudy/study/studyPolicy.xml new file mode 100644 index 000000000..3755d25b8 --- /dev/null +++ b/Studies/resources/referenceStudy/study/studyPolicy.xml @@ -0,0 +1,10 @@ + + + BASIC_WRITE + + + + + + + \ No newline at end of file diff --git a/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql b/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql new file mode 100644 index 000000000..14f33187b --- /dev/null +++ b/Studies/resources/schemas/dbscripts/postgresql/studies-23.000-23.001.sql @@ -0,0 +1,34 @@ +CREATE TABLE studies.lookup_sets ( + rowid serial, + setname varchar(100), + label varchar(500), + description varchar(4000), + keyField varchar(4000), + titleColumn varchar(4000), + container entityid, + created timestamp, + createdby int, + modified timestamp, + modifiedby int, + + CONSTRAINT PK_lookup_sets PRIMARY KEY (rowid) +); + +CREATE TABLE studies.lookups ( + rowid serial, + setname varchar(100), + value varchar(4000), + title varchar(4000), + category varchar(4000), + description varchar(4000), + sort_order int, + date_disabled timestamp, + objectid varchar(4000), + container entityid, + created timestamp, + createdby int, + modified timestamp, + modifiedby int, + + CONSTRAINT PK_lookups PRIMARY KEY (rowid) +); \ No newline at end of file diff --git a/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql new file mode 100644 index 000000000..fe59ea113 --- /dev/null +++ b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.000-23.001.sql @@ -0,0 +1,34 @@ +CREATE TABLE 
studies.lookup_sets ( + rowid int identity(1,1), + setname nvarchar(100), + label nvarchar(500), + description nvarchar(MAX), + keyField nvarchar(MAX), + titleColumn nvarchar(MAX), + container entityid, + created datetime, + createdby int, + modified datetime, + modifiedby int, + + CONSTRAINT PK_lookup_sets PRIMARY KEY (rowid) +); + +CREATE TABLE studies.lookups ( + rowid int identity(1,1), + setname nvarchar(100), + value nvarchar(MAX), + title nvarchar(MAX), + category nvarchar(MAX), + description nvarchar(MAX), + sort_order int, + date_disabled datetime, + objectid nvarchar(100), + container entityid, + created datetime, + createdby int, + modified datetime, + modifiedby int, + + CONSTRAINT PK_lookups PRIMARY KEY (rowid) +); \ No newline at end of file diff --git a/Studies/resources/schemas/studies.xml b/Studies/resources/schemas/studies.xml index 7918b339b..054cd9393 100644 --- a/Studies/resources/schemas/studies.xml +++ b/Studies/resources/schemas/studies.xml @@ -1,3 +1,127 @@ \ No newline at end of file + xmlns="http://labkey.org/data/xml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + + Each lookup set is exposed as a separate virtual table in the studies schema, making it easy to add simple value tables without having to create discrete tables in the underlying database + Lookup Sets + DETAILED + rowid + + + + + + + + + + Set Name + + + Label + + + Description + + + Key Field + + + Title Column + + + + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + +
+ + Lookup Values + DETAILED + + + + + + + true + false + false + true + + + Set Name + + + Value + + + Title + + + Category + + + Description + + + Sort Order + + + Date Disabled + + + Key + false + false + false + true + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + + false + true + true + + +
+
\ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.html b/Studies/resources/views/studiesAdmin.html new file mode 100644 index 000000000..a17cc7e92 --- /dev/null +++ b/Studies/resources/views/studiesAdmin.html @@ -0,0 +1,33 @@ + \ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.view.xml b/Studies/resources/views/studiesAdmin.view.xml new file mode 100644 index 000000000..7789b6b5c --- /dev/null +++ b/Studies/resources/views/studiesAdmin.view.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.webpart.xml b/Studies/resources/views/studiesAdmin.webpart.xml new file mode 100644 index 000000000..e44bcd6a3 --- /dev/null +++ b/Studies/resources/views/studiesAdmin.webpart.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Studies/src/org/labkey/studies/StudiesController.java b/Studies/src/org/labkey/studies/StudiesController.java index 4e1176393..a0b634567 100644 --- a/Studies/src/org/labkey/studies/StudiesController.java +++ b/Studies/src/org/labkey/studies/StudiesController.java @@ -1,14 +1,127 @@ package org.labkey.studies; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; +import org.labkey.api.action.ConfirmAction; import org.labkey.api.action.SpringActionController; +import org.labkey.api.data.TableInfo; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineUrls; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.reader.DataLoader; +import org.labkey.api.reader.TabLoader; +import org.labkey.api.resource.Resource; +import org.labkey.api.security.RequiresPermission; +import org.labkey.api.security.permissions.AdminPermission; +import 
org.labkey.api.studies.StudiesService; +import org.labkey.api.util.FileUtil; +import org.labkey.api.util.HtmlString; +import org.labkey.api.util.PageFlowUtil; +import org.labkey.api.util.Path; +import org.labkey.api.util.URLHelper; +import org.labkey.api.util.logging.LogHelper; +import org.labkey.api.view.HtmlView; +import org.springframework.validation.BindException; +import org.springframework.validation.Errors; +import org.springframework.web.servlet.ModelAndView; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; public class StudiesController extends SpringActionController { private static final DefaultActionResolver _actionResolver = new DefaultActionResolver(StudiesController.class); public static final String NAME = "studies"; + private static final Logger _log = LogHelper.getLogger(StudiesController.class, "Messages from StudiesController"); + public StudiesController() { setActionResolver(_actionResolver); } + + @RequiresPermission(AdminPermission.class) + public static class ImportStudyAction extends ConfirmAction + { + @Override + public ModelAndView getConfirmView(Object o, BindException errors) throws Exception + { + setTitle("Import Study"); + + return new HtmlView(HtmlString.unsafe("This will import the default study in this folder, and truncate/load ancillary data. 
Do you want to continue?")); + } + + @Override + public boolean handlePost(Object o, BindException errors) throws Exception + { + StudiesService.get().importFolderDefinition(getContainer(), getUser(), ModuleLoader.getInstance().getModule(StudiesModule.NAME), new Path("referenceStudy")); + + StudiesModule m = ModuleLoader.getInstance().getModule(StudiesModule.class); + loadTsv(m.getModuleResource("data/lookup_sets.tsv"), StudiesSchema.NAME); + + Resource r = m.getModuleResource("data"); + r.list().forEach(tsv -> { + if ("lookup_sets.tsv".equals(tsv.getName())) + { + return; + } + + String schemaName = switch (tsv.getName()) + { + case "reports.tsv" -> "laboratory"; + case "species.tsv" -> "laboratory"; + default -> StudiesSchema.NAME; + }; + + loadTsv(tsv, schemaName); + }); + + return true; + } + + private void loadTsv(Resource tsv, String schemaName) + { + try (DataLoader loader = DataLoader.get().createLoader(tsv, true, null, TabLoader.TSV_FILE_TYPE)) + { + TableInfo ti = QueryService.get().getUserSchema(getUser(), getContainer(), schemaName).getTable(FileUtil.getBaseName(tsv.getName())); + if (ti == null) + { + throw new IllegalStateException("Missing table: " + tsv.getName()); + } + + List> rows = loader.load(); + + QueryUpdateService qus = ti.getUpdateService(); + qus.setBulkLoad(true); + + qus.truncateRows(getUser(), getContainer(), null, null); + qus.insertRows(getUser(), getContainer(), rows, new BatchValidationException(), null, null); + } + catch (IOException | SQLException | BatchValidationException | QueryUpdateServiceException | DuplicateKeyException e) + { + _log.error("Error populating TSV", e); + + throw new RuntimeException(e); + } + } + + @Override + public void validateCommand(Object o, Errors errors) + { + + } + + @NotNull + @Override + public URLHelper getSuccessURL(Object o) + { + return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer()); + } + } } diff --git a/Studies/src/org/labkey/studies/StudiesModule.java 
b/Studies/src/org/labkey/studies/StudiesModule.java index 9b944c1a6..9fd15fb9a 100644 --- a/Studies/src/org/labkey/studies/StudiesModule.java +++ b/Studies/src/org/labkey/studies/StudiesModule.java @@ -3,16 +3,21 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Container; -import org.labkey.api.module.DefaultModule; +import org.labkey.api.ldk.ExtendedSimpleModule; +import org.labkey.api.module.Module; import org.labkey.api.module.ModuleContext; +import org.labkey.api.query.DefaultSchema; +import org.labkey.api.query.QuerySchema; +import org.labkey.api.security.roles.RoleManager; import org.labkey.api.studies.StudiesService; -import org.labkey.api.view.WebPartFactory; +import org.labkey.studies.query.StudiesUserSchema; +import org.labkey.studies.security.StudiesDataAdminRole; import java.util.Collection; import java.util.Collections; import java.util.Set; -public class StudiesModule extends DefaultModule +public class StudiesModule extends ExtendedSimpleModule { public static final String NAME = "Studies"; @@ -25,20 +30,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 23.000; - } - - @Override - public boolean hasScripts() - { - return true; - } - - @Override - @NotNull - protected Collection createWebPartFactories() - { - return Collections.emptyList(); + return 23.001; } @Override @@ -47,10 +39,11 @@ protected void init() addController(StudiesController.NAME, StudiesController.class); StudiesService.setInstance(StudiesServiceImpl.get()); + RoleManager.registerRole(new StudiesDataAdminRole()); } @Override - public void doStartup(ModuleContext moduleContext) + public void doStartupAfterSpringConfig(ModuleContext moduleContext) { } @@ -68,4 +61,17 @@ public Set getSchemaNames() { return Collections.singleton(StudiesSchema.NAME); } + + @Override + public void registerSchemas() + { + DefaultSchema.registerProvider(StudiesSchema.NAME, new 
DefaultSchema.SchemaProvider(this) + { + @Override + public QuerySchema createSchema(final DefaultSchema schema, Module module) + { + return new StudiesUserSchema(schema.getUser(), schema.getContainer(), StudiesSchema.getInstance().getSchema()); + } + }); + } } \ No newline at end of file diff --git a/Studies/src/org/labkey/studies/query/LookupSetTable.java b/Studies/src/org/labkey/studies/query/LookupSetTable.java new file mode 100644 index 000000000..b333008f3 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetTable.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013-2019 LabKey Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.labkey.studies.query; + +import org.labkey.api.data.ColumnInfo; +import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.SchemaTableInfo; +import org.labkey.api.ldk.LDKService; +import org.labkey.api.ldk.table.AbstractDataDefinedTable; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.SimpleUserSchema; + +import java.util.Map; + +/** + * User: bimber + * Date: 1/31/13 + * Time: 4:33 PM + */ +public class LookupSetTable extends AbstractDataDefinedTable +{ + private static final String CACHE_KEY = LookupSetTable.class.getName() + "||values"; + + private static final String FILTER_COL = "setname"; + private static final String VALUE_COL = "value"; + + private String _keyField; + + public static String getCacheKey(Container c) + { + return CACHE_KEY + "||" + c.getId(); + } + + public LookupSetTable(StudiesUserSchema schema, SchemaTableInfo table, ContainerFilter cf, String setName, Map map) + { + super(schema, table, cf, FILTER_COL, VALUE_COL, setName, setName); + + setTitleColumn(VALUE_COL); + + if (map.containsKey("label")) + setTitle((String)map.get("label")); + + if (map.containsKey("description")) + setDescription((String) map.get("description")); + + if (map.containsKey("keyField") && map.get("keyField") != null) + _keyField = (String)map.get("keyField"); + + if (map.containsKey("titleColumn") && map.get("titleColumn") != null) + _titleColumn = (String)map.get("titleColumn"); + else + _titleColumn = VALUE_COL; + } + + @Override + public LookupSetTable init() + { + super.init(); + + if (_keyField != null) + { + var keyCol = getMutableColumn(_keyField); + if (keyCol != null) + { + keyCol.setKeyField(true); + getMutableColumnOrThrow("rowid").setKeyField(false); + } + } + else + { + getMutableColumnOrThrow(VALUE_COL).setKeyField(false); + getMutableColumnOrThrow("rowid").setKeyField(true); + } + + if (_titleColumn != null) + { + ColumnInfo titleCol = 
getColumn(_titleColumn); + if (titleCol != null) + { + setTitleColumn(titleCol.getName()); + } + } + LDKService.get().getDefaultTableCustomizer().customize(this); + return this; + } + + @Override + public QueryUpdateService getUpdateService() + { + return new EHRLookupsUpdateService(this); + } + + protected class EHRLookupsUpdateService extends UpdateService + { + public EHRLookupsUpdateService(SimpleUserSchema.SimpleTable ti) + { + super(ti); + } + } +} + + diff --git a/Studies/src/org/labkey/studies/query/LookupSetsManager.java b/Studies/src/org/labkey/studies/query/LookupSetsManager.java new file mode 100644 index 000000000..6cb54f239 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetsManager.java @@ -0,0 +1,31 @@ +package org.labkey.studies.query; + +import org.apache.logging.log4j.Logger; +import org.labkey.api.cache.Cache; +import org.labkey.api.cache.CacheManager; +import org.labkey.api.util.logging.LogHelper; + +public class LookupSetsManager +{ + private static final LookupSetsManager _instance = new LookupSetsManager(); + private static final Logger _log = LogHelper.getLogger(LookupSetsManager.class, "Messages from the Studies LookupSetsManager"); + + public static final String TABLE_LOOKUPS = "lookups"; + public static final String TABLE_LOOKUP_SETS = "lookup_sets"; + private final Cache _cache; + + private LookupSetsManager() + { + _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "LookupSetsManagerCache"); + } + + public static LookupSetsManager get() + { + return _instance; + } + + public Cache getCache() + { + return _cache; + } +} diff --git a/Studies/src/org/labkey/studies/query/LookupSetsTable.java b/Studies/src/org/labkey/studies/query/LookupSetsTable.java new file mode 100644 index 000000000..4d6ff8230 --- /dev/null +++ b/Studies/src/org/labkey/studies/query/LookupSetsTable.java @@ -0,0 +1,60 @@ +package org.labkey.studies.query; + +import org.labkey.api.data.Container; +import 
org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.ldk.table.ContainerScopedTable; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.InvalidKeyException; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.query.SimpleUserSchema; +import org.labkey.api.query.UserSchema; +import org.labkey.api.security.User; + +import java.sql.SQLException; +import java.util.Map; + +public class LookupSetsTable extends ContainerScopedTable +{ + public LookupSetsTable(SchemaType schema, TableInfo st, ContainerFilter cf, String newPk) + { + super(schema, st, cf, newPk); + } + + @Override + public QueryUpdateService getUpdateService() + { + return new UpdateService(this); + } + + private class UpdateService extends ContainerScopedTable.UpdateService + { + public UpdateService(SimpleUserSchema.SimpleTable ti) + { + super(ti); + } + + @Override + protected void afterInsertUpdate(int count, BatchValidationException errors) + { + LookupSetsManager.get().getCache().clear(); + } + + @Override + protected Map deleteRow(User user, Container container, Map oldRowMap) throws QueryUpdateServiceException, SQLException, InvalidKeyException + { + Map row = super.deleteRow(user, container, oldRowMap); + LookupSetsManager.get().getCache().clear(); + return row; + } + + @Override + protected int truncateRows(User user, Container container) throws QueryUpdateServiceException, SQLException + { + int i = super.truncateRows(user, container); + LookupSetsManager.get().getCache().clear(); + return i; + } + } +} diff --git a/Studies/src/org/labkey/studies/query/StudiesCustomizer.java b/Studies/src/org/labkey/studies/query/StudiesCustomizer.java new file mode 100644 index 000000000..6951dceeb --- /dev/null +++ b/Studies/src/org/labkey/studies/query/StudiesCustomizer.java @@ -0,0 +1,22 @@ +package org.labkey.studies.query; + +import 
org.labkey.api.data.TableInfo; +import org.labkey.api.ldk.table.AbstractTableCustomizer; +import org.labkey.api.study.DatasetTable; + +public class StudiesCustomizer extends AbstractTableCustomizer +{ + @Override + public void customize(TableInfo tableInfo) + { + if (tableInfo instanceof DatasetTable ds) + { + performDatasetCustomization(ds); + } + } + + public void performDatasetCustomization(DatasetTable ds) + { + + } +} diff --git a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java new file mode 100644 index 000000000..ef30fc38c --- /dev/null +++ b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java @@ -0,0 +1,125 @@ +package org.labkey.studies.query; + +import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.collections.CaseInsensitiveTreeSet; +import org.labkey.api.data.Container; +import org.labkey.api.data.ContainerFilter; +import org.labkey.api.data.DbSchema; +import org.labkey.api.data.SchemaTableInfo; +import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.data.TableSelector; +import org.labkey.api.ldk.table.ContainerScopedTable; +import org.labkey.api.ldk.table.CustomPermissionsTable; +import org.labkey.api.query.FieldKey; +import org.labkey.api.query.SimpleUserSchema; +import org.labkey.api.security.User; +import org.labkey.api.security.permissions.DeletePermission; +import org.labkey.api.security.permissions.InsertPermission; +import org.labkey.api.security.permissions.ReadPermission; +import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.studies.StudiesSchema; +import org.labkey.studies.security.StudiesDataAdminPermission; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +import static org.labkey.studies.query.LookupSetsManager.TABLE_LOOKUPS; +import static org.labkey.studies.query.LookupSetsManager.TABLE_LOOKUP_SETS; + +public class 
StudiesUserSchema extends SimpleUserSchema +{ + public StudiesUserSchema(User user, Container container, DbSchema dbschema) + { + super(StudiesSchema.NAME, "", user, container, dbschema); + } + + @Override + public Set getTableNames() + { + Set available = new CaseInsensitiveTreeSet(super.getTableNames()); + available.addAll(getPropertySetNames().keySet()); + + return Collections.unmodifiableSet(available); + } + + @Override + public Set getVisibleTableNames() + { + return getTableNames(); + } + + private Container getTargetContainer() + { + return getContainer().isWorkbookOrTab() ? getContainer().getParent() : getContainer(); + } + + private Map> getPropertySetNames() + { + Map> nameMap = (Map>) LookupSetsManager.get().getCache().get(LookupSetTable.getCacheKey(getTargetContainer())); + if (nameMap != null) + { + return nameMap; + } + + nameMap = new CaseInsensitiveHashMap<>(); + + TableSelector ts = new TableSelector(_dbSchema.getTable(TABLE_LOOKUP_SETS), new SimpleFilter(FieldKey.fromString("container"), getTargetContainer().getId()), null); + Map[] rows = ts.getMapArray(); + if (rows.length > 0) + { + Set existing = super.getTableNames(); + for (Map row : rows) + { + String setname = (String)row.get("setname"); + if (setname != null && !existing.contains(setname)) + nameMap.put(setname, row); + } + } + + nameMap = Collections.unmodifiableMap(nameMap); + LookupSetsManager.get().getCache().put(LookupSetTable.getCacheKey(getTargetContainer()), nameMap); + + return nameMap; + } + + @Override + public TableInfo createTable(String name, ContainerFilter cf) + { + if (TABLE_LOOKUP_SETS.equalsIgnoreCase(name)) + { + ContainerScopedTable ret = new LookupSetsTable<>(this, createSourceTable(name), cf, "setname"); + ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class); + 
return ret.init(); + } + else if (TABLE_LOOKUPS.equalsIgnoreCase(name)) + { + CustomPermissionsTable ret = new CustomPermissionsTable<>(this, createSourceTable(name), cf); + ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(ReadPermission.class, StudiesDataAdminPermission.class); + return ret.init(); + } + + //try to find it in propertySets + Map> nameMap = getPropertySetNames(); + if (nameMap.containsKey(name)) + return createForPropertySet(this, cf, name, nameMap.get(name)); + + return super.createTable(name, cf); + } + + private LookupSetTable createForPropertySet(StudiesUserSchema us, ContainerFilter cf, String setName, Map map) + { + SchemaTableInfo table = _dbSchema.getTable(TABLE_LOOKUPS); + LookupSetTable ret = new LookupSetTable(us, table, cf, setName, map); + ret.addPermissionMapping(InsertPermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(UpdatePermission.class, StudiesDataAdminPermission.class); + ret.addPermissionMapping(DeletePermission.class, StudiesDataAdminPermission.class); + return ret.init(); + } +} diff --git a/Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java b/Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java new file mode 100644 index 000000000..fc259b15e --- /dev/null +++ b/Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2016-2019 LabKey Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.labkey.studies.security; + +import org.labkey.api.security.permissions.AbstractPermission; + +/** + * User: bimber + * Date: 1/17/13 + * Time: 7:49 PM + */ +public class StudiesDataAdminPermission extends AbstractPermission +{ + public StudiesDataAdminPermission() + { + super("StudiesDataAdminPermission", "This is the base permission used control which users can manage administrative data in the EHR, such as assignments"); + } +} diff --git a/Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java b/Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java new file mode 100644 index 000000000..a5a4261e5 --- /dev/null +++ b/Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java @@ -0,0 +1,22 @@ +package org.labkey.studies.security; + +import org.jetbrains.annotations.NotNull; +import org.labkey.api.security.permissions.DeletePermission; +import org.labkey.api.security.permissions.InsertPermission; +import org.labkey.api.security.permissions.ReadPermission; +import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.api.security.roles.AbstractRole; + +public class StudiesDataAdminRole extends AbstractRole +{ + public StudiesDataAdminRole() + { + super("StudiesDataAdmin", "These users can administer data from the studies module", ReadPermission.class, InsertPermission.class, UpdatePermission.class, DeletePermission.class, StudiesDataAdminPermission.class); + } + + @Override + public @NotNull String getDisplayName() + { + return "Studies Data Admin"; + } +} From 
7db7c214a73106d1e143b6cba54cd8a3581e774b Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 31 May 2025 12:30:11 -0700 Subject: [PATCH 37/58] Update default for JBrowse lucene cores --- .../pipeline/OrphanFilePipelineJob.java | 3 +-- jbrowse/resources/module.xml | 15 +++++++++++++ .../labkey/jbrowse/JBrowseServiceImpl.java | 22 ++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 jbrowse/resources/module.xml diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index a9a3eb796..95e6d7012 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -425,8 +425,7 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile @Override public boolean accept(File pathname) { - //50mb - return (pathname.length() >= 5e7); + return (pathname.length() >= 5e3); } }); diff --git a/jbrowse/resources/module.xml b/jbrowse/resources/module.xml new file mode 100644 index 000000000..eeeb7de4a --- /dev/null +++ b/jbrowse/resources/module.xml @@ -0,0 +1,15 @@ + + + + false + 1 + The number of cores to allow for lucene searches + + ADMIN + + + + + + + diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java index db944ea98..83cfa1f1b 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java @@ -1,11 +1,13 @@ package org.labkey.jbrowse; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.data.Container; +import 
org.labkey.api.data.ContainerManager; import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; @@ -17,7 +19,9 @@ import org.labkey.api.jbrowse.JBrowseFieldCustomizer; import org.labkey.api.jbrowse.JBrowseFieldDescriptor; import org.labkey.api.jbrowse.JBrowseService; +import org.labkey.api.module.Module; import org.labkey.api.module.ModuleLoader; +import org.labkey.api.module.ModuleProperty; import org.labkey.api.pipeline.PipeRoot; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineService; @@ -436,7 +440,23 @@ public boolean isAvailable(Container c) } } + private static final String JBrowseLuceneCoresProp = "JBrowseLuceneCores"; + public int getCoresForLuceneSearches() { - return Runtime.getRuntime().availableProcessors(); + Module m = ModuleLoader.getInstance().getModule(JBrowseModule.NAME); + ModuleProperty mp = m.getModuleProperties().get(JBrowseLuceneCoresProp); + String nCores = StringUtils.trimToNull(mp.getEffectiveValue(ContainerManager.getRoot())); + if (nCores == null) + { + return 1; + } + else if (!NumberUtils.isCreatable(nCores)) + { + _log.error("Improper value for " + JBrowseLuceneCoresProp + ": " + nCores); + } + + Number n = NumberUtils.createNumber(nCores); + + return n.intValue(); } } From 192acfd45d177826361b2bb27c67860b5d01d265 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 31 May 2025 13:05:04 -0700 Subject: [PATCH 38/58] Delay creation of ExecutorService until after server startup and until needed --- .../labkey/jbrowse/JBrowseLuceneSearch.java | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index 34d8c82f5..ccddc65fa 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -80,11 +80,6 @@ public class 
JBrowseLuceneSearch { private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries"); - private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(JBrowseServiceImpl.get().getCoresForLuceneSearches()); - private final JBrowseSession _session; - private final JsonFile _jsonFile; - private final User _user; - private final String[] specialStartPatterns = {"*:* -", "+", "-"}; private static final String ALL_DOCS = "all"; private static final String GENOMIC_POSITION = "genomicPosition"; private static final int maxCachedQueries = 1000; @@ -92,6 +87,13 @@ public class JBrowseLuceneSearch private static final Cache _cache = new LuceneIndexCache(); + private static ExecutorService _executor = null; + + private final JBrowseSession _session; + private final JsonFile _jsonFile; + private final User _user; + private final String[] specialStartPatterns = {"*:* -", "+", "-"}; + private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u) { _session = session; @@ -99,6 +101,16 @@ private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFil _user = u; } + private static synchronized ExecutorService getSearchExecutor() + { + if (_executor == null) + { + _executor = Executors.newFixedThreadPool(JBrowseServiceImpl.get().getCoresForLuceneSearches()); + } + + return _executor; + } + private Container getContainer() { return ContainerManager.getForId(_session.getContainer()); @@ -114,7 +126,7 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User private static synchronized CacheEntry getCacheEntryForSession(String trackObjectId, File indexPath) throws IOException { CacheEntry cacheEntry = _cache.get(trackObjectId); - if (SEARCH_EXECUTOR.isShutdown() || SEARCH_EXECUTOR.isTerminated()) + if (getSearchExecutor().isShutdown() || getSearchExecutor().isTerminated()) { throw new IllegalStateException("The server is 
shutting down!"); } @@ -127,7 +139,7 @@ private static synchronized CacheEntry getCacheEntryForSession(String trackObjec Directory indexDirectory = FSDirectory.open(indexPath.toPath()); LRUQueryCache queryCache = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed); IndexReader indexReader = DirectoryReader.open(indexDirectory); - IndexSearcher indexSearcher = new IndexSearcher(indexReader, SEARCH_EXECUTOR); + IndexSearcher indexSearcher = new IndexSearcher(indexReader, getSearchExecutor()); indexSearcher.setQueryCache(queryCache); indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy()); cacheEntry = new CacheEntry(queryCache, indexSearcher, indexPath); @@ -697,7 +709,10 @@ public void shutdownStarted() try { - SEARCH_EXECUTOR.shutdown(); + if (_executor != null) + { + _executor.shutdown(); + } } catch (Exception e) { @@ -706,7 +721,8 @@ public void shutdownStarted() } } - private class SearchConfig { + private static class SearchConfig + { CacheEntry cacheEntry; Query query; int pageSize; @@ -714,7 +730,8 @@ private class SearchConfig { Sort sort; List fields; - public SearchConfig(CacheEntry cacheEntry, Query query, int pageSize, int offset, Sort sort, List fields) { + public SearchConfig(CacheEntry cacheEntry, Query query, int pageSize, int offset, Sort sort, List fields) + { this.cacheEntry = cacheEntry; this.query = query; this.pageSize = pageSize; From e604c8805946bae4eb289e83da85bd5cd4db9515 Mon Sep 17 00:00:00 2001 From: hextraza Date: Mon, 2 Jun 2025 13:13:27 -0700 Subject: [PATCH 39/58] Batch exportCSV scoredocs (#331) Co-authored-by: Sebastian Benjamin --- .../labkey/jbrowse/JBrowseLuceneSearch.java | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index ccddc65fa..331781e2c 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ 
b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -380,32 +380,38 @@ private void paginateJSON(SearchConfig c, HttpServletResponse response) throws I writer.flush(); } - private void exportCSV(SearchConfig c, HttpServletResponse response) throws IOException - { + private void exportCSV(SearchConfig c, HttpServletResponse response) throws IOException { PrintWriter writer = response.getWriter(); IndexSearcher searcher = c.cacheEntry.indexSearcher; - TopFieldDocs topDocs = searcher.search(c.query, Integer.MAX_VALUE, c.sort); - writer.println(String.join(",", c.fields)); - for (ScoreDoc scoreDoc : topDocs.scoreDocs) - { - Document doc = searcher.storedFields().document(scoreDoc.doc); - List rowValues = new ArrayList<>(); + ScoreDoc lastDoc = null; + int batchSize = 1000; - for (String fieldName : c.fields) - { - String[] values = doc.getValues(fieldName); - String value = values.length > 0 - ? String.join(",", values) - : ""; - - // Escape strings - value = "\"" + value.replace("\"", "\"\"") + "\""; - rowValues.add(value); + while (true) { + TopDocs topDocs = searcher.searchAfter(lastDoc, c.query, batchSize, c.sort); + ScoreDoc[] hits = topDocs.scoreDocs; + + if (hits.length == 0) { + break; } - writer.println(String.join(",", rowValues)); + for (ScoreDoc scoreDoc : hits) { + Document doc = searcher.storedFields().document(scoreDoc.doc); + List rowValues = new ArrayList<>(); + + for (String fieldName : c.fields) { + String[] values = doc.getValues(fieldName); + String value = values.length > 0 + ? 
String.join(",", values) + : ""; + value = "\"" + value.replace("\"", "\"\"") + "\""; + rowValues.add(value); + } + + writer.println(String.join(",", rowValues)); + } + lastDoc = hits[hits.length - 1]; } writer.flush(); From 7cbcb24dc25c1ab588e278cd995b82498fdf0d7c Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Jun 2025 09:45:25 -0700 Subject: [PATCH 40/58] Add study columns --- .../resources/query/study/demographics.query.xml | 1 + Studies/resources/query/study/studyData.query.xml | 15 +++++++++++++-- .../study/datasets/datasets_manifest.xml | 4 ++-- .../study/datasets/datasets_metadata.xml | 9 +++++++++ Studies/resources/views/studiesAdmin.html | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Studies/resources/query/study/demographics.query.xml b/Studies/resources/query/study/demographics.query.xml index e1feec92e..9df49a04a 100644 --- a/Studies/resources/query/study/demographics.query.xml +++ b/Studies/resources/query/study/demographics.query.xml @@ -2,6 +2,7 @@ + diff --git a/Studies/resources/query/study/studyData.query.xml b/Studies/resources/query/study/studyData.query.xml index 7d6c3cf7f..008e71bee 100644 --- a/Studies/resources/query/study/studyData.query.xml +++ b/Studies/resources/query/study/studyData.query.xml @@ -4,8 +4,19 @@
- - + + + + + Date + + + End Date + + + Key + true +
diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml index a3202bcb3..de46c5955 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml @@ -4,13 +4,13 @@ - + - + diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml index 1e2aecf5e..c1bbd3bd1 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml @@ -279,9 +279,18 @@ timestamp + + varchar + varchar + + varchar + + + varchar + Project Assignment diff --git a/Studies/resources/views/studiesAdmin.html b/Studies/resources/views/studiesAdmin.html index a17cc7e92..c4a455ab3 100644 --- a/Studies/resources/views/studiesAdmin.html +++ b/Studies/resources/views/studiesAdmin.html @@ -22,7 +22,7 @@ sections: [{ header: 'Configuration/Data Management', items: [{ - name: 'Import/Re-import MCC Study', + name: 'Import/Re-import Study and Reference Data', url: LABKEY.ActionURL.buildURL('studies', 'importStudy.view') }] }] From f5f8debc6beea75fa5d5bd31bd63a80673d8ab2d Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Jun 2025 09:47:33 -0700 Subject: [PATCH 41/58] Update JBrowse packages --- jbrowse/package-lock.json | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json index 3938e1886..a0f488c77 100644 --- a/jbrowse/package-lock.json +++ b/jbrowse/package-lock.json @@ -3091,9 +3091,9 @@ } }, "node_modules/@labkey/api": { - "version": "1.40.0", - "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.40.0.tgz", - "integrity": 
"sha512-ezCVNWtLkzbH5K/CoEb69gK5q6QSgQpG9FVJse3hPR1t5Bxpt6Mt0RbmKKdOfdJcNcA30IfpcbzEOc1gz7vhZQ==" + "version": "1.41.2", + "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.41.2.tgz", + "integrity": "sha512-ninfc/+Sj5+8Zla9bY2j/4fSy41OS27YAHKtDFPnu52QkC8WsOYh3JFI5PkU6Rn+xIp0In4P6d5Qn/yluJRC/w==" }, "node_modules/@labkey/build": { "version": "8.5.0", @@ -3133,12 +3133,12 @@ } }, "node_modules/@labkey/components": { - "version": "6.38.1", - "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.38.1.tgz", - "integrity": "sha512-fv47V+NL390BYMku9+rbW1UUtM4E6zWX/TkxK65lBwkyi+ZuM9FZHWLro1+tPS0M3Vc+QFVXTP6iRs1gnNrcCQ==", + "version": "6.45.0", + "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.45.0.tgz", + "integrity": "sha512-KT4C+NdlS6T54GF8jsde3cYm6Dt88AEZFw+dA39N83MGW3FiJ63s3DhF55dx8ImheCnchYlpK5xDF5/JI/Ux7A==", "dependencies": { "@hello-pangea/dnd": "18.0.1", - "@labkey/api": "1.40.0", + "@labkey/api": "1.41.2", "@testing-library/dom": "~10.4.0", "@testing-library/jest-dom": "~6.6.3", "@testing-library/react": "~16.3.0", @@ -11349,9 +11349,9 @@ } }, "node_modules/tar-fs": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.2.tgz", - "integrity": "sha512-EsaAXwxmx8UB7FRKqeozqEPop69DXcmYwTQwXvyAPF352HJsPdkVhvTaDPYqfNgruveJIJy3TA2l+2zj8LJIJA==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.3.tgz", + "integrity": "sha512-090nwYJDmlhwFwEW3QQl+vaNnxsO2yVsd45eTKRBzSzu+hlb1w2K9inVq5b0ngXuLVqQ4ApvsUHHnu/zQNkWAg==", "dependencies": { "chownr": "^1.1.1", "mkdirp-classic": "^0.5.2", From 1a845e1a62ab271cf206be1e9bba43119edcc919 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 08:31:14 -0700 Subject: [PATCH 42/58] Store more information about cluster jobs --- cluster/resources/schemas/cluster.xml | 12 ++ 
.../postgresql/cluster-15.24-15.25.sql | 4 + .../sqlserver/cluster-15.24-15.25.sql | 4 + .../src/org/labkey/cluster/ClusterModule.java | 2 +- .../labkey/cluster/pipeline/ClusterJob.java | 33 ++++ .../pipeline/SlurmExecutionEngine.java | 147 ++++++++++++++++++ 6 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql create mode 100644 cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql diff --git a/cluster/resources/schemas/cluster.xml b/cluster/resources/schemas/cluster.xml index 34b3c06f5..e811450e4 100644 --- a/cluster/resources/schemas/cluster.xml +++ b/cluster/resources/schemas/cluster.xml @@ -59,6 +59,18 @@ yyyy-MM-dd HH:mm + + Cluster Account + + + Duration (seconds) + + + CPU Used + + + GPU Used + diff --git a/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql new file mode 100644 index 000000000..8b997f87d --- /dev/null +++ b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql @@ -0,0 +1,4 @@ +ALTER TABLE cluster.clusterJobs ADD clusterAccount varchar(1000); +ALTER TABLE cluster.clusterJobs ADD duration double; +ALTER TABLE cluster.clusterJobs ADD cpuUsed int; +ALTER TABLE cluster.clusterJobs ADD gpuUsed int; diff --git a/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql new file mode 100644 index 000000000..d05005115 --- /dev/null +++ b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql @@ -0,0 +1,4 @@ +ALTER TABLE cluster.clusterJobs ADD clusterAccount nvarchar(1000); +ALTER TABLE cluster.clusterJobs ADD duration double; +ALTER TABLE cluster.clusterJobs ADD cpuUsed int; +ALTER TABLE cluster.clusterJobs ADD gpuUsed int; diff --git a/cluster/src/org/labkey/cluster/ClusterModule.java b/cluster/src/org/labkey/cluster/ClusterModule.java index 
e7638ddcc..fa3f86a7b 100644 --- a/cluster/src/org/labkey/cluster/ClusterModule.java +++ b/cluster/src/org/labkey/cluster/ClusterModule.java @@ -66,7 +66,7 @@ public String getName() @Override public Double getSchemaVersion() { - return 15.24; + return 15.25; } @Override diff --git a/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java b/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java index dd8ed85f7..afe3c5024 100644 --- a/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java +++ b/cluster/src/org/labkey/cluster/pipeline/ClusterJob.java @@ -22,6 +22,9 @@ public class ClusterJob private String _location; private String _activeTaskId; private String _clusterUser; + private Integer _duration; + private Integer _cpuUsed; + private Integer _gpuUsed; private String _hostname; private Date _logModified; @@ -195,4 +198,34 @@ public void setLogModified(Date logModified) { _logModified = logModified; } + + public Integer getDuration() + { + return _duration; + } + + public void setDuration(Integer duration) + { + _duration = duration; + } + + public Integer getCpuUsed() + { + return _cpuUsed; + } + + public void setCpuUsed(Integer cpuUsed) + { + _cpuUsed = cpuUsed; + } + + public Integer getGpuUsed() + { + return _gpuUsed; + } + + public void setGpuUsed(Integer gpuUsed) + { + _gpuUsed = gpuUsed; + } } diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 017ce8e46..6cad27ddf 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -1,6 +1,7 @@ package org.labkey.cluster.pipeline; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -11,6 +12,7 @@ import 
org.labkey.api.collections.CaseInsensitiveHashSet; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; +import org.labkey.api.data.Table; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineService; @@ -19,6 +21,7 @@ import org.labkey.api.util.Pair; import org.labkey.api.writer.PrintWriters; import org.labkey.cluster.ClusterManager; +import org.labkey.cluster.ClusterSchema; import org.labkey.cluster.ClusterServiceImpl; import org.quartz.JobExecutionException; @@ -94,6 +97,7 @@ protected List submitJobToCluster(ClusterJob j, PipelineJob job) throws line = line.replaceFirst("^Submitted batch job", ""); line = line.trim(); j.setClusterId(line); + j.setClusterUser(ClusterServiceImpl.get().getClusterUser(job.getContainer())); break; } @@ -129,6 +133,8 @@ protected Set updateStatusForAllJobs() throws PipelineJobException int stateIdx = -1; int hostnameIdx = -1; int reasonIdx = -1; + int elapsedIdx = -1; + int resourcesIdx = -1; for (String line : ret) { line = StringUtils.trimToNull(line); @@ -145,6 +151,8 @@ protected Set updateStatusForAllJobs() throws PipelineJobException stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); reasonIdx = header.indexOf("REASON"); + elapsedIdx = header.indexOf("ELAPSEDRAW"); + resourcesIdx = header.indexOf("ALLOCTRES"); if (stateIdx == -1) { @@ -177,10 +185,13 @@ protected Set updateStatusForAllJobs() throws PipelineJobException } else { + Map propsToUpdate = new HashMap<>(); + String hostname = hostnameIdx != -1 && tokens.length > hostnameIdx ? 
StringUtils.trimToNull(tokens[hostnameIdx]) : null; if (hostname != null) { j.setHostname(hostname); + propsToUpdate.put("hostname", hostname); } Pair status = translateSlurmStatusToTaskStatus(StringUtils.trimToNull(tokens[stateIdx])); @@ -199,6 +210,35 @@ protected Set updateStatusForAllJobs() throws PipelineJobException } } + if (resourcesIdx > -1) + { + j.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); + if (j.getCpuUsed() != null) + { + propsToUpdate.put("cpuUsed", j.getCpuUsed()); + } + + j.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); + if (j.getGpuUsed() != null) + { + propsToUpdate.put("gpuUsed", j.getGpuUsed()); + } + } + + if (elapsedIdx > -1) + { + j.setDuration(Integer.parseInt(tokens[elapsedIdx])); + if (j.getDuration() != null) + { + propsToUpdate.put("duration", j.getDuration()); + } + } + + if (!propsToUpdate.isEmpty()) + { + updateClusterSubmission(j, propsToUpdate); + } + updateJobStatus(status == null ? null : status.first, j, status == null ? null : status.second); jobsUpdated.add(j.getClusterId()); } @@ -223,6 +263,40 @@ protected Set updateStatusForAllJobs() throws PipelineJobException return jobsUpdated; } + // parses AllocTRES, such as: cpu=4,gres/disk=1028,mem=20000M,node=1 + private Integer findIntValue(String input, String key) + { + input = StringUtils.trimToNull(input); + if (input == null) + { + return null; + } + + String[] tokens = input.split(","); + for (String token : tokens) + { + if (token.startsWith(key + "=")) + { + String val = token.split("=")[1]; + if (!NumberUtils.isCreatable(val)) + { + _log.error("Non-numeric value for: " + key + ", input: " + input); + return null; + } + + return Integer.parseInt(val); + } + } + + return null; + } + + private void updateClusterSubmission(ClusterJob j, Map toUpdate) + { + toUpdate.put("rowid", j.getRowId()); + Table.update(null, ClusterSchema.getInstance().getSchema().getTable(ClusterSchema.CLUSTER_JOBS), toUpdate, j.getRowId()); + } + @Override protected Pair 
getStatusForJob(ClusterJob job, Container c) { @@ -248,6 +322,9 @@ protected Pair getStatusForJob(ClusterJob job, Container c) int hostnameIdx = -1; int maxRssIdx = -1; int reqMemIdx = -1; + int elapsedIdx = -1; + int resourcesIdx = -1; + String reqMem = null; for (String line : ret) { @@ -266,6 +343,8 @@ protected Pair getStatusForJob(ClusterJob job, Container c) hostnameIdx = header.indexOf("NODELIST"); maxRssIdx = header.indexOf("MAXRSS"); reqMemIdx = header.indexOf("REQMEM"); + elapsedIdx = header.indexOf("ELAPSEDRAW"); + resourcesIdx = header.indexOf("ALLOCTRES"); if (stateIdx == -1) { @@ -294,6 +373,8 @@ else if (headerFound) statuses.add(StringUtils.trimToNull(tokens[stateIdx])); } + Map propsToUpdate = new HashMap<>(); + if (hostnameIdx > -1) { String hostname = tokens.length > hostnameIdx ? StringUtils.trimToNull(tokens[hostnameIdx]) : null; @@ -302,6 +383,7 @@ else if (headerFound) if (job.getHostname() == null || !job.getHostname().equals(hostname)) { job.setHostname(hostname); + propsToUpdate.put("hostname", hostname); } } } @@ -316,6 +398,35 @@ else if (headerFound) } + if (resourcesIdx > -1) + { + job.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); + if (job.getCpuUsed() != null) + { + propsToUpdate.put("cpuUsed", job.getCpuUsed()); + } + + job.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); + if (job.getGpuUsed() != null) + { + propsToUpdate.put("gpuUsed", job.getGpuUsed()); + } + } + + if (elapsedIdx > -1) + { + job.setDuration(Integer.parseInt(tokens[elapsedIdx])); + if (job.getDuration() != null) + { + propsToUpdate.put("duration", job.getDuration()); + } + } + + if (!propsToUpdate.isEmpty()) + { + updateClusterSubmission(job, propsToUpdate); + } + // NOTE: if the line has blank ending columns, trimmed lines might lack that value if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length) { @@ -725,6 +836,8 @@ private Pair getStatusFromQueue(ClusterJob job) int jobIdx = -1; int stateIdx = -1; int 
hostnameIdx = -1; + int elapsedIdx = -1; + int resourcesIdx = -1; for (String line : ret) { @@ -741,6 +854,8 @@ private Pair getStatusFromQueue(ClusterJob job) jobIdx = header.indexOf("JOBID"); stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); + elapsedIdx = header.indexOf("ELAPSEDRAW"); + resourcesIdx = header.indexOf("ALLOCTRES"); if (stateIdx == -1) { @@ -765,15 +880,47 @@ private Pair getStatusFromQueue(ClusterJob job) String id = StringUtils.trimToNull(tokens[jobIdx]); if (job.getClusterId().equals(id)) { + Map propsToUpdate = new HashMap<>(); + if (hostnameIdx > -1) { String hostname = tokens.length > hostnameIdx ? StringUtils.trimToNull(tokens[hostnameIdx]) : null; if (hostname != null) { job.setHostname(hostname); + propsToUpdate.put("hostname", hostname); + } + } + + if (resourcesIdx > -1) + { + job.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); + if (job.getCpuUsed() != null) + { + propsToUpdate.put("cpuUsed", job.getCpuUsed()); + } + + job.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); + if (job.getGpuUsed() != null) + { + propsToUpdate.put("gpuUsed", job.getGpuUsed()); } } + if (elapsedIdx > -1) + { + job.setDuration(Integer.parseInt(tokens[elapsedIdx])); + if (job.getDuration() != null) + { + propsToUpdate.put("duration", job.getDuration()); + } + } + + if (!propsToUpdate.isEmpty()) + { + updateClusterSubmission(job, propsToUpdate); + } + return translateSlurmStatusToTaskStatus(StringUtils.trimToNull(tokens[stateIdx])); } } From 9a6f11ccd6bdf3dbdf4ee07aa365279163cee0a8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 09:56:24 -0700 Subject: [PATCH 43/58] Fix sql error --- .../schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql index d05005115..0e742b307 100644 --- 
a/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql +++ b/cluster/resources/schemas/dbscripts/sqlserver/cluster-15.24-15.25.sql @@ -1,4 +1,4 @@ ALTER TABLE cluster.clusterJobs ADD clusterAccount nvarchar(1000); -ALTER TABLE cluster.clusterJobs ADD duration double; +ALTER TABLE cluster.clusterJobs ADD duration double precision; ALTER TABLE cluster.clusterJobs ADD cpuUsed int; ALTER TABLE cluster.clusterJobs ADD gpuUsed int; From 004facdf1ba1d3bdcaa4edac07fb00039a7a8942 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 13:25:33 -0700 Subject: [PATCH 44/58] Bugfixes to SlurmExecutionEngine --- .../pipeline/SlurmExecutionEngine.java | 97 ++++++------------- 1 file changed, 29 insertions(+), 68 deletions(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 6cad27ddf..09266900a 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -133,8 +133,7 @@ protected Set updateStatusForAllJobs() throws PipelineJobException int stateIdx = -1; int hostnameIdx = -1; int reasonIdx = -1; - int elapsedIdx = -1; - int resourcesIdx = -1; + for (String line : ret) { line = StringUtils.trimToNull(line); @@ -151,8 +150,6 @@ protected Set updateStatusForAllJobs() throws PipelineJobException stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); reasonIdx = header.indexOf("REASON"); - elapsedIdx = header.indexOf("ELAPSEDRAW"); - resourcesIdx = header.indexOf("ALLOCTRES"); if (stateIdx == -1) { @@ -210,30 +207,6 @@ protected Set updateStatusForAllJobs() throws PipelineJobException } } - if (resourcesIdx > -1) - { - j.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); - if (j.getCpuUsed() != null) - { - propsToUpdate.put("cpuUsed", j.getCpuUsed()); - } - - j.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); - if (j.getGpuUsed() != 
null) - { - propsToUpdate.put("gpuUsed", j.getGpuUsed()); - } - } - - if (elapsedIdx > -1) - { - j.setDuration(Integer.parseInt(tokens[elapsedIdx])); - if (j.getDuration() != null) - { - propsToUpdate.put("duration", j.getDuration()); - } - } - if (!propsToUpdate.isEmpty()) { updateClusterSubmission(j, propsToUpdate); @@ -293,6 +266,8 @@ private Integer findIntValue(String input, String key) private void updateClusterSubmission(ClusterJob j, Map toUpdate) { + _log.debug("Updating job: " + j.getJobId() + ", " + toUpdate.keySet().stream().map(x -> x + "=" + toUpdate.get(x)).collect(Collectors.joining(", "))); + toUpdate.put("rowid", j.getRowId()); Table.update(null, ClusterSchema.getInstance().getSchema().getTable(ClusterSchema.CLUSTER_JOBS), toUpdate, j.getRowId()); } @@ -315,6 +290,7 @@ protected Pair getStatusForJob(ClusterJob job, Container c) //verify success boolean headerFound = false; boolean foundJobLine = false; + List fieldWidths = new ArrayList<>(); LinkedHashSet statuses = new LinkedHashSet<>(); List header; int jobIdx = -1; @@ -360,24 +336,24 @@ protected Pair getStatusForJob(ClusterJob job, Container c) } else if (foundJobLine && line.startsWith("------------")) { + fieldWidths.addAll(Arrays.asList(line.split(" "))); headerFound = true; } else if (headerFound) { try { - String[] tokens = line.split("( )+"); - String id = StringUtils.trimToNull(tokens[jobIdx]); + String id = StringUtils.trimToNull(extractField(line, fieldWidths, jobIdx)); if (id.equals(job.getClusterId())) { - statuses.add(StringUtils.trimToNull(tokens[stateIdx])); + statuses.add(StringUtils.trimToNull(extractField(line, fieldWidths, stateIdx))); } Map propsToUpdate = new HashMap<>(); if (hostnameIdx > -1) { - String hostname = tokens.length > hostnameIdx ? 
StringUtils.trimToNull(tokens[hostnameIdx]) : null; + String hostname = StringUtils.trimToNull(extractField(line, fieldWidths, hostnameIdx)); if (hostname != null) { if (job.getHostname() == null || !job.getHostname().equals(hostname)) @@ -388,9 +364,9 @@ else if (headerFound) } } - if (reqMemIdx > -1 && reqMemIdx < tokens.length) + if (reqMemIdx > -1) { - String val = StringUtils.trimToNull(tokens[reqMemIdx]); + String val = StringUtils.trimToNull(extractField(line, fieldWidths, reqMemIdx)); if (val != null) { reqMem = val; @@ -400,13 +376,13 @@ else if (headerFound) if (resourcesIdx > -1) { - job.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); + job.setCpuUsed(findIntValue(extractField(line, fieldWidths, resourcesIdx), "cpu")); if (job.getCpuUsed() != null) { propsToUpdate.put("cpuUsed", job.getCpuUsed()); } - job.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); + job.setGpuUsed(findIntValue(extractField(line, fieldWidths, resourcesIdx), "gpu")); if (job.getGpuUsed() != null) { propsToUpdate.put("gpuUsed", job.getGpuUsed()); @@ -415,7 +391,7 @@ else if (headerFound) if (elapsedIdx > -1) { - job.setDuration(Integer.parseInt(tokens[elapsedIdx])); + job.setDuration(Integer.parseInt(extractField(line, fieldWidths, elapsedIdx))); if (job.getDuration() != null) { propsToUpdate.put("duration", job.getDuration()); @@ -428,11 +404,11 @@ else if (headerFound) } // NOTE: if the line has blank ending columns, trimmed lines might lack that value - if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length) + if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1) { try { - String maxRSS = StringUtils.trimToNull(tokens[maxRssIdx]); + String maxRSS = StringUtils.trimToNull(extractField(line, fieldWidths, maxRssIdx)); if (maxRSS != null) { double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); @@ -471,7 +447,7 @@ else if (headerFound) } catch (Exception e) { - _log.error("Error parsing line: " + line, e); + 
_log.error("Error parsing line: [" + line + "]", e); throw e; } } @@ -507,6 +483,19 @@ else if (headerFound) return null; } + private String extractField(String line, List fieldWidths, int idx) + { + int start = 0; + for (int i = 0; i < idx; i++) + { + start += fieldWidths.get(i).length() + 1; + } + + int end = start + fieldWidths.get(idx).length(); + + return line.substring(start, end); + } + @Override protected boolean removeJob(ClusterJob clusterJob) { @@ -836,8 +825,6 @@ private Pair getStatusFromQueue(ClusterJob job) int jobIdx = -1; int stateIdx = -1; int hostnameIdx = -1; - int elapsedIdx = -1; - int resourcesIdx = -1; for (String line : ret) { @@ -854,8 +841,6 @@ private Pair getStatusFromQueue(ClusterJob job) jobIdx = header.indexOf("JOBID"); stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); - elapsedIdx = header.indexOf("ELAPSEDRAW"); - resourcesIdx = header.indexOf("ALLOCTRES"); if (stateIdx == -1) { @@ -892,30 +877,6 @@ private Pair getStatusFromQueue(ClusterJob job) } } - if (resourcesIdx > -1) - { - job.setCpuUsed(findIntValue(tokens[resourcesIdx], "cpu")); - if (job.getCpuUsed() != null) - { - propsToUpdate.put("cpuUsed", job.getCpuUsed()); - } - - job.setGpuUsed(findIntValue(tokens[resourcesIdx], "gpu")); - if (job.getGpuUsed() != null) - { - propsToUpdate.put("gpuUsed", job.getGpuUsed()); - } - } - - if (elapsedIdx > -1) - { - job.setDuration(Integer.parseInt(tokens[elapsedIdx])); - if (job.getDuration() != null) - { - propsToUpdate.put("duration", job.getDuration()); - } - } - if (!propsToUpdate.isEmpty()) { updateClusterSubmission(job, propsToUpdate); From 826a3dd48fae04bbfd99c0131912d17b80e1dc84 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 13:50:56 -0700 Subject: [PATCH 45/58] Bugfixes to SlurmExecutionEngine --- .../pipeline/SlurmExecutionEngine.java | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java 
b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 09266900a..4d4efc1bf 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -343,17 +343,17 @@ else if (headerFound) { try { - String id = StringUtils.trimToNull(extractField(line, fieldWidths, jobIdx)); - if (id.equals(job.getClusterId())) + String id =extractField(line, fieldWidths, jobIdx); + if (id != null && id.equals(job.getClusterId())) { - statuses.add(StringUtils.trimToNull(extractField(line, fieldWidths, stateIdx))); + statuses.add(extractField(line, fieldWidths, stateIdx)); } Map propsToUpdate = new HashMap<>(); if (hostnameIdx > -1) { - String hostname = StringUtils.trimToNull(extractField(line, fieldWidths, hostnameIdx)); + String hostname = extractField(line, fieldWidths, hostnameIdx); if (hostname != null) { if (job.getHostname() == null || !job.getHostname().equals(hostname)) @@ -366,7 +366,7 @@ else if (headerFound) if (reqMemIdx > -1) { - String val = StringUtils.trimToNull(extractField(line, fieldWidths, reqMemIdx)); + String val = extractField(line, fieldWidths, reqMemIdx); if (val != null) { reqMem = val; @@ -391,10 +391,14 @@ else if (headerFound) if (elapsedIdx > -1) { - job.setDuration(Integer.parseInt(extractField(line, fieldWidths, elapsedIdx))); - if (job.getDuration() != null) + String durationString = extractField(line, fieldWidths, elapsedIdx); + if (durationString != null) { - propsToUpdate.put("duration", job.getDuration()); + job.setDuration(Integer.parseInt(durationString)); + if (job.getDuration() != null) + { + propsToUpdate.put("duration", job.getDuration()); + } } } @@ -408,7 +412,7 @@ else if (headerFound) { try { - String maxRSS = StringUtils.trimToNull(extractField(line, fieldWidths, maxRssIdx)); + String maxRSS = extractField(line, fieldWidths, maxRssIdx); if (maxRSS != null) { double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); 
@@ -493,7 +497,7 @@ private String extractField(String line, List fieldWidths, int idx) int end = start + fieldWidths.get(idx).length(); - return line.substring(start, end); + return StringUtils.trimToNull(line.substring(start, end)); } @Override From a570fa42b39fd0277407d037805273779590684d Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 14:05:04 -0700 Subject: [PATCH 46/58] Fix sql type --- .../schemas/dbscripts/postgresql/cluster-15.24-15.25.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql index 8b997f87d..70e847645 100644 --- a/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql +++ b/cluster/resources/schemas/dbscripts/postgresql/cluster-15.24-15.25.sql @@ -1,4 +1,4 @@ ALTER TABLE cluster.clusterJobs ADD clusterAccount varchar(1000); -ALTER TABLE cluster.clusterJobs ADD duration double; +ALTER TABLE cluster.clusterJobs ADD duration double precision; ALTER TABLE cluster.clusterJobs ADD cpuUsed int; ALTER TABLE cluster.clusterJobs ADD gpuUsed int; From e6225b0f3e6cb9e30abe767548c556236d943ff2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Jun 2025 15:00:36 -0700 Subject: [PATCH 47/58] Debug slurm parsing --- .../labkey/cluster/pipeline/SlurmExecutionEngine.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 4d4efc1bf..7d2f14066 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -497,7 +497,16 @@ private String extractField(String line, List fieldWidths, int idx) int end = start + fieldWidths.get(idx).length(); - return StringUtils.trimToNull(line.substring(start, end)); + try + { + return 
StringUtils.trimToNull(line.substring(start, end)); + } + catch (Exception e) + { + _log.error("Unable to parse slurm field at idx: " + idx + ". Line: [" + line + "]"); + _log.error("fieldWidths: " + StringUtils.join(fieldWidths, "|"), e); + return null; + } } @Override From 7e92b4790f85de491e1b2c37aa0fb21f6012a08d Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 08:41:33 -0700 Subject: [PATCH 48/58] No need to copy files locally in AbstractSingleCellHandler --- .../analysis/AbstractSingleCellHandler.java | 93 ++++--------------- 1 file changed, 16 insertions(+), 77 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 6e9c2fb0d..4f0b32f92 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -36,7 +36,6 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.singlecell.CellHashingService; import org.labkey.api.singlecell.pipeline.AbstractSingleCellPipelineStep; -import org.labkey.api.singlecell.pipeline.AbstractSingleCellStep; import org.labkey.api.singlecell.pipeline.SingleCellRawDataStep; import org.labkey.api.singlecell.pipeline.SingleCellStep; import org.labkey.api.util.FileUtil; @@ -385,87 +384,27 @@ public void processFilesRemote(List inputFiles, JobContext c List currentFiles; Set originalInputs = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toSet()); - Map localCopyToOrig = new HashMap<>(); + Map inputFileMap = new HashMap<>(); if (_doProcessRawCounts) { currentFiles = processRawCounts(ctx, inputFiles, basename); } else { - try - { - Set distinctIds = new HashSet<>(); - Set copiedFiles = new HashSet<>(); + Set distinctIds = new HashSet<>(); - currentFiles = new ArrayList<>(); - for (SequenceOutputFile so : inputFiles) + 
currentFiles = new ArrayList<>(); + for (SequenceOutputFile so : inputFiles) + { + String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); + if (distinctIds.contains(datasetId)) { - String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); - if (distinctIds.contains(datasetId)) - { - throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId); - } - distinctIds.add(datasetId); - - //ensure local copy: - if (copiedFiles.contains(so.getFile().getName())) - { - throw new PipelineJobException("Duplicate files names in input data: " + so.getFile().getName()); - } - copiedFiles.add(so.getFile().getName()); - - File local = new File(ctx.getOutputDir(), so.getFile().getName()); - if (local.exists()) - { - local.delete(); - } - - FileUtils.copyFile(so.getFile(), local); - _resumer.getFileManager().addIntermediateFile(local); - - File cellBarcodes = CellHashingServiceImpl.get().getCellBarcodesFromSeurat(so.getFile(), false); - if (cellBarcodes.exists()) - { - ctx.getLogger().debug("Also making local copy of cellBarcodes TSV: " + cellBarcodes.getPath()); - File cellBarcodesLocal = new File(ctx.getOutputDir(), cellBarcodes.getName()); - if (cellBarcodesLocal.exists()) - { - cellBarcodesLocal.delete(); - } - - FileUtils.copyFile(cellBarcodes, cellBarcodesLocal); - _resumer.getFileManager().addIntermediateFile(cellBarcodesLocal); - } - else - { - ctx.getLogger().debug("cellBarcodes TSV not found, expected: " + cellBarcodes.getPath()); - } - - File metadataFile = CellHashingServiceImpl.get().getMetaTableFromSeurat(so.getFile(), false); - if (metadataFile.exists()) - { - ctx.getLogger().debug("Also making local copy of metadata TSV: " + metadataFile.getPath()); - File metadataFileLocal = new File(ctx.getOutputDir(), metadataFile.getName()); - if 
(metadataFileLocal.exists()) - { - metadataFileLocal.delete(); - } - - FileUtils.copyFile(metadataFile, metadataFileLocal); - _resumer.getFileManager().addIntermediateFile(metadataFileLocal); - } - else - { - ctx.getLogger().warn("metadataFile TSV not found, expected: " + metadataFile.getPath()); - } - - currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, local, so)); - localCopyToOrig.put(local, so.getFile()); + throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId); } - } - catch (IOException e) - { - throw new PipelineJobException(e); + distinctIds.add(datasetId); + + currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, so.getFile(), so)); + inputFileMap.put(so.getName(), so.getFile()); } } @@ -671,14 +610,14 @@ else if (inputFiles.size() == 1) //This indicates the job processed an input file, but did not create a new object (like running FindMarkers) boolean skipOutput = false; - if (localCopyToOrig.containsKey(output.getFile())) + if (inputFileMap.containsKey(output.getFile().getName())) { try { - ctx.getLogger().debug("Comparing file context of output to determine if it matches input: "+ output.getFile().getName()); - ctx.getLogger().debug("Original file: " + localCopyToOrig.get(output.getFile())); + ctx.getLogger().debug("Comparing file context of output to determine if it matches input: " + output.getFile().getName()); + ctx.getLogger().debug("Original file: " + inputFileMap.get(output.getFile().getName())); ctx.getLogger().debug("Pipeline output file: " + output.getFile()); - if (FileUtils.contentEquals(localCopyToOrig.get(output.getFile()), output.getFile())) + if (FileUtils.contentEquals(inputFileMap.get(output.getFile().getName()), output.getFile())) { ctx.getLogger().info("Sequence output is the same as an input, will not re-create output for seurat object: " + output.getFile().getPath()); skipOutput = true; From b07522bdc68e232f53cb12dfd595fa14e9a9289a Mon Sep 17 00:00:00 
2001 From: bbimber Date: Mon, 9 Jun 2025 09:04:52 -0700 Subject: [PATCH 49/58] Add consolidate_genomicsdb_array --- SequenceAnalysis/pipeline_code/sequence_tools_install.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 3e31985f4..91dbd82bf 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -309,6 +309,10 @@ then unzip gatk-4.6.1.0.zip cp ./gatk-4.6.1.0/gatk-package-4.6.1.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar + + wget $WGET_OPTS https://github.com/GenomicsDB/GenomicsDB/releases/download/v1.4.3/consolidate_genomicsdb_array + chmod +x consolidate_genomicsdb_array + cp consolidate_genomicsdb_array $LKTOOLS_DIR/consolidate_genomicsdb_array else echo "Already installed" fi From eca5f07ed65ddf9e331aeada777f6ad8bddd1be2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 12:45:43 -0700 Subject: [PATCH 50/58] Remove direct access to GenomicsDBImport --- SequenceAnalysis/pipeline_code/sequence_tools_install.sh | 4 ---- .../org/labkey/sequenceanalysis/SequenceAnalysisModule.java | 4 ---- .../run/util/AbstractGenomicsDBImportHandler.java | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 91dbd82bf..3e31985f4 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -309,10 +309,6 @@ then unzip gatk-4.6.1.0.zip cp ./gatk-4.6.1.0/gatk-package-4.6.1.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar - - wget $WGET_OPTS https://github.com/GenomicsDB/GenomicsDB/releases/download/v1.4.3/consolidate_genomicsdb_array - chmod +x consolidate_genomicsdb_array - cp consolidate_genomicsdb_array $LKTOOLS_DIR/consolidate_genomicsdb_array else echo 
"Already installed" fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index f10c1c9ec..7a10338fd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -158,8 +158,6 @@ import org.labkey.sequenceanalysis.run.reference.VirusReferenceLibraryStep; import org.labkey.sequenceanalysis.run.util.CombineGVCFsHandler; import org.labkey.sequenceanalysis.run.util.FastqcRunner; -import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler; -import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler; import org.labkey.sequenceanalysis.run.util.SVAnnotateStep; import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; import org.labkey.sequenceanalysis.run.variant.GenotypeConcordanceStep; @@ -397,8 +395,6 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new RecalculateSequenceMetricsHandler()); SequenceAnalysisService.get().registerFileHandler(new ListVcfSamplesHandler()); SequenceAnalysisService.get().registerFileHandler(new MultiQCBamHandler()); - SequenceAnalysisService.get().registerFileHandler(new GenomicsDBImportHandler()); - SequenceAnalysisService.get().registerFileHandler(new GenomicsDBAppendHandler()); SequenceAnalysisService.get().registerFileHandler(new MergeLoFreqVcfHandler()); SequenceAnalysisService.get().registerFileHandler(new PangolinHandler()); SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java index a44f756c5..3fe1aaf32 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java @@ -644,7 +644,7 @@ else if (genomeIds.isEmpty()) wrapper.execute(genome, vcfsToProcess, workingDestinationWorkspaceFolder, intervals, options, _append); - if (ctx.getParams().optBoolean("consolidate", true)) + if (ctx.getParams().optBoolean("consolidate", false)) { ctx.getLogger().info("Will consolidate the workspace using consolidate_genomicsdb_array"); doConsolidate(ctx, workingDestinationWorkspaceFolder, genome); From 0611e5bcb91567c4205f8ea2248eaea080cf60a6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 12:46:14 -0700 Subject: [PATCH 51/58] Add ResultsOOODisplayColumn --- .../study/datasets/datasets_metadata.xml | 7 ++ .../query/ResultsOOODisplayColumn.java | 68 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml index c1bbd3bd1..4db53dd2a 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml @@ -173,6 +173,10 @@ varchar + true + + org.labkey.studies.query.ResultsOOODisplayColumn + varchar @@ -288,6 +292,9 @@ varchar + + Cohort ID + varchar diff --git a/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java new file mode 100644 index 000000000..ef2a9923a --- /dev/null +++ b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java @@ -0,0 +1,68 @@ +package org.labkey.studies.query; + +import org.apache.commons.lang3.StringUtils; +import org.labkey.api.data.ColumnInfo; +import org.labkey.api.data.DataColumn; +import 
org.labkey.api.data.RenderContext; +import org.labkey.api.query.FieldKey; + +import java.text.DecimalFormat; +import java.util.Set; + +public class ResultsOOODisplayColumn extends DataColumn +{ + public ResultsOOODisplayColumn(ColumnInfo col) + { + super(col); + } + + @Override + public Class getDisplayValueClass() + { + return String.class; + } + + @Override + public Object getDisplayValue(RenderContext ctx) + { + Object result = ctx.get(getBoundColumn().getFieldKey(), Double.class); + if (result == null) + { + return null; + } + + String oor = ctx.get(getOOR(), String.class); + if (StringUtils.isEmpty(oor)) + { + return result; + } + + if (getBoundColumn().getFormat() != null) + { + DecimalFormat fmt = new DecimalFormat(getBoundColumn().getFormat()); + result = fmt.format(result); + } + + return oor + result; + } + + private FieldKey getOOR() + { + FieldKey oor = FieldKey.fromString("resultOOOIndicator"); + if (getBoundColumn() != null) + { + return FieldKey.fromParts(getBoundColumn().getFieldKey().getParent(), oor); + } + else + { + return oor; + } + } + + @Override + public void addQueryFieldKeys(Set keys) + { + super.addQueryFieldKeys(keys); + keys.add(getOOR()); + } +} From e7c6a9b6a9ae0dab31db32cfc5d4eec5d6efa27d Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 16:50:13 -0700 Subject: [PATCH 52/58] Add metadata for many SIV study queries (#333) * Add metadata for many SIV study queries * Add genetics table --- .../queries/study/assignment.query.xml | 28 ++++++++ .../study/demographics.query.xml | 28 ++++++++ .../resources/queries/study/flags.query.xml | 22 +++++++ .../queries/study/genetics.query.xml | 30 +++++++++ .../queries/study/immunizations.query.xml | 33 ++++++++++ .../resources/queries/study/labwork.query.xml | 34 ++++++++++ .../queries/study/procedures.query.xml | 22 +++++++ .../resources/queries/study/samples.query.xml | 31 +++++++++ .../study/studyData.query.xml | 0 .../queries/study/treatments.query.xml | 65 +++++++++++++++++++ 
.../queries/study/viralloads.query.xml | 40 ++++++++++++ .../{query => queries}/study/weight.query.xml | 6 ++ Studies/resources/query/study/flags.query.xml | 43 ------------ .../resources/query/study/samples.query.xml | 11 ---- .../study/datasets/datasets_manifest.xml | 6 ++ .../study/datasets/datasets_metadata.xml | 55 ++++++++++++++++ 16 files changed, 400 insertions(+), 54 deletions(-) create mode 100644 Studies/resources/queries/study/assignment.query.xml rename Studies/resources/{query => queries}/study/demographics.query.xml (53%) create mode 100644 Studies/resources/queries/study/flags.query.xml create mode 100644 Studies/resources/queries/study/genetics.query.xml create mode 100644 Studies/resources/queries/study/immunizations.query.xml create mode 100644 Studies/resources/queries/study/labwork.query.xml create mode 100644 Studies/resources/queries/study/procedures.query.xml create mode 100644 Studies/resources/queries/study/samples.query.xml rename Studies/resources/{query => queries}/study/studyData.query.xml (100%) create mode 100644 Studies/resources/queries/study/treatments.query.xml create mode 100644 Studies/resources/queries/study/viralloads.query.xml rename Studies/resources/{query => queries}/study/weight.query.xml (67%) delete mode 100644 Studies/resources/query/study/flags.query.xml delete mode 100644 Studies/resources/query/study/samples.query.xml diff --git a/Studies/resources/queries/study/assignment.query.xml b/Studies/resources/queries/study/assignment.query.xml new file mode 100644 index 000000000..4885dec2e --- /dev/null +++ b/Studies/resources/queries/study/assignment.query.xml @@ -0,0 +1,28 @@ + + + + + + + + Date Added + Date + + + End Date + Date + + + Study + + + Sub-Group + + + Category + + +
+
+
+
diff --git a/Studies/resources/query/study/demographics.query.xml b/Studies/resources/queries/study/demographics.query.xml similarity index 53% rename from Studies/resources/query/study/demographics.query.xml rename to Studies/resources/queries/study/demographics.query.xml index 9df49a04a..5b516500b 100644 --- a/Studies/resources/query/study/demographics.query.xml +++ b/Studies/resources/queries/study/demographics.query.xml @@ -28,6 +28,34 @@ query.Id~eq=${Id}
+ + true + true + + + Sex + + + Geographic Origin + + + Birth + + + Death + + + Species + + + Mother + + + Father + + + Status + diff --git a/Studies/resources/queries/study/flags.query.xml b/Studies/resources/queries/study/flags.query.xml new file mode 100644 index 000000000..6e2b23eda --- /dev/null +++ b/Studies/resources/queries/study/flags.query.xml @@ -0,0 +1,22 @@ + + + + + + + + Date Added + Date + + + End Date + Date + + + Flag + + +
+
+
+
diff --git a/Studies/resources/queries/study/genetics.query.xml b/Studies/resources/queries/study/genetics.query.xml new file mode 100644 index 000000000..54b08daef --- /dev/null +++ b/Studies/resources/queries/study/genetics.query.xml @@ -0,0 +1,30 @@ + + + + + + + + Date Added + Date + + + Category + + + Assay Type + + + Marker/Allele + + + Result + + + Score + + +
+
+
+
diff --git a/Studies/resources/queries/study/immunizations.query.xml b/Studies/resources/queries/study/immunizations.query.xml new file mode 100644 index 000000000..833d0fde2 --- /dev/null +++ b/Studies/resources/queries/study/immunizations.query.xml @@ -0,0 +1,33 @@ + + + + + + + + Date Added + Date + + + Category + + + Treatment + + + Route + + + Quantity + + + Quantity Units + + + Reason + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/queries/study/labwork.query.xml b/Studies/resources/queries/study/labwork.query.xml new file mode 100644 index 000000000..0e07d77e1 --- /dev/null +++ b/Studies/resources/queries/study/labwork.query.xml @@ -0,0 +1,34 @@ + + + + + + + + Date + Date + + + Category + + + Test + + + Result + + + Units + + + Qualitative Result + true + + + Method + + +
+
+
+
diff --git a/Studies/resources/queries/study/procedures.query.xml b/Studies/resources/queries/study/procedures.query.xml new file mode 100644 index 000000000..b866f8b5f --- /dev/null +++ b/Studies/resources/queries/study/procedures.query.xml @@ -0,0 +1,22 @@ + + + + + + + + + + true + + + Category + + + Procedure + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/queries/study/samples.query.xml b/Studies/resources/queries/study/samples.query.xml new file mode 100644 index 000000000..87ac54943 --- /dev/null +++ b/Studies/resources/queries/study/samples.query.xml @@ -0,0 +1,31 @@ + + + + + + + + Date Added + Date + + + Sample ID + + + Sample Type + + + Preservation + + + Quantity + + + Quantity Units + + + +
+
+
+
\ No newline at end of file diff --git a/Studies/resources/query/study/studyData.query.xml b/Studies/resources/queries/study/studyData.query.xml similarity index 100% rename from Studies/resources/query/study/studyData.query.xml rename to Studies/resources/queries/study/studyData.query.xml diff --git a/Studies/resources/queries/study/treatments.query.xml b/Studies/resources/queries/study/treatments.query.xml new file mode 100644 index 000000000..c92ad7063 --- /dev/null +++ b/Studies/resources/queries/study/treatments.query.xml @@ -0,0 +1,65 @@ + + + + + + + + Date + Date + + + End Date + Date + + + Category + + + Treatment + + + Route + + + Frequency + + + Amount + + + Amount Units + + + Volume + true + + + Volume Units + true + + + Concentration + true + + + Conc. Units + true + + + Dosage + true + + + Dosage Units + true + + + Reason + true + + +
+
+
+
diff --git a/Studies/resources/queries/study/viralloads.query.xml b/Studies/resources/queries/study/viralloads.query.xml new file mode 100644 index 000000000..8699aa8f5 --- /dev/null +++ b/Studies/resources/queries/study/viralloads.query.xml @@ -0,0 +1,40 @@ + + + + + + + + Date + Date + + + Sample Type + + + Assay Type + + + target + + + LOD + + + Result + + + Units + + + Qualitative Result + true + + + true + + +
+
+
+
diff --git a/Studies/resources/query/study/weight.query.xml b/Studies/resources/queries/study/weight.query.xml similarity index 67% rename from Studies/resources/query/study/weight.query.xml rename to Studies/resources/queries/study/weight.query.xml index 929c141e3..24e484dd7 100644 --- a/Studies/resources/query/study/weight.query.xml +++ b/Studies/resources/queries/study/weight.query.xml @@ -3,6 +3,12 @@ + + + + + true + Weight (kg) 0.#### diff --git a/Studies/resources/query/study/flags.query.xml b/Studies/resources/query/study/flags.query.xml deleted file mode 100644 index e8d0348bf..000000000 --- a/Studies/resources/query/study/flags.query.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - -
- - - - Date Added - Date - - - Date Removed - false - Date - - - - - - - - - - - - - - - - - - - - - - Value - true - - -
-
-
- diff --git a/Studies/resources/query/study/samples.query.xml b/Studies/resources/query/study/samples.query.xml deleted file mode 100644 index b3f9fa380..000000000 --- a/Studies/resources/query/study/samples.query.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml index de46c5955..346bd0300 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml @@ -31,5 +31,11 @@ + + + + + + diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml index 4db53dd2a..d536f5c76 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml @@ -334,4 +334,59 @@ Samples + + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + varchar + + + varchar + + + varchar + + + varchar + + + double + + + Genetic Data +
+ + + + varchar + http://cpas.labkey.com/Study#ParticipantId + + + timestamp + http://cpas.labkey.com/laboratory#sampleDate + + + entityid + true + + + varchar + + + varchar + + + Procedures +
From a83bb06420a8aa9c5bc262089b9033518b07c9d1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 17:25:58 -0700 Subject: [PATCH 53/58] Correct XML --- Studies/resources/queries/study/assignment.query.xml | 3 +++ .../referenceStudy/study/datasets/datasets_metadata.xml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Studies/resources/queries/study/assignment.query.xml b/Studies/resources/queries/study/assignment.query.xml index 4885dec2e..05d40e8ca 100644 --- a/Studies/resources/queries/study/assignment.query.xml +++ b/Studies/resources/queries/study/assignment.query.xml @@ -18,6 +18,9 @@ Sub-Group + + Cohort ID + Category diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml index d536f5c76..ecd9b819b 100644 --- a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml +++ b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml @@ -293,7 +293,7 @@ varchar - Cohort ID + varchar varchar From 1edf565389dcce5f0358078235cd13876fca6dfa Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 9 Jun 2025 17:31:32 -0700 Subject: [PATCH 54/58] More query XML updates --- Studies/resources/queries/study/demographics.query.xml | 2 ++ Studies/resources/queries/study/labwork.query.xml | 1 + Studies/resources/queries/study/studyData.query.xml | 1 + Studies/resources/queries/study/treatments.query.xml | 9 ++++++++- Studies/resources/queries/study/viralloads.query.xml | 2 ++ 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Studies/resources/queries/study/demographics.query.xml b/Studies/resources/queries/study/demographics.query.xml index 5b516500b..ac7429eae 100644 --- a/Studies/resources/queries/study/demographics.query.xml +++ b/Studies/resources/queries/study/demographics.query.xml @@ -9,6 +9,7 @@ true + false Date @@ -30,6 +31,7 @@ true + false true diff --git a/Studies/resources/queries/study/labwork.query.xml 
b/Studies/resources/queries/study/labwork.query.xml index 0e07d77e1..99e85a14a 100644 --- a/Studies/resources/queries/study/labwork.query.xml +++ b/Studies/resources/queries/study/labwork.query.xml @@ -23,6 +23,7 @@ Qualitative Result true + false Method diff --git a/Studies/resources/queries/study/studyData.query.xml b/Studies/resources/queries/study/studyData.query.xml index 008e71bee..6c668a896 100644 --- a/Studies/resources/queries/study/studyData.query.xml +++ b/Studies/resources/queries/study/studyData.query.xml @@ -16,6 +16,7 @@ Key true + false diff --git a/Studies/resources/queries/study/treatments.query.xml b/Studies/resources/queries/study/treatments.query.xml index c92ad7063..1a5e82c98 100644 --- a/Studies/resources/queries/study/treatments.query.xml +++ b/Studies/resources/queries/study/treatments.query.xml @@ -27,36 +27,43 @@ Amount - + Amount Units Volume true + false Volume Units true + false Concentration true + false Conc. Units true + false Dosage true + false Dosage Units true + false Reason true + false diff --git a/Studies/resources/queries/study/viralloads.query.xml b/Studies/resources/queries/study/viralloads.query.xml index 8699aa8f5..ef00f0e25 100644 --- a/Studies/resources/queries/study/viralloads.query.xml +++ b/Studies/resources/queries/study/viralloads.query.xml @@ -29,9 +29,11 @@ Qualitative Result true + false true + false From 1061b1503bf8041b0d1bb5c7bd3c9fa6d1456bad Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 10 Jun 2025 10:35:51 -0700 Subject: [PATCH 55/58] Refactor code into discrete SivStudies module --- .../labkey/api/studies/StudiesService.java | 3 + .../security/StudiesDataAdminPermission.java | 2 +- .../security/StudiesDataAdminRole.java | 2 +- Studies/resources/data/amount_units.tsv | 11 - .../data/calculated_status_codes.tsv | 6 - Studies/resources/data/conc_units.tsv | 12 - Studies/resources/data/dosage_units.tsv | 11 - Studies/resources/data/gender_codes.tsv | 4 - Studies/resources/data/geographic_origins.tsv | 4 - 
Studies/resources/data/lookup_sets.tsv | 9 - Studies/resources/data/reports.tsv | 12 - Studies/resources/data/routes.tsv | 24 -- Studies/resources/data/species.tsv | 10 - Studies/resources/data/volume_units.tsv | 13 - .../folderTypes/Studies.folderType.xml | 78 ---- .../queries/study/assignment.query.xml | 31 -- .../queries/study/demographics.query.xml | 65 --- .../resources/queries/study/flags.query.xml | 22 - .../queries/study/genetics.query.xml | 30 -- .../queries/study/immunizations.query.xml | 33 -- .../resources/queries/study/labwork.query.xml | 35 -- .../queries/study/procedures.query.xml | 22 - .../resources/queries/study/samples.query.xml | 31 -- .../queries/study/studyData.query.xml | 25 -- .../queries/study/treatments.query.xml | 72 ---- .../queries/study/viralloads.query.xml | 42 -- .../resources/queries/study/weight.query.xml | 20 - Studies/resources/referenceStudy/folder.xml | 4 - .../study/datasets/Studies.dataset | 19 - .../study/datasets/datasets_manifest.xml | 41 -- .../study/datasets/datasets_metadata.xml | 392 ------------------ .../resources/referenceStudy/study/study.xml | 9 - .../referenceStudy/study/studyPolicy.xml | 10 - Studies/resources/views/studiesAdmin.html | 33 -- Studies/resources/views/studiesAdmin.view.xml | 8 - .../resources/views/studiesAdmin.webpart.xml | 6 - .../org/labkey/studies/StudiesController.java | 79 ---- .../src/org/labkey/studies/StudiesModule.java | 2 +- .../labkey/studies/StudiesServiceImpl.java | 39 ++ .../studies/query/StudiesCustomizer.java | 22 - .../studies/query/StudiesUserSchema.java | 2 +- 41 files changed, 46 insertions(+), 1249 deletions(-) rename Studies/{src/org/labkey => api-src/org/labkey/api}/studies/security/StudiesDataAdminPermission.java (96%) rename Studies/{src/org/labkey => api-src/org/labkey/api}/studies/security/StudiesDataAdminRole.java (94%) delete mode 100644 Studies/resources/data/amount_units.tsv delete mode 100644 Studies/resources/data/calculated_status_codes.tsv delete mode 100644 
Studies/resources/data/conc_units.tsv delete mode 100644 Studies/resources/data/dosage_units.tsv delete mode 100644 Studies/resources/data/gender_codes.tsv delete mode 100644 Studies/resources/data/geographic_origins.tsv delete mode 100644 Studies/resources/data/lookup_sets.tsv delete mode 100644 Studies/resources/data/reports.tsv delete mode 100644 Studies/resources/data/routes.tsv delete mode 100644 Studies/resources/data/species.tsv delete mode 100644 Studies/resources/data/volume_units.tsv delete mode 100644 Studies/resources/folderTypes/Studies.folderType.xml delete mode 100644 Studies/resources/queries/study/assignment.query.xml delete mode 100644 Studies/resources/queries/study/demographics.query.xml delete mode 100644 Studies/resources/queries/study/flags.query.xml delete mode 100644 Studies/resources/queries/study/genetics.query.xml delete mode 100644 Studies/resources/queries/study/immunizations.query.xml delete mode 100644 Studies/resources/queries/study/labwork.query.xml delete mode 100644 Studies/resources/queries/study/procedures.query.xml delete mode 100644 Studies/resources/queries/study/samples.query.xml delete mode 100644 Studies/resources/queries/study/studyData.query.xml delete mode 100644 Studies/resources/queries/study/treatments.query.xml delete mode 100644 Studies/resources/queries/study/viralloads.query.xml delete mode 100644 Studies/resources/queries/study/weight.query.xml delete mode 100644 Studies/resources/referenceStudy/folder.xml delete mode 100644 Studies/resources/referenceStudy/study/datasets/Studies.dataset delete mode 100644 Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml delete mode 100644 Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml delete mode 100644 Studies/resources/referenceStudy/study/study.xml delete mode 100644 Studies/resources/referenceStudy/study/studyPolicy.xml delete mode 100644 Studies/resources/views/studiesAdmin.html delete mode 100644 
Studies/resources/views/studiesAdmin.view.xml delete mode 100644 Studies/resources/views/studiesAdmin.webpart.xml delete mode 100644 Studies/src/org/labkey/studies/query/StudiesCustomizer.java diff --git a/Studies/api-src/org/labkey/api/studies/StudiesService.java b/Studies/api-src/org/labkey/api/studies/StudiesService.java index 6ddb36fc0..249137680 100644 --- a/Studies/api-src/org/labkey/api/studies/StudiesService.java +++ b/Studies/api-src/org/labkey/api/studies/StudiesService.java @@ -2,6 +2,7 @@ import org.labkey.api.data.Container; import org.labkey.api.module.Module; +import org.labkey.api.resource.Resource; import org.labkey.api.security.User; import org.labkey.api.util.Path; @@ -25,4 +26,6 @@ static public void setInstance(StudiesService instance) } abstract public void importFolderDefinition(Container container, User user, Module m, Path sourceFolderDirPath) throws IOException; + + abstract public void loadTsv(Resource tsv, String schemaName, User u, Container c); } diff --git a/Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java similarity index 96% rename from Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java rename to Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java index fc259b15e..719944c7a 100644 --- a/Studies/src/org/labkey/studies/security/StudiesDataAdminPermission.java +++ b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminPermission.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.labkey.studies.security; +package org.labkey.api.studies.security; import org.labkey.api.security.permissions.AbstractPermission; diff --git a/Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java similarity index 94% rename from Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java rename to Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java index a5a4261e5..3b3b0ea13 100644 --- a/Studies/src/org/labkey/studies/security/StudiesDataAdminRole.java +++ b/Studies/api-src/org/labkey/api/studies/security/StudiesDataAdminRole.java @@ -1,4 +1,4 @@ -package org.labkey.studies.security; +package org.labkey.api.studies.security; import org.jetbrains.annotations.NotNull; import org.labkey.api.security.permissions.DeletePermission; diff --git a/Studies/resources/data/amount_units.tsv b/Studies/resources/data/amount_units.tsv deleted file mode 100644 index b4b010264..000000000 --- a/Studies/resources/data/amount_units.tsv +++ /dev/null @@ -1,11 +0,0 @@ -value -g -IU -U -mEq -mg -no units -ug -units -L/min -% \ No newline at end of file diff --git a/Studies/resources/data/calculated_status_codes.tsv b/Studies/resources/data/calculated_status_codes.tsv deleted file mode 100644 index d2bf1dec4..000000000 --- a/Studies/resources/data/calculated_status_codes.tsv +++ /dev/null @@ -1,6 +0,0 @@ -value -Alive -Dead -No Record -Shipped -Unknown \ No newline at end of file diff --git a/Studies/resources/data/conc_units.tsv b/Studies/resources/data/conc_units.tsv deleted file mode 100644 index d7d9b4d18..000000000 --- a/Studies/resources/data/conc_units.tsv +++ /dev/null @@ -1,12 +0,0 @@ -value denominator numerator -g/ml mL g -g/tsp tsp g -IU/ml mL IU -mEq/ml mL mEq -mg/capsule capsule(s) mg -mg/ml mL mg -mg/piece piece(s) mg -mg/tablet tablet(s) mg -mg/tsp tsp mg -ug/ml mL ug -units/ml mL units \ No newline at end of file diff --git 
a/Studies/resources/data/dosage_units.tsv b/Studies/resources/data/dosage_units.tsv deleted file mode 100644 index eb8de1491..000000000 --- a/Studies/resources/data/dosage_units.tsv +++ /dev/null @@ -1,11 +0,0 @@ -value numerator denominator -g/kg g kg -IU/kg IU kg -mEq/kg mEq kg -mg/animal mg -mg/kg mg kg -ml/kg ml kg -no units -ounces/kg ounces kg -ug/kg ug kg -units/kg units kg \ No newline at end of file diff --git a/Studies/resources/data/gender_codes.tsv b/Studies/resources/data/gender_codes.tsv deleted file mode 100644 index 07b681950..000000000 --- a/Studies/resources/data/gender_codes.tsv +++ /dev/null @@ -1,4 +0,0 @@ -v meaning origgender -f Female f -m Male m -u Unknown \ No newline at end of file diff --git a/Studies/resources/data/geographic_origins.tsv b/Studies/resources/data/geographic_origins.tsv deleted file mode 100644 index 0870abf80..000000000 --- a/Studies/resources/data/geographic_origins.tsv +++ /dev/null @@ -1,4 +0,0 @@ -value -Indian -Chinese -Hybrid \ No newline at end of file diff --git a/Studies/resources/data/lookup_sets.tsv b/Studies/resources/data/lookup_sets.tsv deleted file mode 100644 index 8e2b94822..000000000 --- a/Studies/resources/data/lookup_sets.tsv +++ /dev/null @@ -1,9 +0,0 @@ -setname label keyfield titleColumn -amount_units Amount Units unit -calculated_status_codes Calculated Status Codes code -conc_units Concentraiton Units unit -dosage_units Dosage Units unit -gender_codes Gender Codes -geographic_origins Geographic Origins origin -routes Routes route -volume_units Volume Units unit \ No newline at end of file diff --git a/Studies/resources/data/reports.tsv b/Studies/resources/data/reports.tsv deleted file mode 100644 index 4dd48af51..000000000 --- a/Studies/resources/data/reports.tsv +++ /dev/null @@ -1,12 +0,0 @@ -reportname category reporttype reporttitle visible containerpath schemaname queryname viewname report datefieldname todayonly queryhaslocation sort_order QCStateLabelFieldName description 
-activeAssignments Assignments and Groups query Active Assignments true study Assignment Active Assignments date false false qcstate/publicdata This report shows the active assignments for each animal -assignmentHistory Assignments and Groups query Assignment History true study Assignment date false false qcstate/publicdata This report shows all assignments records for the animals -activeGroups Assignments and Groups query Active Groups true study animal_group_members Active Members date false false qcstate/publicdata This report shows the active assignments for each animal -groupHistory Assignments and Groups query Group History true study animal_group_members date false false qcstate/publicdata This report shows all assignments records for the animals -microbiology Lab Results query Microbiology true study Microbiology Results date false false qcstate/publicdata -biochemistry Lab Results js Biochemistry true study bloodChemistry date false false Contains results of chemistry panels. Can be displayed either by panel, or showing reference ranges -clinPathRuns Lab Results query Lab Runs true study Clinpath Runs date false false qcstate/publicdata Contains all clinpath requests -iStat Lab Results js iStat true study iStat date false false qcstate/publicdata Contains iStat results -hematology Lab Results js Hematology true study hematology date false false Contains hematology data showing cell subsets -parasitology Lab Results query Parasitology true study Parasitology Results date false false qcstate/publicdata Contains results of parasitology testing -urinalysis Lab Results js Urinalysis true study urinalysisResults date false false Contains urinalysis results diff --git a/Studies/resources/data/routes.tsv b/Studies/resources/data/routes.tsv deleted file mode 100644 index b0e1f0d80..000000000 --- a/Studies/resources/data/routes.tsv +++ /dev/null @@ -1,24 +0,0 @@ -value title -IM -intracardiac -intracarotid -intracorneal Intracorneal -intracranial -IP intraperitoneal 
-ID -INH -IT -IV -CRI -IVAG -oral -PO -rectal -Spillage -SQ -OU -OD -OS -topical -topical (eye) -topical (skin) \ No newline at end of file diff --git a/Studies/resources/data/species.tsv b/Studies/resources/data/species.tsv deleted file mode 100644 index 0e063c4cf..000000000 --- a/Studies/resources/data/species.tsv +++ /dev/null @@ -1,10 +0,0 @@ -common scientific_name id_prefix mhc_prefix blood_per_kg max_draw_pct blood_draw_interval cites_code dateDisabled -Baboon 60 0.2 30 -Cotton-top Tamarin Saguinus oedipus so Saoe 60 0.2 30 -Cynomolgus Macaca fascicularis cy Mafa 60 0.2 30 -Marmoset Callithrix jacchus cj Caja 60 0.15 30 -Pigtail Macaca Nemestrina Mane 60 0.2 30 -Rhesus Macaca mulatta r|rh Mamu 60 0.2 30 -Sooty Mangabey Cercocebus atys Ceat 60 0.2 30 -Stump Tailed Macaca Arctoides Maar 60 0.2 30 -Vervet Chlorocebus sabaeus ag Chsa 60 0.2 30 diff --git a/Studies/resources/data/volume_units.tsv b/Studies/resources/data/volume_units.tsv deleted file mode 100644 index fc2c76822..000000000 --- a/Studies/resources/data/volume_units.tsv +++ /dev/null @@ -1,13 +0,0 @@ -value -capsule(s) -cup -drop(s) -cube -mL -mL/hr -no units -ounce(s) -pan -piece(s) -tablet(s) -tsp \ No newline at end of file diff --git a/Studies/resources/folderTypes/Studies.folderType.xml b/Studies/resources/folderTypes/Studies.folderType.xml deleted file mode 100644 index 749c8d8f0..000000000 --- a/Studies/resources/folderTypes/Studies.folderType.xml +++ /dev/null @@ -1,78 +0,0 @@ - - Studies Overview - The default folder layout for Studies - - - - - - - - - - - - - - - - - datasets - Datasets - - - datasets - - - - - - - Datasets - body - - - - - - - - - - - - - - - - - - - - - - admin - Admin - - - - - - - - - Studies Admin - body - - - - - - - - - studies - - studies - true - \ No newline at end of file diff --git a/Studies/resources/queries/study/assignment.query.xml b/Studies/resources/queries/study/assignment.query.xml deleted file mode 100644 index 05d40e8ca..000000000 --- 
a/Studies/resources/queries/study/assignment.query.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - Date Added - Date - - - End Date - Date - - - Study - - - Sub-Group - - - Cohort ID - - - Category - - -
-
-
-
diff --git a/Studies/resources/queries/study/demographics.query.xml b/Studies/resources/queries/study/demographics.query.xml deleted file mode 100644 index ac7429eae..000000000 --- a/Studies/resources/queries/study/demographics.query.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - - - - - true - false - - - Date - Birth - /query/executeQuery.view? - schemaName=study& - query.queryName=Birth& - query.Id~eq=${Id} - - - - Date - Death - /query/executeQuery.view? - schemaName=study& - query.queryName=Deaths& - query.Id~eq=${Id} - - - - true - false - true - - - Sex - - - Geographic Origin - - - Birth - - - Death - - - Species - - - Mother - - - Father - - - Status - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/queries/study/flags.query.xml b/Studies/resources/queries/study/flags.query.xml deleted file mode 100644 index 6e2b23eda..000000000 --- a/Studies/resources/queries/study/flags.query.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - Date Added - Date - - - End Date - Date - - - Flag - - -
-
-
-
diff --git a/Studies/resources/queries/study/genetics.query.xml b/Studies/resources/queries/study/genetics.query.xml deleted file mode 100644 index 54b08daef..000000000 --- a/Studies/resources/queries/study/genetics.query.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - Date Added - Date - - - Category - - - Assay Type - - - Marker/Allele - - - Result - - - Score - - -
-
-
-
diff --git a/Studies/resources/queries/study/immunizations.query.xml b/Studies/resources/queries/study/immunizations.query.xml deleted file mode 100644 index 833d0fde2..000000000 --- a/Studies/resources/queries/study/immunizations.query.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - Date Added - Date - - - Category - - - Treatment - - - Route - - - Quantity - - - Quantity Units - - - Reason - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/queries/study/labwork.query.xml b/Studies/resources/queries/study/labwork.query.xml deleted file mode 100644 index 99e85a14a..000000000 --- a/Studies/resources/queries/study/labwork.query.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - Date - Date - - - Category - - - Test - - - Result - - - Units - - - Qualitative Result - true - false - - - Method - - -
-
-
-
diff --git a/Studies/resources/queries/study/procedures.query.xml b/Studies/resources/queries/study/procedures.query.xml deleted file mode 100644 index b866f8b5f..000000000 --- a/Studies/resources/queries/study/procedures.query.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - true - - - Category - - - Procedure - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/queries/study/samples.query.xml b/Studies/resources/queries/study/samples.query.xml deleted file mode 100644 index 87ac54943..000000000 --- a/Studies/resources/queries/study/samples.query.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - Date Added - Date - - - Sample ID - - - Sample Type - - - Preservation - - - Quantity - - - Quantity Units - - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/queries/study/studyData.query.xml b/Studies/resources/queries/study/studyData.query.xml deleted file mode 100644 index 6c668a896..000000000 --- a/Studies/resources/queries/study/studyData.query.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - Date - - - End Date - - - Key - true - false - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/queries/study/treatments.query.xml b/Studies/resources/queries/study/treatments.query.xml deleted file mode 100644 index 1a5e82c98..000000000 --- a/Studies/resources/queries/study/treatments.query.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - - Date - Date - - - End Date - Date - - - Category - - - Treatment - - - Route - - - Frequency - - - Amount - - - Amount Units - - - Volume - true - false - - - Volume Units - true - false - - - Concentration - true - false - - - Conc. Units - true - false - - - Dosage - true - false - - - Dosage Units - true - false - - - Reason - true - false - - -
-
-
-
diff --git a/Studies/resources/queries/study/viralloads.query.xml b/Studies/resources/queries/study/viralloads.query.xml deleted file mode 100644 index ef00f0e25..000000000 --- a/Studies/resources/queries/study/viralloads.query.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - Date - Date - - - Sample Type - - - Assay Type - - - target - - - LOD - - - Result - - - Units - - - Qualitative Result - true - false - - - true - false - - -
-
-
-
diff --git a/Studies/resources/queries/study/weight.query.xml b/Studies/resources/queries/study/weight.query.xml deleted file mode 100644 index 24e484dd7..000000000 --- a/Studies/resources/queries/study/weight.query.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - true - - - Weight (kg) - 0.#### - - -
-
-
-
\ No newline at end of file diff --git a/Studies/resources/referenceStudy/folder.xml b/Studies/resources/referenceStudy/folder.xml deleted file mode 100644 index e3acbb155..000000000 --- a/Studies/resources/referenceStudy/folder.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/Studies/resources/referenceStudy/study/datasets/Studies.dataset b/Studies/resources/referenceStudy/study/datasets/Studies.dataset deleted file mode 100644 index 8e5970288..000000000 --- a/Studies/resources/referenceStudy/study/datasets/Studies.dataset +++ /dev/null @@ -1,19 +0,0 @@ -# default group can be used to avoid repeating definitions for each dataset -# -# action=[REPLACE,APPEND,DELETE] (default:REPLACE) -# deleteAfterImport=[TRUE|FALSE] (default:FALSE) - -default.action=REPLACE -default.deleteAfterImport=FALSE - -# map a source tsv column (right side) to a property name or full propertyURI (left) -# predefined properties: ParticipantId, SiteId, VisitId, Created -default.property.ParticipantId=ptid -default.property.Created=dfcreate - -# use to map from filename->datasetid -# NOTE: if there are NO explicit import definitions, we will try to import all files matching pattern -# NOTE: if there are ANY explicit mapping, we will only import listed datasets - -default.filePattern=dataset(\\d*).tsv -default.importAllMatches=TRUE diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml b/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml deleted file mode 100644 index 346bd0300..000000000 --- a/Studies/resources/referenceStudy/study/datasets/datasets_manifest.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml b/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml deleted file mode 100644 index ecd9b819b..000000000 --- 
a/Studies/resources/referenceStudy/study/datasets/datasets_metadata.xml +++ /dev/null @@ -1,392 +0,0 @@ - - - - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - timestamp - - - varchar - - - Flags/Misc Information -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - timestamp - - - varchar - - - varchar - - - double - - - varchar - - - double - - - varchar - - - double - - - varchar - - - varchar - - - integer - - - double - - - varchar - - - varchar - - - Medications/Treatments -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - varchar - - - varchar - - - varchar - - - double - - - varchar - - - varchar - - - Immunizations -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - double - - - Weight -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - varchar - - - varchar - - - varchar - - - double - - - double - - - varchar - true - - org.labkey.studies.query.ResultsOOODisplayColumn - - - - varchar - - - varchar - - - Viral Loads -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - varchar - - - double - - - varchar - - - varchar - - - varchar - - - varchar - - - Lab Results -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - false - - - - varchar - - - varchar - - - timestamp - - - timestamp - - - varchar - - - varchar - - - varchar - - - varchar - - - Demographics -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - timestamp - - - varchar - - - varchar - - - varchar - - - varchar - - - varchar - - - Project Assignment -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - - varchar - - - varchar - - - varchar - - - double - - - varchar - - - Samples -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - varchar - - - varchar - - - varchar - - - varchar - - - double - - - Genetic Data -
- - - - varchar - http://cpas.labkey.com/Study#ParticipantId - - - timestamp - http://cpas.labkey.com/laboratory#sampleDate - - - entityid - true - - - varchar - - - varchar - - - Procedures -
-
diff --git a/Studies/resources/referenceStudy/study/study.xml b/Studies/resources/referenceStudy/study/study.xml deleted file mode 100644 index 0acc6d7dc..000000000 --- a/Studies/resources/referenceStudy/study/study.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/Studies/resources/referenceStudy/study/studyPolicy.xml b/Studies/resources/referenceStudy/study/studyPolicy.xml deleted file mode 100644 index 3755d25b8..000000000 --- a/Studies/resources/referenceStudy/study/studyPolicy.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - BASIC_WRITE - - - - - - - \ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.html b/Studies/resources/views/studiesAdmin.html deleted file mode 100644 index c4a455ab3..000000000 --- a/Studies/resources/views/studiesAdmin.html +++ /dev/null @@ -1,33 +0,0 @@ - \ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.view.xml b/Studies/resources/views/studiesAdmin.view.xml deleted file mode 100644 index 7789b6b5c..000000000 --- a/Studies/resources/views/studiesAdmin.view.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/Studies/resources/views/studiesAdmin.webpart.xml b/Studies/resources/views/studiesAdmin.webpart.xml deleted file mode 100644 index e44bcd6a3..000000000 --- a/Studies/resources/views/studiesAdmin.webpart.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/Studies/src/org/labkey/studies/StudiesController.java b/Studies/src/org/labkey/studies/StudiesController.java index a0b634567..58d6b51dd 100644 --- a/Studies/src/org/labkey/studies/StudiesController.java +++ b/Studies/src/org/labkey/studies/StudiesController.java @@ -45,83 +45,4 @@ public StudiesController() { setActionResolver(_actionResolver); } - - @RequiresPermission(AdminPermission.class) - public static class ImportStudyAction extends ConfirmAction - { - @Override - public ModelAndView getConfirmView(Object o, BindException errors) throws 
Exception - { - setTitle("Import Study"); - - return new HtmlView(HtmlString.unsafe("This will import the default study in this folder, and truncate/load ancillary data. Do you want to continue?")); - } - - @Override - public boolean handlePost(Object o, BindException errors) throws Exception - { - StudiesService.get().importFolderDefinition(getContainer(), getUser(), ModuleLoader.getInstance().getModule(StudiesModule.NAME), new Path("referenceStudy")); - - StudiesModule m = ModuleLoader.getInstance().getModule(StudiesModule.class); - loadTsv(m.getModuleResource("data/lookup_sets.tsv"), StudiesSchema.NAME); - - Resource r = m.getModuleResource("data"); - r.list().forEach(tsv -> { - if ("lookup_sets.tsv".equals(tsv.getName())) - { - return; - } - - String schemaName = switch (tsv.getName()) - { - case "reports.tsv" -> "laboratory"; - case "species.tsv" -> "laboratory"; - default -> StudiesSchema.NAME; - }; - - loadTsv(tsv, schemaName); - }); - - return true; - } - - private void loadTsv(Resource tsv, String schemaName) - { - try (DataLoader loader = DataLoader.get().createLoader(tsv, true, null, TabLoader.TSV_FILE_TYPE)) - { - TableInfo ti = QueryService.get().getUserSchema(getUser(), getContainer(), schemaName).getTable(FileUtil.getBaseName(tsv.getName())); - if (ti == null) - { - throw new IllegalStateException("Missing table: " + tsv.getName()); - } - - List> rows = loader.load(); - - QueryUpdateService qus = ti.getUpdateService(); - qus.setBulkLoad(true); - - qus.truncateRows(getUser(), getContainer(), null, null); - qus.insertRows(getUser(), getContainer(), rows, new BatchValidationException(), null, null); - } - catch (IOException | SQLException | BatchValidationException | QueryUpdateServiceException | DuplicateKeyException e) - { - _log.error("Error populating TSV", e); - - throw new RuntimeException(e); - } - } - - @Override - public void validateCommand(Object o, Errors errors) - { - - } - - @NotNull - @Override - public URLHelper getSuccessURL(Object o) - 
{ - return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer()); - } - } } diff --git a/Studies/src/org/labkey/studies/StudiesModule.java b/Studies/src/org/labkey/studies/StudiesModule.java index 9fd15fb9a..abb00167c 100644 --- a/Studies/src/org/labkey/studies/StudiesModule.java +++ b/Studies/src/org/labkey/studies/StudiesModule.java @@ -11,7 +11,7 @@ import org.labkey.api.security.roles.RoleManager; import org.labkey.api.studies.StudiesService; import org.labkey.studies.query.StudiesUserSchema; -import org.labkey.studies.security.StudiesDataAdminRole; +import org.labkey.api.studies.security.StudiesDataAdminRole; import java.util.Collection; import java.util.Collections; diff --git a/Studies/src/org/labkey/studies/StudiesServiceImpl.java b/Studies/src/org/labkey/studies/StudiesServiceImpl.java index 1768e2450..9edb5f9a9 100644 --- a/Studies/src/org/labkey/studies/StudiesServiceImpl.java +++ b/Studies/src/org/labkey/studies/StudiesServiceImpl.java @@ -3,9 +3,17 @@ import org.apache.logging.log4j.Logger; import org.labkey.api.admin.ImportOptions; import org.labkey.api.data.Container; +import org.labkey.api.data.TableInfo; import org.labkey.api.module.Module; import org.labkey.api.pipeline.PipeRoot; import org.labkey.api.pipeline.PipelineService; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.reader.DataLoader; +import org.labkey.api.reader.TabLoader; import org.labkey.api.resource.DirectoryResource; import org.labkey.api.resource.Resource; import org.labkey.api.security.User; @@ -19,6 +27,9 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; public class StudiesServiceImpl extends StudiesService { @@ -91,4 
+102,32 @@ private void copyResourceToPath(Resource resource, java.nio.file.Path target) th } } } + + @Override + public void loadTsv(Resource tsv, String schemaName, User u, Container c) + { + try (DataLoader loader = DataLoader.get().createLoader(tsv, true, null, TabLoader.TSV_FILE_TYPE)) + { + TableInfo ti = QueryService.get().getUserSchema(u, c, schemaName).getTable(FileUtil.getBaseName(tsv.getName())); + if (ti == null) + { + throw new IllegalStateException("Missing table: " + tsv.getName()); + } + + List> rows = loader.load(); + + QueryUpdateService qus = ti.getUpdateService(); + qus.setBulkLoad(true); + + qus.truncateRows(u, c, null, null); + qus.insertRows(u, c, rows, new BatchValidationException(), null, null); + } + catch (IOException | SQLException | BatchValidationException | QueryUpdateServiceException | + DuplicateKeyException e) + { + _log.error("Error populating TSV", e); + + throw new RuntimeException(e); + } + } } diff --git a/Studies/src/org/labkey/studies/query/StudiesCustomizer.java b/Studies/src/org/labkey/studies/query/StudiesCustomizer.java deleted file mode 100644 index 6951dceeb..000000000 --- a/Studies/src/org/labkey/studies/query/StudiesCustomizer.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.labkey.studies.query; - -import org.labkey.api.data.TableInfo; -import org.labkey.api.ldk.table.AbstractTableCustomizer; -import org.labkey.api.study.DatasetTable; - -public class StudiesCustomizer extends AbstractTableCustomizer -{ - @Override - public void customize(TableInfo tableInfo) - { - if (tableInfo instanceof DatasetTable ds) - { - performDatasetCustomization(ds); - } - } - - public void performDatasetCustomization(DatasetTable ds) - { - - } -} diff --git a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java index ef30fc38c..2fb451c26 100644 --- a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java +++ 
b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java @@ -19,7 +19,7 @@ import org.labkey.api.security.permissions.ReadPermission; import org.labkey.api.security.permissions.UpdatePermission; import org.labkey.studies.StudiesSchema; -import org.labkey.studies.security.StudiesDataAdminPermission; +import org.labkey.api.studies.security.StudiesDataAdminPermission; import java.util.Collections; import java.util.Map; From f91af8810b7886f68ce410b83a2bbff32d77e233 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 12 Jun 2025 09:13:22 -0700 Subject: [PATCH 56/58] Minor cleanup --- .../studies/query/ResultsOOODisplayColumn.java | 12 ++++++------ singlecell/resources/queries/singlecell/samples.js | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java index ef2a9923a..4fddcc0e3 100644 --- a/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java +++ b/Studies/src/org/labkey/studies/query/ResultsOOODisplayColumn.java @@ -31,18 +31,18 @@ public Object getDisplayValue(RenderContext ctx) return null; } - String oor = ctx.get(getOOR(), String.class); - if (StringUtils.isEmpty(oor)) - { - return result; - } - if (getBoundColumn().getFormat() != null) { DecimalFormat fmt = new DecimalFormat(getBoundColumn().getFormat()); result = fmt.format(result); } + String oor = ctx.get(getOOR(), String.class); + if (StringUtils.isEmpty(oor)) + { + return result; + } + return oor + result; } diff --git a/singlecell/resources/queries/singlecell/samples.js b/singlecell/resources/queries/singlecell/samples.js index 0b78206ec..f66be9f90 100644 --- a/singlecell/resources/queries/singlecell/samples.js +++ b/singlecell/resources/queries/singlecell/samples.js @@ -23,6 +23,15 @@ function beforeUpsert(row, oldRow, errors){ else if (['No stim', 'No Stim'].indexOf(row.stim) !== -1){ row.stim = 'NoStim'; } + else if (['Infected cells: 
SIV+', 'Infected Cells: SIV+'].indexOf(row.stim) !== -1){ + row.stim = 'SIV-Infected CD4s'; + } + else if (['Infected cells: SIV-', 'Infected Cells: SIV-'].indexOf(row.stim) !== -1){ + row.stim = 'SIV-Infected CD4s / SIV-'; + } + else if (['Infected cells: Mock', 'Infected Cells: Mock'].indexOf(row.stim) !== -1){ + row.stim = 'Mock-Infected CD4s'; + } var lookupFields = ['stim']; for (var i=0;i Date: Fri, 13 Jun 2025 06:35:45 -0700 Subject: [PATCH 57/58] Use full filepath --- .../singlecell/pipeline/AbstractSingleCellPipelineStep.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index d24f1abf1..6bfa58366 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -478,9 +478,9 @@ protected Collection getAdditionalDockerInputs(SequenceOutputHandler.JobCo return Collections.emptySet(); } - protected String printInputFile(SeuratObjectWrapper so) + private String printInputFile(SeuratObjectWrapper so) { - return "'" + so.getFile().getName() + "'"; + return "'" + so.getFile().getPath() + "'"; } protected Chunk createFinalChunk() throws PipelineJobException From 301768be08905e9e576849558b76e4136228819c Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 13 Jun 2025 09:04:33 -0700 Subject: [PATCH 58/58] Add length check --- .../org/labkey/cluster/pipeline/SlurmExecutionEngine.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 7d2f14066..622a7c224 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ 
b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -499,6 +499,14 @@ private String extractField(String line, List fieldWidths, int idx) try { + if (end > line.length()) + { + _log.debug("Encountered slurm line shorter than expected. Slurm field at idx: " + idx + ". Line: [" + line + "]"); + _log.debug("fieldWidths: " + StringUtils.join(fieldWidths, "|")); + + return null; + } + return StringUtils.trimToNull(line.substring(start, end)); } catch (Exception e)