From a1a8a5aba07e31726ccc0887b501e994dcd4793a Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Thu, 31 Oct 2024 22:08:12 -0700
Subject: [PATCH 01/20] Prepare to support species in mGAP (#175)

* Prepare to support species in mGAP
---
 mGAP/resources/data/species.tsv               |  10 ++
 mGAP/resources/etls/prime-seq.xml             |   2 +
 .../queries/mGAP/combinedPedigree.sql         |  12 +-
 .../queries/mGAP/releaseTracks/.qview.xml     |   2 +-
 .../mGAP/variantCatalogReleases/.qview.xml    |   1 +
 mGAP/resources/r/UpdateTracks.r               | 115 +++++++++++++++
 .../dbscripts/postgresql/mgap-16.73-16.74.sql |   3 +
 .../dbscripts/sqlserver/mgap-16.73-16.74.sql  |   3 +
 mGAP/resources/schemas/mgap.xml               |  26 +++-
 mGAP/src/org/labkey/mgap/mGAPController.java  |  88 ++++++++++--
 mGAP/src/org/labkey/mgap/mGAPModule.java      |   2 +-
 mcc/package-lock.json                         | 132 +++++++-----------
 12 files changed, 290 insertions(+), 106 deletions(-)
 create mode 100644 mGAP/resources/data/species.tsv
 create mode 100644 mGAP/resources/r/UpdateTracks.r
 create mode 100644 mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql
 create mode 100644 mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql
diff --git a/mGAP/resources/data/species.tsv b/mGAP/resources/data/species.tsv
new file mode 100644
index 000000000..2472af328
--- /dev/null
+++ b/mGAP/resources/data/species.tsv
@@ -0,0 +1,10 @@
+common_name	scientific_name	mhc_prefix
+Cotton-top Tamarin	Saguinus oedipus	Saoe
+Cynomolgus macaque	Macaca fascicularis	Mafa
+Marmoset	Callithrix jacchus	Caja
+Pigtail macaque	Macaca nemestrina	Mane
+Rhesus macaque	Macaca mulatta	Mamu
+Sooty Mangabey	Cercocebus atys	Ceat
+Stump Tailed	Macaca arctoides	Maar
+Vervet	Chlorocebus sabaeus	Chsa
+Japanese macaque	Macaca fuscata	Mafu
diff --git a/mGAP/resources/etls/prime-seq.xml b/mGAP/resources/etls/prime-seq.xml
index 8b2bcecc9..458803fca 100644
--- a/mGAP/resources/etls/prime-seq.xml
+++ b/mGAP/resources/etls/prime-seq.xml
@@ -53,6 +53,7 @@
                     <column>releaseId</column>
                     <column>trackName</column>
                     <column>label</column>
+                    <column>species</column>
                     <column>totalSamples</column>
                     <column>category</column>
                     <column>url</column>
@@ -101,6 +102,7 @@
                     <column>releaseDate</column>
                     <column>vcfId/dataid/DataFileUrl</column>
                     <column>genomeId/name</column>
+                    <column>species</column>
                     <column>totalSubjects</column>
                     <column>totalVariants</column>
                     <column>dbSnpId</column>
diff --git a/mGAP/resources/queries/mGAP/combinedPedigree.sql b/mGAP/resources/queries/mGAP/combinedPedigree.sql
index 55512965a..4725d8998 100644
--- a/mGAP/resources/queries/mGAP/combinedPedigree.sql
+++ b/mGAP/resources/queries/mGAP/combinedPedigree.sql
@@ -1,12 +1,12 @@
 SELECT
-  s.subjectname,
+  s.Id as subjectname,
   s.gender,
-  s.mother as dam,
-  s.father as sire,
+  s.Id.parents.dam as dam,
+  s.Id.parents.sire as sire,
   s.species,
-  s.geographic_origin,
+  s.geographic_origin
 
-FROM laboratory.subjects s
+FROM "/Internal/PMR/".study.demographics s
 
 UNION ALL
 
@@ -19,4 +19,4 @@ SELECT
     null as geographic_origin
 
 FROM mgap.demographics d
-WHERE d.subjectname NOT IN (SELECT DISTINCT s.subjectname FROM laboratory.subjects s)
\ No newline at end of file
+WHERE d.subjectname NOT IN (SELECT DISTINCT s.Id FROM "/Internal/PMR/".study.demographics s)
\ No newline at end of file
diff --git a/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml b/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml
index 08488c2ec..7de86c8ae 100644
--- a/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml
+++ b/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml
@@ -3,6 +3,7 @@
         <column name="rowid"/>
         <column name="trackName"/>
         <column name="label"/>
+        <column name="species"/>
         <column name="source"/>
         <column name="category"/>
         <column name="totalSamples"/>
@@ -10,7 +11,6 @@
         <column name="vcfId"/>
         <column name="description"/>
         <column name="isprimarytrack"/>
-        <column name="mergepriority"/>
         <column name="skipvalidation"/>
         <column name="vcfId/container/Name">
             <properties>
diff --git a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
index f8b614ee2..ea7526061 100644
--- a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
+++ b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
@@ -1,6 +1,7 @@
 <customView xmlns="http://labkey.org/data/xml/queryCustomView">
     <columns>
         <column name="version"/>
+        <column name="species"/>
         <column name="releaseDate"/>
         <column name="totalSubjects"/>
         <column name="totalVariants"/>
diff --git a/mGAP/resources/r/UpdateTracks.r b/mGAP/resources/r/UpdateTracks.r
new file mode 100644
index 000000000..8646b5cb0
--- /dev/null
+++ b/mGAP/resources/r/UpdateTracks.r
@@ -0,0 +1,115 @@
+library(Rlabkey)
+library(dplyr)
+
+# This script is designed to be run externally per release, to identify subjects that need to be added to the releaseTrackSubsets table:
+
+testByCenter <- function(centerName, trackName) {
+  dat <- suppressWarnings(labkey.selectRows(
+    baseUrl="https://prime-seq.ohsu.edu", 
+    folderPath="/Internal/ColonyData", 
+    schemaName="mgap", 
+    queryName="sampleSummary", 
+    viewName="", 
+    colSelect="subjectId,externalAlias", 
+    colFilter=makeFilter(
+      c("tracks", "DOES_NOT_CONTAIN", trackName),
+      c("center", "EQUAL", centerName)), 
+    containerFilter=NULL, 
+    colNameOpt="rname"
+  ))
+  
+  print(paste0(trackName, ': ', nrow(dat)))
+  
+  if (nrow(dat) == 0) {
+    return(NULL)
+  }
+  
+  return(data.frame(trackName = trackName, subjectId = dat$subjectid))
+}
+
+testBySpecies <- function(speciesList, trackName) {
+  dat <- suppressWarnings(labkey.selectRows(
+    baseUrl="https://prime-seq.ohsu.edu", 
+    folderPath="/Internal/ColonyData", 
+    schemaName="mgap", 
+    queryName="sampleSummary", 
+    viewName="", 
+    colSelect="subjectId,externalAlias", 
+    colFilter=makeFilter(
+      c("tracks", "DOES_NOT_CONTAIN", trackName),
+      c("species", "IN", paste0(speciesList, collapse = ';'))), 
+    containerFilter=NULL, 
+    colNameOpt="rname"
+  ))
+  
+  print(paste0(trackName, ': ', nrow(dat)))
+  
+  if (nrow(dat) == 0) {
+    return(NULL)
+  }
+  
+  return(data.frame(trackName = trackName, subjectId = dat$subjectid))
+}
+
+toInsert <- rbind(
+  testByCenter('CNPRC', 'CNPRC Animals'),
+  testByCenter('TNPRC', 'TNPRC Animals'),
+  testByCenter('ENPRC', 'ENPRC Animals'),
+  testByCenter('NEPRC', 'NEPRC Animals'),
+  testByCenter('SNPRC', 'SNPRC Animals'),
+  testByCenter('ONPRC', 'ONPRC Animals'),
+  testByCenter('MDA', 'MDA Animals'),
+  testByCenter('WFU', 'WFU Animals'),
+  testByCenter('CPRC', 'CPRC Animals'),
+  testBySpecies(c('RHESUS MACAQUE', 'Rhesus', 'Macaca mulatta'), 'Rhesus Macaques'),
+  testBySpecies(c('JAPANESE MACAQUE', 'Macaca fuscata'), 'Japanese Macaques')
+)
+
+
+if (FALSE) {
+  added <- labkey.insertRows(
+    baseUrl="https://prime-seq.ohsu.edu", 
+    folderPath="/Internal/ColonyData", 
+    schemaName="mgap", 
+    queryName="releaseTrackSubsets", 
+    toInsert = toInsert
+  )
+}
+
+
+# Now ensure all tracks exist:
+existingTracks <- labkey.selectRows(
+  baseUrl="https://prime-seq.ohsu.edu", 
+  folderPath="/Internal/ColonyData", 
+  schemaName="mgap", 
+  queryName="releaseTracks",
+  colNameOpt="rname"
+)
+
+missingTrackNames <- labkey.selectRows(
+  baseUrl="https://prime-seq.ohsu.edu", 
+  folderPath="/Internal/ColonyData", 
+  schemaName="mgap", 
+  queryName="releaseTrackSubsets",
+  colSelect="trackName",
+  colNameOpt="rname"
+) %>% 
+  filter(!trackname %in% existingTracks$trackname) %>%
+  select(trackname) %>% unique()
+
+if (nrow(missingTrackNames) > 0) {
+  toAdd <- data.frame(trackName = missingTrackNames$trackname, label = missingTrackNames$trackname, isprimarytrack = FALSE)
+  toAdd$Category <- 'Species Dataset'
+  # Add anything else desired, like species, source, url, description, category
+  
+  if (FALSE) {
+    added <- labkey.insertRows(
+      baseUrl="https://prime-seq.ohsu.edu", 
+      folderPath="/Internal/ColonyData", 
+      schemaName="mgap", 
+      queryName="releaseTracks", 
+      toInsert = toAdd
+    )
+  }
+}
+
diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql
new file mode 100644
index 000000000..e4f4faba8
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql
@@ -0,0 +1,3 @@
+ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000);
+ALTER TABLE mGAP.releaseTracks ADD species varchar(1000);
+ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql
new file mode 100644
index 000000000..e4f4faba8
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql
@@ -0,0 +1,3 @@
+ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000);
+ALTER TABLE mGAP.releaseTracks ADD species varchar(1000);
+ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index 24bfd6332..fa92efe8c 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -95,6 +95,15 @@
                 <isUserEditable>false</isUserEditable>
                 <columnTitle>Row Id</columnTitle>
             </column>
+            <column columnName="species">
+                <columnTitle>Species</columnTitle>
+                <fk>
+                    <fkDbSchema>laboratory</fkDbSchema>
+                    <fkTable>species</fkTable>
+                    <fkColumnName>common_name</fkColumnName>
+                    <fkDisplayColumnName useRawValue="true"/>
+                </fk>
+            </column>
             <column columnName="version">
                 <columnTitle>Version</columnTitle>
                 <nullable>false</nullable>
@@ -644,6 +653,15 @@
                 <columnTitle>Label</columnTitle>
                 <nullable>false</nullable>
             </column>
+            <column columnName="species">
+                <columnTitle>Species</columnTitle>
+                <fk>
+                    <fkDbSchema>laboratory</fkDbSchema>
+                    <fkTable>species</fkTable>
+                    <fkColumnName>common_name</fkColumnName>
+                    <fkDisplayColumnName useRawValue="true"/>
+                </fk>
+            </column>
             <column columnName="source">
                 <columnTitle>Source</columnTitle>
                 <url>${url}</url>
@@ -669,11 +687,7 @@
                 <inputType>textarea</inputType>
             </column>
             <column columnName="isprimarytrack">
-                <columnTitle>Is Primary Track?</columnTitle>
-            </column>
-            <column columnName="mergepriority">
-                <columnTitle>Merge Priority Order</columnTitle>
-                <description>This order will be used for genotype priority order when merging to create the primary release VCF.  Lower numbers have higher priority.  Set to -1 to exclude this track when merging.</description>
+                <columnTitle>Is Primary Track For Species?</columnTitle>
             </column>
             <column columnName="skipvalidation">
                 <columnTitle>Skip Validation?</columnTitle>
@@ -831,7 +845,7 @@
                 <inputType>textarea</inputType>
             </column>
             <column columnName="isprimarytrack">
-                <columnTitle>Is Primary Track?</columnTitle>
+                <columnTitle>Is Primary Track For Species?</columnTitle>
                 <defaultValue>false</defaultValue>
             </column>
             <column columnName="container">
diff --git a/mGAP/src/org/labkey/mgap/mGAPController.java b/mGAP/src/org/labkey/mgap/mGAPController.java
index e8f0ebbc4..52991080b 100644
--- a/mGAP/src/org/labkey/mgap/mGAPController.java
+++ b/mGAP/src/org/labkey/mgap/mGAPController.java
@@ -58,6 +58,7 @@
 import org.labkey.api.query.QueryUpdateService;
 import org.labkey.api.query.UserSchema;
 import org.labkey.api.reader.Readers;
+import org.labkey.api.resource.Resource;
 import org.labkey.api.security.AuthenticationManager;
 import org.labkey.api.security.Group;
 import org.labkey.api.security.GroupManager;
@@ -224,7 +225,7 @@ public Object execute(RequestUserForm form, BindException errors) throws Excepti
                         }
 
                         DetailsURL url = DetailsURL.fromString("/query/executeQuery.view?schemaName=mgap&query.queryName=userRequests&query.viewName=Pending Requests", c);
-                        mail.setEncodedHtmlContent("A user requested an account on mGap.  <a href=\"" + AppProps.getInstance().getBaseServerUrl() + url.getActionURL().toString()+ "\">Click here to view/approve this request</a>");
+                        mail.setEncodedHtmlContent("A user requested an account on mGap.  <a href=\"" + AppProps.getInstance().getBaseServerUrl() + url.getActionURL().toString() + "\">Click here to view/approve this request</a>");
                         mail.setFrom(getReplyEmail(getContainer()));
                         mail.setSubject("mGap Account Request");
                         mail.addRecipients(Message.RecipientType.TO, emails.toArray(new Address[emails.size()]));
@@ -412,13 +413,13 @@ public Object execute(ApproveUserRequestsForm form, BindException errors) throws
                     User u;
                     if (map.get("userId") != null)
                     {
-                        Integer userId = (Integer)map.get("userId");
+                        Integer userId = (Integer) map.get("userId");
                         u = UserManager.getUser(userId);
                         existingUsersGivenAccess.add(u);
                     }
                     else
                     {
-                        ValidEmail ve = new ValidEmail((String)map.get("email"));
+                        ValidEmail ve = new ValidEmail((String) map.get("email"));
                         u = UserManager.getUser(ve);
                         if (u != null)
                         {
@@ -428,8 +429,8 @@ public Object execute(ApproveUserRequestsForm form, BindException errors) throws
                         {
                             SecurityManager.NewUserStatus st = SecurityManager.addUser(ve, getUser());
                             u = st.getUser();
-                            u.setFirstName((String)map.get("firstName"));
-                            u.setLastName((String)map.get("lastName"));
+                            u.setFirstName((String) map.get("firstName"));
+                            u.setLastName((String) map.get("lastName"));
                             UserManager.updateUser(getUser(), u);
 
                             if (st.isLdapOrSsoEmail())
@@ -539,7 +540,7 @@ private static Map<String, Object> getReleaseRow(User u, ReleaseForm form, Error
             return null;
         }
 
-        Container rowContainer = ContainerManager.getForId((String)row.get("container"));
+        Container rowContainer = ContainerManager.getForId((String) row.get("container"));
         if (rowContainer == null)
         {
             errors.reject(ERROR_MSG, "Unknown row container: " + form.getReleaseId());
@@ -555,7 +556,7 @@ else if (!rowContainer.hasPermission(u, ReadPermission.class))
 
     private static SequenceOutputFile getOutputFile(Map<String, Object> row, ReleaseForm form, Errors errors)
     {
-        SequenceOutputFile so = SequenceOutputFile.getForId((Integer)row.get("vcfId"));
+        SequenceOutputFile so = SequenceOutputFile.getForId((Integer) row.get("vcfId"));
         if (so == null)
         {
             errors.reject(ERROR_MSG, "Unknown VCF file ID: " + form.getReleaseId());
@@ -590,7 +591,7 @@ public void export(DownloadBundleForm form, HttpServletResponse response, BindEx
             }
 
             Set<File> toZip = new HashSet<>();
-            String zipName = "mGap_VariantCatalog_v" + FileUtil.makeLegalName((String)row.get("version"));
+            String zipName = "mGap_VariantCatalog_v" + FileUtil.makeLegalName((String) row.get("version"));
             zipName = zipName.replaceAll(" ", "_");
 
             toZip.add(so.getFile());
@@ -598,7 +599,7 @@ public void export(DownloadBundleForm form, HttpServletResponse response, BindEx
 
             if (form.getIncludeGenome())
             {
-                ReferenceGenome genome = SequenceAnalysisService.get().getReferenceGenome((Integer)row.get("genomeId"), getUser());
+                ReferenceGenome genome = SequenceAnalysisService.get().getReferenceGenome((Integer) row.get("genomeId"), getUser());
                 if (genome == null)
                 {
                     errors.reject(ERROR_MSG, "Unknown genome: " + row.get("genomeId"));
@@ -969,7 +970,7 @@ public URLHelper getRedirectURL(GenomeBrowserForm form)
             String species = StringUtils.trimToNull(form.getSpecies());
             if (jbrowseDatabaseId == null)
             {
-                jbrowseDatabaseId = ctx.getString("human".equals(species) ? "mgapJBrowseHuman": "mgapJBrowse");
+                jbrowseDatabaseId = ctx.getString("human".equals(species) ? "mgapJBrowseHuman" : "mgapJBrowse");
             }
 
             if (jbrowseDatabaseId == null)
@@ -1283,4 +1284,71 @@ public URLHelper getSuccessURL(Object o)
             return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer());
         }
     }
+
+    @RequiresPermission(AdminPermission.class)
+    public static class ImportDataAction extends ConfirmAction<Object>
+    {
+        @Override
+        public ModelAndView getConfirmView(Object o, BindException errors) throws Exception
+        {
+            setTitle("Import mGAP Reference Data");
+
+            return HtmlView.of("This will import default values for reference tables. Do you want to continue?");
+        }
+
+        @Override
+        public void validateCommand(Object o, Errors errors)
+        {
+
+        }
+
+        @Override
+        public @NotNull URLHelper getSuccessURL(Object o)
+        {
+            return getContainer().getStartURL(getUser());
+        }
+
+        @Override
+        public boolean handlePost(Object o, BindException errors) throws Exception
+        {
+            Resource r = ModuleLoader.getInstance().getModule(mGAPModule.class).getModuleResource(Path.parse("data/species.tsv"));
+            if (!r.exists())
+            {
+                throw new IllegalStateException("Unable to find species.tsv");
+            }
+
+            List<Map<String, Object>> toAdd = new ArrayList<>();
+            try (CSVReader reader = new CSVReader(Readers.getReader(r.getInputStream()), '\t'))
+            {
+                String[] line;
+                while ((line = reader.readNext()) != null)
+                {
+                    // Skip the header row, and any blank/short line that would otherwise throw ArrayIndexOutOfBoundsException below
+                    if (line.length < 3 || line[0].equals("common_name"))
+                    {
+                        continue;
+                    }
+                    Map<String, Object> row = new CaseInsensitiveHashMap<>();
+                    row.put("common_name", line[0]);
+                    row.put("scientific_name", line[1]);
+                    row.put("mhc_prefix", line[2]);
+
+                    toAdd.add(row);
+                }
+            }
+
+            UserSchema us = QueryService.get().getUserSchema(getUser(), getContainer(), "laboratory");
+            TableInfo ti = us.getTable("species");
+            ti.getUpdateService().truncateRows(getUser(), getContainer(), null, null);
+
+            BatchValidationException bve = new BatchValidationException();
+            ti.getUpdateService().insertRows(getUser(), getContainer(), toAdd, bve, null, null);
+            if (bve.hasErrors())
+            {
+                throw bve;
+            }
+
+            return true;
+        }
+    }
 }
\ No newline at end of file
diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java
index 7bcc1c7c4..28affbe03 100644
--- a/mGAP/src/org/labkey/mgap/mGAPModule.java
+++ b/mGAP/src/org/labkey/mgap/mGAPModule.java
@@ -77,7 +77,7 @@ public String getName()
     @Override
     public Double getSchemaVersion()
     {
-        return 16.73;
+        return 16.74;
     }
 
     @Override
diff --git a/mcc/package-lock.json b/mcc/package-lock.json
index 3655ca754..9091b05dc 100644
--- a/mcc/package-lock.json
+++ b/mcc/package-lock.json
@@ -4938,21 +4938,6 @@
       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
       "dev": true
     },
-    "node_modules/body-parser/node_modules/qs": {
-      "version": "6.13.0",
-      "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
-      "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
-      "dev": true,
-      "dependencies": {
-        "side-channel": "^1.0.6"
-      },
-      "engines": {
-        "node": ">=0.6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
     "node_modules/bonjour-service": {
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.2.1.tgz",
@@ -5554,10 +5539,11 @@
       "integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="
     },
     "node_modules/cookie": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
-      "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
+      "version": "0.7.1",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
+      "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
       "dev": true,
+      "license": "MIT",
       "engines": {
         "node": ">= 0.6"
       }
@@ -6587,24 +6573,25 @@
       }
     },
     "node_modules/express": {
-      "version": "4.20.0",
-      "resolved": "https://registry.npmjs.org/express/-/express-4.20.0.tgz",
-      "integrity": "sha512-pLdae7I6QqShF5PnNTCVn4hI91Dx0Grkn2+IAsMTgMIKuQVte2dN9PeGSSAME2FR8anOhVA62QDIUaWVfEXVLw==",
+      "version": "4.21.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz",
+      "integrity": "sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ==",
       "dev": true,
+      "license": "MIT",
       "dependencies": {
         "accepts": "~1.3.8",
         "array-flatten": "1.1.1",
         "body-parser": "1.20.3",
         "content-disposition": "0.5.4",
         "content-type": "~1.0.4",
-        "cookie": "0.6.0",
+        "cookie": "0.7.1",
         "cookie-signature": "1.0.6",
         "debug": "2.6.9",
         "depd": "2.0.0",
         "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "etag": "~1.8.1",
-        "finalhandler": "1.2.0",
+        "finalhandler": "1.3.1",
         "fresh": "0.5.2",
         "http-errors": "2.0.0",
         "merge-descriptors": "1.0.3",
@@ -6613,11 +6600,11 @@
         "parseurl": "~1.3.3",
         "path-to-regexp": "0.1.10",
         "proxy-addr": "~2.0.7",
-        "qs": "6.11.0",
+        "qs": "6.13.0",
         "range-parser": "~1.2.1",
         "safe-buffer": "5.2.1",
         "send": "0.19.0",
-        "serve-static": "1.16.0",
+        "serve-static": "1.16.2",
         "setprototypeof": "1.2.0",
         "statuses": "2.0.1",
         "type-is": "~1.6.18",
@@ -6750,13 +6737,14 @@
       }
     },
     "node_modules/finalhandler": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
-      "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==",
+      "version": "1.3.1",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
+      "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
       "dev": true,
+      "license": "MIT",
       "dependencies": {
         "debug": "2.6.9",
-        "encodeurl": "~1.0.2",
+        "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "on-finished": "2.4.1",
         "parseurl": "~1.3.3",
@@ -6772,15 +6760,27 @@
       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
       "dev": true,
+      "license": "MIT",
       "dependencies": {
         "ms": "2.0.0"
       }
     },
+    "node_modules/finalhandler/node_modules/encodeurl": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/finalhandler/node_modules/ms": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "dev": true
+      "dev": true,
+      "license": "MIT"
     },
     "node_modules/find-cache-dir": {
       "version": "4.0.0",
@@ -7857,10 +7857,11 @@
       }
     },
     "node_modules/http-proxy-middleware": {
-      "version": "2.0.6",
-      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.6.tgz",
-      "integrity": "sha512-ya/UeJ6HVBYxrgYotAZo1KvPWlgB48kUJLDePFeneHsVujFaW5WNj2NgWCAE//B1Dl02BIfYlpNgBy8Kf8Rjmw==",
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz",
+      "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==",
       "dev": true,
+      "license": "MIT",
       "dependencies": {
         "@types/http-proxy": "^1.17.8",
         "http-proxy": "^1.18.1",
@@ -11846,12 +11847,13 @@
       ]
     },
     "node_modules/qs": {
-      "version": "6.11.0",
-      "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
-      "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
+      "version": "6.13.0",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
+      "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
       "dev": true,
+      "license": "BSD-3-Clause",
       "dependencies": {
-        "side-channel": "^1.0.4"
+        "side-channel": "^1.0.6"
       },
       "engines": {
         "node": ">=0.6"
@@ -12946,63 +12948,29 @@
       }
     },
     "node_modules/serve-static": {
-      "version": "1.16.0",
-      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.0.tgz",
-      "integrity": "sha512-pDLK8zwl2eKaYrs8mrPZBJua4hMplRWJ1tIFksVC3FtBEBnl8dxgeHtsaMS8DhS9i4fLObaon6ABoc4/hQGdPA==",
+      "version": "1.16.2",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz",
+      "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==",
       "dev": true,
+      "license": "MIT",
       "dependencies": {
-        "encodeurl": "~1.0.2",
+        "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "parseurl": "~1.3.3",
-        "send": "0.18.0"
+        "send": "0.19.0"
       },
       "engines": {
         "node": ">= 0.8.0"
       }
     },
-    "node_modules/serve-static/node_modules/debug": {
-      "version": "2.6.9",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
-      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "dev": true,
-      "dependencies": {
-        "ms": "2.0.0"
-      }
-    },
-    "node_modules/serve-static/node_modules/debug/node_modules/ms": {
+    "node_modules/serve-static/node_modules/encodeurl": {
       "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "dev": true
-    },
-    "node_modules/serve-static/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "dev": true
-    },
-    "node_modules/serve-static/node_modules/send": {
-      "version": "0.18.0",
-      "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
-      "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
       "dev": true,
-      "dependencies": {
-        "debug": "2.6.9",
-        "depd": "2.0.0",
-        "destroy": "1.2.0",
-        "encodeurl": "~1.0.2",
-        "escape-html": "~1.0.3",
-        "etag": "~1.8.1",
-        "fresh": "0.5.2",
-        "http-errors": "2.0.0",
-        "mime": "1.6.0",
-        "ms": "2.1.3",
-        "on-finished": "2.4.1",
-        "range-parser": "~1.2.1",
-        "statuses": "2.0.1"
-      },
+      "license": "MIT",
       "engines": {
-        "node": ">= 0.8.0"
+        "node": ">= 0.8"
       }
     },
     "node_modules/set-blocking": {

From 5650d21477329c7e589f8fc0abc91b51da7b6fa2 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Thu, 31 Oct 2024 22:29:00 -0700
Subject: [PATCH 02/20] Drop support for Cassandra

---
 .../labkey/mgap/pipeline/AnnotationStep.java  | 171 ++----------------
 .../pipeline/MultiSourceAnnotatorRunner.java  |   8 +-
 2 files changed, 14 insertions(+), 165 deletions(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
index 96dd56b56..2854ce3c5 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
@@ -2,7 +2,6 @@
 
 import htsjdk.samtools.util.Interval;
 import htsjdk.variant.vcf.VCFFileReader;
-import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.jetbrains.annotations.Nullable;
 import org.json.JSONObject;
@@ -25,18 +24,14 @@
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineStep;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
-import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
 import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
 import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
 import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
-import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
-import org.labkey.api.util.FileUtil;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.mgap.mGAPSchema;
@@ -57,7 +52,7 @@
 /**
  * Created by bimber on 5/2/2017.
  */
-public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner> implements VariantProcessingStep
+public class AnnotationStep extends AbstractCommandPipelineStep<MultiSourceAnnotatorRunner> implements VariantProcessingStep
 {
     public static final String GRCH37 = "genome37";
     private static final String CLINVAR_VCF = "clinvar37";
@@ -65,7 +60,7 @@ public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner>
 
     public AnnotationStep(PipelineStepProvider<?> provider, PipelineContext ctx)
     {
-        super(provider, ctx, new CassandraRunner(ctx.getLogger()));
+        super(provider, ctx, new MultiSourceAnnotatorRunner(ctx.getLogger()));
     }
 
     public static class Provider extends AbstractVariantProcessingStepProvider<AnnotationStep> implements VariantProcessingStep.SupportsScatterGather
@@ -88,10 +83,6 @@ public Provider()
                         put("valueField", "rowid");
                         put("allowBlank", false);
                     }}, null),
-                    ToolParameterDescriptor.create("useCassandra", "Use Cassandra", "If checked, Cassandra will be run.", "checkbox", new JSONObject()
-                    {{
-                        put("checked", true);
-                    }}, true),
                     ToolParameterDescriptor.create("useFuncotator", "Use Funcotator", "If checked, Extended Funcotator will be run.", "checkbox", new JSONObject()
                     {{
                         put("checked", true);
@@ -242,44 +233,20 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
         boolean dropGenotypes = totalSubjects > 10;
         boolean dropFiltered = getProvider().getParameterByName("dropFiltered").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class);
 
-        //This flag exists to allow in-flight jobs to be reworked to include a sample.  it should eventually be removed.
-        boolean forceRecreate = false;
-
         File currentVcf = inputVCF;
         if (dropGenotypes || dropFiltered)
         {
             if (dropGenotypes)
-                getPipelineCtx().getLogger().info("dropping most genotypes prior to liftover for performance reasons.  a single is retained since cassandra requires one.");
+                getPipelineCtx().getLogger().info("dropping genotypes prior to liftover for performance reasons.");
             if (dropFiltered)
                 getPipelineCtx().getLogger().info("dropping filtered sites");
 
             File subset = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".subset.vcf.gz");
 
-            //NOTE: this check exists to correct in-flight jobs created using --sites-only-vcf-output.  It should eventually be removed.
-            if (subset.exists())
-            {
-                try (VCFFileReader reader = new VCFFileReader(subset))
-                {
-                    if (reader.getFileHeader().getGenotypeSamples().isEmpty())
-                    {
-                        getPipelineCtx().getLogger().info("A VCF appears to have been created with --sites-only.  Will overwrite these using an output with a single sample for Cassandra");
-                        forceRecreate = true;
-                    }
-                }
-            }
-
             List<String> selectArgs = new ArrayList<>();
             if (dropGenotypes)
             {
-                //NOTE: Cassandra requires at least one genotype, so instead of --sites-only-vcf-output, subset to first sample only
-                String firstSample;
-                try (VCFFileReader reader = new VCFFileReader(inputVCF))
-                {
-                    firstSample = reader.getFileHeader().getGenotypeSamples().get(0);
-                }
-
-                selectArgs.add("-sn");
-                selectArgs.add(firstSample);
+                selectArgs.add("--sites-only-vcf-output");
             }
 
             if (dropFiltered)
@@ -297,7 +264,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
                 needToSubsetToInterval = false;
             }
 
-            if (forceRecreate || !indexExists(subset))
+            if (!indexExists(subset))
             {
                 SelectVariantsWrapper wrapper = new SelectVariantsWrapper(getPipelineCtx().getLogger());
                 wrapper.execute(originalGenome.getWorkingFastaFile(), inputVCF, subset, selectArgs);
@@ -332,7 +299,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
                 needToSubsetToInterval = false;
 
                 File intervalSubset = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".intervalSubset.vcf.gz");
-                if (forceRecreate || !indexExists(intervalSubset))
+                if (!indexExists(intervalSubset))
                 {
                     SelectVariantsWrapper wrapper = new SelectVariantsWrapper(getPipelineCtx().getLogger());
                     wrapper.execute(originalGenome.getWorkingFastaFile(), inputVCF, intervalSubset, selectArgs);
@@ -358,7 +325,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
 
         File liftedToGRCh37 = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftTo" + grch37Genome.getGenomeId() + ".vcf.gz");
         File liftoverRejects = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftoverReject" + grch37Genome.getGenomeId() + ".vcf.gz");
-        if (forceRecreate || !indexExists(liftoverRejects) || !indexExists(liftedToGRCh37))
+        if (!indexExists(liftoverRejects) || !indexExists(liftedToGRCh37))
         {
             LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(getPipelineCtx().getLogger());
             liftoverVcfRunner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);
@@ -374,7 +341,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
         //annotate with clinvar
         getPipelineCtx().getLogger().info("annotating with ClinVar 2.0");
         File clinvarAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cv.vcf.gz");
-        if (forceRecreate || !indexExists(clinvarAnnotated))
+        if (!indexExists(clinvarAnnotated))
         {
             ClinvarAnnotatorRunner cvRunner = new ClinvarAnnotatorRunner(getPipelineCtx().getLogger());
             cvRunner.execute(liftedToGRCh37, clinvarVCF, clinvarAnnotated);
@@ -390,7 +357,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
         //backport ClinVar
         getPipelineCtx().getLogger().info("backport ClinVar 2.0 to source genome");
         File clinvarAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(clinvarAnnotated.getName()) + ".bp.vcf.gz");
-        if (forceRecreate || !indexExists(clinvarAnnotatedBackport ))
+        if (!indexExists(clinvarAnnotatedBackport ))
         {
             BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger());
             bpRunner.execute(clinvarAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), clinvarAnnotatedBackport);
@@ -403,49 +370,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
         output.addIntermediateFile(clinvarAnnotatedBackport);
         output.addIntermediateFile(new File(clinvarAnnotatedBackport.getPath() + ".tbi"));
 
-        //annotate with cassandra
-        File cassandraAnnotatedBackport = null;
-        boolean useCassandra = getProvider().getParameterByName("useCassandra").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
-        if (useCassandra)
-        {
-            getPipelineCtx().getLogger().info("annotating with Cassandra");
-            String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cassandra";
-            File cassandraAnnotated = new File(outputDirectory, basename + ".vcf.gz");
-            if (forceRecreate || !indexExists(cassandraAnnotated))
-            {
-                //we can assume splitting happened upstream, so run over the full VCF
-                runCassandra(liftedToGRCh37, cassandraAnnotated, output, forceRecreate);
-            }
-            else
-            {
-                getPipelineCtx().getLogger().info("resuming with existing file: " + cassandraAnnotated.getPath());
-            }
-
-            output.addOutput(cassandraAnnotated, "VCF Annotated With Cassandra");
-            output.addIntermediateFile(cassandraAnnotated);
-            output.addIntermediateFile(new File(cassandraAnnotated.getPath() + ".tbi"));
-
-            //backport Cassandra
-            getPipelineCtx().getLogger().info("backport Cassandra to source genome");
-            cassandraAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(cassandraAnnotated.getName()) + ".bp.vcf.gz");
-            if (forceRecreate || !indexExists(cassandraAnnotatedBackport))
-            {
-                BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger());
-                bpRunner.execute(cassandraAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), cassandraAnnotatedBackport);
-            }
-            else
-            {
-                getPipelineCtx().getLogger().info("resuming with existing file: " + cassandraAnnotatedBackport.getPath());
-            }
-            output.addOutput(cassandraAnnotatedBackport, "VCF Annotated With Cassandra, Backported");
-            output.addIntermediateFile(cassandraAnnotatedBackport);
-            output.addIntermediateFile(new File(cassandraAnnotatedBackport.getPath() + ".tbi"));
-        }
-        else
-        {
-            getPipelineCtx().getLogger().debug("Cassandra will be skipped");
-        }
-
         //annotate with funcotator
         File funcotatorAnnotatedBackport = null;
         if (useFuncotator)
@@ -453,7 +377,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
             getPipelineCtx().getLogger().info("annotating with Funcotator");
             String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".funcotator";
             File funcotatorAnnotated = new File(outputDirectory, basename + ".vcf.gz");
-            if (forceRecreate || !indexExists(funcotatorAnnotated))
+            if (!indexExists(funcotatorAnnotated))
             {
                 //we can assume splitting happened upstream, so run over the full VCF
                 FuncotatorWrapper fr = new FuncotatorWrapper(getPipelineCtx().getLogger());
@@ -490,7 +414,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
             //backport Funcotator
             getPipelineCtx().getLogger().info("backport Funcotator to source genome");
             funcotatorAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(funcotatorAnnotated.getName()) + ".bp.vcf.gz");
-            if (forceRecreate || !indexExists(funcotatorAnnotatedBackport))
+            if (!indexExists(funcotatorAnnotatedBackport))
             {
                 BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger());
                 bpRunner.execute(funcotatorAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), funcotatorAnnotatedBackport);
@@ -511,7 +435,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
         //multiannotator
         getPipelineCtx().getLogger().info("Running MultiSourceAnnotator");
         File multiAnnotated = new File(getPipelineCtx().getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".ma.vcf.gz");
-        if (forceRecreate || !indexExists(multiAnnotated))
+        if (!indexExists(multiAnnotated))
         {
             MultiSourceAnnotatorRunner maRunner = new MultiSourceAnnotatorRunner(getPipelineCtx().getLogger());
 
@@ -535,7 +459,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
                 addToolFieldNames("Funcotator", "-ff", options, multiAnnotated.getParentFile(), output, liftFields);
             }
 
-            maRunner.execute(inputVCF, cassandraAnnotatedBackport, clinvarAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, multiAnnotated, options);
+            maRunner.execute(inputVCF, clinvarAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, multiAnnotated, options);
         }
         else
         {
@@ -588,75 +512,6 @@ private void addToolFieldNames(String toolName, String argName, List<String> opt
         options.add(fieldFile.getPath());
     }
 
-    private void runCassandra(File liftedToGRCh37, File finalOutput, VariantProcessingStepOutputImpl output, boolean forceRecreate) throws PipelineJobException
-    {
-        List<String> extraArgs = new ArrayList<>();
-
-        //NOTE: Cassandra will not sort the output when multithreaded, so the extra sorting we would need to do negates any benefit here
-        String tmpDir = SequencePipelineService.get().getJavaTempDir();
-        if (!StringUtils.isEmpty(tmpDir))
-        {
-            File tmpDirFile = new File(tmpDir, "cassandra");
-            if (!tmpDirFile.exists())
-            {
-                tmpDirFile.mkdirs();
-            }
-
-            extraArgs.add("--tempDir");
-            extraArgs.add(tmpDirFile.getPath());
-        }
-
-        CassandraRunner cassRunner = new CassandraRunner(getPipelineCtx().getLogger());
-
-        Integer maxRam = SequencePipelineService.get().getMaxRam();
-        cassRunner.setMaxRamOverride(maxRam);
-
-        //Cassandra requires unzipped files
-        File liftedToGRCh37Unzipped = new File(liftedToGRCh37.getParentFile(), FileUtil.getBaseName(liftedToGRCh37.getName()));
-        File liftedToGRCh37UnzippedDone = new File(liftedToGRCh37Unzipped.getPath() + ".done");
-        if (forceRecreate || !liftedToGRCh37UnzippedDone.exists())
-        {
-            SimpleScriptWrapper wrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger());
-            wrapper.execute(Arrays.asList("gunzip", liftedToGRCh37.getPath()));
-            try
-            {
-                FileUtils.touch(liftedToGRCh37UnzippedDone);
-                if (!liftedToGRCh37.exists() && indexExists(liftedToGRCh37))
-                {
-                    File idx = new File(liftedToGRCh37.getPath() + ".tbi");
-                    idx.delete();
-                }
-            }
-            catch (IOException e)
-            {
-                throw new PipelineJobException(e);
-            }
-        }
-        else
-        {
-            getPipelineCtx().getLogger().info("Resuming from file: " + liftedToGRCh37Unzipped.getPath());
-        }
-
-        output.addIntermediateFile(liftedToGRCh37Unzipped);
-        output.addIntermediateFile(new File(liftedToGRCh37Unzipped.getPath() + ".idx"));
-        output.addIntermediateFile(liftedToGRCh37UnzippedDone);
-
-        cassRunner.execute(liftedToGRCh37Unzipped, finalOutput, extraArgs);
-        if (!finalOutput.exists())
-        {
-            throw new PipelineJobException("Unable to find output");
-        }
-
-        try
-        {
-            SequenceAnalysisService.get().ensureVcfIndex(finalOutput, getPipelineCtx().getLogger());
-        }
-        catch (IOException e)
-        {
-            throw new PipelineJobException(e);
-        }
-    }
-
     protected static boolean indexExists(File vcf)
     {
         File idx = new File(vcf.getPath() + ".tbi");
diff --git a/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java b/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java
index 23d0b388a..6a1c50939 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java
@@ -15,19 +15,13 @@ public MultiSourceAnnotatorRunner(Logger log)
         super(log);
     }
 
-    public File execute(File inputVcf, @Nullable File cassandraVcf, File clinvarAnnotatedBackport, File liftoverRejects, @Nullable File funcotator, File outputVcf, @Nullable List<String> options)  throws PipelineJobException
+    public File execute(File inputVcf, File clinvarAnnotatedBackport, File liftoverRejects, @Nullable File funcotator, File outputVcf, @Nullable List<String> options)  throws PipelineJobException
     {
         List<String> args = getBaseArgs("MultiSourceAnnotator");
 
         args.add("-V");
         args.add(inputVcf.getPath());
 
-        if (cassandraVcf != null)
-        {
-            args.add("-c");
-            args.add(cassandraVcf.getPath());
-        }
-
         args.add("-lr");
         args.add(liftoverRejects.getPath());
 

From d31a908f2950e754e65dc01fdf1cb16e219a719e Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Fri, 1 Nov 2024 15:42:37 -0700
Subject: [PATCH 03/20] Add mGAP calculated columns

---
 .../mGAP/variantCatalogReleases/.qview.xml    |   2 +-
 mGAP/resources/schemas/mgap.xml               |   2 +-
 mGAP/src/org/labkey/mgap/mGAPModule.java      |   2 -
 .../pipeline/AnnotateNovelSitesWrapper.java   |  49 +++++
 .../mGapReleaseAnnotateNovelSitesStep.java    | 206 ------------------
 .../org/labkey/mgap/query/mGAPUserSchema.java |  15 +-
 6 files changed, 65 insertions(+), 211 deletions(-)
 create mode 100644 mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
 delete mode 100644 mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java

diff --git a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
index ea7526061..425728879 100644
--- a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
+++ b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml
@@ -1,7 +1,7 @@
 <customView xmlns="http://labkey.org/data/xml/queryCustomView">
     <columns>
         <column name="version"/>
-        <column name="version/species"/>
+        <column name="species"/>
         <column name="releaseDate"/>
         <column name="totalSubjects"/>
         <column name="totalVariants"/>
diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index fa92efe8c..090becab2 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -690,7 +690,7 @@
                 <columnTitle>Is Primary Track For Species?</columnTitle>
             </column>
             <column columnName="skipvalidation">
-                <columnTitle>Skip Validation?</columnTitle>
+                <columnTitle>Skip Annotation Checks?</columnTitle>
             </column>
             <column columnName="container">
                 <isHidden>true</isHidden>
diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java
index 28affbe03..1b1666ceb 100644
--- a/mGAP/src/org/labkey/mgap/mGAPModule.java
+++ b/mGAP/src/org/labkey/mgap/mGAPModule.java
@@ -57,7 +57,6 @@
 import org.labkey.mgap.pipeline.SampleSpecificGenotypeFiltrationStep;
 import org.labkey.mgap.pipeline.VcfComparisonStep;
 import org.labkey.mgap.pipeline.mGapReleaseAlleleFreqStep;
-import org.labkey.mgap.pipeline.mGapReleaseAnnotateNovelSitesStep;
 import org.labkey.mgap.pipeline.mGapReleaseComparisonStep;
 import org.labkey.mgap.pipeline.mGapReleaseGenerator;
 import org.labkey.mgap.query.mGAPUserSchema;
@@ -141,7 +140,6 @@ public PipelineStartup()
                 SequencePipelineService.get().registerPipelineStep(new VcfComparisonStep.Provider());
                 SequencePipelineService.get().registerPipelineStep(new mGapReleaseComparisonStep.Provider());
                 SequencePipelineService.get().registerPipelineStep(new SampleSpecificGenotypeFiltrationStep.Provider());
-                SequencePipelineService.get().registerPipelineStep(new mGapReleaseAnnotateNovelSitesStep.Provider());
                 SequencePipelineService.get().registerPipelineStep(new GenerateMgapTracksStep.Provider());
                 SequencePipelineService.get().registerPipelineStep(new IndexVariantsForMgapStep.Provider());
                 SequencePipelineService.get().registerPipelineStep(new mGapReleaseAlleleFreqStep.Provider());
diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
new file mode 100644
index 000000000..3352ce2fd
--- /dev/null
+++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
@@ -0,0 +1,49 @@
+package org.labkey.mgap.pipeline;
+
+import org.apache.logging.log4j.Logger;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+public class AnnotateNovelSitesWrapper extends AbstractDiscvrSeqWrapper
+{
+    public AnnotateNovelSitesWrapper(Logger log)
+    {
+        super(log);
+    }
+
+    public File execute(File vcf, File referenceVcf, File fasta, String versionString, File vcfOutput, List<String> extraArgs) throws PipelineJobException
+    {
+        List<String> args = new ArrayList<>(getBaseArgs());
+        args.add("AnnotateNovelSites");
+        args.add("-R");
+        args.add(fasta.getPath());
+
+        args.add("-V");
+        args.add(vcf.getPath());
+        args.add("-rv");
+        args.add(referenceVcf.getPath());
+
+        args.add("-an");
+        args.add("mGAPV");
+        args.add("-ad");
+        args.add("The first mGAP version where variants at this site appeared");
+        args.add("-av");
+        args.add(versionString);
+
+        args.add("-O");
+        args.add(vcfOutput.getPath());
+
+        if (extraArgs != null)
+        {
+            args.addAll(extraArgs);
+        }
+
+        execute(args);
+
+        return vcfOutput;
+    }
+}
diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java
deleted file mode 100644
index 70e30aaf1..000000000
--- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java
+++ /dev/null
@@ -1,206 +0,0 @@
-package org.labkey.mgap.pipeline;
-
-import htsjdk.samtools.util.Interval;
-import org.apache.commons.lang3.math.NumberUtils;
-import org.apache.logging.log4j.Logger;
-import org.jetbrains.annotations.Nullable;
-import org.json.JSONObject;
-import org.labkey.api.data.SimpleFilter;
-import org.labkey.api.data.TableSelector;
-import org.labkey.api.pipeline.PipelineJob;
-import org.labkey.api.pipeline.PipelineJobException;
-import org.labkey.api.query.FieldKey;
-import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
-import org.labkey.api.sequenceanalysis.SequenceOutputFile;
-import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
-import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
-import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
-import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
-import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
-import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
-import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
-import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
-import org.labkey.api.util.PageFlowUtil;
-import org.labkey.mgap.mGAPSchema;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * User: bimber
- * Date: 6/15/2014
- * Time: 12:39 PM
- */
-public class mGapReleaseAnnotateNovelSitesStep extends AbstractCommandPipelineStep<mGapReleaseAnnotateNovelSitesStep.AnnotateNovelSitesWrapper> implements VariantProcessingStep
-{
-    public static final String VERSION_ROWID = "versionRowId";
-    public static final String PRIOR_RELEASE_LABEL = "priorReleaseLabel";
-    public static final String SITES_ONLY_DATA = "sitesOnlyVcfData";
-
-    public mGapReleaseAnnotateNovelSitesStep(PipelineStepProvider<?> provider, PipelineContext ctx)
-    {
-        super(provider, ctx, new AnnotateNovelSitesWrapper(ctx.getLogger()));
-    }
-
-    public static class Provider extends AbstractVariantProcessingStepProvider<mGapReleaseAnnotateNovelSitesStep> implements SupportsScatterGather
-    {
-        public Provider()
-        {
-            super("mGapAnnotateNovelSites", "Annotate Novel Sites Against mGAP Release", "AnnotateNovelSites", "Compare the VCF to the specified mGAP release VCF, producing TSV/VCF reports with site- and genotype-level concordance.", Arrays.asList(
-                    ToolParameterDescriptor.create(VERSION_ROWID, "mGAP Release", "The mGAP release VCF to use for comparison", "ldk-simplelabkeycombo", new JSONObject(){{
-                        put("allowBlank", false);
-                        put("width", 400);
-                        put("schemaName", "mgap");
-                        put("queryName", "variantCatalogReleases");
-                        put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()");
-                        put("displayField", "version");
-                        put("valueField", "rowid");
-                        put("doNotIncludeInTemplates", true);
-                    }}, null),
-                    ToolParameterDescriptor.create("releaseVersion", "mGAP Version", "This string will be used to tag novel variants.", "textfield", new JSONObject(){{
-                        put("allowBlank", false);
-                        put("doNotIncludeInTemplates", true);
-                    }}, null)
-            ), PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js"), null);
-        }
-
-        @Override
-        public mGapReleaseAnnotateNovelSitesStep create(PipelineContext ctx)
-        {
-            return new mGapReleaseAnnotateNovelSitesStep(this, ctx);
-        }
-    }
-
-    @Override
-    public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
-    {
-        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
-        getPipelineCtx().getLogger().info("Annotating VCF by mGAP Release");
-
-        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, "0.0");
-        if (releaseVersion.toLowerCase().startsWith("v"))
-        {
-            releaseVersion = releaseVersion.substring(1);
-        }
-
-        if (!NumberUtils.isCreatable(releaseVersion))
-        {
-            throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion);
-        }
-
-        String priorReleaseLabel = getPipelineCtx().getSequenceSupport().getCachedObject(PRIOR_RELEASE_LABEL, String.class);
-        int sitesOnlyExpDataId = getPipelineCtx().getSequenceSupport().getCachedObject(SITES_ONLY_DATA, Integer.class);
-        File sitesOnlyVcf = getPipelineCtx().getSequenceSupport().getCachedData(sitesOnlyExpDataId);
-        if (!sitesOnlyVcf.exists())
-        {
-            throw new PipelineJobException("Unable to find file: " + sitesOnlyVcf);
-        }
-
-        List<String> extraArgs = new ArrayList<>();
-        if (intervals != null)
-        {
-            intervals.forEach(interval -> {
-                extraArgs.add("-L");
-                extraArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
-            });
-
-            extraArgs.add("--ignore-variants-starting-outside-interval");
-        }
-
-        extraArgs.add("-dv");
-        extraArgs.add(priorReleaseLabel);
-
-        File annotatedVCF = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".comparison.vcf.gz");
-        getWrapper().execute(inputVCF, sitesOnlyVcf, genome.getWorkingFastaFile(), releaseVersion, annotatedVCF, extraArgs);
-        if (!annotatedVCF.exists())
-        {
-            throw new PipelineJobException("Unable to find output: " + annotatedVCF.getPath());
-        }
-
-        output.addInput(inputVCF, "Input VCF");
-        output.addInput(sitesOnlyVcf, "Reference VCF");
-
-        output.addOutput(annotatedVCF, "VCF Annotated by mGAP Version");
-        output.setVcf(annotatedVCF);
-
-        return output;
-    }
-
-    @Override
-    public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles) throws PipelineJobException
-    {
-        Integer versionRowId = getProvider().getParameterByName(VERSION_ROWID).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
-        String version = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("version"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(String.class);
-        if (version == null)
-        {
-            throw new PipelineJobException("Unable to find release for release: " + versionRowId);
-        }
-
-        Integer referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("sitesOnlyVcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
-        if (referenceVcfOutputId == null)
-        {
-            getPipelineCtx().getLogger().debug("Sites-only VCF not found, using primary VCF");
-            referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("vcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
-        }
-
-        if (referenceVcfOutputId == null)
-        {
-            throw new PipelineJobException("Unable to find sites-only VCF for release: " + versionRowId);
-        }
-
-        SequenceOutputFile sitesOnly = SequenceOutputFile.getForId(referenceVcfOutputId);
-        if (sitesOnly == null)
-        {
-            throw new PipelineJobException("Unable to find sites-only VCF output file for fileId: " + referenceVcfOutputId);
-        }
-
-        support.cacheExpData(sitesOnly.getExpData());
-
-        support.cacheObject(SITES_ONLY_DATA, sitesOnly.getDataId());
-        support.cacheObject(PRIOR_RELEASE_LABEL, version);
-    }
-
-    public static class AnnotateNovelSitesWrapper extends AbstractDiscvrSeqWrapper
-    {
-        public AnnotateNovelSitesWrapper(Logger log)
-        {
-            super(log);
-        }
-
-        public File execute(File vcf, File referenceVcf, File fasta, String versionString, File vcfOutput, List<String> extraArgs) throws PipelineJobException
-        {
-            List<String> args = new ArrayList<>(getBaseArgs());
-            args.add("AnnotateNovelSites");
-            args.add("-R");
-            args.add(fasta.getPath());
-
-            args.add("-V");
-            args.add(vcf.getPath());
-            args.add("-rv");
-            args.add(referenceVcf.getPath());
-
-            args.add("-an");
-            args.add("mGAPV");
-            args.add("-ad");
-            args.add("The first mGAP version where variants at this site appeared");
-            args.add("-av");
-            args.add(versionString);
-
-            args.add("-O");
-            args.add(vcfOutput.getPath());
-
-            if (extraArgs != null)
-            {
-                args.addAll(extraArgs);
-            }
-
-            execute(args);
-
-            return vcfOutput;
-        }
-    }
-}
diff --git a/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java b/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java
index f960fa01a..cc947f79f 100644
--- a/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java
+++ b/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java
@@ -76,7 +76,20 @@ else if (mGAPSchema.TABLE_RELEASE_TRACKS.equalsIgnoreCase(name))
 
     private TableInfo createWrappedVariantTable(String name, TableInfo sourceTable, ContainerFilter cf)
     {
-        return super.createWrappedTable(name, sourceTable, cf);
+        AbstractTableInfo ati = (AbstractTableInfo)super.createWrappedTable(name, sourceTable, cf);
+
+        String fieldName = "versionAndSpecies";
+        if (ati.getColumn(fieldName) == null)
+        {
+            SQLFragment sql = new SQLFragment("(" + ati.getSqlDialect().concatenate(ExprColumn.STR_TABLE_ALIAS + ".species", "': '", ExprColumn.STR_TABLE_ALIAS + ".version") + ")");
+            ExprColumn col = new ExprColumn(ati, fieldName, sql, JdbcType.VARCHAR, ati.getColumn("version"), ati.getColumn("species"));
+            col.setLabel("Version and Species");
+            col.setFacetingBehaviorType(FacetingBehaviorType.ALWAYS_OFF);
+            col.setDescription("This column shows the version and species");
+            ati.addColumn(col);
+        }
+
+        return ati;
     }
 
     private TableInfo customizeReleaseTracks(String name, TableInfo sourceTable, ContainerFilter cf)

From 93406cdd9e2ca7cf0491ee6bd224b31d7f379dfa Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sat, 2 Nov 2024 09:52:04 -0700
Subject: [PATCH 04/20] Test fix

---
 .../mgap/pipeline/AnnotateNovelSitesWrapper.java  | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
index 3352ce2fd..97bcc2495 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
@@ -1,6 +1,7 @@
 package org.labkey.mgap.pipeline;
 
 import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
 
@@ -15,7 +16,7 @@ public AnnotateNovelSitesWrapper(Logger log)
         super(log);
     }
 
-    public File execute(File vcf, File referenceVcf, File fasta, String versionString, File vcfOutput, List<String> extraArgs) throws PipelineJobException
+    public File execute(File vcf, @Nullable File referenceVcf, File fasta, String versionString, File vcfOutput, List<String> extraArgs) throws PipelineJobException
     {
         List<String> args = new ArrayList<>(getBaseArgs());
         args.add("AnnotateNovelSites");
@@ -24,8 +25,16 @@ public File execute(File vcf, File referenceVcf, File fasta, String versionStrin
 
         args.add("-V");
         args.add(vcf.getPath());
-        args.add("-rv");
-        args.add(referenceVcf.getPath());
+
+        if (referenceVcf != null)
+        {
+            args.add("-rv");
+            args.add(referenceVcf.getPath());
+        }
+        else
+        {
+            args.add("--allow-missing-ref");
+        }
 
         args.add("-an");
         args.add("mGAPV");

From a63899e0cc5d7c214c5ae48c658821825534696c Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 3 Nov 2024 06:58:27 -0800
Subject: [PATCH 05/20] Refactor GenerateMgapTracksStep to allow multiple
 species

---
 .../mgap/pipeline/GenerateMgapTracksStep.java | 315 +++++++++++++-----
 1 file changed, 225 insertions(+), 90 deletions(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
index df573c6ec..b90606960 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
@@ -6,6 +6,7 @@
 import htsjdk.variant.vcf.VCFFileReader;
 import htsjdk.variant.vcf.VCFHeader;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.logging.log4j.Logger;
 import org.jetbrains.annotations.Nullable;
 import org.json.JSONObject;
@@ -29,7 +30,6 @@
 import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineStep;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
@@ -62,6 +62,9 @@
 public class GenerateMgapTracksStep extends AbstractPipelineStep implements VariantProcessingStep, VariantProcessingStep.SupportsScatterGather
 {
     public static final String TRACK_CATEGORY = "mGAP Release Track";
+    public static final String VERSION_ROWID = "versionRowId";
+    public static final String PRIOR_RELEASE_LABEL = "priorReleaseLabel";
+    public static final String SITES_ONLY_DATA = "sitesOnlyVcfData";
 
     // 1) makes the subset VCF per track with those IDs,
     // 2) dies if it cannot find any of the IDs being requested,
@@ -78,11 +81,31 @@ public static class Provider extends AbstractVariantProcessingStepProvider<Gener
         public Provider()
         {
             super("GenerateMgapTracksStep", "Generate mGAP Tracks", "GenerateMgapTracksStep", "This will use the set of sample IDs from the table mgap.releaseTrackSubsets to subset the input VCF and produce one VCF per track. It will perform basic validation and also update mgap.releaseTracks.", Arrays.asList(
+                    ToolParameterDescriptor.create("species", "Version", "The species, which is used to filter tracks", "ldk-simplelabkeycombo", new JSONObject(){{
+                        put("allowBlank", false);
+                        put("doNotIncludeInTemplates", true);
+                        put("width", 400);
+                        put("schemaName", "laboratory");
+                        put("queryName", "species");
+                        put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()");
+                        put("displayField", "common_name");
+                        put("valueField", "common_name");
+                    }}, null),
                     ToolParameterDescriptor.create("releaseVersion", "mGAP Version", "This is the string that was used to annotate novel variants.", "textfield", new JSONObject(){{
                         put("allowBlank", false);
                         put("doNotIncludeInTemplates", true);
+                    }}, null),
+                    ToolParameterDescriptor.create(VERSION_ROWID, "Prior mGAP Release", "The mGAP release VCF to use for comparison", "ldk-simplelabkeycombo", new JSONObject(){{
+                        put("allowBlank", true); // this allows species without a prior release
+                        put("width", 400);
+                        put("schemaName", "mgap");
+                        put("queryName", "variantCatalogReleases");
+                        put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()");
+                        put("displayField", "versionAndSpecies");
+                        put("valueField", "rowid");
+                        put("doNotIncludeInTemplates", true);
                     }}, null)
-            ), null, null);
+            ), PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js"), null);
         }
 
         @Override
@@ -100,16 +123,41 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
             throw new PipelineJobException("This step expects to have a single VCF input");
         }
 
+        String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
         SequenceOutputFile so = inputFiles.get(0);
 
+        // Check how many tracks we expect:
+        TableInfo existingTracks = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACKS);
+
+        Set<String> primaryTrackNames = new HashSet<>();
+        Map<String, Set<String>> trackToSubject = new HashMap<>();
+        new TableSelector(existingTracks, PageFlowUtil.set("trackName", "isprimarytrack"), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> {
+            if (trackToSubject.containsKey(rs.getString(FieldKey.fromString("trackName"))))
+            {
+                throw new IllegalStateException("Duplicate track names present: " + rs.getString(FieldKey.fromString("trackName")));
+            }
+
+            trackToSubject.put(rs.getString(FieldKey.fromString("trackName")), new HashSet<>());
+
+            if (rs.getObject(FieldKey.fromString("isprimarytrack")) != null && rs.getBoolean(FieldKey.fromString("isprimarytrack")))
+            {
+                primaryTrackNames.add(rs.getString(FieldKey.fromString("trackName")));
+            }
+        });
+
+        if (primaryTrackNames.size() != 1)
+        {
+            throw new IllegalStateException("Expected single primary track, found: " + primaryTrackNames.size());
+        }
+
         // Verify all IDs in header are mGAP aliases. This map is the true ID to mGAP alias
         Map<String, String> sampleIdToMgapAlias = getSampleToAlias(so.getFile());
 
         // Now read track list, validate IDs present, and write to file:
         TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACK_SUBSETS);
-        TableSelector ts = new TableSelector(ti, PageFlowUtil.set("trackName", "subjectId"));
+        TableSelector ts = new TableSelector(ti, PageFlowUtil.set("trackName", "subjectId"), new SimpleFilter(FieldKey.fromString("trackName"), trackToSubject.keySet(), CompareType.IN), null);
         Set<String> requestedNotInVcf = new HashSet<>();
-        Map<String, Set<String>> trackToSubject = new HashMap<>();
+
         ts.forEachResults(rs -> {
             if (!trackToSubject.containsKey(rs.getString(FieldKey.fromString("trackName"))))
             {
@@ -138,6 +186,11 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
             for (String trackName : trackToSubject.keySet())
             {
                 getPipelineCtx().getLogger().info(trackToSubject + ": " + trackToSubject.get(trackName).size());
+                if (trackToSubject.get(trackName).isEmpty())
+                {
+                    continue;
+                }
+
                 trackToSubject.get(trackName).forEach(x -> {
                     writer.writeNext(new String[]{trackName, x});
                 });
@@ -147,67 +200,153 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
         {
             throw new PipelineJobException(e);
         }
+
+        getPipelineCtx().getSequenceSupport().cacheObject("primaryTrackName", primaryTrackNames.iterator().next());
+
+        // Prepare to annotate novel sites:
+        Integer versionRowId = getProvider().getParameterByName(VERSION_ROWID).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
+        String version = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("version"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(String.class);
+        if (version == null)
+        {
+            throw new PipelineJobException("Unable to find release for release: " + versionRowId);
+        }
+
+        Integer referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("sitesOnlyVcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
+        if (referenceVcfOutputId == null)
+        {
+            getPipelineCtx().getLogger().debug("Sites-only VCF not found, using primary VCF");
+            referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("vcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
+        }
+
+        if (referenceVcfOutputId == null)
+        {
+            throw new PipelineJobException("Unable to find sites-only VCF for release: " + versionRowId);
+        }
+
+        SequenceOutputFile sitesOnly = SequenceOutputFile.getForId(referenceVcfOutputId);
+        if (sitesOnly == null)
+        {
+            throw new PipelineJobException("Unable to find sites-only VCF output file for fileId: " + referenceVcfOutputId);
+        }
+
+        support.cacheExpData(sitesOnly.getExpData());
+
+        support.cacheObject(SITES_ONLY_DATA, sitesOnly.getDataId());
+        support.cacheObject(PRIOR_RELEASE_LABEL, version);
     }
 
-    private File getNovelSitesOutput(File outputDirectory)
+    private @Nullable File getAnnotationReferenceVcf() throws PipelineJobException
     {
-        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
-        return new File(outputDirectory, "mGAP_v" + releaseVersion + "_NovelSites.vcf.gz");
+        File refVcf = null;
+        Integer sitesOnlyExpDataId = getPipelineCtx().getSequenceSupport().getCachedObject(SITES_ONLY_DATA, Integer.class);
+        if (sitesOnlyExpDataId != null)
+        {
+            refVcf = getPipelineCtx().getSequenceSupport().getCachedData(sitesOnlyExpDataId);
+            if (!refVcf.exists())
+            {
+                throw new PipelineJobException("Unable to find file: " + refVcf);
+            }
+        }
+
+        return refVcf;
     }
 
-    @Override
-    public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
+    private File annotateNovelSites(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
     {
-        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
-        Map<String, List<String>> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true)));
+        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, "0.0");
+        if (releaseVersion.toLowerCase().startsWith("v"))
+        {
+            releaseVersion = releaseVersion.substring(1);
+        }
 
-        VCFHeader header;
-        try (VCFFileReader reader = new VCFFileReader(inputVCF))
+        if (!NumberUtils.isCreatable(releaseVersion))
         {
-            header = reader.getFileHeader();
+            throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion);
         }
 
-        if (!header.hasInfoLine("mGAPV"))
+        String priorReleaseLabel = getPipelineCtx().getSequenceSupport().getCachedObject(PRIOR_RELEASE_LABEL, String.class);
+        File sitesOnlyVcf = getAnnotationReferenceVcf();
+
+        List<String> extraArgs = new ArrayList<>();
+        if (intervals != null)
         {
-            throw new IllegalStateException("VCF is missing the annotation: mGAPV");
+            intervals.forEach(interval -> {
+                extraArgs.add("-L");
+                extraArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
+            });
+
+            extraArgs.add("--ignore-variants-starting-outside-interval");
         }
 
-        processTracks(output, inputVCF, trackToSamples, outputDirectory, genome, intervals);
+        extraArgs.add("-dv");
+        extraArgs.add(priorReleaseLabel);
 
-        // Also create the Novel Sites track:
-        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
-        File novelSitesOutput = getNovelSitesOutput(outputDirectory);
-        if (new File(novelSitesOutput.getPath() + ".tbi").exists())
+        if (sitesOnlyVcf != null)
         {
-            getPipelineCtx().getLogger().debug("Index exists, will not remake novel sites VCF");
+            extraArgs.add("-ns");
+            extraArgs.add(getNovelSitesOutput(outputDirectory).getPath());
+        }
+
+        File annotatedVCF = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".comparison.vcf.gz");
+        if (new File(annotatedVCF.getPath() + ".tbi").exists())
+        {
+            getPipelineCtx().getLogger().debug("Index exists, will not remake annotated sites VCF");
         }
         else
         {
-            getPipelineCtx().getJob().setStatus(PipelineJob.TaskStatus.running, "Processing novel sites track");
-
-            SelectVariantsWrapper sv = new SelectVariantsWrapper(getPipelineCtx().getLogger());
-            List<String> svArgs = new ArrayList<>();
-            svArgs.add("-select");
-            svArgs.add("mGAPV == '" + releaseVersion + "'");
-            if (intervals != null)
+            new AnnotateNovelSitesWrapper(getPipelineCtx().getLogger()).execute(inputVCF, sitesOnlyVcf, genome.getWorkingFastaFile(), releaseVersion, annotatedVCF, extraArgs);
+            if (!annotatedVCF.exists())
             {
-                intervals.forEach(interval -> {
-                    svArgs.add("-L");
-                    svArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
-                });
+                throw new PipelineJobException("Unable to find output: " + annotatedVCF.getPath());
             }
+        }
+
+        return annotatedVCF;
+    }
+
+    private File getNovelSitesOutput(File outputDirectory)
+    {
+        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
+        String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
 
-            sv.execute(genome.getWorkingFastaFile(), inputVCF, novelSitesOutput, svArgs);
+        return new File(outputDirectory, "mGAP_v" + releaseVersion + "_" + species.replaceAll(" ", "_") + "_NovelSites.vcf.gz");
+    }
+
+    @Override
+    public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
+    {
+        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
+        Map<String, List<String>> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true)));
+
+        String primaryTrackName = getPipelineCtx().getSequenceSupport().getCachedObject("primaryTrackName", String.class);
+        Map<String, File> tracks = processTracks(output, inputVCF, trackToSamples, outputDirectory, genome, intervals);
+
+        File primaryTrackFile = tracks.get(primaryTrackName);
+        if (primaryTrackFile == null)
+        {
+            throw new PipelineJobException("Missing primary track");
         }
 
-        getPipelineCtx().getJob().getLogger().info("total variants: " + SequenceAnalysisService.get().getVCFLineCount(novelSitesOutput, getPipelineCtx().getJob().getLogger(), false));
+        File primaryTrackAnnotated = annotateNovelSites(primaryTrackFile, outputDirectory, genome, intervals);
+        output.addIntermediateFile(primaryTrackAnnotated);
+
+        if (getAnnotationReferenceVcf() != null)
+        {
+            File novelSitesOutput = getNovelSitesOutput(outputDirectory);
+            if (!novelSitesOutput.exists())
+            {
+                throw new PipelineJobException("Missing file: " + novelSitesOutput.getPath());
+            }
+
+            getPipelineCtx().getJob().getLogger().info("total novel variants in release: " + SequenceAnalysisService.get().getVCFLineCount(novelSitesOutput, getPipelineCtx().getJob().getLogger(), false));
+        }
 
         return output;
     }
 
     private File getOutputVcf(String trackName, File outputDirectory)
     {
-        return new File(outputDirectory, FileUtil.makeLegalName(trackName) + ".vcf.gz");
+        return new File(outputDirectory, FileUtil.makeLegalName(trackName).replaceAll(" ", "_") + ".vcf.gz");
     }
 
     @Override
@@ -221,37 +360,33 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<Sequ
                 continue;
             }
 
-            createOrUpdateTrack(so, job);
+            createOrUpdateTrack(so, job, so.getName());
         }
-
-        createOrUpdatePrimaryTrack(inputs.get(0), job);
-    }
-
-    private void createOrUpdatePrimaryTrack(SequenceOutputFile so, PipelineJob job) throws PipelineJobException
-    {
-        createOrUpdateTrack(so, job, "mGAP Release", true);
     }
 
-    private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job) throws PipelineJobException
+    private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job, String trackName) throws PipelineJobException
     {
-        createOrUpdateTrack(so, job, so.getName(), false);
-    }
+        String primaryTrackName = getPipelineCtx().getSequenceSupport().getCachedObject("primaryTrackName", String.class);
+        if (primaryTrackName == null)
+        {
+            throw new PipelineJobException("Missing cached primary track");
+        }
 
-    private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job, String trackName, boolean isPrimaryTrack) throws PipelineJobException
-    {
         try
         {
+            String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
             Container targetContainer = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
             TableInfo releaseTracks = QueryService.get().getUserSchema(job.getUser(), targetContainer, mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACKS);
-            TableSelector ts = new TableSelector(releaseTracks, PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("trackName"), trackName), null);
+            TableSelector ts = new TableSelector(releaseTracks, PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("trackName"), trackName).addCondition(FieldKey.fromString("species"), species), null);
             if (!ts.exists())
             {
                 job.getLogger().debug("Creating new track: " + trackName + " / " + so.getName());
                 Map<String, Object> newRow = new CaseInsensitiveHashMap<>();
                 newRow.put("trackName", trackName);
                 newRow.put("label", trackName);
+                newRow.put("species", species);
                 newRow.put("vcfId", so.getRowid());
-                newRow.put("isprimarytrack", isPrimaryTrack);
+                newRow.put("isprimarytrack", primaryTrackName.equals(trackName));
 
                 BatchValidationException bve = new BatchValidationException();
                 releaseTracks.getUpdateService().insertRows(job.getUser(), targetContainer, Arrays.asList(newRow), bve, null, null);
@@ -281,11 +416,6 @@ private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job, String
         }
     }
 
-    private boolean indexExists(File vcf)
-    {
-        return new File(vcf.getPath() + ".tbi").exists();
-    }
-
     private File getSampleNameFile(File outputDir)
     {
         return new File(outputDir, "sampleMapping.txt");
@@ -443,6 +573,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
 
                 return f;
             }).toList();
+
             job.getLogger().debug("Total VCFs to merge: " + toConcat.size());
             if (toConcat.isEmpty())
             {
@@ -470,45 +601,49 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
             manager.addSequenceOutput(so);
         }
 
-        job.getLogger().info("Merging novel sites VCF");
-        List<File> toConcat = orderedJobDirs.stream().map(dirName -> {
-            File f = getNovelSitesOutput(new File(ctx.getSourceDirectory(), dirName));
-            if (!f.exists())
-            {
-                throw new IllegalStateException("Missing file: " + f.getPath());
-            }
+        if (getAnnotationReferenceVcf() != null)
+        {
+            job.getLogger().info("Merging novel sites VCF");
+            List<File> toConcat = orderedJobDirs.stream().map(dirName -> {
+                File f = getNovelSitesOutput(new File(ctx.getSourceDirectory(), dirName));
+                if (!f.exists())
+                {
+                    throw new IllegalStateException("Missing file: " + f.getPath());
+                }
 
-            ctx.getFileManager().addIntermediateFile(f);
-            ctx.getFileManager().addIntermediateFile(new File(f.getPath() + ".tbi"));
+                ctx.getFileManager().addIntermediateFile(f);
+                ctx.getFileManager().addIntermediateFile(new File(f.getPath() + ".tbi"));
 
-            return f;
-        }).toList();
+                return f;
+            }).toList();
 
-        if (toConcat.isEmpty())
-        {
-            throw new PipelineJobException("No novel sites VCFs found");
-        }
+            if (toConcat.isEmpty())
+            {
+                throw new PipelineJobException("No novel sites VCFs found");
+            }
 
-        String basename = SequenceAnalysisService.get().getUnzippedBaseName(toConcat.get(0).getName());
-        File combined = new File(ctx.getSourceDirectory(), basename + ".vcf.gz");
-        File combinedIdx = new File(combined.getPath() + ".tbi");
-        if (combinedIdx.exists())
-        {
-            job.getLogger().info("VCF exists, will not recreate: " + combined.getPath());
-        }
-        else
-        {
-            combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, job.getLogger(), true, null);
-        }
+            String basename = SequenceAnalysisService.get().getUnzippedBaseName(toConcat.get(0).getName());
+            File combined = new File(ctx.getSourceDirectory(), basename + ".vcf.gz");
+            File combinedIdx = new File(combined.getPath() + ".tbi");
+            if (combinedIdx.exists())
+            {
+                job.getLogger().info("VCF exists, will not recreate: " + combined.getPath());
+            }
+            else
+            {
+                combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, job.getLogger(), true, null);
+            }
 
-        SequenceOutputFile so = new SequenceOutputFile();
-        so.setName("Novel Sites in This Release");
-        so.setFile(combined);
-        so.setCategory(TRACK_CATEGORY);
-        so.setLibrary_id(genome.getGenomeId());
-        String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
-        so.setDescription("These are novel sites in mGAP v" + releaseVersion);
-        manager.addSequenceOutput(so);
+            String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
+            String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
+            SequenceOutputFile so = new SequenceOutputFile();
+            so.setName(species + ": Novel Sites in Release " + releaseVersion);
+            so.setFile(combined);
+            so.setCategory(TRACK_CATEGORY);
+            so.setLibrary_id(genome.getGenomeId());
+            so.setDescription("These are novel sites in mGAP v" + releaseVersion + " for " + species);
+            manager.addSequenceOutput(so);
+        }
     }
 
     public static class SplitVcfBySamplesWrapper extends AbstractDiscvrSeqWrapper

From 3d119555ab690db4f08b8f36c2941947a84200a4 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 3 Nov 2024 11:44:36 -0800
Subject: [PATCH 06/20] Allow GenerateMgapTracksStep to have missing prior
 release

---
 .../mgap/pipeline/GenerateMgapTracksStep.java | 57 +++++++++++--------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
index b90606960..abaa16c43 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
@@ -81,7 +81,7 @@ public static class Provider extends AbstractVariantProcessingStepProvider<Gener
         public Provider()
         {
             super("GenerateMgapTracksStep", "Generate mGAP Tracks", "GenerateMgapTracksStep", "This will use the set of sample IDs from the table mgap.releaseTrackSubsets to subset the input VCF and produce one VCF per track. It will perform basic validation and also update mgap.releaseTracks.", Arrays.asList(
-                    ToolParameterDescriptor.create("species", "Version", "The species, which is used to filter tracks", "ldk-simplelabkeycombo", new JSONObject(){{
+                    ToolParameterDescriptor.create("species", "Species", "The species, which is used to filter tracks", "ldk-simplelabkeycombo", new JSONObject(){{
                         put("allowBlank", false);
                         put("doNotIncludeInTemplates", true);
                         put("width", 400);
@@ -205,33 +205,41 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
 
         // Prepare to annotate novel sites:
         Integer versionRowId = getProvider().getParameterByName(VERSION_ROWID).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
-        String version = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("version"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(String.class);
-        if (version == null)
+        String version = null;
+        if (versionRowId != null)
         {
-            throw new PipelineJobException("Unable to find release for release: " + versionRowId);
-        }
+            version = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("version"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(String.class);
+            if (version == null)
+            {
+                throw new PipelineJobException("Unable to find release for release: " + versionRowId);
+            }
 
-        Integer referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("sitesOnlyVcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
-        if (referenceVcfOutputId == null)
-        {
-            getPipelineCtx().getLogger().debug("Sites-only VCF not found, using primary VCF");
-            referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("vcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
-        }
+            Integer referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("sitesOnlyVcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
+            if (referenceVcfOutputId == null)
+            {
+                getPipelineCtx().getLogger().debug("Sites-only VCF not found, using primary VCF");
+                referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("vcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class);
+            }
 
-        if (referenceVcfOutputId == null)
-        {
-            throw new PipelineJobException("Unable to find sites-only VCF for release: " + versionRowId);
-        }
+            if (referenceVcfOutputId == null)
+            {
+                throw new PipelineJobException("Unable to find sites-only VCF for release: " + versionRowId);
+            }
 
-        SequenceOutputFile sitesOnly = SequenceOutputFile.getForId(referenceVcfOutputId);
-        if (sitesOnly == null)
+            SequenceOutputFile sitesOnly = SequenceOutputFile.getForId(referenceVcfOutputId);
+            if (sitesOnly == null)
+            {
+                throw new PipelineJobException("Unable to find sites-only VCF output file for fileId: " + referenceVcfOutputId);
+            }
+
+            support.cacheExpData(sitesOnly.getExpData());
+            support.cacheObject(SITES_ONLY_DATA, sitesOnly.getDataId());
+        }
+        else
         {
-            throw new PipelineJobException("Unable to find sites-only VCF output file for fileId: " + referenceVcfOutputId);
+            support.cacheObject(SITES_ONLY_DATA, null);
         }
 
-        support.cacheExpData(sitesOnly.getExpData());
-
-        support.cacheObject(SITES_ONLY_DATA, sitesOnly.getDataId());
         support.cacheObject(PRIOR_RELEASE_LABEL, version);
     }
 
@@ -278,8 +286,11 @@ private File annotateNovelSites(File inputVCF, File outputDirectory, ReferenceGe
             extraArgs.add("--ignore-variants-starting-outside-interval");
         }
 
-        extraArgs.add("-dv");
-        extraArgs.add(priorReleaseLabel);
+        if (priorReleaseLabel != null)
+        {
+            extraArgs.add("-dv");
+            extraArgs.add(priorReleaseLabel);
+        }
 
         if (sitesOnlyVcf != null)
         {

From ce20f5ad6c088f7ce198f53cfe22f26b9e385f32 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 3 Nov 2024 14:26:17 -0800
Subject: [PATCH 07/20] Correct typo

---
 mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
index abaa16c43..484b9c00e 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
@@ -247,7 +247,7 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
     {
         File refVcf = null;
         Integer sitesOnlyExpDataId = getPipelineCtx().getSequenceSupport().getCachedObject(SITES_ONLY_DATA, Integer.class);
-        if (sitesOnlyExpDataId == null)
+        if (sitesOnlyExpDataId != null)
         {
             refVcf = getPipelineCtx().getSequenceSupport().getCachedData(sitesOnlyExpDataId);
             if (!refVcf.exists())

From f738fef91f3380654f626c5f914d1a23a27d7a1b Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sun, 3 Nov 2024 16:08:04 -0800
Subject: [PATCH 08/20] Correct typo

---
 .../src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
index 97bcc2495..3fef55a99 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java
@@ -26,7 +26,7 @@ public File execute(File vcf, @Nullable File referenceVcf, File fasta, String ve
         args.add("-V");
         args.add(vcf.getPath());
 
-        if (referenceVcf == null)
+        if (referenceVcf != null)
         {
             args.add("-rv");
             args.add(referenceVcf.getPath());

From 778645146ecd2c8d6198a96635fe3717073fcdfb Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Mon, 4 Nov 2024 10:16:08 -0800
Subject: [PATCH 09/20] Add case-sensitive ID check

---
 .../queries/mGAP/sampleSummary.query.xml      |  7 ++++
 mGAP/resources/queries/mGAP/sampleSummary.sql |  3 +-
 .../queries/mGAP/sampleSummary/.qview.xml     |  5 +++
 mGAP/resources/views/mgapDataDashboard.html   |  7 ++++
 .../mgap/query/SampleSummaryCustomizer.java   | 42 +++++++++++++++++++
 5 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 mGAP/resources/queries/mGAP/sampleSummary/.qview.xml
 create mode 100644 mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java

diff --git a/mGAP/resources/queries/mGAP/sampleSummary.query.xml b/mGAP/resources/queries/mGAP/sampleSummary.query.xml
index 4d9e68f7c..86a7a599e 100644
--- a/mGAP/resources/queries/mGAP/sampleSummary.query.xml
+++ b/mGAP/resources/queries/mGAP/sampleSummary.query.xml
@@ -2,8 +2,15 @@
     <metadata>
         <tables xmlns="http://labkey.org/data/xml">
             <table tableName="" tableDbType="TABLE">
+                <javaCustomizer class="org.labkey.mgap.query.SampleSummaryCustomizer"/>
                 <pkColumnName>subjectName</pkColumnName>
                 <tableTitle>mGAP Subject/gVCF Summary</tableTitle>
+                <columns>
+                    <column columnName="aliasSubjectName">
+                        <columnTitle>SubjectId Listed In Alias Table</columnTitle>
+                        <isHidden>true</isHidden>
+                    </column>
+                </columns>
             </table>
         </tables>
     </metadata>
diff --git a/mGAP/resources/queries/mGAP/sampleSummary.sql b/mGAP/resources/queries/mGAP/sampleSummary.sql
index 385533ddf..d45c1c4bb 100644
--- a/mGAP/resources/queries/mGAP/sampleSummary.sql
+++ b/mGAP/resources/queries/mGAP/sampleSummary.sql
@@ -8,7 +8,8 @@ SELECT
     ss.center,
     t.tracks,
     t.total,
-    CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics
+    CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics,
+    am.subjectname as aliasSubjectName
 
 FROM (SELECT
         COALESCE(o.readset.subjectId, rt.subjectId) as subjectId,
diff --git a/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml b/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml
new file mode 100644
index 000000000..48de68347
--- /dev/null
+++ b/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml
@@ -0,0 +1,5 @@
+<customView xmlns="http://labkey.org/data/xml/queryCustomView">
+    <sorts>
+        <sort column="subjectId" descending="false"/>
+    </sorts>
+</customView>
\ No newline at end of file
diff --git a/mGAP/resources/views/mgapDataDashboard.html b/mGAP/resources/views/mgapDataDashboard.html
index fc6ce214d..1b0fd9138 100644
--- a/mGAP/resources/views/mgapDataDashboard.html
+++ b/mGAP/resources/views/mgapDataDashboard.html
@@ -76,6 +76,13 @@
                             queryName: 'sampleSummary',
                             'query.externalAlias~isblank': ''
                         })
+                    },{
+                        name: 'gVCFs With SubjectId / Case-sensitive Difference',
+                        url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, {
+                            schemaName: 'mgap',
+                            queryName: 'sampleSummary', // subjectCaseMismatch is a calculated column on sampleSummary, not a query
+                            'query.subjectCaseMismatch~isnonblank': ''
+                        })
                     }]
                 },{
                     header: 'Prior Releases',
diff --git a/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java b/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java
new file mode 100644
index 000000000..317773403
--- /dev/null
+++ b/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java
@@ -0,0 +1,42 @@
+package org.labkey.mgap.query;
+
+import org.labkey.api.data.AbstractTableInfo;
+import org.labkey.api.data.JdbcType;
+import org.labkey.api.data.SQLFragment;
+import org.labkey.api.data.TableInfo;
+import org.labkey.api.gwt.client.FacetingBehaviorType;
+import org.labkey.api.ldk.table.AbstractTableCustomizer;
+import org.labkey.api.query.ExprColumn;
+
+public class SampleSummaryCustomizer extends AbstractTableCustomizer // registered as the javaCustomizer in sampleSummary.query.xml
+{
+    @Override
+    public void customize(TableInfo ti)
+    {
+        if (ti instanceof AbstractTableInfo ati) // any other TableInfo implementation is left untouched
+        {
+            customizeTable(ati);
+        }
+    }
+
+    private void customizeTable(AbstractTableInfo ti) // adds the calculated "subjectCaseMismatch" column if it is not already present
+    {
+        String fieldName = "subjectCaseMismatch";
+        if (ti.getColumn(fieldName) != null)
+        {
+            return;
+        }
+
+        if (!ti.getSqlDialect().isSqlServer()) // the expression below uses HASHBYTES, so the column is only added on SQL Server
+        {
+            return;
+        }
+
+        SQLFragment sql = new SQLFragment("CASE WHEN HASHBYTES('sha1', " + ExprColumn.STR_TABLE_ALIAS + ".subjectId) = HASHBYTES('sha1', " + ExprColumn.STR_TABLE_ALIAS + ".aliasSubjectName) THEN NULL ELSE " + ExprColumn.STR_TABLE_ALIAS + ".aliasSubjectName END"); // NULL when the two SHA1 hashes match, otherwise aliasSubjectName; presumably hashes are compared because plain '=' ignores case -- TODO confirm
+        ExprColumn col = new ExprColumn(ti, fieldName, sql, JdbcType.VARCHAR, ti.getColumn("subjectId"), ti.getColumn("aliasSubjectName"));
+        col.setLabel("Id Case Mismatch?");
+        col.setFacetingBehaviorType(FacetingBehaviorType.ALWAYS_OFF);
+        col.setDescription("If the case of the subjectId differs from the alias table, the updated case is shown");
+        ti.addColumn(col);
+    }
+}

From 67cfed2c7d3a7963a47461b72dd2a3e8666feee2 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Wed, 6 Nov 2024 21:13:28 -0800
Subject: [PATCH 10/20] Update MCC dashboard to account for permissions

---
 mcc/src/client/U24Dashboard/Dashboard.tsx | 41 +++++++++++++----------
 mcc/src/org/labkey/mcc/MccModule.java     |  2 ++
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/mcc/src/client/U24Dashboard/Dashboard.tsx b/mcc/src/client/U24Dashboard/Dashboard.tsx
index 7acbdd8e2..8819a0789 100644
--- a/mcc/src/client/U24Dashboard/Dashboard.tsx
+++ b/mcc/src/client/U24Dashboard/Dashboard.tsx
@@ -63,24 +63,29 @@ export function Dashboard() {
             scope: this
         });
 
-        Query.selectRows({
-            containerPath: requestContainerPath,
-            schemaName: 'mcc',
-            queryName: 'requestScores',
-            columns: 'requestId/status',
-            success: function(results) {
-                if (isApiSubscribed) {
-                    setRequestRows(results.rows);
-                }
-            },
-            failure: function(response) {
-                if (isApiSubscribed) {
-                    alert('There was an error loading data');
-                    console.error(response);
-                }
-            },
-            scope: this
-        });
+        if (ctx.hasRequestReadPermission) {
+            Query.selectRows({
+                containerPath: requestContainerPath,
+                schemaName: 'mcc',
+                queryName: 'requestScores',
+                columns: 'requestId/status',
+                success: function (results) {
+                    if (isApiSubscribed) {
+                        setRequestRows(results.rows);
+                    }
+                },
+                failure: function (response) {
+                    if (isApiSubscribed) {
+                        alert('There was an error loading data');
+                        console.error(response);
+                    }
+                },
+                scope: this
+            });
+        }
+        else {
+            setRequestRows([])
+        }
 
         Query.selectRows({
             containerPath: containerPath,
diff --git a/mcc/src/org/labkey/mcc/MccModule.java b/mcc/src/org/labkey/mcc/MccModule.java
index 673b3d950..3a0c891cd 100644
--- a/mcc/src/org/labkey/mcc/MccModule.java
+++ b/mcc/src/org/labkey/mcc/MccModule.java
@@ -56,6 +56,7 @@
 import org.labkey.mcc.security.MccRabReviewerRole;
 import org.labkey.mcc.security.MccRequestAdminPermission;
 import org.labkey.mcc.security.MccRequesterRole;
+import org.labkey.mcc.security.MccViewRequestsPermission;
 
 import java.util.Collection;
 import java.util.Collections;
@@ -109,6 +110,7 @@ public JSONObject getPageContextJson(ContainerUser context)
 
         Container requestContainer = MccManager.get().getMCCRequestContainer(context.getContainer());
         ret.put("hasRequestAdminPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccRequestAdminPermission.class));
+        ret.put("hasRequestReadPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccViewRequestsPermission.class));
         ret.put("hasRabPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccRabReviewPermission.class));
         ret.put("hasFinalDecisionPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccFinalReviewPermission.class));
 

From 27183799b1a624e4378f4ef49b9eab797973ac98 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 12 Nov 2024 05:28:46 -0800
Subject: [PATCH 11/20] Category field does not need to be required

---
 mGAP/resources/schemas/mgap.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index 090becab2..7eeaf7123 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -826,7 +826,7 @@
             </column>
             <column columnName="category">
                 <columnTitle>Category</columnTitle>
-                <nullable>false</nullable>
+                <nullable>true</nullable>
             </column>
             <column columnName="url">
                 <columnTitle>URL</columnTitle>

From cdf397fbc272c1d86123583dd5a87ad4af0009f1 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 12 Nov 2024 05:33:23 -0800
Subject: [PATCH 12/20] Fix URL in dashboard

---
 .../web/mGAP/window/ReleaseWindow.js          | 35 ++++++++++++-------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/mGAP/resources/web/mGAP/window/ReleaseWindow.js b/mGAP/resources/web/mGAP/window/ReleaseWindow.js
index 9921c9746..51fe467b0 100644
--- a/mGAP/resources/web/mGAP/window/ReleaseWindow.js
+++ b/mGAP/resources/web/mGAP/window/ReleaseWindow.js
@@ -12,22 +12,28 @@ Ext4.define('mGAP.window.ReleaseWindow', {
                 schemaName: 'mgap',
                 queryName: 'releaseTracks',
                 scope: this,
-                columns: 'vcfId,trackName,vcfId/library_id,isprimarytrack',
+                columns: 'vcfId,species,trackName,vcfId/library_id,isprimarytrack',
                 failure: LDK.Utils.getErrorCallback(),
                 success: function (results) {
                     Ext4.Msg.hide();
                     var outputFiles = [];
-                    var distinctGenomes = [];
+                    var distinctGenomesBySpecies = {};
                     Ext4.Array.forEach(results.rows, function(r){
-                        if (r.vcfId) {
-                            outputFiles.push(r.vcfId);
+                        if (!r.vcfId) {
+                            Ext4.Msg.alert('Error', 'Track lacks VCF ID: ' + r.trackName);
+                            return false;
+                        }
 
-                            if (r['vcfId/library_id']) {
-                                distinctGenomes.push(r['vcfId/library_id']);
-                            }
+                        if (!r.species) {
+                            Ext4.Msg.alert('Error', 'Track lacks species: ' + r.trackName);
+                            return false;
                         }
-                        else if (!r['isprimarytrack']) {
-                            console.error('Track lacks VCF ID: ' + r.trackName);
+
+                        outputFiles.push(r.vcfId);
+
+                        distinctGenomesBySpecies[r.species] = distinctGenomesBySpecies[r.species] || [];
+                        if (r['vcfId/library_id']) {
+                            distinctGenomesBySpecies[r.species].push(r['vcfId/library_id']);
                         }
                     }, this);
 
@@ -36,9 +42,12 @@ Ext4.define('mGAP.window.ReleaseWindow', {
                         return;
                     }
 
-                    distinctGenomes = Ext4.Array.unique(distinctGenomes);
-                    if (distinctGenomes.length !== 1){
-                        Ext4.Msg.alert('Error', 'All files must use the same genome.  Genomes found: ' + distinctGenomes.length);
+                    for (var sn in distinctGenomesBySpecies) { // for-in over the object yields species names; over getKeys() it yielded array indices
+                        var genomes = Ext4.Array.unique(distinctGenomesBySpecies[sn]);
+                        if (genomes.length !== 1){
+                            Ext4.Msg.alert('Error', 'All files must use the same genome.  Genomes found for species ' + sn + ': ' + genomes.length);
+                            return;
+                        }
                     }
 
                     LABKEY.Ajax.request({
@@ -68,7 +77,7 @@ Ext4.define('mGAP.window.ReleaseWindow', {
                                             title: results.name,
                                             handlerConfig: results,
                                             toolParameters: results.toolParameters,
+                                            libraryId: (function(all){ return all.length === 1 ? all[0] : null; })(Ext4.Array.unique(Ext4.Array.flatten(Ext4.Object.getValues(distinctGenomesBySpecies)))) // distinctGenomes no longer exists; pass the genome only when all species share one
+                                            libraryId: distinctGenomes.length === 1 ? distinctGenomes[0] : null
                                         }).show();
                                     }
                                 }

From 7b71b2f51e2894e5d9c8e29a148d88d3c6c3532f Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 12 Nov 2024 08:24:05 -0800
Subject: [PATCH 13/20] Allow another column to be null

---
 mGAP/resources/schemas/mgap.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index 7eeaf7123..5679fefa4 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -668,7 +668,7 @@
             </column>
             <column columnName="category">
                 <columnTitle>Category</columnTitle>
-                <nullable>false</nullable>
+                <nullable>true</nullable>
             </column>
             <column columnName="url">
                 <columnTitle>URL</columnTitle>

From 6ea56e7125c033cd447a3cbe761a6df9ad690894 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 12 Nov 2024 10:24:31 -0800
Subject: [PATCH 14/20] Add additional mGAP track columns

---
 .../dbscripts/postgresql/mgap-16.74-16.75.sql |  5 +++++
 .../dbscripts/sqlserver/mgap-16.74-16.75.sql  |  5 +++++
 mGAP/resources/schemas/mgap.xml               | 22 +++++++++++++++++++
 mGAP/src/org/labkey/mgap/mGAPModule.java      |  2 +-
 4 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql
 create mode 100644 mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql

diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql
new file mode 100644
index 000000000..7080ea1bc
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql
@@ -0,0 +1,5 @@
+ALTER TABLE mGAP.releaseTracks ADD shouldindex boolean default false;
+ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int;
+
+ALTER TABLE mGAP.tracksPerRelease ADD shouldindex boolean default false;
+ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql
new file mode 100644
index 000000000..39631f183
--- /dev/null
+++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql
@@ -0,0 +1,5 @@
+ALTER TABLE mGAP.releaseTracks ADD shouldindex bit default 0;
+ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int;
+
+ALTER TABLE mGAP.tracksPerRelease ADD shouldindex bit default 0;
+ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int;
\ No newline at end of file
diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml
index 5679fefa4..caed3fa92 100644
--- a/mGAP/resources/schemas/mgap.xml
+++ b/mGAP/resources/schemas/mgap.xml
@@ -692,6 +692,17 @@
             <column columnName="skipvalidation">
                 <columnTitle>Skip Annotation Checks?</columnTitle>
             </column>
+            <column columnName="shouldindex">
+                <columnTitle>Should Include Lucene Index?</columnTitle>
+            </column>
+            <column columnName="vcfIndexId">
+                <columnTitle>Lucene Index Id</columnTitle>
+                <fk>
+                    <fkDbSchema>sequenceanalysis</fkDbSchema>
+                    <fkTable>outputfiles</fkTable>
+                    <fkColumnName>rowid</fkColumnName>
+                </fk>
+            </column>
             <column columnName="container">
                 <isHidden>true</isHidden>
             </column>
@@ -848,6 +859,17 @@
                 <columnTitle>Is Primary Track For Species?</columnTitle>
                 <defaultValue>false</defaultValue>
             </column>
+            <column columnName="shouldindex">
+                <columnTitle>Should Include Lucene Index?</columnTitle>
+            </column>
+            <column columnName="vcfIndexId">
+                <columnTitle>Lucene Index Id</columnTitle>
+                <fk>
+                    <fkDbSchema>sequenceanalysis</fkDbSchema>
+                    <fkTable>outputfiles</fkTable>
+                    <fkColumnName>rowid</fkColumnName>
+                </fk>
+            </column>
             <column columnName="container">
                 <isHidden>true</isHidden>
             </column>
diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java
index 1b1666ceb..36fccd989 100644
--- a/mGAP/src/org/labkey/mgap/mGAPModule.java
+++ b/mGAP/src/org/labkey/mgap/mGAPModule.java
@@ -76,7 +76,7 @@ public String getName()
     @Override
     public Double getSchemaVersion()
     {
-        return 16.74;
+        return 16.75;
     }
 
     @Override

From 64853cf863f9806738a73dc8f627cbf54b200465 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Tue, 12 Nov 2024 19:48:25 -0800
Subject: [PATCH 15/20] Add action to fix existing SBT ExpDatas

---
 .../labkey/primeseq/PrimeseqController.java   | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/primeseq/src/org/labkey/primeseq/PrimeseqController.java b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
index d93d71258..e5523d2b1 100644
--- a/primeseq/src/org/labkey/primeseq/PrimeseqController.java
+++ b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
@@ -33,7 +33,10 @@
 import org.labkey.api.data.ContainerType;
 import org.labkey.api.data.DbScope;
 import org.labkey.api.data.SQLFragment;
+import org.labkey.api.data.SimpleFilter;
 import org.labkey.api.data.SqlExecutor;
+import org.labkey.api.data.TableSelector;
+import org.labkey.api.exp.api.ExpData;
 import org.labkey.api.module.Module;
 import org.labkey.api.module.ModuleLoader;
 import org.labkey.api.pipeline.PipeRoot;
@@ -42,10 +45,13 @@
 import org.labkey.api.pipeline.PipelineService;
 import org.labkey.api.pipeline.PipelineStatusFile;
 import org.labkey.api.pipeline.PipelineUrls;
+import org.labkey.api.query.FieldKey;
+import org.labkey.api.query.QueryService;
 import org.labkey.api.security.RequiresPermission;
 import org.labkey.api.security.RequiresSiteAdmin;
 import org.labkey.api.security.permissions.ReadPermission;
 import org.labkey.api.security.permissions.UpdatePermission;
+import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.HasJobParams;
 import org.labkey.api.sequenceanalysis.pipeline.JobResourceSettings;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
@@ -795,4 +801,74 @@ public void setRestartJobs(boolean restartJobs)
             _restartJobs = restartJobs;
         }
     }
+
+    /** Site-admin action that looks for "SBT Results" outputs whose recorded file is missing and re-points their ExpData at the hits file found on disk. */
+    @RequiresSiteAdmin
+    public static class FixSbtAction extends ConfirmAction<Object>
+    {
+        @Override
+        public ModelAndView getConfirmView(Object o, BindException errors) throws Exception
+        {
+            setTitle("Fix SBT Errors");
+            return new HtmlView(HtmlString.of("This will update filepaths on SBT outputs.  Do you want to continue?"));
+        }
+
+        @Override
+        public boolean handlePost(Object o, BindException errors) throws Exception
+        {
+            new TableSelector(QueryService.get().getUserSchema(getUser(), getContainer(), "sequenceanalysis").getTable("outputfiles"), PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("category"), "SBT Results"), null).forEachResults(rs -> {
+                SequenceOutputFile so = SequenceOutputFile.getForId(rs.getInt(FieldKey.fromString("rowid")));
+                File f = so == null ? null : so.getFile();
+                // Skip rows that cannot be resolved to a file, and rows whose file already exists
+                if (f == null || f.exists())
+                {
+                    return;
+                }
+
+                // Candidates are the directories, other than "Shared", two levels above the recorded path
+                File root = f.getParentFile() == null ? null : f.getParentFile().getParentFile();
+                File [] dirs = root == null ? null : root.listFiles(fn -> {
+                    return fn.isDirectory() && !fn.getName().equalsIgnoreCase("Shared");
+                });
+                if (dirs == null || dirs.length == 0)
+                {
+                    _log.error("Unable to find directory for: " + f.getPath());
+                    return;
+                }
+
+                File parent = new File(dirs[0], "Alignment");
+                File [] children = parent.listFiles(fn -> {
+                    return fn.getName().endsWith(".sbt_hits.txt.gz");
+                });
+                // Only proceed when exactly one hits file exists, so the replacement is unambiguous
+                if (children == null || children.length != 1)
+                {
+                    _log.error("Unable to find child under: " + parent.getPath());
+                    return;
+                }
+
+                _log.info("Found: " + children[0].getPath());
+
+                ExpData d = so.getExpData();
+                d.setDataFileURI(children[0].toURI());
+
+                //d.save(getUser());
+            });
+
+            return true;
+        }
+
+        @Override
+        public void validateCommand(Object o, Errors errors)
+        {
+            // Intentionally empty: nothing is validated before the post
+        }
+
+        @NotNull
+        @Override
+        public URLHelper getSuccessURL(Object o)
+        {
+            return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer());
+        }
+    }
 }
\ No newline at end of file

From 70acbc47a02dcbd1249d9e7842889feab86fe5de Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Wed, 13 Nov 2024 10:59:20 -0800
Subject: [PATCH 16/20] Action to retroactively fix MHC filepaths

---
 primeseq/src/org/labkey/primeseq/PrimeseqController.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/primeseq/src/org/labkey/primeseq/PrimeseqController.java b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
index e5523d2b1..5bcfd42fb 100644
--- a/primeseq/src/org/labkey/primeseq/PrimeseqController.java
+++ b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
@@ -852,7 +852,7 @@ public boolean handlePost(Object o, BindException errors) throws Exception
                 ExpData d = so.getExpData();
                 d.setDataFileURI(children[0].toURI());
 
-                //d.save(getUser());
+                d.save(getUser());
             });
 
             return true;

From c58ba3dd89a719f817dc3820483873042135a40c Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Wed, 13 Nov 2024 11:10:15 -0800
Subject: [PATCH 17/20] Update mGAP release code to handle multi-species

---
 .../mgap/pipeline/mGapReleaseGenerator.java   | 73 ++++++++++---------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
index 278960bc1..6a2ac50bd 100644
--- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
+++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
@@ -96,11 +96,21 @@
 public class mGapReleaseGenerator extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
 {
     private final FileType _vcfType = new FileType(List.of(".vcf"), ".vcf", false, FileType.gzSupportLevel.SUPPORT_GZ);
-    public static final String MMUL_GENOME = "mmulGenome";
+    public static final String BASE_GENOME = "baseGenome";
 
     public mGapReleaseGenerator()
     {
         super(ModuleLoader.getInstance().getModule(mGAPModule.class), "Create mGAP Release", "This will prepare an input VCF for use as an mGAP public release.  This will optionally include: removing excess annotations and program records, limiting to SNVs (optional) and removing genotype data (optional).  If genotypes are retained, the subject names will be checked for mGAP aliases and replaced as needed.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js")), Arrays.asList(
+                ToolParameterDescriptor.create("species", "Species", "The species, which is used to filter tracks", "ldk-simplelabkeycombo", new JSONObject(){{
+                    put("allowBlank", false);
+                    put("doNotIncludeInTemplates", true);
+                    put("width", 400);
+                    put("schemaName", "laboratory");
+                    put("queryName", "species");
+                    put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()");
+                    put("displayField", "common_name");
+                    put("valueField", "common_name"); // the submitted value is the species common name
+                }}, null),
                 ToolParameterDescriptor.create("releaseVersion", "Version", "This value will be used as the version when published.", "textfield", new JSONObject(){{
                     put("allowBlank", false);
                     put("doNotIncludeInTemplates", true);
@@ -182,10 +192,16 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
             ctx.getJob().getLogger().info("writing track/subset data to file");
             Container target = ctx.getJob().getContainer().isWorkbook() ? ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer();
             TableInfo releaseTracks = QueryService.get().getUserSchema(ctx.getJob().getUser(), target, mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACKS);
+            
+            final String species = ctx.getParams().optString("species"); // optString() yields "" (never null) when the key is absent
+            if (species == null || species.trim().isEmpty())
+            {
+                throw new PipelineJobException("Missing value for species");
+            }
 
             Set<FieldKey> toSelect = new HashSet<>();
             toSelect.add(FieldKey.fromString("trackName"));
-            toSelect.add(FieldKey.fromString("mergepriority"));
+            toSelect.add(FieldKey.fromString("species"));
             toSelect.add(FieldKey.fromString("skipvalidation"));
             toSelect.add(FieldKey.fromString("isprimarytrack"));
             toSelect.add(FieldKey.fromString("vcfId"));
@@ -197,7 +213,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
             File trackFile = getTrackListFile(ctx.getOutputDir());
             try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(trackFile), '\t', CSVWriter.NO_QUOTE_CHARACTER))
             {
-                new TableSelector(releaseTracks, colMap.values(), null, null).forEachResults(rs -> {
+                new TableSelector(releaseTracks, colMap.values(), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> {
                     if (rs.getObject(FieldKey.fromString("vcfId")) == null)
                     {
                         throw new SQLException("No VCF found for track: " + rs.getObject(FieldKey.fromString("trackName")));
@@ -217,7 +233,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
                     writer.writeNext(new String[]{
                             rs.getString(FieldKey.fromString("trackName")),
                             String.valueOf(rs.getInt(FieldKey.fromString("vcfId/dataId"))),
-                            String.valueOf(rs.getObject(FieldKey.fromString("mergepriority")) == null ? 999 : rs.getInt(FieldKey.fromString("mergepriority"))),
+                            rs.getString(FieldKey.fromString("species")),
                             String.valueOf(rs.getObject(FieldKey.fromString("skipvalidation")) != null && rs.getBoolean(FieldKey.fromString("skipvalidation"))),
                             String.valueOf(rs.getObject(FieldKey.fromString("isprimarytrack")) != null && rs.getBoolean(FieldKey.fromString("isprimarytrack")))
                     });
@@ -256,7 +272,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
             }
             int sourceGenome = genomeIds.iterator().next();
             ctx.getSequenceSupport().cacheGenome(SequenceAnalysisService.get().getReferenceGenome(sourceGenome, ctx.getJob().getUser()));
-            ctx.getSequenceSupport().cacheObject(MMUL_GENOME, sourceGenome);
+            ctx.getSequenceSupport().cacheObject(BASE_GENOME, sourceGenome);
 
             AnnotationStep.findChainFile(genomeIds.iterator().next(), ctx.getParams().getInt(AnnotationStep.GRCH37), ctx.getSequenceSupport(), ctx.getJob());
 
@@ -286,8 +302,8 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
 
         private SequenceOutputFile getAndValidateLuceneIndex(PipelineJob job, JSONObject params) throws PipelineJobException
         {
-            Integer luceneIndexId = params.optInt("luceneIndex");
-            if (luceneIndexId == null || luceneIndexId == 0)
+            int luceneIndexId = params.optInt("luceneIndex");
+            if (luceneIndexId == 0)
             {
                 throw new PipelineJobException("Missing luceneIndex ID");
             }
@@ -520,10 +536,13 @@ else if (so.getCategory().endsWith("Release Track"))
                     throw new PipelineJobException("Unable to find total variant from stats file!");
                 }
 
+                final String species = ctx.getParams().optString("species");
+
                 //actually create release record
                 Map<String, Object> row = new CaseInsensitiveHashMap<>();
                 row.put("version", job.getParameters().get("releaseVersion"));
                 row.put("releaseDate", new Date());
+                row.put("species", species);
                 row.put("vcfId", so.getRowid());
                 row.put("liftedVcfId", liftedVcf.getRowid());
                 row.put("sitesOnlyVcfId", sitesOnlyVcf.getRowid());
@@ -583,7 +602,7 @@ else if (so.getCategory().endsWith("Release Track"))
 
                     //also tracks:
                     UserSchema us = QueryService.get().getUserSchema(job.getUser(), job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer(), mGAPSchema.NAME);
-                    new TableSelector(us.getTable(mGAPSchema.TABLE_RELEASE_TRACKS), null, null).forEachResults(rs -> {
+                    new TableSelector(us.getTable(mGAPSchema.TABLE_RELEASE_TRACKS), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> {
                         SequenceOutputFile so3 = trackVCFMap.get(rs.getString(FieldKey.fromString("trackName")));
                         if (so3 == null && rs.getBoolean(FieldKey.fromString("isprimarytrack")))
                         {
@@ -836,7 +855,7 @@ public static class TrackDescriptor
         {
             String _trackName;
             Integer _dataId;
-            Integer _mergePriority;
+            String _species;
             boolean _skipValidation;
             boolean _isPrimary;
 
@@ -844,7 +863,7 @@ public TrackDescriptor(String[] vals)
             {
                 _trackName = vals[0];
                 _dataId = Integer.parseInt(vals[1]);
-                _mergePriority = Integer.parseInt(vals[2]);
+                _species = vals[2];
                 _skipValidation = Boolean.parseBoolean(vals[3]);
                 _isPrimary = Boolean.parseBoolean(vals[4]);
             }
@@ -859,9 +878,9 @@ public Integer getDataId()
                 return _dataId;
             }
 
-            public Integer getMergePriority()
+            public String getSpecies()
             {
-                return _mergePriority;
+                return _species;
             }
 
             public boolean isSkipValidation()
@@ -886,15 +905,6 @@ private List<TrackDescriptor> getTracks(File webserverDir) throws PipelineJobExc
                     ret.add(new TrackDescriptor(line));
                 }
 
-                ret.sort(new Comparator<TrackDescriptor>()
-                {
-                    @Override
-                    public int compare(TrackDescriptor o1, TrackDescriptor o2)
-                    {
-                        return o1.getMergePriority().compareTo(o2.getMergePriority());
-                    }
-                });
-
                 return ret;
             }
             catch (IOException e)
@@ -917,12 +927,13 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
 
             GeneToNameTranslator translator = new GeneToNameTranslator(gtf, ctx.getLogger());
             ReferenceGenome grch37Genome = ctx.getSequenceSupport().getCachedGenome(ctx.getParams().getInt(AnnotationStep.GRCH37));
-            int genomeId = ctx.getSequenceSupport().getCachedObject(MMUL_GENOME, Integer.class);
+            int genomeId = ctx.getSequenceSupport().getCachedObject(BASE_GENOME, Integer.class);
             ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeId);
             boolean testOnly = ctx.getParams().optBoolean("testOnly", false);
 
+            String species = ctx.getParams().getString("species");
             String releaseVersion = ctx.getParams().optString("releaseVersion", "0.0");
-            File primaryTrackVcf = new File(ctx.getOutputDir(), "mGap.v" + FileUtil.makeLegalName(releaseVersion).replaceAll(" ", "_") + ".vcf.gz");
+            File primaryTrackVcf = new File(ctx.getOutputDir(), "mGap." + FileUtil.makeLegalName(species).replaceAll(" ", "_") + ".v" + FileUtil.makeLegalName(releaseVersion).replaceAll(" ", "_") + ".vcf.gz"); // species is sanitized the same way as the version, since common names may contain spaces
 
             try
             {
@@ -994,7 +1005,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
 
             SequenceOutputFile output = new SequenceOutputFile();
             output.setFile(primaryTrackVcf);
-            output.setName("mGAP Release: " + releaseVersion);
+            output.setName("mGAP Release: " + species + " " + releaseVersion);
             output.setCategory((testOnly ? "Test " : "") + "mGAP Release");
             output.setLibrary_id(genome.getGenomeId());
             ctx.getFileManager().addSequenceOutput(output);
@@ -1002,7 +1013,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
             File interestingVariantTable = getVariantTableName(ctx, primaryTrackVcf);
             SequenceOutputFile output2 = new SequenceOutputFile();
             output2.setFile(interestingVariantTable);
-            output2.setName("mGAP Release: " + releaseVersion + " Variant Table");
+            output2.setName("mGAP Release: " + species + " " + releaseVersion + " Variant Table");
             output2.setCategory((testOnly ? "Test " : "") + "mGAP Release Variant Table");
             output2.setLibrary_id(genome.getGenomeId());
             ctx.getFileManager().addSequenceOutput(output2);
@@ -1012,7 +1023,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
             File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, grch37Genome);
             SequenceOutputFile output3 = new SequenceOutputFile();
             output3.setFile(lifted);
-            output3.setName("mGAP Release: " + releaseVersion + " Lifted to Human");
+            output3.setName("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human");
             output3.setCategory((testOnly ? "Test " : "") + "mGAP Release Lifted to Human");
             output3.setLibrary_id(grch37Genome.getGenomeId());
             ctx.getFileManager().addSequenceOutput(output3);
@@ -1111,16 +1122,6 @@ private File getSitesOnlyVcfName(File outDir, File primaryTrackVcf)
             return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".sitesOnly.vcf.gz");
         }
 
-        private File getDroppedSitesVcfName(File outDir, File primaryTrackVcf)
-        {
-            return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".droppedFromPriorRelease.vcf.gz");
-        }
-
-        private File getNovelSitesVcfName(File outDir, File primaryTrackVcf)
-        {
-            return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".newToRelease.vcf.gz");
-        }
-
         private File getLiftedVcfName(File outDir, File primaryTrackVcf)
         {
             return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".liftToGRCh37.vcf.gz");

From c540e99d7f70e3755d92eb9037056f481f3bc4f9 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Fri, 15 Nov 2024 12:19:35 -0800
Subject: [PATCH 18/20] Rework action to repair improper SBT outputs

---
 primeseq/src/org/labkey/primeseq/PrimeseqController.java | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/primeseq/src/org/labkey/primeseq/PrimeseqController.java b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
index 5bcfd42fb..3606da7ef 100644
--- a/primeseq/src/org/labkey/primeseq/PrimeseqController.java
+++ b/primeseq/src/org/labkey/primeseq/PrimeseqController.java
@@ -37,6 +37,8 @@
 import org.labkey.api.data.SqlExecutor;
 import org.labkey.api.data.TableSelector;
 import org.labkey.api.exp.api.ExpData;
+import org.labkey.api.exp.api.ExpRun;
+import org.labkey.api.exp.api.ExperimentService;
 import org.labkey.api.module.Module;
 import org.labkey.api.module.ModuleLoader;
 import org.labkey.api.pipeline.PipeRoot;
@@ -825,7 +827,10 @@ public boolean handlePost(Object o, BindException errors) throws Exception
                     return;
                 }
 
-                File root = f.getParentFile().getParentFile();
+                ExpRun run = ExperimentService.get().getExpRun(so.getRunId());
+                PipelineStatusFile sf = PipelineService.get().getStatusFile(run.getJobId());
+                File logFile = new File(sf.getFilePath());
+                File root = logFile.getParentFile();
                 File [] dirs = root.listFiles(fn -> {
                     return fn.isDirectory() & !fn.getName().equalsIgnoreCase("Shared");
                 });

From c2586eea0d8a47ecdfed2852e155946bca017e78 Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Fri, 15 Nov 2024 15:46:55 -0800
Subject: [PATCH 19/20] Switch docker to conditionally mount volumes based on
 provider

---
 .../pipeline/ExacloudResourceSettings.java    | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java b/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java
index 5158b5783..e7eaf6c75 100644
--- a/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java
+++ b/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java
@@ -3,12 +3,17 @@
 import org.json.JSONObject;
 import org.labkey.api.data.Container;
 import org.labkey.api.module.ModuleLoader;
+import org.labkey.api.pipeline.PipeRoot;
+import org.labkey.api.pipeline.PipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.JobResourceSettings;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.primeseq.PrimeseqModule;
 
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 /**
  * Created by bimber on 9/30/2016.
@@ -43,4 +48,37 @@ public boolean isAvailable(Container c)
     {
         return c.getActiveModules().contains(ModuleLoader.getInstance().getModule(PrimeseqModule.class));
     }
+
+    @Override
+    public Collection<String> getDockerVolumes(Container c)
+    {
+        // Returns the host directories to mount into docker for jobs run in the given container.
+        // Two fixed locations are always included:
+        Set<String> mounts = new HashSet<>();
+        mounts.add("/home/groups/prime-seq");
+        mounts.add("/home/exacloud/gscratch");
+
+        // Inspect the pipeline root of the container itself and, for a workbook, of its parent too
+        List<Container> toInspect = c.isWorkbook() ? Arrays.asList(c, c.getParent()) : Arrays.asList(c);
+        for (Container candidate : toInspect)
+        {
+            PipeRoot root = PipelineService.get().findPipelineRoot(candidate);
+            if (root == null || !root.getRootPath().exists())
+            {
+                continue;
+            }
+
+            String rootPath = root.getRootPath().getPath();
+            if (!rootPath.startsWith("/home/groups/"))
+            {
+                continue;
+            }
+
+            // Mount the first-level folder beneath /home/groups/ that contains this pipeline root
+            String folderName = rootPath.replaceAll("^/home/groups/", "").split("/")[0];
+            mounts.add("/home/groups/" + folderName);
+        }
+
+        return mounts;
+    }
 }

From 7bd9494b6d40b1f4ab73a4cd66a077f9ba81956d Mon Sep 17 00:00:00 2001
From: bbimber <bbimber@gmail.com>
Date: Sat, 16 Nov 2024 09:30:36 -0800
Subject: [PATCH 20/20] Update several ETLs

---
 PMR/resources/etls/pmr-datasets.xml     | 4 ++++
 PMR/resources/etls/pmr-demographics.xml | 2 ++
 mGAP/resources/etls/prime-seq.xml       | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/PMR/resources/etls/pmr-datasets.xml b/PMR/resources/etls/pmr-datasets.xml
index 19e981726..1186b27a4 100644
--- a/PMR/resources/etls/pmr-datasets.xml
+++ b/PMR/resources/etls/pmr-datasets.xml
@@ -141,6 +141,8 @@
                     <column>relationship</column>
                     <column>method</column>
                     <column>objectid</column>
+                    <column>created</column>
+                    <column>modified</column>
                 </sourceColumns>
                 <sourceFilters>
                     <sourceFilter column="QCState/Label" operator="eq" value="Completed"/>
@@ -171,6 +173,8 @@
                     <column>conception</column>
                     <column>conceptualDay</column>
                     <column>objectid</column>
+                    <column>created</column>
+                    <column>modified</column>
                 </sourceColumns>
                 <sourceFilters>
                     <sourceFilter column="QCState/Label" operator="eq" value="Completed"/>
diff --git a/PMR/resources/etls/pmr-demographics.xml b/PMR/resources/etls/pmr-demographics.xml
index 9c06b7b3c..03b2bf94e 100644
--- a/PMR/resources/etls/pmr-demographics.xml
+++ b/PMR/resources/etls/pmr-demographics.xml
@@ -17,6 +17,8 @@
                     <column>calculated_status</column>
                     <column>QCState/Label</column>
                     <column>objectid</column>
+                    <column>created</column>
+                    <column>modified</column>
                 </sourceColumns>
             </source>
             <destination schemaName="study" queryName="demographics" targetOption="truncate" bulkLoad="true" batchSize="2500">
diff --git a/mGAP/resources/etls/prime-seq.xml b/mGAP/resources/etls/prime-seq.xml
index 458803fca..312f0d033 100644
--- a/mGAP/resources/etls/prime-seq.xml
+++ b/mGAP/resources/etls/prime-seq.xml
@@ -60,6 +60,9 @@
                     <column>source</column>
                     <column>description</column>
                     <column>isprimarytrack</column>
+                    <column>shouldindex</column>
+                    <column>vcfIndexId/dataid/DataFileUrl</column>
+                    <column>vcfIndexId/library_id/name</column>
                     <column>vcfId/dataid/DataFileUrl</column>
                     <column>vcfId/library_id/name</column>
                 </sourceColumns>
@@ -67,6 +70,7 @@
             <destination schemaName="mGAP" queryName="tracksPerRelease" bulkLoad="true" targetOption="truncate">
                 <columnTransforms>
                     <column source="vcfId/dataid/DataFileUrl" target="vcfId" transformClass="org.labkey.mgap.columnTransforms.TrackOutputFileTransform" />
+                    <column source="vcfIndexId/dataid/DataFileUrl" target="vcfIndexId" transformClass="org.labkey.mgap.columnTransforms.TrackOutputFileTransform" />
                 </columnTransforms>
             </destination>
         </transform>