StatFunGen · gaow · Jan 20, 2026 · Jan 13, 2026 · Jan 15, 2026 · Jan 16, 2026
diff --git a/code/pecotmr_integration/twas_ctwas.ipynb b/code/pecotmr_integration/twas_ctwas.ipynb
@@ -577,28 +577,32 @@
     "\n",
     "\n",
     "    # function to update method selection\n",
-    "    update_twas_method <- function(twas_df){\n",
+    "    update_twas_method <- function(twas_df, rsq_cutoff =${rsq_cutoff}, rsq_pval_cutoff = ${rsq_pval_cutoff}){\n",
+    "        if (nrow(twas_df) == 0) {  \n",
+    "            return(twas_df)\n",
+    "        } \n",
     "        twas_df$method_selected_original <- twas_df$is_selected_method\n",
     "        twas_df_colnames <- colnames(twas_df)\n",
     "        twas_df$gene_context <- paste0(twas_df$molecular_id, \"_\", twas_df$context)\n",
-    "        rs_update <- do.call(rbind, lapply(unique(twas_df$gene_context), function(gene_context){\n",
-    "            do.call(rbind, lapply(unique(twas_df$gwas_study), function(study){\n",
-    "                df <- twas_df[twas_df$gene_context==gene_context & twas_df$gwas_study==study,,drop=FALSE]\n",
-    "                df_colnames <- colnames(df)\n",
-    "                all_imputable <- df[df$rsq_cv>=${rsq_cutoff} & df$pval_cv<=${rsq_pval_cutoff} & !is.na(df$twas_z),,drop=FALSE]\n",
-    "                if (nrow(all_imputable)<=1 || all(is.na(all_imputable$twas_z))) {\n",
-    "                    return(df) # gene-context pair does not having any other imputable model available.\n",
-    "                } else {\n",
-    "                    message (\"Updating method selection for \", gene_context, \"-\", study, \".\")\n",
-    "                    selected_method_update <- all_imputable[order(-all_imputable$rsq_cv),]$method[1]\n",
-    "                    df$selected_method_update <- df$method==selected_method_update\n",
-    "                    df$is_selected_method <- df$selected_method_update\n",
-    "                    df <- df[, df_colnames]\n",
-    "                    return(df)\n",
-    "                }\n",
-    "            }))\n",
-    "        }))\n",
-    "        return(rs_update[, twas_df_colnames])\n",
+    "        fix_sel <- twas_df$is_selected_method & (is.na(twas_df$twas_z) | is.na(twas_df$twas_pval) | is.infinite(twas_df$twas_pval) |\n",
+    "                                           is.null(twas_df$twas_z) | is.null(twas_df$twas_pval))\n",
+    "        if (!any(fix_sel)) return(twas_df) # If nothing to fix, return fast\n",
+    "        all_group <- paste(twas_df$gene_context, twas_df$gwas_study, sep = \" - \") #gene-context-study group\n",
+    "        fix_groups <- unique(all_group[fix_sel])\n",
+    "\n",
+    "        for (fixgroup in fix_groups) {\n",
+    "            df_idxs <- which(all_group == fixgroup) # idx of gene-context-study group that needs replacement of method selected\n",
+    "            if (length(df_idxs) == 1) next\n",
+    "            candidate_idxs <- df_idxs[!is.na(twas_df$twas_z[df_idxs]) & !is.na(twas_df$twas_pval[df_idxs]) & !is.infinite(twas_df$twas_z[df_idxs]) & \n",
+    "                             !is.null(twas_df$twas_z[df_idxs]) & !is.null(twas_df$twas_pval[df_idxs]) &\n",
+    "                             twas_df$rsq_cv[df_idxs] >= rsq_cutoff & twas_df$pval_cv[df_idxs] <= rsq_pval_cutoff] \n",
+    "            if (length(candidate_idxs) <= 0 || all(is.na(candidate_idxs))) next\n",
+    "            best_i <- candidate_idxs[which.max(twas_df$rsq_cv[candidate_idxs])]\n",
+    "            best_method <- twas_df$method[best_i]\n",
+    "            twas_df$is_selected_method[df_idxs] <- (twas_df$method[df_idxs] == best_method)\n",
+    "            message (\"Updating method selection for \", fixgroup, \".\")\n",
+    "        }\n",
+    "        return(twas_df[, twas_df_colnames])\n",
     "    }\n",
     "\n",
     "    # Load metadata and configuration - let these fail if there are issues\n",
@@ -638,7 +642,7 @@
     "        message(paste(\"Error checking weight files:\", e$message))\n",
     "        return(list())\n",
     "    })\n",
-    "    \n",
+    "\n",
     "    if(length(weight_db_list_update) <= 0) {\n",
     "        message(paste0(\"No valid weight files for region ${_filtered_region_info[3]}. Creating empty output files.\"))\n",
     "        # Define TWAS result columns (same as in final_results)\n",
@@ -677,15 +681,17 @@
     "    for (gene_db in names(weight_db_list_update)) {\n",
     "        weight_dbs <- weight_db_list_update[[gene_db]]\n",
     "        message(paste(\"Processing gene:\", gene_db, \"with\", length(weight_dbs), \"weight files\"))\n",
-    "        \n",
+    "        conditions = xqtl_meta_df |> filter(region_id == gene_db) |> pull(contexts)\n",
+    "\n",
     "        # Load weights for this gene - let it fail if there are real issues\n",
     "        twas_weights_results[[gene_db]] = load_twas_weights(\n",
     "            weight_dbs, \n",
+    "            conditions = if(is.na(conditions)) NULL else conditions,\n",
     "            variable_name_obj = \"variant_names\", \n",
     "            susie_obj = \"susie_weights_intermediate\",\n",
     "            twas_weights_table = \"twas_weights\"\n",
     "        )\n",
-    "        \n",
+    "\n",
     "        if (length(twas_weights_results[[gene_db]]) > 1) {\n",
     "            twas_weights_results[[gene_db]]$data_type <- setNames(\n",
     "                lapply(names(twas_weights_results[[gene_db]]$weights), function(context) {\n",
@@ -739,9 +745,7 @@
     "      # TWAS analysis - allow this to fail with informative errors\n",
     "    twas_results_db <- vector(\"list\", length(twas_weights_results))    \n",
     "    for (batch in seq_along(twas_weights_results)) {\n",
-    "    \n",
     "        message(paste(\"Processing batch\", batch, \"of\", length(twas_weights_results)))\n",
-    "    \n",
     "        res <- tryCatch(\n",
     "            twas_pipeline(\n",
     "                twas_weights_results[[batch]], \n",
@@ -802,10 +806,16 @@
     "        final_results <- do.call(rbind, lapply(twas_results_db, function(x) {\n",
     "            if (!is.null(x$twas_result)) x$twas_result[, c(2,1,14:16,3:13)] else data.frame()\n",
     "        }))\n",
+    "\n",
     "        # update methods if twas z-score is NA \n",
-    "        final_results <- update_twas_method(final_results)        \n",
-    "        message(paste(\"Writing\", nrow(final_results), \"final TWAS results\"))\n",
-    "        fwrite(final_results, ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",
+    "        final_results <- update_twas_method(final_results) \n",
+    "        if (nrow(final_results)==0) {\n",
+    "            message(\"No valid TWAS results to write\")\n",
+    "            fwrite(data.frame(), file = ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",
+    "        } else {\n",
+    "            message(paste(\"Writing\", nrow(final_results), \"final TWAS results\"))\n",
+    "            fwrite(final_results, ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",
+    "        }\n",
     "    } else {\n",
     "        message(\"No valid TWAS results to write\")\n",
     "        fwrite(data.frame(), file = ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",