diff --git a/code/pecotmr_integration/twas_ctwas.ipynb b/code/pecotmr_integration/twas_ctwas.ipynb index 0304d597..bc36e488 100644 --- a/code/pecotmr_integration/twas_ctwas.ipynb +++ b/code/pecotmr_integration/twas_ctwas.ipynb @@ -577,28 +577,32 @@ "\n", "\n", " # function to update method selection\n", - " update_twas_method <- function(twas_df){\n", + " update_twas_method <- function(twas_df, rsq_cutoff =${rsq_cutoff}, rsq_pval_cutoff = ${rsq_pval_cutoff}){\n", + " if (nrow(twas_df) == 0) { \n", + " return(twas_df)\n", + " } \n", " twas_df$method_selected_original <- twas_df$is_selected_method\n", " twas_df_colnames <- colnames(twas_df)\n", " twas_df$gene_context <- paste0(twas_df$molecular_id, \"_\", twas_df$context)\n", - " rs_update <- do.call(rbind, lapply(unique(twas_df$gene_context), function(gene_context){\n", - " do.call(rbind, lapply(unique(twas_df$gwas_study), function(study){\n", - " df <- twas_df[twas_df$gene_context==gene_context & twas_df$gwas_study==study,,drop=FALSE]\n", - " df_colnames <- colnames(df)\n", - " all_imputable <- df[df$rsq_cv>=${rsq_cutoff} & df$pval_cv<=${rsq_pval_cutoff} & !is.na(df$twas_z),,drop=FALSE]\n", - " if (nrow(all_imputable)<=1 || all(is.na(all_imputable$twas_z))) {\n", - " return(df) # gene-context pair does not having any other imputable model available.\n", - " } else {\n", - " message (\"Updating method selection for \", gene_context, \"-\", study, \".\")\n", - " selected_method_update <- all_imputable[order(-all_imputable$rsq_cv),]$method[1]\n", - " df$selected_method_update <- df$method==selected_method_update\n", - " df$is_selected_method <- df$selected_method_update\n", - " df <- df[, df_colnames]\n", - " return(df)\n", - " }\n", - " }))\n", - " }))\n", - " return(rs_update[, twas_df_colnames])\n", + " fix_sel <- twas_df$is_selected_method & (is.na(twas_df$twas_z) | is.na(twas_df$twas_pval) | is.infinite(twas_df$twas_pval) |\n", + " is.null(twas_df$twas_z) | is.null(twas_df$twas_pval))\n", + " if (!any(fix_sel)) return(twas_df) # If nothing to fix, return fast\n", + " all_group <- paste(twas_df$gene_context, twas_df$gwas_study, sep = \" - \") #gene-context-study group\n", + " fix_groups <- unique(all_group[fix_sel])\n", + "\n", + " for (fixgroup in fix_groups) {\n", + " df_idxs <- which(all_group == fixgroup) # idx of gene-context-study group that needs replacement of method selected\n", + " if (length(df_idxs) == 1) next\n", + " candidate_idxs <- df_idxs[!is.na(twas_df$twas_z[df_idxs]) & !is.na(twas_df$twas_pval[df_idxs]) & !is.infinite(twas_df$twas_z[df_idxs]) & \n", + " !is.null(twas_df$twas_z[df_idxs]) & !is.null(twas_df$twas_pval[df_idxs]) &\n", + " twas_df$rsq_cv[df_idxs] >= rsq_cutoff & twas_df$pval_cv[df_idxs] <= rsq_pval_cutoff] \n", + " if (length(candidate_idxs) <= 0 || all(is.na(candidate_idxs))) next\n", + " best_i <- candidate_idxs[which.max(twas_df$rsq_cv[candidate_idxs])]\n", + " best_method <- twas_df$method[best_i]\n", + " twas_df$is_selected_method[df_idxs] <- (twas_df$method[df_idxs] == best_method)\n", + " message (\"Updating method selection for \", fixgroup, \".\")\n", + " }\n", + " return(twas_df[, twas_df_colnames])\n", " }\n", "\n", " # Load metadata and configuration - let these fail if there are issues\n", @@ -638,7 +642,7 @@ " message(paste(\"Error checking weight files:\", e$message))\n", " return(list())\n", " })\n", - " \n", + "\n", " if(length(weight_db_list_update) <= 0) {\n", " message(paste0(\"No valid weight files for region ${_filtered_region_info[3]}. Creating empty output files.\"))\n", " # Define TWAS result columns (same as in final_results)\n", @@ -677,15 +681,17 @@ " for (gene_db in names(weight_db_list_update)) {\n", " weight_dbs <- weight_db_list_update[[gene_db]]\n", " message(paste(\"Processing gene:\", gene_db, \"with\", length(weight_dbs), \"weight files\"))\n", - " \n", + " conditions = xqtl_meta_df |> filter(region_id == gene_db) |> pull(contexts)\n", + "\n", " # Load weights for this gene - let it fail if there are real issues\n", " twas_weights_results[[gene_db]] = load_twas_weights(\n", " weight_dbs, \n", + " conditions = if(is.na(conditions)) NULL else conditions,\n", " variable_name_obj = \"variant_names\", \n", " susie_obj = \"susie_weights_intermediate\",\n", " twas_weights_table = \"twas_weights\"\n", " )\n", - " \n", + "\n", " if (length(twas_weights_results[[gene_db]]) > 1) {\n", " twas_weights_results[[gene_db]]$data_type <- setNames(\n", " lapply(names(twas_weights_results[[gene_db]]$weights), function(context) {\n", @@ -739,9 +745,7 @@ " # TWAS analysis - allow this to fail with informative errors\n", " twas_results_db <- vector(\"list\", length(twas_weights_results)) \n", " for (batch in seq_along(twas_weights_results)) {\n", - " \n", " message(paste(\"Processing batch\", batch, \"of\", length(twas_weights_results)))\n", - " \n", " res <- tryCatch(\n", " twas_pipeline(\n", " twas_weights_results[[batch]], \n", @@ -802,10 +806,16 @@ " final_results <- do.call(rbind, lapply(twas_results_db, function(x) {\n", " if (!is.null(x$twas_result)) x$twas_result[, c(2,1,14:16,3:13)] else data.frame()\n", " }))\n", + "\n", " # update methods if twas z-score is NA \n", - " final_results <- update_twas_method(final_results) \n", - " message(paste(\"Writing\", nrow(final_results), \"final TWAS results\"))\n", - " fwrite(final_results, ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n", + " final_results <- update_twas_method(final_results) \n", + " if (nrow(final_results)==0) {\n", + " message(\"No valid TWAS results to write\")\n", + " fwrite(data.frame(), file = ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n", + " } else {\n", + " message(paste(\"Writing\", nrow(final_results), \"final TWAS results\"))\n", + " fwrite(final_results, ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n", + " }\n", " } else {\n", " message(\"No valid TWAS results to write\")\n", " fwrite(data.frame(), file = ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",