Skip to content

Commit eb55621

Browse files
authored
Merge pull request #123 from xueweic/main
Update about post-filtering of uCoS
2 parents 0a5ef28 + 3a9dc27 commit eb55621

11 files changed

Lines changed: 1319 additions & 30 deletions

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export(get_cos_purity)
1010
export(get_cos_summary)
1111
export(get_hierarchical_clusters)
1212
export(get_robust_colocalization)
13+
export(get_robust_ucos)
1314
export(get_ucos_summary)
1415
importFrom(grDevices,adjustcolor)
1516
importFrom(graphics,abline)

R/colocboost.R

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
210210
dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD,
211211
effect_est = effect_est, effect_se = effect_se, effect_n = effect_n,
212212
overlap_variables = overlap_variables,
213-
M = M, min_abs_corr = min_abs_corr
213+
M = M, min_abs_corr = min_abs_corr,
214+
jk_equiv_corr = jk_equiv_corr,
215+
jk_equiv_loglik = jk_equiv_loglik,
216+
func_simplex = func_simplex,
217+
cos_npc_cutoff = cos_npc_cutoff,
218+
npc_outcome_cutoff = npc_outcome_cutoff
214219
)
215220
if (is.null(validated_data)) {
216221
return(NULL)
@@ -235,6 +240,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
235240
jk_equiv_corr <- validated_data$jk_equiv_corr
236241
jk_equiv_loglik <- validated_data$jk_equiv_loglik
237242
func_simplex <- validated_data$func_simplex
243+
cos_npc_cutoff <- validated_data$cos_npc_cutoff
244+
npc_outcome_cutoff <- validated_data$npc_outcome_cutoff
245+
if (M == 1){
246+
cos_npc_cutoff <- 0
247+
npc_outcome_cutoff <- 0
248+
}
238249

239250
# - initial colocboost object
240251
keep_variables <- c(keep_variable_individual, keep_variable_sumstat)
@@ -338,6 +349,20 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
338349
weight_fudge_factor = weight_fudge_factor,
339350
coverage = coverage
340351
)
352+
# ---- post filtering of the colocboost results (get robust trait-specific events)
353+
if ("ucos_details" %in% names(cb_output)) {
354+
if (is.null(pvalue_cutoff)){
355+
pvalue_cutoff_ucos <- NULL
356+
} else {
357+
# only keep the suggestive significant results
358+
pvalue_cutoff_ucos <- ifelse(pvalue_cutoff > 1e-5, 1e-5, pvalue_cutoff)
359+
}
360+
cb_output <- get_robust_ucos(
361+
cb_output,
362+
npc_outcome_cutoff = npc_outcome_cutoff,
363+
pvalue_cutoff = pvalue_cutoff_ucos
364+
)
365+
}
341366

342367
return(cb_output)
343368
}
@@ -387,7 +412,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL,
387412
dict_YX = NULL, dict_sumstatLD = NULL,
388413
effect_est = NULL, effect_se = NULL, effect_n = NULL,
389414
overlap_variables = FALSE,
390-
M = 500, min_abs_corr = 0.5) {
415+
M = 500, min_abs_corr = 0.5,
416+
jk_equiv_corr = 0.8, jk_equiv_loglik = 1,
417+
func_simplex = "LD_z2z",
418+
cos_npc_cutoff = 0.2,
419+
npc_outcome_cutoff = 0.2) {
391420

392421
# - check individual level data
393422
if (!is.null(X) & !is.null(Y)) {
@@ -624,9 +653,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL,
624653
# --- check input of LD
625654
M_updated <- M
626655
min_abs_corr_updated <- min_abs_corr
627-
jk_equiv_corr_updated <- 0.8
628-
jk_equiv_loglik_updated <- 1
629-
func_simplex_updated <- "LD_z2z"
656+
jk_equiv_corr_updated <- jk_equiv_corr
657+
jk_equiv_loglik_updated <- jk_equiv_loglik
658+
func_simplex_updated <- func_simplex
659+
cos_npc_cutoff_updated <- cos_npc_cutoff
660+
npc_outcome_cutoff_updated <- npc_outcome_cutoff
630661

631662
if (is.null(LD)) {
632663
# if no LD input, set diagonal matrix to LD
@@ -644,6 +675,8 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL,
644675
jk_equiv_corr_updated <- 0
645676
jk_equiv_loglik_updated <- 0.1
646677
func_simplex_updated <- "only_z2z"
678+
cos_npc_cutoff_updated <- 0
679+
npc_outcome_cutoff_updated <- 0
647680

648681
} else {
649682

@@ -804,9 +837,11 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL,
804837
Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- NULL
805838
M_updated <- M
806839
min_abs_corr_updated <- min_abs_corr
807-
jk_equiv_corr_updated <- 0.8
808-
jk_equiv_loglik_updated <- 1
809-
func_simplex_updated <- "LD_z2z"
840+
jk_equiv_corr_updated <- jk_equiv_corr
841+
jk_equiv_loglik_updated <- jk_equiv_loglik
842+
func_simplex_updated <- func_simplex
843+
cos_npc_cutoff_updated = cos_npc_cutoff
844+
npc_outcome_cutoff_updated = npc_outcome_cutoff
810845
}
811846

812847
return(list(
@@ -826,7 +861,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL,
826861
min_abs_corr = min_abs_corr_updated,
827862
jk_equiv_corr = jk_equiv_corr_updated,
828863
jk_equiv_loglik = jk_equiv_loglik_updated,
829-
func_simplex = func_simplex_updated
864+
func_simplex = func_simplex_updated,
865+
cos_npc_cutoff = cos_npc_cutoff_updated,
866+
npc_outcome_cutoff = npc_outcome_cutoff_updated
830867
))
831868
}
832869

R/colocboost_inference.R

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ check_null_post <- function(cb_obj,
269269
ld_feature <- sqrt(abs(ld_jk))
270270
# - calculate delta
271271
delta <- boost_KL_delta(
272-
z = z, ld_feature = ld_feature, adj_dep = adj_dep,
272+
z = z, ld_feature = ld_feature, adj_dep = adj_dep,
273273
func_simplex = func_simplex, lambda = lambda
274274
)
275275
scaling_factor <- if (!is.null(N)) (N - 1) else 1
@@ -575,7 +575,6 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
575575
}
576576

577577
avWeight <- coloc_out$avWeight
578-
cs_change <- coloc_out$cs_change
579578
check_null_max <- sapply(cb_obj$cb_model, function(cb) cb$check_null_max)
580579
outcome_names <- data_info$outcome_info$outcome_names
581580
n_cos <- length(avWeight)
@@ -603,3 +602,99 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
603602
return(list(normalization_evidence = normalization_evidence, npc = npc))
604603
}
605604

605+
606+
607+
#' Function to get the evidence of trait-specific ucos
608+
#' @keywords cb_post_inference
609+
#' @noRd
610+
#' @importFrom stats var
611+
#' @importFrom utils tail
612+
get_ucos_evidence <- function(cb_obj, ucoloc_info) {
613+
614+
get_ucos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL,
615+
XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) {
616+
if (!is.null(X)) {
617+
cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1)
618+
yty <- var(Y)
619+
} else if (!is.null(XtY)) {
620+
scaling_factor <- if (!is.null(N)) (N - 1) else 1
621+
beta_scaling <- if (!is.null(N)) 1 else 100
622+
cs_beta <- cs_beta / beta_scaling
623+
yty <- YtY / scaling_factor
624+
xtx <- XtX
625+
if (length(miss_idx) != 0) {
626+
xty <- XtY[-miss_idx] / scaling_factor
627+
cs_beta <- cs_beta[-miss_idx]
628+
} else {
629+
xty <- XtY / scaling_factor
630+
}
631+
if (length(xtx) == 1){
632+
cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep
633+
} else {
634+
cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
635+
}
636+
}
637+
delta <- yty - cos_profile
638+
if (delta <= 0) {
639+
warning(paste(
640+
"Warning message: potential sumstat & LD mismatch may happens for outcome", outcome_idx,
641+
". Using logLR = uCoS(profile) - max(profile). Please check our website https://statfungen.github.io/colocboost/articles/."
642+
))
643+
}
644+
cos_profile
645+
}
646+
647+
get_outcome_profile_change <- function(outcome_idx, ucos, cb_obj) {
648+
extract_last <- function(lst) {
649+
tail(lst, n = 1)
650+
}
651+
cb_data <- cb_obj$cb_data
652+
cs_beta <- rep(0, cb_obj$cb_model_para$P)
653+
cs_beta[ucos] <- cb_obj$cb_model[[outcome_idx]]$beta[ucos]
654+
X_dict <- cb_data$dict[outcome_idx]
655+
cos_profile <- get_ucos_profile(cs_beta, outcome_idx,
656+
X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[outcome_idx]]$Y,
657+
XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY,
658+
YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N,
659+
miss_idx = cb_data$data[[outcome_idx]]$variable_miss,
660+
adj_dep = cb_data$data[[outcome_idx]]$dependency
661+
)
662+
max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each)
663+
ifelse(max_profile < cos_profile, 0, max_profile - cos_profile)
664+
}
665+
666+
# - Calculate best configuration likelihood explained by minimal configuration
667+
get_normalization_evidence <- function(profile_change, null_max, outcomes, outcome_names) {
668+
# Define the exponential likelihood ratio normalization (ELRN)
669+
logLR_normalization <- function(ratio) {
670+
1 - exp(-2 * ratio)
671+
}
672+
673+
ratio <- profile_change / null_max
674+
prob <- logLR_normalization(ratio)
675+
df <- data.frame(outcome = outcome_names, outcomes_index = outcomes, relative_logLR = ratio, npc_outcome = prob)
676+
return(df)
677+
}
678+
679+
check_null_max_ucos <- sapply(cb_obj$cb_model, function(cb) cb$check_null_max_ucos)
680+
n_ucos <- length(ucoloc_info$ucos)
681+
normalization_evidence <- list()
682+
for (i in 1:n_ucos) {
683+
outcome_idx <- ucoloc_info$outcome[[i]]
684+
outcome_name <- ucoloc_info$outcome_name[[i]]
685+
# most likely cos
686+
ucos <- ucoloc_info$ucos[[i]]
687+
profile_change_outcome <- get_outcome_profile_change(outcome_idx, ucos, cb_obj)
688+
normalization_evidence[[i]] <- get_normalization_evidence(
689+
profile_change = profile_change_outcome,
690+
null_max = check_null_max_ucos[outcome_idx],
691+
outcome_idx, outcome_name
692+
)
693+
}
694+
normalization_evidence <- do.call(rbind, normalization_evidence)
695+
rownames(normalization_evidence) <- names(ucoloc_info$ucos)
696+
return(normalization_evidence)
697+
}
698+
699+
700+

0 commit comments

Comments
 (0)