Improvements for output_table#115
Draft
dwinkler1 wants to merge 5 commits intokonfound-project:masterfrom
Draft
Conversation
- for these classes `$` is not implemented - access works via `@`
Author
|
I have a completely revamped version of output table that also works for categorical variables. Please let me know if such a large diff would be accepted in one PR: output_table <- function(model_object, tested_variable, round = FALSE) {
if (isS4(model_object)) {
p <- all.vars(model_object@call)[1]
} else {
p <- all.vars(model_object$call)[1]
}
cat("Dependent variable is", p, "\n")
model_output <- broom::tidy(model_object) # tidying output
model_output$itcv <- NA
var_row <- model_output$term == tested_variable
model_output$itcv[var_row] <- abs(konfound(model_object,
!!tested_variable,
to_return = "raw_output"
)$itcv)
covariate_names <- model_output$term[
!(model_output$term %in% c("(Intercept)", tested_variable))
]
model_frame <- model.frame(model_object)
model_matrix <- model.matrix(model_object)
data_mat <- cbind(
model.response(model_frame),
model_matrix[, colnames(model_matrix) != "(Intercept)"]
)
colnames(data_mat)[1] <- p
cor_mat <- stats::cor(data_mat)[, c(p, tested_variable)]
cor_df <- data.frame(cor_mat) |>
dplyr::mutate(
term = rownames(cor_mat),
Impact = !!rlang::sym(p) * !!rlang::sym(tested_variable),
itcv_ = abs(Impact)
)
# Observed Impact Table
impact_table <- cor_df |>
dplyr::as_tibble() |>
dplyr::filter(!term %in% c(p, tested_variable)) |>
dplyr::rename(
`Cor(vX)` = rlang::sym(tested_variable),
`Cor(vY)` = rlang::sym(p)
)
model_output <- model_output |>
dplyr::left_join(dplyr::select(impact_table, term, itcv_), by = "term") |>
dplyr::mutate(itcv = dplyr::coalesce(itcv, itcv_)) |>
dplyr::select(-itcv_)
pcor_mat_x <- ppcor::pcor(data_mat[, colnames(data_mat) != p])$estimate
colnames(pcor_mat_x) <- colnames(data_mat)[colnames(data_mat) != p]
pcor_df_x <- data.frame(pcor_mat_x) |>
dplyr::select(!!rlang::sym(tested_variable)) |>
dplyr::mutate(
term = colnames(pcor_mat_x)
)
pcor_mat_y <- ppcor::pcor(data_mat[, colnames(data_mat) != tested_variable])$estimate
colnames(pcor_mat_y) <- colnames(data_mat)[colnames(data_mat) != tested_variable]
pcor_df_y <- data.frame(pcor_mat_y) |>
dplyr::select(!!rlang::sym(p)) |>
dplyr::mutate(
term = colnames(pcor_mat_y)
)
pcor_df <- dplyr::inner_join(pcor_df_x, pcor_df_y, by = "term") |>
dplyr::mutate(Partial_Impact = abs(!!rlang::sym(p) * !!rlang::sym(tested_variable)))
# Partial Impact Table
impact_table_partial <- pcor_df |>
dplyr::as_tibble() |>
dplyr::rename(
`Partial Cor(vX)` = rlang::sym(tested_variable),
`Partial Cor(vY)` = rlang::sym(p)
)
cat(paste0(
"X represents ", tested_variable, ", Y represents ", p,
", v represents each covariate.\n",
"First table is based on unconditional correlations, second table is based on\n",
"partial correlations.\n\n"
))
# Check if any row has all Partial Correlation components as NA
if (any(is.na(impact_table_partial$`Partial Cor(vX)`) &
is.na(impact_table_partial$`Partial Cor(vY)`) &
is.na(impact_table_partial$Partial_Impact))) {
stop(
paste0(
"Numerical instability detected in partial correlation. This indicates potential multicollinearity or scaling issues.\n",
"To resolve this issue, consider:\n",
"1) Standardize predictors with scale().\n",
"2) Remove or combine highly correlated predictors.\n",
"3) Apply regularization (e.g., ridge regression)."
)
)
}
impact_table <- impact_table |>
dplyr::select(term, `Cor(vX)`, `Cor(vY)`, Impact)
impact_table_partial <- impact_table_partial |>
dplyr::select(term, `Partial Cor(vX)`, `Partial Cor(vY)`, Partial_Impact)
if (round) {
impact_table <- impact_table |>
dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
impact_table_partial <- impact_table_partial |>
dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
}
# Return all three tables as a list
return(list(Main_Output = model_output, Unconditional_Impact = impact_table, Partial_Impact = impact_table_partial))
} |
Author
|
A more general implementation that would also take random effects into account would be: get_kr_df <- function(model_object) {
var_names <- names(lme4::fixef(model_object))
L <- diag(length(var_names)) |> data.frame()
vcov_adj <- pbkrtest::vcovAdj(model_object)
vcov_0 <- vcov(model_object)
out <- suppressWarnings(purrr::map_dbl(
L,
pbkrtest::Lb_ddf,
V0 = vcov_0, Vadj = vcov_adj
))
names(out) <- var_names
return(out)
}
#' Calculate partial correlation from t and df
t2pcorr <- function(t, df) {
t / sqrt(t^2 + df)
}
beta2pcorr_ <- function(beta, se, df) {
t <- beta / se
return(t2pcorr(t, df))
}
#' Get a data frame with the estimates and added partial correlations
beta2pcorr <- function(model, terms = names(fixef(model)), df_ = NULL) {
mod <- broom::tidy(model) |>
dplyr::filter(term %in% terms)
if (!is.null(df_)) {
dd <- data.frame(df_ = df_)
dd$term <- rownames(dd)
mod <- dplyr::left_join(mod, dd, by = "term")
} else if ("df" %in% names(mod)) {
mod <- dplyr::rename(mod, df_ = df)
} else {
dd <- data.frame(df_ = get_kr_df_par(model, terms))
dd$term <- names(dd)
mod <- dplyr::left_join(mod, dd, by = "term")
}
mod |>
dplyr::mutate(
pcorr = beta2pcorr_(estimate, std.error, df_),
df = df_
) |>
dplyr::select(term, estimate, std.error, df, pcorr)
}
#' Calculate partial correlations and partial impact to benchmark ITCV
#' Calculate partial correlations and partial impact to benchmark ITCV
impact_table <- function(model_object, term, print = FALSE) {
col_names <- if (isS4(model_object)) {
all.vars(model_object@call)
} else {
all.vars(model_object$call)
}
y_name <- col_names[1]
y_name_ <- rlang::sym(y_name)
term_ <- rlang::sym(term)
model_matrix <- model.matrix(model_object)
x_names <- colnames(model_matrix)
data_mat <- cbind(
model.response(model.frame(model_object)),
model_matrix[, x_names != "(Intercept)"]
)
colnames(data_mat)[1] <- y_name
cor_mat <- stats::cor(data_mat)[, c(term, y_name)]
cor_df <- data.frame(cor_mat) |>
dplyr::mutate(
term = rownames(cor_mat),
impact = !!y_name_ * !!term_,
) |>
dplyr::rename_with(~ paste0("corr_", c({{ term }}, y_name)), c(term_, y_name_))
model_y <- update(model_object, formula = paste0(". ~ . -", term_))
model_x <- update(model_object, formula = paste0(term_, " ~ . -", term_))
if (inherits(model_y, "lmerMod")) {
df_y <- get_kr_df(model_y)
df_x <- get_kr_df(model_x)
} else {
df_y <- setNames(rep(model_y$df.residual, length(x_names)), x_names)
df_x <- setNames(rep(model_x$df.residual, length(x_names)), x__names)
}
pcorr_y <- beta2pcorr(model_y, terms = x_names, df = df_y) |>
dplyr::select(term, pcorr_y = pcorr)
pcorr_x <- beta2pcorr(model_x, terms = x_names, df = df_x) |>
dplyr::select(term, pcorr_x = pcorr)
impact_table <- dplyr::inner_join(pcorr_x, pcorr_y, by = "term") |>
dplyr::mutate(partial_impact = pcorr_x * pcorr_y) |>
dplyr::rename_with(~ paste0("pcorr_", c(term, y_name)), c(pcorr_x, pcorr_y)) |>
dplyr::filter(term != "(Intercept)") |>
dplyr::inner_join(cor_df, by = "term")
if (print) {
cat("Raw:\n")
impact_table |>
dplyr::select(term, starts_with("c"), impact) |>
print()
cat("\nPartial:\n")
impact_table |>
dplyr::select(term, starts_with("p")) |>
print()
cat(rep("-", getOption("width") - 1), "\n")
}
return(impact_table)
} |
Collaborator
|
thanks we are reviewing |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
s4classes$is not implemented@lmerMod(lme4) is implemented as ans4class.