Improvements for `output_table` by dwinkler1 · Pull Request #115 · konfound-project/konfound

dwinkler1 · 2025-07-23T05:53:30Z

for s4 classes $ is not implemented
access works via @
lmerMod (lme4) is implemented as an s4 class.

- for these classes `$` is not implemented - access works via `@`

dwinkler1 · 2025-07-23T05:55:27Z

I have a completely revamped version of output table that also works for categorical variables. Please let me know if such a large diff would be accepted in one PR:

output_table <- function(model_object, tested_variable, round = FALSE) {
  if (isS4(model_object)) {
    p <- all.vars(model_object@call)[1]
  } else {
    p <- all.vars(model_object$call)[1]
  }
  cat("Dependent variable is", p, "\n")
  model_output <- broom::tidy(model_object) # tidying output

  model_output$itcv <- NA

  var_row <- model_output$term == tested_variable
  model_output$itcv[var_row] <- abs(konfound(model_object,
    !!tested_variable,
    to_return = "raw_output"
  )$itcv)

  covariate_names <- model_output$term[
    !(model_output$term %in% c("(Intercept)", tested_variable))
  ]

  model_frame <- model.frame(model_object)
  model_matrix <- model.matrix(model_object)
  data_mat <- cbind(
    model.response(model_frame),
    model_matrix[, colnames(model_matrix) != "(Intercept)"]
  )
  colnames(data_mat)[1] <- p

  cor_mat <- stats::cor(data_mat)[, c(p, tested_variable)]
  cor_df <- data.frame(cor_mat) |>
    dplyr::mutate(
      term = rownames(cor_mat),
      Impact = !!rlang::sym(p) * !!rlang::sym(tested_variable),
      itcv_ = abs(Impact)
    )

  # Observed Impact Table
  impact_table <- cor_df |>
    dplyr::as_tibble() |>
    dplyr::filter(!term %in% c(p, tested_variable)) |>
    dplyr::rename(
      `Cor(vX)` = rlang::sym(tested_variable),
      `Cor(vY)` = rlang::sym(p)
    )

  model_output <- model_output |>
    dplyr::left_join(dplyr::select(impact_table, term, itcv_), by = "term") |>
    dplyr::mutate(itcv = dplyr::coalesce(itcv, itcv_)) |>
    dplyr::select(-itcv_)

  pcor_mat_x <- ppcor::pcor(data_mat[, colnames(data_mat) != p])$estimate
  colnames(pcor_mat_x) <- colnames(data_mat)[colnames(data_mat) != p]
  pcor_df_x <- data.frame(pcor_mat_x) |>
    dplyr::select(!!rlang::sym(tested_variable)) |>
    dplyr::mutate(
      term = colnames(pcor_mat_x)
    )

  pcor_mat_y <- ppcor::pcor(data_mat[, colnames(data_mat) != tested_variable])$estimate
  colnames(pcor_mat_y) <- colnames(data_mat)[colnames(data_mat) != tested_variable]
  pcor_df_y <- data.frame(pcor_mat_y) |>
    dplyr::select(!!rlang::sym(p)) |>
    dplyr::mutate(
      term = colnames(pcor_mat_y)
    )

  pcor_df <- dplyr::inner_join(pcor_df_x, pcor_df_y, by = "term") |>
    dplyr::mutate(Partial_Impact = abs(!!rlang::sym(p) * !!rlang::sym(tested_variable)))

  # Partial Impact Table
  impact_table_partial <- pcor_df |>
    dplyr::as_tibble() |>
    dplyr::rename(
      `Partial Cor(vX)` = rlang::sym(tested_variable),
      `Partial Cor(vY)` = rlang::sym(p)
    )

  cat(paste0(
    "X represents ", tested_variable, ", Y represents ", p,
    ", v represents each covariate.\n",
    "First table is based on unconditional correlations, second table is based on\n",
    "partial correlations.\n\n"
  ))

  # Check if any row has all Partial Correlation components as NA
  if (any(is.na(impact_table_partial$`Partial Cor(vX)`) &
    is.na(impact_table_partial$`Partial Cor(vY)`) &
    is.na(impact_table_partial$Partial_Impact))) {
    stop(
      paste0(
        "Numerical instability detected in partial correlation. This indicates potential multicollinearity or scaling issues.\n",
        "To resolve this issue, consider:\n",
        "1) Standardize predictors with scale().\n",
        "2) Remove or combine highly correlated predictors.\n",
        "3) Apply regularization (e.g., ridge regression)."
      )
    )
  }

  impact_table <- impact_table |>
    dplyr::select(term, `Cor(vX)`, `Cor(vY)`, Impact)
  impact_table_partial <- impact_table_partial |>
    dplyr::select(term, `Partial Cor(vX)`, `Partial Cor(vY)`, Partial_Impact)

  if (round) {
    impact_table <- impact_table |>
      dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
    impact_table_partial <- impact_table_partial |>
      dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
  }

  # Return all three tables as a list
  return(list(Main_Output = model_output, Unconditional_Impact = impact_table, Partial_Impact = impact_table_partial))
}

dwinkler1 · 2025-07-23T09:56:45Z

A more general implementation that would also take random effects into account would be:

get_kr_df <- function(model_object) {
  var_names <- names(lme4::fixef(model_object))
  L <- diag(length(var_names)) |> data.frame()
  vcov_adj <- pbkrtest::vcovAdj(model_object)
  vcov_0 <- vcov(model_object)
  out <- suppressWarnings(purrr::map_dbl(
    L,
    pbkrtest::Lb_ddf,
    V0 = vcov_0, Vadj = vcov_adj
  ))
  names(out) <- var_names
  return(out)
}

#' Calculate partial correlation from t and df
t2pcorr <- function(t, df) {
  t / sqrt(t^2 + df)
}

beta2pcorr_ <- function(beta, se, df) {
  t <- beta / se
  return(t2pcorr(t, df))
}

#' Get a data frame with the estimates and added partial correlations
beta2pcorr <- function(model, terms = names(fixef(model)), df_ = NULL) {
  mod <- broom::tidy(model) |>
    dplyr::filter(term %in% terms)

  if (!is.null(df_)) {
    dd <- data.frame(df_ = df_)
    dd$term <- rownames(dd)
    mod <- dplyr::left_join(mod, dd, by = "term")
  } else if ("df" %in% names(mod)) {
    mod <- dplyr::rename(mod, df_ = df)
  } else {
    dd <- data.frame(df_ = get_kr_df_par(model, terms))
    dd$term <- names(dd)
    mod <- dplyr::left_join(mod, dd, by = "term")
  }

  mod |>
    dplyr::mutate(
      pcorr = beta2pcorr_(estimate, std.error, df_),
      df = df_
    ) |>
    dplyr::select(term, estimate, std.error, df, pcorr)
}

#' Calculate partial correlations and partial impact to benchmark ITCV
#' Calculate partial correlations and partial impact to benchmark ITCV
impact_table <- function(model_object, term, print = FALSE) {
  col_names <- if (isS4(model_object)) {
    all.vars(model_object@call)
  } else {
    all.vars(model_object$call)
  }
  y_name <- col_names[1]
  y_name_ <- rlang::sym(y_name)
  term_ <- rlang::sym(term)


  model_matrix <- model.matrix(model_object)
  x_names <- colnames(model_matrix)
  data_mat <- cbind(
    model.response(model.frame(model_object)),
    model_matrix[, x_names != "(Intercept)"]
  )
  colnames(data_mat)[1] <- y_name

  cor_mat <- stats::cor(data_mat)[, c(term, y_name)]
  cor_df <- data.frame(cor_mat) |>
    dplyr::mutate(
      term = rownames(cor_mat),
      impact = !!y_name_ * !!term_,
    ) |>
    dplyr::rename_with(~ paste0("corr_", c({{ term }}, y_name)), c(term_, y_name_))

  model_y <- update(model_object, formula = paste0(". ~ . -", term_))
  model_x <- update(model_object, formula = paste0(term_, " ~ . -", term_))
  if (inherits(model_y, "lmerMod")) {
    df_y <- get_kr_df(model_y)
    df_x <- get_kr_df(model_x)
  } else {
    df_y <- setNames(rep(model_y$df.residual, length(x_names)), x_names)
    df_x <- setNames(rep(model_x$df.residual, length(x_names)), x__names)
  }
  pcorr_y <- beta2pcorr(model_y, terms = x_names, df = df_y) |>
    dplyr::select(term, pcorr_y = pcorr)
  pcorr_x <- beta2pcorr(model_x, terms = x_names, df = df_x) |>
    dplyr::select(term, pcorr_x = pcorr)
  impact_table <- dplyr::inner_join(pcorr_x, pcorr_y, by = "term") |>
    dplyr::mutate(partial_impact = pcorr_x * pcorr_y) |>
    dplyr::rename_with(~ paste0("pcorr_", c(term, y_name)), c(pcorr_x, pcorr_y)) |>
    dplyr::filter(term != "(Intercept)") |>
    dplyr::inner_join(cor_df, by = "term")
  if (print) {
    cat("Raw:\n")
    impact_table |>
      dplyr::select(term, starts_with("c"), impact) |>
      print()
    cat("\nPartial:\n")
    impact_table |>
      dplyr::select(term, starts_with("p")) |>
      print()
    cat(rep("-", getOption("width") - 1), "\n")
  }
  return(impact_table)
}

jrosen48 · 2025-09-11T15:39:32Z

thanks we are reviewing

Implement extraction of call for models implemented as S4 classes

fdbdda4

- for these classes `$` is not implemented - access works via `@`

Additional fixes for access of s4 models

b8e0242

dwinkler1 marked this pull request as draft July 23, 2025 06:59

dwinkler1 added 3 commits July 23, 2025 17:16

Universal extraction of data matrix from models

1434b9d

Remove intercept when calculating correlations

ead6fa4

Remove SDs from model_output as this causes an error

9420e30

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Improvements for `output_table`#115

Improvements for `output_table`#115
dwinkler1 wants to merge 5 commits intokonfound-project:masterfrom
dwinkler1:output_table

dwinkler1 commented Jul 23, 2025 •

edited

Loading

Uh oh!

dwinkler1 commented Jul 23, 2025 •

edited

Loading

Uh oh!

dwinkler1 commented Jul 23, 2025 •

edited

Loading

Uh oh!

jrosen48 commented Sep 11, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Conversation

dwinkler1 commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

dwinkler1 commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

dwinkler1 commented Jul 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jrosen48 commented Sep 11, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

dwinkler1 commented Jul 23, 2025 •

edited

Loading

dwinkler1 commented Jul 23, 2025 •

edited

Loading

dwinkler1 commented Jul 23, 2025 •

edited

Loading