Skip to content

Improvements for output_table#115

Draft
dwinkler1 wants to merge 5 commits intokonfound-project:masterfrom
dwinkler1:output_table
Draft

Improvements for output_table#115
dwinkler1 wants to merge 5 commits intokonfound-project:masterfrom
dwinkler1:output_table

Conversation

@dwinkler1
Copy link
Copy Markdown

@dwinkler1 dwinkler1 commented Jul 23, 2025

  • for s4 classes $ is not implemented
  • access works via @
  • lmerMod (lme4) is implemented as an s4 class.

- for these classes `$` is not implemented
- access works via `@`
@dwinkler1
Copy link
Copy Markdown
Author

dwinkler1 commented Jul 23, 2025

I have a completely revamped version of output table that also works for categorical variables. Please let me know if such a large diff would be accepted in one PR:

output_table <- function(model_object, tested_variable, round = FALSE) {
  if (isS4(model_object)) {
    p <- all.vars(model_object@call)[1]
  } else {
    p <- all.vars(model_object$call)[1]
  }
  cat("Dependent variable is", p, "\n")
  model_output <- broom::tidy(model_object) # tidying output

  model_output$itcv <- NA

  var_row <- model_output$term == tested_variable
  model_output$itcv[var_row] <- abs(konfound(model_object,
    !!tested_variable,
    to_return = "raw_output"
  )$itcv)

  covariate_names <- model_output$term[
    !(model_output$term %in% c("(Intercept)", tested_variable))
  ]

  model_frame <- model.frame(model_object)
  model_matrix <- model.matrix(model_object)
  data_mat <- cbind(
    model.response(model_frame),
    model_matrix[, colnames(model_matrix) != "(Intercept)"]
  )
  colnames(data_mat)[1] <- p

  cor_mat <- stats::cor(data_mat)[, c(p, tested_variable)]
  cor_df <- data.frame(cor_mat) |>
    dplyr::mutate(
      term = rownames(cor_mat),
      Impact = !!rlang::sym(p) * !!rlang::sym(tested_variable),
      itcv_ = abs(Impact)
    )

  # Observed Impact Table
  impact_table <- cor_df |>
    dplyr::as_tibble() |>
    dplyr::filter(!term %in% c(p, tested_variable)) |>
    dplyr::rename(
      `Cor(vX)` = rlang::sym(tested_variable),
      `Cor(vY)` = rlang::sym(p)
    )

  model_output <- model_output |>
    dplyr::left_join(dplyr::select(impact_table, term, itcv_), by = "term") |>
    dplyr::mutate(itcv = dplyr::coalesce(itcv, itcv_)) |>
    dplyr::select(-itcv_)

  pcor_mat_x <- ppcor::pcor(data_mat[, colnames(data_mat) != p])$estimate
  colnames(pcor_mat_x) <- colnames(data_mat)[colnames(data_mat) != p]
  pcor_df_x <- data.frame(pcor_mat_x) |>
    dplyr::select(!!rlang::sym(tested_variable)) |>
    dplyr::mutate(
      term = colnames(pcor_mat_x)
    )

  pcor_mat_y <- ppcor::pcor(data_mat[, colnames(data_mat) != tested_variable])$estimate
  colnames(pcor_mat_y) <- colnames(data_mat)[colnames(data_mat) != tested_variable]
  pcor_df_y <- data.frame(pcor_mat_y) |>
    dplyr::select(!!rlang::sym(p)) |>
    dplyr::mutate(
      term = colnames(pcor_mat_y)
    )

  pcor_df <- dplyr::inner_join(pcor_df_x, pcor_df_y, by = "term") |>
    dplyr::mutate(Partial_Impact = abs(!!rlang::sym(p) * !!rlang::sym(tested_variable)))

  # Partial Impact Table
  impact_table_partial <- pcor_df |>
    dplyr::as_tibble() |>
    dplyr::rename(
      `Partial Cor(vX)` = rlang::sym(tested_variable),
      `Partial Cor(vY)` = rlang::sym(p)
    )

  cat(paste0(
    "X represents ", tested_variable, ", Y represents ", p,
    ", v represents each covariate.\n",
    "First table is based on unconditional correlations, second table is based on\n",
    "partial correlations.\n\n"
  ))

  # Check if any row has all Partial Correlation components as NA
  if (any(is.na(impact_table_partial$`Partial Cor(vX)`) &
    is.na(impact_table_partial$`Partial Cor(vY)`) &
    is.na(impact_table_partial$Partial_Impact))) {
    stop(
      paste0(
        "Numerical instability detected in partial correlation. This indicates potential multicollinearity or scaling issues.\n",
        "To resolve this issue, consider:\n",
        "1) Standardize predictors with scale().\n",
        "2) Remove or combine highly correlated predictors.\n",
        "3) Apply regularization (e.g., ridge regression)."
      )
    )
  }

  impact_table <- impact_table |>
    dplyr::select(term, `Cor(vX)`, `Cor(vY)`, Impact)
  impact_table_partial <- impact_table_partial |>
    dplyr::select(term, `Partial Cor(vX)`, `Partial Cor(vY)`, Partial_Impact)

  if (round) {
    impact_table <- impact_table |>
      dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
    impact_table_partial <- impact_table_partial |>
      dplyr::mutate(across(where(is.numeric), ~ round(.x, 4)))
  }

  # Return all three tables as a list
  return(list(Main_Output = model_output, Unconditional_Impact = impact_table, Partial_Impact = impact_table_partial))
}

@dwinkler1 dwinkler1 marked this pull request as draft July 23, 2025 06:59
@dwinkler1
Copy link
Copy Markdown
Author

dwinkler1 commented Jul 23, 2025

A more general implementation that would also take random effects into account would be:

get_kr_df <- function(model_object) {
  var_names <- names(lme4::fixef(model_object))
  L <- diag(length(var_names)) |> data.frame()
  vcov_adj <- pbkrtest::vcovAdj(model_object)
  vcov_0 <- vcov(model_object)
  out <- suppressWarnings(purrr::map_dbl(
    L,
    pbkrtest::Lb_ddf,
    V0 = vcov_0, Vadj = vcov_adj
  ))
  names(out) <- var_names
  return(out)
}

#' Calculate partial correlation from t and df
t2pcorr <- function(t, df) {
  t / sqrt(t^2 + df)
}

beta2pcorr_ <- function(beta, se, df) {
  t <- beta / se
  return(t2pcorr(t, df))
}

#' Get a data frame with the estimates and added partial correlations
beta2pcorr <- function(model, terms = names(fixef(model)), df_ = NULL) {
  mod <- broom::tidy(model) |>
    dplyr::filter(term %in% terms)

  if (!is.null(df_)) {
    dd <- data.frame(df_ = df_)
    dd$term <- rownames(dd)
    mod <- dplyr::left_join(mod, dd, by = "term")
  } else if ("df" %in% names(mod)) {
    mod <- dplyr::rename(mod, df_ = df)
  } else {
    dd <- data.frame(df_ = get_kr_df_par(model, terms))
    dd$term <- names(dd)
    mod <- dplyr::left_join(mod, dd, by = "term")
  }

  mod |>
    dplyr::mutate(
      pcorr = beta2pcorr_(estimate, std.error, df_),
      df = df_
    ) |>
    dplyr::select(term, estimate, std.error, df, pcorr)
}

#' Calculate partial correlations and partial impact to benchmark ITCV
#' Calculate partial correlations and partial impact to benchmark ITCV
impact_table <- function(model_object, term, print = FALSE) {
  col_names <- if (isS4(model_object)) {
    all.vars(model_object@call)
  } else {
    all.vars(model_object$call)
  }
  y_name <- col_names[1]
  y_name_ <- rlang::sym(y_name)
  term_ <- rlang::sym(term)


  model_matrix <- model.matrix(model_object)
  x_names <- colnames(model_matrix)
  data_mat <- cbind(
    model.response(model.frame(model_object)),
    model_matrix[, x_names != "(Intercept)"]
  )
  colnames(data_mat)[1] <- y_name

  cor_mat <- stats::cor(data_mat)[, c(term, y_name)]
  cor_df <- data.frame(cor_mat) |>
    dplyr::mutate(
      term = rownames(cor_mat),
      impact = !!y_name_ * !!term_,
    ) |>
    dplyr::rename_with(~ paste0("corr_", c({{ term }}, y_name)), c(term_, y_name_))

  model_y <- update(model_object, formula = paste0(". ~ . -", term_))
  model_x <- update(model_object, formula = paste0(term_, " ~ . -", term_))
  if (inherits(model_y, "lmerMod")) {
    df_y <- get_kr_df(model_y)
    df_x <- get_kr_df(model_x)
  } else {
    df_y <- setNames(rep(model_y$df.residual, length(x_names)), x_names)
    df_x <- setNames(rep(model_x$df.residual, length(x_names)), x__names)
  }
  pcorr_y <- beta2pcorr(model_y, terms = x_names, df = df_y) |>
    dplyr::select(term, pcorr_y = pcorr)
  pcorr_x <- beta2pcorr(model_x, terms = x_names, df = df_x) |>
    dplyr::select(term, pcorr_x = pcorr)
  impact_table <- dplyr::inner_join(pcorr_x, pcorr_y, by = "term") |>
    dplyr::mutate(partial_impact = pcorr_x * pcorr_y) |>
    dplyr::rename_with(~ paste0("pcorr_", c(term, y_name)), c(pcorr_x, pcorr_y)) |>
    dplyr::filter(term != "(Intercept)") |>
    dplyr::inner_join(cor_df, by = "term")
  if (print) {
    cat("Raw:\n")
    impact_table |>
      dplyr::select(term, starts_with("c"), impact) |>
      print()
    cat("\nPartial:\n")
    impact_table |>
      dplyr::select(term, starts_with("p")) |>
      print()
    cat(rep("-", getOption("width") - 1), "\n")
  }
  return(impact_table)
}

@jrosen48
Copy link
Copy Markdown
Collaborator

thanks we are reviewing

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants