diff --git a/NAMESPACE b/NAMESPACE index 9c7147e5..52a83da7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(style_roche_number) export(style_roche_percent) export(style_roche_pvalue) export(style_roche_ratio) +export(tbl_ancova) export(tbl_baseline_chg) export(tbl_coxph) export(tbl_demographics) diff --git a/NEWS.md b/NEWS.md index 2122ed67..60823d43 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ ## New Functions and Functionality +* Added `tbl_ancova()` for building ANCOVA tables with adjusted means and treatment-vs-control contrasts using `emmeans`. (#6) + * Added `g_lineplot()` for creating line plots with optional summary statistics table. Includes helper functions `calc_stats()`, `g_lineplot_table()`, and `preprocess_lineplot_data()`. * Added `ard_tabulate_abnormal_by_baseline()` for creating an Analysis Results Data (ARD) object counting participants with abnormal assessments, stratified by their baseline status. diff --git a/R/tbl_ancova.R b/R/tbl_ancova.R new file mode 100644 index 00000000..c50dd67d --- /dev/null +++ b/R/tbl_ancova.R @@ -0,0 +1,372 @@ +#' ANCOVA Table +#' +#' @description +#' Builds a table of adjusted means and treatment-vs-control contrasts +#' from a linear model (ANCOVA). The table displays, for each treatment group: +#' - n (number of observations) +#' - Adjusted Mean (least-squares mean) +#' - Difference in Adjusted Means (vs reference group) +#' - Confidence interval for the difference +#' - p-value for the difference +#' +#' The model is fit via [`cardx::construct_model()`] and least-squares +#' estimates are obtained with the `emmeans` package. Contrasts use +#' `emmeans::contrast(method = "trt.vs.ctrl")` so that each non-reference +#' group is compared to the reference group only. +#' +#' @param data (`data.frame`)\cr +#' Analysis data set, typically one parameter/visit subset. +#' @param formula (`formula`)\cr +#' Model formula passed to the fitting function. The left-hand side is the +#' response (e.g. `CHG`), the right-hand side includes the treatment variable +#' and any covariates (e.g. `CHG ~ TRT01A + BASE`). +#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' Column in `data` identifying treatment groups. This variable must also +#' appear on the right-hand side of `formula`. Used for column headers. +#' @param ref_group (`string`)\cr +#' Level of `by` that is the reference (control) group. Contrasts are +#' computed as each non-reference group minus this group. +#' @param conf.level (scalar `numeric`)\cr +#' Confidence level for the difference in adjusted means. +#' Default is `0.95`. +#' @param method (`string`)\cr +#' Modelling function name. Default is `"lm"`. +#' @param method.args (`list`)\cr +#' Additional arguments passed to the modelling function. +#' @param package (`string`)\cr +#' Package exporting `method`. Default is `"stats"`. +#' @param adjust (`string`)\cr +#' Multiplicity adjustment method for contrasts, passed to +#' [`emmeans::contrast()`]. Common choices: `"none"` (no adjustment, +#' default), `"dunnett"` (Dunnett's method), `"bonferroni"`, `"tukey"`. +#' See [`emmeans::summary.emmGrid()`] for all options. +#' @param label (`string`)\cr +#' Label for the top-level row in the table. Defaults to the label attribute +#' of the response variable, or the variable name if no label exists. +#' When used inside `tbl_strata(strata = PARAM)`, pass the parameter value +#' (e.g. `unique(data$PARAM)`) so each sub-table is labelled correctly. +#' @param denominator (`data.frame`)\cr +#' Optional data frame used to compute the header Ns (typically `ADSL`). +#' When supplied, the column headers show `(N = )` from this data +#' frame rather than from `data`. +#' +#' @return A `'gtsummary'` table of class `c("tbl_ancova", "gtsummary")`. +#' @name tbl_ancova +#' +#' @examples +#' # Simple ANCOVA with baseline covariate +#' cards::ADLB |> +#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> +#' tbl_ancova( +#' formula = CHG ~ TRTA + BASE, +#' by = TRTA, +#' ref_group = "Placebo" +#' ) +#' +#' # With denominator for header Ns +#' cards::ADLB |> +#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> +#' tbl_ancova( +#' formula = CHG ~ TRTA + BASE, +#' by = TRTA, +#' ref_group = "Placebo", +#' denominator = cards::ADSL +#' ) +#' +#' # With custom label (e.g. parameter name inside tbl_strata) +#' cards::ADLB |> +#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> +#' tbl_ancova( +#' formula = CHG ~ TRTA + BASE, +#' by = TRTA, +#' ref_group = "Placebo", +#' label = "Sodium (mmol/L)" +#' ) +#' +#' # With Dunnett's multiplicity adjustment +#' cards::ADLB |> +#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> +#' tbl_ancova( +#' formula = CHG ~ TRTA + BASE, +#' by = TRTA, +#' ref_group = "Placebo", +#' adjust = "dunnett" +#' ) +#' +#' # Custom column order — set factor levels on the input data +#' cards::ADLB |> +#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> +#' dplyr::mutate(TRTA = factor(TRTA, levels = c( +#' "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose" +#' ))) |> +#' tbl_ancova( +#' formula = CHG ~ TRTA + BASE, +#' by = TRTA, +#' ref_group = "Placebo" +#' ) +#' +#' @export +tbl_ancova <- function(data, + formula, + by, + ref_group, + conf.level = 0.95, + method = "lm", + method.args = list(), + package = "stats", + adjust = "none", + label = NULL, + denominator = NULL) { + set_cli_abort_call() + + # input checks --------------------------------------------------------------- + check_not_missing(data) + check_not_missing(formula) + check_not_missing(ref_group) + + check_class(data, "data.frame") + check_class(formula, "formula") + check_string(ref_group) + check_string(method) + check_string(package) + check_scalar(conf.level) + check_range(conf.level, range = c(0, 1)) + check_string(adjust) + if (!is.null(label)) check_string(label) + check_pkg_installed("emmeans") + + by <- dplyr::select(data, {{ by }}) |> names() + check_scalar(by, allow_empty = FALSE) + + if (!ref_group %in% unique(data[[by]])) { + cli::cli_abort( + "{.arg ref_group} value {.val {ref_group}} not found in column {.val {by}}.", + call = get_cli_abort_call() + ) + } + + # relevel so ref_group is first emmeans uses the first level as reference + data[[by]] <- stats::relevel(factor(data[[by]]), ref = ref_group) + + # fit model and compute emmeans ---------------------------------------------- + mod <- cardx::construct_model( + data = data, formula = formula, method = method, + method.args = {{ method.args }}, package = package + ) + + emmeans_specs <- stats::reformulate(by) + emm <- emmeans::emmeans(mod, specs = emmeans_specs) + + # LS means ------------------------------------------------------------------- + emm_summary <- summary(emm, calc = c(n = ".wgt.")) |> + dplyr::as_tibble() |> + dplyr::rename( + estimate = dplyr::any_of(c("emmean", "prob")), + n = dplyr::any_of("n") + ) + + # contrasts (treatment vs control) ------------------------------------------- + contr_summary <- emmeans::contrast(emm, method = "trt.vs.ctrl", ref = ref_group, adjust = adjust) |> + summary(infer = TRUE, level = conf.level) |> + dplyr::as_tibble() |> + dplyr::rename( + conf.low = dplyr::any_of(c("lower.CL", "asymp.LCL")), + conf.high = dplyr::any_of(c("upper.CL", "asymp.UCL")) + ) + + + # parse contrast labels to extract the non-reference group name + # e.g. "Xanomeline High Dose - Placebo" -> "Xanomeline High Dose" + contr_summary$trt_group <- sub( + paste0("\\s*-\\s*", .escape_regex(ref_group), "$"), "", + contr_summary$contrast + ) + + # build ARD for tbl_ard_summary ---------------------------------------------- + trt_levels <- levels(data[[by]]) + endpoint_var <- all.vars(formula)[1] + endpoint_label <- label %||% attr(data[[endpoint_var]], "label") %||% endpoint_var + + ard <- .build_ancova_ard( + emm_summary = emm_summary, + contr_summary = contr_summary, + by = by, + trt_levels = trt_levels, + ref_group = ref_group, + endpoint_label = endpoint_label, + conf.level = conf.level + ) + + # build gtsummary table ------------------------------------------------------ + ci_pct <- paste0(round(conf.level * 100), "%") + + tbl <- gtsummary::tbl_ard_summary( + cards = ard, + by = dplyr::all_of(by), + type = everything() ~ "continuous2", + statistic = everything() ~ c( + "{n}", + "{estimate}", + "{mean.diff}", + paste0("{conf.low}, {conf.high}"), + "{p.value}" + ), + ) |> + gtsummary::modify_table_body( + ~ .x |> + dplyr::mutate( + label = dplyr::case_when( + .data$label == "n" ~ "n", + .data$label == "Mean" ~ "Adjusted Mean", + .data$label == "Mean Difference" ~ "Difference in Adjusted Means", + .data$label == "CI Lower Bound, CI Upper Bound" ~ + paste0(ci_pct, " CI for Difference in Adjusted Means"), + .data$label == "p-value" ~ "p-value", + TRUE ~ .data$label + ), + # blank out stats that don't apply to the reference group + dplyr::across( + dplyr::starts_with("stat_"), + ~ dplyr::if_else( + .data$label %in% c( + "Difference in Adjusted Means", + paste0(ci_pct, " CI for Difference in Adjusted Means"), + "p-value" + ) & grepl("NA", .x, fixed = TRUE), + "", + .x + ) + ) + ) + ) + + # add denominator header Ns if provided + if (!is.null(denominator)) { + check_class(denominator, "data.frame") + if (by %in% names(denominator)) { + # read the column-to-group mapping that tbl_ard_summary assigned + col_map <- tbl$table_body |> + dplyr::select(dplyr::starts_with("stat_")) |> + names() + # extract group labels from existing headers (format: "**Group**") + existing_headers <- tbl$table_styling$header |> + dplyr::filter(.data$column %in% col_map) |> + dplyr::mutate(group = gsub("^\\*\\*|\\*\\*$", "", .data$label)) |> + dplyr::select("column", "group") + + denom_n <- denominator |> + dplyr::count(!!rlang::sym(by), name = "n") + + header_df <- existing_headers |> + dplyr::left_join(denom_n, by = stats::setNames(by, "group")) + + header_labels <- stats::setNames( + paste0("**", header_df$group, "**\n(N = ", header_df$n, ")"), + header_df$column + ) + tbl <- tbl |> + gtsummary::modify_header(!!!header_labels) + } + } + + tbl |> + structure( + class = c("tbl_ancova", class(tbl)), + by = by, + ref_group = ref_group, + conf.level = conf.level, + adjust = adjust, + method = method, + package = package + ) +} + + +# Assembles an ARD (Analysis Results Dataset) from emmeans summaries. +# For each treatment level, creates rows for: n, adjusted mean (from emm_summary), +# and contrast stats mean difference, CI bounds, p-value (from contr_summary). +# Reference group gets NA for contrast stats so the table structure is uniform. +.build_ancova_ard <- function(emm_summary, contr_summary, by, + trt_levels, ref_group, endpoint_label, + conf.level) { + ard_rows <- list() + + for (trt in trt_levels) { + emm_row <- emm_summary[emm_summary[[by]] == trt, ] + n_val <- as.integer(emm_row$n) + est_val <- emm_row$estimate + + # base rows: n and adjusted mean + ard_rows <- c(ard_rows, list( + .ancova_ard_row(endpoint_label, by, trt, "n", n_val, "n", "integer"), + .ancova_ard_row(endpoint_label, by, trt, "estimate", est_val, "Mean", "numeric") + )) + + # contrast rows (only for non-reference groups) + if (trt != ref_group) { + contr_row <- contr_summary[contr_summary$trt_group == trt, ] + if (nrow(contr_row) == 1) { + ard_rows <- c(ard_rows, list( + .ancova_ard_row(endpoint_label, by, trt, "mean.diff", contr_row$estimate, "Mean Difference", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "conf.low", contr_row$conf.low, "CI Lower Bound", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "conf.high", contr_row$conf.high, "CI Upper Bound", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "p.value", contr_row$p.value, "p-value", "numeric") + )) + } + } else { + # reference group: fill with NA so the table structure is consistent + ard_rows <- c(ard_rows, list( + .ancova_ard_row(endpoint_label, by, trt, "mean.diff", NA_real_, "Mean Difference", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "conf.low", NA_real_, "CI Lower Bound", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "conf.high", NA_real_, "CI Upper Bound", "numeric"), + .ancova_ard_row(endpoint_label, by, trt, "p.value", NA_real_, "p-value", "numeric") + )) + } + } + + dplyr::bind_rows(ard_rows) |> + dplyr::mutate( + context = "continuous", + fmt_fun = map(.data$stat_name, function(nm) { + switch(nm, + "n" = function(x) sprintf("%.0f", x), + "estimate" = , + "mean.diff" = , + "conf.low" = , + "conf.high" = function(x) sub("^-0.00$", "0.00", sprintf("%.2f", x)), + "p.value" = function(x) sprintf("%.4f", x), + function(x) as.character(x) + ) + }) + ) |> + cards::as_card() +} + + +# Internal: create a single ARD row -------------------------------------------- +.ancova_ard_row <- function(variable, group1, group1_level, + stat_name, stat, stat_label, var_type) { + dplyr::tibble( + variable = variable, + var_type = var_type, + var_label = variable, + group1 = group1, + group1_level = list(group1_level), + stat_name = stat_name, + stat_label = stat_label, + stat = list(stat), + warning = list(NULL), + error = list(NULL) + ) +} + + +# Escapes regex special characters so group names can be used in sub() patterns. +# e.g. "Arm (High)" -> "Arm \\(High\\)", "Dose 1.5mg" -> "Dose 1\\.5mg" +.escape_regex <- function(x) { + chars <- c(".", "|", "^", "$", "(", ")", "[", "]", "{", "}", "*", "+", "?", "\\") + for (ch in chars) { + x <- gsub(ch, paste0("\\", ch), x, fixed = TRUE) + } + x +} diff --git a/_pkgdown.yml b/_pkgdown.yml index caa3260f..e59599fa 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -23,6 +23,10 @@ reference: - add_hierarchical_count_row - ard_tabulate_abnormal_by_baseline + - title: "ANCOVA Tables" + contents: + - tbl_ancova + - title: "Survival Analysis Tables" contents: - tbl_survfit_quantiles diff --git a/inst/WORDLIST b/inst/WORDLIST index 0a17e3c7..2e94b4b5 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -3,13 +3,16 @@ ADSL ADaM AE AEs +ANCOVA ARD ARDs +Dunnett's GDSR HLT Hoffmann Kaplan MMRM +Modelling Ns ORCID PY @@ -18,6 +21,7 @@ Pharmacokinetics RMP RMPT RStudio +Recode Rua SOCs Tidyverse @@ -27,6 +31,7 @@ cardx custiomization customizations de +durations efron fleming flextable @@ -35,14 +40,18 @@ gehan ggplot gtsummary harrington +jitter +labelled lineplot lineplots +modelling peto pharma pre prentice quosure quosures +recodes responder rlang's survfit diff --git a/man/tbl_ancova.Rd b/man/tbl_ancova.Rd new file mode 100644 index 00000000..c76857e7 --- /dev/null +++ b/man/tbl_ancova.Rd @@ -0,0 +1,139 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tbl_ancova.R +\name{tbl_ancova} +\alias{tbl_ancova} +\title{ANCOVA Table} +\usage{ +tbl_ancova( + data, + formula, + by, + ref_group, + conf.level = 0.95, + method = "lm", + method.args = list(), + package = "stats", + adjust = "none", + label = NULL, + denominator = NULL +) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +Analysis data set, typically one parameter/visit subset.} + +\item{formula}{(\code{formula})\cr +Model formula passed to the fitting function. The left-hand side is the +response (e.g. \code{CHG}), the right-hand side includes the treatment variable +and any covariates (e.g. \code{CHG ~ TRT01A + BASE}).} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +Column in \code{data} identifying treatment groups. This variable must also +appear on the right-hand side of \code{formula}. Used for column headers.} + +\item{ref_group}{(\code{string})\cr +Level of \code{by} that is the reference (control) group. Contrasts are +computed as each non-reference group minus this group.} + +\item{conf.level}{(scalar \code{numeric})\cr +Confidence level for the difference in adjusted means. +Default is \code{0.95}.} + +\item{method}{(\code{string})\cr +Modelling function name. Default is \code{"lm"}.} + +\item{method.args}{(\code{list})\cr +Additional arguments passed to the modelling function.} + +\item{package}{(\code{string})\cr +Package exporting \code{method}. Default is \code{"stats"}.} + +\item{adjust}{(\code{string})\cr +Multiplicity adjustment method for contrasts, passed to +\code{\link[emmeans:contrast]{emmeans::contrast()}}. Common choices: \code{"none"} (no adjustment, +default), \code{"dunnett"} (Dunnett's method), \code{"bonferroni"}, \code{"tukey"}. +See \code{\link[emmeans:summary.emmGrid]{emmeans::summary.emmGrid()}} for all options.} + +\item{label}{(\code{string})\cr +Label for the top-level row in the table. Defaults to the label attribute +of the response variable, or the variable name if no label exists. +When used inside \code{tbl_strata(strata = PARAM)}, pass the parameter value +(e.g. \code{unique(data$PARAM)}) so each sub-table is labelled correctly.} + +\item{denominator}{(\code{data.frame})\cr +Optional data frame used to compute the header Ns (typically \code{ADSL}). +When supplied, the column headers show \verb{(N = )} from this data +frame rather than from \code{data}.} +} +\value{ +A \code{'gtsummary'} table of class \code{c("tbl_ancova", "gtsummary")}. +} +\description{ +Builds a table of adjusted means and treatment-vs-control contrasts +from a linear model (ANCOVA). The table displays, for each treatment group: +\itemize{ +\item n (number of observations) +\item Adjusted Mean (least-squares mean) +\item Difference in Adjusted Means (vs reference group) +\item Confidence interval for the difference +\item p-value for the difference +} + +The model is fit via \code{\link[cardx:construct_model]{cardx::construct_model()}} and least-squares +estimates are obtained with the \code{emmeans} package. Contrasts use +\code{emmeans::contrast(method = "trt.vs.ctrl")} so that each non-reference +group is compared to the reference group only. +} +\examples{ +# Simple ANCOVA with baseline covariate +cards::ADLB |> + dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> + tbl_ancova( + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo" + ) + +# With denominator for header Ns +cards::ADLB |> + dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> + tbl_ancova( + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + denominator = cards::ADSL + ) + +# With custom label (e.g. parameter name inside tbl_strata) +cards::ADLB |> + dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> + tbl_ancova( + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + label = "Sodium (mmol/L)" + ) + +# With Dunnett's multiplicity adjustment +cards::ADLB |> + dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> + tbl_ancova( + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + adjust = "dunnett" + ) + +# Custom column order — set factor levels on the input data +cards::ADLB |> + dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |> + dplyr::mutate(TRTA = factor(TRTA, levels = c( + "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose" + ))) |> + tbl_ancova( + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo" + ) + +} diff --git a/tests/testthat/_snaps/tbl_ancova.md b/tests/testthat/_snaps/tbl_ancova.md new file mode 100644 index 00000000..0e86e37f --- /dev/null +++ b/tests/testthat/_snaps/tbl_ancova.md @@ -0,0 +1,82 @@ +# tbl_ancova() works with default settings + + Code + as.data.frame(tbl) + Output + Characteristic Placebo Xanomeline High Dose Xanomeline Low Dose + 1 Change from Baseline + 2 n 5 4 3 + 3 Adjusted Mean 0.60 -0.66 -0.47 + 4 Difference in Adjusted Means -1.26 -1.07 + 5 95% CI for Difference in Adjusted Means -4.23, 1.71 -4.35, 2.21 + 6 p-value 0.3563 0.4733 + +# tbl_ancova() works with denominator + + Code + as.data.frame(tbl) + Output + Characteristic Placebo\n(N = 86) Xanomeline High Dose\n(N = 84) Xanomeline Low Dose\n(N = 84) + 1 Change from Baseline + 2 n 5 4 3 + 3 Adjusted Mean 0.60 -0.66 -0.47 + 4 Difference in Adjusted Means -1.26 -1.07 + 5 95% CI for Difference in Adjusted Means -4.23, 1.71 -4.35, 2.21 + 6 p-value 0.3563 0.4733 + +# tbl_ancova() errors on invalid ref_group + + Code + tbl_ancova(data = df_ancova, formula = CHG ~ TRTA + BASE, by = TRTA, ref_group = "NonexistentArm") + Condition + Error in `tbl_ancova()`: + ! `ref_group` value "NonexistentArm" not found in column "TRTA". + +# tbl_ancova() works with custom label + + Code + as.data.frame(tbl) + Output + Characteristic Placebo Xanomeline High Dose Xanomeline Low Dose + 1 Sodium (mmol/L) + 2 n 5 4 3 + 3 Adjusted Mean 0.60 -0.66 -0.47 + 4 Difference in Adjusted Means -1.26 -1.07 + 5 95% CI for Difference in Adjusted Means -4.23, 1.71 -4.35, 2.21 + 6 p-value 0.3563 0.4733 + +# tbl_ancova() errors on non-string label + + Code + tbl_ancova(data = df_ancova, formula = CHG ~ TRTA + BASE, by = TRTA, ref_group = "Placebo", + label = 123) + Condition + Error in `tbl_ancova()`: + ! The `label` argument must be a string, not a number. + +# tbl_ancova() works with Dunnett adjustment + + Code + as.data.frame(tbl) + Output + Characteristic Placebo Xanomeline High Dose Xanomeline Low Dose + 1 Change from Baseline + 2 n 5 4 3 + 3 Adjusted Mean 0.60 -0.66 -0.47 + 4 Difference in Adjusted Means -1.26 -1.07 + 5 95% CI for Difference in Adjusted Means -4.73, 2.22 -4.91, 2.77 + 6 p-value 0.5491 0.6839 + +# tbl_ancova() works without covariates + + Code + as.data.frame(tbl) + Output + Characteristic Placebo Xanomeline High Dose Xanomeline Low Dose + 1 Change from Baseline + 2 n 5 4 3 + 3 Adjusted Mean 0.20 -0.50 0.00 + 4 Difference in Adjusted Means -0.70 -0.20 + 5 95% CI for Difference in Adjusted Means -4.34, 2.94 -4.16, 3.76 + 6 p-value 0.6738 0.9116 + diff --git a/tests/testthat/test-tbl_ancova.R b/tests/testthat/test-tbl_ancova.R new file mode 100644 index 00000000..9e43ce1f --- /dev/null +++ b/tests/testthat/test-tbl_ancova.R @@ -0,0 +1,232 @@ +df_ancova <- cards::ADLB |> + dplyr::filter( + PARAMCD == "SODIUM", + AVISIT == "Week 8" + ) |> + dplyr::mutate(TRTA = factor(TRTA)) + + +test_that("tbl_ancova() works with default settings", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo" + ) + ) + + expect_s3_class(tbl, "tbl_ancova") + expect_s3_class(tbl, "gtsummary") + + # table has the expected row labels + labels <- tbl$table_body$label[tbl$table_body$row_type == "level"] + expect_true("n" %in% labels) + expect_true("Adjusted Mean" %in% labels) + expect_true("Difference in Adjusted Means" %in% labels) + expect_true("p-value" %in% labels) + + # reference group has blank contrast cells + ref_col <- tbl$table_body |> + dplyr::filter(.data$label == "Difference in Adjusted Means") |> + dplyr::pull("stat_1") + expect_equal(ref_col, "") + + expect_snapshot(as.data.frame(tbl)) +}) + + +test_that("tbl_ancova() works with denominator", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + denominator = cards::ADSL + ) + ) + + expect_s3_class(tbl, "tbl_ancova") + + # header Ns come from ADSL, not from the analysis data + adsl_n <- cards::ADSL |> + dplyr::count(TRTA) + headers <- tbl$table_styling$header |> + dplyr::filter(grepl("^stat_", .data$column)) + for (i in seq_len(nrow(adsl_n))) { + matching_header <- headers$label[grepl(adsl_n$TRTA[i], headers$label, fixed = TRUE)] + expect_true( + length(matching_header) == 1 && grepl(paste0("N = ", adsl_n$n[i]), matching_header), + label = paste("Header for", adsl_n$TRTA[i], "should show N =", adsl_n$n[i]) + ) + } + + expect_snapshot(as.data.frame(tbl)) +}) + + +test_that("tbl_ancova() denominator headers align when ref_group is not alphabetically first", { + withr::local_options(list(width = 200)) + + # Arms where the reference group ("B: Placebo") sorts after "A: Drug X" + df_alpha <- df_ancova |> + dplyr::mutate(ARM = dplyr::case_when( + TRTA == "Placebo" ~ "B: Placebo", + TRTA == "Xanomeline High Dose" ~ "A: Drug X", + TRTA == "Xanomeline Low Dose" ~ "C: Combination" + )) + adsl_alpha <- cards::ADSL |> + dplyr::mutate(ARM = dplyr::case_when( + TRTA == "Placebo" ~ "B: Placebo", + TRTA == "Xanomeline High Dose" ~ "A: Drug X", + TRTA == "Xanomeline Low Dose" ~ "C: Combination" + )) + + expect_silent( + tbl <- tbl_ancova( + data = df_alpha, + formula = CHG ~ ARM + BASE, + by = ARM, + ref_group = "B: Placebo", + denominator = adsl_alpha + ) + ) + + # verify each column header matches its data: + # the reference group column must show blank contrast rows + headers <- tbl$table_styling$header |> + dplyr::filter(grepl("^stat_", .data$column)) + ref_col <- headers$column[grepl("B: Placebo", headers$label)] + + diff_row <- tbl$table_body |> + dplyr::filter(.data$label == "Difference in Adjusted Means") + # reference column must be blank + + expect_equal(diff_row[[ref_col]], "") + # non-reference columns must have numeric values + non_ref_cols <- setdiff(headers$column, ref_col) + for (col in non_ref_cols) { + expect_false(diff_row[[col]] == "", label = paste("column", col, "should not be blank")) + } +}) + +test_that("tbl_ancova() works with conf.level = 0.90", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + conf.level = 0.90 + ) + ) + + # CI label reflects 90% + labels <- tbl$table_body$label[tbl$table_body$row_type == "level"] + expect_true(any(grepl("90%", labels))) +}) + + +test_that("tbl_ancova() errors on invalid ref_group", { + expect_snapshot( + tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "NonexistentArm" + ), + error = TRUE + ) +}) + + +test_that("tbl_ancova() works with custom label", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + label = "Sodium (mmol/L)" + ) + ) + + expect_s3_class(tbl, "tbl_ancova") + + # the custom label appears in the table body instead of the variable name + top_label <- tbl$table_body$label[tbl$table_body$row_type == "label"] + expect_true("Sodium (mmol/L)" %in% top_label) + expect_false("CHG" %in% top_label) + expect_false("Change from Baseline" %in% top_label) + + expect_snapshot(as.data.frame(tbl)) +}) + +test_that("tbl_ancova() errors on non-string label", { + expect_snapshot( + tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + label = 123 + ), + error = TRUE + ) +}) + + +test_that("tbl_ancova() works with Dunnett adjustment", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA + BASE, + by = TRTA, + ref_group = "Placebo", + adjust = "dunnett" + ) + ) + + expect_s3_class(tbl, "tbl_ancova") + expect_equal(attr(tbl, "adjust"), "dunnett") + expect_equal(attr(tbl, "method"), "lm") + expect_equal(attr(tbl, "package"), "stats") + expect_equal(attr(tbl, "by"), "TRTA") + expect_equal(attr(tbl, "ref_group"), "Placebo") + + expect_snapshot(as.data.frame(tbl)) +}) + + +test_that("tbl_ancova() works without covariates", { + withr::local_options(list(width = 200)) + + expect_silent( + tbl <- tbl_ancova( + data = df_ancova, + formula = CHG ~ TRTA, + by = TRTA, + ref_group = "Placebo" + ) + ) + + expect_s3_class(tbl, "tbl_ancova") + + # verify labels are present even without covariates + labels <- tbl$table_body$label[tbl$table_body$row_type == "level"] + expect_true("Adjusted Mean" %in% labels) + expect_true("Difference in Adjusted Means" %in% labels) + + expect_snapshot(as.data.frame(tbl)) +})