Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8a7cebb
add tbl_ancova() for ANCOVA tables with treatment-vs-control contrast…
Melkiades May 10, 2026
03cbde7
add adjust parameter for multiplicity correction in tbl_ancova()
Melkiades May 10, 2026
366b2f9
address PR review comments on tbl_ancova()
Melkiades May 10, 2026
25aedac
move `@return` before `@examples`, use `structure()` for attributes, …
Melkiades May 10, 2026
89a2f1b
use standalone map(), fix tidyselect warning in tbl_mmrm, add snapshots
Melkiades May 10, 2026
dacea6c
fix tidyselect warning in process_selectors, regenerate docs with rox…
Melkiades May 10, 2026
088b44b
fixes
Melkiades May 10, 2026
7dfd609
wordlist
Melkiades May 10, 2026
2c48e7f
fix
Melkiades May 10, 2026
37bc080
fix snapshot: -0.00 rounding in no-covariates test
Melkiades May 10, 2026
6f77943
strip negative zero in formatted output to avoid platform-dependent s…
Melkiades May 10, 2026
a738391
add ANCOVA Tables section to pkgdown reference
Melkiades May 10, 2026
54c3cf9
snaps
Melkiades May 10, 2026
120314f
use variable label attribute for endpoint label instead of raw variab…
Melkiades May 10, 2026
c17a789
add label parameter to tbl_ancova() for custom row label
Melkiades May 11, 2026
89cb17c
add input check, example, and tests for label parameter
Melkiades May 11, 2026
e740388
use expect_snapshot for label type error test
Melkiades May 11, 2026
d88dc15
fixes
Melkiades May 11, 2026
2827a14
fix: align denominator headers with tbl_ard_summary column order
Melkiades May 26, 2026
fc1ce0f
docs: add example showing custom column order via factor levels
Melkiades May 26, 2026
d30fab6
Merge branch 'main' into 6_tbl_ancova@main
Melkiades May 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export(style_roche_number)
export(style_roche_percent)
export(style_roche_pvalue)
export(style_roche_ratio)
export(tbl_ancova)
export(tbl_baseline_chg)
export(tbl_coxph)
export(tbl_demographics)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## New Functions and Functionality

* Added `tbl_ancova()` for building ANCOVA tables with adjusted means and treatment-vs-control contrasts using `emmeans`. (#6)

* Added `g_lineplot()` for creating line plots with optional summary statistics table. Includes helper functions `calc_stats()`, `g_lineplot_table()`, and `preprocess_lineplot_data()`.

* Added `ard_tabulate_abnormal_by_baseline()` for creating an Analysis Results Data (ARD) object counting participants with abnormal assessments, stratified by their baseline status.
Expand Down
372 changes: 372 additions & 0 deletions R/tbl_ancova.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
#' ANCOVA Table
#'
#' @description
#' Builds a table of adjusted means and treatment-vs-control contrasts
#' from a linear model (ANCOVA). The table displays, for each treatment group:
#' - n (number of observations)
#' - Adjusted Mean (least-squares mean)
#' - Difference in Adjusted Means (vs reference group)
#' - Confidence interval for the difference
#' - p-value for the difference
#'
#' The model is fit via [`cardx::construct_model()`] and least-squares
#' estimates are obtained with the `emmeans` package. Contrasts use
#' `emmeans::contrast(method = "trt.vs.ctrl")` so that each non-reference
#' group is compared to the reference group only.
#'
#' @param data (`data.frame`)\cr
#' Analysis data set, typically one parameter/visit subset.
#' @param formula (`formula`)\cr
#' Model formula passed to the fitting function. The left-hand side is the
#' response (e.g. `CHG`), the right-hand side includes the treatment variable
#' and any covariates (e.g. `CHG ~ TRT01A + BASE`).
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Column in `data` identifying treatment groups. This variable must also
#' appear on the right-hand side of `formula`. Used for column headers.
#' @param ref_group (`string`)\cr
#' Level of `by` that is the reference (control) group. Contrasts are
#' computed as each non-reference group minus this group.
#' @param conf.level (scalar `numeric`)\cr
#' Confidence level for the difference in adjusted means.
#' Default is `0.95`.
#' @param method (`string`)\cr
#' Modelling function name. Default is `"lm"`.
#' @param method.args (`list`)\cr
#' Additional arguments passed to the modelling function.
#' @param package (`string`)\cr
#' Package exporting `method`. Default is `"stats"`.
#' @param adjust (`string`)\cr
#' Multiplicity adjustment method for contrasts, passed to
#' [`emmeans::contrast()`]. Common choices: `"none"` (no adjustment,
#' default), `"dunnett"` (Dunnett's method), `"bonferroni"`, `"tukey"`.
#' See [`emmeans::summary.emmGrid()`] for all options.
#' @param label (`string`)\cr
#' Label for the top-level row in the table. Defaults to the label attribute
#' of the response variable, or the variable name if no label exists.
#' When used inside `tbl_strata(strata = PARAM)`, pass the parameter value
#' (e.g. `unique(data$PARAM)`) so each sub-table is labelled correctly.
#' @param denominator (`data.frame`)\cr
#' Optional data frame used to compute the header Ns (typically `ADSL`).
#' When supplied, the column headers show `(N = <count>)` from this data
#' frame rather than from `data`.
#'
#' @return A `'gtsummary'` table of class `c("tbl_ancova", "gtsummary")`.
#' @name tbl_ancova
#'
#' @examples
#' # Simple ANCOVA with baseline covariate
#' cards::ADLB |>
#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |>
#' tbl_ancova(
#' formula = CHG ~ TRTA + BASE,
#' by = TRTA,
#' ref_group = "Placebo"
#' )
#'
#' # With denominator for header Ns
#' cards::ADLB |>
#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |>
#' tbl_ancova(
#' formula = CHG ~ TRTA + BASE,
#' by = TRTA,
#' ref_group = "Placebo",
#' denominator = cards::ADSL
#' )
#'
#' # With custom label (e.g. parameter name inside tbl_strata)
#' cards::ADLB |>
#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |>
#' tbl_ancova(
#' formula = CHG ~ TRTA + BASE,
#' by = TRTA,
#' ref_group = "Placebo",
#' label = "Sodium (mmol/L)"
#' )
#'
#' # With Dunnett's multiplicity adjustment
#' cards::ADLB |>
#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |>
#' tbl_ancova(
#' formula = CHG ~ TRTA + BASE,
#' by = TRTA,
#' ref_group = "Placebo",
#' adjust = "dunnett"
#' )
#'
#' # Custom column order — set factor levels on the input data
#' cards::ADLB |>
#' dplyr::filter(PARAMCD == "SODIUM", AVISIT == "Week 8") |>
#' dplyr::mutate(TRTA = factor(TRTA, levels = c(
#' "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"
#' ))) |>
#' tbl_ancova(
#' formula = CHG ~ TRTA + BASE,
#' by = TRTA,
#' ref_group = "Placebo"
#' )
#'
#' @export
tbl_ancova <- function(data,
formula,
by,
ref_group,
conf.level = 0.95,
method = "lm",
method.args = list(),
package = "stats",
adjust = "none",
label = NULL,
denominator = NULL) {
set_cli_abort_call()

# input checks ---------------------------------------------------------------
check_not_missing(data)
check_not_missing(formula)
check_not_missing(ref_group)

check_class(data, "data.frame")
check_class(formula, "formula")
check_string(ref_group)
check_string(method)
check_string(package)
check_scalar(conf.level)
check_range(conf.level, range = c(0, 1))
check_string(adjust)
if (!is.null(label)) check_string(label)
check_pkg_installed("emmeans")

by <- dplyr::select(data, {{ by }}) |> names()
check_scalar(by, allow_empty = FALSE)

if (!ref_group %in% unique(data[[by]])) {
cli::cli_abort(
"{.arg ref_group} value {.val {ref_group}} not found in column {.val {by}}.",
call = get_cli_abort_call()
)
}

# relevel so ref_group is first <U+2014> emmeans uses the first level as reference
data[[by]] <- stats::relevel(factor(data[[by]]), ref = ref_group)

# fit model and compute emmeans ----------------------------------------------
mod <- cardx::construct_model(
data = data, formula = formula, method = method,
method.args = {{ method.args }}, package = package
)

emmeans_specs <- stats::reformulate(by)
emm <- emmeans::emmeans(mod, specs = emmeans_specs)

# LS means -------------------------------------------------------------------
emm_summary <- summary(emm, calc = c(n = ".wgt.")) |>
dplyr::as_tibble() |>
dplyr::rename(
estimate = dplyr::any_of(c("emmean", "prob")),
n = dplyr::any_of("n")
)

# contrasts (treatment vs control) -------------------------------------------
contr_summary <- emmeans::contrast(emm, method = "trt.vs.ctrl", ref = ref_group, adjust = adjust) |>
summary(infer = TRUE, level = conf.level) |>
Comment thread
Melkiades marked this conversation as resolved.
dplyr::as_tibble() |>
dplyr::rename(
conf.low = dplyr::any_of(c("lower.CL", "asymp.LCL")),
conf.high = dplyr::any_of(c("upper.CL", "asymp.UCL"))
)


# parse contrast labels to extract the non-reference group name
# e.g. "Xanomeline High Dose - Placebo" -> "Xanomeline High Dose"
contr_summary$trt_group <- sub(
paste0("\\s*-\\s*", .escape_regex(ref_group), "$"), "",
contr_summary$contrast
)
Comment thread
Melkiades marked this conversation as resolved.

# build ARD for tbl_ard_summary ----------------------------------------------
trt_levels <- levels(data[[by]])
Comment thread
Melkiades marked this conversation as resolved.
endpoint_var <- all.vars(formula)[1]
endpoint_label <- label %||% attr(data[[endpoint_var]], "label") %||% endpoint_var

ard <- .build_ancova_ard(
emm_summary = emm_summary,
contr_summary = contr_summary,
by = by,
trt_levels = trt_levels,
ref_group = ref_group,
endpoint_label = endpoint_label,
conf.level = conf.level
)

# build gtsummary table ------------------------------------------------------
ci_pct <- paste0(round(conf.level * 100), "%")

tbl <- gtsummary::tbl_ard_summary(
cards = ard,
by = dplyr::all_of(by),
type = everything() ~ "continuous2",
Comment thread
Melkiades marked this conversation as resolved.
statistic = everything() ~ c(
"{n}",
"{estimate}",
"{mean.diff}",
paste0("{conf.low}, {conf.high}"),
"{p.value}"
),
) |>
gtsummary::modify_table_body(
~ .x |>
dplyr::mutate(
label = dplyr::case_when(
.data$label == "n" ~ "n",
.data$label == "Mean" ~ "Adjusted Mean",
.data$label == "Mean Difference" ~ "Difference in Adjusted Means",
.data$label == "CI Lower Bound, CI Upper Bound" ~
paste0(ci_pct, " CI for Difference in Adjusted Means"),
.data$label == "p-value" ~ "p-value",
TRUE ~ .data$label
),
# blank out stats that don't apply to the reference group
dplyr::across(
dplyr::starts_with("stat_"),
~ dplyr::if_else(
.data$label %in% c(
"Difference in Adjusted Means",
paste0(ci_pct, " CI for Difference in Adjusted Means"),
"p-value"
) & grepl("NA", .x, fixed = TRUE),
"",
.x
)
)
)
)

# add denominator header Ns if provided
Comment thread
Melkiades marked this conversation as resolved.
if (!is.null(denominator)) {
check_class(denominator, "data.frame")
if (by %in% names(denominator)) {
# read the column-to-group mapping that tbl_ard_summary assigned
col_map <- tbl$table_body |>
dplyr::select(dplyr::starts_with("stat_")) |>
names()
# extract group labels from existing headers (format: "**Group**")
existing_headers <- tbl$table_styling$header |>
dplyr::filter(.data$column %in% col_map) |>
dplyr::mutate(group = gsub("^\\*\\*|\\*\\*$", "", .data$label)) |>
dplyr::select("column", "group")

denom_n <- denominator |>
dplyr::count(!!rlang::sym(by), name = "n")

header_df <- existing_headers |>
dplyr::left_join(denom_n, by = stats::setNames(by, "group"))

header_labels <- stats::setNames(
paste0("**", header_df$group, "**\n(N = ", header_df$n, ")"),
header_df$column
)
tbl <- tbl |>
gtsummary::modify_header(!!!header_labels)
}
}

tbl |>
structure(
class = c("tbl_ancova", class(tbl)),
by = by,
ref_group = ref_group,
conf.level = conf.level,
adjust = adjust,
method = method,
package = package
)
}


# Assembles an ARD (Analysis Results Dataset) from emmeans summaries.
# For each treatment level, creates rows for: n, adjusted mean (from emm_summary),
# and contrast stats mean difference, CI bounds, p-value (from contr_summary).
# Reference group gets NA for contrast stats so the table structure is uniform.
.build_ancova_ard <- function(emm_summary, contr_summary, by,
trt_levels, ref_group, endpoint_label,
conf.level) {
ard_rows <- list()

for (trt in trt_levels) {
emm_row <- emm_summary[emm_summary[[by]] == trt, ]
n_val <- as.integer(emm_row$n)
est_val <- emm_row$estimate

# base rows: n and adjusted mean
ard_rows <- c(ard_rows, list(
.ancova_ard_row(endpoint_label, by, trt, "n", n_val, "n", "integer"),
.ancova_ard_row(endpoint_label, by, trt, "estimate", est_val, "Mean", "numeric")
))

# contrast rows (only for non-reference groups)
if (trt != ref_group) {
contr_row <- contr_summary[contr_summary$trt_group == trt, ]
if (nrow(contr_row) == 1) {
ard_rows <- c(ard_rows, list(
.ancova_ard_row(endpoint_label, by, trt, "mean.diff", contr_row$estimate, "Mean Difference", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "conf.low", contr_row$conf.low, "CI Lower Bound", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "conf.high", contr_row$conf.high, "CI Upper Bound", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "p.value", contr_row$p.value, "p-value", "numeric")
))
}
} else {
# reference group: fill with NA so the table structure is consistent
ard_rows <- c(ard_rows, list(
.ancova_ard_row(endpoint_label, by, trt, "mean.diff", NA_real_, "Mean Difference", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "conf.low", NA_real_, "CI Lower Bound", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "conf.high", NA_real_, "CI Upper Bound", "numeric"),
.ancova_ard_row(endpoint_label, by, trt, "p.value", NA_real_, "p-value", "numeric")
))
}
}

dplyr::bind_rows(ard_rows) |>
dplyr::mutate(
context = "continuous",
Comment thread
Melkiades marked this conversation as resolved.
fmt_fun = map(.data$stat_name, function(nm) {
switch(nm,
"n" = function(x) sprintf("%.0f", x),
"estimate" = ,
"mean.diff" = ,
"conf.low" = ,
Comment thread
Melkiades marked this conversation as resolved.
"conf.high" = function(x) sub("^-0.00$", "0.00", sprintf("%.2f", x)),
"p.value" = function(x) sprintf("%.4f", x),
function(x) as.character(x)
)
})
) |>
cards::as_card()
}


# Internal: create a single ARD row --------------------------------------------
.ancova_ard_row <- function(variable, group1, group1_level,
stat_name, stat, stat_label, var_type) {
dplyr::tibble(
variable = variable,
var_type = var_type,
var_label = variable,
group1 = group1,
group1_level = list(group1_level),
stat_name = stat_name,
stat_label = stat_label,
stat = list(stat),
warning = list(NULL),
error = list(NULL)
)
}


# Escapes regex special characters so group names can be used in sub() patterns.
# e.g. "Arm (High)" -> "Arm \\(High\\)", "Dose 1.5mg" -> "Dose 1\\.5mg"
.escape_regex <- function(x) {
chars <- c(".", "|", "^", "$", "(", ")", "[", "]", "{", "}", "*", "+", "?", "\\")
Comment thread
Melkiades marked this conversation as resolved.
for (ch in chars) {
x <- gsub(ch, paste0("\\", ch), x, fixed = TRUE)
}
x
}
Loading
Loading