Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,5 @@ importFrom(stats,uniroot)
importFrom(stats,var)
importFrom(stats,wilcox.test)
importFrom(tidyr,pivot_longer)
importFrom(utils,combn)
importFrom(utils,head)
99 changes: 73 additions & 26 deletions R/brunner_munzel.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,35 +181,51 @@
#' Phipson, B., & Smyth, G. K. (2010). Permutation P-values should never be zero: calculating exact P-values when permutations are randomly drawn. Statistical Applications in Genetics and Molecular Biology, 9(1), Article 39.
#' @name brunner_munzel
#' @importFrom stats var quantile
#' @importFrom utils combn
#' @family Robust tests
#' @export brunner_munzel
NULL

#' @noRd
bm_perm_indices <- function(N, n.x, R) {
# Internal helper function to generate permutation indices without replacement
# Returns a matrix where each column contains indices for group x
# Sample permutations without replacement to avoid duplicates
perms <- matrix(NA, nrow = R, ncol = N)
for (i in 1:R) {
perms[i, ] <- sample(1:N, N)
# Returns a matrix where each column contains a full permutation of 1:N
# We ensure unique ALLOCATIONS (which n.x elements go to group x), not just unique permutations
#
# For small sample sizes where R >= choose(N, n.x), enumerate all combinations
max_combs <- choose(N, n.x)
if (R >= max_combs) {
# Enumerate all unique combinations
combs <- combn(N, n.x)
# Convert each combination to a full permutation [combo, remaining]
perms <- matrix(NA, nrow = max_combs, ncol = N)
for (i in 1:max_combs) {
x_idx <- combs[, i]
y_idx <- setdiff(1:N, x_idx)
perms[i, ] <- c(x_idx, y_idx)
}
return(t(perms)) # Return as N x R matrix
}
# Remove duplicate permutations
perms <- unique(perms)

# For larger sample sizes, sample unique allocations
seen_allocs <- character(0)
perms_list <- list()
iter <- 0
max_iter <- 10
while (nrow(perms) < R && iter < max_iter) {
needed <- R - nrow(perms)
new_perms <- matrix(NA, nrow = needed * 2, ncol = N)
for (i in 1:(needed * 2)) {
new_perms[i, ] <- sample(1:N, N)
max_iter <- R * 10 # Allow more iterations to find unique allocations

while (length(perms_list) < R && iter < max_iter) {
perm <- sample(1:N, N)
# Create a canonical representation of the allocation (sorted first n.x elements)
alloc_key <- paste(sort(perm[1:n.x]), collapse = ",")

if (!(alloc_key %in% seen_allocs)) {
seen_allocs <- c(seen_allocs, alloc_key)
perms_list[[length(perms_list) + 1]] <- perm
}
perms <- unique(rbind(perms, new_perms))
iter <- iter + 1
}
if (nrow(perms) > R) {
perms <- perms[1:R, , drop = FALSE]
}

perms <- do.call(rbind, perms_list)
return(t(perms)) # Return as N x R matrix for compatibility with original code
}

Expand Down Expand Up @@ -328,6 +344,12 @@ brunner_munzel.default = function(x,
if(!is.numeric(y)) stop("'y' must be numeric")
DNAME <- paste(deparse(substitute(x)), "and",
deparse(substitute(y)))
# Capture group names for estimate label (X and Y in P(X>Y))
# Use simple "X" and "Y" for default method since variable names
# may be expressions (e.g., 1:10, c(7:20, 200)) which look messy
# Formula method will overwrite these with actual factor level names
XNAME <- "X"
YNAME <- "Y"
if(paired) {
if(length(x) != length(y))
stop("'x' and 'y' must have the same length")
Expand Down Expand Up @@ -361,6 +383,9 @@ brunner_munzel.default = function(x,
conf.level <- 1 - alpha
}

# Initialize n_perm_actual (will be set if test_method == "perm")
n_perm_actual <- NULL

# Paired -----
if(paired){

Expand Down Expand Up @@ -671,7 +696,8 @@ brunner_munzel.default = function(x,

V <- N*(s1+s2)
singular.bf <- (V == 0)
V[singular.bf] <- N/(2 * n.x * n.y)
# Use same zero-variance handling as perm_loop: vP = 0.5/(n.x*n.y)^2, so V = N*vP
V[singular.bf] <- N * 0.5/(n.x * n.y)^2
std_err = sqrt(V/N)

df.sw <- (s1 + s2)^2/(s1^2/(n.x - 1) + s2^2/(n.y - 1))
Expand All @@ -689,6 +715,7 @@ brunner_munzel.default = function(x,
# Use sampling without replacement to avoid duplicate permutations
P <- bm_perm_indices(N, n.x, R)
R_actual <- ncol(P) # Actual number of unique permutations obtained
n_perm_actual <- R_actual # Store for output parameter
Px<-matrix(c(x,y)[P],ncol=R_actual)

# perm_loop already centers at 0.5 (see res1[1,]<-(pdP-1/2)/sqrt(vP))
Expand Down Expand Up @@ -748,8 +775,12 @@ brunner_munzel.default = function(x,
test_stat <- sqrt(N) * (pd - mu) / sqrt(V)

# Count extreme values for p-value calculation
b_less <- sum(test_stat <= Tperm[1,])
b_greater <- sum(test_stat >= Tperm[1,])
b_less <- sum(Tperm[1,] <= test_stat)
b_greater <- sum(Tperm[1,] >= test_stat)

# For two-sided tests, use absolute value method: mean(|T_perm| >= |T_obs|)
# This matches the brunnermunzel package's approach
b_two_sided <- sum(abs(Tperm[1,]) >= abs(test_stat))

if(alternative == "two.sided"){
c1<-0.5*(Tperm[1,floor((1-alpha/2)*R_actual)]+Tperm[1,ceiling((1-alpha/2)*R_actual)])
Expand All @@ -765,9 +796,10 @@ brunner_munzel.default = function(x,
# Compute p-values using selected method
p_less <- bm_compute_perm_pval(b_less, R_actual, p_method)
p_greater <- bm_compute_perm_pval(b_greater, R_actual, p_method)
p_two_sided <- bm_compute_perm_pval(b_two_sided, R_actual, p_method)

p.value = switch(alternative,
"two.sided" = min(2 * p_less, 2 * p_greater),
"two.sided" = p_two_sided,
"less" = p_less,
"greater" = p_greater)

Expand Down Expand Up @@ -932,18 +964,24 @@ brunner_munzel.default = function(x,

if(test_method == "perm"){
names(test_stat) = "t-observed"
# For permutation tests, report number of permutations instead of df
param <- n_perm_actual
names(param) <- "N-permutations"
} else {
names(test_stat) = "t"
param <- df.sw
names(param) <- "df"
}

names(df.sw) = "df"
cint = c(pd.lower, pd.upper)
attr(cint, "conf.level") = conf.level
estimate = pd
names(estimate) = "P(X>Y) + .5*P(X=Y)"
# Use actual group names in estimate label
# XNAME and YNAME are set from input variable names or overwritten by formula method
names(estimate) = paste0("P(", XNAME, ">", YNAME, ") + .5*P(", XNAME, "=", YNAME, ")")

rval <- list(statistic = test_stat,
parameter = df.sw,
parameter = param,
p.value = as.numeric(p.value),
estimate = estimate,
stderr = std_err,
Expand All @@ -970,7 +1008,7 @@ brunner_munzel.formula = function(formula,
|| (length(formula) != 3L)
|| (length(attr(terms(formula[-2L]), "term.labels")) != 1L))
stop("'formula' missing or incorrect")

# Check for paired argument in ... and warn user
dots <- list(...)
if("paired" %in% names(dots)){
Expand All @@ -995,6 +1033,11 @@ brunner_munzel.formula = function(formula,
DATA <- setNames(split(mf[[response]], g), c("x", "y"))
y <- do.call("brunner_munzel", c(DATA, list(...)))
y$data.name <- DNAME
# Update estimate label with actual factor level names
# First level becomes "x" (XNAME), second level becomes "y" (YNAME)
XNAME <- levels(g)[1]
YNAME <- levels(g)[2]
names(y$estimate) <- paste0("P(", XNAME, ">", YNAME, ") + .5*P(", XNAME, "=", YNAME, ")")
y

}
Expand All @@ -1005,17 +1048,21 @@ perm_loop <-function(x,y,n.x,n.y,R){
pl1P<-matrix(0,nrow=n.x,ncol=R)
pl2P<-matrix(0,nrow=n.y,ncol=R)

# Compute P(Y < X_h1) for each h1 (used for variance calculation)
for(h1 in 1:n.x){
help1<-matrix(t(x[h1,]),
ncol=R,
nrow=n.y,byrow=TRUE)
pl1P[h1,]<-1/n.y*(colSums((y<help1)+1/2*(y==help1)))
}
# Compute P(X > Y_h2) for each h2 (used for pd calculation)
# Note: Changed from (x<help2) to (x>help2) to match main function convention
# Main function uses pd = P(X > Y), so perm_loop must use same convention
for(h2 in 1:n.y){
help2<-matrix(t(y[h2,]),
ncol=R,
nrow=n.x,byrow=TRUE)
pl2P[h2,]<-1/n.x*(colSums((x<help2)+1/2*(x==help2)))
pl2P[h2,]<-1/n.x*(colSums((x>help2)+1/2*(x==help2)))
}

pdP<-colMeans(pl2P)
Expand Down
37 changes: 17 additions & 20 deletions R/simple_htest.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' `r lifecycle::badge("maturing")`
#'
#' Performs statistical hypothesis tests with extended functionality beyond standard implementations.
#' Supports t-tests, Wilcoxon-Mann-Whitney tests, and Brunner-Munzel tests with additional
#' Supports t-tests and Wilcoxon-Mann-Whitney tests with additional
#' alternatives such as equivalence and minimal effect testing.
#'
#' @section Purpose:
Expand All @@ -16,11 +16,9 @@
#'
#' @inheritParams t_TOST
#' @inheritParams z_cor_test
#' @inheritParams brunner_munzel
#' @param test a character string specifying the type of hypothesis test to use:
#' - "t.test": Student's t-test (parametric, default)
#' - "wilcox.test": Wilcoxon-Mann-Whitney test (non-parametric)
#' - "brunner_munzel": Brunner-Munzel test (non-parametric)
#'
#' You can specify just the initial letter (e.g., "t" for "t.test").
#' @param alternative the alternative hypothesis:
Expand All @@ -30,7 +28,7 @@
#' - "equivalence": between specified bounds
#' - "minimal.effect": outside specified bounds
#' @param mu a number or vector specifying the null hypothesis value(s):
#' - For standard alternatives (two.sided, less, greater): a single value (default: 0 for t-test/wilcox.test, 0.5 for brunner_munzel)
#' - For standard alternatives (two.sided, less, greater): a single value (default: 0)
#' - For equivalence/minimal.effect: either a single value (symmetric bounds will be created) or a vector of two values representing the lower and upper bounds
#' @param ... further arguments to be passed to or from the underlying test functions.
#'
Expand All @@ -52,14 +50,11 @@
#' the underlying test functions with some additional standardization in the output format.
#'
#' The interpretation of `mu` depends on the test used:
#' - For t-test and wilcox.test: mu represents the difference in means/medians (default: 0)
#' - For brunner_munzel: mu represents the probability that a randomly selected value from the first sample exceeds a randomly selected value from the second sample (default: 0.5)
#'
#' - For t-test: mu represents the difference in means (default: 0)
#' - For wilcox.test: mu represents the location shift (default: 0)
#'
#' If `mu` is a single value for equivalence or minimal effect alternatives, symmetric bounds
#' will be created automatically:
#' - For t-test and wilcox.test: bounds become c(mu, -mu)
#' - For brunner_munzel: bounds become c(mu, abs(mu-1))
#' will be created automatically as c(mu, -mu).
#'
#'
#' @return A list with class `"htest"` containing the following components:
Expand All @@ -68,7 +63,7 @@
#' - parameter: the parameter(s) for the test statistic (e.g., degrees of freedom for t-tests).
#' - p.value: the p-value for the test.
#' - conf.int: a confidence interval appropriate to the specified alternative hypothesis.
#' - estimate: the estimated effect (e.g., mean difference for t-tests, probability estimate for Brunner-Munzel).
#' - estimate: the estimated effect (e.g., mean difference for t-tests).
#' - null.value: the specified hypothesized value(s). For equivalence and minimal effect tests, this will be two values.
#' - stderr: the standard error of the estimate (for t-tests).
#' - alternative: a character string describing the alternative hypothesis.
Expand Down Expand Up @@ -98,15 +93,6 @@
#' paired = TRUE,
#' alternative = "greater"))
#'
#' # Example 4: Brunner-Munzel test
#' # Testing if values in one group tend to exceed values in another
#' set.seed(123)
#' group1 <- rnorm(20, mean = 5, sd = 1)
#' group2 <- rnorm(20, mean = 6, sd = 2)
#' simple_htest(x = group1, y = group2,
#' test = "brunner_munzel",
#' alternative = "less")
#'
#' @family TOST
#' @family htest
#' @name simple_htest
Expand Down Expand Up @@ -145,6 +131,17 @@ simple_htest.default = function(x,
...) {
alternative = match.arg(alternative)
test = match.arg(test)

# Deprecate brunner_munzel - direct users to use brunner_munzel() directly
if(test == "brunner_munzel") {
lifecycle::deprecate_warn(
when = "0.9.0",
what = "simple_htest(test = 'brunner_munzel')",
with = "brunner_munzel()",
details = "The Brunner-Munzel test has been removed from simple_htest(). Please use brunner_munzel() directly for full functionality including permutation tests and improved output labeling."
)
}

if(is.null(mu)){
if(test == "brunner_munzel"){
mu = 0.5
Expand Down
Loading
Loading