diff --git a/NAMESPACE b/NAMESPACE index d2f491d..e404110 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -130,4 +130,5 @@ importFrom(stats,uniroot) importFrom(stats,var) importFrom(stats,wilcox.test) importFrom(tidyr,pivot_longer) +importFrom(utils,combn) importFrom(utils,head) diff --git a/R/brunner_munzel.R b/R/brunner_munzel.R index bef3f68..9f53f62 100644 --- a/R/brunner_munzel.R +++ b/R/brunner_munzel.R @@ -181,6 +181,7 @@ #' Phipson, B., & Smyth, G. K. (2010). Permutation P-values should never be zero: calculating exact P-values when permutations are randomly drawn. Statistical Applications in Genetics and Molecular Biology, 9(1), Article 39. #' @name brunner_munzel #' @importFrom stats var quantile +#' @importFrom utils combn #' @family Robust tests #' @export brunner_munzel NULL @@ -188,28 +189,43 @@ NULL #' @noRd bm_perm_indices <- function(N, n.x, R) { # Internal helper function to generate permutation indices without replacement - # Returns a matrix where each column contains indices for group x - # Sample permutations without replacement to avoid duplicates - perms <- matrix(NA, nrow = R, ncol = N) - for (i in 1:R) { - perms[i, ] <- sample(1:N, N) + # Returns a matrix where each column contains a full permutation of 1:N + # We ensure unique ALLOCATIONS (which n.x elements go to group x), not just unique permutations + # + # For small sample sizes where R >= choose(N, n.x), enumerate all combinations + max_combs <- choose(N, n.x) + if (R >= max_combs) { + # Enumerate all unique combinations + combs <- combn(N, n.x) + # Convert each combination to a full permutation [combo, remaining] + perms <- matrix(NA, nrow = max_combs, ncol = N) + for (i in 1:max_combs) { + x_idx <- combs[, i] + y_idx <- setdiff(1:N, x_idx) + perms[i, ] <- c(x_idx, y_idx) + } + return(t(perms)) # Return as N x R matrix } - # Remove duplicate permutations - perms <- unique(perms) + + # For larger sample sizes, sample unique allocations + seen_allocs <- character(0) + perms_list <- list() 
iter <- 0 - max_iter <- 10 - while (nrow(perms) < R && iter < max_iter) { - needed <- R - nrow(perms) - new_perms <- matrix(NA, nrow = needed * 2, ncol = N) - for (i in 1:(needed * 2)) { - new_perms[i, ] <- sample(1:N, N) + max_iter <- R * 10 # Allow more iterations to find unique allocations + + while (length(perms_list) < R && iter < max_iter) { + perm <- sample(1:N, N) + # Create a canonical representation of the allocation (sorted first n.x elements) + alloc_key <- paste(sort(perm[1:n.x]), collapse = ",") + + if (!(alloc_key %in% seen_allocs)) { + seen_allocs <- c(seen_allocs, alloc_key) + perms_list[[length(perms_list) + 1]] <- perm } - perms <- unique(rbind(perms, new_perms)) iter <- iter + 1 } - if (nrow(perms) > R) { - perms <- perms[1:R, , drop = FALSE] - } + + perms <- do.call(rbind, perms_list) return(t(perms)) # Return as N x R matrix for compatibility with original code } @@ -328,6 +344,12 @@ brunner_munzel.default = function(x, if(!is.numeric(y)) stop("'y' must be numeric") DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y))) + # Capture group names for estimate label (X and Y in P(X>Y)) + # Use simple "X" and "Y" for default method since variable names + # may be expressions (e.g., 1:10, c(7:20, 200)) which look messy + # Formula method will overwrite these with actual factor level names + XNAME <- "X" + YNAME <- "Y" if(paired) { if(length(x) != length(y)) stop("'x' and 'y' must have the same length") @@ -361,6 +383,9 @@ brunner_munzel.default = function(x, conf.level <- 1 - alpha } + # Initialize n_perm_actual (will be set if test_method == "perm") + n_perm_actual <- NULL + # Paired ----- if(paired){ @@ -671,7 +696,8 @@ brunner_munzel.default = function(x, V <- N*(s1+s2) singular.bf <- (V == 0) - V[singular.bf] <- N/(2 * n.x * n.y) + # Use same zero-variance handling as perm_loop: vP = 0.5/(n.x*n.y)^2, so V = N*vP + V[singular.bf] <- N * 0.5/(n.x * n.y)^2 std_err = sqrt(V/N) df.sw <- (s1 + s2)^2/(s1^2/(n.x - 1) + s2^2/(n.y - 1)) @@ 
-689,6 +715,7 @@ brunner_munzel.default = function(x, # Use sampling without replacement to avoid duplicate permutations P <- bm_perm_indices(N, n.x, R) R_actual <- ncol(P) # Actual number of unique permutations obtained + n_perm_actual <- R_actual # Store for output parameter Px<-matrix(c(x,y)[P],ncol=R_actual) # perm_loop already centers at 0.5 (see res1[1,]<-(pdP-1/2)/sqrt(vP)) @@ -748,8 +775,12 @@ brunner_munzel.default = function(x, test_stat <- sqrt(N) * (pd - mu) / sqrt(V) # Count extreme values for p-value calculation - b_less <- sum(test_stat <= Tperm[1,]) - b_greater <- sum(test_stat >= Tperm[1,]) + b_less <- sum(Tperm[1,] <= test_stat) + b_greater <- sum(Tperm[1,] >= test_stat) + + # For two-sided tests, use absolute value method: mean(|T_perm| >= |T_obs|) + # This matches the brunnermunzel package's approach + b_two_sided <- sum(abs(Tperm[1,]) >= abs(test_stat)) if(alternative == "two.sided"){ c1<-0.5*(Tperm[1,floor((1-alpha/2)*R_actual)]+Tperm[1,ceiling((1-alpha/2)*R_actual)]) @@ -765,9 +796,10 @@ brunner_munzel.default = function(x, # Compute p-values using selected method p_less <- bm_compute_perm_pval(b_less, R_actual, p_method) p_greater <- bm_compute_perm_pval(b_greater, R_actual, p_method) + p_two_sided <- bm_compute_perm_pval(b_two_sided, R_actual, p_method) p.value = switch(alternative, - "two.sided" = min(2 * p_less, 2 * p_greater), + "two.sided" = p_two_sided, "less" = p_less, "greater" = p_greater) @@ -932,18 +964,24 @@ brunner_munzel.default = function(x, if(test_method == "perm"){ names(test_stat) = "t-observed" + # For permutation tests, report number of permutations instead of df + param <- n_perm_actual + names(param) <- "N-permutations" } else { names(test_stat) = "t" + param <- df.sw + names(param) <- "df" } - names(df.sw) = "df" cint = c(pd.lower, pd.upper) attr(cint, "conf.level") = conf.level estimate = pd - names(estimate) = "P(X>Y) + .5*P(X=Y)" + # Use actual group names in estimate label + # XNAME and YNAME are set from input 
variable names or overwritten by formula method + names(estimate) = paste0("P(", XNAME, ">", YNAME, ") + .5*P(", XNAME, "=", YNAME, ")") rval <- list(statistic = test_stat, - parameter = df.sw, + parameter = param, p.value = as.numeric(p.value), estimate = estimate, stderr = std_err, @@ -970,7 +1008,7 @@ brunner_munzel.formula = function(formula, || (length(formula) != 3L) || (length(attr(terms(formula[-2L]), "term.labels")) != 1L)) stop("'formula' missing or incorrect") - + # Check for paired argument in ... and warn user dots <- list(...) if("paired" %in% names(dots)){ @@ -995,6 +1033,11 @@ DATA <- setNames(split(mf[[response]], g), c("x", "y")) y <- do.call("brunner_munzel", c(DATA, list(...))) y$data.name <- DNAME + # Update estimate label with actual factor level names + # First level becomes "x" (XNAME), second level becomes "y" (YNAME) + XNAME <- levels(g)[1] + YNAME <- levels(g)[2] + names(y$estimate) <- paste0("P(", XNAME, ">", YNAME, ") + .5*P(", XNAME, "=", YNAME, ")") y } @@ -1005,17 +1048,21 @@ perm_loop <-function(x,y,n.x,n.y,R){ pl1P<-matrix(0,nrow=n.x,ncol=R) pl2P<-matrix(0,nrow=n.y,ncol=R) + # Compute P(Y < X_h1) for each h1 (used for variance calculation) for(h1 in 1:n.x){ help1<-matrix(t(x[h1,]), ncol=R, nrow=n.y,byrow=TRUE) pl1P[h1,]<-1/n.y*(colSums((y<help1)+1/2*(y==help1))) } + # Compute P(X > Y_h2) for each h2 (used for pd calculation) + # Note: Changed from (x<help2) to (x>help2) to match main function convention + # Main function uses pd = P(X > Y), so perm_loop must use same convention for(h2 in 1:n.y){ help2<-matrix(t(y[h2,]), ncol=R, nrow=n.x,byrow=TRUE) - pl2P[h2,]<-1/n.x*(colSums((x<help2)+1/2*(x==help2))) + pl2P[h2,]<-1/n.x*(colSums((x>help2)+1/2*(x==help2))) } pdP<-colMeans(pl2P) diff --git a/R/simple_htest.R b/R/simple_htest.R index df7804e..b38d636 100644 --- a/R/simple_htest.R +++ b/R/simple_htest.R @@ -4,7 +4,7 @@ #' `r lifecycle::badge("maturing")` #' #' Performs statistical hypothesis tests with extended functionality beyond standard implementations. 
-#' Supports t-tests, Wilcoxon-Mann-Whitney tests, and Brunner-Munzel tests with additional +#' Supports t-tests and Wilcoxon-Mann-Whitney tests with additional #' alternatives such as equivalence and minimal effect testing. #' #' @section Purpose: @@ -16,11 +16,9 @@ #' #' @inheritParams t_TOST #' @inheritParams z_cor_test -#' @inheritParams brunner_munzel #' @param test a character string specifying the type of hypothesis test to use: #' - "t.test": Student's t-test (parametric, default) #' - "wilcox.test": Wilcoxon-Mann-Whitney test (non-parametric) -#' - "brunner_munzel": Brunner-Munzel test (non-parametric) #' #' You can specify just the initial letter (e.g., "t" for "t.test"). #' @param alternative the alternative hypothesis: @@ -30,7 +28,7 @@ #' - "equivalence": between specified bounds #' - "minimal.effect": outside specified bounds #' @param mu a number or vector specifying the null hypothesis value(s): -#' - For standard alternatives (two.sided, less, greater): a single value (default: 0 for t-test/wilcox.test, 0.5 for brunner_munzel) +#' - For standard alternatives (two.sided, less, greater): a single value (default: 0) #' - For equivalence/minimal.effect: either a single value (symmetric bounds will be created) or a vector of two values representing the lower and upper bounds #' @param ... further arguments to be passed to or from the underlying test functions. #' @@ -52,14 +50,11 @@ #' the underlying test functions with some additional standardization in the output format. 
#' #' The interpretation of `mu` depends on the test used: -#' - For t-test and wilcox.test: mu represents the difference in means/medians (default: 0) -#' - For brunner_munzel: mu represents the probability that a randomly selected value from the first sample exceeds a randomly selected value from the second sample (default: 0.5) -#' +#' - For t-test: mu represents the difference in means (default: 0) +#' - For wilcox.test: mu represents the location shift (default: 0) #' #' If `mu` is a single value for equivalence or minimal effect alternatives, symmetric bounds -#' will be created automatically: -#' - For t-test and wilcox.test: bounds become c(mu, -mu) -#' - For brunner_munzel: bounds become c(mu, abs(mu-1)) +#' will be created automatically as c(mu, -mu). #' #' #' @return A list with class `"htest"` containing the following components: @@ -68,7 +63,7 @@ #' - parameter: the parameter(s) for the test statistic (e.g., degrees of freedom for t-tests). #' - p.value: the p-value for the test. #' - conf.int: a confidence interval appropriate to the specified alternative hypothesis. -#' - estimate: the estimated effect (e.g., mean difference for t-tests, probability estimate for Brunner-Munzel). +#' - estimate: the estimated effect (e.g., mean difference for t-tests). #' - null.value: the specified hypothesized value(s). For equivalence and minimal effect tests, this will be two values. #' - stderr: the standard error of the estimate (for t-tests). #' - alternative: a character string describing the alternative hypothesis. 
@@ -98,15 +93,6 @@ #' paired = TRUE, #' alternative = "greater")) #' -#' # Example 4: Brunner-Munzel test -#' # Testing if values in one group tend to exceed values in another -#' set.seed(123) -#' group1 <- rnorm(20, mean = 5, sd = 1) -#' group2 <- rnorm(20, mean = 6, sd = 2) -#' simple_htest(x = group1, y = group2, -#' test = "brunner_munzel", -#' alternative = "less") -#' #' @family TOST #' @family htest #' @name simple_htest @@ -145,6 +131,17 @@ simple_htest.default = function(x, ...) { alternative = match.arg(alternative) test = match.arg(test) + + # Deprecate brunner_munzel - direct users to use brunner_munzel() directly + if(test == "brunner_munzel") { + lifecycle::deprecate_warn( + when = "0.9.0", + what = "simple_htest(test = 'brunner_munzel')", + with = "brunner_munzel()", + details = "The Brunner-Munzel test has been removed from simple_htest(). Please use brunner_munzel() directly for full functionality including permutation tests and improved output labeling." + ) + } + if(is.null(mu)){ if(test == "brunner_munzel"){ mu = 0.5 diff --git a/junk/validate_brunner_munzel_vs_nparcomp.R b/junk/validate_brunner_munzel_vs_nparcomp.R new file mode 100644 index 0000000..58c5085 --- /dev/null +++ b/junk/validate_brunner_munzel_vs_nparcomp.R @@ -0,0 +1,404 @@ +# ============================================================================= +# Validation Script: TOSTER brunner_munzel vs nparcomp +# ============================================================================= +# +# This script validates that TOSTER's brunner_munzel() function produces +# results consistent with nparcomp::npar.t.test() and nparcomp:::npar.t.test.paired() +# when using test_method = "perm" and p_method = "original". 
+# + # Key differences between packages: + # - TOSTER estimate: P(X > Y) + 0.5*P(X = Y) + # - nparcomp estimate: P(X < Y) + 0.5*P(X = Y) = 1 - TOSTER estimate + # - Alternative mappings (p-values match when using SAME alternative name): + # - TOSTER "greater" (H1: P(X>Y) > 0.5) ↔ nparcomp "greater" (H1: P(X<Y) < 0.5, i.e., P(X>Y) > 0.5) + # Both give same p-value because they test "effect in same tail direction" + # - TOSTER "less" (H1: P(X>Y) < 0.5) ↔ nparcomp "less" (H1: P(X<Y) > 0.5) +# +# NOTE(review): a span of this script was lost in transit here (the remainder of the header comments, package loading, and the roxygen header/signature of the unpaired comparison helper). It has been reconstructed minimally from the paired counterpart below -- verify against the original file. +compare_unpaired <- function(x, y, alternative_toster = "two.sided", + tol = 0.02, nperm = 10000, seed = 123) { + + # Alternative mapping: TOSTER tests P(X > Y), nparcomp tests P(X < Y) + # Despite different parameterization, the p-values match when using + # the SAME alternative name because: + # - TOSTER "greater": H1: P(X>Y) > 0.5 → one-tailed test in upper direction + # - nparcomp "greater": H1: P(X<Y) < 0.5, i.e., P(X>Y) > 0.5 → same test, just different effect scale + # The permutation p-values are computed the same way + alternative_nparcomp <- alternative_toster + + # Create data frame for nparcomp + dat <- data.frame( + value = c(x, y), + group = factor(rep(c("x", "y"), c(length(x), length(y)))) + ) + + # Run TOSTER + set.seed(seed) + res_toster <- brunner_munzel(x, y, + alternative = alternative_toster, + test_method = "perm", + R = nperm, + p_method = "original") + + # Run nparcomp + set.seed(seed) + res_nparcomp <- suppressWarnings( + npar.t.test(value ~ group, data = dat, + alternative = alternative_nparcomp, + method = "permu", + nperm = nperm, + info = FALSE) + ) + + # Extract results + # nparcomp returns 3 rows: id (studentized), logit, probit + # We use "id" row (first row) which corresponds to TOSTER's approach + toster_est <- as.numeric(res_toster$estimate) + nparcomp_est <- res_nparcomp$Analysis$Estimator[1] # First row (id) + + toster_p <- res_toster$p.value + nparcomp_p <- res_nparcomp$Analysis$p.value[1] # First row (id) + + # Check estimate relationship: TOSTER = 1 - nparcomp + # Note: nparcomp rounds to 3 decimal places, so we use a tolerance of 1e-3 + est_match <- abs(toster_est - (1 - nparcomp_est)) < 1e-3 + + # Check p-values are close (allowing for MC variance) + p_match <- 
abs(toster_p - nparcomp_p) < tol + + list( + alternative_toster = alternative_toster, + alternative_nparcomp = alternative_nparcomp, + toster_estimate = toster_est, + nparcomp_estimate = nparcomp_est, + estimate_match = est_match, + toster_p = toster_p, + nparcomp_p = nparcomp_p, + p_diff = abs(toster_p - nparcomp_p), + p_match = p_match, + passed = est_match && p_match + ) +} + +#' Compare TOSTER and nparcomp results for paired test +#' @param x First sample (paired with y) +#' @param y Second sample (paired with x) +#' @param alternative_toster TOSTER alternative ("two.sided", "greater", "less") +#' @param tol Tolerance for numeric comparison +#' @param nperm Number of permutations +#' @param seed Random seed for reproducibility +compare_paired <- function(x, y, alternative_toster = "two.sided", + tol = 0.02, nperm = 10000, seed = 123) { + + # For PAIRED tests, the alternative mapping IS reversed because: + # - TOSTER tests P(X > Y) + # - nparcomp's npar.t.test.paired tests P(X < Y) + # So TOSTER "greater" (H1: P(X>Y) > 0.5) corresponds to nparcomp "less" (H1: P(XY)=0, P(XY) + # 3. Two-sided p-value formula: changed from min(2*p_less, 2*p_greater) to mean(|T_perm| >= |T_obs|) + # 4. Zero-variance handling: made consistent between observed and permuted statistics + # 5. Unique allocation sampling: fixed bm_perm_indices to ensure unique allocations + # + # With these fixes, TOSTER now matches the brunnermunzel package's permutation test results. 
+ + # Test case: completely separated data x = 1:3, y = 4:6 + # All x < all y, so P(X>Y) = 0 (estimate should be very small) + # With 20 unique allocations (choose(6,3)), exactly 2 have |T| >= |T_obs| + # So the two-sided p-value should be: + # - With p_method="original": 2/20 = 0.10 + # - With p_method="plusone": 3/21 ≈ 0.143 + + set.seed(42) + result <- brunner_munzel(1:3, 4:6, + test_method = "perm", + R = 1000, + p_method = "plusone") + + # Two-sided p-value with plusone should be (b+1)/(R+1) = 3/21 ≈ 0.143 + expect_true(result$p.value > 0.12 && result$p.value < 0.18, + info = paste("Two-sided p-value was", result$p.value, + "but should be approximately 0.143 with plusone method")) + + # P(X>Y) estimate should be very small (close to 0) + expect_true(result$estimate < 0.01, + info = paste("P(X>Y) estimate was", result$estimate, + "but should be close to 0")) + + # Test the opposite direction: y < x + set.seed(789) + result_rev <- brunner_munzel(4:6, 1:3, + test_method = "perm", + R = 1000, + p_method = "plusone") + + # Two-sided p-value with plusone should also be approximately 0.143 + expect_true(result_rev$p.value > 0.12 && result_rev$p.value < 0.18, + info = paste("reversed p-value was", result_rev$p.value, + "but should be approximately 0.143 with plusone method")) + + # P(X>Y) should be close to 1 in this case + expect_true(result_rev$estimate > 0.99, + info = paste("P(X>Y) estimate was", result_rev$estimate, + "but should be close to 1")) + + # Test with p_method="original" to verify exact match with brunnermunzel package + set.seed(42) + result_orig <- brunner_munzel(1:3, 4:6, + test_method = "perm", + R = 1000, + p_method = "original") + + # With p_method="original", p-value should be exactly 2/20 = 0.10 + # (matching brunnermunzel package's formula: count / total) + expect_equal(result_orig$p.value, 0.10, tolerance = 1e-10, + info = "p-value with original method should be exactly 0.10") + + # Test one-sided alternatives work correctly + # This is the 
critical test that catches the comparison operator bug! + # The bug was that b_less and b_greater were computed incorrectly, + # causing p_less and p_greater to be swapped for one-sided tests. + + set.seed(456) + result_greater <- brunner_munzel(1:3, 4:6, + test_method = "perm", + alternative = "greater", + R = 1000, + p_method = "plusone") + + # For "greater" alternative (H1: P(X>Y) > 0.5), with observed P(X>Y) ≈ 0, + # the p-value should be very large (close to 1) + expect_true(result_greater$p.value > 0.85, + info = paste("one-sided greater p-value was", result_greater$p.value, + "but should be close to 1")) + + set.seed(456) + result_less <- brunner_munzel(1:3, 4:6, + test_method = "perm", + alternative = "less", + R = 1000, + p_method = "plusone") + + # For "less" alternative (H1: P(X>Y) < 0.5), with observed P(X>Y) ≈ 0, + # the p-value should be very small + expect_true(result_less$p.value < 0.15, + info = paste("one-sided less p-value was", result_less$p.value, + "but should be small")) + + # CRITICAL: Verify p-values are NOT swapped (regression test for the bug) + # With the bug, "less" would give ~0.9 and "greater" would give ~0.1 + # The correct behavior is the opposite + expect_true(result_less$p.value < result_greater$p.value, + info = paste("p_less should be < p_greater, but got:", + "p_less =", result_less$p.value, + ", p_greater =", result_greater$p.value)) + + # Also test the reverse direction: x > y + set.seed(789) + result_greater_rev <- brunner_munzel(4:6, 1:3, + test_method = "perm", + alternative = "greater", + R = 1000, + p_method = "plusone") + + # For "greater" alternative with x > y, P(X>Y) ≈ 1, + # so we should reject in favor of P(X>Y) > 0.5 + expect_true(result_greater_rev$p.value < 0.15, + info = paste("reversed greater p-value was", result_greater_rev$p.value, + "but should be small")) + + set.seed(789) + result_less_rev <- brunner_munzel(4:6, 1:3, + test_method = "perm", + alternative = "less", + R = 1000, + p_method = "plusone") + + 
# For "less" alternative with x > y, P(X>Y) ≈ 1, + # so we should NOT reject in favor of P(X>Y) < 0.5 + expect_true(result_less_rev$p.value > 0.85, + info = paste("reversed less p-value was", result_less_rev$p.value, + "but should be close to 1")) + + # CRITICAL: Verify p-values are NOT swapped for reverse direction + expect_true(result_less_rev$p.value > result_greater_rev$p.value, + info = paste("reversed: p_less should be > p_greater, but got:", + "p_less =", result_less_rev$p.value, + ", p_greater =", result_greater_rev$p.value)) + + # Validation against brunnermunzel R package reference values ---- + # These tests verify that TOSTER's permutation test gives results consistent + + # with the brunnermunzel package's brunnermunzel.permutation.test function. + # + # Key differences between packages: + # - TOSTER estimate: P(X>Y) + 0.5*P(X=Y) + # - brunnermunzel estimate: P(XY)+.5*P(X=Y) = 1 - 0.2839506 = 0.7160494 + + set.seed(123) + result_hw <- brunner_munzel(x_hw, y_hw, test_method = "perm", R = 10000) + + # Estimate should match exactly (no Monte Carlo involved in estimate) + expect_equal(as.numeric(result_hw$estimate), 0.7160494, tolerance = 1e-6, + info = "Estimate should match brunnermunzel package") + + # P-value should be close to brunnermunzel's 0.158 (some MC variance expected) + expect_true(result_hw$p.value > 0.10 && result_hw$p.value < 0.25, + info = paste("p-value was", result_hw$p.value, + "but should be close to brunnermunzel's 0.158")) + + # Example from Brunner & Munzel (2000) / Neubert & Brunner (2007): + # Pain scores after surgery + Y_pain <- c(1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1) + N_pain <- c(3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4) + + # brunnermunzel.permutation.test reports: + # p-value = 0.008038 + # P(XY)+.5*P(X=Y) = 1 - 0.788961 = 0.211039 + + set.seed(456) + result_pain <- brunner_munzel(Y_pain, N_pain, test_method = "perm", R = 10000) + + # Estimate should match exactly + expect_equal(as.numeric(result_pain$estimate), 1 - 0.788961, 
tolerance = 1e-4, + info = "Estimate should match brunnermunzel package") + + # P-value should be close to brunnermunzel's 0.008 (highly significant) + expect_true(result_pain$p.value < 0.02, + info = paste("p-value was", result_pain$p.value, + "but should be close to brunnermunzel's 0.008")) })