Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
^README\.Rmd$
^\.github$
^codecov\.yml$
^pkgdown$
^README\.html$
^doc$
^Meta$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ dev
inst/doc
vignettes/*.html
vignettes/*.R
/doc/
/Meta/
19 changes: 14 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,26 @@ BugReports: https://github.com/microbiome/daa/issues/
biocViews: Software, Microbiome
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
Depends:
R (>= 4.5.0)
R (>= 4.5.0),
SingleCellExperiment,
SummarizedExperiment
Imports:
dplyr
broom,
dplyr,
limma,
logistf,
rlang,
rms,
rstatix,
stats,
tidyr
Suggests:
mia,
BiocStyle,
knitr,
RefManageR,
rmarkdown,
sessioninfo,
testthat
Config/testthat/edition: 3
VignetteBuilder: knitr
42 changes: 42 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,2 +1,44 @@
# Generated by roxygen2: do not edit by hand

export(getAnova)
export(getFirth)
export(getKruskal)
export(getLimma)
export(getLm)
export(getOrm)
export(getTTest)
export(getWilcoxonTest)
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assayNames)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(broom,tidy)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_distinct)
importFrom(dplyr,ungroup)
importFrom(limma,eBayes)
importFrom(limma,lmFit)
importFrom(logistf,logistf)
importFrom(logistf,logistf.control)
importFrom(rlang,"!!")
importFrom(rlang,.data)
importFrom(rlang,sym)
importFrom(rms,orm)
importFrom(rstatix,anova_test)
importFrom(rstatix,kruskal_test)
importFrom(rstatix,t_test)
importFrom(rstatix,wilcox_test)
importFrom(stats,as.formula)
importFrom(stats,delete.response)
importFrom(stats,lm)
importFrom(stats,model.matrix)
importFrom(stats,p.adjust)
importFrom(stats,pchisq)
importFrom(stats,terms)
importFrom(stats,var)
importFrom(tidyr,pivot_longer)
34 changes: 34 additions & 0 deletions R/getAnova.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' @title Perform ANOVA
#'
#' @description
#' Performs ANOVA to compare values between two or more groups.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the variable to test
#' (assay name, colData, or rowData variable) and the RHS specifies the
#' grouping variable, e.g., \code{counts ~ Group}.
#' @param ... Additional arguments passed to \code{anova_test}.
#'
#' @return A \code{data.frame} with test results.
#'
#' @examples
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' res <- getAnova(tse, counts ~ Geographical_location)
#'
#' @importFrom rstatix anova_test
#' @export
getAnova <- function(tse, formula, ...) {
if (length(all.vars(formula[[3]])) != 1L) {
stop("Formula RHS must specify exactly one grouping variable.",
call. = FALSE
)
}
df <- .get_long_data(tse, formula)
res <- .calculate_rstatix(
df = df, formula = formula,
FUN = anova_test, ...
)
return(res)
}
62 changes: 62 additions & 0 deletions R/getFirth.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' @title Fit Firth logistic regression per feature
#'
#' @description
#' Fits Firth logistic regression per feature using wide-format data.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the assay or colData
#' variable (must be binary/presence-absence) and the RHS specifies variables
#' from \code{colData(tse)}, e.g., \code{pa ~ Group + Age}.
#' @param p_adjust_method \code{Character scalar} or \code{NULL}. Method passed
#' to \code{p.adjust} for multiple testing correction. If \code{NULL}, no
#' adjusted p-values are added. (Default: \code{"BH"})
#' @param ... Additional arguments (reserved for future use).
#'
#' @return A \code{data.frame} with per-feature model coefficients, p-values,
#' and optionally adjusted p-values in column \code{q_value}.
#'
#' @examples
#' \dontrun{
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' tse <- mia::transformAssay(tse, method = "pa")
#' res <- getFirth(tse, pa ~ Geographical_location)
#' }
#'
#' @importFrom logistf logistf logistf.control
#' @export
getFirth <- function(tse, formula, p_adjust_method = "BH", ...) {
data_list <- .get_wide_data(tse, formula)
res <- .train_model_per_feature(
formula = formula,
mat = data_list[["matrix"]],
metadata = data_list[["sample_metadata"]],
FUN = .run_firth,
p_adjust_method = p_adjust_method
)
return(res)
}

#' @keywords internal
#' @noRd
.run_firth <- function(abundance, metadata, formula) {
mm <- .create_design_matrix(formula, metadata)
mm <- cbind.data.frame(mm, abundance = abundance)

fit <- logistf(
abundance ~ .,
data = mm,
control = logistf.control(maxit = 1000)
)

res <- data.frame(
estimate = fit$coefficients,
p_value = fit$prob
)

res <- res[!rownames(res) %in% c("(Intercept)"), , drop = FALSE]
res[["variable"]] <- rownames(res)
rownames(res) <- NULL
return(res)
}
34 changes: 34 additions & 0 deletions R/getKruskal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' @title Perform Kruskal-Wallis test
#'
#' @description
#' Performs Kruskal-Wallis test to compare values between two or more groups.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the variable to test
#' (assay name, colData, or rowData variable) and the RHS specifies the
#' grouping variable, e.g., \code{counts ~ Group}.
#' @param ... Additional arguments passed to \code{kruskal_test}.
#'
#' @return A \code{data.frame} with test results.
#'
#' @examples
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' res <- getKruskal(tse, counts ~ Geographical_location)
#'
#' @importFrom rstatix kruskal_test
#' @export
getKruskal <- function(tse, formula, ...) {
if (length(all.vars(formula[[3]])) != 1L) {
stop("Formula RHS must specify exactly one grouping variable.",
call. = FALSE
)
}
df <- .get_long_data(tse, formula)
res <- .calculate_rstatix(
df = df, formula = formula,
FUN = kruskal_test, ...
)
return(res)
}
42 changes: 42 additions & 0 deletions R/getLimma.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#' @title Fit limma model
#'
#' @description
#' Fits a limma model using wide-format data and sample metadata.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the assay or colData
#' variable and the RHS specifies variables from \code{colData(tse)},
#' e.g., \code{counts ~ Group}.
#' @param ... Additional arguments (reserved for future use).
#'
#' @return A fitted limma model object (result of \code{limma::eBayes}).
#'
#' @examples
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' fit <- getLimma(tse, counts ~ Geographical_location)
#'
#' @importFrom limma eBayes lmFit
#' @importFrom stats model.matrix
#' @export
getLimma <- function(tse, formula, ...) {
data_list <- .get_wide_data(tse, formula)
res <- .calculate_limma(
formula = formula,
mat = data_list[["matrix"]],
metadata = data_list[["sample_metadata"]]
)
return(res)
}

#' @keywords internal
#' @noRd
.calculate_limma <- function(formula, mat, metadata) {
rhs_formula <- .get_rhs_formula(formula)
design_mat <- model.matrix(rhs_formula, data = metadata)

fit <- lmFit(mat, design_mat)
fit <- eBayes(fit)
return(fit)
}
55 changes: 55 additions & 0 deletions R/getLm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#' @title Fit linear model per feature
#'
#' @description
#' Fits a linear model per feature using wide-format data and sample metadata.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the assay or colData
#' variable and the RHS specifies variables from \code{colData(tse)},
#' e.g., \code{rclr ~ Group}.
#' @param p_adjust_method \code{Character scalar} or \code{NULL}. Method passed
#' to \code{p.adjust} for multiple testing correction. If \code{NULL}, no
#' adjusted p-values are added. (Default: \code{"BH"})
#' @param ... Additional arguments (reserved for future use).
#'
#' @return A \code{data.frame} with per-feature model coefficients, p-values,
#' and optionally adjusted p-values in column \code{q_value}.
#'
#' @examples
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' tse <- mia::transformAssay(tse, method = "rclr")
#' res <- getLm(tse, rclr ~ Geographical_location)
#'
#' @importFrom broom tidy
#' @importFrom stats lm
#' @export
getLm <- function(tse, formula, p_adjust_method = "BH", ...) {
data_list <- .get_wide_data(tse, formula)
res <- .train_model_per_feature(
formula = formula,
mat = data_list[["matrix"]],
metadata = data_list[["sample_metadata"]],
FUN = .run_lm,
p_adjust_method = p_adjust_method
)
return(res)
}

#' @keywords internal
#' @noRd
.run_lm <- function(abundance, metadata, formula) {
mm <- .create_design_matrix(formula, metadata)
mm <- cbind.data.frame(mm, abundance = abundance)

fit <- lm(abundance ~ ., data = mm)
res <- tidy(fit)

res <- res[res$term != "(Intercept)", c("term", "estimate", "p.value"),
drop = FALSE
]
colnames(res) <- c("variable", "estimate", "p_value")
rownames(res) <- NULL
return(res)
}
68 changes: 68 additions & 0 deletions R/getOrm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' @title Fit ordinal regression model per feature
#'
#' @description
#' Fits an ordinal regression model (ORM) per feature using wide-format data.
#'
#' @param tse A \code{SummarizedExperiment} object.
#' @param formula A formula where the LHS specifies the assay or colData
#' variable and the RHS specifies variables from \code{colData(tse)},
#' e.g., \code{counts ~ Group}.
#' @param p_adjust_method \code{Character scalar} or \code{NULL}. Method passed
#' to \code{p.adjust} for multiple testing correction. If \code{NULL}, no
#' adjusted p-values are added. (Default: \code{"BH"})
#' @param ... Additional arguments (reserved for future use).
#'
#' @return A \code{data.frame} with per-feature model coefficients, p-values,
#' and optionally adjusted p-values in column \code{q_value}.
#'
#' @examples
#' data(peerj13075, package = "mia")
#' tse <- peerj13075
#' tse <- mia::agglomerateByRank(tse, "phylum")
#' res <- getOrm(tse, counts ~ Geographical_location)
#'
#' @importFrom rms orm
#' @importFrom stats pchisq
#' @export
getOrm <- function(tse, formula, p_adjust_method = "BH", ...) {
data_list <- .get_wide_data(tse, formula)
res <- .train_model_per_feature(
formula = formula,
mat = data_list[["matrix"]],
metadata = data_list[["sample_metadata"]],
FUN = .run_orm,
p_adjust_method = p_adjust_method
)
return(res)
}

#' @keywords internal
#' @noRd
.run_orm <- function(abundance, metadata, formula) {
mm <- .create_design_matrix(formula, metadata)
mm <- cbind.data.frame(mm, abundance = abundance)

inds <- seq_len(ncol(mm) - 1)
vars <- colnames(mm)[inds]

fit_1 <- orm(abundance ~ ., data = mm)
score_1 <- fit_1$stats["Score"]

res <- data.frame(estimate = fit_1$coefficients[vars], p_value = NA)

if (length(inds) == 1) {
res$p_value <- fit_1$stats["Score P"]
} else {
for (i in inds) {
fit_0 <- orm(abundance ~ ., data = mm[, -i])
score_0 <- fit_0$stats["Score"]
res$p_value[i] <- as.numeric(
1 - pchisq(score_1 - score_0, df = 1)
)
}
}

res[["variable"]] <- rownames(res)
rownames(res) <- NULL
return(res)
}
Loading
Loading