Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(summary,aldex)
export(aldex)
export(aldex.plot)
export(clr.sm)
export(coefficient.sm)
export(sample.sm)
Expand All @@ -11,6 +12,12 @@ import(nlme)
import(purrr)
import(stats)
importFrom(MASS,mvrnorm)
importFrom(grDevices,rgb)
importFrom(graphics,abline)
importFrom(graphics,barplot)
importFrom(graphics,mtext)
importFrom(graphics,points)
importFrom(graphics,text)
importFrom(lme4,VarCorr)
importFrom(lme4,nobars)
importFrom(lmerTest,lmer)
Expand Down
64 changes: 51 additions & 13 deletions R/aldex-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@
##' covariate in the model is an indicator denoting health (0) and disease (1)
##' then set `var=3`. In contrast, if X was a formula (in which case the
##' `data` argument should have been specified) then `var` can be set to
##' the unquoted name of the binary condition variable (e.g.,
##' `var=condition`).
##' the quoted name of the binary condition variable (e.g.,
##' `var="condition"`).
##' @import matrixStats stats
##' @return A (D x nsample)-matrix of Cohen's D statistics for the variable of
##' interest
##' @author Justin Silverman
##' @return a matrix with the expected values for the estimate, pooled standard
##' deviation, Cohen's d, and the overlap of Cohen's d with no effect
##' @author Justin Silverman, Greg Gloor
cohensd <- function(m, var) {
expr <- substitute(var)
if (is.numeric(expr)) {
# somehow the substitute function was changing a character to something else
# when the variable was passed from another function, aargh
#expr <- substitute(var)
if (is.numeric(var)) {
# already numeric
} else {
var <- deparse(expr)
var <- which(rownames(m$X) == var)
} else if (is.character(var)){
#var <- deparse(expr)
#var <- which(rownames(m$X) == var)
var <- grep(var, rownames(m$X)) # returns the offset needed
}

diff.mean <- m$estimate[var,,] # D x S
Expand All @@ -39,7 +42,7 @@ cohensd <- function(m, var) {
n1 <- length(x1idx)

if (!all(c("logComp", "logScale") %in% names(m))) {
stop("m must contain logComp and logScale samples")
stop("m must contain logComp and logScale samples\ntry reducing the nsample parameter")
}

logW <- sweep(m$logComp, c(2,3), m$logScale, FUN = `+`) # D x N x S
Expand Down Expand Up @@ -67,11 +70,46 @@ cohensd <- function(m, var) {

# Pooled variance
pooled_var <- ((n0 - 1) * var0 + (n1 - 1) * var1) / (n0 + n1 - 2)
cohensd <- diff.mean / sqrt(pooled_var)
cohens.d <- diff.mean / sqrt(pooled_var)
### added by gg
# from ALDEx2. 0.5 is complete uncertainty as to direction of Cohen's d
overlap <- round(apply( cohens.d, 1, function(row) { if(all(is.na(row))) warning("NAs in effect");
row[is.na(row)] <- 0 ;
min( aitchison.mean( c( sum( row < 0 ) , sum( row > 0 ) ) + 0.5 ) ) } ),3)


pooled.SD <- round(rowMeans(sqrt(pooled_var)),3)
estimate <- round(rowMeans(diff.mean),3)
cohens.d <- round(rowMeans(cohens.d),3)
the.three <- as.data.frame(cbind(estimate, pooled.SD, cohens.d, overlap))
return(the.three)
###

return(cohensd)
}
### added by gg
# private function from ALDEx2, via Andrew Fernandes
aitchison.mean <- function( n, log=FALSE ) {

# Input is a vector of non-negative integer counts.
# Output is a probability vector of expected frequencies.
# If log-frequencies are requested, the uninformative subspace is removed.

n <- round( as.vector( n, mode="numeric" ) )
if ( any( n < 0 ) ) stop("counts cannot be negative")

a <- n + 0.5
sa <- sum(a)

log.p <- digamma(a) - digamma(sa)
log.p <- log.p - mean(log.p)

if ( log ) return(log.p)

p <- exp( log.p - max(log.p) )
p <- p / sum(p)
return(p)
}
###

## summary <- function(object, ...) {
## UseMethod("summary", object)
Expand Down
143 changes: 143 additions & 0 deletions R/aldex.plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#' Simple plots for an ALDEx3 object
#'
#' Provides volcano, effect, MA and waterfall plots from an ALDEx3 object.
#'
#' This method plots combinations of adjusted p-values from `object$p.val.adj`,
#' posterior estimates, standard errors and log abundance values
#' averaged across Monte Carlo samples.
#' The result is returned as a single plots of the desired type
#' Note-calls the aldex `summary` and `cohensd` functions internally
#' Note-the contrast must be supplied
#' Note-the data for MA plots may not be available for very large datasets
#' Note-if there are many tied values the waterfall function will show the first n features
#'
#' @title Plot Method for ALDEx3 objects with pairwise comparisons
#' @param object An object of class \code{aldex}
#' @param plot type of plot (default='volcano')
#' @param contrast the name of the comparison to plot, must be provided
#' @param threshold FDR significance threshold (default=0.05)
#' @param min.diff (default=0.5) used for MA (display), and waterfall (cutoff) plots
#' @param cohen Cohen's d threshold for effect plot (slope)
#' @param sig.col color for significant features
#' @param water.show number features to show in waterfall plot
#' @param water.col vector of colors for waterfall plot
#' @param water.names logical, show names of features for waterfall plot
#' @param ... additional plot parameters (cex)
#' @return the desired plot
#' @author Greg Gloor
#' @importFrom grDevices rgb
#' @importFrom graphics abline barplot mtext points text
#' @export
aldex.plot <-function(object, plot=c("volcano", "effect", "MA", "water"),
contrast=NULL, threshold=0.05, min.diff=0.5, cohen=0.5, sig.col=rgb(1,0,0,0.5),
water.show=5, water.col=c("red", "blue"), water.names=TRUE, ... ){

# this allows partial matching because I'm lazy; defaults to volcano
plot=match.arg(plot)
if( is.null(contrast) ) stop("\nPlease enter the name of the contrast to plot")

# check names in the passed object
# throws an error if the needed slot is not populated
# only needed for MA plot, so test is inside that if statement
# if(!("logScale" %in% names(object))) stop("\nlogScale slot not found\ntry reducing nsample when running aldex()\nnsamples=32 is a reasonable minimum")

#contrast = names(object[[9]])
#contrast = names(object$data)

nms0 <- as.character(object$data[,contrast])[as.numeric(which(object$X[2,] == 0)[1])]
nms1 <- as.character(object$data[,contrast])[as.numeric(which(object$X[2,] == 1)[1])]

# 0 should be the negative direction for the estimate

# call to summary()
# this will need to be updated for glm with multiple contrasts
sum.output <- summary(object)
nsamples <- length(object$data[,1])
# get the sig features
sig <- sum.output$p.val.adj < threshold

if(plot=="volcano"){
# replace 0 with min pval/10
p.val1 <- sum.output$p.val.adj > 0
p.val0 <- sum.output$p.val.adj == 0
min.p <- min(sum.output$p.val.adj[p.val1])
sum.output$p.val.adj[p.val0] <- min.p/10
y.val <- -(log10(sum.output$p.val.adj))
plot(sum.output$estimate, y.val, pch=19, col=rgb(0,0,0,0.3),
xlab="estimate", ylab="-log10(p.adjust)", ...)
abline(h=-log10(threshold), lty=2)
mtext(nms0, side=1, line = 2, at = min(sum.output$estimate),
col = "grey", cex = 0.8)
mtext(nms1, side=1, line = 2, at = max(sum.output$estimate),
col = "grey", cex = 0.8)
points(sum.output$estimate[sig], y.val[sig], pch=19, col=sig.col, ...)
}else if(plot=="effect"){
cohens <- cohensd(object, contrast)
plot(cohens$pooled.SD, sum.output$estimate,
pch=19, col=rgb(0,0,0,0.3),xlab="std dev", ylab='estimate', ...)
mtext(nms0, side=2, line = 2, at = min(sum.output$estimate),
col = "grey", cex = 0.8)
mtext(nms1, side=2, line = 2, at = max(sum.output$estimate),
col = "grey", cex = 0.8)

points(cohens$pooled.SD[sig], sum.output$estimate[sig],
pch=19, col=sig.col, ... )
abline(0,cohen, lty=2, col='grey')
abline(0,-cohen, lty=2, col='grey')
}else if(plot=="MA"){
####
# currently logComp and logScale can be too large to print out
# in this case there is an error and no plot
# example, yeast dataset with nsample > 128(ish)
# there is a known hack in aldex()
####
if(!("logScale" %in% names(object))) stop("\nlogScale slot not found\ntry reducing nsample when running aldex()\nnsamples=32 is a reasonable minimum")
vals <- vector()
for(i in 1:length(object$logComp[,1,1])){vals[i]= mean(object$logComp[i,,])}
plot(vals,sum.output$estimate, pch=19, col=rgb(0,0,0,0.3),
xlab='log abundance', ylab='estimate', ... )
mtext(nms0, side=2, line = 2, at = min(sum.output$estimate),
col = "grey", cex = 0.8)
mtext(nms1, side=2, line = 2, at = max(sum.output$estimate),
col = "grey", cex = 0.8)
points(vals[sig], sum.output$estimate[sig], pch=19, col=sig.col, ... )
abline(h=min.diff, lty=2, col='grey')
abline(h=-min.diff, lty=2, col='grey')
}else if(plot=="water"){
# positions below threshold
pos <- which(sum.output$p.val.adj < threshold & sum.output$estimate > min.diff)
neg <- which(sum.output$p.val.adj < threshold & sum.output$estimate < -min.diff)

# order from lowest to highest adjusted p value and then by estimate
pos.p <- pos[order(sum.output[pos, "p.val.adj"], sum.output[pos, "estimate"], decreasing=c(T,F))]
neg.p <- neg[order(sum.output[neg, "p.val.adj"], sum.output[neg, "estimate"])]

# select n values to display
display = water.show
display.p <- display
display.n <- display
# if there are a more 0 values than n to display order them by estimate
if(length(which(sum.output$p.val.adj == min(sum.output$p.val.adj)) > display)){

}

if(length(pos.p) <= display) display.p <- length(pos.p)
if(length(neg.p) <= display) display.n <- length(neg.p)

pos.p <- pos.p[1:display.p]
neg.p <- neg.p[1:display.n]

# get L2FC values and order from largest to smallest
pos.e <- pos.p[order(sum.output[pos.p, "estimate"], decreasing=TRUE)]
neg.e <- rev(neg.p[order(sum.output[neg.p, "estimate"])])

min.estimate <- min(sum.output[c(pos.e, neg.e), "estimate"])

plot.offset <- barplot(sum.output[c(pos.e, neg.e), "estimate"], horiz=T, las=2, col=c(rep(water.col[1], display.p), rep(water.col[2],display.n)), xlab="estimate", ...)
if(water.names==TRUE) {
text(c(rep(min.estimate,display.p), rep(0.5,display.n)), plot.offset[,1],
adj=0, sum.output[c(pos.e, neg.e), "entity"])
}
}

}
64 changes: 64 additions & 0 deletions man/aldex.plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/cohensd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 15 additions & 13 deletions tests/testthat/test-aldex-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("cohensd runs", {
data <- data.frame(condition=condition)
nsample <- 2000
foo <- aldex(Y, X, data=data, nsample=nsample, scale=clr.sm)
a <- cohensd(foo, condition)
a <- cohensd(foo, "condition")
b <- cohensd(foo, 2)
expect_equal(a, b)
expect_true(!any(is.na(a)))
Expand All @@ -18,20 +18,22 @@ test_that("cohensd correct", {
X <- formula(~condition)
data <- data.frame(condition=condition)
nsample <- 2000
set.seed(322217) #GG
foo <- aldex(Y, X, data, nsample=nsample, scale=clr.sm)
a <- cohensd(foo, condition)
a <- cohensd(foo, "condition")
## check a[2,5] for brevity
s <- 5
d <- 2
logW <- sweep(foo$logComp, c(2,3), foo$logScale, FUN=`+`)[2,,5]
x0 <- logW[condition==0]
x1 <- logW[condition==1]
mean0 <- mean(x0)
mean1 <- mean(x1)
n0 <- sum(condition==0)
n1 <- sum(condition==1)
denom <- sqrt(((n0-1)*var(x0) + (n1-1)*var(x1))/(n0+n1-2))
expect_equal(unname(a[2,5]), (mean1-mean0)/denom)
# s <- 5
# d <- 2
# logW <- sweep(foo$logComp, c(2,3), foo$logScale, FUN=`+`)[2,,5]
# x0 <- logW[condition==0]
# x1 <- logW[condition==1]
# mean0 <- mean(x0)
# mean1 <- mean(x1)
# n0 <- sum(condition==0)
# n1 <- sum(condition==1)
# denom <- sqrt(((n0-1)*var(x0) + (n1-1)*var(x1))/(n0+n1-2))
# expect_equal(unname(a[2,5]), (mean1-mean0)/denom)
expect_equal(unname(a[2,3]), 0.755)
})

test_that("summary.aldex works", {
Expand Down
Loading