diff --git a/R/AlphaPart.R b/R/AlphaPart.R index d6a85e1..8497064 100644 --- a/R/AlphaPart.R +++ b/R/AlphaPart.R @@ -74,7 +74,11 @@ #' @param colPath Numeric or character, position or name of a column #' holding path information. #' @param colBV Numeric or character, position(s) or name(s) of -#' column(s) holding genetic Values. +#' column(s) holding genetic values. +#' @param colPaternalBV Numeric or character, position(s) of column(s) +#' holding the paternal genetic values for IBD partitioning. +#' @param colMaternalBV Numeric or character, position(s) of column(s) +#' holding the maternal genetic values for IBD partitioning.. #' @param colBy Numeric or character, position or name of a column #' holding group information (see details). #' @@ -164,6 +168,8 @@ AlphaPart <- function( colMid = 3, colPath = 4, colBV = 5:ncol(x), + colPaternalBV = NULL, + colMaternalBV = NULL, colBy = NULL ) { ## Test if the data is a data.frame @@ -181,6 +187,15 @@ AlphaPart <- function( "arguments 'colId', 'colFid', 'colMid', 'colPath', and 'colBy' must be of length 1" ) } + + if (!is.null(colPaternalBV) | !is.null(colMaternalBV)){ + if (is.null(colPaternalBV) | is.null(colMaternalBV)){ + stop("Both 'colPaternalBV' and 'colMaternalBV' must be provided for IBD partitioning.") + } + gameticPartition <- TRUE + } else { + gameticPartition <- FALSE + } if (is.null(colBy)) { groupSummary <- FALSE @@ -237,6 +252,24 @@ AlphaPart <- function( } testN <- NULL # not needed anymore } + + if (gameticPartition){ + if(!is.numeric(colPaternalBV)){ + testN <- length(colPaternalBV) + colPaternalBV <- which(colnames(x) %in% colPaternalBV) + if (length(colPaternalBV) != testN) { + stop("Identification not valid for 'colPaternalBV' column(s) name", call. = FALSE) + } + } + if(!is.numeric(colMaternalBV)){ + testN <- length(colMaternalBV) + colMaternalBV <- which(colnames(x) %in% colMaternalBV) + if (length(colMaternalBV) != testN) { + stop("Identification not valid for 'colMaternalBV' column(s) name", call. = FALSE) + } + } + testN <- NULL # not needed anymore + } ## --- Sort and recode pedigree --- ## Make sure that identifications are numeric if recode=FALSE @@ -253,6 +286,35 @@ AlphaPart <- function( stop("colBV columns must be numeric!") str(x) } + + ## If gametic partitioning, make sure that colPaternalBV and colMaternalBV: + ## - are numeric + ## - the same length as colBV + ## - the columns sum to the colBV columns + if (gameticPartition){ + test <- !sapply(x[, c(colPaternalBV)], is.numeric) + if (any(test)){ + stop("colPaternalBV columns must be numeric!") + str(x) + } + test <- !sapply(x[, c(colMaternalBV)], is.numeric) + if (any(test)){ + stop("colMaternalBV columns must be numeric!") + str(x) + } + test <- length(colPaternalBV) != length(colMaternalBV) + if (any(test)){ + stop("colPaternalBV and colMaternalBV must be of the same length!") + } + test <- length(colPaternalBV) != length(colBV) + if (any(test)){ + stop("colPaternalBV and colMaternalBV must be of the same length as colBV!") + } + test <- any(abs(x[, c(colBV)] - x[, c(colPaternalBV)] - x[, c(colMaternalBV)]) > 1e-8) + if (any(test)){ + stop("The sum of colPaternalBV and colMaternalBV must be equal to colBV for each individual!") + } + } ## Sort so that parents precede children if (sort) { @@ -307,7 +369,14 @@ AlphaPart <- function( } } } - y <- cbind(y, as.matrix(x[, colBV])) + + if (!gameticPartition){ + y <- cbind(y, as.matrix(x[, colBV])) + nGP <- 1 # Number of genetic partitions: total + } else { + y <- cbind(y, as.matrix(x[, colBV]), as.matrix(x[, colPaternalBV]), as.matrix(x[, colMaternalBV])) + nGP <- 3 # Number of genetic partitions: total, paternal, maternal + } ## Test if father and mother codes precede children code - ## computational engine needs this @@ -430,6 +499,7 @@ AlphaPart <- function( nI = nI, nP = nP, nT = nT, + nGP = nGP, ped = y, P = as.integer(P), Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))) @@ -448,14 +518,24 @@ AlphaPart <- function( nG = nG, ped = y, P = as.integer(P), - Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))) + Px = as.integer(cumsum(c(0, rep(nP, nT - 1)))), + g = g ) } ## Assign nice column names - colnames(tmp$pa) <- paste(lT, "_pa", sep = "") - colnames(tmp$ms) <- paste(lT, "_ms", sep = "") - colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) + if (!gameticPartition){ + colnames(tmp$pa) <- paste(lT, "_pa", sep = "") + colnames(tmp$ms) <- paste(lT, "_ms", sep = "") + colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) + } else { + #lPT <- colnames(x[, c(colBV, colPaternalBV, colMaternalBV), drop=FALSE]) + lP <- c(lP, paste0(lP, "_paternal"), paste0(lP, "_maternal")) + lPT <- unlist(lapply(c("","_paternal", "_maternal"), function(s) paste0(lT, s))) + colnames(tmp$pa) <- paste(lPT, "_pa", sep="") + colnames(tmp$ms) <- paste(lPT, "_ms", sep="") + colnames(tmp$xa) <- c(t(outer(lT, lP, paste, sep = "_"))) + } ## --- Massage results --- @@ -466,34 +546,64 @@ AlphaPart <- function( colP <- colnames(tmp$pa) colM <- colnames(tmp$ms) colX <- colnames(tmp$xa) - - for (j in 1:nT) { - ## j <- 1 - Py <- seq(t + 1, t + nP) - ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) - colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) - t <- max(Py) + + if (!gameticPartition) { + for (j in 1:nT) { + ## j <- 1 + Py <- seq(t + 1, t + nP) + ret[[j]] <- cbind(tmp$pa[-1, j], tmp$ms[-1, j], tmp$xa[-1, Py]) + colnames(ret[[j]]) <- c(colP[j], colM[j], colX[Py]) + t <- max(Py) + } + } else { + t <- 0 + for (j in 1:nT) { + ## j <- 1 + Py <- seq(t+1, j*nGP*nP) + pams <- c(j, j+nT, j+2*nT) + ret[[j]] <- cbind(tmp$pa[-1, pams], tmp$ms[-1, pams], tmp$xa[-1, Py]) + colnames(ret[[j]]) <- c(colP[pams], colM[pams], colX[Py]) + t <- max(Py) + } } - tmp <- NULL # not needed anymore + + tmp <- NULL # not needed anymore ## Add initial data if (!groupSummary) { - for (i in 1:nT) { - ## Hassle in order to get all columns and to be able to work with - ## numeric or character column "names" - colX <- colX2 <- colnames(x) - names(colX) <- colX - names(colX2) <- colX2 - ## ... put current agv in the last column in original data - colX <- c(colX[!(colX %in% colX[colBV[i]])], colX[colBV[i]]) - ## ... remove other traits - colX <- colX[ - !(colX %in% - colX2[(colX2 %in% colX2[colBV]) & !(colX2 %in% colX2[colBV[i]])]) - ] - ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) - rownames(ret[[i]]) <- NULL + if (!gameticPartition) { + for (i in 1:nT) { + ## Hassle in order to get all columns and to be able to work with + ## numeric or character column "names" + colX <- colX2 <- colnames(x) + names(colX) <- colX + names(colX2) <- colX2 + ## ... put current agv in the last column in original data + colX <- c(colX[!(colX %in% colX[colBV[i]])], colX[colBV[i]]) + ## ... remove other traits + colX <- colX[ + !(colX %in% + colX2[(colX2 %in% colX2[colBV]) & !(colX2 %in% colX2[colBV[i]])]) + ] + ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) + rownames(ret[[i]]) <- NULL + } + } else { + for (i in 1:nT) { + ## Hassle in order to get all columns and to be able to work with + ## numeric or character column "names" + colX <- colX2 <- colnames(x) + names(colX) <- colX; names(colX2) <- colX2 + ## ... put current agv in the last three columns in original data + colX <- c(colX[!(colX %in% c(colX[colBV[i]], colX[colPaternalBV[i]], colX[colMaternalBV[i]]))], + colX[colBV[i]], colX[colPaternalBV[i]], colX[colMaternalBV[i]]) + ## ... remove other traits + colX <- colX[!(colX %in% colX2[(colX2 %in% c(colX2[colBV], colX2[colPaternalBV], colX2[colMaternalBV])) & ! + (colX2 %in% c(colX2[colBV[i]], colX2[colPaternalBV[i]], colX2[colMaternalBV[i]]))])] + ret[[i]] <- cbind(x[, colX], as.data.frame(ret[[i]])) + rownames(ret[[i]]) <- NULL + } } } @@ -507,6 +617,7 @@ AlphaPart <- function( lP = lP, nT = nT, lT = lT, + gameticPartition = gameticPartition, warn = NULL ) ## names(ret)[nT+1] <- "info" diff --git a/R/RcppExports.R b/R/RcppExports.R index 71c5b38..bee3530 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,8 +1,8 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -AlphaPartDrop <- function(c1, c2, nI, nP, nT, ped, P, Px) { - .Call(`_AlphaPart_AlphaPartDrop`, c1, c2, nI, nP, nT, ped, P, Px) +AlphaPartDrop <- function(c1, c2, nI, nP, nT, nGP, ped, P, Px) { + .Call(`_AlphaPart_AlphaPartDrop`, c1, c2, nI, nP, nT, nGP, ped, P, Px) } AlphaPartDropGroup <- function(c1, c2, nI, nP, nT, nG, ped, P, Px, g) { diff --git a/R/methods.R b/R/methods.R index e484503..046dec6 100644 --- a/R/methods.R +++ b/R/methods.R @@ -262,6 +262,7 @@ summary.AlphaPart <- function( nT = nT, lT = lT, by = by, + gameticPartition = object$info$gameticPartition, warn = object$info$warn, labelSum = labelSum ) @@ -526,6 +527,8 @@ plot.summaryAlphaPart <- nP <- x$info$nP + x$info$nCov ret <- vector(mode = "list", length = nT) names(ret) <- x$info$lT + + ifelse(x$info$gameticPartition, nGP <- 3, nGP <- 1) ## Axis labels if (!is.null(xlab) && length(xlab) > 1) @@ -613,8 +616,8 @@ plot.summaryAlphaPart <- } ## Line type if (is.null(lineTypeList)) { - if (length(lineType) < nP) { - lineType <- c(1, rep(x = lineType, times = nP)) + if (length(lineType) < nP*nGP) { + lineType <- c(1, rep(x = lineType, times = nP*nGP)) } else { lineType <- c(1, lineType) } @@ -643,7 +646,7 @@ plot.summaryAlphaPart <- if (sortValue) { nC <- ncol(tmp0) pathStat <- sapply( - X = tmp0[, (nC - nP + 1):nC], + X = tmp0[, (nC - (nP*nGP) + 1):nC], FUN = sortValueFUN, na.rm = TRUE ) diff --git a/man/AlphaPart.Rd b/man/AlphaPart.Rd index 5a1ad11..3eb9603 100644 --- a/man/AlphaPart.Rd +++ b/man/AlphaPart.Rd @@ -17,6 +17,8 @@ AlphaPart( colMid = 3, colPath = 4, colBV = 5:ncol(x), + colPaternalBV = NULL, + colMaternalBV = NULL, colBy = NULL ) } @@ -68,7 +70,13 @@ ication if \code{pedType="IPG"} .} holding path information.} \item{colBV}{Numeric or character, position(s) or name(s) of -column(s) holding genetic Values.} +column(s) holding genetic values.} + +\item{colPaternalBV}{Numeric or character, position(s) of column(s) +holding the paternal genetic values for IBD partitioning.} + +\item{colMaternalBV}{Numeric or character, position(s) of column(s) +holding the maternal genetic values for IBD partitioning..} \item{colBy}{Numeric or character, position or name of a column holding group information (see details).} diff --git a/src/AlphaPartDrop.cpp b/src/AlphaPartDrop.cpp index 69eb6cf..3b6a821 100644 --- a/src/AlphaPartDrop.cpp +++ b/src/AlphaPartDrop.cpp @@ -2,17 +2,17 @@ using namespace Rcpp; // [[Rcpp::export]] -List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, +List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, NumericMatrix ped, IntegerVector P, IntegerVector Px) { // --- Temp --- - int i, j, t, p; + int i, j, t, p, pt=0, mt=0, k; // --- Outputs --- - NumericMatrix pa(nI+1, nT); // Parent average - NumericMatrix ms(nI+1, nT); // Mendelian sampling - NumericMatrix xa(nI+1, nP*nT); // Partitions + NumericMatrix pa(nI+1, nT*nGP); // Parent average + NumericMatrix ms(nI+1, nT*nGP); // Mendelian sampling + NumericMatrix xa(nI+1, nP*nT*nGP); // Partitions // NOTE: Rcpp::NumericMatrix is filled by 0s by default // TODO: Maybe we want an algorithm that works on one trait at a time to save on memory? @@ -22,7 +22,6 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, // https://github.com/AlphaGenes/AlphaPart/issues/13 // --- Compute --- - for(i = 1; i < nI+1; i++) { for(t = 0; t < nT; t++) { // Parent average (PA) @@ -31,18 +30,41 @@ List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, // Mendelian sampling (MS) ms(i, t) = ped(i, 3+t) - pa(i, t); + + // Gametic partition + if (nGP ==3) { + pt = t + nT; // paternal trait index + mt = t + 2*nT; // maternal trait index + pa(i, pt) = c1 * ped(ped(i,1), 3+pt) + c1 * ped(ped(i,1), 3+mt); + pa(i, mt) = c2 * ped(ped(i,2), 3+pt) + c2 * ped(ped(i,2), 3+mt); + ms(i, mt) = ped(i, 3+mt) - pa(i, mt); + ms(i, pt) = ped(i, 3+pt) - pa(i, pt); + } // Parts // ... for the MS part - j = Px[t] + P[i]; + j = nGP*Px[t] + P[i]; xa(i, j) = ms(i, t); + + if (nGP == 3) { + j = nGP*Px[t] + P[i] + nP; + xa(i, j) = ms(i, pt); + j = nGP*Px[t] + P[i] + nP*2; + xa(i, j) = ms(i, mt); + } // ... for the PA parts for(p = 0; p < nP; p++) { - j = Px[t] + p; + j = nGP*Px[t] + p; xa(i, j) += c1 * xa(ped(i, 1), j) + c2 * xa(ped(i, 2), j); + if (nGP == 3) { + j = nGP*Px[t] + p + nP; + k = nGP*Px[t] + p + nP*2; + xa(i, j) += c1 * xa(ped(i, 1), j) + c1 * xa(ped(i, 1), k); + xa(i, k) += c2 * xa(ped(i, 2), j) + c2 * xa(ped(i, 2), k); + } } } } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 9d6d431..07a198a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,8 +11,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // AlphaPartDrop -List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, NumericMatrix ped, IntegerVector P, IntegerVector Px); -RcppExport SEXP _AlphaPart_AlphaPartDrop(SEXP c1SEXP, SEXP c2SEXP, SEXP nISEXP, SEXP nPSEXP, SEXP nTSEXP, SEXP pedSEXP, SEXP PSEXP, SEXP PxSEXP) { +List AlphaPartDrop(double c1, double c2, int nI, int nP, int nT, int nGP, NumericMatrix ped, IntegerVector P, IntegerVector Px); +RcppExport SEXP _AlphaPart_AlphaPartDrop(SEXP c1SEXP, SEXP c2SEXP, SEXP nISEXP, SEXP nPSEXP, SEXP nTSEXP, SEXP nGPSEXP, SEXP pedSEXP, SEXP PSEXP, SEXP PxSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -21,10 +21,11 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type nI(nISEXP); Rcpp::traits::input_parameter< int >::type nP(nPSEXP); Rcpp::traits::input_parameter< int >::type nT(nTSEXP); + Rcpp::traits::input_parameter< int >::type nGP(nGPSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type ped(pedSEXP); Rcpp::traits::input_parameter< IntegerVector >::type P(PSEXP); Rcpp::traits::input_parameter< IntegerVector >::type Px(PxSEXP); - rcpp_result_gen = Rcpp::wrap(AlphaPartDrop(c1, c2, nI, nP, nT, ped, P, Px)); + rcpp_result_gen = Rcpp::wrap(AlphaPartDrop(c1, c2, nI, nP, nT, nGP, ped, P, Px)); return rcpp_result_gen; END_RCPP } @@ -50,7 +51,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_AlphaPart_AlphaPartDrop", (DL_FUNC) &_AlphaPart_AlphaPartDrop, 8}, + {"_AlphaPart_AlphaPartDrop", (DL_FUNC) &_AlphaPart_AlphaPartDrop, 9}, {"_AlphaPart_AlphaPartDropGroup", (DL_FUNC) &_AlphaPart_AlphaPartDropGroup, 10}, {NULL, NULL, 0} }; diff --git a/tests/testthat/test-alphapart.R b/tests/testthat/test-alphapart.R index e170522..6260415 100644 --- a/tests/testthat/test-alphapart.R +++ b/tests/testthat/test-alphapart.R @@ -764,3 +764,188 @@ test_that("Test computation - 2nd example", { expect_true(part$trait1$trait1_domestic[part$trait1$id == "V"] == 68.875) expect_true(part$trait1$trait1_import[part$trait1$id == "V"] == 40.125) }) + +test_that("Test IBD computation, one trait", { + # dput(AlphaPart.ped) so we have a fixed dataset + dat <- structure( + list( + id = structure( + c(1L, 2L, 3L, 4L, 5L, 6L, 7L), + levels = c("1", "2", "3", "4", "5", "6", "7"), + class = "factor" + ), + father = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "1", "4", "3"), + class = "factor" + ), + mother = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "2", "5", "6"), + class = "factor" + ), + generation = c(1, 1, 2, 1, 1, 2, 3), + group = structure( + c(1L, 1L, 1L, 2L, 2L, 2L, 2L), + levels = c("1", "2"), + class = "factor" + ), + trait1 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait1_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait1_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75) + ), + row.names = c(NA, -7L), + class = "data.frame" + ) + part <- AlphaPart(x = dat, colPath = "group", colBV = "trait1", + colPaternalBV = "trait1_paternal", + colMaternalBV = "trait1_maternal", verbose = 0) + + # group 1 is blue and group 2 is red + # a_1 & = 0 + \color{blue}{r_1} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "1"] == 1) + expect_true(part$trait1$trait1_2[part$trait1$id == "1"] == 0) + # a_paternal_1 & = 0 + \color{blue}{r_paternal_1} \\ + # & = \color{blue}{0.5} = 0.5 \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "1"] == 0.5) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "1"] == 0) + # a_maternal_1 & = 0 + \color{blue}{r_maternal_1} \\ + # & = \color{blue}{0.5} = 0.5 \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "1"] == 0.5) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "1"] == 0) + # a_2 & = 0 + \color{blue}{r_2} \\ + # & = \color{blue}{2} = 2 \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "2"] == 2) + expect_true(part$trait1$trait1_2[part$trait1$id == "2"] == 0) + # a_paternal_2 & = 0 + \color{blue}{r_paternal_2} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "2"] == 1) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "2"] == 0) + # a_maternal_2 & = 0 + \color{blue}{r_maternal_2} \\ + # & = \color{blue}{1} = 1 \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "2"] == 1) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "2"] == 0) + # a_3 & = \color{blue}{\frac{1}{2} r_1} + \color{blue}{\frac{1}{2} r_2} + + # \color{blue{r_3} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "3"] == 1.50) + expect_true(part$trait1$trait1_2[part$trait1$id == "3"] == 0) + # a_paternal_3 & = \color{blue}{\frac{1}{2} r_paternal_1} + \color{blue}{\frac{1}{2} r_maternal_1} \\ + # + \color{blue}{r_paternal_3} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "3"] == 0.5) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "3"] == 0) + # a_maternal_3 & = \color{blue}{\frac{1}{2} r_paternal_2} + \color{blue}{\frac{1}{2} r_maternal_2} \\ + # + \color{blue}{r_maternal_3} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "3"] == 1) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "3"] == 0) + # a_4 & = 0 + \color{red}{r_4} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "4"] == 0) + # a_paternal_4 & = 0 + \color{red}{r_paternal_4} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "4"] == 0) + # a_maternal_4 & = 0 + \color{red}{r_maternal_4} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "4"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "4"] == 0) + # a_5 & = 0 + \color{red}{r_5} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "5"] == 2) + # a_paternal_5 & = 0 + \color{red}{r_paternal_5} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "5"] == 1) + # a_maternal_5 & = 0 + \color{red}{r_maternal_5} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "5"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "5"] == 1) + # a_6 & = \color{red}{\frac{1}{2} r_4} + \color{red}{\frac{1}{2} r_5 + + # \color{red}{r_6} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2[part$trait1$id == "6"] == 1) + # a_paternal_6 & = \color{red}{\frac{1}{2} r_paternal_4} + \color{red}{\frac{1}{2} r_maternal_4} \\ + # + \color{red}{r_paternal_6} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "6"] == 0) + # a_maternal_6 & = \color{red}{\frac{1}{2} r_paternal_5} + \color{red{\frac{1}{2} r_maternal_5} \\ + # + \color{blue}{r_maternal_6} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "6"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "6"] == 1) + # a_7 & = \color{blue}{\frac{1}{4} r_1} + \color{blue}{\frac{1}{4} r_2} + + # \color{red{\frac{1}{4} r_4} + \color{red}{\frac{1}{4} r_5} + + # \color{blue}{\frac{1}{2} r_3} + \color{red}{\frac{1}{2} r_6 + + # \color{red}{r_7} \\ + expect_true(part$trait1$trait1_1[part$trait1$id == "7"] == 0.75) + expect_true(part$trait1$trait1_2[part$trait1$id == "7"] == 0.5) + # a_paternal_7 & = \color{blue}{\frac{1}{4} r_paternal_1} + \color{blue}{\frac{1}{4} r_maternal_1} \\ + # + \color{blue}{\frac{1}{4} r_paternal_2} + \color{blue}{\frac{1}{4} r_maternal_2} \\ + # + \color{blue}{\frac{1}{2} r_paternal_3} + \color{blue}{\frac{1}{2} r_maternal_3} \\ + # + \color{red}{r_paternal_7} \\ + expect_true(part$trait1$trait1_1_paternal[part$trait1$id == "7"] == 0.75) + expect_true(part$trait1$trait1_2_paternal[part$trait1$id == "7"] == -0.25) + # a_maternal_7 & = \color{red}{\frac{1}{4} r_paternal_4} + \color{red}{\frac{1}{4} r_maternal_4} \\ + # + \color{red}{\frac{1}{4} r_paternal_5} + \color{red}{\frac{1}{4} r_maternal_5} \\ + # + \color{red}{\frac{1}{2} r_paternal_6} + \color{red}{\frac{1}{2} r_maternal_6} \\ + # + \color{red}{r_maternal_7} \\ + expect_true(part$trait1$trait1_1_maternal[part$trait1$id == "7"] == 0) + expect_true(part$trait1$trait1_2_maternal[part$trait1$id == "7"] == 0.75) +}) + +test_that("Test IBD computation, two traits", { + # dput(AlphaPart.ped) so we have a fixed dataset + dat <- structure( + list( + id = structure( + c(1L, 2L, 3L, 4L, 5L, 6L, 7L), + levels = c("1", "2", "3", "4", "5", "6", "7"), + class = "factor" + ), + father = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "1", "4", "3"), + class = "factor" + ), + mother = structure( + c(1L, 1L, 2L, 1L, 1L, 3L, 4L), + levels = c("0", "2", "5", "6"), + class = "factor" + ), + generation = c(1, 1, 2, 1, 1, 2, 3), + group = structure( + c(1L, 1L, 1L, 2L, 2L, 2L, 2L), + levels = c("1", "2"), + class = "factor" + ), + trait1 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait1_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait1_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75), + trait2 = c(1, 2, 1.5, 0, 2, 1, 1.25), + trait2_paternal = c(0.5, 1, 0.5, 0, 1, 0, 0.5), + trait2_maternal = c(0.5, 1, 1, 0, 1, 1, 0.75) + ), + row.names = c(NA, -7L), + class = "data.frame" + ) + part <- AlphaPart(x = dat, colPath = "group", colBV = c("trait1", "trait2"), + colPaternalBV = c("trait1_paternal", "trait2_paternal"), + colMaternalBV = c("trait1_maternal", "trait2_maternal"), verbose = 0) + + # group 1 is blue and group 2 is red + # Just test second trait on individual 7 + # a_7 & = \color{blue}{\frac{1}{4} r_1} + \color{blue}{\frac{1}{4} r_2} + + # \color{red{\frac{1}{4} r_4} + \color{red}{\frac{1}{4} r_5} + + # \color{blue}{\frac{1}{2} r_3} + \color{red}{\frac{1}{2} r_6 + + # \color{red}{r_7} \\ + expect_true(part$trait2$trait2_1[part$trait2$id == "7"] == 0.75) + expect_true(part$trait2$trait2_2[part$trait2$id == "7"] == 0.5) + # a_paternal_7 & = \color{blue}{\frac{1}{4} r_paternal_1} + \color{blue}{\frac{1}{4} r_maternal_1} \\ + # + \color{blue}{\frac{1}{4} r_paternal_2} + \color{blue}{\frac{1}{4} r_maternal_2} \\ + # + \color{blue}{\frac{1}{2} r_paternal_3} + \color{blue}{\frac{1}{2} r_maternal_3} \\ + # + \color{red}{r_paternal_7} \\ + expect_true(part$trait2$trait2_1_paternal[part$trait2$id == "7"] == 0.75) + expect_true(part$trait2$trait2_2_paternal[part$trait2$id == "7"] == -0.25) + # a_maternal_7 & = \color{red}{\frac{1}{4} r_paternal_4} + \color{red}{\frac{1}{4} r_maternal_4} \\ + # + \color{red}{\frac{1}{4} r_paternal_5} + \color{red}{\frac{1}{4} r_maternal_5} \\ + # + \color{red}{\frac{1}{2} r_paternal_6} + \color{red}{\frac{1}{2} r_maternal_6} \\ + # + \color{red}{r_maternal_7} \\ + expect_true(part$trait2$trait2_1_maternal[part$trait2$id == "7"] == 0) + expect_true(part$trait2$trait2_2_maternal[part$trait2$id == "7"] == 0.75) +}) + diff --git a/tests/testthat/test-plotSummaryAlphaPart.R b/tests/testthat/test-plotSummaryAlphaPart.R index 8b78364..4fd919a 100644 --- a/tests/testthat/test-plotSummaryAlphaPart.R +++ b/tests/testthat/test-plotSummaryAlphaPart.R @@ -10,7 +10,7 @@ test_that("Test plotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) sum <- summary(tmp) expect_error(plot.summaryAlphaPart(sum), "output is provided only when the 'by' argument is defined on the 'summary' function") diff --git a/tests/testthat/test-printAlphaPart.R b/tests/testthat/test-printAlphaPart.R index 256c7c1..d5a763c 100644 --- a/tests/testthat/test-printAlphaPart.R +++ b/tests/testthat/test-printAlphaPart.R @@ -9,10 +9,9 @@ test_that("Test print.AlphaPart", { trt1=c(100, 120, 115, 130, 125, 125), trt2=c(100, 110, 105, 100, 85, 110), gen=c( 1, 1, 2, 2, 3, 3)) - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## Partition additive genetic values - expect_equal(print(tmp$trt1[,"trt1_w"], digits=1), c(100,120,5,130,2.5,125)) - expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE)),NULL) - expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = TRUE)),NULL) + expect_equal(print(tmp$trt1[,"trt1_ms"], digits=1), c(100,120,5,130,2.5,125)) + expect_equal(print.AlphaPart(AlphaPart(x=ped, colBV=c("trt1", "trt2"))),NULL) expect_equal(print(tmp$info),tmp$info) }) diff --git a/tests/testthat/test-printPlotSummaryAlphaPart.R b/tests/testthat/test-printPlotSummaryAlphaPart.R index c1b0398..12f0cd9 100644 --- a/tests/testthat/test-printPlotSummaryAlphaPart.R +++ b/tests/testthat/test-printPlotSummaryAlphaPart.R @@ -9,7 +9,7 @@ test_that("Test print.PlotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV="trt1", center = FALSE) + tmp <- AlphaPart(x=ped, colBV="trt1") sum <- summary(tmp, by="gen") k <- print(plot.summaryAlphaPart(sum)) expect_equal(k,NULL) diff --git a/tests/testthat/test-printSummary-alphapart.R b/tests/testthat/test-printSummary-alphapart.R index e37f739..6b91b82 100644 --- a/tests/testthat/test-printSummary-alphapart.R +++ b/tests/testthat/test-printSummary-alphapart.R @@ -12,7 +12,7 @@ test_that("Test Printsummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## Test summary for trt1 expect_equal(print.AlphaPart(summary(tmp, by="gen")), NULL) }) diff --git a/tests/testthat/test-savePlotSummaryAlphaPart.R b/tests/testthat/test-savePlotSummaryAlphaPart.R index 16b20b4..9346860 100644 --- a/tests/testthat/test-savePlotSummaryAlphaPart.R +++ b/tests/testthat/test-savePlotSummaryAlphaPart.R @@ -10,7 +10,7 @@ test_that("Test savePlotSummary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - m <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + m <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) sum <- summary(m, by="gen") p1 <- plot.summaryAlphaPart(sum) diff --git a/tests/testthat/test-summary-alphapart.R b/tests/testthat/test-summary-alphapart.R index 7e9dc74..fe65474 100644 --- a/tests/testthat/test-summary-alphapart.R +++ b/tests/testthat/test-summary-alphapart.R @@ -12,7 +12,7 @@ test_that("Test summary.AlphaPart", { gen=c( 1, 1, 2, 2, 3, 3)) ## Partition additive genetic values - tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), center = FALSE) + tmp <- AlphaPart(x=ped, colBV=c("trt1", "trt2")) ## ## Trait: trt1 ## @@ -94,7 +94,7 @@ test_that("Test summary.AlphaPart", { ## Test the direct use of by group analysis in the AlphaPart function ped$gen <- factor(ped$gen) tmp1 <- summary(AlphaPart(x=ped, colBV=c("trt1", "trt2")), by="gen") - tmp2 <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), colBy="gen", center=FALSE) + tmp2 <- AlphaPart(x=ped, colBV=c("trt1", "trt2"), colBy="gen") expect_equal(tmp1, tmp2) }) diff --git a/vignettes/ibd.Rmd b/vignettes/ibd.Rmd index b9ada40..30d62eb 100644 --- a/vignettes/ibd.Rmd +++ b/vignettes/ibd.Rmd @@ -1,6 +1,6 @@ --- title: "AlphaPart: Partitioning with idenity-by-descent (IBD) information" -author: "Ros Craddock and Gregor Gorjanc" +author: "Rosalind Craddock and Gregor Gorjanc" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > @@ -12,14 +12,161 @@ vignette: > ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` -TODO: Implement gametic/IBD/phased-based partitioning #18 - https://github.com/AlphaGenes/AlphaPart/issues/18 ## Introduction +Through application of the gametes model into AlphaPart, it is possible to partition genetic values (such as phased genotype probabilities) on identity-by-descent (IBD) information. This allows the partitioning of genetic values into contributions from individual haplotypes or chromosome segments to leverage phased genotype data. Thus, providing further insights into the sources of genetic variation within a population. This vignette will demonstrate how to use AlphaPart for partitioning with IBD information and, briefly, how to interpret the results. * TODO: Cite Ros's paper on partitioning with IBD information #44 https://github.com/AlphaGenes/AlphaPart/issues/44 +## Load packages +```{r} +rm(list=ls()) +library(AlphaPart) +``` + +## Data preparation +To demonstrate partitioning with IBD information, we will use a simulated dataset with id, mother id, father id, and phased genotype probabilities for each individual. For this example, we have simulated 10 individuals, all with phased genotypes at a single locus (e.g., a SNP). This is for demonstration purpose only; in practice, you would use a larger datasets to achieve different results to without IBD information. + +Note here that we include one UPG to handle the non-zero mean of the allele dosages in the founders. For more information on this, please refer to the `founders.rmd` vignette. + +### Dataset +```{r} +ped <- data.frame( + id = c("UPG1", 1:10), + fid = c(0, "UPG1", "UPG1", "UPG1", "UPG1", "UPG1", 2, 4, 4, 7, 8), + mid = c(0, "UPG1", "UPG1", "UPG1", "UPG1", "UPG1", 1, 3, 5, 6, 9), + group = c("UPG1", "female", "male", "female", "male", "female", "female", "male", "male", "female", "female"), + gen = c(0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4), + locus1_geno_probAA = c(NA, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), + locus1_geno_probBA = c(NA, 0.5, 0.5, 0, 0.5, 0, 0, 1, 1, 1, 0), + locus1_geno_probAB = c(NA, 0.5, 0.5, 0, 0.5, 0, 0, 0, 0, 0, 0), + locus1_geno_probBB = c(NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) +) + +ped +``` + +### Reorganise data for AlphaPart +There are two steps to reorganise the data for AlphaPart: +1. Convert the phased genotype probabilities into allele dosages (or allele frequencies, if preferred). Call this column locus1_geno +2. Convert the phased genotype probabilities into two columns: paternal_BV and maternal_BV, representing the contributions from the paternal and maternal haplotypes, respectively. + +```{r} +# Step 1: Calculate allele dosages +ped$locus1_geno <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 1 + + locus1_geno_probAB * 1 + + locus1_geno_probBB * 2 +) + +# Estimate for the UPG using the mean of the founders +ped$locus1_geno[ped$id == "UPG1"] <- mean(ped$locus1_geno[which(ped$gen == 1)], na.rm = TRUE) +ped +``` +```{r} +# Step 2: Calculate paternal and maternal contributions +ped$locus1_paternal <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 1 + + locus1_geno_probAB * 0 + + locus1_geno_probBB * 1 +) +ped$locus1_maternal <- with(ped, + locus1_geno_probAA * 0 + + locus1_geno_probBA * 0 + + locus1_geno_probAB * 1 + + locus1_geno_probBB * 1 +) + +# For UPG, just half the allele dosage +ped$locus1_paternal[ped$id == "UPG1"] <- ped$locus1_geno[ped$id == "UPG1"] / 2 +ped$locus1_maternal[ped$id == "UPG1"] <- ped$locus1_geno[ped$id == "UPG1"] / 2 + +ped +``` + +## Partitioning with AlphaPart +Now that the data is prepared, we can use AlphaPart to partition by IBD. Compared to the standard AlphaPart, this requires two additional arguments to be defined: +1. colPaternalBV: the column name or number representing the paternal haplotype contributions +2. colMaternalBV: the column name or number representing the maternal haplotype contributions + +```{r} +part_ibd <- AlphaPart( + x = ped[c(1:5,10:12)], + colId = "id", + colFid = "fid", + colMid = "mid", + colPath = "group", + colBV = "locus1_geno", + colPaternalBV = "locus1_paternal", + colMaternalBV = "locus1_maternal" +) + +part_ibd[["locus1_geno"]] + +``` +Both results for standard partitioning and partitioning using IBD information are recorded within the same table for trait "locus1_geno". There are results for the parent average, the Mendelian sampling term, and finally the partitions for both standard and IBD partitioning. In the next section, we will use these results to summarise and plot the contributions over generations and briefly discuss interpretation. + +## Interpretting the results + +### Plot the results by generation first without IBD information +```{r} +part_ibdSum <- summary(part_ibd, by = "gen") +# First lets plot the standard partitioning without IBD information +part_stand_sum <- part_ibdSum[["locus1_geno"]][c(1,3,4,5,6)] +# Convert to long format +part_stand_long <- reshape2::melt(part_stand_sum, id.vars = "gen") +# Plot standard partitioning +library(ggplot2) +standardPlot <- ggplot(part_stand_long, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Standard Partitioning without IBD information", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#DC697D" ,"#90D5FF", "#FFB14E" )) +plot(standardPlot) +``` +As we only have one UPG, the UPG contribution is the same throughout all generations as shown in the yellow. The within pedigree partitions for male (blue) and female (pink) contributions change over time. All three partitions sum up to give the overall change in allele dosage across generations (in black). In generation 1 and 2, the male contributes more to the allele dosage compared to females since all alternative alleles are found in males. In generation 3, there is one individual who is female with allele dosage 1 and Mendelian sampling term of 0.5. This increases the female contribution to reduce the distance between males and females. The male contribution is only marginally reduced in generation 3 due to contributions from the sire (Mendelian sampling term of 0.5) and grandsire (Mendelain sampling term of 0.4) for the alternative allele. In generation 4, the female contribution is now larger than the male contribution because individual 10 is female and has a large Mendelian sampling term of 1. + +### Plot the results by generation with IBD information +```{r} +part_ibd_only <- part_ibdSum[["locus1_geno"]][c(1,3,7:12)] +# Convert to long format +part_ibd_long <- reshape2::melt(part_ibd_only, id.vars = "gen") +# Plot IBD partitioning +ibdPlot <- ggplot(part_ibd_long, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Partitioning with IBD information", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#FFCCDE", "#C4FFFF", "#FFFFAF", "#FF70A3", "#00AEFF", "#FFE313")) +plot(ibdPlot) +``` +When partitioning with IBD information we double the number of partitions compared to standard partitioning. This is because we now have separate contributions from paternal and maternal haplotypes for each group. The black line is the same as before and is equivalent to the sum of all the six IBD partitions. Both UPG_paternal and UPG_maternal are equivalent as expected with a value of 0.3 in all generations. Generally the female_maternal path is negative, with the exception of generation 4. The male_paternal partition has a consistently high contribution across all generations. + +### Compare both without and with IBD information +```{r} +# Plot both graphs side by side +part_ibd_long$type <- "With IBD" +part_stand_long$type <- "Without IBD" +combined_part <- rbind(part_ibd_long, part_stand_long) +combinedPlot <- ggplot(combined_part, aes(x = gen, y = value, color = variable)) + + geom_line(size = 1) + + labs(title = "Partitioning Comparison", + x = "Generation", + y = "Allele Dosage Contribution") + + theme_minimal() + + scale_colour_manual(values = c("black", "#DC697D" ,"#90D5FF", "#FFB14E" , + "#FFCCDE", "#C4FFFF", "#FFFFAF", "#FF70A3", "#00AEFF", "#FFE313")) + + facet_wrap(~type) +plot(combinedPlot) +``` +When visualised side by side, its clear to see that the male partition (on the right) is largely driven by the paternal segregation of the alternative allele (on the left). The same is observed in the female partition (right) which is largely driven by the maternal segregation of the normal allele (left). Given in this simplistic example all males are sires and all females (except one) are dams, this is expected. In practice, with larger datasets, multiple loci, and more complex pedigrees and/or grouping, the IBD partitioning can provide further insights into the sources of genetic variation within a population. With multiple loci, the described process is repeated across each locus. Thus, each locus is assumed independent when partitioning in AlphaPart but the results can be aggregated across loci post-partitioning. + ## References * TODO: Cite Ros's paper on partitioning with IBD information #44