|
| 1 | +# Exported classes ------------------------------------------------------------- |
| 2 | + |
| 3 | +# NOTE: This create a 'classGeneratorFunction' (internal constructor), .BSseq() |
| 4 | +.MethylCounts <- setClass( |
| 5 | + "MethylCounts", |
| 6 | + slots = representation( |
| 7 | + parameters = "list"), |
| 8 | + contains = "RangedSummarizedExperiment" |
| 9 | +) |
| 10 | + |
| 11 | +# Validity methods ------------------------------------------------------------- |
| 12 | + |
| 13 | +# TODO: Benchmark validity method |
| 14 | +setValidity2("MethylCounts", function(object) { |
| 15 | + msg <- NULL |
| 16 | + |
| 17 | + if (identical(object, .BSseq())) { |
| 18 | + # No validity checks for object returned by internal constructor |
| 19 | + return(msg) |
| 20 | + } |
| 21 | + msg <- validMsg(msg, .checkAssayNames(object, c("M", "U"))) |
| 22 | + |
| 23 | + ## TODO: Add a check like .checkMandCov |
| 24 | + ## It should check that M, U, D (optionally H) are integers >= 0 |
| 25 | + if (is.null(msg)) { |
| 26 | + TRUE |
| 27 | + } else { |
| 28 | + msg |
| 29 | + } |
| 30 | +}) |
| 31 | + |
| 32 | +# Exported functions ----------------------------------------------------------- |
| 33 | + |
| 34 | +# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq |
| 35 | +# object when called without any arguments. It also does some pretty |
| 36 | +# complicated parsing of the inputs. But we're stuck with it because it's |
| 37 | +# been around for a long time. |
| 38 | + |
| 39 | +MethylCounts <- function(M = NULL, U = NULL, D = NULL, H = NULL, |
| 40 | + parameters = NULL, pData = NULL, gr = NULL, |
| 41 | + pos = NULL, chr = NULL, sampleNames = NULL, |
| 42 | + rmZeroCov = FALSE) { |
| 43 | + |
| 44 | + # Argument checks ---------------------------------------------------------- |
| 45 | + |
| 46 | + # Process assays. |
| 47 | + # NOTE: Nothing to do for 'coef', and 'se.coef'. |
| 48 | + if (is.null(M) || is.null(Cov)) { |
| 49 | + stop("Need 'M' and 'Cov'.") |
| 50 | + } |
| 51 | + # Process 'trans' and 'parameters'. |
| 52 | + if (is.null(trans)) { |
| 53 | + trans <- function() NULL |
| 54 | + environment(trans) <- emptyenv() |
| 55 | + } |
| 56 | + if (is.null(parameters)) { |
| 57 | + parameters <- list() |
| 58 | + } |
| 59 | + # Process 'sampleNames' and 'pData'. |
| 60 | + if (is.null(sampleNames)) { |
| 61 | + if (is.null(pData)) { |
| 62 | + # BSseq object will have no colnames. |
| 63 | + pData <- make_zero_col_DFrame(ncol(M)) |
| 64 | + } else { |
| 65 | + # BSseq object will have 'sampleNames' as colnames. |
| 66 | + pData <- DataFrame(row.names = sampleNames) |
| 67 | + } |
| 68 | + } else { |
| 69 | + if (is.null(pData)) { |
| 70 | + # BSseq object will have 'sampleNames' as colnames. |
| 71 | + pData <- DataFrame(row.names = sampleNames) |
| 72 | + } else { |
| 73 | + if (is.null(rownames(pData))) { |
| 74 | + rownames(pData) <- sampleNames |
| 75 | + } else { |
| 76 | + stopifnot(identical(rownames(pData), sampleNames)) |
| 77 | + } |
| 78 | + } |
| 79 | + } |
| 80 | + # Process 'gr', 'pos', and 'chr'. |
| 81 | + if (is.null(gr)) { |
| 82 | + if (is.null(pos) || is.null(chr)) { |
| 83 | + stop("Need 'pos' and 'chr' if 'gr' not supplied.") |
| 84 | + } |
| 85 | + gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L)) |
| 86 | + } |
| 87 | + if (!is(gr, "GRanges")) { |
| 88 | + stop("'gr' needs to be a GRanges.") |
| 89 | + } |
| 90 | + # Process 'rmZeroCov'. |
| 91 | + stopifnot(isTRUEorFALSE(rmZeroCov)) |
| 92 | + |
| 93 | + # Collapse duplicate loci -------------------------------------------------- |
| 94 | + |
| 95 | + is_duplicated <- duplicated(gr) |
| 96 | + if (any(is_duplicated)) { |
| 97 | + warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ", |
| 98 | + "at these positions.") |
| 99 | + if (!is.null(coef) || !is.null(se.coef)) { |
| 100 | + stop("Cannot collapse when 'coef' or 'se.coef' are non-NULL.") |
| 101 | + } |
| 102 | + loci <- gr[!is_duplicated] |
| 103 | + ol <- findOverlaps(gr, loci, type = "equal") |
| 104 | + M <- rowsum(x = M, group = subjectHits(ol), reorder = FALSE) |
| 105 | + rownames(M) <- NULL |
| 106 | + Cov <- rowsum(x = Cov, group = subjectHits(ol), reorder = FALSE) |
| 107 | + rownames(Cov) <- NULL |
| 108 | + if (!is.null(Filtered)){ |
| 109 | + Filtered <- rowsum(x = Filtered, group = subjectHits(ol), reorder = FALSE) |
| 110 | + rownames(Filtered) <- NULL} |
| 111 | + } else { |
| 112 | + loci <- gr |
| 113 | + } |
| 114 | + |
| 115 | + # Optionally, remove positions with zero coverage -------------------------- |
| 116 | + |
| 117 | + if (rmZeroCov) { |
| 118 | + loci_with_zero_cov <- rowAlls(Cov, value = 0) |
| 119 | + if (any(loci_with_zero_cov)) { |
| 120 | + loci_with_nonzero_cov <- !loci_with_zero_cov |
| 121 | + gr <- gr[loci_with_nonzero_cov] |
| 122 | + M <- M[loci_with_nonzero_cov, , drop = FALSE] |
| 123 | + Cov <- Cov[loci_with_nonzero_cov, , drop = FALSE] |
| 124 | + coef <- coef[loci_with_nonzero_cov, , drop = FALSE] |
| 125 | + se.coef <- se.coef[loci_with_nonzero_cov, , drop = FALSE] |
| 126 | + Filtered <- Filtered[loci_with_nonzero_cov, , drop = FALSE] |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + # Construct BSseq object --------------------------------------------------- |
| 131 | + |
| 132 | + assays <- SimpleList(M = M, Cov = Cov, Filtered = Filtered, |
| 133 | + coef = coef, se.coef = se.coef) |
| 134 | + assays <- assays[!S4Vectors:::sapply_isNULL(assays)] |
| 135 | + se <- SummarizedExperiment( |
| 136 | + assays = assays, |
| 137 | + rowRanges = loci, |
| 138 | + colData = pData) |
| 139 | + .BSseq(se, trans = trans, parameters = parameters) |
| 140 | +} |
| 141 | + |
| 142 | +## Move to BSseq-utils? |
| 143 | +getMethylCounts <- function(MethylCounts, |
| 144 | + type = c("M", "U", "H", "D", "gr", "parameters"), |
| 145 | + withDimnames = TRUE) { |
| 146 | + type <- match.arg(type) |
| 147 | + if (type %in% c("M", "U", "H", "D")) { |
| 148 | + return(assay(MethylCounts, type, withDimnames = withDimnames)) |
| 149 | + } |
| 150 | + if (type == "parameters") { |
| 151 | + return(MethylCounts@parameters) |
| 152 | + } |
| 153 | + if (type == "gr") { |
| 154 | + return(MethylCounts@rowRanges) |
| 155 | + } |
| 156 | +} |
| 157 | + |
| 158 | +# Exported methods ------------------------------------------------------------- |
| 159 | + |
| 160 | +setMethod("show", signature(object = "MethylCounts"), function(object) { |
| 161 | + cat("An object of type 'MethylCounts' with\n") |
| 162 | + cat(" ", nrow(object), "methylation loci\n") |
| 163 | + cat(" ", ncol(object), "samples\n") |
| 164 | + if (.isHDF5ArrayBacked(object)) { |
| 165 | + cat("Some assays are HDF5Array-backed\n") |
| 166 | + } else { |
| 167 | + cat("All assays are in-memory\n") |
| 168 | + } |
| 169 | +}) |
| 170 | + |
| 171 | +setMethod("pData", "MethylCounts", function(object) { |
| 172 | + object@colData |
| 173 | +}) |
| 174 | + |
| 175 | +setReplaceMethod( |
| 176 | + "pData", |
| 177 | + signature = signature(object = "MethylCounts", value = "data.frame"), |
| 178 | + function(object, value) { |
| 179 | + colData(object) <- as(value, "DataFrame") |
| 180 | + object |
| 181 | + } |
| 182 | +) |
| 183 | + |
| 184 | +setReplaceMethod( |
| 185 | + "pData", |
| 186 | + signature = signature(object = "MethylCounts", value = "DataFrame"), |
| 187 | + function(object, value) { |
| 188 | + colData(object) <- value |
| 189 | + object |
| 190 | + } |
| 191 | +) |
| 192 | + |
| 193 | +setMethod("sampleNames", "MethylCounts", function(object) colnames(object)) |
| 194 | + |
| 195 | +setReplaceMethod( |
| 196 | + "sampleNames", |
| 197 | + signature = signature(object = "MethylCounts", value = "ANY"), |
| 198 | + function(object, value) { |
| 199 | + colnames(object) <- value |
| 200 | + object |
| 201 | + } |
| 202 | +) |
0 commit comments