Skip to content

Commit 35e23db

Browse files
initial work on MethylCounts
1 parent a0f3ce3 commit 35e23db

1 file changed

Lines changed: 202 additions & 0 deletions

File tree

R/MethylCounts.R

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
# Exported classes -------------------------------------------------------------
2+
3+
# NOTE: This create a 'classGeneratorFunction' (internal constructor), .BSseq()
4+
.MethylCounts <- setClass(
5+
"MethylCounts",
6+
slots = representation(
7+
parameters = "list"),
8+
contains = "RangedSummarizedExperiment"
9+
)
10+
11+
# Validity methods -------------------------------------------------------------
12+
13+
# TODO: Benchmark validity method
14+
setValidity2("MethylCounts", function(object) {
15+
msg <- NULL
16+
17+
if (identical(object, .BSseq())) {
18+
# No validity checks for object returned by internal constructor
19+
return(msg)
20+
}
21+
msg <- validMsg(msg, .checkAssayNames(object, c("M", "U")))
22+
23+
## TODO: Add a check like .checkMandCov
24+
## It should check that M, U, D (optionally H) are integers >= 0
25+
if (is.null(msg)) {
26+
TRUE
27+
} else {
28+
msg
29+
}
30+
})
31+
32+
# Exported functions -----------------------------------------------------------
33+
34+
# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq
35+
# object when called without any arguments. It also does some pretty
36+
# complicated parsing of the inputs. But we're stuck with it because it's
37+
# been around for a long time.
38+
39+
MethylCounts <- function(M = NULL, U = NULL, D = NULL, H = NULL,
40+
parameters = NULL, pData = NULL, gr = NULL,
41+
pos = NULL, chr = NULL, sampleNames = NULL,
42+
rmZeroCov = FALSE) {
43+
44+
# Argument checks ----------------------------------------------------------
45+
46+
# Process assays.
47+
# NOTE: Nothing to do for 'coef', and 'se.coef'.
48+
if (is.null(M) || is.null(Cov)) {
49+
stop("Need 'M' and 'Cov'.")
50+
}
51+
# Process 'trans' and 'parameters'.
52+
if (is.null(trans)) {
53+
trans <- function() NULL
54+
environment(trans) <- emptyenv()
55+
}
56+
if (is.null(parameters)) {
57+
parameters <- list()
58+
}
59+
# Process 'sampleNames' and 'pData'.
60+
if (is.null(sampleNames)) {
61+
if (is.null(pData)) {
62+
# BSseq object will have no colnames.
63+
pData <- make_zero_col_DFrame(ncol(M))
64+
} else {
65+
# BSseq object will have 'sampleNames' as colnames.
66+
pData <- DataFrame(row.names = sampleNames)
67+
}
68+
} else {
69+
if (is.null(pData)) {
70+
# BSseq object will have 'sampleNames' as colnames.
71+
pData <- DataFrame(row.names = sampleNames)
72+
} else {
73+
if (is.null(rownames(pData))) {
74+
rownames(pData) <- sampleNames
75+
} else {
76+
stopifnot(identical(rownames(pData), sampleNames))
77+
}
78+
}
79+
}
80+
# Process 'gr', 'pos', and 'chr'.
81+
if (is.null(gr)) {
82+
if (is.null(pos) || is.null(chr)) {
83+
stop("Need 'pos' and 'chr' if 'gr' not supplied.")
84+
}
85+
gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L))
86+
}
87+
if (!is(gr, "GRanges")) {
88+
stop("'gr' needs to be a GRanges.")
89+
}
90+
# Process 'rmZeroCov'.
91+
stopifnot(isTRUEorFALSE(rmZeroCov))
92+
93+
# Collapse duplicate loci --------------------------------------------------
94+
95+
is_duplicated <- duplicated(gr)
96+
if (any(is_duplicated)) {
97+
warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ",
98+
"at these positions.")
99+
if (!is.null(coef) || !is.null(se.coef)) {
100+
stop("Cannot collapse when 'coef' or 'se.coef' are non-NULL.")
101+
}
102+
loci <- gr[!is_duplicated]
103+
ol <- findOverlaps(gr, loci, type = "equal")
104+
M <- rowsum(x = M, group = subjectHits(ol), reorder = FALSE)
105+
rownames(M) <- NULL
106+
Cov <- rowsum(x = Cov, group = subjectHits(ol), reorder = FALSE)
107+
rownames(Cov) <- NULL
108+
if (!is.null(Filtered)){
109+
Filtered <- rowsum(x = Filtered, group = subjectHits(ol), reorder = FALSE)
110+
rownames(Filtered) <- NULL}
111+
} else {
112+
loci <- gr
113+
}
114+
115+
# Optionally, remove positions with zero coverage --------------------------
116+
117+
if (rmZeroCov) {
118+
loci_with_zero_cov <- rowAlls(Cov, value = 0)
119+
if (any(loci_with_zero_cov)) {
120+
loci_with_nonzero_cov <- !loci_with_zero_cov
121+
gr <- gr[loci_with_nonzero_cov]
122+
M <- M[loci_with_nonzero_cov, , drop = FALSE]
123+
Cov <- Cov[loci_with_nonzero_cov, , drop = FALSE]
124+
coef <- coef[loci_with_nonzero_cov, , drop = FALSE]
125+
se.coef <- se.coef[loci_with_nonzero_cov, , drop = FALSE]
126+
Filtered <- Filtered[loci_with_nonzero_cov, , drop = FALSE]
127+
}
128+
}
129+
130+
# Construct BSseq object ---------------------------------------------------
131+
132+
assays <- SimpleList(M = M, Cov = Cov, Filtered = Filtered,
133+
coef = coef, se.coef = se.coef)
134+
assays <- assays[!S4Vectors:::sapply_isNULL(assays)]
135+
se <- SummarizedExperiment(
136+
assays = assays,
137+
rowRanges = loci,
138+
colData = pData)
139+
.BSseq(se, trans = trans, parameters = parameters)
140+
}
141+
142+
## Move to BSseq-utils?
143+
getMethylCounts <- function(MethylCounts,
144+
type = c("M", "U", "H", "D", "gr", "parameters"),
145+
withDimnames = TRUE) {
146+
type <- match.arg(type)
147+
if (type %in% c("M", "U", "H", "D")) {
148+
return(assay(MethylCounts, type, withDimnames = withDimnames))
149+
}
150+
if (type == "parameters") {
151+
return(MethylCounts@parameters)
152+
}
153+
if (type == "gr") {
154+
return(MethylCounts@rowRanges)
155+
}
156+
}
157+
158+
# Exported methods -------------------------------------------------------------
159+
160+
setMethod("show", signature(object = "MethylCounts"), function(object) {
161+
cat("An object of type 'MethylCounts' with\n")
162+
cat(" ", nrow(object), "methylation loci\n")
163+
cat(" ", ncol(object), "samples\n")
164+
if (.isHDF5ArrayBacked(object)) {
165+
cat("Some assays are HDF5Array-backed\n")
166+
} else {
167+
cat("All assays are in-memory\n")
168+
}
169+
})
170+
171+
setMethod("pData", "MethylCounts", function(object) {
172+
object@colData
173+
})
174+
175+
setReplaceMethod(
176+
"pData",
177+
signature = signature(object = "MethylCounts", value = "data.frame"),
178+
function(object, value) {
179+
colData(object) <- as(value, "DataFrame")
180+
object
181+
}
182+
)
183+
184+
setReplaceMethod(
185+
"pData",
186+
signature = signature(object = "MethylCounts", value = "DataFrame"),
187+
function(object, value) {
188+
colData(object) <- value
189+
object
190+
}
191+
)
192+
193+
setMethod("sampleNames", "MethylCounts", function(object) colnames(object))
194+
195+
setReplaceMethod(
196+
"sampleNames",
197+
signature = signature(object = "MethylCounts", value = "ANY"),
198+
function(object, value) {
199+
colnames(object) <- value
200+
object
201+
}
202+
)

0 commit comments

Comments
 (0)