-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiffExpressionAgainstSiblingsLarge.R
More file actions
73 lines (61 loc) · 2.68 KB
/
Copy pathdiffExpressionAgainstSiblingsLarge.R
File metadata and controls
73 lines (61 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Read the data.
sourceDir <- NULL
large <- read.csv(paste0(sourceDir, "geneExpressionLogCPMLarge.csv"),
row.names = 1)
# Split data into families.
familyInfo <- do.call(rbind, lapply(colnames(large), function(samp){
# Get the family label. Remove the "X" added by R.
sampSplitDot <- strsplit(samp, ".", fixed = TRUE)[[1]]
family <- substr(sampSplitDot[1], 2, nchar(sampSplitDot[1]))
# Get the position within the family.
positionStr <- substr(sampSplitDot[2], 1, 3)
# Construct a data frame.
retval <- data.frame(family = family, position = positionStr)
return(retval)
}))
# Extract only probands and siblings.
whichChildren <- union(which(familyInfo$position == "s1"),
which(familyInfo$position == "p1"))
children <- large[,whichChildren]
childrenInfo <- familyInfo[whichChildren,]
write.csv(children, paste0(sourceDir, "childrenGeneExpression.csv"))
write.csv(childrenInfo, paste0(sourceDir, "childrenLabels.csv"), row.names = FALSE)
# z-scale each gene.
geneMeans <- rowMeans(children)
geneSds <- unlist(lapply(rownames(children), function(gene){
return(sd(children[gene,]))
}))
childrenScaled <- (children - geneMeans) / geneSds
# For each family, obtain differences.
# Note that we had to exclude 2 families because there was no sibling data.
families <- sort(unique(childrenInfo$family))
diffExpressionList <- lapply(families, function(family){
# Initialize differential expression.
diffExpression <- NULL
# Extract proband and sibling data.
whichProband <- intersect(which(childrenInfo$family == family),
which(childrenInfo$position == "p1"))
whichSibling <- intersect(which(childrenInfo$family == family),
which(childrenInfo$position == "s1"))
if(length(whichSibling) == 0){
print(paste("No sibling for family", family, "-- skipping"))
}else if(length(whichProband) == 0){
print(paste("No proband for family", family, "-- skipping"))
}else{
proband <- childrenScaled[,whichProband]
sibling <- childrenScaled[,whichSibling]
names(proband) <- rownames(childrenScaled)
names(sibling) <- rownames(childrenScaled)
# Compute differential expression.
diffExpression <- data.frame(v1 = proband - sibling)
rownames(diffExpression) <- names(proband)
colnames(diffExpression) <- family
}
return(diffExpression)
})
isNull <- unlist(lapply(diffExpressionList,is.null))
diffExpressionList <- diffExpressionList[which(isNull == FALSE)]
diffExpression <- do.call(cbind, diffExpressionList)
# Save the differential expression matrix.
diffExpressionMat <- as.matrix(diffExpression)
write.csv(diffExpressionMat, paste0(sourceDir, "diffExpression.csv"))