-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfitting.R
More file actions
54 lines (51 loc) · 2.14 KB
/
fitting.R
File metadata and controls
54 lines (51 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
simbias <- function(seed=8765){
# The default seed guarantees a nice histogram. This is the only
# reason that accepting the default, x1c <- simbias(), is required in the lesson.
# The effect will be evident with other seeds as well.
set.seed(seed)
temp <- rnorm(100)
# Point A
x1 <- (temp + rnorm(100))/sqrt(2)
x2 <- (temp + rnorm(100))/sqrt(2)
x3 <- rnorm(100)
# Function to simulate regression of y on 2 variables.
f <- function(k){
# Point B
y <- x1 + x2 + x3 + .3*rnorm(100)
# Point C
c(lm(y ~ x1 + x2)$coef[2],
lm(y ~ x1 + x3)$coef[2])
}
# Point D
sapply(1:150, f)
}
# Illustrate the effect of bogus regressors on residual squared error.
bogus <- function(){
temp <- swiss
# Add 41 columns of random regressors to a copy of the swiss data.
for(n in 1:41){temp[,paste0("random",n)] <- rnorm(nrow(temp))}
# Define a function to compute the deviance of Fertility regressed
# on all regressors up to column n. The function, deviance(model), computes
# the residual sum of squares of the model given as its argument.
f <- function(n){deviance(lm(Fertility ~ ., temp[,1:n]))}
# Apply f to data from n=6, i.e., the legitimate regressors,
# through n=47, i.e., a full complement of bogus regressors.
rss <- sapply(6:47, f)
# Display result.
plot(0:41, rss, xlab="Number of bogus regressors.", ylab="Residual squared error.",
main="Residual Squared Error for Swiss Data\nUsing Irrelevant (Bogus) Regressors",
pch=21, bg='red')
}
# Plot histograms illustrating bias in estimates of a regressor
# coefficient 1) when an uncorrelated regressor is missing and
# 2) when a correlated regressor is missing.
x1hist <- function(x1c){
p1 <- hist(x1c[1,], plot=FALSE)
p2 <- hist(x1c[2,], plot=FALSE)
yrange <- c(0, max(p1$counts, p2$counts))
plot(p1, col=rgb(0,0,1,1/4), xlim=range(x1c), ylim=yrange, xlab="Estimated coefficient of x1",
main="Bias Effect of Omitted Regressor")
plot(p2, col=rgb(1,0,0,1/4), xlim=range(x1c), ylim=yrange, add=TRUE)
legend(1.1, 40, c("Uncorrelated regressor, x3, omitted", "Correlated regressor, x2, omitted"),
fill=c(rgb(0,0,1,1/4), rgb(1,0,0,1/4)))
}