You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
> x = (0:100) / 100
> y = sapply(x, function(t)binom.test(r,10,t,alternative="less")$p.value)
> points(x, y)
Exact binomial test
data: 4 and 10
number of successes = 4, number of trials = 10, p-value = 0.7539
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
0.1215523 0.7376219
sample estimates:
probability of success
0.4
> library(exactci)
> binom.exact(4, 10, tsmethod="minlike")
Exact two-sided binomial test (sum of minimum likelihood method)
data: 4 and 10
number of successes = 4, number of trials = 10, p-value = 0.7539
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
0.1500 0.7091
sample estimates:
probability of success
0.4
> binom.test(2, 15, 0.4)
f = function(theta) {
x = rbinom(100000, 10, theta)
r = sapply(x, function(u){binom.test(u,10)$conf.int})
mean(r[1,] <= theta & theta <= r[2,])
}
CI = sapply(0:10, function(x) binom.test(x,10)$conf.int)
f = function(theta) {
p = dbinom(0:10, 10, theta)
sum(p * (CI[1,] <= theta & theta <= CI[2,]))
}
3.6 2項分布から正規分布へ
3.7 検定の例:PISAの「盗難事件」問題
> binom.test(508, 508+516, 0.5)
Exact binomial test
data: 508 and 508 + 516
number of successes = 508, number of trials = 1024, p-value = 0.8269
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
0.4650308 0.5271792
sample estimates:
probability of success
0.4960938
> pnorm(-0.25)*2
[1] 0.8025873
3.8 信頼区間の例
> binom.test(200, 1000)
Exact binomial test
data: 200 and 1000
number of successes = 200, number of trials = 1000, p-value < 2.2e-16
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
0.1756206 0.2261594
sample estimates:
probability of success
0.2
> poisson.test(10)
Exact Poisson test
data: 10 time base: 1
number of events = 10, time base = 1, p-value = 1.114e-07
alternative hypothesis: true event rate is not equal to 1
95 percent confidence interval:
4.795389 18.390356
sample estimates:
event rate
10
> poisson.test(5, r=1.623486, alternative="greater")
...
number of events = 5, time base = 1, p-value = 0.025
...
> poisson.test(5, r=11.668332, alternative="less")
...
number of events = 5, time base = 1, p-value = 0.025
...
> poisson.test(7, r=3)
Exact Poisson test
data: 7 time base: 1
number of events = 7, time base = 1, p-value = 0.03351
alternative hypothesis: true event rate is not equal to 3
95 percent confidence interval:
2.814363 14.422675
sample estimates:
event rate
7
plot(NULL, xlim=c(0,20), ylim=c(0,20), xaxs="i", yaxs="i", asp=1,
xlab=expression(italic(x)), ylab=expression(italic(λ)))
for (lambda in seq(0,20,0.1)) {
t = sort(dpois(0:100, lambda), decreasing=TRUE)
s = cumsum(t)
m = t[sum(s < 0.95) + 1]
x = range((0:100)[dpois(0:100, lambda) >= m])
segments(x[1], lambda, x[2], lambda, col="gray")
}
abline(v=5)
abline(h=1.9701)
abline(h=11.7992)
axis(4, c(1.9701,11.7992), labels=c("2.0","11.8"))
> install.packages("exactci")
> library(exactci)
> poisson.exact(7, r=3, tsmethod="minlike")
Exact two-sided Poisson test (sum of minimum likelihood method)
data: 7 time base: 1
number of events = 7, time base = 1, p-value = 0.03351
alternative hypothesis: true event rate is not equal to 3
95 percent confidence interval:
3.2853 14.3402
sample estimates:
event rate
7
> fisher.test(matrix(c(3,1,2,4), nrow=2))
Fisher's Exact Test for Count Data
data: matrix(c(3, 1, 2, 4), nrow = 2)
p-value = 0.5238
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.2180460 390.5629165
sample estimates:
odds ratio
4.918388
> library(fmsb)
> oddsratio(12, 5, 6, 12)
Disease Nondisease Total
Exposed 12 6 18
Nonexposed 5 12 17
Total 17 18 35
Odds ratio estimate and its significance probability
data: 12 5 6 12
p-value = 0.02983
95 percent confidence interval:
1.147127 20.084959
sample estimates:
[1] 4.8
> library(exact2x2)
> fisher.exact(x)
Two-sided Fisher's Exact Test (usual method using minimum likelihood)
data: x
p-value = 0.04371
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
1.0905 22.9610
sample estimates:
odds ratio
4.568253
> blaker.exact(x)
Blaker's Exact Test
data: x
p-value = 0.04371
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
1.0905 23.6488
sample estimates:
odds ratio
4.568253
5.6 ファイ係数,クラメールのVなど
> sqrt(chisq.test(x,correct=FALSE)$statistic / sum(x))
X-squared
0.372549
> a = x[1,1]; b = x[1,2]; c = x[2,1]; d = x[2,2]
> (a*d-b*c) / sqrt((a+b)*(c+d)*(a+c)*(b+d))
[1] 0.372549
> binom.test(1, 9) # binom.test(8, 9) でも同じ
Exact binomial test
data: 1 and 9
number of successes = 1, number of trials = 9, p-value = 0.03906
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
0.002809137 0.482496515
sample estimates:
probability of success
0.1111111
> mcnemar.exact(matrix(c(6,8,1,5), nrow=2))
Exact McNemar test (with central confidence intervals)
data: matrix(c(6, 8, 1, 5), nrow = 2)
b = 1, c = 8, p-value = 0.03906
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.00281705 0.93235414
sample estimates:
odds ratio
0.125
> t.test(X)
One Sample t-test
data: X
t = -1.4231, df = 9, p-value = 0.1884
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
-1.0125190 0.2305190
sample estimates:
mean of x
-0.391
6.2 2標本の差の$t$検定
> A = rep(1:5, c(2,3,4,3,2)) # A=c(1,1,2,2,2,3,3,3,3,4,4,4,5,5)と同じ
> B = rep(1:5, c(0,2,4,5,3)) # B=c(2,2,3,3,3,3,4,4,4,4,4,5,5,5)と同じ
> table(A) # 念のため確認
A
1 2 3 4 5
2 3 4 3 2
> table(B)
B
2 3 4 5
2 4 5 3
> t.test(A, B) # 等分散を仮定しないt検定
Welch Two Sample t-test
data: A and B
t = -1.4615, df = 24.476, p-value = 0.1566
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.5497688 0.2640545
sample estimates:
mean of A mean of B
3.000000 3.642857
> t.test(A, B, var.equal=TRUE) # 等分散を仮定したt検定
Two Sample t-test
data: A and B
t = -1.4615, df = 26, p-value = 0.1559
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.5470216 0.2613074
sample estimates:
mean of A mean of B
3.000000 3.642857
f = function() {
x = rnorm(10, mean=0, sd=1.5)
y = rnorm(30, mean=0, sd=1.0)
vp = var.test(x, y)$p.value
t.test(x, y, var.equal=(vp >= 0.05))$p.value
}
p = replicate(1000, f())
mean(p < 0.05)
ttest = function(n1, x1, s1, n2, x2, s2) {
n = n1 + n2 - 2
u = ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / n
t = (x1 - x2) / sqrt(u / n1 + u / n2)
r = cat("Equal variance:\n\t", sep="")
r = cat(r, "t = ", t, ", df = ", n, ",
p = ", 2 * pt(-abs(t), n), "\n", sep="")
t = (x1 - x2) / sqrt(s1^2 / n1 + s2^2 / n2)
n = (s1^2 / n1 + s2^2 / n2)^2 /
((s1^2 / n1)^2 / (n1-1) + (s2^2 / n2)^2 / (n2-1))
r = cat(r, "Unequal variance:\n\t", sep="")
cat(r, "t = ", t, ", df = ", n, ", p = ", 2 * pt(-abs(t), n), "\n", sep="")
}
f = function() {
x = sample(1:5, 14, replace=TRUE)
y = sample(1:5, 14, replace=TRUE)
t.test(x, y)$p.value
}
p = replicate(100000, f())
mean(p < 0.05)
6.3 一元配置分散分析
x = c(1,3,5,8,5,4,2) # データ
g = factor(c(1,1,2,2,2,3,3)) # グループの分かれ方
> y = ave(x, g)
> y
[1] 2 2 6 6 6 3 3
> z = ave(x)
> z
[1] 4 4 4 4 4 4 4
> y - z
[1] -2 -2 2 2 2 -1 -1
> sum((y - z)^2)
[1] 22
> x - y
[1] -1 1 -1 2 -1 1 -1
> sum((x - y)^2)
[1] 10
> sum((x - z)^2)
[1] 32
> (sum((y-z)^2) / 2) / (sum((x-y)^2) / 4)
[1] 4.4
> 1 - pf(4.4, 2, 4)
[1] 0.09765625
> anova(lm(x ~ g))
Analysis of Variance Table
Response: x
Df Sum Sq Mean Sq F value Pr(>F)
g 2 22.0 11.0 4.4 0.09766 .
Residuals 4 10.0 2.5
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> oneway.test(x ~ g, var.equal=TRUE)
> oneway.test(x ~ g)
One-way analysis of means (not assuming equal variances)
data: x and g
F = 3.3913, num df = 2.0, denom df = 2.4, p-value = 0.1998
x = c(1,3,5,8,5,4,2) # データ
g = factor(c(1,1,2,2,2,3,3)) # グループの分かれ方
ssq0 = sum((ave(x,g) - ave(x))^2) # 群間2乗和
c1 = combn(7, 3)
c2 = combn(4, 2)
n1 = ncol(c1)
n2 = ncol(c2)
ssq = numeric(0)
for (i in 1:n1) {
a = c1[,i]
g[a] = 1
b = setdiff(1:7, a)
for (j in 1:n2) {
g[b[c2[,j]]] = 2
g[b[-c2[,j]]] = 3
ssq = append(ssq, sum((ave(x,g)-ave(x))^2))
}
}
mean(ssq >= ssq0)
> kruskal.test(x ~ g)
Kruskal-Wallis rank sum test
data: x by g
Kruskal-Wallis chi-squared = 4.8, df = 2, p-value = 0.09072
> cor.test(組織率, 正答率合計, method="kendall")
Kendall's rank correlation tau
data: 組織率 and 正答率合計
z = 1.7298, p-value = 0.08366
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
0.3736324
Warning message:
In cor.test.default(組織率, 正答率合計, method = "kendall") :
タイのため正確な p 値を計算することができません
> cor.test(組織率, 正答率合計, method="spearman")
Spearman's rank correlation rho
data: 組織率 and 正答率合計
S = 179.2146, p-value = 0.07656
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.5076522
Warning message:
In cor.test.default(組織率, 正答率合計, method = "spearman") :
タイのため正確な p 値を計算することができません
8.3 ピアソンの相関係数
> x = c(1,2,3) # x = 1:3 でも同じ
> y = c(1,3,2)
> cor(x, y)
> x = c(1, 2, 3) # x = 1:3 でも同じ
> y = c(1, 3, 2)
> r = cor(x, y) # r = 0.5 になる
> n = 3
> t = r * sqrt(n-2) / sqrt(1 - r^2) # t = 0.5773503
> 2 * pt(-t, n-2) # 0.6666667 と表示される
> cor.test(x, y)
Pearson's product-moment correlation
data: x and y
t = 0.5774, df = 1, p-value = 0.6667
alternative hypothesis: true correlation is not equal to 0
sample estimates:
cor
0.5
8.4 順位相関係数
f = function() {
k = runif(1)
a = runif(10); b = runif(10)
x = k * a + (1-k) * b; y = k * a - (1-k) * b
c(cor(x,y,method="kendall"), cor(x,y,method="spearman"))
}
r = replicate(1000, f())
plot(r[1,], r[2,], xlim=c(-1,1), ylim=c(-1,1), asp=1)
abline(0,1)
> t = cor(X, Y, method="kendall")
> a = replicate(10000, cor(X, sample(Y), method="kendall"))
> mean(abs(a) >= abs(t)) # 両側確率
f1 = function() { x = rnorm(40); y = rnorm(40); cor(x, y) }
f2 = function() { x = cumsum(rnorm(40)); y = cumsum(rnorm(40)); cor(x, y) }
r1 = replicate(1000000, f1()); hist(r1) # 左
r2 = replicate(1000000, f2()); hist(r2) # 右
第9章 回帰分析
9.1 最小2乗法
x = c(1,2,3,4)
y = c(2,3,5,4)
f = function(arg) {
a = arg[1]
b = arg[2]
t = a * x + b
sum((y - t)^2)
}
optim(c(1,1), f) # 初期値(a,b)=(1,1)から始めてfを最小化する
> optim(c(1,1), f)$par
[1] 0.7999722 1.5000885
> r = lm(y ~ x)
> summary(r)
Call:
lm(formula = y ~ x)
Residuals:
1 2 3 4
-0.3 -0.1 1.1 -0.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.5000 1.1619 1.291 0.326
x 0.8000 0.4243 1.886 0.200
Residual standard error: 0.9487 on 2 degrees of freedom
Multiple R-squared: 0.64, Adjusted R-squared: 0.46
F-statistic: 3.556 on 1 and 2 DF, p-value: 0.2
9.2 息抜き体操
x = rnorm(40); y = rnorm(40)
plot(x, y, pch=16, main=cor(x,y))
abline(lm(y ~ x))
a = rnorm(100); b = rnorm(100); c = rnorm(100)
x = (a + c) / sqrt(2)
y = (b + c) / sqrt(2)
plot(x, y, pch=16, xlim=c(-3,3), ylim=c(-3,3),
asp=1)
abline(lm(y ~ x))
abline(0, 1, lty=2)
> r = lm(y ~ x, weights=1/e^2) # 誤差が一定ならば r = lm(y ~ x)
> summary(r)
Call:
lm(formula = y ~ x, weights = 1/e^2)
Weighted Residuals:
1 2 3 4 5 6 7
-0.9404 0.6282 -0.2603 0.3495 -0.3084 0.2882 -0.3850
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.05207 0.03867 1.347 0.236
x 0.57022 0.04294 13.281 4.33e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.5992 on 5 degrees of freedom
Multiple R-squared: 0.9724, Adjusted R-squared: 0.9669
F-statistic: 176.4 on 1 and 5 DF, p-value: 4.327e-05
> abline(r)
> sum((r$residuals / e)^2)
> pchisq(1.795245, df=5)
r = lm(y ~ x - 1, weights=1/e^2)
9.4 ポアソン回帰
f = function(arg) {
a = arg[1]
b = arg[2]
lambda = a * x + b
-sum(y * log(lambda) - lambda)
}
optim(c(1,1), f)
glm(y ~ x, family=poisson(link="identity"))
9.5 ポアソン回帰と似た方法,等価な方法
f = function(arg) {
a = arg[1]
b = arg[2]
lambda = a * x + b
sum((y - lambda)^2 / y)
}
optim(c(1,1), f)
lm(y ~ x, weights=1/y)
f = function(arg) {
a = arg[1]
b = arg[2]
lambda = a * x + b
sum((y - lambda)^2 / lambda)
}
optim(c(1,1), f)
w = c(1,1,1,1) # 適当な初期値
for (i in 1:10) { # 収束するまで続ける
r = lm(y ~ x, weights=w)
lambda = predict(r)
print(c(as.numeric(r$coef), -sum(y*log(lambda)-lambda)))
w = 1 / lambda
}
weights=varPower(fixed=0.5)
library(nlme)
data = data.frame(y=y, x=x)
gnls(y ~ a * x + b, data=data,
start=list(a=0.9,b=1.3), weights=varPower(fixed=0.5))
9.6 ポアソン回帰のあてはまりの良さ
> x = c(1,2,3,4)
> y = c(2,3,5,4)
> r = glm(y ~ x, family=poisson(link="identity"))
> summary(r)
Call:
glm(formula = y ~ x, family = poisson(link = "identity"))
Deviance Residuals:
1 2 3 4
-0.11496 -0.03194 0.51015 -0.39066
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.2784 1.9766 0.647 0.518
x 0.8887 0.8141 1.092 0.275
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1.4716 on 3 degrees of freedom
Residual deviance: 0.4271 on 2 degrees of freedom
AIC: 16.779
Number of Fisher Scoring iterations: 5
x = 1:20
y = c(11,4,13,10,4,8,6,16,7,12,10,13,6,5,1,4,2,0,0,1)
d = dnorm(x, 10, 3)
e = exp(-x/10)
f = function(arg) {
a = arg[1]; b = arg[2]
mu = a * d + b * e
-sum(y * log(mu) - mu)
}
optim(c(50,10), f) # (50,10) は初期値
x = 1:20
y = c(11,4,13,10,4,8,6,16,7,12,10,13,6,5,1,4,2,0,0,1)
r = glm(y ~ dnorm(x,10,3) + exp(-x/10) - 1, family=poisson(link="identity"))
library(nlme)
data = data.frame(x, y)
r = gnls(y ~ a * dnorm(x,m,s) + b * exp(-x/10), data=data,
start=list(a=50,b=10,m=10,s=3),
weights=varPower(fixed=0.5),
control=list(nlsTol=1e-5))
Coefficients:
Value Std.Error t-value p-value
a 48.76802 15.070588 3.235973 0.0052
b 10.24538 2.025268 5.058778 0.0001
m 10.19476 0.654146 15.584844 0.0000
s 2.28875 0.669822 3.416955 0.0035
10.5 度数分布を使わないフィッティング
x = 1:20
y = c(11,4,13,10,4,8,6,16,7,12,10,13,6,5,1,4,2,0,0,1)
> x = rep(1:5, c(4,5,6,3,2))
> y = rep(1:5, c(1,4,3,6,6))
> t.test(x, y)
12.2 ウィルコクソン検定(順位和検定)
> x = rep(1:5, c(4,5,6,3,2))
> y = rep(1:5, c(1,4,3,6,6))
> wilcox.test(x, y)
Wilcoxon rank sum test with continuity correction
data: x and y
W = 123, p-value = 0.03435
alternative hypothesis: true location shift is not equal to 0
警告メッセージ:
wilcox.test.default(x, y) で:
タイがあるため、正確な p 値を計算することができません
> wilcox.test(x, y, exact=TRUE)
> library(exactRankTests)
Package 'exactRankTests' is no longer under development.
Please consider using package 'coin' instead.
> wilcox.exact(x, y)
Exact Wilcoxon rank sum test
data: x and y
W = 123, p-value = 0.03611
alternative hypothesis: true mu is not equal to 0
f = function() {
a = sample(40)
x = a[1:20]
y = a[21:40]
c(t.test(x, y, var.equal=TRUE)$p.value, wilcox.test(x, y)$p.value)
}
r = replicate(1000, f())
plot(r[1,], r[2,], asp=1)
abline(0, 1)
12.3 ブルンナー・ムンツェル検定
> library(lawstat)
> brunner.munzel.test(x, y)
Brunner-Munzel Test
data: x and y
Brunner-Munzel Test Statistic = 2.3138, df = 37.759, p-value = 0.02622
95 percent confidence interval:
0.5240416 0.8609584
sample estimates:
P(X<Y)+.5*P(X=Y)
0.6925