-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathestimator.py
More file actions
93 lines (77 loc) · 3.78 KB
/
estimator.py
File metadata and controls
93 lines (77 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import time
import random, math
from random import randint
# ++++++++++++++++++++++++++++++++++++++++++++++++++
class Estimator():
    """Estimate the error of the mean and median with the bootstrap method.

    Attributes:
        set: the generated dataset, a list of random integers in [30, 170].
        mean_median: two lists; index 0 collects resample means and index 1
            collects resample medians.
    """

    def __init__(self, default=40):
        """Generate a dataset of ``default`` values and empty result lists."""
        self.set = self.dataset(samples=default)
        self.mean_median = [[], []]

    def __del__(self):
        # No-op: this only unbinds the local name ``self``. Kept so the
        # class interface stays unchanged.
        del self

    def __str__(self):
        return "Class of error estimator using bootstrap method."

    def dataset(self, samples=40):
        """Return ``samples`` random integers drawn from [30, 170]."""
        return [randint(30, 170) for _ in range(samples)]

    # step 2
    def pick(self, samples=40):
        """Return a sorted resample of ``samples`` values from the dataset.

        Values are drawn uniformly with replacement from ``self.set``, so
        every element has the same chance on every draw and ``samples`` may
        exceed the dataset size.

        Raises:
            IndexError: if the dataset is empty and ``samples`` is positive.
        """
        return sorted(random.choices(self.set, k=samples))

    def calculate_mean(self, resampled):
        """Return the arithmetic mean of ``resampled``.

        Raises:
            ZeroDivisionError: if ``resampled`` is empty.
        """
        return sum(resampled) / len(resampled)

    def calculate_median(self, resampled):
        """Return the median of ``resampled``.

        The input is sorted internally, so it does not need to be ordered.
        For an odd number of values the middle element is returned; for an
        even number, the average of the two middle elements.

        Raises:
            IndexError: if ``resampled`` is empty.
        """
        ordered = sorted(resampled)
        middle = len(ordered) // 2
        if len(ordered) % 2 == 1:
            return ordered[middle]
        # Even length: the two central values sit at middle - 1 and middle.
        return (ordered[middle - 1] + ordered[middle]) / 2

    def calculate_SD(self, set):
        """Return the population standard deviation of ``set``.

        The sum of squared deviations is divided by N (not N - 1).

        Raises:
            ZeroDivisionError: if ``set`` is empty.
        """
        avg = self.calculate_mean(set)
        squared_deviations = sum((value - avg) ** 2 for value in set)
        return math.sqrt(squared_deviations / len(set))

    def small_centile(self, l, z=1.96):
        """Return ``l[0] - z * l[1]`` for ``l = [mean, standard deviation]``.

        With the default ``z`` of 1.96 this is the 2.5th centile under a
        normal approximation.
        """
        return l[0] - z * l[1]

    def big_centile(self, l, z=1.96):
        """Return ``l[0] + z * l[1]`` for ``l = [mean, standard deviation]``.

        With the default ``z`` of 1.96 this is the 97.5th centile under a
        normal approximation.
        """
        return l[0] + z * l[1]
# Script entry point: run the bootstrap procedure step by step and print
# every intermediate result.
if __name__ == '__main__':
    estim = Estimator()
    maximum = 10000  # number of bootstrap repetitions
    print("+++++++++++++++++++++++++++++++++++++++++")
    print("step [1] := DATASET GENERATION")
    print("-dataset : ",estim.set, "\n-size : ",len(estim.set))
    print("step [5] : REPEAT STEP 2 though 4 10000 times (start of loop)")
    for counter in range(maximum):
        print(" +++++++++ {} ++++++++".format(counter))
        print("\n step [2,3] := RANDOM PICK UP FOR ALL SAMPLES(default=40)")
        # Draw ONE resample per iteration so the printed values, the mean
        # and the median all describe the same resample.
        resample = estim.pick(samples=len(estim.set))
        print(" -picked up : ", resample)
        print("\n step [4] := CAULCULATE our SAMPLE STATISTIC(mean and median)")
        estim.mean_median[0].append(estim.calculate_mean(resample))  # mean
        estim.mean_median[1].append(estim.calculate_median(resample))  # median
        print("- mean : ", estim.mean_median[0][-1], "\n-median : ", estim.mean_median[1][-1])
    # Compute each statistic of the bootstrap distributions once and reuse it.
    mean_of_mean = estim.calculate_mean(estim.mean_median[0])
    mean_of_median = estim.calculate_mean(estim.mean_median[1])
    sd_of_mean = estim.calculate_SD(estim.mean_median[0])
    sd_of_median = estim.calculate_SD(estim.mean_median[1])
    print("\nstep [6] := CAULCULATE STANDARD DEVIATION of distribution of the 10,000 means and medians")
    print("-SD of mean : ", sd_of_mean)
    print("-SD of median : ", sd_of_median)
    print("\nstep [7] := CAULCULATE 2.5th and 97.5th centiles of the 10,000 means and medians")
    # The centiles are obtained with a normal approximation rather than by
    # reading them off the sorted bootstrap values:
    #     x = mean + Z * SD
    # where Z comes from the standard normal table:
    #   1st   - 99th   : -/+2.326
    #   2.5th - 97.5th : -/+1.960
    #   5th   - 95th   : -/+1.645
    #   10th  - 90th   : -/+1.282
    #   25th  - 75th   : -/+0.675
    #   50th           : 0
    print(mean_of_mean, sd_of_mean)
    print(mean_of_median, sd_of_median)
    print("-2.5th and 97.5th centile of 10,000 means : {} and {}".format(estim.small_centile([mean_of_mean, sd_of_mean]), estim.big_centile([mean_of_mean, sd_of_mean])))
    print("-2.5th and 97.5th centile of 10,000 medians : {} and {}".format(estim.small_centile([mean_of_median , sd_of_median]), estim.big_centile([mean_of_median , sd_of_median])))
    print("+++++++++++++++++++++++++++++++++++++++++")