-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUnderstanding Statistics Coding Challenge.py
More file actions
68 lines (46 loc) · 1.8 KB
/
Understanding Statistics Coding Challenge.py
File metadata and controls
68 lines (46 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# import required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from random import sample
from statistics import mode

# --- Central tendency on small in-memory lists ------------------------------
# Every result is bound to a name and printed: a bare expression such as
# `np.mean(scores)` only echoes in an interactive console and is silently
# discarded when the file is run as a script.
scores = [29, 27, 14, 23, 29, 10]

# mean of all items of the list 'scores'
scores_mean = np.mean(scores)
print("mean of scores:", scores_mean)

# median of all items of the list 'scores'
scores_median = np.median(scores)
print("median of scores:", scores_median)

# mode (most frequent item) of the list 'fruits'
fruits = ['apple', 'grapes', 'orange', 'apple']
fruits_mode = mode(fruits)
print("mode of fruits:", fruits_mode)

# --- Spread of a random sample ----------------------------------------------
# 50 distinct integers drawn without replacement from 1..99
data = sample(range(1, 100), 50)

# variance of data (np.var is called with its default settings)
data_variance = np.var(data)
print("variance of data:", data_variance)

# standard deviation of data
data_std = np.std(data)
print("standard deviation of data:", data_std)

# the standard deviation is the square root of the variance, so this value
# should match data_std up to floating-point rounding
data_std_from_variance = data_variance ** 0.5
print("sqrt(variance) of data:", data_std_from_variance)
# read data_statistics.csv using pandas
# NOTE(review): project_dir is an absolute, machine-specific path; adjust it
# before running this script on another machine.
project_dir = "C:/Users/sivac/Documents/Python Projects/Introduction to Data Science Course"
path = project_dir+"/Data Files/Understanding Statistics/data_statistics.csv"
mydata = pd.read_csv(path)

# print first few rows of mydata
print(mydata.head())

# plot histogram for 'Item_Outlet_Sales' with the default number of bins
plt.hist(mydata['Item_Outlet_Sales'])
plt.show()

# increase no. of bins to 20
plt.hist(mydata['Item_Outlet_Sales'], bins=20)
plt.show()

# find mean and median of 'Item_MRP'
item_mrp_mean = np.mean(mydata['Item_MRP'])
item_mrp_median = np.median(mydata['Item_MRP'])
print("Item_MRP mean:", item_mrp_mean, "median:", item_mrp_median)

# find mode of 'Outlet_Size'
# Series.mode() returns a Series holding the most frequent value(s).
# (The original line here was the truncated attribute access `.mo`, which
# raises AttributeError.)
outlet_size_modes = mydata['Outlet_Size'].mode()
print(outlet_size_modes)
# statistics.mode returns a single most-frequent value.
# NOTE(review): if 'Outlet_Size' contains missing values, the two calls may
# treat them differently -- confirm against the data.
outlet_size_mode = mode(mydata['Outlet_Size'])
print("Outlet_Size mode:", outlet_size_mode)

# frequency table of 'Outlet_Type'
outlet_type_counts = mydata['Outlet_Type'].value_counts()
print(outlet_type_counts)

# 'Item_Outlet_Sales' split by outlet type; selected once and reused for both
# the group means and the t-test below
type2_sales = mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type2', 'Item_Outlet_Sales']
type3_sales = mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type3', 'Item_Outlet_Sales']

# mean of 'Item_Outlet_Sales' for 'Supermarket Type2' outlet type
type2_sales_mean = np.mean(type2_sales)
print("mean sales, Supermarket Type2:", type2_sales_mean)

# mean of 'Item_Outlet_Sales' for 'Supermarket Type3' outlet type
type3_sales_mean = np.mean(type3_sales)
print("mean sales, Supermarket Type3:", type3_sales_mean)

# 2 sample independent t-test comparing sales of the two outlet types
from scipy import stats
t_test_result = stats.ttest_ind(type2_sales, type3_sales)
print(t_test_result)