Introduction-to-Data-Science-Course/Understanding Statistics Coding Challenge.py at master · sivacharansrc/Introduction-to-Data-Science-Course · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# import required libraries
import pandas as pd
import numpy as np

# --- central tendency of a small hand-written list ---
scores = [29, 27, 14, 23, 29, 10]

# arithmetic mean of 'scores'
np.mean(scores)

# middle value of 'scores'
np.median(scores)

# --- mode of a list of labels ---
from statistics import mode

fruits = ['apple', 'grapes', 'orange', 'apple']

# most frequent entry of 'fruits'
mode(fruits)


# --- spread of a random sample ---
from random import sample

# 50 integers picked without repetition from 1..99
data = sample(range(1, 100), 50)

# variance of 'data'
np.var(data)


# standard deviation of 'data', computed directly and then again as the
# square root of the variance
np.std(data)
np.sqrt(np.var(data))

# load data_statistics.csv into a pandas DataFrame
project_dir = "C:/Users/sivac/Documents/Python Projects/Introduction to Data Science Course"
path = f"{project_dir}/Data Files/Understanding Statistics/data_statistics.csv"
mydata = pd.read_csv(path)

# preview the top rows of mydata
mydata.head()

# `plt` was used below without ever being imported, which raised NameError;
# bring in matplotlib's pyplot interface under its conventional alias.
import matplotlib.pyplot as plt

# plot histogram for 'Item_Outlet_Sales' (bin count left at the library default)
plt.hist(mydata['Item_Outlet_Sales'])
plt.show()

# increase no. of bins to 20
plt.hist(mydata['Item_Outlet_Sales'], bins=20)
plt.show()

# find mean and median of 'Item_MRP'
# NOTE(review): this heading previously said 'Item_Weight' while the code read
# 'Item_MRP'; the comment now matches the code — confirm which column the
# exercise intended.
np.mean(mydata['Item_MRP']), np.median(mydata['Item_MRP'])

# find mode of 'Outlet_Size'
# The pandas accessor was left truncated as `.mo`, which raises AttributeError
# and stops the script; completed to Series.mode().
mydata['Outlet_Size'].mode()
# same question answered with statistics.mode (imported earlier in this file)
mode(mydata['Outlet_Size'])

# frequency table of 'Outlet_Type'
mydata['Outlet_Type'].value_counts()

# average 'Item_Outlet_Sales' over rows whose 'Outlet_Type' is 'Supermarket Type2'
np.mean(mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type2', 'Item_Outlet_Sales'])


# average 'Item_Outlet_Sales' over rows whose 'Outlet_Type' is 'Supermarket Type3'
np.mean(mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type3', 'Item_Outlet_Sales'])

# independent two-sample t-test on the sales of those two outlet types
from scipy import stats

stats.ttest_ind(
    mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type2', 'Item_Outlet_Sales'],
    mydata.loc[mydata['Outlet_Type'] == 'Supermarket Type3', 'Item_Outlet_Sales'],
)