-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFinal Coding Challenge.py
More file actions
59 lines (38 loc) · 1.45 KB
/
Final Coding Challenge.py
File metadata and controls
59 lines (38 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
# Titanic data exploration. Every analysis line below is a bare expression
# that answers one question; its value is only echoed when the line is
# evaluated interactively (console / notebook cell), not when the file is
# run as a plain script.

# Widen pandas' console output so frames are not truncated or line-wrapped.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('expand_frame_repr', False)

# NOTE: machine-specific absolute path; change proj_dir to run elsewhere.
proj_dir = "C:/Users/sivac/Documents/Python Projects/Introduction to Data Science Course/"
path = proj_dir + "Data Files/Predictive Modeling and Machine Learning/titanic.csv"
df = pd.read_csv(path)

# Number of rows and columns, as a (rows, columns) tuple
df.shape

# Which columns are int64, followed by the number of columns per dtype
df.dtypes[df.dtypes == 'int64']
df.dtypes.value_counts()

# Missing values in Age (count of missing vs. present)
df['Age'].isnull().value_counts()

# Mean of Age (missing values are skipped by pandas)
df['Age'].mean()

# Mean Age of male passengers
df.loc[df['Sex'] == 'male', 'Age'].mean()

# Correlation between Fare and Survived.
# Computed on just these two columns: DataFrame.corr() over the whole frame
# returns a full matrix rather than the single value asked for, and raises
# on text columns such as Sex in pandas >= 2.0 (numeric_only defaults to
# False there).
df['Fare'].corr(df['Survived'])

# Share of female passengers that survived, as a fraction in [0, 1]
# (multiply by 100 for a percentage). The mean of a boolean mask is the
# fraction of True values.
(df.loc[df['Sex'] == 'female', 'Survived'] == 1).mean()

# Number of passengers who survived and who did not, for Pclass 2
df.loc[df['Pclass'] == 2, 'Survived'].value_counts()

# Median ticket fare for each Pclass
df.groupby('Pclass').agg({'Fare': 'median'})

# Number of missing values in Embarked
df['Embarked'].isnull().sum()

# Share of missing values in Cabin, as a fraction in [0, 1].
# Using the mean of the null mask avoids indexing value_counts() with the
# integer 1, which depends on label/position fallback and fails when the
# column has no missing values.
df['Cabin'].isnull().mean()

# 95th percentile of Fare
df['Fare'].quantile(0.95)

# Number of passengers who paid more than 250
len(df[df['Fare'] > 250])

# Is the distribution of Fare right-skewed? Inspect the histogram.
df['Fare'].plot.hist()

# Do Age and Fare have outliers? Use a box plot - Ans: both have outliers
df[['Fare', 'Age']].plot.box()