-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstreamlit_app.py
More file actions
120 lines (92 loc) · 3.46 KB
/
streamlit_app.py
File metadata and controls
120 lines (92 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Titanic training set; the relative path is resolved against the
# directory the app is launched from.
df = pd.read_csv("train.csv")

# Page header and sidebar navigation. `page` holds the label picked in the
# sidebar and is compared against the entries of `pages` to choose what to show.
st.title("Projet de classification binaire Titanic")
st.sidebar.title("Sommaire")
pages = [
    "Exploration",
    "DataVizualization",
    "Modélisation",
]
page = st.sidebar.radio("Aller vers", options=pages)
if page == pages[0]:
    # Exploration page: a first look at the raw dataset.
    st.write("### Introduction")

    # First ten rows, then the (rows, columns) shape of the full table.
    preview = df.head(10)
    st.dataframe(preview)
    st.write(df.shape)

    # Summary statistics as computed by DataFrame.describe().
    summary = df.describe()
    st.dataframe(summary)

    # Optional per-column count of missing values, shown only when ticked.
    show_missing = st.checkbox("Afficher les NA")
    if show_missing:
        missing_per_column = df.isna().sum()
        st.dataframe(missing_per_column)
if page == pages[1]:
    # Visualisation page: one chart per question about the passengers.
    #
    # Each figure is closed right after it has been rendered. pyplot keeps
    # every figure it creates registered until it is closed, and this script
    # is executed again on each user interaction, so figures that are never
    # closed accumulate in memory from one rerun to the next.
    #
    # Axes are passed explicitly (ax=ax) rather than relying on pyplot's
    # implicit "current figure", so each chart is drawn on the figure that is
    # actually handed to Streamlit.
    st.write("### DataVizualization")

    # Number of passengers per survival outcome.
    fig, ax = plt.subplots()
    sns.countplot(x='Survived', data=df, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Number of passengers per sex.
    fig, ax = plt.subplots()
    sns.countplot(x='Sex', data=df, ax=ax)
    ax.set_title("Répartition du genre des passagers")
    st.pyplot(fig)
    plt.close(fig)

    # Number of passengers per ticket class.
    fig, ax = plt.subplots()
    sns.countplot(x='Pclass', data=df, ax=ax)
    ax.set_title("Répartition des classes des passagers")
    st.pyplot(fig)
    plt.close(fig)

    # Age distribution. displot is a figure-level function: it creates its own
    # figure and returns a grid object wrapping it, so the underlying Figure
    # is what gets rendered and closed.
    grid = sns.displot(x='Age', data=df)
    grid.ax.set_title("Distribution de l'âge des passagers")
    st.pyplot(grid.figure)
    plt.close(grid.figure)

    # Survival outcome split by sex.
    fig, ax = plt.subplots()
    sns.countplot(x='Survived', hue='Sex', data=df, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Survival against ticket class (point plot, figure-level).
    grid = sns.catplot(x='Pclass', y='Survived', data=df, kind='point')
    st.pyplot(grid.figure)
    plt.close(grid.figure)

    # Survival against age, one regression line per ticket class (figure-level).
    grid = sns.lmplot(x='Age', y='Survived', hue="Pclass", data=df)
    st.pyplot(grid.figure)
    plt.close(grid.figure)

    # Correlation heatmap restricted to the numeric columns.
    fig, ax = plt.subplots()
    sns.heatmap(df.select_dtypes('number').corr(), ax=ax, cmap='RdBu_r')
    st.pyplot(fig)
    plt.close(fig)
if page == pages[2]:
    # Modelling page, part 1: build the train/test feature matrices.
    # Produces X_train, X_test, y_train and y_test, which the model helpers
    # further down in this file read.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    st.write("### Modélisation")

    # Drop the columns that are not used as features.
    df = df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
    y = df['Survived']

    # Take explicit copies: the frames are modified column by column below,
    # and assigning into a bare column subset of `df` is a chained assignment
    # that pandas reports with SettingWithCopyWarning.
    X_cat = df[['Pclass', 'Sex', 'Embarked']].copy()
    X_num = df[['Age', 'Fare', 'SibSp', 'Parch']].copy()

    # Impute missing values: most frequent value for the categorical columns,
    # median for the numeric ones.
    for col in X_cat.columns:
        X_cat[col] = X_cat[col].fillna(X_cat[col].mode()[0])
    for col in X_num.columns:
        X_num[col] = X_num[col].fillna(X_num[col].median())

    # One-hot encode every categorical column (the variable keeps its original
    # name even though the operation is an encoding, not a scaling).
    X_cat_scaled = pd.get_dummies(X_cat, columns=X_cat.columns)
    X = pd.concat([X_cat_scaled, X_num], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=123
    )
    # The split returns row selections of X; copy them so the in-place scaling
    # below writes to independent frames.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # Standardise the numeric columns: the scaler is fitted on the training
    # split only and then applied unchanged to the test split.
    scaler = StandardScaler()
    X_train[X_num.columns] = scaler.fit_transform(X_train[X_num.columns])
    X_test[X_num.columns] = scaler.transform(X_test[X_num.columns])
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
def prediction(classifier):
    """Fit the model named by *classifier* on the training split and return it.

    The model is created with its default settings and fitted on ``X_train``
    and ``y_train``, which are read from the surrounding script scope.

    Args:
        classifier: One of ``'Random Forest'``, ``'SVC'`` or
            ``'Logistic Regression'``.

    Returns:
        The fitted estimator.

    Raises:
        ValueError: If *classifier* is not one of the supported names.
    """
    if classifier == 'Random Forest':
        clf = RandomForestClassifier()
    elif classifier == 'SVC':
        clf = SVC()
    elif classifier == 'Logistic Regression':
        clf = LogisticRegression()
    else:
        # Without this branch `clf` would be unbound and the call below would
        # fail with an UnboundLocalError that hides the real cause.
        raise ValueError(f"Unknown classifier: {classifier!r}")
    clf.fit(X_train, y_train)
    return clf
def scores(clf, choice):
    """Evaluate a fitted model on the test split.

    ``X_test`` and ``y_test`` are read from the surrounding script scope.

    Args:
        clf: A fitted estimator exposing ``score`` and ``predict``.
        choice: ``'Accuracy'`` or ``'Confusion matrix'``.

    Returns:
        The value of ``clf.score(X_test, y_test)`` for ``'Accuracy'``, or the
        confusion matrix of the test predictions for ``'Confusion matrix'``.

    Raises:
        ValueError: If *choice* is not one of the supported metric names.
    """
    if choice == 'Accuracy':
        return clf.score(X_test, y_test)
    if choice == 'Confusion matrix':
        return confusion_matrix(y_test, clf.predict(X_test))
    # An unsupported metric used to fall through and return None silently.
    raise ValueError(f"Unknown metric: {choice!r}")
if page == pages[2]:
    # Modelling page, part 2: pick a model, train it, save it and show the
    # requested metric. Uses the `prediction` and `scores` helpers defined
    # earlier in this file.
    import pickle

    import joblib

    choix = ['Random Forest', 'SVC', 'Logistic Regression']
    option = st.selectbox('Choix du modèle', choix)
    st.write('Le modèle choisi est :', option)

    clf = prediction(option)

    # Persist the fitted model in both formats, as relative paths in the
    # directory the app is launched from.
    joblib.dump(clf, "model.joblib")
    # Open the pickle file in a `with` block so the handle is closed (and the
    # data flushed) as soon as the dump is done, instead of being left open.
    with open("model.pickle", 'wb') as model_file:
        pickle.dump(clf, model_file)

    display = st.radio('Que souhaitez-vous montrer ?', ('Accuracy', 'Confusion matrix'))
    if display == 'Accuracy':
        st.write(scores(clf, display))
    elif display == 'Confusion matrix':
        # The confusion matrix is a 2-D array, rendered as a table.
        st.dataframe(scores(clf, display))