yh-machine-learning/streamlit_app.py at master · yoshan0921/yh-machine-learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
import numpy as np
import pandas as pd
import joblib


def render_sidebar():
    """Draw the sidebar: logo image, a short description, and page links."""
    about_text = "The purpose of this application is to provide a simple experience of the process of creating an ML model and releasing a web application that uses that model."
    # (page script, label, icon) for each navigation entry, in display order.
    nav_entries = (
        ("streamlit_app.py", "Predict", ":material/smart_toy:"),
        (
            "pages/learningdata_visualization.py",
            "Learning Data",
            ":material/database:",
        ),
    )

    with st.sidebar:
        st.image("./images/PenguinClassifier_transparent.png")
        st.info(about_text)
        for page, label, icon in nav_entries:
            st.page_link(page, label=label, icon=icon)

# Bordered container that groups every input widget
container = st.container(border=True)

# Input parameter fields
container.header("Input features")

# Categorical features: one select box each
sex_options = ("male", "female")
island_options = ("Biscoe", "Dream", "Torgersen")
sex = container.selectbox("Sex", sex_options)
island = container.selectbox("Island", island_options)

# Numeric features: one slider each, with explicit range and initial value
bill_length_mm = container.slider(
    "Bill length (mm)", min_value=32.1, max_value=59.6, value=43.9
)
bill_depth_mm = container.slider(
    "Bill depth (mm)", min_value=13.1, max_value=21.5, value=17.2
)
flipper_length_mm = container.slider(
    "Flipper length (mm)", min_value=172.0, max_value=231.0, value=201.0
)
body_mass_g = container.slider(
    "Body mass (g)", min_value=2700.0, max_value=6300.0, value=4207.0
)

# Create a one-row DataFrame from the values chosen in the input widgets
data = {
    "island": island,
    "bill_length_mm": bill_length_mm,
    "bill_depth_mm": bill_depth_mm,
    "flipper_length_mm": flipper_length_mm,
    "body_mass_g": body_mass_g,
    "sex": sex,
}
input_df = pd.DataFrame(data, index=[0])

# One-hot encode the categorical variables. The columns to encode are passed
# explicitly so the result does not depend on pandas' dtype inference.
encode = ["island", "sex"]
input_encoded_df = pd.get_dummies(input_df, columns=encode, prefix=encode)

# Feature columns, in the order the model is expected to receive them
# (NOTE(review): must match the columns used at training time - confirm
# against the training script).
expected_columns = [
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
    "island_Biscoe",
    "island_Dream",
    "island_Torgersen",
    "sex_female",
    "sex_male",
]

# A single input row only produces the dummy column of the selected category.
# reindex adds the missing dummy columns filled with False and puts every
# column in the order given by expected_columns.
input_encoded_df = input_encoded_df.reindex(
    columns=expected_columns, fill_value=False
)

# Load the model once and reuse it: Streamlit re-executes this script on every
# widget interaction, so an uncached load would hit the disk each time.
@st.cache_resource
def _load_model(path="penguin_classifier_model.pkl"):
    """Return the classifier stored at *path*, cached across script reruns.

    NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    only point this at a trusted model file.
    """
    return joblib.load(path)


clf = _load_model()

# Execute prediction
prediction = clf.predict(input_encoded_df)
# Class probabilities scaled to percentages (0-100)
prediction_proba = np.asarray(clf.predict_proba(input_encoded_df)) * 100

# Display prediction result
st.write("## 🐧Prediction results")
# Species names indexed by the predicted class
# (NOTE(review): assumes the model outputs integer labels 0-2 in this order -
# confirm against the training script).
species_names = ["Adelie", "Chinstrap", "Gentoo"]
penguins_species = np.array(species_names)
st.success(str(penguins_species[prediction][0]))

# Display prediction probability: one percentage column per species,
# each rendered as a 0-100 progress bar
df_prediction_proba = pd.DataFrame(prediction_proba, columns=species_names)
st.dataframe(
    df_prediction_proba,
    column_config={
        name: st.column_config.ProgressColumn(
            name, format="%d %%", min_value=0, max_value=100
        )
        for name in species_names
    },
    hide_index=True,
    width=704,
)

# Custom CSS: render h2 and h3 headings at a fixed 1.25rem font size
heading_css = """
    <style>
    h2, h3 {
        font-size: 1.25rem !important;
    }
    </style>
    """
st.markdown(heading_css, unsafe_allow_html=True)

# Display the sidebar, but only when this file is executed as the main script
# (the guard skips it if the module is merely imported)
if __name__ == "__main__":
    render_sidebar()