-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
95 lines (78 loc) · 3.78 KB
/
app.py
File metadata and controls
95 lines (78 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
import matplotlib.pyplot as plt
# Configure the page before anything else is rendered.
st.set_page_config(page_title="Financial Aid Predictor", layout="centered")

st.header("📊 Data Visualizations")
df = pd.read_csv("cleanedd.csv")

with st.expander("View Pie Charts for Special Circumstances and Documents"):
    # One pie chart per categorical column, laid out side by side.
    pie_columns = ["Special Family Circumstances", "Loan", "FAID Missing Document"]
    fig1, axes1 = plt.subplots(1, 3, figsize=(18, 6))
    for i, col in enumerate(pie_columns):
        # Casting to str makes missing values count as a literal "nan"
        # category instead of being dropped by value_counts().
        data_counts = df[col].astype(str).value_counts()
        axes1[i].pie(
            data_counts,
            labels=data_counts.index,
            autopct='%1.1f%%',
            colors=plt.cm.Pastel1.colors,
        )
        axes1[i].set_title(f"Distribution of {col}")
    st.pyplot(fig1)
    # pyplot keeps a reference to every figure it creates until it is closed.
    # The script reruns on each widget interaction, so without this the
    # process would accumulate one more open figure per rerun.
    plt.close(fig1)

with st.expander("View Count Plots for Applicant Info"):
    # Count plots showing the distribution of each categorical column.
    cat_plot_columns = [
        "LEVEL",
        "Nationality",
        "Marital Status",
        "Plan to Reside",
        "Father Status",
        "Mother Status",
    ]
    sns.set_style("whitegrid")
    fig2, axes2 = plt.subplots(nrows=3, ncols=2, figsize=(12, 10))
    axes2 = axes2.flatten()
    for i, col in enumerate(cat_plot_columns):
        sns.countplot(x=col, data=df, palette="pastel", ax=axes2[i])
        axes2[i].set_title(f"Distribution of {col}")
        axes2[i].set_xlabel("")
        axes2[i].set_ylabel("Count")
        axes2[i].tick_params(axis='x', rotation=45)
    # Lay out this specific figure rather than whatever pyplot considers
    # the "current" one.
    fig2.tight_layout()
    st.pyplot(fig2)
    # Same reasoning as for fig1: release the figure once it has been drawn.
    plt.close(fig2)
# Load model and encoder
@st.cache_resource
def load_model():
    """Load the trained regression model from disk.

    Decorated with ``st.cache_resource`` so that script reruns reuse the
    already-loaded object instead of unpickling the file again, matching how
    the encoder is cached by ``load_encoder``.

    Returns:
        The object stored in ``gradient_boosting_model.pkl``.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects; the model file
    # must come from a trusted source.
    return joblib.load("gradient_boosting_model.pkl")


model = load_model()
# NOTE(review): this path is never read in this file -- load_encoder() refits
# an encoder from the CSV instead. Confirm whether the pickled encoder that
# was saved at training time should be loaded here.
encoder_path = "onehot_encoder.pkl"
@st.cache_resource
def load_encoder():
    """Fit and return a dense one-hot encoder for the categorical features.

    Reads ``cleanedd.csv`` and fits the encoder on the columns named in the
    module-level ``categorical`` list. Categories not seen during fitting are
    ignored at transform time (``handle_unknown='ignore'``). The result is
    cached with ``st.cache_resource`` so the CSV is not re-read on reruns.

    Returns:
        A fitted ``OneHotEncoder`` that produces dense arrays.
    """
    dataset = pd.read_csv("cleanedd.csv")  # path to your full dataset
    try:
        # scikit-learn >= 1.2 names the dense-output flag ``sparse_output``;
        # the older ``sparse`` keyword was removed in 1.4 and raises there.
        encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    except TypeError:
        # scikit-learn < 1.2 only accepts the legacy ``sparse`` keyword.
        encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
    encoder.fit(dataset[categorical])
    return encoder
# Feature definitions.
# Both names stay plain lists because they are used as DataFrame column
# selectors. ``categorical`` is read inside load_encoder() when it is called,
# so it has to exist before the call at the bottom of this section.
categorical = [
    'LEVEL',
    'Nationality',
    'Marital Status',
    'Plan to Reside',
    'Father Citizenship',
    'Father Status',
    'Mother Status',
    'Travel Records',
    'Car_Models',
    'FAID_Record_owner',
]

numerical = [
    'Father Income Document',
    'Mother Income Document',
    'Number of Siblings @ AUB',
    'Number of Dependents',
    'Number of Properties',
    'Total Estimated Value',
    'Total Area',
    'Total Tuition for siblings',
    'Total Financial Assistance for siblings',
    'Financial Assistant',
    'Father_Total_Benefits',
    'Mother_Total_Benefits',
    'Number_of_Cars',
    'Applicant Annual Income',
]

# Fitted one-hot encoder for the categorical columns listed above.
encoder = load_encoder()
# Streamlit UI: page heading and short instructions.
st.title("🎓 Financial Aid % Prediction")
st.markdown("Enter applicant information below to predict the **percentage of tuition** that will be awarded as financial aid.")
st.header("🧾 Applicant Information")

# Collect one value per feature. Categorical features get a dropdown whose
# choices are the categories the encoder learned for that column (the fitted
# categories are stored in the same order as ``categorical``); numerical
# features get a non-negative number box.
user_input = {}
for col, options in zip(categorical, encoder.categories_):
    user_input[col] = st.selectbox(col, options)
for col in numerical:
    user_input[col] = st.number_input(col, min_value=0.0, step=1.0)

# Run the model only when the user presses the button.
if st.button("Predict"):
    # Single-row frame holding the applicant's answers.
    input_df = pd.DataFrame([user_input])

    # One-hot encode the categorical answers, labelling the resulting columns
    # with the encoder's generated feature names.
    encoded_input = pd.DataFrame(
        encoder.transform(input_df[categorical]),
        columns=encoder.get_feature_names_out(categorical),
    )

    # Numeric columns first, then the encoded ones; anything that cannot be
    # parsed as a number becomes 0.
    final_input = (
        pd.concat([input_df[numerical], encoded_input], axis=1)
        .apply(pd.to_numeric, errors='coerce')
        .fillna(0)
    )

    # The regression model returns one value per row; there is only one row.
    prediction = model.predict(final_input)[0]

    # Show the outcome.
    st.subheader("🎯 Prediction Result")
    st.success(f"The estimated financial aid award is: **{prediction:.2f}%** of tuition.")