-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
65 lines (54 loc) · 3 KB
/
app.py
File metadata and controls
65 lines (54 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import pandas as pd
from src.components.data_ingestion import DataIngestion
from src.components.data_transformation import DataTransformation
from src.components.model_trainer import ModelTrainer
from src.logger import logging
def _read_uploaded_csv(uploaded_file):
    """Parse the uploaded CSV, reporting failures in the UI instead of crashing.

    Returns the parsed DataFrame, or ``None`` when the file cannot be parsed
    (the error has already been shown to the user and logged in that case).
    """
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the uploaded CSV file: {e}")
        logging.exception("Failed to parse uploaded CSV: %s", e)
        return None


def _offer_eda_report(eda_report_path):
    """Show a download button that serves the EDA report file verbatim."""
    # Read as bytes so the report is served exactly as written on disk, with no
    # dependency on the platform's default text encoding.
    with open(eda_report_path, "rb") as report_file:
        report_bytes = report_file.read()
    st.download_button(
        "Download EDA Report",
        report_bytes,
        file_name="eda_report.html",
        mime="text/html",
    )


def _show_model_results(best_model_name, best_model_score, model_report, problem_type):
    """Render the model comparison table and the best-model summary."""
    st.subheader("Model Comparison")
    if problem_type in ("regression", "classification"):
        score_columns = ["Train Score", "Test Score"]
    else:  # clustering
        score_columns = ["Silhouette Score"]
    comparison_df = pd.DataFrame.from_dict(
        model_report, orient="index", columns=score_columns
    )
    st.table(comparison_df)

    st.subheader("Best Model")
    st.write(f"Best Model: {best_model_name}")
    st.write(f"Best Model Score: {best_model_score}")


def main():
    """Render the Streamlit page and run the ML pipeline when requested.

    The page collects a CSV upload, a problem type and a target column name.
    Once all three are provided the data is previewed, and pressing
    "Run Analysis" runs data ingestion, data transformation and model
    training in sequence, showing progress, the EDA report download and the
    model comparison. Any failure is shown in the UI and logged with its
    traceback; nothing is raised to the caller.
    """
    st.title("Automated Machine Learning Pipeline")

    uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
    problem_type = st.selectbox(
        "Select Problem Type", ["classification", "regression", "clustering"]
    )
    target_column_name = st.text_input("Enter the Target Column Name")

    # NOTE(review): a target name is required even for "clustering"; whether the
    # transformation step needs it in that case is not visible here -- confirm
    # before relaxing this gate.
    if uploaded_file is None or not problem_type or not target_column_name:
        return

    df = _read_uploaded_csv(uploaded_file)
    if df is None:
        return

    st.write("Uploaded Data:")
    st.write(df)

    # NOTE(review): the results below are rendered only on the rerun triggered
    # by this button; interacting with another widget afterwards (presumably
    # including the download button) reruns the script and hides them -- confirm
    # and consider st.session_state if that matters.
    if not st.button("Run Analysis"):
        return

    # Fail fast with a clear message instead of an opaque downstream error.
    if problem_type != "clustering" and target_column_name not in df.columns:
        st.error(
            f"Target column '{target_column_name}' was not found in the uploaded data."
        )
        return

    try:
        # Data Ingestion
        data_ingestion = DataIngestion()
        st.info("Starting data ingestion...")
        train_data, test_data, eda_report_path = data_ingestion.initiate_data_ingestion(df)
        st.success("Data Ingestion Completed")

        # Display EDA Report
        _offer_eda_report(eda_report_path)

        # Data Transformation
        data_transformation = DataTransformation()
        st.info("Starting data transformation...")
        train_arr, test_arr, _ = data_transformation.initiate_data_transformation(
            train_data, test_data, problem_type, target_column_name
        )
        st.success("Data Transformation Completed")

        # Model Training
        model_trainer = ModelTrainer()
        st.info("Starting model training...")
        try:
            best_model_name, best_model_score, model_report = (
                model_trainer.initiate_model_trainer(train_arr, test_arr, problem_type)
            )
            st.success("Model Training Completed")
            _show_model_results(
                best_model_name, best_model_score, model_report, problem_type
            )
        except Exception as e:
            # Training failures get their own message so the user can tell the
            # earlier stages succeeded.
            st.error(f"An error occurred during model training: {str(e)}")
            logging.exception("Error in model training: %s", e)
    except Exception as e:
        # Top-level UI boundary: surface the problem and keep the traceback.
        st.error(f"An error occurred: {str(e)}")
        logging.exception("Error: %s", e)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()