diff --git a/app/static/__init__.py b/app/static/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/static/conf_mat.py b/app/static/conf_mat.py
new file mode 100644
index 0000000..9f7bb91
--- /dev/null
+++ b/app/static/conf_mat.py
@@ -0,0 +1,290 @@
+"""
+conf_mat.py
+
+Script to generate confusion matrix png files
+"""
+
+from pathlib import Path
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix
+from models.download_from_hf import download
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pandas as pd
+import numpy as np
+import joblib
+import os
+import sys
+
+MODEL_DIR = Path("models")
+PIPE_PATH = Path("models","pipe.pkl")
+COLUMNS_PATH = Path("models","column_names.pkl")
+CONF_MAT_PATH = Path("app","static","materials","confusion_mat.png")
+
+
+def initialize_artifacts() -> Pipeline:
+    """
+    Load the fitted pipeline, downloading the model artifacts first if needed.
+
+    Creates `MODEL_DIR` when absent. If `PIPE_PATH` or `COLUMNS_PATH` is
+    missing, calls `download()` from `models/download_from_hf.py`; when that
+    raises, the error is printed and the process exits with status 1.
+
+    Returns:
+        The object unpickled from `PIPE_PATH`.
+    """
+    # 1. Ensure the model directory exists
+    os.makedirs(MODEL_DIR, exist_ok=True)
+
+    # 2. Check for missing files
+    pipe_exists = os.path.exists(PIPE_PATH)
+    columns_exists = os.path.exists(COLUMNS_PATH)
+
+    if not pipe_exists or not columns_exists:
+        print("--- MODEL ARTIFACTS MISSING ---")
+        if not pipe_exists:
+            print(f"Missing: {PIPE_PATH}")
+        if not columns_exists:
+            print(f"Missing: {COLUMNS_PATH}")
+
+        print("Downloading the saved models from Hugging Face... This may take a moment.")
+        try:
+            # Run the `download` function from `models/download_from_hf.py`
+            download()
+            print("Download complete. Artifacts generated successfully.")
+            print("---------------------------------")
+        except Exception as e:
+            print(f"\nFATAL: Error during self-heal downloading: {e}")
+            print("Application cannot start without model artifacts. Exitting......")
+            # Use sys.exit: the bare `exit` helper is installed by the `site`
+            # module for interactive use and is not meant for programs.
+            sys.exit(1)  # Exit if the download fails
+    else:
+        print("Model artifacts found. Loading...")
+    # NOTE(review): joblib.load unpickles the file, which can execute
+    # arbitrary code; only load artifacts from a trusted source.
+    pipe = joblib.load(PIPE_PATH)
+    print("Model artifacts are loaded. Ready for prediction 🚀")
+    return pipe
+
+
+def _parse_campaign(token):
+    """
+    Convert one campaign token to its integer campaign number.
+
+    Returns None when the token is not an integral number.
+    """
+    try:
+        value = float(str(token).strip())
+    except ValueError:
+        return None
+    return int(value) if value.is_integer() else None
+
+
+def get_window(camps, campaign_dates):
+    """
+    Return the earliest start and latest end over the given campaigns.
+
+    Args:
+        camps: Comma-separated campaign numbers as a string, a single
+            scalar, or an iterable of campaign numbers.
+        campaign_dates: Mapping of campaign number to a (start, end) pair.
+
+    Returns:
+        `(min start, max end)` over the campaigns present in
+        `campaign_dates`, or `(np.nan, np.nan)` when none is present.
+    """
+    if isinstance(camps, str):
+        tokens = camps.split(',')
+    elif pd.api.types.is_scalar(camps):
+        # Covers None/NaN (skipped below as unparseable) and a bare number;
+        # campaign 0 is a valid key, so a falsy value must not short-circuit.
+        tokens = [camps]
+    else:
+        tokens = list(camps)
+
+    starts, ends = [], []
+    for token in tokens:
+        camp_num = _parse_campaign(token)
+        if camp_num in campaign_dates:
+            start, end = campaign_dates[camp_num]
+            starts.append(start)
+            ends.append(end)
+
+    return (min(starts) if starts else np.nan, max(ends) if ends else np.nan)
+
+
+def load_and_prepare_data():
+    """
+    Build the feature matrix and labels from the Kepler and K2 CSV files.
+
+    Reads `data/kepler_data.csv` and `data/k2_data.csv`, derives a transit
+    count for the K2 rows from their campaign windows, renames the K2
+    columns to the Kepler names, and stacks the two tables.
+
+    Returns:
+        A tuple `(X, y, columns)`: the feature array, the integer label
+        array (0 = FALSE POSITIVE/REFUTED, 1 = CANDIDATE, 2 = CONFIRMED),
+        and the feature column names.
+
+    Raises:
+        ValueError: If a disposition value has no integer code.
+    """
+    # Load Kepler dataset
+    df_raw = pd.read_csv("data/kepler_data.csv", comment="#")
+    feature_list = [
+        "koi_disposition", "koi_period", "koi_time0bk", "koi_depth", "koi_prad",
+        "koi_sma", "koi_incl", "koi_teq", "koi_insol", "koi_impact",
+        "koi_ror", "koi_srho", "koi_dor", "koi_num_transits"
+    ]
+    df_1 = df_raw[feature_list].copy()
+
+    # Load K2 dataset
+    df_2 = pd.read_csv("data/k2_data.csv", comment="#")
+
+    # Define campaign windows
+    campaign_dates = {
+        0: (2456725.0, 2456805.0), 1: (2456808.0, 2456891.0), 2: (2456893.0, 2456975.0),
+        3: (2456976.0, 2457064.0), 4: (2457065.0, 2457159.0), 5: (2457159.0, 2457246.0),
+        6: (2457250.0, 2457338.0), 7: (2457339.0, 2457420.0), 8: (2457421.0, 2457530.0),
+        9: (2457504.0, 2457579.0), 10: (2457577.0, 2457653.0), 11: (2457657.0, 2457732.0),
+        12: (2457731.0, 2457819.0), 13: (2457820.0, 2457900.0), 14: (2457898.0, 2457942.0),
+        15: (2457941.0, 2458022.0), 16: (2458020.0, 2458074.0), 17: (2458074.0, 2458176.0),
+        18: (2458151.0, 2458201.0), 19: (2458232.0, 2458348.0)
+    }
+
+    # Add observation window
+    df_2['campaigns'] = df_2['k2_campaigns']
+    df_2[['obs_start_bjd', 'obs_end_bjd']] = df_2['campaigns'].apply(
+        lambda x: pd.Series(get_window(x, campaign_dates))
+    )
+
+    # Transit counting
+    df_2['n_min'] = np.ceil((df_2['obs_start_bjd'] - df_2['pl_tranmid']) / df_2['pl_orbper'])
+    df_2['n_max'] = np.floor((df_2['obs_end_bjd'] - df_2['pl_tranmid']) / df_2['pl_orbper'])
+    df_2['num_transits'] = (df_2['n_max'] - df_2['n_min'] + 1).clip(lower=0)
+
+    # Select and rename columns
+    df_2 = df_2[
+        ["disposition", "pl_orbper", "pl_tranmid", "pl_trandep", "pl_rade",
+         "pl_orbsmax", "pl_orbincl", "pl_eqt", "pl_insol", "pl_imppar",
+         "pl_ratror", "pl_dens", "pl_ratdor", "num_transits"]
+    ]
+
+    mapping = {
+        "disposition": "koi_disposition", "pl_orbper": "koi_period", "pl_tranmid": "koi_time0bk",
+        "pl_trandep": "koi_depth", "pl_rade": "koi_prad", "pl_orbsmax": "koi_sma",
+        "pl_orbincl": "koi_incl", "pl_eqt": "koi_teq", "pl_insol": "koi_insol",
+        "pl_imppar": "koi_impact", "pl_ratror": "koi_ror", "pl_dens": "koi_srho",
+        "pl_ratdor": "koi_dor", "num_transits": "koi_num_transits"
+    }
+    df_2 = df_2.rename(columns=mapping)
+
+    # Combine both datasets
+    df = pd.concat([df_1, df_2], ignore_index=True)
+
+    # Prepare input/output
+    X = df.iloc[:, 1:].to_numpy()
+    label_codes = df["koi_disposition"].map({
+        "FALSE POSITIVE": 0, "CANDIDATE": 1, "CONFIRMED": 2, "REFUTED": 0
+    })
+
+    # `map` turns any disposition missing from the dict into NaN; name the
+    # offending values here rather than passing NaN labels downstream.
+    unmapped = label_codes.isna()
+    if unmapped.any():
+        unknown = sorted(df.loc[unmapped, "koi_disposition"].astype(str).unique())
+        raise ValueError(f"Unrecognized koi_disposition values: {unknown}")
+    y = label_codes.astype("int64").to_numpy()
+
+    return X, y, df.columns[1:]
+
+
+def main() -> None:
+    """
+    Render the confusion matrix of the saved pipeline to `CONF_MAT_PATH`.
+
+    Loads the pipeline, holds out one third of the data with a fixed seed,
+    predicts on that hold-out, and saves the heatmap as a PNG.
+    """
+    pipe = initialize_artifacts()
+    x, y, _ = load_and_prepare_data()
+
+    # NOTE(review): presumably mirrors the split used at training time so the
+    # hold-out is unseen by the model; confirm against the training script.
+    _, x_test, _, y_test = train_test_split(
+        x, y, test_size=1/3, shuffle=True, random_state=91, stratify=y
+    )
+
+    labels = ["FALSE POSITIVE","CANDIDATE","CONFIRMED"]
+    # Integer codes in the same order as `labels`; passing them explicitly
+    # keeps the matrix 3x3 and aligned with the tick labels even if a class
+    # is absent from both y_true and y_pred.
+    label_codes = [0, 1, 2]
+    y_true = y_test
+    y_pred = pipe.predict(x_test)
+
+    cm = confusion_matrix(y_true, y_pred, labels=label_codes)
+
+    # Custom cosmic dark theme
+    plt.style.use('dark_background')
+
+    fig, ax = plt.subplots(figsize=(12, 12))
+    fig.patch.set_facecolor('#0a0a0a')
+    ax.set_facecolor('#0a0a0a')
+
+    # Custom colormap - cosmic blue to cyan
+    cmap = sns.color_palette([
+        '#0a0a0a',
+        '#001a2c',
+        '#003355',
+        '#004d7a',
+        '#0066a3',
+        '#0080cc',
+        '#0099f5',
+        '#00b3ff'
+    ], as_cmap=True)
+
+    # Create heatmap with custom styling
+    sns.heatmap(
+        cm,
+        xticklabels=labels,
+        yticklabels=labels,
+        annot=True,
+        fmt="d",
+        square=True,
+        cmap=cmap,
+        cbar_kws={'label': 'Count', 'shrink': 1.0},
+        linewidths=4,
+        linecolor='#1a1a1a',
+        ax=ax,
+        annot_kws={'size': 32, 'weight': 'bold', 'color': '#ffffff'}
+    )
+
+    # Customize appearance
+    ax.set_xlabel('Predicted', fontsize=22, color='#ffffff', fontweight='bold', labelpad=25)
+    ax.set_ylabel('Actual', fontsize=22, color='#ffffff', fontweight='bold', labelpad=25)
+    ax.set_title('Confusion Matrix', fontsize=36, color='#00BCFF', fontweight='bold', pad=40)
+
+    # Style tick labels
+    ax.tick_params(axis='both', colors='#a0a0a0', labelsize=18)
+    ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center')
+    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, ha='right')
+
+    # Style colorbar
+    cbar = ax.collections[0].colorbar
+    cbar.ax.yaxis.set_tick_params(color='#a0a0a0')
+    cbar.outline.set_edgecolor('#1a1a1a')
+    plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#a0a0a0', size=16)
+
+    plt.tight_layout()
+    # savefig does not create missing directories, so make sure the target exists.
+    CONF_MAT_PATH.parent.mkdir(parents=True, exist_ok=True)
+    plt.savefig(CONF_MAT_PATH, dpi=300, facecolor='#0a0a0a', edgecolor='none', bbox_inches='tight', pad_inches=0.1)
+    plt.close()
+    print(f"Confusion Matrix is successfully saved at {str(CONF_MAT_PATH)}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/app/static/css/style.css b/app/static/css/style.css
index cfe57e6..373c5a3 100644
--- a/app/static/css/style.css
+++ b/app/static/css/style.css
@@ -15,7 +15,7 @@
     --glass-border: rgba(255, 255, 255, 0.15);
 
     /* Spacing & Layout */
-    --container-width: 1200px;
+    --container-width: 1400px;
     --header-height: 80px;
     --radius-lg: 24px;
     --radius-md: 12px;
@@ -819,6 +819,175 @@ h1, h2, h3, h4, h5, h6 {
     background: rgba(255, 255, 255, 0.02);
 }
 
+/* =========================================
+   Performance Tab Styles
+   ========================================= */
+.performance-section {
+    padding: 4rem 0;
+}
+
+.performance-section .glass-card {
+    max-width: 1300px;
+    width: 100%;
+    margin: 0 auto;
+}
+
+.performance-grid {
+    display: flex;
+    flex-direction: column;
+    gap: 4rem;
+    align-items: center;
+    width: 100%;
+}
+
+@media (max-width: 1200px) {
+    .performance-grid {
+        grid-template-columns: 1fr;
+    }
+}
+
+.cm-container {
+    background: var(--bg-card);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    padding: 3rem;
+    text-align: center;
+    width: 100%;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+}
+
+.section-subtitle {
+    color: var(--accent-primary);
+    margin-bottom: 2rem;
+    font-size: 1.5rem;
+    position: relative;
+    z-index: 1;
+}
+
+.confusion-matrix {
+    width: 100%;
+    max-width: 880px;
+    height: auto;
+    border-radius: var(--radius-md);
+    box-shadow: 0 20px 60px rgba(0, 0, 0, 0.4), 0 0 30px rgba(0, 188, 255, 0.1);
+    transition: var(--transition-smooth);
+    background: #0a0a0a;
+}
+
+.confusion-matrix:hover {
+    transform: scale(1.02);
+    box-shadow: 0 30px 70px rgba(0, 0, 0, 0.5), 0 0 40px rgba(0, 188, 255, 0.2);
+}
+
+.metrics-container {
+    display: flex;
+    flex-direction: column;
+    gap: 2rem;
+    width: 100%;
+    max-width: 1000px;
+}
+
+.metrics-container .section-subtitle {
+    margin-bottom: 0.5rem;
+}
+
+.accuracy-card {
+    background: linear-gradient(135deg, rgba(0, 188, 255, 0.1), rgba(0, 119, 255, 0.1));
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    padding: 2rem;
+    text-align: center;
+}
+
+.accuracy-value {
+    font-size: 3.5rem;
+    font-weight: 700;
+    color: var(--accent-primary);
+    font-family: 'Lexend Deca', sans-serif;
+    line-height: 1;
+}
+
+.accuracy-label {
+    color: var(--text-muted);
+    margin-top: 0.5rem;
+    font-size: 1rem;
+}
+
+.metrics-table {
+    width: 100%;
+    border-collapse: collapse;
+    background: var(--bg-card);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    overflow: hidden;
+    font-size: 0.9rem;
+}
+
+.metrics-table th,
+.metrics-table td {
+    padding: 1rem;
+    text-align: center;
+    border-bottom: 1px solid var(--border-color);
+}
+
+.metrics-table th {
+    background: rgba(0, 188, 255, 0.1);
+    color: var(--accent-primary);
+    font-weight: 600;
+    font-size: 0.85rem;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+}
+
+.metrics-table td:first-child {
+    text-align: left;
+}
+
+.metrics-table tbody tr:hover td {
+    background: rgba(255, 255, 255, 0.02);
+}
+
+.metrics-table tfoot td {
+    background: rgba(0, 188, 255, 0.05);
+    color: var(--text-muted);
+    font-size: 0.85rem;
+    border-bottom: none;
+}
+
+.model-info {
+    background: var(--bg-card);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    padding: 1.5rem;
+}
+
+.model-info h4 {
+    color: var(--accent-primary);
+    margin-bottom: 1rem;
+    font-size: 1.1rem;
+}
+
+.model-info p {
+    color: var(--text-muted);
+    margin-bottom: 0.5rem;
+}
+
+.model-info ul {
+    list-style: none;
+    color: var(--text-muted);
+    font-size: 0.9rem;
+}
+
+.model-info li {
+    margin-bottom: 0.3rem;
+}
+
+.model-info li strong {
+    color: var(--text-main);
+}
+
 /* Notification System */
 #notification-container {
     position: fixed;
diff --git a/app/static/materials/confusion_mat.png b/app/static/materials/confusion_mat.png
new file mode 100644
index 0000000..f035e14
Binary files /dev/null and b/app/static/materials/confusion_mat.png differ
diff --git a/app/templates/index.html b/app/templates/index.html
index 7693b1d..aca2d68 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -39,6 +39,9 @@

+ About @@ -224,6 +227,84 @@

CSV File Requirements

+ +
+
+
+
+

Model Performance

+

Evaluation metrics and confusion matrix of the stacking ensemble classifier.

+
+ +
+
+

Confusion Matrix

+ Confusion Matrix +
+ +
+

Classification Metrics

+ +
+
0.75
+
Overall Accuracy
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ClassPrecisionRecallF1-ScoreSupport
FALSE POSITIVE0.820.810.821718
CANDIDATE0.560.550.561118
CONFIRMED0.790.810.801687
Weighted Avg: Precision 0.74 | Recall 0.75 | F1 0.75
+ +
+

Model Architecture

+

Stacking Ensemble with:

+
    +
  • Base Models: Random Forest (1000 trees), XGBoost
  • +
  • Meta-Classifier: Logistic Regression (SAGA, L2)
  • +
  • Preprocessing: StandardScaler + SMOTE
  • +
+
+
+
+
+
+
+