-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredictions.py
More file actions
42 lines (30 loc) · 1.33 KB
/
predictions.py
File metadata and controls
42 lines (30 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pickle
from pathlib import Path

import pandas as pd
def predict_cutoffs(
    year,
    difficulty,
    reference_df,
    output_file="predictions.csv",
    *,
    scaler_path="models/scaler.pkl",
    model_path="models/model.pkl",
):
    """Predict cutoff marks for every campus/branch pair and save them as CSV.

    The preprocessor and the model are unpickled from disk, one feature row is
    built per unique (campus, branch) pair found in ``reference_df``, and the
    rounded predictions are written to ``output_file`` sorted from the highest
    to the lowest mark.

    Args:
        year: Value stored in the ``year`` feature column of every row.
        difficulty: Value stored in the ``difficulty`` feature column of every
            row.
        reference_df: DataFrame providing the ``campus`` and ``branch``
            columns. It is not modified.
        output_file: Destination CSV path. Missing parent directories are
            created.
        scaler_path: Pickle file holding an object with a ``transform`` method.
        model_path: Pickle file holding an object with a ``predict`` method.

    Returns:
        DataFrame with the columns ``campus``, ``branch``, ``marks`` and
        ``year``, sorted by ``marks`` in descending order.

    Raises:
        FileNotFoundError: If ``scaler_path`` or ``model_path`` does not exist.
        KeyError: If ``reference_df`` lacks a ``campus`` or ``branch`` column.
    """
    # NOTE: pickle.load can execute arbitrary code while deserializing; only
    # point these paths at artifacts produced by a trusted training run.
    with open(scaler_path, "rb") as f:
        preprocessor = pickle.load(f)
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    # Only the campus/branch pairs that actually occur in the reference data
    # are predicted, rather than the full campus x branch cartesian product.
    prediction_data = reference_df[["campus", "branch"]].drop_duplicates().copy()
    prediction_data["year"] = year
    prediction_data["difficulty"] = difficulty

    prediction_data_scaled = preprocessor.transform(prediction_data)
    # Marks are reported as whole numbers.
    prediction_data["marks"] = model.predict(prediction_data_scaled).round().astype(int)

    result = prediction_data[["campus", "branch", "marks", "year"]].sort_values(
        "marks", ascending=False
    )

    # to_csv does not create directories, so make sure the target one exists.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output_file, index=False)
    return result
# Difficulty coefficient scenarios: 0.2 -> best case, 0.5 -> most likely,
# 0.8 -> worst case. Change this value (and the output file name below) to
# generate a different scenario.
coefficient_of_difficulty = 0.8

df = pd.read_csv("data/model_data/cutoff_2025.csv")

# predict_cutoffs writes its result to output_file itself, so no additional
# to_csv call is needed after it returns.
predictions_2026 = predict_cutoffs(
    year=2026,
    difficulty=coefficient_of_difficulty,
    reference_df=df,
    output_file="predict/worst_case.csv",
)
print(predictions_2026)