-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel_precision.py
More file actions
executable file
·103 lines (85 loc) · 3.31 KB
/
model_precision.py
File metadata and controls
executable file
·103 lines (85 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from estimate import estimate
from src.config import load_filenames, load_data
from src.ascii_format import INFO, ERROR, DONE, BG_YELLOW, RED, RESET
from typing import Any
from sys import stderr
"""
* Mean Squared Error (MSE)
- MSE is the average of the squared differences between predicted and
actual values.
- Lower MSE indicates better model performance.
- The unit of MSE is the square of the target variable
(e.g., square of price).
* Root Mean Squared Error (RMSE)
- RMSE is the square root of MSE, making it easier to interpret because
it has the same unit as the target variable (e.g., price in dollars).
- An RMSE of 667.57 means that, on average, the model's predictions
are off by about 667.57 units.
- Whether this is "good" depends on the range and scale of your
target variable.
- For example:
- If car prices in the dataset range between $3,000 and $8,000, an error
of $667 is ~10% of the range, which might be acceptable.
- If prices are closer together (e.g., $5,000-$6,000), this error may be
too high.
* R-squared (R²)
- R2 measures how well the model explains the variance in the data.
- A value of 0.733 means the model explains 73.3% of the variability in
the target variable.
- R2 ranges from 0 to 1:
- 1 indicates a perfect model.
- 0 means the model does no better than simply predicting the mean of
the target.
- For many real-world problems, an R2R2 value above 0.7 is considered "good",
but this depends on the field:
- In social sciences, even 0.3 might be acceptable.
- In physical sciences, models often achieve R² > 0.9.
"""
def calculate_precision(y_target: float, y_pred: float) -> Any:
"""
Calculate precision metrics: MSE, RMSE, and R².
Parameters
----------
- The target y value
- The predicted y value
Returns
-------
- The precision values
"""
mse = mean_squared_error(y_target, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_target, y_pred)
r2 = max(0, r2) # r2 has a minimum value of 0
return mse, rmse, r2
def main():
print(f"{INFO} This program will calculate the precision of the model.")
print(f"{INFO} Loading data...")
# Load filenames from the configuration file
filenames = load_filenames()
# Unpack the filenames into two separate variables
if len(filenames) >= 2:
thetaset_filename, dataset_filename = filenames
else:
print(
f"{ERROR} Missing filename(s) in the configuration file.\n",
file=stderr
)
# Get labels, thetaset and feature values
X_label, y_label, theta0, theta1, X, y = load_data(
thetaset_filename, dataset_filename
)
# Predict y values
print(f"{INFO} Estimating {y_label}s using thetaset...")
y_pred = estimate(theta0, theta1, X, X)
# Calculate precision metrics
print(f"{INFO} Calculating precision...")
mse, rmse, r2 = calculate_precision(y, y_pred)
# Print results
print(f"\n{DONE} Computed model precision.\n")
print(f"Mean Squared Error (MSE):\t{BG_YELLOW}{RED}{mse}{RESET}")
print(f"Root Mean Squared Error (RMSE):\t{BG_YELLOW}{RED}{rmse}{RESET}")
print(f"R-squared (R²):\t\t\t{BG_YELLOW}{RED}{r2}{RESET}\n")
if __name__ == "__main__":
main()