-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvisualization.py
More file actions
160 lines (128 loc) · 4.83 KB
/
visualization.py
File metadata and controls
160 lines (128 loc) · 4.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
def plot_history(history):
    """
    Plot training and validation metrics side by side.
    Args:
        history: Training history dictionary with 'loss', 'val_loss',
            'mae' and 'val_mae' per-epoch lists
    Returns:
        fig: Matplotlib figure with a loss panel and an MAE panel
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    # (train key, val key, y-label, title, train legend, val legend) per panel
    panels = (
        ('loss', 'val_loss', 'Loss',
         'Training and Validation Loss',
         'Training Loss', 'Validation Loss'),
        ('mae', 'val_mae', 'Mean Absolute Error (MPG)',
         'Training and Validation MAE',
         'Training MAE', 'Validation MAE'),
    )
    for axis, (train_key, val_key, ylab, title, train_lbl, val_lbl) in zip(axes, panels):
        axis.plot(history[train_key], label=train_lbl)
        axis.plot(history[val_key], label=val_lbl)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(ylab)
        axis.set_title(title)
        axis.legend()
    plt.tight_layout()
    return fig
def plot_actual_vs_predicted(y_true, y_pred):
    """
    Plot actual vs predicted values with a perfect-prediction reference line.
    Args:
        y_true: True MPG values (array-like)
        y_pred: Predicted MPG values (array-like, same length as y_true)
    Returns:
        fig: Matplotlib figure
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    fig, ax = plt.subplots(figsize=(10, 6))
    # Plot the points
    ax.scatter(y_true, y_pred, alpha=0.7)
    # Add perfect prediction line (y = x across the combined data range)
    min_val = min(np.min(y_true), np.min(y_pred))
    max_val = max(np.max(y_true), np.max(y_pred))
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
    ax.set_xlabel('Actual MPG')
    ax.set_ylabel('Predicted MPG')
    ax.set_title('Actual vs. Predicted MPG')
    # Coefficient of determination: R² = 1 - SS_res / SS_tot.
    # (Squared Pearson correlation would ignore systematic bias — a model
    # that is consistently off by a constant would still score ~1.)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    r_squared = 1.0 - ss_res / ss_tot if ss_tot > 0 else float('nan')
    ax.text(0.05, 0.95, f'R² = {r_squared:.3f}', transform=ax.transAxes,
            bbox=dict(facecolor='white', alpha=0.8))
    plt.tight_layout()
    return fig
def plot_feature_importance(input_data, train_stats, feature_names):
    """
    Visualize feature values compared to dataset statistics.
    Args:
        input_data: Dictionary of input feature values, keyed by feature name
        train_stats: Training statistics DataFrame with 'min'/'max' columns
            indexed by feature name
        feature_names: List of feature names to display
    Returns:
        fig: Matplotlib figure with one horizontal bar per feature
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    # Calculate normalized values (0-1 scale based on min-max)
    normalized_values = []
    for feature in feature_names:
        min_val = train_stats.loc[feature, 'min']
        max_val = train_stats.loc[feature, 'max']
        span = max_val - min_val
        if span == 0:
            # Constant feature in the training set: min-max scaling is
            # undefined, so place the bar at the midpoint instead of
            # dividing by zero.
            norm_value = 0.5
        else:
            norm_value = (input_data[feature] - min_val) / span
        normalized_values.append(norm_value)
    # Plot as horizontal bars
    y_pos = np.arange(len(feature_names))
    ax.barh(y_pos, normalized_values, align='center')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(feature_names)
    ax.invert_yaxis()  # Labels read top-to-bottom
    ax.set_xlabel('Normalized Value (0-1 scale)')
    ax.set_title('Feature Values Relative to Dataset Range')
    # Add raw value labels next to each bar (offset so short bars stay legible)
    for i, v in enumerate(normalized_values):
        ax.text(max(v + 0.03, 0.1), i, f"{input_data[feature_names[i]]}",
                va='center', fontweight='bold')
    plt.tight_layout()
    return fig
def plot_prediction_comparison(X_train, y_train, input_data, predicted_mpg):
    """
    Plot how the prediction compares to similar vehicles.
    Args:
        X_train: Training features (DataFrame with 'weight' and 'horsepower')
        y_train: Training targets (MPG values aligned with X_train)
        input_data: Dictionary of input feature values
        predicted_mpg: Predicted MPG value
    Returns:
        fig: Matplotlib figure
    """
    # Create dataframe from training data
    train_df = X_train.copy()
    train_df['mpg'] = y_train
    # Find similar vehicles based on key features (weight and horsepower),
    # within +/-15% of the input vehicle's values
    weight_range = (input_data['weight'] * 0.85, input_data['weight'] * 1.15)
    hp_range = (input_data['horsepower'] * 0.85, input_data['horsepower'] * 1.15)
    similar = train_df[
        (train_df['weight'] >= weight_range[0]) &
        (train_df['weight'] <= weight_range[1]) &
        (train_df['horsepower'] >= hp_range[0]) &
        (train_df['horsepower'] <= hp_range[1])
    ]
    # Create plot
    fig, ax = plt.subplots(figsize=(10, 6))
    # Plot predicted value
    ax.axvline(x=predicted_mpg, color='red', linestyle='--', linewidth=2,
               label=f'Predicted MPG: {predicted_mpg:.1f}')
    if not similar.empty:
        # Plot distribution of similar vehicles
        sns.histplot(similar['mpg'], bins=10, kde=True, ax=ax)
        # Add mean value line (only meaningful when matches exist;
        # an empty selection would yield a NaN label)
        mean_mpg = similar['mpg'].mean()
        ax.axvline(x=mean_mpg, color='green', linestyle='-', linewidth=2,
                   label=f'Average MPG of similar vehicles: {mean_mpg:.1f}')
    else:
        # No comparable vehicles in the training data — say so instead of
        # drawing an empty histogram with a NaN average line.
        ax.text(0.5, 0.5, 'No similar vehicles found in the dataset',
                transform=ax.transAxes, ha='center', va='center')
    ax.set_title('How Your Vehicle Compares to Similar Vehicles')
    ax.set_xlabel('MPG')
    ax.set_ylabel('Number of Vehicles')
    ax.legend()
    plt.tight_layout()
    return fig