# lab_01.py
# -*- coding: utf-8 -*-
"""Lab 01.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1pdlSGy2YMtIhcbH3DUwIEMf9fH8iNwh7

# Mini Project – House Price Prediction

Task D1 — Data Loading & Exploration
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Task D1: load the house price dataset and print a quick exploratory summary.
# The CSV must provide the columns 'Area (sqft)', 'Bedrooms', 'Age (years)'
# and 'Price (Lakhs)', which the later tasks select by name.
house_df = pd.read_csv("House_Prices.csv")

# Visual divider printed between the report sections.
_SECTION_BREAK = "\n" + "=" * 50 + "\n"

print("First 5 rows of House Prices Dataset:")
print(house_df.head())
print(_SECTION_BREAK)

print("Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
house_df.info()
print(_SECTION_BREAK)

print("Descriptive Statistics:")
print(house_df.describe())
print(_SECTION_BREAK)

# Check for missing values (count of nulls per column)
print("Missing Values:")
print(house_df.isnull().sum())
print(_SECTION_BREAK)

print("Dataset Shape:", house_df.shape)
print("Features (X): ['Area (sqft)', 'Bedrooms', 'Age (years)']")
print("Target (y): 'Price (Lakhs)'")
"""Task D2 — Multiple Linear Regression Model Training"""
# Task D2: fit a multiple linear regression of price on three predictors.
# Prepare data: feature matrix (three columns) and the price target.
X_house = house_df[['Area (sqft)', 'Bedrooms', 'Age (years)']]
y_house = house_df['Price (Lakhs)']

# Create and train model on the full dataset (no train/test split here).
house_model = LinearRegression()
house_model.fit(X_house, y_house)

# coef_ follows the column order of X_house, so the short labels below are
# listed in that same order.
print("Regression Coefficients:")
for feature, coef in zip(['Area', 'Bedrooms', 'Age'], house_model.coef_):
    print(f" {feature}: {coef:.4f}")
print(f"\nIntercept: {house_model.intercept_:.4f}")

print("\nRegression Equation:")
# Adjacent f-strings are concatenated, so this prints one line.
print(
    f"Price = {house_model.coef_[0]:.4f}×Area"
    f" + {house_model.coef_[1]:.4f}×Bedrooms"
    f" + {house_model.coef_[2]:.4f}×Age"
    f" + {house_model.intercept_:.4f}"
)
"""Task D3 — Model Evaluation"""
# Task D3: score the fitted model on the rows it was trained on, then plot
# actual vs predicted prices and the residuals.
from sklearn.metrics import mean_squared_error, r2_score

# In-sample predictions and the residuals (actual minus predicted).
y_house_pred = house_model.predict(X_house)
residuals = y_house - y_house_pred

# Summary metrics
mse_house = mean_squared_error(y_house, y_house_pred)
r2_house = r2_score(y_house, y_house_pred)

# One print call; the joined lines match the original line-by-line output.
print("\n".join([
    "House Price Model Performance:",
    f"Mean Squared Error (MSE): {mse_house:.2f}",
    f"Root Mean Squared Error (RMSE): {np.sqrt(mse_house):.2f}",
    f"R² Score: {r2_house:.4f}",
    f"R² Score Percentage: {r2_house*100:.2f}%",
]))

# Side-by-side table of actual and predicted prices with absolute and
# percentage error per house.
comparison_house = pd.DataFrame({
    'Actual Price': y_house,
    'Predicted Price': y_house_pred,
    'Difference': residuals,
    'Error %': residuals / y_house * 100,
})
print("\nFirst 10 predictions vs actual:")
print(comparison_house.head(10))

# Actual vs predicted, plotted against the row position of each house.
house_index = range(len(y_house))
plt.figure(figsize=(10, 5))
plt.scatter(house_index, y_house,
            color='blue', alpha=0.7, label='Actual Price', s=100)
plt.scatter(house_index, y_house_pred,
            color='red', alpha=0.7, label='Predicted Price', s=100,
            marker='x')
plt.xlabel('House Index')
plt.ylabel('Price (Lakhs)')
plt.title('Actual vs Predicted House Prices')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Residuals against predictions; the dashed line marks zero error.
plt.figure(figsize=(10, 4))
plt.scatter(y_house_pred, residuals, alpha=0.7)
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel('Predicted Price')
plt.ylabel('Residuals (Actual - Predicted)')
plt.title('Residual Plot')
plt.grid(True, alpha=0.3)
plt.show()
"""Task D4 — Predict New House Price"""
# Task D4: predict the price of one new house and break the prediction down
# into the contribution of each feature.
# Define new house features, in the same order as the training columns.
new_house_features = [[1600, 3, 5]]  # Area, Bedrooms, Age
new_area, new_bedrooms, new_age = new_house_features[0]

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column labels; a bare list makes scikit-learn warn that X has no valid
# feature names.
new_house_df = pd.DataFrame(new_house_features, columns=X_house.columns)
predicted_price = house_model.predict(new_house_df)

print("New House Details:")
print("-" * 30)
print(f"Area: {new_area} sqft")
print(f"Bedrooms: {new_bedrooms}")
print(f"Age: {new_age} years")
print("-" * 30)
print(f"\nPredicted Price: {predicted_price[0]:.2f} Lakhs")

# Show contribution of each feature: coefficient × feature value. The three
# contributions plus the intercept add up to the predicted price.
print("\nPrice Contribution Breakdown:")
print("-" * 40)
area_contrib = house_model.coef_[0] * new_area
bedrooms_contrib = house_model.coef_[1] * new_bedrooms
age_contrib = house_model.coef_[2] * new_age
# Labels read the feature values from new_house_features so they cannot drift
# out of sync with the numbers actually used in the prediction.
print(f"Area ({new_area} × {house_model.coef_[0]:.4f}): {area_contrib:>7.2f} Lakhs")
print(f"Bedrooms ({new_bedrooms} × {house_model.coef_[1]:.4f}): {bedrooms_contrib:>7.2f} Lakhs")
print(f"Age ({new_age} × {house_model.coef_[2]:.4f}): {age_contrib:>7.2f} Lakhs")
print(f"Base Price (Intercept): {house_model.intercept_:>7.2f} Lakhs")
print("-" * 40)
print(f"Total Predicted Price: {predicted_price[0]:>7.2f} Lakhs")