Machine-Learning/linearreg.py at master · Muhib-Hasan/Machine-Learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score

FILEPATH = "dataset_v4.csv"

def load_dataframe(filepath):
	"""Read a CSV source into a pandas DataFrame.

	Args:
		filepath: Location of the CSV data; handed to ``pd.read_csv`` as-is.

	Returns:
		The DataFrame produced by ``pd.read_csv``.
	"""
	return pd.read_csv(filepath)

def retrieve_features(dataframe):
	"""Select the predictor columns and the target column from the dataset.

	Args:
		dataframe: Table that contains every column named below.

	Returns:
		A pair ``(x, y)``: ``x`` holds the nine predictor columns in the
		order listed, ``y`` is the "Production" column.
	"""
	target_column = "Production"
	# Column names must match the CSV header exactly (including spelling).
	feature_columns = [
		"Code",
		"Year",
		"Tempareture",
		"Humidity",
		"Rainfall",
		"Wind Speed",
		"Bright Sunshine",
		"Cloud Coverage",
		"Area",
	]

	return dataframe[feature_columns], dataframe[target_column]

def split_x_y(x, y):
	"""Partition features and target into train and test subsets.

	30% of the samples are assigned to the test subset. ``random_state=0``
	is passed to ``train_test_split`` so repeated runs use the same seed.

	Returns:
		The tuple ``(x_train, x_test, y_train, y_test)``.
	"""
	partitions = train_test_split(x, y, test_size=0.30, random_state=0)

	return tuple(partitions)

def model_sklearn(x_train, y_train):
	"""Fit a ``LinearRegression`` on the training data.

	Args:
		x_train: Training features.
		y_train: Training targets.

	Returns:
		The fitted ``LinearRegression`` estimator.
	"""
	regressor = LinearRegression()

	# ``fit`` hands back the estimator itself, so it can be returned directly.
	return regressor.fit(x_train, y_train)

def model_evaluation(model, x_test, y_test):
	"""Evaluate a fitted model on held-out data and print its metrics.

	Prints the R2 score on its own line (as before), followed by a labelled
	RMSE line.

	Args:
		model: Fitted estimator exposing ``predict``.
		x_test: Features to predict on.
		y_test: True target values corresponding to ``x_test``.

	Returns:
		A ``(r2, rmse)`` tuple. The function previously returned ``None``,
		so callers that ignore the result are unaffected.
	"""
	y_predict = model.predict(x_test)

	# checking R2 score
	score = r2_score(y_test, y_predict)
	print(score)

	# RMSE: square root of the mean squared residual over all predictions.
	residuals = np.asarray(y_test) - np.asarray(y_predict)
	rmse = float(np.sqrt(np.mean(residuals ** 2)))
	print("RMSE:", rmse)

	return score, rmse

def data_scaling(X_train, X_test):
	"""Standardize the train and test features with a ``StandardScaler``.

	The scaler is fitted on ``X_train`` only; that same fitted scaler is
	then applied to both splits.

	Returns:
		The pair ``(scaled_x_train, scaled_x_test)``.
	"""
	fitted_scaler = StandardScaler().fit(X_train)

	return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

def apply_pca(scaled_x_train, scaled_x_test, n_components=2):
	"""Project the train and test features onto principal components.

	The PCA is fitted on the training split only, and the fitted projection
	is then applied to both splits.

	Args:
		scaled_x_train: Training features (the caller in this file passes the
			output of ``data_scaling``).
		scaled_x_test: Test features, transformed with the projection learned
			from ``scaled_x_train``.
		n_components: Forwarded to ``PCA(n_components=...)``. Defaults to 2,
			the value that used to be hard-coded, so existing calls behave
			exactly as before.

	Returns:
		The pair ``(x_train_pca, x_test_pca)``.
	"""
	pca = PCA(n_components=n_components, random_state=0).fit(scaled_x_train)
	# Report the component count the fitted PCA actually ended up with.
	print("PCA Components: ", pca.n_components_)
	x_train_pca = pca.transform(scaled_x_train)
	x_test_pca = pca.transform(scaled_x_test)

	return x_train_pca, x_test_pca

if __name__ == '__main__':
	# Load the dataset and separate the predictors from the target.
	frame = load_dataframe(FILEPATH)
	features, target = retrieve_features(frame)
	print(features.shape, target.shape)

	# One hold-out split, shared by both experiments below.
	train_x, test_x, train_y, test_y = split_x_y(features, target)
	print(train_x.shape, test_x.shape)
	print(train_y.shape, test_y.shape)

	# Experiment 1: linear regression on the raw features.
	print("\n###########Model without PCA###############\n")
	baseline_model = model_sklearn(train_x, train_y)
	model_evaluation(baseline_model, test_x, test_y)

	# Experiment 2: standardize, reduce with PCA, then fit the same model type.
	print("\n###########Model with PCA###############\n")
	train_scaled, test_scaled = data_scaling(train_x, test_x)
	print(train_scaled.shape, test_scaled.shape)

	train_pca, test_pca = apply_pca(train_scaled, test_scaled)
	print(train_pca.shape, test_pca.shape)

	pca_model = model_sklearn(train_pca, train_y)
	model_evaluation(pca_model, test_pca, test_y)