Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions 5thRegressionMultiReg (1).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Multiple regression demo: LinearRegression versus RandomForestRegressor.

Loads multireg.csv, uses every column except the last as input features and
the last column as the target, imputes and min-max scales the inputs, then
fits both models and shows a prediction plot and the RMSE for each.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

DATA_PATH = "D:/desktop/AI Training Content/multireg.csv"
# Fixed seed so the split and the forest give the same numbers on every run.
RANDOM_STATE = 42


def _plot_predictions(predicted, actual, title):
    """Plot predicted (red) and actual (blue) values, one point per test row."""
    xaxis = np.linspace(1, len(predicted), len(predicted))
    plt.plot(xaxis, predicted, color='red', label='predicted')
    plt.plot(xaxis, actual, color='blue', label='actual')
    plt.title(title)
    plt.legend()
    plt.show()


def _rmse(actual, predicted):
    """Return the root mean squared error of *predicted* against *actual*."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Hand pandas the path so it opens and closes the file itself; the earlier
# bare open(...) handle was never closed.
df = pd.read_csv(DATA_PATH)

# Inputs: all columns but the last. Target: the last column.
x = np.array(df.iloc[:, :-1])
y = np.array(df.iloc[:, -1])

# Split BEFORE imputing/scaling so that statistics learned by the
# preprocessors come from the training rows only (no test-set leakage).
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

im = SimpleImputer()  # rule: expects an array with at least 2 dimensions
xtrain = im.fit_transform(xtrain)
xtest = im.transform(xtest)

sc = MinMaxScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

# Algorithm choice: the target is continuous, so use regression algorithms
# (Linear Regression, Random Forest).
# Linear regression works on the equation of a line: y = bx + a.
# Types: 1) simple linear regression {1 input column : 1 output column}
#        2) multiple linear regression {more than 1 input : 1 output}

lr = LinearRegression()
lr.fit(xtrain, ytrain)
pred = lr.predict(xtest)

# Regression is evaluated differently from classification:
# 1) visualization
_plot_predictions(pred, ytest, 'Linear regression')

# 2) a single number that says how good the fit is: RMSE
res = _rmse(ytest, pred)
print("Linear regression RMSE:", res)


rf = RandomForestRegressor(random_state=RANDOM_STATE)
rf.fit(xtrain, ytrain)
predrf = rf.predict(xtest)

# 1) visualization
_plot_predictions(predrf, ytest, 'Random forest')

# 2) RMSE
resrf = _rmse(ytest, predrf)
print("Random forest RMSE:", resrf)



77 changes: 77 additions & 0 deletions 5thRegressionMultiReg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Regression demo on Churn_Modelling.csv: linear model versus random forest.

Adapted from the multireg.csv variant of this script. Columns 3..-2 of the
CSV are the inputs and the last column is the target. Two of the input
columns are label-encoded, then the inputs are imputed and min-max scaled
before LinearRegression and RandomForestRegressor are fitted and compared by
plot and RMSE.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

DATA_PATH = "C:/Users/Admin/Downloads/Churn_Modelling.csv"
# Positions (within the selected input columns) that get label-encoded.
# NOTE(review): presumably the text columns of the dataset - confirm.
CATEGORICAL_COLUMNS = (1, 2)
# Fixed seed so the split and the forest give the same numbers on every run.
RANDOM_STATE = 42


def _plot_predictions(predicted, actual, title):
    """Plot predicted (red) and actual (blue) values, one point per test row."""
    xaxis = np.linspace(1, len(predicted), len(predicted))
    plt.plot(xaxis, predicted, color='red', label='predicted')
    plt.plot(xaxis, actual, color='blue', label='actual')
    plt.title(title)
    plt.legend()
    plt.show()


def _rmse(actual, predicted):
    """Return the root mean squared error of *predicted* against *actual*."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Hand pandas the path so it opens and closes the file itself; the earlier
# bare open(...) handle was never closed.
df = pd.read_csv(DATA_PATH)

# Inputs: columns from index 3 up to (not including) the last. Target: last.
x = np.array(df.iloc[:, 3:-1])
y = np.array(df.iloc[:, -1])

# One encoder per column, kept in a dict, so each column's classes_ mapping
# stays available (a single shared encoder only remembers its last fit).
encoders = {}
for col in CATEGORICAL_COLUMNS:
    encoders[col] = LabelEncoder()
    x[:, col] = encoders[col].fit_transform(x[:, col])

# Everything is numeric now; leave the object dtype behind.
x = x.astype(float)

# Split BEFORE imputing/scaling so that statistics learned by the
# preprocessors come from the training rows only (no test-set leakage).
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

im = SimpleImputer()  # rule: expects an array with at least 2 dimensions
xtrain = im.fit_transform(xtrain)
xtest = im.transform(xtest)

sc = MinMaxScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

# Algorithm choice: regression algorithms (Linear Regression, Random Forest)
# are meant for a continuous target.
# NOTE(review): the last column of a churn dataset is presumably a 0/1 flag;
# if so, a classifier would be the appropriate model - confirm.
# Linear regression works on the equation of a line: y = bx + a.
# Types: 1) simple linear regression {1 input column : 1 output column}
#        2) multiple linear regression {more than 1 input : 1 output}

lr = LinearRegression()
lr.fit(xtrain, ytrain)
pred = lr.predict(xtest)

# Regression is evaluated differently from classification:
# 1) visualization
_plot_predictions(pred, ytest, 'Linear regression')

# 2) a single number that says how good the fit is: RMSE
res = _rmse(ytest, pred)
print("Linear regression RMSE:", res)


rf = RandomForestRegressor(random_state=RANDOM_STATE)
rf.fit(xtrain, ytrain)
predrf = rf.predict(xtest)

# 1) visualization
_plot_predictions(predrf, ytest, 'Random forest')

# 2) RMSE
resrf = _rmse(ytest, predrf)
print("Random forest RMSE:", resrf)



83 changes: 83 additions & 0 deletions Mumbai_regression_housePredicvtion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Mumbai house data: compare LinearRegression, Lasso and Ridge by R^2.

After dropping the "Unnamed: 0" column, the first remaining column is the
target and the rest are inputs. The Location column is label-encoded, the
data is split 70/30, and each model's R^2 on the test split is printed.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Seed shared by the split and by Lasso's random coordinate selection.
RANDOM_STATE = 500

df = pd.read_csv(r"C:\Users\user\Documents\Taral\DATA_SET\DATA_SET\Regression Data Set\Mumbai1.csv")
print(df)
df.info()
df = df.drop(columns="Unnamed: 0")

# Copy the slice so the encoding below writes to x itself, not to a view
# of df (avoids pandas' chained-assignment warning).
x = df.iloc[:, 1:].copy()  # input
print(x)
x.info()
y = df.iloc[:, 0]  # output
print(y)

# Correlate numeric inputs only: Location is still text here, and
# DataFrame.corr() raises on non-numeric columns in pandas 2.x.
corr = x.select_dtypes(include="number").corr()
print(corr)

# LabelEncoder converts the Location strings into integer codes.
lb = LabelEncoder()
x["Location"] = lb.fit_transform(x["Location"])
print(x)

# training and testing data
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=RANDOM_STATE
)

print(len(x_train))
print(len(x_test))
print(len(y_train))
print(len(y_test))

print(x_train)
print(x_test)
print(y_train)
print(y_test)

# Scores are printed explicitly: a bare `model.score(...)` expression only
# displays a value in an interactive console, not when run as a script.

lbl = LinearRegression()
lbl.fit(x_train, y_train)
print("LinearRegression test R^2:", lbl.score(x_test, y_test))

las_default = Lasso()
las_default.fit(x_train, y_train)
print("Lasso(default) test R^2:", las_default.score(x_test, y_test))

# selection="random" updates a random coefficient each iteration, so it is
# seeded to keep the result repeatable.
las1 = Lasso(alpha=1000, selection="random", random_state=RANDOM_STATE)
las1.fit(x_train, y_train)
print("Lasso(alpha=1000) test R^2:", las1.score(x_test, y_test))

rid1 = Ridge()
rid1.fit(x_train, y_train)
predict1 = rid1.predict(x_test)
print("Ridge(default) test R^2:", rid1.score(x_test, y_test))

rid2 = Ridge(alpha=50)
rid2.fit(x_train, y_train)
# Report both splits so over/under-fitting is visible at a glance.
print("Ridge(alpha=50) test R^2:", rid2.score(x_test, y_test))
print("Ridge(alpha=50) train R^2:", rid2.score(x_train, y_train))

11 changes: 11 additions & 0 deletions house_pred_duplicate.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
area,price
3200,61000000
2600,55000000
2600,55000000
3000,56500000
3200,61000000
3600,59500000
4000,76000000
3000,
4011,
2600,55000000
99 changes: 99 additions & 0 deletions labelencoder_linearregression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 25 12:06:40 2022
Label Encoder Multilinear Regression

Fits linear, degree-3 polynomial, Ridge and Lasso regressions of price on
(area, town) after label-encoding town, then grid-searches Ridge's alpha.

@author: user
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures

df = pd.read_csv(r"C:\Users\user\Documents\Taral\DATA_SET\DATA_SET\Regression Data Set\Basic Regression Data\multi_house_dummy.csv")
print(df)
df = df.reindex(columns=["area", "town", "price"])
print(df)

# Copy the slice so the encoding below writes to x itself, not to a view
# of df (avoids pandas' chained-assignment warning).
x = df.iloc[:, :-1].copy()
print(x)

y = df.iloc[:, -1:]
print(y)

plt.scatter(x.area, y)
plt.show()

# LabelEncoder converts strings into numbers
lbl = LabelEncoder()
x["town"] = lbl.fit_transform(x["town"])
print(x)


def _features(area, town_code=0):
    """Return a one-row frame laid out like ``x`` (area, town) for predict().

    Every model below is fitted on both columns, so a prediction needs both
    values, in this order. ``town_code`` is the LabelEncoder code of the
    town; 0 is the first entry of ``lbl.classes_``.
    """
    return pd.DataFrame([[area, town_code]], columns=x.columns)


# --- plain multiple linear regression on (area, town) ---
reg = LinearRegression()
reg.fit(x, y)
print(reg.coef_)
print(reg.intercept_)
print("linear R^2:", reg.score(x, y))

# --- polynomial regression: same inputs expanded to degree-3 terms ---
# Kept in its own variable: a model fitted on x_poly can only score/predict
# inputs that went through pr.transform first, never raw x rows.
pr = PolynomialFeatures(degree=3)
x_poly = pr.fit_transform(x)
poly_reg = LinearRegression()
poly_reg.fit(x_poly, y)
print(poly_reg.coef_)
print(poly_reg.intercept_)
print("polynomial R^2:", poly_reg.score(x_poly, y))

# Predictions for a few areas in the first encoded town.
for area in (3600, 2600, 4600):
    row = _features(area)
    print(area, lbl.classes_[0],
          "linear:", reg.predict(row),
          "polynomial:", poly_reg.predict(pr.transform(row)))

# Same area across every town the encoder has seen.
for town_code, town in enumerate(lbl.classes_):
    print(2600, town, "linear:", reg.predict(_features(2600, town_code)))

plt.scatter(x.area, y)
plt.plot(x.area, reg.predict(x), 'rD:')
plt.show()


# --- ridge regression ---
model1 = Ridge(alpha=5)
model1.fit(x, y)
print("ridge R^2:", model1.score(x, y))
print(model1.coef_)  # coefficients
print(model1.intercept_)
for area in (2600, 3600, 4600, 2000):
    print(area, lbl.classes_[0], "ridge:", model1.predict(_features(area)))

# --- lasso regression ---
model2 = Lasso(alpha=6)
model2.fit(x, y)
print("lasso R^2:", model2.score(x, y))
print(model2.coef_)
print(model2.intercept_)


# --- grid search over Ridge's alpha (5-fold cross-validation) ---
m1 = Ridge()
parameters = {'alpha': [1e-15, 1e-18, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]}

ridge_search = GridSearchCV(m1, parameters, cv=5)
ridge_search.fit(x, y)
print("best ridge params:", ridge_search.best_params_)
print("grid-search ridge R^2:", ridge_search.score(x, y))
print(ridge_search.best_estimator_.coef_)

# --- RMSE of the plain linear model on the training data ---
ypred = reg.predict(x)
mse = mean_squared_error(y, ypred)
rmse = np.sqrt(mse)
print("linear RMSE:", rmse)
Loading