-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Description
Заданием было решить задачу с помощью одиночного дерева решений.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split
# Load the red-wine quality dataset directly from GitHub (needs network access).
ds1 = 'https://raw.githubusercontent.com/aniruddhachoudhury/Red-Wine-Quality/master/winequality-red.csv'
ds = pd.read_csv(ds1)

# Quick look at the data. Explicit print() calls are used so the output is
# visible when this runs as a plain script, where bare expressions are discarded.
print(ds.head())
print(ds.shape)
print(ds.describe())

# Scatter plot of fixed acidity against citric acid.
plt.scatter(ds['fixed acidity'], ds['citric acid'], color='g', label='idk what')
plt.xlabel('fixed acidity')
plt.ylabel('citric acid')
# plt.show has to be *called*; the bare attribute reference never rendered
# the figure.
plt.show()

# Pairwise correlation matrix of the dataset columns.
mtr = ds.corr()
print(mtr)
from sklearn import metrics
# Single decision tree regressor: depth capped at 4, fixed seed so repeated
# runs build the same tree.
regressor = DecisionTreeRegressor(max_depth=4, random_state=42)

# Remove exact duplicate rows before building the train/test split.
ds = ds.drop_duplicates()

# Feature subset used for the model. Earlier attempts recorded by the author
# (the trailing figure is the author's own note, presumably the R2 score of
# that run -- TODO confirm):
#   * all columns except the last one
#   * alcohol, volatile acidity, sulphates, citric acid,
#     total sulfur dioxide, density                      -> 41%
#   * alcohol, volatile acidity, sulphates, citric acid,
#     density                                            -> 44%
feature_names = ['alcohol', 'volatile acidity', 'sulphates', 'density']
X = ds[feature_names].values
# The target is taken positionally: the last column of the dataset.
y = ds.iloc[:, -1].values

# Hold out 20% of the rows for evaluation; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
regressor.fit(X_train, y_train)

# plot_tree draws onto the *current* axes, so open a dedicated figure instead
# of overwriting whatever was plotted before, then actually display it.
plt.figure(figsize=(16, 8))
tree.plot_tree(regressor, feature_names=feature_names)
plt.show()

# Side-by-side view of true and predicted values on the held-out rows.
y_pred = regressor.predict(X_test)
df = pd.DataFrame({'actual': y_test, 'pred': y_pred})
print(df)

# Error metrics on the test split. RMSE is the square root of the MSE.
mse = metrics.mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
print('mae', mae)
print('rmse', mse ** 0.5)

# NOTE: the printed label says "accuracy", but the value is the R2 coefficient
# of determination scaled by 100; it can be negative for a poor model.
r2 = metrics.r2_score(y_test, y_pred)
print(f"точность R2: {r2 * 100:.2f}%")