-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLoad_data.py
More file actions
33 lines (26 loc) · 1.05 KB
/
Load_data.py
File metadata and controls
33 lines (26 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# coding=utf-8
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
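# Iris dataset.
# The dataset contains 150 records in 3 classes, 50 records per class.
# Each record has 4 features: sepal length, sepal width, petal length, petal width.
# These 4 features can be used to predict which species an iris belongs to
# (iris-setosa, iris-versicolor, iris-virginica).
# Only the first 100 records, two features and two classes are used here.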
def create_data():
    """Build a two-class, two-feature subset of the iris dataset.

    Keeps the first 100 records and the first two feature columns
    (sepal length and sepal width). Class label 0 is rewritten to -1 and
    every other label is left untouched, so with the two classes found in
    the first 100 records the labels end up in {-1, 1}.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds the two feature columns of the
        selected rows and ``y`` holds the matching relabelled targets.
    """
    iris = load_iris()
    df = pd.DataFrame(
        iris.data,
        columns=['sepal length', 'sepal width', 'petal length', 'petal width'],
    )
    df['label'] = iris.target

    # First 100 rows; columns 0 and 1 are the features, the last is the label.
    data = np.array(df.iloc[:100, [0, 1, -1]])

    # Relabel class 0 as -1 in one vectorised step instead of a row-by-row loop.
    data[data[:, -1] == 0, -1] = -1
    return data[:, :2], data[:, -1]
def my_load_iris():
    """Return the ``(features, labels)`` pair produced by ``create_data``."""
    features, labels = create_data()
    return features, labels
if __name__ == '__main__':
    # Script entry point: load the reduced dataset and print it for inspection.
    loaded = my_load_iris()
    print(loaded)