-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweek1-assignment.py
More file actions
108 lines (71 loc) · 2.73 KB
/
week1-assignment.py
File metadata and controls
108 lines (71 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
# coding: utf-8
"""
Created on Mon Apr 27 16:15:25 2020
@author: Sara Ben Shabbat
week1-assignment.py - 'Applied Machine Learning' - University of Michigan online course.
The week 1 assignment.
"""
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
cancer = load_breast_cancer()
# Question 1
# Convert the sklearn.dataset `cancer` to a DataFrame.
def answer_one():
    """Return the module-level `cancer` dataset as a pandas DataFrame.

    The frame has one column per entry in `cancer.feature_names`, filled
    from `cancer.data`, followed by a final `target` column holding
    `cancer.target`.
    """
    features = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
    # assign() appends the new column after the existing feature columns.
    return features.assign(target=cancer.target)
# Question 2
# What is the class distribution? (i.e. how many instances of `malignant` (encoded 0) and how many `benign` (encoded 1)?)
def answer_two():
    """Return the class distribution of the cancer data.

    Returns:
        A pandas Series of two integer counts indexed by
        ``['malignant', 'benign']``: the number of rows whose `target`
        is 0 (malignant) and the number whose `target` is 1 (benign).
    """
    target = answer_one()['target']
    # Summing a boolean mask counts its True entries directly, with no
    # intermediate index arrays.
    counts = [int((target == 0).sum()), int((target == 1).sum())]
    return pd.Series(counts, index=['malignant', 'benign'])
# Question 3
# Split the DataFrame into `X` (the data) and `y` (the labels).
def answer_three():
    """Split the cancer DataFrame into features and labels.

    Returns:
        A tuple ``(X, y)`` where `X` is a DataFrame holding every column
        of `answer_one()` except `target`, and `y` is the `target`
        column as a Series.
    """
    df = answer_one()
    # Drop the label column by name rather than by position, so the split
    # does not depend on `target` being the last column.
    X = df.drop('target', axis=1)
    y = df['target']
    return (X, y)
# Question 4
# Using `train_test_split`, split `X` and `y` into training and test sets `(X_train, X_test, y_train, and y_test)`.
def answer_four():
    """Split the features and labels into training and test sets.

    Calls `train_test_split` on the output of `answer_three()` with
    ``random_state=0`` and otherwise default settings.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    features, labels = answer_three()
    parts = train_test_split(features, labels, random_state=0)
    # Callers receive a tuple of the four pieces, in the order above.
    return tuple(parts)
# Question 5
# Using KNeighborsClassifier, fit a k-nearest neighbors (knn) classifier with `X_train`, `y_train` and using one nearest neighbor (`n_neighbors = 1`).
def answer_five():
    """Fit a one-nearest-neighbor classifier on the training split.

    Returns:
        A `KNeighborsClassifier` built with ``n_neighbors=1`` and fitted
        on the `X_train` / `y_train` parts returned by `answer_four()`.
    """
    # Only the training parts are needed; the test parts are discarded.
    train_features, _, train_labels, _ = answer_four()
    # fit() hands back the estimator, so the fitted model is returned directly.
    return KNeighborsClassifier(n_neighbors=1).fit(train_features, train_labels)
# Question 6
# Using your knn classifier, predict the class label using the mean value for each feature.
def answer_six():
    """Predict the class label for the per-feature mean of the dataset.

    Builds a single sample whose value for each feature is that feature's
    mean over all rows of `answer_one()`, and classifies it with the
    classifier returned by `answer_five()`.

    Returns:
        The array returned by `knn.predict` for that one sample.
    """
    cancerdf = answer_one()
    # Drop the label by name (not by position) and keep the means as a
    # one-row DataFrame, so the sample carries the same column names the
    # classifier was fitted with.
    means = cancerdf.drop('target', axis=1).mean().to_frame().T
    knn = answer_five()
    return knn.predict(means)
# Question 7
# Using your knn classifier, predict the class labels for the test set `X_test`.
def answer_seven():
    """Predict class labels for the test split.

    Returns:
        The result of calling `predict` on the classifier from
        `answer_five()` with the `X_test` part of `answer_four()`.
    """
    # Only the test features are needed here.
    _, test_features, _, _ = answer_four()
    classifier = answer_five()
    return classifier.predict(test_features)
# Question 8
# Find the score (mean accuracy) of your knn classifier using `X_test` and `y_test`.
def answer_eight():
    """Score the classifier (mean accuracy) on the test split.

    Returns:
        The value of `score(X_test, y_test)` for the classifier from
        `answer_five()`, using the test parts of `answer_four()`.
    """
    # The training parts are not needed for scoring.
    _, test_features, _, test_labels = answer_four()
    return answer_five().score(test_features, test_labels)