-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalgorithms.py
More file actions
145 lines (117 loc) · 5.58 KB
/
algorithms.py
File metadata and controls
145 lines (117 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import numpy as np
def z_score_normalize(data, threshold = 1.0):
    """Convert ``data`` to z-scores and drop the outliers.

    Each value is standardised as ``(value - mean) / std`` using the
    population standard deviation (``np.std`` default). A z-score is kept
    only when its absolute value is strictly below ``threshold``, so values
    far below the mean are rejected just like values far above it.

    Args:
        data: Sequence of numbers. Negative and duplicate values are fine.
        threshold: Largest absolute z-score (exclusive) that is kept.

    Returns:
        A list of the surviving z-scores (NumPy floats). ``data`` itself is
        returned unchanged when it is empty, contains a string, or holds a
        single distinct value (standard deviation 0, z-scores undefined).

    Side effects:
        Prints status messages and sets NumPy's global ``legacy='1.25'``
        print option so the scores print without the ``np.float64`` wrapper.
    """
    print("\nZ-score Normalizer function called.\n")
    # Global print option: makes the returned scores print as plain numbers.
    np.set_printoptions(legacy='1.25')

    if len(data) == 0:
        print("Dataset needs at least one data value. This is empty.\n")
        clean_list = data
    elif any(isinstance(value, str) for value in data):
        print("List cannot contain any strings. This contains a string.\n")
        clean_list = data
    elif len(set(data)) == 1:
        # Standard deviation would be 0, so the division below is undefined.
        print("all of the elements in the list are the same. Standard deviation is 0 and Z-scores DNE.")
        clean_list = data
    else:
        values = np.asarray(data)
        z_scores = (values - np.mean(values)) / np.std(values)
        # Compare the magnitude: a z-score of -3 is as much an outlier as +3.
        clean_list = [z for z in z_scores if abs(z) < threshold]

    print('\n')
    return clean_list
def _to_number(value):
    """Return ``value`` as a number, or ``None`` when it is not numeric."""
    import numbers

    # bool is a subclass of int but is not accepted as a data value here.
    if isinstance(value, bool):
        return None
    if isinstance(value, numbers.Real):
        return value
    if isinstance(value, str):
        try:
            parsed = float(value)
        except ValueError:
            return None
        return int(parsed) if parsed.is_integer() else parsed
    return None


def impute_missing_values(data):
    """Fill missing entries (``None``, ``""``, ``"NA"``) from their neighbours.

    Every run of consecutive missing entries is replaced by the average of
    the nearest valid value before it and the nearest valid value after it.
    A leading run (nothing before it) takes the next valid value. A trailing
    run (nothing after it) wraps around and averages the previous value with
    the first value of the list. Averages that are whole numbers are stored
    as ``int``.

    Args:
        data: Mutable sequence of numbers, numeric strings and missing
            markers. Negative and fractional numbers are accepted.

    Returns:
        ``data`` itself, filled in place. If any non-missing element is not
        numeric, a message is printed and ``data`` is returned untouched.
        If there is no valid number at all, a new list of zeros with the
        same length is returned.
    """
    print("Impute missing value function called")
    print("Impute Missing Values function called")
    missing_markers = (None, "", "NA")

    # Validate before touching anything so a bad element leaves data intact.
    has_number = False
    for value in data:
        if value in missing_markers:
            continue
        if _to_number(value) is None:
            print("The element from a string is no valid number, please check the values")
            return data
        has_number = True

    # Nothing to interpolate from: fall back to zeros of the same length.
    if not has_number:
        return [0] * len(data)

    size = len(data)
    start = 0
    while start < size:
        if data[start] not in missing_markers:
            start += 1
            continue

        # Find the end (exclusive) of this run of missing entries.
        stop = start
        while stop < size and data[stop] in missing_markers:
            stop += 1

        # Earlier runs are already filled, so the left neighbour is valid.
        previous = _to_number(data[start - 1]) if start > 0 else None
        if stop < size:
            following = _to_number(data[stop])
        else:
            # Trailing run: wrap around to the first value of the list.
            following = _to_number(data[0])

        if previous is None:
            fill = following
        else:
            fill = (previous + following) / 2
            if float(fill).is_integer():
                fill = int(fill)

        for index in range(start, stop):
            data[index] = fill
        start = stop

    return data
def min_max_scale(data):
    """Scale values linearly into the range [0, 1].

    Each element ``x`` becomes ``(x - min) / (max - min)``, so the smallest
    value maps to 0.0 and the largest to 1.0.

    Args:
        data: Sequence of numbers.

    Returns:
        A new list of floats with the same length as ``data``. An empty
        input gives an empty list. When all elements are equal the
        denominator would be 0, so every element is scaled to 0.0.
    """
    # NOTE(review): looks like a leftover trace message; kept so the
    # function's console output is unchanged.
    print("First revision on a feature branch.")

    # min()/max() raise on an empty sequence, so handle that case first.
    # len() is used instead of truthiness so NumPy arrays are accepted too.
    if len(data) == 0:
        print("The list is empty - nothing to scale.")
        return []

    lowest = min(data)
    highest = max(data)

    # All elements equal: avoid dividing by zero and return float zeros so
    # the element type matches the general case.
    if lowest == highest:
        return [0.0] * len(data)

    span = highest - lowest
    return [(value - lowest) / span for value in data]