-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathag_utils.py
More file actions
220 lines (166 loc) · 6.19 KB
/
ag_utils.py
File metadata and controls
220 lines (166 loc) · 6.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# functions used for algorithm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
def get_data(dataset_name, fill_invalid_with=np.nan, used_for='reg'):
    """Load a fingerprint dataset from ``data/<dataset_name>.npz``.

    The archive must provide the keys read below: ``tr_rss``, ``ts_rss``,
    ``tr_crd``, ``ts_crd``, ``nan_value`` (the marker stored for an access
    point that was not detected), ``multi_fl_id`` and ``multi_bd_id`` (0/1
    indicators for multi-floor / multi-building data) and ``fl_type``
    (``'nan'``: no floor information, ``'rel'``: relative floor position,
    ``'abs'``: absolute floor position).

    Args:
        dataset_name: Archive name without the ``.npz`` extension.
        fill_invalid_with: Value written over every RSS entry equal to the
            archive's ``nan_value``. Pass the string ``"No_Op"`` to keep the
            marker untouched.
        used_for: ``'reg'`` (regression) or ``'cls'`` (classification).
            For multi-floor data, ``'reg'`` converts relative floor labels
            with ``fl_cls2reg`` and ``'cls'`` converts absolute floor values
            with ``fl_reg2cls``.

    Returns:
        Tuple ``(ts_rss, ts_crd, tr_rss, tr_crd)`` -- test arrays first.
    """
    folder = r'data/'
    file_name = dataset_name + '.npz'
    path = folder + file_name

    # np.load returns an NpzFile that keeps the archive open; every array is
    # read inside the ``with`` block so the handle is closed afterwards.
    with np.load(path) as data:
        tr_rss = data['tr_rss']
        ts_rss = data['ts_rss']
        tr_crd = data['tr_crd']
        ts_crd = data['ts_crd']
        invalid_value = data['nan_value']
        multi_flr_id = data['multi_fl_id']
        multi_bd_id = data['multi_bd_id']
        fl_type = data['fl_type']

    if multi_flr_id == 1:
        # Floor column: second to last when a further column follows it in
        # multi-building data, otherwise the last column.
        fl_ind = -2 if multi_bd_id == 1 else -1
        if used_for == 'reg' and fl_type == 'rel':
            tr_crd[:, fl_ind] = fl_cls2reg(tr_crd[:, fl_ind], dataset_name)
            ts_crd[:, fl_ind] = fl_cls2reg(ts_crd[:, fl_ind], dataset_name)
        elif used_for == 'cls' and fl_type == 'abs':
            tr_crd[:, fl_ind] = fl_reg2cls(tr_crd[:, fl_ind])
            ts_crd[:, fl_ind] = fl_reg2cls(ts_crd[:, fl_ind])

    if multi_bd_id == 1 and used_for == 'reg':
        # Regression keeps only the first three coordinate columns
        # (presumably x, y, floor; the remaining column is dropped).
        tr_crd = tr_crd[:, :3]
        ts_crd = ts_crd[:, :3]

    if fill_invalid_with != "No_Op":
        ts_rss = _replace_invalid(ts_rss, invalid_value, fill_invalid_with)
        tr_rss = _replace_invalid(tr_rss, invalid_value, fill_invalid_with)

    return (ts_rss, ts_crd, tr_rss, tr_crd)


def _replace_invalid(rss, invalid_value, fill_value):
    """Return *rss* with entries equal to *invalid_value* set to *fill_value*."""
    fill_is_float = np.issubdtype(np.asarray(fill_value).dtype, np.floating)
    if fill_is_float and not np.issubdtype(rss.dtype, np.inexact):
        # An integer array cannot hold NaN (the default fill value) or any
        # fractional value, so promote it before writing.
        rss = rss.astype(float)
    rss[rss == invalid_value] = fill_value
    return rss
def fl_cls2reg(fl, dataset_name, floor_height=None):
    """Convert floor class labels into scaled floor positions.

    The labels are shifted so that the smallest one becomes 0 and are then
    multiplied by a per-dataset factor (presumably the height of one floor;
    confirm the unit against the dataset description).

    Args:
        fl: 1-D array-like of floor labels.
        dataset_name: Key used to look up the scale factor in ``fl_high``.
        floor_height: Optional explicit scale factor. When given it takes
            precedence over the ``fl_high`` table.

    Returns:
        Array of ``(fl - min(fl)) * factor``; an empty float array when
        *fl* is empty.

    Raises:
        KeyError: If no factor is configured for *dataset_name* and
            *floor_height* is not supplied.
    """
    # Per-dataset scale factors. The value for 'TUT2' was left as a
    # placeholder and has to be filled in by the user.
    fl_high = {
        'TUT2': None,  # TODO: fill in the floor height of this dataset
    }
    dif = floor_height if floor_height is not None else fl_high.get(dataset_name)
    if dif is None:
        raise KeyError(
            f"no floor height configured for dataset {dataset_name!r}; "
            "add it to fl_high or pass floor_height"
        )

    labels = np.asarray(fl)
    if labels.size == 0:
        # np.min would raise on an empty array.
        return labels.astype(float)
    return (labels - np.min(labels)) * dif
def fl_reg2cls(fl):
    """Map floor values to consecutive integer class indices.

    Each distinct value in *fl* is assigned its rank among the sorted
    distinct values, so the smallest value becomes class 0.

    Args:
        fl: 1-D array-like of floor values.

    Returns:
        Integer array with the same shape as *fl* holding the class index of
        every entry.
    """
    values = np.asarray(fl)
    # return_inverse yields, for every entry, its position in the sorted
    # unique values -- the same mapping a value->index dict would give, but
    # without a Python-level loop and without failing on NaN entries.
    _, categories = np.unique(values, return_inverse=True)
    return categories.reshape(values.shape)
# calculate Euclidean distance
def euclidean_distance(data1, data2):
    """Return the row-wise Euclidean distance between two 2-D inputs.

    Args:
        data1: 2-D array-like, one sample per row.
        data2: 2-D array-like that broadcasts against *data1*.

    Returns:
        1-D array holding the L2 norm of ``data1 - data2`` for each row.
    """
    # asarray lets plain nested lists be subtracted element-wise.
    difference = np.asarray(data1) - np.asarray(data2)
    return np.linalg.norm(difference, axis=1)
# get the indices of the n strongest signals in a row, ignoring NaN entries
def max_n_indices(row, n):
    """Return the positions of the *n* largest non-NaN values in *row*.

    Args:
        row: 1-D array-like of values; NaN entries are ignored.
        n: Maximum number of positions to return.

    Returns:
        Integer array of positions into *row*, ordered from the largest value
        downwards. It holds fewer than *n* entries when *row* has fewer than
        *n* non-NaN values.
    """
    row = np.asarray(row)  # allow plain sequences, which lack fancy indexing
    valid_positions = np.flatnonzero(~np.isnan(row))
    valid_values = row[valid_positions]
    # argsort is ascending, so reverse it before keeping the first n.
    strongest_first = np.argsort(valid_values)[::-1][:n]
    return valid_positions[strongest_first]
# get the indices of the n strongest signals in each row, ignoring NaN entries
def get_top_n_indices(data, n):
    """Return, for every row of *data*, the positions of its *n* largest values.

    Args:
        data: 2-D array; NaN entries are ignored when ranking.
        n: Number of positions to report per row.

    Returns:
        Float array of shape ``(rows, n)``. Row ``i`` holds the result of
        ``max_n_indices(data[i], n)``; rows with fewer than *n* non-NaN
        values are padded on the right with NaN (hence the float dtype).
    """
    num_rows, _ = data.shape
    result = np.full((num_rows, n), np.nan)  # NaN marks "no such position"
    for i in range(num_rows):
        indices = max_n_indices(data[i], n)
        # Only the leading slots are written; the rest keep the NaN padding.
        result[i, :len(indices)] = indices
    return result
# find the indices of the rows in a matrix whose elements are all in the target list
def find_rows_with_all_elements(matrix, target_list):
    """Return the indices of the rows whose elements all occur in *target_list*.

    Args:
        matrix: Iterable of rows (for example a 2-D array).
        target_list: Array-like of allowed values.

    Returns:
        List of row indices, in ascending order. A row containing NaN never
        matches, because NaN compares unequal to every value.
    """
    # Build the lookup once: set membership is O(1) per element instead of a
    # scan over target_list for every element of every row.
    allowed = set(np.asarray(target_list).ravel().tolist())
    return [
        i
        for i, row in enumerate(matrix)
        if all(element in allowed for element in row)
    ]
# key procedure of the group matching algorithm
def get_fp(signal, raw_fp, raw_cord, n=8, threshold=1):
    """Select the fingerprints whose strongest APs match those of *signal*.

    The *n* strongest access points of *signal* form the target group. The
    match is then tightened level by level: at level ``d`` a fingerprint is
    kept only if its ``d`` strongest access points all belong to the target
    group. Tightening stops as soon as fewer than *threshold* fingerprints
    remain, and the last level that still had enough matches is returned.

    Args:
        signal: 1-D RSS vector of the sample to locate (NaN = not detected).
        raw_fp: 2-D array of reference fingerprints, one per row.
        raw_cord: 2-D array of coordinates, row-aligned with *raw_fp*.
        n: Size of the target group and maximum matching depth.
        threshold: Minimum number of matching fingerprints for a level to be
            accepted.

    Returns:
        Tuple ``(new_fp, new_cord)`` holding the selected rows of *raw_fp*
        and *raw_cord*. When not even the first level reaches *threshold*,
        the unfiltered inputs are returned.
    """
    tg = max_n_indices(signal, n)
    mt = get_top_n_indices(raw_fp, n)

    # The signal may have fewer than n detected APs; deeper levels than that
    # cannot be matched.
    max_depth = min(n, len(tg))

    final_indx = None
    for depth in range(1, max_depth + 1):
        # mt must remain the per-fingerprint AP ranking at every level.
        # Replacing it with raw_fp[indx] would compare RSS values against AP
        # indices and turn the row numbers into positions within a subset.
        # Each level is a stricter condition than the previous one, so the
        # matches shrink monotonically without narrowing mt, and indx always
        # refers to rows of raw_fp.
        indx = find_rows_with_all_elements(mt[:, :depth], tg)
        if len(indx) < threshold:
            break
        final_indx = indx

    if final_indx is None:
        return (raw_fp, raw_cord)
    return (raw_fp[final_indx, :], raw_cord[final_indx, :])
# KNN algorithm
class KNNRegression:
    """k-nearest-neighbour regressor built on ``KNeighborsRegressor``.

    The underlying model is created and fitted on every ``predict`` call, so
    the same instance can be re-fitted on training sets of varying size.
    """

    def __init__(self, k=3, wknn_id=1):
        """Store the configuration.

        Args:
            k: Requested number of neighbours.
            wknn_id: ``1`` selects distance-weighted neighbours; any other
                value selects uniform weights.
        """
        self.k = k
        self.model = None
        self.wknn_id = wknn_id

    def fit(self, X, y):
        """Remember the training samples *X* and their targets *y*."""
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Fit the model on the stored training data and predict for *X*.

        Returns:
            The predictions flattened into a single row, shape ``(1, -1)``.
        """
        # Cap the neighbour count for this call only. Writing the cap back
        # to self.k would leave a reused instance stuck at the smallest
        # training-set size it has ever seen.
        n_neighbors = min(self.k, len(self.X_train))
        weights = 'distance' if self.wknn_id == 1 else 'uniform'
        self.model = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights)
        self.model.fit(self.X_train, self.y_train)
        prediction = self.model.predict(X)
        return np.array(prediction).reshape(1, -1)
# replace nan values
def nan_replace(data, replace_with=np.nan):
    """Overwrite the NaN entries of *data* in place.

    Args:
        data: Float array; it is modified in place.
        replace_with: Value written over every NaN entry. With the default
            (NaN) the call leaves the array unchanged.

    Returns:
        The same array object that was passed in.
    """
    nan_mask = np.isnan(data)
    data[nan_mask] = replace_with
    return data
# cdf plot
def cdf_plot(data):
    """Plot the empirical CDF of *data* and report its 80th percentile.

    The figure is shown with ``plt.show()`` and the 80th percentile is
    printed; nothing is returned.

    Args:
        data: 1-D array-like of error values.

    Raises:
        ValueError: If *data* is empty.
    """
    sorted_data = np.sort(data)
    sample_count = len(sorted_data)
    if sample_count == 0:
        # A CDF and a percentile are undefined without samples.
        raise ValueError('cdf_plot requires at least one value')

    # Cumulative probability of the i-th smallest sample is i / N.
    cdf = np.arange(1, sample_count + 1) / sample_count

    plt.plot(sorted_data, cdf, marker='o')
    plt.xlabel('Error(m)')
    plt.ylabel('CDF')
    plt.title('Cumulative Distribution Function (CDF)')

    # Mark the 80th percentile error on the plot.
    percentile_80 = np.percentile(sorted_data, 80)
    plt.axvline(x=percentile_80, color='red', linestyle='--', label='80th Percentile')

    plt.legend()
    plt.grid(True)
    plt.show()

    print(f'The 80th percentile error is: {percentile_80} meters')