# Modes_Prediction/Training_Testing_Functions.py at main · Smart-Photonics-IPHT/Modes_Prediction · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
import numpy as np
import time
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

def trainTONE(data, settings):
    """Run the configured bin search and return a copy of its result dict.

    Args:
        data: dataset dict, handed unchanged to the bin-search routine.
        settings: configuration dict; ``settings['BinSearch']`` selects the
            search method (compared case-insensitively). Only
            ``'statisticalsearch'`` is supported.

    Returns:
        A shallow copy of the dict produced by the search, extended with the
        key ``'Training Time'`` (wall-clock seconds measured with
        ``time.time()``).

    Raises:
        ValueError: if ``settings['BinSearch']`` names an unsupported method.
    """
    started_at = time.time()  # wall-clock reference for the training duration

    # Reject unknown methods up front; add further search methods here.
    search_method = settings['BinSearch'].lower()
    if search_method != 'statisticalsearch':
        raise ValueError("Bin search unsupported!")
    trained = StatisticalSearch(data, settings)

    elapsed = time.time() - started_at
    trained['Training Time'] = elapsed
    print('training time: ', elapsed, 's')

    return trained.copy()

# The search is independent of the tasks, only based on output analysis
def StatisticalSearch(data, settings):
    """Pick readout bins from the output statistics and fit the readout weights.

    Bin selection looks only at ``settings['overall_stdevs1']`` (no labels are
    involved) and happens in two stages:

    1. Local maxima: the signal is split into
       ``int(trainingBins * local_max_bins_ratio)`` consecutive intervals and
       the bin with the largest standard deviation of each interval is taken.
    2. Global maxima: bins are visited from highest to lowest standard
       deviation and the first ``int(trainingBins * global_max_bins_ratio)``
       bins that were not selected yet are added.

    The selected columns of the training data are then passed to
    ``calc_weight`` and scored with ``eval_train``.

    Side effects: prints progress information, writes
    ``bins=<trainingBins>_best_bins_indices.txt`` and
    ``bins=<trainingBins>_Weight_Matrix.txt`` into ``settings['saving_dir']``
    and saves an overview image through ``display_image_with_boxes``.

    Args:
        data: dict providing ``data['ProcessedData']['TrainData']``,
            ``data['ProcessedData']['TrainLabel']`` and ``data['PixelsGrid']``.
            ``PixelsGrid`` is indexed with a list of bins, so it is presumably
            a NumPy array -- TODO confirm against the caller.
        settings: dict providing ``trainingRidge``, ``trainingBins``,
            ``overall_stdevs1``, ``overall_average1``,
            ``local_max_bins_ratio``, ``global_max_bins_ratio``,
            ``saving_dir``, ``lasso_option`` and ``bias`` (plus ``loss``,
            which ``eval_train`` forwards to ``calc_loss``).

    Returns:
        dict with the keys ``'TrainData_Readout'``, ``'BestBins'``,
        ``'BestBins_WL'``, ``'BestWeight'``, ``'TrainData_score'``,
        ``'Train loss'`` and ``'Training accuracy'`` (an R2 score).
    """
    lambda_val = settings['trainingRidge']
    search_bins = settings["trainingBins"]

    overall_stdevs = settings['overall_stdevs1']
    overall_average1 = settings['overall_average1']

    # --- Stage 1: one bin per local interval -------------------------------
    best_local_indices = []
    n_local_max = int(search_bins * settings["local_max_bins_ratio"])

    if n_local_max > 0:
        # May be fractional; the interval borders are truncated to integers.
        length_of_local_intervals = len(overall_average1) / n_local_max

        for i in range(n_local_max):
            start_local_interval = int(i * length_of_local_intervals)
            end_local_interval = int((i + 1) * length_of_local_intervals)

            local_stdevs = overall_stdevs[start_local_interval:end_local_interval]
            if len(local_stdevs) == 0:
                # More intervals than bins were requested: nothing to pick here.
                continue

            # Offset by the *integer* interval start. Using the fractional
            # interval length produced float indices such as 3.33, which
            # defeated the duplicate check below and misplaced plotted boxes.
            local_best = int(np.argsort(local_stdevs)[-1])
            best_local_indices.append(local_best + start_local_interval)

    # Local bins come first; the global bins are appended behind them.
    best_indices = list(best_local_indices)

    # --- Stage 2: globally highest standard deviations ---------------------
    highest_abs_max_STD = []
    n_global_abs_max = int(search_bins * settings["global_max_bins_ratio"])

    if n_global_abs_max > 0:
        already_selected = set(best_indices)
        # argsort is ascending, so reverse it to walk from highest to lowest STD.
        for index in np.argsort(overall_stdevs)[::-1]:
            index = int(index)
            if index in already_selected:
                continue
            best_indices.append(index)
            highest_abs_max_STD.append(index)
            already_selected.add(index)
            if len(highest_abs_max_STD) == n_global_abs_max:
                break

    print("len(best_indices)", len(best_indices))
    best_indices_txt_file = f"{settings['saving_dir']}/bins={settings['trainingBins']}_" \
                            f"best_bins_indices.txt"
    np.savetxt(best_indices_txt_file, best_indices, fmt='%f', delimiter=',')

    print("number of bins", len(best_indices))

    TrainData = np.array(data['ProcessedData']['TrainData'])
    TrainLabel = data['ProcessedData']['TrainLabel']
    ReadOUT_train = TrainData[:, best_indices]

    # One list per selection stage so each stage gets its own box colour.
    best_indices_lists = [best_local_indices, highest_abs_max_STD]

    n = len(overall_average1)
    side_length = int(np.sqrt(n))

    # Ensure that the signal length is a perfect square. If not, truncate.
    if side_length * side_length != n:
        print(f"Warning: Signal length ({n}) is not a perfect square.  Truncating to {side_length * side_length}.")

    # Reshape the (possibly truncated) signal into a 2D square array.
    image_array = np.asarray(overall_average1)[:side_length * side_length].reshape(
        (side_length, side_length))

    # plot boxes
    display_image_with_boxes(image_array, best_indices_lists, settings["saving_dir"], box_size=1,
                             edgecolors=["blue", "magenta"])

    # Fit the readout weights on the selected bins only.
    Wout_final = calc_weight(ReadOUT_train, TrainLabel, lambda_val, settings["lasso_option"],
                             settings['bias'])

    Wout_final_matrix_txt_file = f"{settings['saving_dir']}/bins={settings['trainingBins']}_Weight_Matrix.txt"
    np.savetxt(Wout_final_matrix_txt_file, Wout_final, fmt='%f', delimiter=',')

    # eval training
    train_score, train_loss, accuracy = eval_train(ReadOUT_train, TrainLabel, best_indices, Wout_final, settings)

    return {
        'TrainData_Readout': ReadOUT_train,
        'BestBins': best_indices,  # best bins (indices)
        'BestBins_WL': data['PixelsGrid'][best_indices],  # best bins (wl)
        'BestWeight': Wout_final,  # best weights
        'TrainData_score': train_score,  # predicted data
        'Train loss': train_loss,  # error of best set (training)
        'Training accuracy': accuracy,  # training accuracy, in regression we use r2 score to indicate it
    }

def calc_weight(training_data, training_labels, ridge_factor=None, lasso_option=0, bias=0):
    """Solve for the linear readout weights with a ridge-regularised pseudo-inverse.

    Args:
        training_data: 2-D array with one column per bin (rows are presumably
            the training samples -- confirm against the caller).
        training_labels: targets with one entry/row per row of
            ``training_data``.
        ridge_factor: regularisation strength multiplying the identity matrix.
            ``None`` (the signature default) is treated as ``0``, i.e. no
            regularisation; previously it raised ``TypeError``.
        lasso_option: ``1`` selects the reweighted scheme described below; any
            other value uses the plain ridge solution.
        bias: when non-zero, a constant column holding this value is appended
            to ``training_data``, so the result has one extra weight row.

    Returns:
        The weight matrix ``pinv(X^T X + penalty) @ X^T y``.

    Notes:
        With ``lasso_option == 1`` the plain pseudo-inverse solution is refined
        in five passes. Each pass rebuilds the penalty from
        ``gamma = 1 / (sqrt(|w|) + eps)`` of the previous weights. This
        presumably approximates an L1-style penalty -- confirm with the
        method's author.
    """
    if ridge_factor is None:
        ridge_factor = 0.0

    if bias != 0:
        bias_vector = np.ones((np.shape(training_data)[0], 1)) * bias
        training_data = np.concatenate((training_data, bias_vector), axis=1)
    bins = np.shape(training_data)[1]

    # Both products are loop-invariant, so compute them once.
    gram = np.dot(training_data.T, training_data)
    cross = np.dot(training_data.T, training_labels)

    if lasso_option == 1:
        weight_matrix = np.linalg.pinv(gram) @ cross
        eps = 10 ** -6
        for _ in range(5):  # original number is 12
            gamma = 1 / (np.sqrt(np.abs(weight_matrix)) + eps)

            denominator = np.linalg.pinv(gram
                                         + ridge_factor * np.eye(bins) * np.dot(gamma, gamma.T) + eps)
            # Scrub non-finite entries before they propagate into the weights.
            denominator[np.isinf(denominator)] = 0
            denominator[np.isnan(denominator)] = 0
            weight_matrix = denominator @ cross
    else:
        weight_matrix = np.linalg.pinv(gram + ridge_factor * np.eye(bins)) @ cross

    return weight_matrix

def calc_loss(score, validation_labels, settings):
    """Return the loss between predictions and labels for the configured metric.

    Args:
        score: predicted values.
        validation_labels: reference values (passed as the first argument to
            the metric functions).
        settings: dict whose ``'loss'`` entry names the metric,
            case-insensitively: ``'mse'``, ``'rmse'``, ``'mape'`` or ``'mae'``.

    Returns:
        The value of the selected metric; ``'rmse'`` is the square root of the
        mean squared error.

    Raises:
        ValueError: if ``settings['loss']`` is none of the supported names.
    """
    loss_name = settings['loss'].lower()

    if loss_name == 'mse':
        return mean_squared_error(validation_labels, score)
    if loss_name == 'rmse':
        return np.sqrt(mean_squared_error(validation_labels, score))
    if loss_name == 'mape':
        return mean_absolute_percentage_error(validation_labels, score)
    if loss_name == 'mae':
        return mean_absolute_error(validation_labels, score)

    raise ValueError("Invalid loss selection!")

def eval_train(Readout_train, label, indices, weight, settings):
    """Score the training readout with the fitted weights.

    Args:
        Readout_train: 2-D training readout (selected bins only).
        label: training targets.
        indices: not used by this function; kept in the signature for callers.
        weight: weight matrix applied to the readout.
        settings: dict providing ``'bias'`` and, through ``calc_loss``,
            ``'loss'``.

    Returns:
        Tuple ``(train_score, train_loss, train_accuracy)`` where
        ``train_score`` is ``readout @ weight`` and ``train_accuracy`` is the
        R2 score (higher means a better fit).
    """
    bias = settings["bias"]
    readout = Readout_train

    if bias != 0:
        # Append the same constant column that was used when fitting the weights.
        n_rows = np.shape(readout)[0]
        readout = np.concatenate((readout, np.ones((n_rows, 1)) * bias), axis=1)

    train_score = np.dot(readout, weight)
    return (
        train_score,
        calc_loss(train_score, label, settings),
        r2_score(label, train_score),
    )

def applyTONE(train, data, settings):
    """Apply the trained readout weights to the test set and score the result.

    Side effects: writes ``bins=<trainingBins>_readout_test.txt`` and
    ``bins=<trainingBins>_output_scores.txt`` into ``settings['saving_dir']``.

    Args:
        train: training result dict providing ``'BestWeight'`` and
            ``'BestBins'``.
        data: dict providing ``data['ProcessedData']['TestData']`` and
            ``data['ProcessedData']['TestLabel']``.
        settings: dict providing ``'bias'``, ``'saving_dir'``,
            ``'trainingBins'`` and, through ``calc_loss``, ``'loss'``.

    Returns:
        dict with ``'Output Scores'`` and ``'Output Prediction'`` (both the
        same ``readout @ weights`` array), ``'Test Loss'`` and
        ``'Test accuracy'`` (an R2 score).
    """
    weights = train['BestWeight']
    best_bins = train['BestBins']
    # Convert like the training path does, so list-based test data can be
    # column-indexed with the selected bins as well.
    test_set = np.asarray(data['ProcessedData']['TestData'])
    test_label = data['ProcessedData']['TestLabel']
    bias = settings["bias"]
    readout_test = test_set[:, best_bins]

    if bias != 0:
        # Same constant column as the one appended during training.
        bias_vector = np.ones((np.shape(readout_test)[0], 1)) * bias
        readout_test = np.concatenate((readout_test, bias_vector), axis=1)

    readout_test_txt_file = f"{settings['saving_dir']}/bins={settings['trainingBins']}_readout_test.txt"
    np.savetxt(readout_test_txt_file, readout_test, fmt='%f', delimiter=',')
    output_scores = np.dot(readout_test, weights)  # scores (nominal)

    output_scores_txt_file = f"{settings['saving_dir']}/bins={settings['trainingBins']}_output_scores.txt"
    np.savetxt(output_scores_txt_file, output_scores, fmt='%f', delimiter=',')

    loss = calc_loss(output_scores, test_label, settings)
    # A higher R2 score indicates a better fit; it serves as the accuracy
    # figure for this regression task.
    test_accuracy = r2_score(test_label, output_scores)

    return {
        'Output Scores': output_scores,  # scores of predicted data
        'Output Prediction': output_scores,  # predicted output
        'Test Loss': loss,  # test loss
        'Test accuracy': test_accuracy,
    }

def saveResults(data, train, results, settings, saving_dir):
    """Save predicted-vs-actual scatter plots for a regression task.

    Draws three panels side by side: training data, testing data with both
    axes limited to the range of the test labels, and testing data with
    automatic limits. The training panel uses the test-label range as well.
    The figure is written as ``bins=<trainingBins>, Regression.png`` and
    ``.pdf`` into ``saving_dir``.

    Note: the matplotlib ``rcParams`` set here are global and stay changed
    after the call.

    Args:
        data: dict providing ``data['ProcessedData']['TrainLabel']`` and
            ``data['ProcessedData']['TestLabel']``.
        train: dict providing ``'TrainData_score'`` and ``'Train loss'``.
        results: dict providing ``'Output Scores'`` and ``'Test Loss'``.
        settings: dict providing ``'trainingBins'`` and ``'loss'``.
        saving_dir: directory the two figure files are written to.
    """
    bins_number = settings['trainingBins']
    used_loss = settings['loss']
    processed = data['ProcessedData']
    train_true_labels = processed['TrainLabel']
    test_true_labels = processed['TestLabel']
    train_predicted_labels = train['TrainData_score']
    test_predicted_labels = results['Output Scores']  # predicted values

    # Font and line sizes suited to three subplots on one line.
    plt.rcParams.update({
        'pdf.fonttype': 42,  # keeps text editable in the saved pdf
        'font.size': 8,  # for labels
        'axes.titlesize': 8,
        'axes.labelsize': 8,
        'xtick.labelsize': 7,
        'ytick.labelsize': 7,
        'legend.fontsize': 8,
        'lines.linewidth': 1.5,
        'axes.linewidth': 0.8,
        'grid.alpha': 0.5,
    })
    fig, axs = plt.subplots(1, 3, figsize=(6.6, 2.2))
    train_ax, clipped_test_ax, free_test_ax = axs

    # Shared axis range and the end points of the "perfect prediction" diagonal.
    label_range = [np.min(test_true_labels), np.max(test_true_labels)]

    # Panel 1: training data.
    train_ax.plot(train_true_labels, train_predicted_labels, 'x', markersize=2, linewidth=1.5,
                  label='Training', color="blue")
    train_ax.set_title('Training Data')
    train_ax.set_ylim(label_range)
    train_ax.set_xlim(label_range)
    train_ax.plot(label_range, label_range, markersize=2, linewidth=1, linestyle="--", color="black")
    train_ax.set_ylabel('Predicted Data')
    train_ax.set_xlabel('Actual Data')

    # Panel 2: testing data, clipped to the label range.
    clipped_test_ax.plot(test_true_labels, test_predicted_labels, 'x', markersize=2, linewidth=1.5,
                         label='Testing', color="blue")
    clipped_test_ax.set_title('Testing Data _ ylim applied')
    clipped_test_ax.set_ylim(label_range)
    clipped_test_ax.set_xlim(label_range)
    clipped_test_ax.plot(label_range, label_range, markersize=2, linewidth=1, linestyle="--", color="black")
    clipped_test_ax.set_xlabel('Actual Data')

    # Panel 3: testing data with automatic limits.
    free_test_ax.plot(test_true_labels, test_predicted_labels, 'x', markersize=2, linewidth=1.5,
                      label='Testing', color="blue")
    free_test_ax.set_title('Testing Data')
    free_test_ax.set_xlabel('Actual Data')

    train_loss_text = np.round(train["Train loss"], 3)
    test_loss_text = np.round(results["Test Loss"], 3)
    fig.suptitle(f'Training loss ({used_loss}): {train_loss_text}'
                 f', Test error ({used_loss}): {test_loss_text}')

    plt.tight_layout()
    fig.canvas.draw()

    for extension in ('png', 'pdf'):
        plt.savefig(f'{saving_dir}/bins={bins_number}, Regression.{extension}', dpi=400)
    plt.close()

def save_dict_to_txt(dictionary, save_dir, filename="Analysis_Settings.txt"):
    """
    Saves a dictionary to a .txt file in key: value format, one entry per line.

    The target directory is created if it does not exist; an existing file of
    the same name is overwritten.

    Args:
        dictionary (dict): The dictionary to save.
        save_dir (str): Directory path to save the file.
        filename (str): Name of the file (default: "Analysis_Settings.txt")
    """
    os.makedirs(save_dir, exist_ok=True)  # Ensure directory exists
    file_path = os.path.join(save_dir, filename)

    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(file_path, 'w') as f:
        f.writelines(f"{key}: {value}\n" for key, value in dictionary.items())

def save_list_to_txt(data_list, save_dir, filename="list1.txt"):
    """
    Writes every item of a list to a .txt file, one item per line, and prints
    a confirmation message.

    The target directory is created when missing.

    Args:
        data_list (list): Items to write; each is formatted with an f-string.
        save_dir (str): Directory path to save the file.
        filename (str): Name of the file (default: "list1.txt")
    """
    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, filename)

    text = "".join(f"{item}\n" for item in data_list)
    with open(file_path, 'w') as handle:
        handle.write(text)

    print(f"{filename} saved to {file_path}")

def set_plots(image_size, subfig_bin_distribution=1, max_columns=3):
    """Apply the shared plot styling and create a figure of a grid-based size.

    ``image_size`` is a two-digit code ``LC``: ``L`` is the number of lines
    and ``C`` the number of columns the figure occupies, each unit being
    2.2 inches (3.3 inches when ``max_columns == 2``). Examples from the
    original notes:

        33: full size (17cm x 17cm)
        22: 4/9 of a full size (2 lines, 2 columns)
        23: 2/3 of a full size (2 lines, 3 columns per line)
        13: 3/9 of a full image (1 line, 3 columns per line)
        11: 1/9 of a full size (1 line and 1 column, best for fitting)

    Note: the matplotlib ``rcParams`` set here are global and stay changed
    after the call.

    Args:
        image_size: the ``LC`` size code described above.
        subfig_bin_distribution: when greater than 1, the figure height is
            divided by this value (several bin distributions in one sub-figure).
        max_columns: ``2`` enlarges the unit size by a factor of 1.5.

    Returns:
        The ``(fig, ax)`` pair returned by ``plt.subplots``.
    """
    # Sizes appropriate for 3 subplots on one line.
    plt.rcParams.update({
        'pdf.fonttype': 42,  # keeps text editable in the saved pdf
        'font.size': 8,  # for labels
        'axes.titlesize': 8,
        'axes.labelsize': 8,
        'xtick.labelsize': 7,
        'ytick.labelsize': 7,
        'legend.fontsize': 8,
        'lines.linewidth': 1.5,
        'axes.linewidth': 0.8,
        'grid.alpha': 0.5,
    })

    unit_width = 2.2
    unit_height = 2.2
    if max_columns == 2:
        unit_width *= 1.5
        unit_height *= 1.5

    # Tens digit -> number of lines (height), ones digit -> columns (width).
    height_units, width_units = divmod(image_size, 10)

    if subfig_bin_distribution > 1:  # several bin distributions in one sub-fig
        height_units = height_units / subfig_bin_distribution

    fig, ax = plt.subplots(figsize=(unit_width * width_units, unit_height * height_units))
    return fig, ax

def display_image_with_boxes(image, flattened_indices, saving_dir, box_size=5, edgecolors=("blue", "magenta")):
    """
    Saves a grayscale rendering of an image with square boxes drawn on it.

    The box locations are given as indices into the flattened (row-major)
    image. Each inner list of ``flattened_indices`` is drawn in its own colour.
    The figure, including a colour bar, is written as
    ``avg_image_<total number of boxes>_bins.png`` and ``.pdf`` into
    ``saving_dir``.

    Args:
        image: 2-D array to display.
        flattened_indices: sequence of index lists, one list per colour.
            Indices are truncated to integers before being converted to
            row/column positions.
        saving_dir: directory the two files are written to.
        box_size: side length of each box in data (pixel) units.
        edgecolors: one colour per index list; must provide at least as many
            entries as there are lists.
    """
    img_width = image.shape[1]

    fig, ax = set_plots(11)
    c = ax.imshow(image, cmap='gray', aspect="equal")  # 'gray' for grayscale images

    total_search_bins = 0
    for ith_color, bins_list in enumerate(flattened_indices):
        color = edgecolors[ith_color]
        for idx in bins_list:
            total_search_bins += 1
            # Float indices would give fractional columns and shifted boxes.
            row, col = divmod(int(idx), img_width)

            # Pixel centres sit on integer coordinates, so start half a pixel
            # earlier to frame the pixel.
            rect = patches.Rectangle((col - 0.5, row - 0.5), box_size, box_size, linewidth=1,
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)

    ax.set_axis_off()
    fig.colorbar(c)

    fig.savefig(f'{saving_dir}/avg_image_{total_search_bins}_bins.png', dpi=300)
    fig.savefig(f'{saving_dir}/avg_image_{total_search_bins}_bins.pdf', dpi=300)
    plt.close(fig)

def display_images_STD_with_boxes(image, flattened_indices, saving_dir, box_size=5,
                                  edgecolors=("blue", "magenta")):
    """
    Saves a heat-map rendering of an image with square boxes drawn on it.

    The box locations are given as indices into the flattened (row-major)
    image. Each inner list of ``flattened_indices`` is drawn in its own colour.
    The figure is written as ``std_heatmap_<total number of boxes>_bins.png``
    and ``.pdf`` into ``saving_dir``.

    Args:
        image: 2-D array to display.
        flattened_indices: sequence of index lists, one list per colour.
            Indices are truncated to integers before being converted to
            row/column positions.
        saving_dir: directory the two files are written to.
        box_size: side length of each box in data (pixel) units.
        edgecolors: one colour per index list; must provide at least as many
            entries as there are lists.
    """
    img_width = image.shape[1]

    fig, ax = set_plots(11)
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap works on
    # old and new releases alike.
    reversed_map = plt.get_cmap('hot_r').reversed()

    ax.imshow(image, cmap=reversed_map, aspect="equal")

    total_search_bins = 0
    for ith_color, bins_list in enumerate(flattened_indices):
        color = edgecolors[ith_color]
        for idx in bins_list:
            total_search_bins += 1
            # Float indices would give fractional columns and shifted boxes.
            row, col = divmod(int(idx), img_width)

            # Pixel centres sit on integer coordinates, so start half a pixel
            # earlier to frame the pixel.
            rect = patches.Rectangle((col - 0.5, row - 0.5), box_size, box_size, linewidth=1,
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)

    ax.set_axis_off()

    fig.savefig(f'{saving_dir}/std_heatmap_{total_search_bins}_bins.png', dpi=300)
    fig.savefig(f'{saving_dir}/std_heatmap_{total_search_bins}_bins.pdf', dpi=300)
    plt.close(fig)

def calculate_the_complex_numbers(real_part_output, img_part_output):
    """Combine real and imaginary parts element-wise into complex numbers.

    Args:
        real_part_output: sequence of rows holding the real parts.
        img_part_output: sequence of rows holding the imaginary parts, laid
            out like ``real_part_output``.

    Returns:
        A list of lists where entry ``[i][j]`` equals
        ``real_part_output[i][j] + 1j * img_part_output[i][j]``. If the inputs
        differ in length, the result is truncated to the shorter one.
    """
    # zip pairs the j-th real value with the j-th imaginary value; iterating
    # over the tuple (real_row, img_row) itself paired the wrong numbers.
    return [
        [real + 1j * imag for real, imag in zip(real_row, img_row)]
        for real_row, img_row in zip(real_part_output, img_part_output)
    ]