-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess_labels.py
More file actions
48 lines (38 loc) · 1.24 KB
/
preprocess_labels.py
File metadata and controls
48 lines (38 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import sys
import os
import scipy
import numpy as np
from skimage import io
from scipy.ndimage.filters import gaussian_filter
from scipy.misc import toimage
# Dataset directories (each keeps its trailing slash; later code relies on it)
# and the name of the label file expected inside each of them.
TRAIN = 'train/'
VALIDATE = 'valid/'
LABELS = 'labels.txt'

# Choose the dataset from the first command-line argument:
#   TRAIN      -> training data
#   VALID      -> validation data
#   (no arg)   -> validation data (default)
#   other      -> report the problem and exit
#
# The argument count is checked explicitly instead of wrapping the logic in a
# bare ``except:``. A bare except also catches the SystemExit raised by
# sys.exit(), which made the "quitting" branch silently fall through and
# process the validation set anyway.
#
# print(...) with a single string behaves the same on Python 2 and Python 3.
DATASET = VALIDATE
if len(sys.argv) > 1:
    if sys.argv[1] == 'TRAIN':
        print("Preprocessing labels for training data")
        DATASET = TRAIN
    elif sys.argv[1] == 'VALID':
        print("Preprocessing labels for validation data")
    else:
        print("Invalid argument .. quitting")
        # Non-zero status so calling scripts can detect the bad invocation.
        sys.exit(1)
# Read the integer class labels, one per line, from labels.txt inside the
# selected dataset directory (train/ or valid/).
labels = []
with open(DATASET + LABELS) as f:
    for line in f:
        # Strip surrounding whitespace rather than blindly dropping the last
        # character: a final line without a trailing newline would otherwise
        # lose its last digit (e.g. "12" -> "1").
        stripped = line.strip()
        # Ignore blank lines (typically a stray empty line at end of file)
        # instead of failing on int('').
        if stripped:
            labels.append(int(stripped))
# Reduce the labels to their sorted distinct values. A label's position in
# this list is the column that represents it in the one-hot encoding.
distinct_labels = sorted(set(labels))

# Look the column up by label value instead of using the raw label as the
# index. The row width is len(distinct_labels), so indexing with the label
# itself only works when the labels are exactly 0..N-1; 1-based labels or
# labels with gaps would raise IndexError or set the wrong column.
label_to_column = {
    label: column for column, label in enumerate(distinct_labels)
}

# Build one one-hot row per input label, in the original order.
preprocessed_labels = []
for label in labels:
    curr = [0] * len(distinct_labels)
    curr[label_to_column[label]] = 1
    preprocessed_labels.append(curr)
# Save the one-hot rows with NumPy. DATASET[:-1] drops the trailing '/' from
# the dataset directory name, and np.save appends the '.npy' extension, so the
# output file is train_preprocessed_labels.npy or valid_preprocessed_labels.npy
# in the current working directory.
np.save(DATASET[:-1] + '_preprocessed_labels', preprocessed_labels)