forked from ardamavi/Vocalize-Sign-Language
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_dataset.py
More file actions
61 lines (56 loc) · 2.13 KB
/
get_dataset.py
File metadata and controls
61 lines (56 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Arda Mavi
import os
import numpy as np
from os import listdir
# NOTE(review): scipy.misc.imread/imresize were deprecated in SciPy 1.0 and
# removed in 1.3 — this file requires an old SciPy (plus Pillow); consider
# imageio.imread + an explicit resize when upgrading.
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from database_process import create_table, add_data
from sklearn.model_selection import train_test_split
# Settings:
img_size = 64        # images are resized to img_size x img_size
channel_size = 1     # 1 = grayscale, 3 = RGB
num_class = 10       # NOTE(review): not referenced in this file — to_categorical uses len(labels); verify before removing
test_size = 0.2      # fraction of the data held out by train_test_split
def get_img(data_path):
    """Read the image at *data_path* and resize it to the configured shape.

    Uses the module-level settings: the result is img_size x img_size with
    channel_size channels. Grayscale (channel_size == 1) images are
    flattened to a single channel on load.
    """
    grayscale = channel_size == 1
    image = imread(data_path, flatten=grayscale)
    resized = imresize(image, (img_size, img_size, channel_size))
    return resized
def get_dataset(dataset_path='Data/Train_Data'):
    """Load the dataset, rebuilding and caching it as .npy files if needed.

    Tries to load cached arrays from Data/npy_dataset/. On a cache miss it
    reads every image under ``dataset_path`` (one sub-folder per class),
    records the label->id mapping in the SQLite database, normalizes the
    pixels, one-hot encodes the labels, and writes the cache.

    Parameters:
        dataset_path: root folder whose sub-folders are the class labels
            (only used when the .npy cache is absent).

    Returns:
        (X, X_test, Y, Y_test) — a train/test split with test_size fraction
        held out (random_state=42 for reproducibility).
    """
    try:
        X = np.load('Data/npy_dataset/X.npy')
        Y = np.load('Data/npy_dataset/Y.npy')
    # np.load raises OSError (FileNotFoundError) for a missing cache.
    # A bare `except:` here would also swallow KeyboardInterrupt/SystemExit.
    except OSError:
        # Rebuild from scratch: drop any stale label database first.
        if os.path.exists('Data/DataBase/database.sqlite'):
            os.remove('Data/DataBase/database.sqlite')
        create_table('id_char', 'id, char')
        labels = listdir(dataset_path)  # one sub-folder per class label
        X = []
        Y = []
        for label_id, label in enumerate(labels):
            # enumerate gives a stable integer id per label, replacing the
            # fragile running-counter encoder; the id is recorded once per
            # class (even for classes with no images, keeping DB rows
            # consistent with the folder listing).
            add_data('id_char', "{0}, '{1}'".format(label_id, label))
            label_path = dataset_path + '/' + label
            for data in listdir(label_path):
                X.append(get_img(label_path + '/' + data))
                Y.append(label_id)
        # Normalize to [0, 1] and invert (dark-on-light -> light-on-dark).
        X = 1 - np.array(X).astype('float32') / 255.
        X = X.reshape(X.shape[0], img_size, img_size, channel_size)
        Y = to_categorical(np.array(Y).astype('float32'), len(labels))
        # Cache the arrays for subsequent runs.
        if not os.path.exists('Data/npy_dataset/'):
            os.makedirs('Data/npy_dataset/')
        np.save('Data/npy_dataset/X.npy', X)
        np.save('Data/npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test
# Script entry point: build (or load the cached) dataset when run directly.
if __name__ == '__main__':
    get_dataset()