-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
87 lines (75 loc) · 3.53 KB
/
data.py
File metadata and controls
87 lines (75 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#
# This is a sample Notebook to demonstrate how to read "MNIST Dataset"
#
import numpy as np # linear algebra
import struct
from array import array
from os.path import join
import matplotlib.pyplot as plt
#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Read MNIST-style image/label pairs stored as IDX binary files.

    The loader keeps four file paths (training images/labels and test
    images/labels); nothing is read from disk until ``load_data`` or
    ``read_images_labels`` is called.
    """

    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        """Store the paths of the four IDX files without opening them."""
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: Path of an IDX image file (magic number 2051).
            labels_filepath: Path of an IDX label file (magic number 2049).

        Returns:
            A tuple ``(images, labels)``. ``images`` is a list with one entry
            per image announced in the image header; each entry is a list of
            ``rows`` one-dimensional NumPy arrays of ``cols`` unsigned bytes.
            ``labels`` is an ``array('B')`` holding every byte that follows
            the label header.

        Raises:
            ValueError: If either file starts with an unexpected magic
                number, or the image file holds too few bytes for the image
                count and dimensions given in its header.
        """
        with open(labels_filepath, 'rb') as file:
            # Label header: two big-endian uint32 values (magic, item count).
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # Image header: four big-endian uint32 values
            # (magic, image count, rows per image, columns per image).
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        pixels_per_image = rows * cols
        images = []
        for i in range(size):
            start = i * pixels_per_image
            img = np.array(image_data[start:start + pixels_per_image])
            # Use the dimensions from the header rather than assuming 28x28,
            # so files with other image sizes load correctly.
            img = img.reshape(rows, cols)
            # Each image is kept as a list of row arrays.
            images.append(list(img))
        return images, labels

    def load_data(self):
        """Read both data sets.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` where each pair is the
            result of ``read_images_labels`` for the corresponding files.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train), (x_test, y_test)
# Locations of the four MNIST IDX files, all resolved relative to input_path
input_path = './input'
(
    training_images_filepath,
    training_labels_filepath,
    test_images_filepath,
    test_labels_filepath,
) = (
    join(input_path, relative_name)
    for relative_name in (
        'train-images-idx3-ubyte/train-images-idx3-ubyte',
        'train-labels-idx1-ubyte/train-labels-idx1-ubyte',
        't10k-images-idx3-ubyte/t10k-images-idx3-ubyte',
        't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte',
    )
)
# Helper function to display images
def show_images(images, title_texts):
    """Display images in a grid of five columns, each with its own title.

    Args:
        images: Sequence of images accepted by ``plt.imshow``.
        title_texts: Titles paired with ``images`` by position; pairing
            stops at the shorter of the two sequences.
    """
    cols = 5
    # Ceiling division: exactly as many rows as needed, so a count that is
    # a multiple of ``cols`` does not reserve an extra empty row.
    rows = (len(images) + cols - 1) // cols
    plt.figure(figsize=(30, 20))
    # Subplot positions are 1-based.
    for index, (img, title) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(img, cmap='gray')
        plt.title(title, fontsize=15)
    plt.show()
# Create the MNIST loader from the four configured IDX file paths
mnist_dataloader = MnistDataloader(
    training_images_filepath=training_images_filepath,
    training_labels_filepath=training_labels_filepath,
    test_images_filepath=test_images_filepath,
    test_labels_filepath=test_labels_filepath,
)
#print(x_train.shape, x_test.shape)
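# # Visualize random samples.
# # NOTE: disabled. To run it, `import random` and provide x_train/x_test as 2-D arrays
# # holding one flattened 28x28 image per column, with y_train/y_test holding one label
# # vector per column (np.argmax recovers the digit).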
# images_to_show = []
# titles_to_show = []
# for _ in range(10): # Random training images
# idx = random.randint(0, x_train.shape[1] - 1)
# images_to_show.append(x_train[:, idx].reshape(28, 28)) # Reshape back to image
# titles_to_show.append(f"Train: {np.argmax(y_train[:, idx])}")
# for _ in range(5): # Random test images
# idx = random.randint(0, x_test.shape[1] - 1)
# images_to_show.append(x_test[:, idx].reshape(28, 28)) # Reshape back to image
# titles_to_show.append(f"Test: {np.argmax(y_test[:, idx])}")
# show_images(images_to_show, titles_to_show)