-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbatch_preprocess.py
More file actions
184 lines (147 loc) · 8.18 KB
/
batch_preprocess.py
File metadata and controls
184 lines (147 loc) · 8.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import numpy as np
import os
import pickle
try:
import SimpleITK as sitk
except ImportError:
print("You need to have SimpleITK installed to run this example!")
raise ImportError("SimpleITK not found")
from multiprocessing import Pool
def load_pickle(file, mode='rb'):
with open(file, mode) as f:
a = pickle.load(f)
return a
def save_pickle(obj, file, mode='wb'):
with open(file, mode) as f:
pickle.dump(obj, f)
def get_list_of_files(base_dir):
"""
returns a list of lists containing the filenames. The outer list contains all training examples. Each entry in the
outer list is again a list pointing to the files of that training example in the following order:
T1, T1c, T2, FLAIR, segmentation
:param base_dir:
:return:
"""
list_of_lists = []
for case in os.listdir(base_dir):
image_file = os.path.os.path.join(base_dir,case,case + ".nii.gz")
seg_file = os.path.os.path.join(base_dir, case,case + "_seg.nii.gz")
this_case = [image_file ,seg_file]
assert all((os.path.isfile(i) for i in this_case)), "some file is missing for patient %s; make sure the following " \
"files are there: %s" % (p, str(this_case))
list_of_lists.append(this_case)
print("Found %d patients" % len(list_of_lists))
return list_of_lists
def load_and_preprocess(case, patient_name, output_folder):
"""
loads, preprocesses and saves a case
This is what happens here:
1) load all images and stack them to a 4d array
2) crop to nonzero region, this removes unnecessary zero-valued regions and reduces computation time
3) normalize the nonzero region with its mean and standard deviation
4) save 4d tensor as numpy array. Also save metadata required to create niftis again (required for export
of predictions)
:param case:
:param patient_name:
:return:
"""
# load SimpleITK Images
imgs_sitk = [sitk.ReadImage(i) for i in case]
# get pixel arrays from SimpleITK images
imgs_npy = [sitk.GetArrayFromImage(i) for i in imgs_sitk]
# get some metadata
spacing = imgs_sitk[0].GetSpacing()
# the spacing returned by SimpleITK is in inverse order relative to the numpy array we receive. If we wanted to
# resample the data and if the spacing was not isotropic (in BraTS all cases have already been resampled to 1x1x1mm
# by the organizers) then we need to pay attention here. Therefore we bring the spacing into the correct order so
# that spacing[0] actually corresponds to the spacing of the first axis of the numpy array
spacing = np.array(spacing)[::-1]
direction = imgs_sitk[0].GetDirection()
origin = imgs_sitk[0].GetOrigin()
original_shape = imgs_npy[0].shape
# now stack the images into one 4d array, cast to float because we will get rounding problems if we don't
# imgs_npy = np.concatenate([i[None] for i in imgs_npy]).astype(np.float32)
# now find the nonzero region and crop to that
nonzero = [np.array(np.where(i != 0)) for i in imgs_npy]
nonzero = [[np.min(i, 1), np.max(i, 1)] for i in nonzero]
nonzero = np.array([np.min([i[0] for i in nonzero], 0), np.max([i[1] for i in nonzero], 0)]).T
# nonzero now has shape 3, 2. It contains the (min, max) coordinate of nonzero voxels for each axis
#$$$ I have commented these out because i could not understand this part, this whole non zero part i didnt understand
#$$$ let me know what i have to do here
# now crop to nonzero
# imgs_npy = imgs_npy[:,
# nonzero[0, 0] : nonzero[0, 1] + 1,
# nonzero[1, 0]: nonzero[1, 1] + 1,
# nonzero[2, 0]: nonzero[2, 1] + 1,
# ]
# now we create a brain mask that we use for normalization
# nonzero_masks = [i != 0 for i in imgs_npy[:-1]]
# brain_mask = np.zeros(imgs_npy.shape[1:], dtype=bool)
# for i in range(len(nonzero_masks)):
# brain_mask = brain_mask | nonzero_masks[i]
# now normalize each modality with its mean and standard deviation (computed within the brain mask)
# for i in range(len(imgs_npy) - 1):
# mean = imgs_npy[i][brain_mask].mean()
# std = imgs_npy[i][brain_mask].std()
# imgs_npy[i] = (imgs_npy[i] - mean) / (std + 1e-8)
# imgs_npy[i][brain_mask == 0] = 0
# the segmentation of brats has the values 0, 1, 2 and 4. This is pretty inconvenient to say the least.
# We move everything that is 4 to 3
# imgs_npy[-1][imgs_npy[-1] == 4] = 3
# now save as npz
np.save(os.path.join(output_folder, patient_name + ".npy"), imgs_npy)
metadata = {
'spacing': spacing,
'direction': direction,
'origin': origin,
'original_shape': original_shape,
'nonzero_region': nonzero
}
save_pickle(metadata, os.path.join(output_folder, patient_name + ".pkl"))
#$$$ There is no problem here, but where do i use this function ?
def save_segmentation_as_nifti(segmentation, metadata, output_file):
original_shape = metadata['original_shape']
seg_original_shape = np.zeros(original_shape, dtype=np.uint8)
nonzero = metadata['nonzero_region']
seg_original_shape[nonzero[0, 0] : nonzero[0, 1] + 1,
nonzero[1, 0]: nonzero[1, 1] + 1,
nonzero[2, 0]: nonzero[2, 1] + 1] = segmentation
sitk_image = sitk.GetImageFromArray(seg_original_shape)
sitk_image.SetDirection(metadata['direction'])
sitk_image.SetOrigin(metadata['origin'])
# remember to revert spacing back to sitk order again
sitk_image.SetSpacing(tuple(metadata['spacing'][[2, 1, 0]]))
sitk.WriteImage(sitk_image, output_file)
if __name__ == "__main__":
# This is the same preprocessing I used for our contributions to the BraTS 2017 and 2018 challenges.
# Preprocessing is described in the documentation of load_and_preprocess
# The training data is identical between BraTS 2017 and 2018. You can request access here:
# https://ipp.cbica.upenn.edu/#BraTS18_registration
# brats_base points to where the extracted downloaded training data is
# preprocessed data is saved as npy. This may seem odd if you are familiar with medical images, but trust me it's
# the best way to do this for deep learning. It does not make much of a difference for BraTS, but if you are
# dealing with larger images this is crusial for your pipelines to not get stuck in CPU bottleneck. What we can do
# with numpy arrays is we can load them via np.load(file, mmap_mode="r") and then read just parts of it on the fly
# during training. This is super important if your patch size is smaller than the size of the entire patient (for
# example if you work with large CT data or if you need 2D slices).
# For this to work properly the output_folder (or wherever the data is stored during training) must be on an SSD!
# HDDs are usually too slow and you also wouldn't want to do this over a network share (there are exceptions but
# take this as a rule of thumb)
# Why is this not an IPython Notebook you may ask? Because I HATE IPython Notebooks. Simple :-)
list_of_lists = get_list_of_files('../data/images/patients')
patient_names = [i[0].split("/")[-2] for i in list_of_lists]
p = Pool(processes=8)
brats_preprocessed_folder = '../data/images/preprocessed'
p.starmap(load_and_preprocess, zip(list_of_lists, patient_names, [brats_preprocessed_folder] * len(list_of_lists)))
p.close()
p.join()
# remember that we cropped the data before preprocessing. If we predict the test cases, we want to run the same
# preprocessing for them. We need to then put the segmentation back into its original position (due to cropping).
# Here is how you can do that:
# lets use Brats17_2013_0_1 for this example
# img = np.load(os.path.join(brats_preprocessed_folder, "Brats17_2013_0_1.npy"))
# metadata = load_pickle(os.path.join(brats_preprocessed_folder, "Brats17_2013_0_1.pkl"))
# # remember that we changed the segmentation labels from 0, 1, 2, 4 to 0, 1, 2, 3. We need to change that back to
# # get the correct format
# img[-1][img[-1] == 3] = 4
# save_segmentation_as_nifti(img[-1], metadata, os.path.join(brats_preprocessed_folder, "delete_me.nii.gz"))