# GAN-Tests/dataloaders.py (repository: danielnflam/GAN-Tests, branch: main)

import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os, sys, time, datetime
import pydicom.fileset
import matplotlib.pyplot as plt

###########################
# JSRT CXR dataset
# Shiraishi J, Katsuragawa S, Ikezoe J, Matsumoto T, Kobayashi T, Komatsu K, Matsui M, Fujita H, Kodera Y, and Doi K.: Development of a digital image database for chest radiographs with and without a lung nodule: Receiver operating characteristic analysis of radiologists’ detection of pulmonary nodules. AJR 174; 71-74, 2000
###########################
class JSRT_CXR(Dataset):
    """Paired JSRT chest radiographs: each item couples a source image with its bone-suppressed version."""

    def __init__(self, data_normal, data_BSE, transform=None):
        """
        JSRT Chest X-ray dataset.
        Source:
        Shiraishi J, Katsuragawa S, Ikezoe J, Matsumoto T, Kobayashi T, Komatsu K, Matsui M, Fujita H, Kodera Y, and Doi K.
        Development of a digital image database for chest radiographs with and without a lung nodule: Receiver operating characteristic analysis of radiologists’ detection of pulmonary nodules.
        AJR 174; 71-74, 2000

        Inputs:
            data_normal: root directory holding the normal / non-suppressed images
            data_BSE: root directory holding the bone-suppressed images
            transform: (optional) a torchvision.transforms.Compose series of transformations
        Assumed that files corresponding to the same patient have the same name in both folders data_normal and data_BSE.

        Only the file paths are stored (in the DataFrame ``self.data`` with columns
        "Patient", "boneless" and "source"); the images are read lazily on access.

        Raises:
            ValueError: if a bone-suppressed ``.png`` file does not have exactly one
                file with the same extension-less name under ``data_normal``.
        """
        super().__init__()

        # Walk data_normal ONCE and index every file by its name without extension.
        # Exact comparison of the full stem keeps e.g. "0_1" and "0_10" distinct.
        sources_by_code = {}
        for root, _dirs, files in os.walk(data_normal):
            for name in files:
                code = os.path.splitext(name)[0]
                sources_by_code.setdefault(code, []).append(os.path.join(root, name))

        sample = {"Patient": [], "boneless": [], "source": []}
        unpaired = []
        for root, _dirs, files in os.walk(data_BSE):
            for name in files:
                # Suffix test: a substring test would also accept names like "x.png.txt".
                if not name.endswith('.png'):
                    continue
                patient_code_file = os.path.splitext(name)[0]
                candidates = sources_by_code.get(patient_code_file, [])
                if len(candidates) != 1:
                    # Zero or several candidates: the pairing would be missing or ambiguous.
                    unpaired.append(patient_code_file)
                    continue
                sample["Patient"].append(patient_code_file)
                sample["boneless"].append(os.path.join(root, name))
                sample["source"].append(candidates[0])

        if unpaired:
            preview = ", ".join(sorted(unpaired)[:5])
            raise ValueError(
                f"{len(unpaired)} bone-suppressed image(s) under {data_BSE!r} do not have exactly one "
                f"same-named file under {data_normal!r} (e.g. {preview})"
            )

        self.data = pd.DataFrame(sample)
        self.transform = transform

    def __len__(self):
        """Number of (source, boneless) image pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Read the image pair at position ``idx`` from disk.

        Returns a dict with keys 'source' and 'boneless', passed through
        ``self.transform`` first when a transform was supplied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist() # transform into python list

        source_image, boneless_image = self._load_pair(idx)
        sample = {'source': source_image, 'boneless': boneless_image}

        if self.transform:
            sample = self.transform(sample)
        return sample

    def visualise(self, idx):
        """Show the source image (left) and the bone-suppressed image (right) of pair ``idx`` side by side."""
        sourceIm, bonelessIm = self._load_pair(idx)

        # Visualisation
        fig, ax = plt.subplots(1, 2)
        ax[0].imshow(sourceIm, cmap="gray")
        ax[1].imshow(bonelessIm, cmap="gray")

    # Helper functions
    def _load_pair(self, idx):
        """Read both images of pair ``idx`` with plt.imread and reduce 3-D arrays to 2-D."""
        source_image = plt.imread(self.data["source"].iloc[idx])
        boneless_image = plt.imread(self.data["boneless"].iloc[idx])
        return self._check_if_array_3D(source_image, boneless_image)

    def _check_if_array_3D(self, source_image, boneless_image):
        """
        Return both images as 2-D arrays.

        A 3-D array is reduced to its first slice along the last axis
        (``image[:, :, 0]``); any other array is returned unchanged.
        """
        def first_channel(image):
            return image[:, :, 0] if image.ndim == 3 else image

        return first_channel(source_image), first_channel(boneless_image)


#####################################
# CT Datasets
#####################################
class Dataset_CTCovid19August2020(Dataset):
    """
    TCIA Covid-19 dataset.

    Data citation:
    An P, Xu S, Harmon SA, Turkbey EB, Sanford TH, Amalou A, Kassin M, Varble N, Blain M, Anderson V, Patella F, Carrafiello G, Turkbey BT, Wood BJ (2020). CT Images in Covid-19 [Data set].
    The Cancer Imaging Archive. DOI: https://doi.org/10.7937/tcia.2020.gqry-nc81

    https://wiki.cancerimagingarchive.net/display/Public/CT+Images+in+COVID-19#70227107171ba531fc374829b21d3647e95f532c

    TCIA citation:
    Clark K, Vendt B, Smith K, Freymann J, Kirby J, Koppel P, Moore S, Phillips S, Maffitt D, Pringle M, Tarbox L, Prior F.
    The Cancer Imaging Archive (TCIA): Maintaining and Operating a Public Information Repository,
    Journal of Digital Imaging, Volume 26, Number 6, December, 2013, pp 1045-1057. DOI: 10.1007/s10278-013-9622-7.
    """
    def __init__(self, csv_file, root_dir, transform=None):
        """
        Inputs:
            csv_file: Path to the CSV file listing the image files of each patient.
                Columns are addressed by position: 1 = source, 2 = boneless,
                3 = lung, 4 and 5 = pixel size.
            root_dir: Directory that the file names in the CSV are relative to.
            transform: Optional callable applied to every sample.
        Outputs (per sample, a dict with keys):
            'source': (ndarray) the original DRR generated from the CT image
            'boneless': (ndarray) the DRR generated with bone intensities = 0
            'lung': (ndarray) the DRR generated with the lung segment only
            'PixelSize': (tuple) the pixel dimensions in (rows, columns) in millimetres
        """
        table = pd.read_csv(os.path.join(csv_file))

        self.root_dir = root_dir
        self.images_dataframe = table
        self.transform = transform

    def __len__(self):
        """One sample per row of the CSV table."""
        return len(self.images_dataframe)

    def __getitem__(self, idx):
        """Load the three .npy arrays referenced by row ``idx`` and bundle them with the pixel size."""
        if torch.is_tensor(idx):
            idx = idx.tolist()  # plain Python value for positional indexing

        table = self.images_dataframe

        # Resolve all three file paths first (columns 1-3), then load them in the same order.
        array_paths = [
            os.path.join(self.root_dir, table.iloc[idx, column])
            for column in (1, 2, 3)
        ]
        source, boneless, lung = (np.load(path) for path in array_paths)

        pixel_size = (table.iloc[idx, 4], table.iloc[idx, 5])
        sample = {
            'source': source,
            'lung': lung,
            'boneless': boneless,
            'PixelSize': pixel_size,
        }

        if not self.transform:
            return sample
        return self.transform(sample)

class Dataset_PolyU_CXR(Dataset):
    """
    CXR data from PolyU -- this is used as conditioning data for the generator & discriminator.
    Generate a dataframe in memory to hold the details, but NOT the images.
    """

    # SOP Class UIDs accepted as radiographs: Computed Radiography, Digital X-Ray
    # "For Presentation" and Digital X-Ray "For Processing". They are matched exactly
    # because a substring test on the CR UID also accepts unrelated classes whose
    # UID merely starts with the same digits (e.g. "...1.1.11.1", "...1.1.12.1").
    _CR_DX_SOP_CLASS_UIDS = frozenset((
        "1.2.840.10008.5.1.4.1.1.1",
        "1.2.840.10008.5.1.4.1.1.1.1",
        "1.2.840.10008.5.1.4.1.1.1.1.1",
    ))

    def __init__(self, root_dir, transform=None):
        """
        Construct the dataframe here.

        Inputs:
            root_dir: directory searched recursively for files named "DICOMDIR"
            transform: optional callable applied to every sample

        ``self.images_dataframe`` gets one row per CR / DX instance with the columns
        PatientID, StudyDate, StudyTime, Modality, SOPClassUID, SOPInstanceUID and
        DICOMDIR (the path of the DICOMDIR file that lists the instance).
        """
        super().__init__()

        dicomdir_paths = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(root_dir)
            for name in files
            if name == "DICOMDIR"
        ]

        # Column lists for the dataframe; the first six keep their original order so
        # positional access to them is unaffected by the appended DICOMDIR column.
        columns = ('PatientID', 'StudyDate', 'StudyTime', 'Modality', 'SOPClassUID', 'SOPInstanceUID')
        data = {column: [] for column in columns}
        data['DICOMDIR'] = []

        for a_path in dicomdir_paths:
            fs = pydicom.fileset.FileSet(a_path)
            for instance in fs:
                if not self._is_cr_or_dx(instance.SOPClassUID):
                    continue
                for column in columns:
                    data[column].append(getattr(instance, column))
                # Remember where the instance was found so __getitem__ does not have to
                # assume a "<root_dir>/<PatientID>/DICOMDIR" folder layout.
                data['DICOMDIR'].append(a_path)

        self.images_dataframe = pd.DataFrame(data)
        self.root_dir = root_dir
        self.transform = transform

    @classmethod
    def _is_cr_or_dx(cls, sop_class_uid):
        """Return True when ``sop_class_uid`` is exactly one of the accepted CR / DX SOP Class UIDs."""
        return str(sop_class_uid) in cls._CR_DX_SOP_CLASS_UIDS

    def __len__(self):
        """Number of CR / DX instances listed in the dataframe."""
        return len(self.images_dataframe)

    def __getitem__(self, idx):
        """
        Load the pixel data of the instance in row ``idx``.

        Returns a dict with keys 'source' (``pixel_array`` of the loaded dataset) and
        'PixelSize' (its ``ImagerPixelSpacing`` attribute), passed through
        ``self.transform`` first when a transform was supplied.

        Raises:
            LookupError: if the DICOMDIR no longer lists the row's SOPInstanceUID.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist() # transform into python list

        path_to_DICOMDIR = self.images_dataframe["DICOMDIR"].iloc[idx]
        wanted_uid = self.images_dataframe["SOPInstanceUID"].iloc[idx]

        fs = pydicom.fileset.FileSet(path_to_DICOMDIR)
        for instance in fs:
            if instance.SOPInstanceUID == wanted_uid:
                ds = instance.load()
                break  # stop scanning once the instance is found
        else:
            raise LookupError(
                f"SOPInstanceUID {wanted_uid} is not listed in {path_to_DICOMDIR}"
            )

        sample = {'source': ds.pixel_array, 'PixelSize': ds.ImagerPixelSpacing}
        if self.transform:
            sample = self.transform(sample)
        return sample