# Source: MTANN/datasets.py (danielnflam/MTANN on GitHub, main branch)
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, sys, time, datetime, pathlib, random, math
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as tvtransforms
from skimage import io, transform

# HELPER FUNCTION
def _check_if_array_3D(source_image, boneless_image=None):
    """
    Reduce 3D image arrays to 2D by keeping only their first channel.

    Inputs:
        source_image: image array; when it has 3 dimensions only [:, :, 0] is kept
        boneless_image: (optional) second image array, treated the same way
    Returns:
        the processed source image alone when boneless_image is None,
        otherwise the tuple (source_image, boneless_image).
    """
    # Convert every image that was actually supplied, in argument order.
    flattened = [
        img[:, :, 0] if img.ndim == 3 else img
        for img in (source_image, boneless_image)
        if img is not None
    ]

    # Results are handed back by position: the first converted image goes to
    # the source slot, the second (if any) to the boneless slot.
    if flattened:
        source_image = flattened[0]
    if len(flattened) > 1:
        boneless_image = flattened[1]

    return source_image if boneless_image is None else (source_image, boneless_image)

###########################
# JSRT CXR dataset
# Shiraishi J, Katsuragawa S, Ikezoe J, Matsumoto T, Kobayashi T, Komatsu K, Matsui M, Fujita H, Kodera Y, and Doi K.: Development of a digital image database for chest radiographs with and without a lung nodule: Receiver operating characteristic analysis of radiologists’ detection of pulmonary nodules. AJR 174; 71-74, 2000
###########################
class JSRT_CXR(Dataset):
    """
    Dataset of chest radiographs, optionally paired with bone-suppressed versions.

    Each item is a dict with key 'source' and, when a bone-suppressed folder was
    given, key 'boneless'. The file table is kept in self.data (a pandas DataFrame
    with columns "Patient", optionally "boneless", and "source").
    """

    def __init__(self, data_normal, data_BSE=None, transform=None):
        """
        Inputs:
            data_normal: root directory holding the normal / non-suppressed images
            data_BSE: (optional) root directory holding the bone-suppressed images;
                      pass None to build a source-only dataset
            transform: (optional) a torchvision.transforms.Compose series of transformations
        Assumed that files corresponding to the same patient have the same name in both folders data_normal and data_BSE.
        Raises:
            ValueError: if data_BSE is given and a source image does not have exactly
                        one bone-suppressed file with the same name (extension ignored).
        """
        # Index the bone-suppressed folder once (file stem -> paths) instead of
        # re-walking it for every source image.
        boneless_by_code = None
        if data_BSE is not None:
            boneless_by_code = {}
            for root2, _dirs2, files2 in os.walk(data_BSE):
                for name2 in files2:
                    # Exact stem comparison distinguishes e.g. 0_1 from 0_10.
                    filename2, _ = os.path.splitext(name2)
                    boneless_by_code.setdefault(filename2, []).append(os.path.join(root2, name2))

        if boneless_by_code is None:
            sample = {"Patient": [], "source": []}
        else:
            sample = {"Patient": [], "boneless": [], "source": []}

        for root, _dirs, files in os.walk(data_normal):
            for name in files:
                # Suffix test: a substring test would also accept e.g. "x.png.bak".
                if not name.endswith('.png'):
                    continue
                patient_code_file = os.path.splitext(name)[0]

                if boneless_by_code is not None:
                    matches = boneless_by_code.get(patient_code_file, [])
                    # Exactly one partner is required so that the source and
                    # boneless columns stay aligned row by row.
                    if len(matches) != 1:
                        raise ValueError(
                            "Expected exactly one bone-suppressed file for patient "
                            f"'{patient_code_file}' in '{data_BSE}', found {len(matches)}."
                        )
                    sample["boneless"].append(matches[0])

                sample["Patient"].append(patient_code_file)
                sample["source"].append(os.path.join(root, name))

        self.data = pd.DataFrame(sample)
        self.transform = transform

    def __len__(self):
        """Number of source images found."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Read the image(s) of row idx and return them as a sample dict.

        Returns {'source': array} or {'source': array, 'boneless': array}; 3D arrays
        are reduced to their first channel. self.transform, if set, is applied to the dict.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()  # transform into python list

        source_image = plt.imread(self.data["source"].iloc[idx])
        if "boneless" in self.data.columns:
            boneless_image = plt.imread(self.data["boneless"].iloc[idx])
            source_image, boneless_image = _check_if_array_3D(source_image, boneless_image)
            sample = {'source': source_image, 'boneless': boneless_image}
        else:
            source_image = _check_if_array_3D(source_image, None)
            sample = {'source': source_image}

        if self.transform:
            sample = self.transform(sample)

        return sample

    def visualise(self, idx):
        """
        Show the source image of row idx and, when available, its bone-suppressed
        counterpart next to it, both in grayscale.
        """
        sourceIm = plt.imread(self.data["source"].iloc[idx])
        if "boneless" in self.data.columns:
            bonelessIm = plt.imread(self.data["boneless"].iloc[idx])
            sourceIm, bonelessIm = _check_if_array_3D(sourceIm, bonelessIm)
            images = [sourceIm, bonelessIm]
        else:
            # Source-only dataset: there is no second panel to draw.
            images = [_check_if_array_3D(sourceIm)]

        # Visualisation. squeeze=False keeps the axes 2D even for a single panel.
        fig, ax = plt.subplots(1, len(images), squeeze=False)
        for axis, image in zip(ax[0], images):
            axis.imshow(image, cmap="gray")


class POLYU_COVID19_CXR_CT_Cohort1(Dataset):
    """
    Source-only image dataset built from every .png file found under a root folder.

    Each item is a dict {'source': array}. The file table is kept in self.data
    (a pandas DataFrame with columns "Patient" and "source").
    """

    def __init__(self, data_normal, transform=None):
        """
        Inputs:
            data_normal: root directory holding the normal / non-suppressed images
            transform: (optional) a torchvision.transforms.Compose series of transformations
        The patient code of an image is its file name without the extension.
        """
        sample = {"Patient": [], "source": []}
        for root, _dirs, files in os.walk(data_normal):
            for name in files:
                # Suffix test: a substring test would also accept e.g. "x.png.bak".
                if not name.endswith('.png'):
                    continue
                sample["Patient"].append(os.path.splitext(name)[0])
                sample["source"].append(os.path.join(root, name))

        self.data = pd.DataFrame(sample)
        self.transform = transform

    def __len__(self):
        """Number of source images found."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Read the image of row idx and return it as {'source': array}.

        A 3D array is reduced to its first channel. self.transform, if set,
        is applied to the sample dict.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()  # transform into python list

        source_image = plt.imread(self.data["source"].iloc[idx])
        source_image = _check_if_array_3D(source_image)

        sample = {'source': source_image}

        if self.transform:
            sample = self.transform(sample)

        return sample