-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatasets.py
More file actions
146 lines (124 loc) · 5.98 KB
/
datasets.py
File metadata and controls
146 lines (124 loc) · 5.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, sys, time, datetime, pathlib, random, math
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as tvtransforms
from skimage import io, transform
# HELPER FUNCTION
def _check_if_array_3D(source_image, boneless_image=None):
# Check if array is 3D or 2D
iters = 0
img_list = [source_image, boneless_image]
for image in img_list:
if image is not None:
if image.ndim == 3:
# make the image grayscale
image = image[:,:,0]
iters+=1
if iters == 1:
source_image = image
if iters == 2:
boneless_image = image
if boneless_image is None:
return source_image
else:
return source_image, boneless_image
###########################
# JSRT CXR dataset
# Shiraishi J, Katsuragawa S, Ikezoe J, Matsumoto T, Kobayashi T, Komatsu K, Matsui M, Fujita H, Kodera Y, and Doi K.: Development of a digital image database for chest radiographs with and without a lung nodule: Receiver operating characteristic analysis of radiologists’ detection of pulmonary nodules. AJR 174; 71-74, 2000
###########################
class JSRT_CXR(Dataset):
    """Chest X-ray dataset of source images, optionally paired with bone-suppressed images.

    Files are discovered by walking the given directories. A source image and
    its bone-suppressed counterpart are paired by file name without extension.
    The resulting table is stored in ``self.data`` (a pandas DataFrame with the
    columns "Patient", "source" and, when data_BSE is given, "boneless").
    """

    def __init__(self, data_normal, data_BSE=None, transform=None):
        """
        Inputs:
            data_normal: root directory holding the normal / non-suppressed images
            data_BSE: (optional) root directory holding the bone-suppressed images;
                      None builds a source-only dataset
            transform: (optional) callable applied to every sample dict, e.g. a
                       torchvision.transforms.Compose series of transformations
        Assumed that files corresponding to the same patient have the same name in both folders data_normal and data_BSE.
        Raises:
            ValueError: if data_BSE is given and a source image has no file of
                        the same name (without extension) below data_BSE
        """
        patients = []
        sources = []
        for root, _dirs, files in os.walk(data_normal):
            for name in files:
                stem, ext = os.path.splitext(name)
                # Test the real extension: a substring test such as
                # '.png' in name would also accept e.g. "scan.png.txt".
                if ext.lower() != '.png':
                    continue
                patients.append(stem)
                sources.append(os.path.join(root, name))

        if data_BSE is None:
            sample = {"Patient": patients, "source": sources}
        else:
            # Walk data_BSE once and index it by file stem. Exact stem equality
            # keeps e.g. "0_1" and "0_10" apart, and a dictionary lookup avoids
            # re-walking the whole tree for every source image.
            bse_by_stem = {}
            for root, _dirs, files in os.walk(data_BSE):
                for name in files:
                    # The first file found for a stem wins, so every patient
                    # contributes exactly one row.
                    bse_by_stem.setdefault(os.path.splitext(name)[0],
                                           os.path.join(root, name))

            missing = [code for code in patients if code not in bse_by_stem]
            if missing:
                # Fail with a readable message instead of the column-length
                # error pandas raises for unequal lists.
                raise ValueError(
                    "No bone-suppressed image found in %r for patient(s): %s"
                    % (data_BSE, ", ".join(missing))
                )
            sample = {
                "Patient": patients,
                "boneless": [bse_by_stem[code] for code in patients],
                "source": sources,
            }

        self.data = pd.DataFrame(sample)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (patients) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Read the image(s) at position idx and return them as a sample dict.

        The dict holds 'source' and, when the dataset was built with data_BSE,
        'boneless'. If a transform was supplied it is applied to the dict and
        its result is returned instead.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()  # transform into python list

        source_image = plt.imread(self.data["source"].iloc[idx])
        if "boneless" in self.data.columns:
            boneless_image = plt.imread(self.data["boneless"].iloc[idx])
            source_image, boneless_image = _check_if_array_3D(source_image, boneless_image)
            sample = {'source': source_image, 'boneless': boneless_image}
        else:
            source_image = _check_if_array_3D(source_image, None)
            sample = {'source': source_image}

        if self.transform:
            sample = self.transform(sample)
        return sample

    def visualise(self, idx):
        """Plot the source image at idx, next to its bone-suppressed image when there is one."""
        sourceIm = plt.imread(self.data["source"].iloc[idx])

        if "boneless" not in self.data.columns:
            # Source-only dataset: there is no second image to show.
            sourceIm = _check_if_array_3D(sourceIm)
            fig, ax = plt.subplots(1, 1)
            ax.imshow(sourceIm, cmap="gray")
            return

        bonelessIm = plt.imread(self.data["boneless"].iloc[idx])
        sourceIm, bonelessIm = _check_if_array_3D(sourceIm, bonelessIm)
        # Visualisation
        fig, ax = plt.subplots(1, 2)
        ax[0].imshow(sourceIm, cmap="gray")
        ax[1].imshow(bonelessIm, cmap="gray")
class POLYU_COVID19_CXR_CT_Cohort1(Dataset):
    """Source-only chest X-ray dataset built from every PNG file below a root directory.

    The discovered files are stored in ``self.data`` (a pandas DataFrame with
    the columns "Patient" and "source").
    """

    def __init__(self, data_normal, transform=None):
        """
        Inputs:
            data_normal: root directory holding the normal / non-suppressed images
            transform: (optional) callable applied to every sample dict, e.g. a
                       torchvision.transforms.Compose series of transformations
        The patient code of an image is its file name without the extension.
        """
        patients = []
        sources = []
        for root, _dirs, files in os.walk(data_normal):
            for name in files:
                stem, ext = os.path.splitext(name)
                # Test the real extension: a substring test such as
                # '.png' in name would also accept e.g. "scan.png.bak".
                if ext.lower() != '.png':
                    continue
                patients.append(stem)
                sources.append(os.path.join(root, name))

        self.data = pd.DataFrame({"Patient": patients, "source": sources})
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Read the image at position idx and return it as ``{'source': image}``.

        If a transform was supplied it is applied to the dict and its result
        is returned instead.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()  # transform into python list

        source_image = plt.imread(self.data["source"].iloc[idx])
        source_image = _check_if_array_3D(source_image)
        sample = {'source': source_image}

        if self.transform:
            sample = self.transform(sample)
        return sample