-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdata_postprocess.py
More file actions
220 lines (184 loc) · 7.91 KB
/
data_postprocess.py
File metadata and controls
220 lines (184 loc) · 7.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
This file contains functions and code used to revert the
preprocessing pipeline. This is mainly used to visualize the
dataset after running through DataPreProcessor to make sure
any transformations made does not alter the dataset content
in an unintended way.
"""
import torch
import random
import cv2
import os
import numpy as np
from torchvision import transforms
from grasp_utils import grasps_to_bboxes
from parameters import Params
params = Params()
def grasps_to_bboxes(grasps):
    """Convert normalized grasps to bounding-box corner coordinates.

    NOTE(review): this local definition shadows the `grasps_to_bboxes`
    imported from `grasp_utils` above -- confirm the two implementations
    agree, or drop one of them.

    Args:
        grasps: (N, >=5) tensor of normalized grasps whose first five
            columns are (x, y, theta, w, h) in [0, 1]. x, y, w scale by
            1024 (source image size); theta maps to [-90, 90] degrees;
            h scales by 100.

    Returns:
        (N, 8) tensor of corner coordinates (x1, y1, ..., x4, y4) in
        1024x1024 pixel space.
    """
    # De-normalize the grasp parameters.
    x = grasps[:, 0] * 1024
    y = grasps[:, 1] * 1024
    theta = torch.deg2rad(grasps[:, 2] * 180 - 90)
    w = grasps[:, 3] * 1024
    h = grasps[:, 4] * 100
    # Hoist the trig terms -- each was previously evaluated four times.
    cos_t = torch.cos(theta)
    sin_t = torch.sin(theta)
    # Half-extent offsets along (dx_w, dy_w) and across (dx_h, dy_h)
    # the grasp axis.
    dx_w = w / 2 * cos_t
    dy_w = w / 2 * sin_t
    dx_h = h / 2 * sin_t
    dy_h = h / 2 * cos_t
    x1 = x - dx_w + dx_h
    y1 = y - dy_w - dy_h
    x2 = x + dx_w + dx_h
    y2 = y + dy_w - dy_h
    x3 = x + dx_w - dx_h
    y3 = y + dy_w + dy_h
    x4 = x - dx_w - dx_h
    y4 = y - dy_w + dy_h
    return torch.stack((x1, y1, x2, y2, x3, y3, x4, y4), 1)
def process(rgb, d):
    """Format an RGB(+depth) pair into a model-ready input tensor.

    Applies ImageNet normalization to the RGB channels, then (depending
    on params.NUM_CHANNEL) concatenates a zero-centered, clipped depth
    channel, adds a batch dimension, and moves the result to the
    configured device.
    """
    normalize = transforms.Compose([
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # HWC image in [0, 255] -> normalized CHW float tensor.
    rgb = normalize(torch.moveaxis(rgb / 255.0, -1, 0))

    def prep_depth(depth):
        # Zero-center, clip to [-1, 1], and move the channel axis first.
        depth = torch.unsqueeze(depth, 2)
        depth = torch.clip(depth - torch.mean(depth), -1, 1)
        return torch.moveaxis(depth, -1, 0)

    if d is None:
        img = rgb
    elif params.NUM_CHANNEL == 3:
        # Input channels -- (red, green, depth)
        img = torch.cat((rgb[:2], prep_depth(d)), axis=0)
    else:
        # Input channels -- (red, green, blue, depth)
        img = torch.cat((rgb, prep_depth(d)), axis=0)
    # Add the batch dimension and move to the configured device.
    return torch.unsqueeze(img, 0).to(params.DEVICE)
class DataPostProcessor:
    """Decodes model-format grasp maps back into grasps and renders
    them for visual sanity checks."""

    def __init__(self):
        pass

    def map2grasp(self, map):
        """Collect grasp candidates from a dense grasp map.

        Cells whose confidence channel (index 5) is 0 are skipped; the
        remaining cells are decoded into normalized
        (x, y, theta, w, h) tuples, de-duplicated through a set, and
        returned as a tensor.
        """
        candidates = set()
        for row, cells in enumerate(map):
            for col, cell in enumerate(cells):
                if cell[5] == 0:
                    continue
                candidates.add((
                    (col + cell[0]) / params.OUTPUT_SIZE,  # grasp x
                    (row + cell[1]) / params.OUTPUT_SIZE,  # grasp y
                    cell[2],                               # theta
                    cell[3] / params.OUTPUT_SIZE,          # width
                    cell[4],                               # height
                ))
        return torch.tensor(list(candidates))

    def grasp2bbox(self, candidates):
        """Convert grasp candidates to bounding boxes."""
        return grasps_to_bboxes(candidates)

    def visualize_grasp(self, img, candidates):
        """Overlay grasp boxes on a normalized input image with OpenCV."""
        target_bboxes = grasps_to_bboxes(candidates)
        arr = np.array(img.cpu())
        # Undo the ImageNet normalization channel by channel, back to
        # the 0-255 range. (std, mean) pairs per channel index.
        stats = ((0.229, 0.485), (0.224, 0.456), (0.225, 0.406))
        channels = [
            np.clip((arr[:, c, :, :] * std + mean) * 255, 0, 255)
            for c, (std, mean) in enumerate(stats)
        ]
        # OpenCV wants BGR ordering, hence the reversed channel list.
        img_bgr = np.moveaxis(np.concatenate(channels[::-1], axis=0), 0, -1)
        img_bgr = np.ascontiguousarray(img_bgr, dtype=np.uint8)
        for bbox in target_bboxes:
            self.draw_bbox(img_bgr, bbox, (0, 255, 0))
        cv2.imshow('img', img_bgr)
        cv2.waitKey(0)

    def draw_bbox(self, img, bbox, color):
        """Draw grasp boxes with the grasp-plate edges as RED and the
        other two edges as <color>."""
        # Scale the 1024-space corners down to the output resolution.
        pts = [
            (int(bbox[2 * i] / 1024 * params.OUTPUT_SIZE),
             int(bbox[2 * i + 1] / 1024 * params.OUTPUT_SIZE))
            for i in range(4)
        ]
        red = (0, 0, 255)
        cv2.line(img, pts[0], pts[1], color, 1)
        cv2.line(img, pts[1], pts[2], red, 1)
        cv2.line(img, pts[2], pts[3], color, 1)
        cv2.line(img, pts[3], pts[0], red, 1)
def load_grasp_label(file_path):
    """Return a list of normalized grasp labels read from <file_path>.

    Each line of the file holds one grasp in the format 'x;y;theta;w;h'.
    Lines are split on ';' and normalized via normalize_grasp().

    Args:
        file_path: path to the grasp label text file.

    Returns:
        List of normalized [x, y, theta, w, h] float lists.
    """
    grasp_list = []
    with open(file_path, 'r') as f:
        # Data format in each line: 'x;y;theta;w;h'
        for line in f:
            # Bug fix: the old code sliced off the last character
            # (grasp[:-1]) to drop '\n', but the final line of a file
            # often has no trailing newline, so the last digit of the
            # height value was silently truncated. rstrip('\n') removes
            # the newline only when it is present.
            label = line.rstrip('\n').split(';')
            grasp_list.append(normalize_grasp(label))
    return grasp_list
def normalize_grasp(label):
    """Return normalized grasp label values.

    Expects label = [x, y, theta, w, h] (strings or numbers).
    Coordinates and width are divided by 1024 (image size), theta is
    mapped from [-90, 90] degrees to [0, 1], and height is divided
    by 100.
    """
    norm_label = []
    for i, raw in enumerate(label):
        value = float(raw)
        if i == 2:
            # Theta: [-90, 90] -> [0, 1]
            norm_label.append((value + 90) / 180)
        elif i == 4:
            # Height
            norm_label.append(value / 100)
        else:
            # Coordinates and width share the 1024 image-size divisor.
            norm_label.append(value / 1024)
    return norm_label
def npy_grasp_normalize(labels, img_size=224):
    """Normalize the pixel-space grasp columns of <labels> in place.

    Divides the x, y, and width columns (indices 0, 1, 3) by
    <img_size>; the theta and height columns are left untouched.
    Mutates <labels> and returns None.

    Args:
        labels: (N, 5) tensor of grasps in pixel coordinates. Must be a
            float dtype -- in-place division on an integer tensor raises
            in recent PyTorch versions.
        img_size: source image resolution used as the divisor
            (default 224, matching the previous hard-coded value).
    """
    for idx in [0, 1, 3]:
        labels[:, idx] /= img_size
if __name__ == '__main__':
    # Walk the compressed dataset, rebuild each model input, and show
    # the decoded grasps overlaid on it for a visual sanity check.
    path = 'data/top_5_compressed/train'
    ori_path = 'data/top_5/train'
    processor = DataPostProcessor()
    for cls in os.listdir(path):
        for img_id in os.listdir(os.path.join(path, cls)):
            img_dir = os.path.join(path, cls, img_id)
            # NOTE(review): [10:] skips the first ten directory entries --
            # confirm this is an intentional spot-check offset.
            for img_id_with_var in os.listdir(img_dir)[10:]:
                if not img_id_with_var.endswith('RGB.npy'):
                    continue
                var = img_id_with_var[:2]
                rgb_name = var + img_id + '_RGB.npy'
                d_name = var + img_id + '_perfect_depth.npy'
                mask_name = var + img_id + '_mask.npy'
                map_name = var + img_id + '_0_map_grasps.npy'
                grasp_list_name = var + img_id + '_0_txt_grasps.npy'
                # Bug fix: np.load accepts a path and manages the file
                # itself; the previous open(...) handles were never
                # closed.
                rgb = np.load(os.path.join(img_dir, rgb_name))
                d = np.load(os.path.join(img_dir, d_name))
                mask = np.load(os.path.join(img_dir, mask_name))
                # Renamed from `map` to avoid shadowing the builtin.
                grasp_map = np.load(os.path.join(img_dir, map_name))
                # (H, W) -> (1, 1, H, W): add batch and channel dims.
                mask = torch.unsqueeze(torch.unsqueeze(torch.tensor(mask), 0), 0)
                # `mask` is already a tensor -- no need to re-wrap it
                # before rotating.
                rotated_mask = transforms.functional.rotate(mask, 90)
                true_grasps = torch.tensor(
                    np.load(os.path.join(img_dir, grasp_list_name)))
                npy_grasp_normalize(true_grasps)
                print(true_grasps)
                input_img = process(torch.tensor(rgb), torch.tensor(d))
                print(rotated_mask.shape)
                # Use the configured device instead of hard-coded 'cuda'
                # for consistency with process() and CPU-only machines.
                input_img = input_img * rotated_mask.to(params.DEVICE)
                grasps = processor.map2grasp(grasp_map)
                processor.visualize_grasp(input_img, grasps)
                processor.visualize_grasp(input_img, true_grasps)