ROS-label-node/ros_colour_node.py at master · SfTI-Robotics/ROS-label-node · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
#!/usr/bin/env python
import rospy
from sensor_msgs.msg import Image
from sensor_msgs.msg import CameraInfo
from std_msgs.msg import UInt8MultiArray

import ros_numpy
import numpy as np
import numpy as np
import time
import cv2
import pyrealsense2 as rs
import random
import math
import argparse

from sensor_msgs.msg import Image
from threading import Thread
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.mplot3d import proj3d
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import FancyArrowPatch
from sort import *

from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import GenericMask
from detectron2.utils.visualizer import ColorMode
from detectron2.structures import Boxes, RotatedBoxes

from detectron2.data import MetadataCatalog

import torch, torchvision


# Resolution of camera streams
RESOLUTION_X = 640
RESOLUTION_Y = 480

# Configuration for histogram for depth image
NUM_BINS = 500
MAX_RANGE = 10000

AXES_SIZE = 10


class VideoStreamer:
    """
    Video streamer that continuously is reading frames through subscribing to d435 images.
    Frames are then ready to read when program requires.
    """
    def __init__(self, pub, video_file=None):
        self._pub = pub
        self.colour_retrieved = False
        self.depth_retrieved = False
        self.intrin_retrieved = False

    def read(self):
        return (self.color_image, self.depth_image)

    def colour_callback(self, msg):
        if not self.colour_retrieved:
            data = ros_numpy.numpify(msg)
            self.color_image = data[:,:,::-1]
            self.colour_retrieved = True

    def depth_callback(self, msg):
        if not self.depth_retrieved:
            data = ros_numpy.numpify(msg)
            self.depth_image = data
            self.depth_retrieved = True

    def intrin_callback(self, cameraInfo):
        """
        D435 camera intrinsic values can be derived from CameraInfo ROS message.
        """
        if not self.intrin_retrieved:
            self.intrin = rs.intrinsics()
            self.intrin.width = cameraInfo.width
            self.intrin.height = cameraInfo.height
            self.intrin.ppx = cameraInfo.K[2]
            self.intrin.ppy = cameraInfo.K[5]
            self.intrin.fx = cameraInfo.K[0]
            self.intrin.fy = cameraInfo.K[4]
            #self.intrin.model = cameraInfo.distortion_model
            self.intrin.model  = rs.distortion.none
            self.intrin.coeffs = [i for i in cameraInfo.D]
            self.intrin_retrieved = True


    def set_not_retrieved(self):
        self.colour_retrieved = False
        self.depth_retrieved = False
        self.intrin_retrieved = False

    def publish(self, image):
        # Convert image array to Image from sensor_msgs
        img_list = ros_numpy.msgify(Image, image, encoding='rgb8')

        self._pub.publish(img_list)

class Predictor(DefaultPredictor):
    def __init__(self, pub):
        self._pub = pub
        self.config = self.setup_predictor_config()
        super().__init__(self.config)

    def create_outputs(self, color_image):
        self.outputs = self(color_image)

    def setup_predictor_config(self):
        """
        Setup config and return predictor. See config/defaults.py for more options
        """
        cfg = get_cfg()

        cfg.merge_from_file("/opt/ros/melodic/lib/octomap_server/scripts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
        # Mask R-CNN ResNet101 FPN weights
        cfg.MODEL.WEIGHTS = "/opt/ros/melodic/lib/octomap_server/scripts/model_final_a3ec72.pkl"
        # This determines the resizing of the image. At 0, resizing is disabled.
        cfg.INPUT.MIN_SIZE_TEST = 0

        return cfg

    def format_results(self, class_names):
        """
        Format results so they can be used by overlay_instances function
        """
        predictions = self.outputs['instances']
        boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes if predictions.has("pred_classes") else None

        labels = None
        if classes is not None and class_names is not None and len(class_names) > 1:
            labels = [class_names[i] for i in classes]
        if scores is not None:
            if labels is None:
                labels = ["{:.0f}%".format(s * 100) for s in scores]
            else:
                labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)]

        masks = predictions.pred_masks.cpu().numpy()
        masks = [GenericMask(x, v.output.height, v.output.width) for x in masks]

        boxes_list = boxes.tensor.tolist()
        scores_list = scores.tolist()
        class_list = classes.tolist()

        for i in range(len(scores_list)):
            boxes_list[i].append(scores_list[i])
            boxes_list[i].append(class_list[i])


        boxes_list = np.array(boxes_list)

        return (masks, boxes, boxes_list, labels, scores_list, class_list, classes)

    def publish(self, mask):
        # Publish label array
        img_list = ros_numpy.msgify(Image, mask, encoding='mono8')

        self._pub.publish(img_list)


class OptimizedVisualizer(Visualizer):
    """
    Detectron2's altered Visualizer class which converts boxes tensor to cpu. The original
    doesn't do this, but it only works for me if I do this.
    """
    def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE):
        super().__init__(img_rgb, metadata, scale, instance_mode)

    def _convert_boxes(self, boxes):
        """
        Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension.
        """
        if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes):
            return boxes.tensor.cpu().numpy()
        else:
            return np.asarray(boxes)


class DetectedObject:
    """
    Each object corresponds to all objects detected during the instance segmentation
    phase. Associated trackers, distance, position and velocity are stored as attributes
    of the object.
    masks[i], boxes[i], labels[i], scores_list[i], class_list[i]
    """
    def __init__(self, mask, box, label, score, class_name, class_index):
        self.mask = mask
        self.box = box
        self.label = label
        self.score = score
        self.class_name = class_name
        self.class_index = class_index

    def __str__(self):
        """
        Printing for debugging purposes
        """
        ret_str = "The pixel mask of {} represents a {} and is {}m away from the camera.\n".format(self.mask, self.class_name, self.distance)
        if hasattr(self, 'track'):
            if hasattr(self.track, 'speed'):
                if self.track.speed >= 0:
                    ret_str += "The {} is travelling {}m/s towards the camera\n".format(self.class_name, self.track.speed)
                else:
                    ret_str += "The {} is travelling {}m/s away from the camera\n".format(self.class_name, abs(self.track.speed))
            if hasattr(self.track, 'impact_time'):
                ret_str += "The {} will collide in {} seconds\n".format(self.class_name, self.track.impact_time)
            if hasattr(self.track, 'velocity'):
                ret_str += "The {} is located at {} and travelling at {}m/s\n".format(self.class_name, self.track.position, self.track.velocity)
        return ret_str

    def create_vector_arrow(self):
        """
        Creates direction arrow which will use Arrow3D object. Converts vector to a suitable size so that the direction is clear.
        NOTE: The magnitude of the velocity is not represented through this arrow. The arrow lengths are almost all identical
        """
        arrow_ratio = AXES_SIZE / max(abs(self.track.velocity_vector[0]), abs(self.track.velocity_vector[1]), abs(self.track.velocity_vector[2]))
        self.track.v_points = [x * arrow_ratio for x in self.track.velocity_vector]

class Arrow3D(FancyArrowPatch):
    """
    Arrow used to demonstrate direction of travel for each object. Only for debugging/visualisation.
    """
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)


def find_mask_centre(mask, color_image):
    """
    Finding centre of mask of object
    """
    moments = cv2.moments(np.float32(mask))

    cX = int(moments["m10"] / moments["m00"])
    cY = int(moments["m01"] / moments["m00"])

    return cX, cY


def find_median_depth(mask_area, num_median, histg):
    """
    Iterate through all histogram bins and stop at the median value. This is the
    median depth of the mask.
    """

    median_counter = 0
    centre_depth = 0.0
    for x in range(0, len(histg)):
        median_counter += histg[x][0]
        if median_counter >= num_median:
            # Half of histogram is iterated through,
            # Therefore this bin contains the median
            centre_depth = x / 50
            break

    return centre_depth

def debug_plots(color_image, depth_image, mask, histg, depth_colormap):
    """
    This function is used for debugging purposes. This plots the depth color-
    map, mask, mask and depth color-map bitwise_and, and histogram distrobutions
    of the full image and the masked image.
    """
    full_hist = cv2.calcHist([depth_image], [0], None, [NUM_BINS], [0, MAX_RANGE])
    masked_depth_image = cv2.bitwise_and(depth_colormap, depth_colormap, mask= mask)

    plt.figure()

    plt.subplot(2, 2, 1)
    plt.imshow(depth_colormap)

    plt.subplot(2, 2, 2)
    plt.imshow(masks[i].mask)

    plt.subplot(2, 2, 3).set_title(labels[i])
    plt.imshow(masked_depth_image)

    plt.subplot(2, 2, 4)
    plt.plot(full_hist)
    plt.plot(histg)
    plt.xlim([0, 600])
    plt.show()

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--file', help='type --file=file-name.bag to stream using file instead of webcam')
    args = parser.parse_args()

    rospy.init_node('ros_colour')

    # ROS topics for image with superimposed mask and label (8-bit int) mask
    pub = rospy.Publisher('labelled_image', Image, queue_size=1)
    mask_pub = rospy.Publisher('label_mask', Image, queue_size=1)

    # Initialise Detectron2 predictor
    predictor = Predictor(mask_pub)

    # Initialise video streams from D435
    video_streamer = VideoStreamer(pub)

    # Initialise Kalman filter tracker from modified Sort module
    mot_tracker = Sort()

    speed_time_start = time.time()

    """
    This is the ROS topic to get images from. If no image is being found, type
    'rostopic list' in the console to find available topics. It may be called
    '/d400/color/image_raw' instead.
    """
    rospy.Subscriber("/d435/color/image_raw/", Image, video_streamer.colour_callback)
    rospy.Subscriber("/d435/aligned_depth_to_color/image_raw", Image, video_streamer.depth_callback)
    rospy.Subscriber("/d435/aligned_depth_to_color/camera_info", CameraInfo, video_streamer.intrin_callback)
    #rospy.Subscriber("/d435/color/image_raw/d435/aligned_depth_to_color/image_raw", Image, video_streamer.callback)
    time.sleep(1)

    while True:

        time_start = time.time()
        color_image, depth_image = video_streamer.read()
        detected_objects = []

        t1 = time.time()

        camera_time = t1 - time_start

        # Run image through instance segmentation
        predictor.create_outputs(color_image)
        outputs = predictor.outputs

        t2 = time.time()
        model_time = t2 - t1
        print("Model took {:.2f} time".format(model_time))

        predictions = outputs['instances']

        if outputs['instances'].has('pred_masks'):
            num_masks = len(predictions.pred_masks)
        else:
            # Even if no masks are found, the trackers must still be updated
            tracked_objects = mot_tracker.update(boxes_list)
            continue

        detectron_time = time.time()

        # Create a new Visualizer object from Detectron2
        v = OptimizedVisualizer(color_image[:, :, ::-1], MetadataCatalog.get(predictor.config.DATASETS.TRAIN[0]))

        masks, boxes, boxes_list, labels, scores_list, class_list, classes = predictor.format_results(v.metadata.get("thing_classes"))

        for i in range(num_masks):
            try:
                detected_obj = DetectedObject(masks[i], boxes[i], labels[i], scores_list[i], class_list[i], classes[i].item())
            except:
                print("Object doesn't meet all parameters")

            detected_objects.append(detected_obj)

        # Next 3 lines create label array
        added_masks = np.zeros((RESOLUTION_Y, RESOLUTION_X), dtype='uint8')
        for i in detected_objects:
            added_masks += (i.mask.mask * (i.class_index + 1))

        tracked_objects = mot_tracker.update(boxes_list)

        # Create colour image with labelled mask on top
        v.overlay_instances(
            masks=masks,
            boxes=boxes,
            labels=labels,
            keypoints=None,
            assigned_colors=None,
            alpha=0.3
        )

        speed_time_end = time.time()
        total_speed_time = speed_time_end - speed_time_start
        speed_time_start = time.time()

        for i in range(num_masks):


            mask_area = detected_objects[i].mask.area()
            num_median = math.floor(mask_area / 2)

            histg = cv2.calcHist([depth_image], [0], detected_objects[i].mask.mask, [NUM_BINS], [0, MAX_RANGE])


            # Uncomment this to use the debugging function
            #depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
            #debug_plots(color_image, depth_image, masks[i].mask, histg, depth_colormap)

            centre_depth = find_median_depth(mask_area, num_median, histg)
            if detected_objects[i].class_index == 0 and centre_depth < 1.0:
                print("Person detected who is closer than 1m")

            detected_objects[i].distance = centre_depth
            cX, cY = find_mask_centre(detected_objects[i].mask._mask, v.output)

            # track refers to the list which holds the index of the detected mask which matches the tracker
            track = mot_tracker.matched[np.where(mot_tracker.matched[:,0]==i)[0],1]

            if len(track) > 0:
                # Index of detected mask
                track = track[0]
                if i not in mot_tracker.unmatched:
                    try:
                        # If the tracker's distance has already been initialised - tracker has been detected previously
                        if hasattr(mot_tracker.trackers[track], 'distance'):
                            mot_tracker.trackers[track].set_speed(centre_depth, total_speed_time)

                            mot_tracker.trackers[track].set_impact_time(centre_depth)


                            if mot_tracker.trackers[track].impact_time != False and mot_tracker.trackers[track].impact_time >= 0:
                                if detected_objects[i].class_index == 0 and mot_tracker.trackers[track].impact_time < 2.0:
                                    print("Person detected to impact with robot within 2 seconds")
                                v.draw_text("{:.2f} seconds to impact".format(mot_tracker.trackers[track].impact_time), (cX, cY + 60))

                        if hasattr(mot_tracker.trackers[track], 'position'):
                            # New 3D coordinates for current frame
                            x1, y1, z1 = rs.rs2_deproject_pixel_to_point(
                            video_streamer.intrin, [cX, cY], centre_depth
                        )

                            # Update states for tracked object
                            mot_tracker.trackers[track].set_velocity_vector(x1, y1, z1)
                            mot_tracker.trackers[track].set_distance_3d(x1, y1, z1)
                            mot_tracker.trackers[track].set_velocity(total_speed_time)

                            detected_objects[i].track = mot_tracker.trackers[track]

                            v.draw_text("{:.2f}m/s".format(detected_objects[i].track.velocity), (cX, cY + 40))

                        position = rs.rs2_deproject_pixel_to_point(
                            video_streamer.intrin, [cX, cY], centre_depth
                        )

                        mot_tracker.trackers[track].set_distance(centre_depth)
                        mot_tracker.trackers[track].set_position(position)


                    except IndexError:
                        continue


            v.draw_circle((cX, cY), (0, 0, 0))
            v.draw_text("{:.2f}m".format(centre_depth), (cX, cY + 20))


        #for i in detected_objects:
            #print(i)

        cv2.imshow('Segmented Image', v.output.get_image()[:,:,::-1])

        predictor.publish(added_masks)
        video_streamer.publish(v.output.get_image()[:,:,::-1])
        video_streamer.set_not_retrieved()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        time_end = time.time()
        total_time = time_end - time_start

        #print("Time to process frame: {:.2f}".format(total_time))
        #print("FPS: {:.2f}\n".format(1/total_time))

    cv2.destroyAllWindows()