renderer.py
import trimesh
import pyrender
import numpy as np
import cv2
from scipy.spatial.transform import Rotation
import json
import os
import open3d as o3d
class ModelRenderer:
    def __init__(self, filename):
        # Load mesh using trimesh
        self.mesh = trimesh.load(filename)
        if not isinstance(self.mesh, trimesh.Trimesh):
            self.mesh = self.mesh.dump().sum()  # Combine into a single mesh if needed
        # Create pyrender scene and add the mesh
        self.scene = pyrender.Scene()
        mesh = pyrender.Mesh.from_trimesh(self.mesh, smooth=True)
        self.scene.add(mesh)
        # Keep an Open3D copy of the mesh for visualization
        self.o3dmesh = o3d.io.read_triangle_mesh(filename)
    def render(self,
               intrinsic_matrix: np.ndarray,
               extrinsic_matrix: np.ndarray):
        # Create camera from intrinsics
        fx, fy = intrinsic_matrix[0, 0], intrinsic_matrix[1, 1]
        cx, cy = intrinsic_matrix[0, 2], intrinsic_matrix[1, 2]
        camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy)
        # Viewport size, assuming the principal point is at the image center
        width = int(cx * 2)
        height = int(cy * 2)
        # Add camera to scene. Note: pyrender expects the camera pose in the
        # OpenGL convention (camera looks down -Z); see the conversion sketch below.
        cam_node = self.scene.add(camera, pose=extrinsic_matrix)
        # Use OffscreenRenderer
        renderer = pyrender.OffscreenRenderer(viewport_width=width, viewport_height=height)
        color, _ = renderer.render(self.scene)
        # Clean up
        self.scene.remove_node(cam_node)
        renderer.delete()
        return color
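    # A minimal sketch (not part of the original code): if the extrinsic matrix
    # comes from an OpenCV-style camera-to-world pose (camera looks down +Z,
    # Y down), negating the camera's Y and Z axes converts it to the OpenGL
    # convention pyrender uses. The name `opencv_to_opengl_pose` is hypothetical.
    @staticmethod
    def opencv_to_opengl_pose(camera_to_world: np.ndarray) -> np.ndarray:
        flip = np.diag([1.0, -1.0, -1.0, 1.0])  # negate camera Y and Z axes
        return camera_to_world @ flip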
    def generate_random_positions(self):
        # Expects self.strategy, self.min_distance, self.max_distance and
        # self.num_candidates to be set on the instance (see __main__ below)
        if self.strategy == "uniform_cartesian":
            distances = np.random.uniform(self.min_distance, self.max_distance, size=self.num_candidates)
        elif self.strategy == "uniform_spherical":
            # Uniform density inside the spherical shell: the volume element
            # grows as r^2, so sample u ~ U[min^3, max^3] and take the cube root
            u = np.random.uniform(self.min_distance ** 3, self.max_distance ** 3, size=self.num_candidates)
            distances = np.cbrt(u)
        positions = np.column_stack((distances, np.zeros((self.num_candidates, 2))))
        return positions
    def generate_random_attitudes(self):
        # Uniformly distributed random orientations as quaternions (x, y, z, w)
        return Rotation.random(self.num_candidates).as_quat()
    def generate_random_camera_positions(self):
        # Rotating the sampled (distance, 0, 0) vectors by uniformly random
        # rotations yields uniformly distributed viewing directions
        attitudes = self.generate_random_attitudes()
        positions = self.generate_random_positions()
        camera_positions = np.zeros((self.num_candidates, 3))
        for i in range(self.num_candidates):
            camera_positions[i, :] = Rotation.from_quat(attitudes[i]).apply(positions[i])
        return camera_positions
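    # Example usage (illustrative values, assuming the sampling attributes are
    # set as in the __main__ block below):
    #   renderer = ModelRenderer("model.obj")
    #   renderer.num_candidates = 100
    #   renderer.strategy = "uniform_spherical"
    #   renderer.min_distance, renderer.max_distance = 2, 5
    #   positions = renderer.generate_random_camera_positions()
    #   renderer.visualize_camera_positions(positions)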
    def get_active_camera_pose_for_camera_position(self, camera_position_world):
        """
        Gives the active transformation (transforms points in the world frame).
        Maps the origin to the camera center, with the camera Z-axis looking
        towards the origin.
        """
        # Handle the case where the camera is at the origin
        if np.linalg.norm(camera_position_world) < 1e-6:
            raise ValueError("Camera is at the origin; the look-at direction is undefined.")
        camera_to_origin_world = -camera_position_world
        camera_to_origin_world = camera_to_origin_world / np.linalg.norm(camera_to_origin_world)  # Normalize
        desired_camera_axis = camera_to_origin_world  # Camera Z-axis (looking towards the origin)
        current_camera_axis = np.array([0, 0, 1])  # Default camera Z-axis
        rotation, _ = Rotation.align_vectors([desired_camera_axis], [current_camera_axis])
        r_vec = rotation.as_rotvec()
        # The active pose maps the origin to the camera center, so its
        # translation is the camera position itself
        return r_vec, camera_position_world
    def get_passive_camera_pose_for_camera_position(self, camera_position_world):
        """
        Gives the passive transformation (changes reference frames, i.e.
        world -> camera). Maps the world origin to a point on the boresight axis.
        """
        # Handle the case where the camera is at the origin
        if np.linalg.norm(camera_position_world) < 1e-6:
            raise ValueError("Camera is at the origin; the look-at direction is undefined.")
        r_vec, t_vec = self.get_active_camera_pose_for_camera_position(camera_position_world)
        # Invert the active pose to get the passive camera pose
        passive_rvec, passive_tvec = self.invert_pose(r_vec, t_vec)
        return passive_rvec, passive_tvec
    def generate_random_camera_poses(self):
        positions = self.generate_random_camera_positions()
        camera_poses = np.zeros((self.num_candidates, 6))
        for i in range(self.num_candidates):
            rvec, tvec = self.get_active_camera_pose_for_camera_position(positions[i])
            camera_poses[i] = np.hstack((rvec, tvec))
        return camera_poses
    def invert_pose(self, rvec, tvec):
        """
        Inverts a pose given as rotation and translation vectors:
        the inverse of p' = R p + t is p = R^-1 p' - R^-1 t.
        """
        # Invert the rotation
        rotation = Rotation.from_rotvec(rvec).inv()
        inverted_rvec = rotation.as_rotvec()
        # The inverted translation is -R^-1 t
        inverted_tvec = -rotation.apply(tvec)
        return inverted_rvec, inverted_tvec
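    # A minimal sanity check for invert_pose (illustrative values): composing a
    # pose with its inverse should map any point back onto itself.
    #   rvec, tvec = np.array([0.1, 0.2, 0.3]), np.array([1.0, 2.0, 3.0])
    #   inv_r, inv_t = renderer.invert_pose(rvec, tvec)
    #   p = np.array([0.5, -0.5, 2.0])
    #   p_fwd = Rotation.from_rotvec(rvec).apply(p) + tvec
    #   p_back = Rotation.from_rotvec(inv_r).apply(p_fwd) + inv_t
    #   np.allclose(p, p_back)  # -> True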
    def visualize_camera_positions(self, camera_positions, colors=None):
        # Create point cloud
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(np.array(camera_positions))
        # Handle colors
        if colors is None:
            colors = np.tile(np.array([[1.0, 0.0, 0.0]]), (len(camera_positions), 1))  # red
        else:
            colors = np.array(colors) / 255.0  # Normalize to [0, 1]
        pcd.colors = o3d.utility.Vector3dVector(colors)
        geometry_list = [pcd, self.o3dmesh]
        # Visualize
        o3d.visualization.draw_geometries(geometry_list)
    def draw_pose_axes_on_image(self, image, K, rvec, tvec):
        # Project the origin and the tips of the three unit axes into the image
        axes_length = 1
        axes_points = np.array([[0, 0, 0], [axes_length, 0, 0], [0, axes_length, 0], [0, 0, axes_length]])
        axes_on_image, _ = cv2.projectPoints(axes_points.astype(np.float32), rvec, tvec, K, None)
        axes_on_image = axes_on_image.squeeze()
        output_image = image.copy()
        origin = tuple(axes_on_image[0].astype(int))
        for i in range(3):
            end_point = tuple(axes_on_image[i + 1].astype(int))
            # X axis in blue, Y in green, Z in red (OpenCV uses BGR)
            color = (255, 0, 0) if i == 0 else (0, 255, 0) if i == 1 else (0, 0, 255)
            cv2.line(output_image, origin, end_point, color, 10)
        return output_image
if __name__ == "__main__":
    speed_path = r"E:\ESA\VBN_DataSets\Data\SPEED+\synthetic\images"
    model_path = "models/obj/Tango_rotated.obj"
    renderer = ModelRenderer(model_path)
    renderer.num_candidates = 10
    renderer.strategy = "uniform_spherical"
    renderer.min_distance = 2
    renderer.max_distance = 5
    poses = renderer.generate_random_camera_poses()
    with open("data/train.json", "r") as f:
        speed_poses = json.load(f)
    n_poses = len(speed_poses)
    # SPEED+ camera intrinsics (1920 x 1200 images)
    K = np.array([
        [2988.5795163815555, 0, 960],
        [0, 2988.3401159176124, 600],
        [0, 0, 1],
    ])
    # Flattened intrinsics (fx, fy, cx, cy); currently unused below
    intrinsic_matrix = np.array([K[0, 0], K[1, 1], K[0, 2], K[1, 2]])
    for _ in range(n_poses):
        # Pick a random pose from the dataset
        i = np.random.randint(0, n_poses)
        pose = speed_poses[i]
        print(pose['filename'])
        # The renderer wants CAMERA -> WORLD (the camera pose in world
        # coordinates), while SPEED gives WORLD -> CAMERA, so the pose is
        # inverted below
        speed_t_vec = np.array(pose["r_Vo2To_vbs_true"])
        # The position appears to be stored with the opposite sign in the SPEED
        # dataset, hence the negation when inverting the pose below
        attitude = pose["q_vbs2tango_true"]
        # SPEED stores quaternions scalar-first (w, x, y, z); scipy expects
        # scalar-last (x, y, z, w)
        changed_attitude = np.array([attitude[1], attitude[2], attitude[3], attitude[0]])
        speed_r_vec = Rotation.from_quat(changed_attitude).as_rotvec()  # Rotation vector
        r_vec, t_vec = speed_r_vec, speed_t_vec
        r_vec_inverted, t_vec_inverted = renderer.invert_pose(speed_r_vec, -speed_t_vec)
        # renderer.visualize_camera_positions([t_vec])
        r_mat = Rotation.from_rotvec(r_vec_inverted).as_matrix()
        extrinsic_matrix = np.hstack((r_mat, t_vec_inverted.reshape(3, 1)))
        extrinsic_matrix = np.vstack((extrinsic_matrix, [0, 0, 0, 1]))
        image = renderer.render(K, extrinsic_matrix)
        # cv2.imshow("Rendered", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        speed_image_path = os.path.join(speed_path, pose['filename'])
        speed_image = cv2.imread(speed_image_path)
        # Mask for the rendered image: the background renders white (pyrender's
        # default), so inverting one channel yields the object silhouette
        rendered_mask = cv2.bitwise_not(image[:, :, 0])
        # Mask for the speed image: pixels brighter than the dark background
        speed_mask = np.zeros((speed_image.shape[0], speed_image.shape[1]), dtype=np.uint8)
        speed_mask[speed_image[:, :, 0] > 10] = 255
        image_axes = renderer.draw_pose_axes_on_image(image, K, speed_r_vec, speed_t_vec)
        speed_image_axes = renderer.draw_pose_axes_on_image(speed_image, K, speed_r_vec, speed_t_vec)
        # Skip images where less than 50% of the mask is black; those probably
        # contain an Earth background that would corrupt the mask
        if np.sum(speed_mask == 0) / speed_mask.size < 0.5:
            print("Image has background, skipping")
            continue
        cv2.imshow("Rendered Mask", rendered_mask)
        cv2.imshow("Speed Mask", speed_mask)
        # Compute IoU between the two silhouettes. Note: rendered_mask is not
        # strictly binary, so thresholding it first would make the IoU exact
        intersection = cv2.bitwise_and(rendered_mask, speed_mask)
        union = cv2.bitwise_or(rendered_mask, speed_mask)
        iou = np.sum(intersection) / np.sum(union)
        print(f"IoU: {iou}")
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # break