Skip to content

Commit 03e7cda

Browse files
committed
update
1 parent 916b538 commit 03e7cda

3 files changed

Lines changed: 283 additions & 89 deletions

File tree

sim/genesis/zeroth_env.py

Lines changed: 244 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import genesis as gs
55
from genesis.utils.geom import quat_to_xyz, transform_by_quat, inv_quat, transform_quat_by_quat
66

7-
87
def gs_rand_float(lower, upper, shape, device):
    """Draw uniform random samples in [lower, upper) with the given shape on device."""
    span = upper - lower
    return torch.rand(size=shape, device=device) * span + lower
109

@@ -15,12 +14,14 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
1514

1615
self.num_envs = num_envs
1716
self.num_obs = obs_cfg["num_obs"]
18-
# self.num_privileged_obs = obs_cfg.get("num_privileged_obs", None)
1917
self.num_actions = env_cfg["num_actions"]
2018
self.num_commands = command_cfg["num_commands"]
19+
20+
self.add_noise = obs_cfg.get("add_noise", False)
2121

2222
# observation history
23-
self.frame_stack = obs_cfg.get("frame_stack", 1)
23+
self.frame_stack = obs_cfg.get("frame_stack", 15)
24+
self.c_frame_stack = obs_cfg.get("c_frame_stack", 3)
2425
self.obs_history = collections.deque(maxlen=self.frame_stack)
2526
self.critic_history = collections.deque(maxlen=self.frame_stack)
2627

@@ -37,7 +38,7 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
3738
self.reward_scales = reward_cfg["reward_scales"]
3839

3940
# privileged observation config
40-
self.num_privileged_obs = self.num_commands + self.num_actions * 3 + 3 + 3 + 3 # commands + dof_pos + dof_vel + actions + lin_vel + ang_vel + quat
41+
self.num_privileged_obs = 66
4142

4243
# create scene
4344
self.scene = gs.Scene(
@@ -58,16 +59,32 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
5859
show_viewer=show_viewer,
5960
)
6061

61-
# add plain
62-
self.scene.add_entity(gs.morphs.URDF(file="urdf/plane/plane.urdf", fixed=True))
62+
# add terrain
63+
self.terrain = self.scene.add_entity(gs.morphs.Terrain(
64+
n_subterrains=(3, 3),
65+
subterrain_size=(12.0, 12.0),
66+
horizontal_scale=0.25,
67+
vertical_scale=0.005,
68+
subterrain_types=[
69+
["flat_terrain", "random_uniform_terrain", "stepping_stones_terrain"],
70+
["pyramid_sloped_terrain", "discrete_obstacles_terrain", "wave_terrain"],
71+
["random_uniform_terrain", "pyramid_stairs_terrain", "sloped_terrain"]
72+
],
73+
visualization=True,
74+
collision=True
75+
))
76+
77+
# terrain measurement
78+
self.measured_heights = None
79+
self.height_samples = 64
80+
self.obs_scales["height_measurements"] = 1.0
6381

6482
# add robot
6583
self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=self.device)
6684
self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=self.device)
6785
self.inv_base_init_quat = inv_quat(self.base_init_quat)
6886
self.robot = self.scene.add_entity(
6987
gs.morphs.URDF(
70-
# file="../resources/stompymicro/robot_fixed.urdf",
7188
file="sim/resources/stompymicro/robot_fixed.urdf",
7289
pos=self.base_init_pos.cpu().numpy(),
7390
quat=self.base_init_quat.cpu().numpy(),
@@ -80,6 +97,29 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
8097
# names to indices
8198
self.motor_dofs = [self.robot.get_joint(name).dof_idx_local for name in self.env_cfg["dof_names"]]
8299

100+
# Initialize legs_joints mapping
101+
self.legs_joints = {
102+
"left_hip_pitch": self.motor_dofs[0],
103+
"left_knee_pitch": self.motor_dofs[1],
104+
"left_ankle_pitch": self.motor_dofs[2],
105+
"right_hip_pitch": self.motor_dofs[3],
106+
"right_knee_pitch": self.motor_dofs[4],
107+
"right_ankle_pitch": self.motor_dofs[5]
108+
}
109+
110+
# Initialize feet indices
111+
self.feet_indices = [
112+
self.robot.get_body_index("left_foot"),
113+
self.robot.get_body_index("right_foot")
114+
]
115+
116+
# Initialize observation related buffers
117+
self.contact_forces = torch.zeros((self.num_envs, len(self.feet_indices), 3), device=self.device)
118+
self.rand_push_force = torch.zeros((self.num_envs, 3), device=self.device)
119+
self.rand_push_torque = torch.zeros((self.num_envs, 3), device=self.device)
120+
self.env_frictions = torch.ones((self.num_envs,), device=self.device)
121+
self.body_mass = torch.ones((self.num_envs,), device=self.device) * 30.0
122+
83123
# PD control parameters
84124
self.robot.set_dofs_kp([self.env_cfg["kp"]] * self.num_actions, self.motor_dofs)
85125
self.robot.set_dofs_kv([self.env_cfg["kd"]] * self.num_actions, self.motor_dofs)
@@ -125,6 +165,10 @@ def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_vie
125165
)
126166
self.extras = dict() # extra information for logging
127167

168+
self.noise_scale_vec = self._get_noise_scale_vec(obs_cfg)
169+
170+
171+
128172
def _resample_commands(self, envs_idx):
129173
self.commands[envs_idx, 0] = gs_rand_float(*self.command_cfg["lin_vel_x_range"], (len(envs_idx),), self.device)
130174
self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["lin_vel_y_range"], (len(envs_idx),), self.device)
@@ -133,6 +177,8 @@ def _resample_commands(self, envs_idx):
133177

134178
def step(self, actions):
135179
self.actions = torch.clip(actions, -self.env_cfg["clip_actions"], self.env_cfg["clip_actions"])
180+
# Update last actions before execution
181+
self.last_actions[:] = self.actions[:]
136182
exec_actions = self.last_actions if self.simulate_action_latency else self.actions
137183
target_dof_pos = exec_actions * self.env_cfg["action_scale"] + self.default_dof_pos
138184
self.robot.control_dofs_position(target_dof_pos, self.motor_dofs)
@@ -178,55 +224,166 @@ def step(self, actions):
178224
self.rew_buf += rew
179225
self.episode_sums[name] += rew
180226

181-
# compute observations
182-
obs_now = torch.cat(
183-
[
184-
self.base_ang_vel * self.obs_scales["ang_vel"], # 3
185-
self.projected_gravity, # 3
186-
self.commands * self.commands_scale, # 4
187-
(self.dof_pos - self.default_dof_pos) * self.obs_scales["dof_pos"], # 12
188-
self.dof_vel * self.obs_scales["dof_vel"], # 12
189-
self.actions, # 12
190-
],
191-
axis=-1,
192-
)
193-
194-
# privileged observations
195-
if self.num_privileged_obs is not None:
196-
self.privileged_obs_buf = torch.cat(
197-
[
198-
self.commands * self.commands_scale, # 4
199-
(self.dof_pos - self.default_dof_pos) * self.obs_scales["dof_pos"], # 12
200-
self.dof_vel * self.obs_scales["dof_vel"], # 12
201-
self.actions, # 12
202-
self.base_lin_vel * self.obs_scales["lin_vel"], # 3
203-
self.base_ang_vel * self.obs_scales["ang_vel"], # 3
204-
self.base_euler * self.obs_scales["quat"], # 3
205-
],
206-
axis=-1,
207-
)
208-
self.critic_history.append(self.privileged_obs_buf)
209-
210-
# stack observations
211-
self.obs_history.append(obs_now)
212-
obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.frame_stack)], dim=1)
213-
self.obs_buf = obs_buf_all.reshape(self.num_envs, -1)
227+
# Compute observations
228+
self.compute_observations()
214229

215230
self.last_actions[:] = self.actions[:]
216231
self.last_dof_vel[:] = self.dof_vel[:]
217232

218233
return self.obs_buf, self.rew_buf, self.reset_buf, {
219234
"observations": {
220-
"critic": self.obs_buf
235+
"critic": self.privileged_obs_buf
221236
},
222237
**self.extras
223238
}
224239

225240
def get_observations(self):
    """Return the stacked actor observations plus an extras dict.

    NOTE(review): here the critic entry is ``self.obs_buf``, while ``step``
    returns ``self.privileged_obs_buf`` as the critic observation — confirm
    which one the training loop expects.
    """
    extras = {"observations": {"critic": self.obs_buf}}
    return self.obs_buf, extras
227242

228-
def get_privileged_observations(self):
229-
return None
243+
def _get_phase(self):
244+
cycle_time = self.env_cfg.get("cycle_time", 1.0)
245+
phase = self.episode_length_buf * self.dt / cycle_time
246+
return phase
247+
248+
def _get_gait_phase(self):
249+
# return float mask 1 is stance, 0 is swing
250+
phase = self._get_phase()
251+
sin_pos = torch.sin(2 * torch.pi * phase)
252+
# Add double support phase
253+
stance_mask = torch.zeros((self.num_envs, 2), device=self.device)
254+
# left foot stance
255+
stance_mask[:, 0] = sin_pos >= 0
256+
# right foot stance
257+
stance_mask[:, 1] = sin_pos < 0
258+
# Double support phase
259+
stance_mask[torch.abs(sin_pos) < 0.1] = 1
260+
return stance_mask
261+
262+
def compute_ref_state(self):
    """Compute reference joint targets for the current gait phase.

    Builds ``self.ref_dof_pos`` by offsetting the default joint positions
    with a sinusoidal swing profile (hip/ankle scaled by ``scale_1``, knee
    by ``scale_2 = 2 * scale_1``), and ``self.ref_action`` as twice that
    target. Fix: removed the unused local ``default_clone``.
    """
    phase = self._get_phase()
    sin_pos = torch.sin(2 * torch.pi * phase)
    sin_pos_l = sin_pos.clone()
    sin_pos_r = sin_pos.clone()
    self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1)

    scale_1 = self.env_cfg.get("target_joint_pos_scale", 0.1)
    scale_2 = 2 * scale_1  # knee swings twice as far as hip/ankle

    # Left leg: zero out the positive half-cycle so the left leg only
    # deviates from default during its (negative) swing half.
    sin_pos_l[sin_pos_l > 0] = 0
    self.ref_dof_pos[:, self.legs_joints["left_hip_pitch"]] += sin_pos_l * scale_1
    self.ref_dof_pos[:, self.legs_joints["left_knee_pitch"]] += sin_pos_l * scale_2
    self.ref_dof_pos[:, self.legs_joints["left_ankle_pitch"]] += sin_pos_l * scale_1

    # Right leg: zero out the negative half-cycle (mirror of the left).
    sin_pos_r[sin_pos_r < 0] = 0
    self.ref_dof_pos[:, self.legs_joints["right_hip_pitch"]] += sin_pos_r * scale_1
    self.ref_dof_pos[:, self.legs_joints["right_knee_pitch"]] += sin_pos_r * scale_2
    self.ref_dof_pos[:, self.legs_joints["right_ankle_pitch"]] += sin_pos_r * scale_1

    # Double-support phase: zero the whole reference row near the crossing.
    # NOTE(review): this zeroes the targets instead of restoring the default
    # pose (the dead ``default_clone`` hinted at that intent) — confirm.
    self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0

    self.ref_action = 2 * self.ref_dof_pos
289+
def _get_noise_scale_vec(self, cfg):
290+
"""Sets a vector used to scale the noise added to the observations.
291+
[NOTE]: Must be adapted when changing the observations structure
292+
293+
Args:
294+
cfg (Dict): Environment config file
295+
296+
Returns:
297+
[torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
298+
"""
299+
num_actions = self.num_actions
300+
noise_vec = torch.zeros(cfg["num_single_obs"], device=self.device)
301+
self.add_noise = cfg.get("add_noise", False)
302+
noise_scales = cfg["noise_scales"]
303+
noise_vec[0:5] = 0.0 # commands
304+
noise_vec[5 : (num_actions + 5)] = noise_scales.dof_pos * self.obs_scales.dof_pos
305+
noise_vec[(num_actions + 5) : (2 * num_actions + 5)] = noise_scales.dof_vel * self.obs_scales.dof_vel
306+
noise_vec[(2 * num_actions + 5) : (3 * num_actions + 5)] = 0.0 # previous actions
307+
noise_vec[(3 * num_actions + 5) : (3 * num_actions + 5) + 3] = (
308+
noise_scales.ang_vel * self.obs_scales.ang_vel
309+
) # ang vel
310+
noise_vec[(3 * num_actions + 5) + 3 : (3 * num_actions + 5)] = (
311+
noise_scales.quat * self.obs_scales.quat
312+
) # euler x,y
313+
return noise_vec
314+
315+
def compute_observations(self):
    """Build actor and critic (privileged) observations and push them into
    the frame-stacking histories.

    Side effects: sets ``self.command_input``, ``self.privileged_obs_buf``
    (stacked over ``c_frame_stack``) and ``self.obs_buf`` (stacked over
    ``frame_stack``), and appends to ``obs_history`` / ``critic_history``.
    """
    phase = self._get_phase()
    self.compute_ref_state()

    sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1)
    cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1)

    stance_mask = self._get_gait_phase()
    # Feet in contact when vertical contact force exceeds 5 N.
    # NOTE(review): contact_forces is initialized to zeros in __init__ and
    # no visible code updates it — confirm it is refreshed each step.
    contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.0

    # Command input: gait clock (sin, cos) + scaled velocity commands.
    self.command_input = torch.cat((sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1)
    # NOTE(review): attribute-style obs_scales.dof_pos is used here, but
    # elsewhere obs_scales is indexed as a dict
    # (obs_scales["height_measurements"]) — confirm the config type.
    q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos
    dq = self.dof_vel * self.obs_scales.dof_vel

    # Tracking error against the sinusoidal reference pose.
    diff = self.dof_pos - self.ref_dof_pos
    # NOTE(review): default_joint_pd_target is not initialized anywhere in
    # view (elsewhere default_dof_pos is used); env_frictions/body_mass are
    # 1-D (num_envs,) while the other entries are 2-D — torch.cat along
    # dim=-1 would fail unless they are unsqueezed first. Verify.
    self.privileged_obs_buf = torch.cat(
        (
            self.command_input,  # 2 + 3
            (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos,  # 10D
            self.dof_vel * self.obs_scales.dof_vel,  # 10D
            self.actions,  # 10D
            diff,  # 10D
            self.base_lin_vel * self.obs_scales.lin_vel,  # 3
            self.base_ang_vel * self.obs_scales.ang_vel,  # 3
            self.base_euler_xyz * self.obs_scales.quat,  # 3
            self.rand_push_force[:, :2],  # 3
            self.rand_push_torque,  # 3
            self.env_frictions,  # 1
            self.body_mass / 30.0,  # 1
            stance_mask,  # 2
            contact_mask,  # 2
        ),
        dim=-1,
    )

    # Single-frame actor observation.
    # NOTE(review): measured_heights is initialized to None in __init__, so
    # .mean() would raise unless it is populated before the first call;
    # terrain_difficulty has no visible initialization — confirm both.
    obs_buf = torch.cat(
        (
            self.command_input,  # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw)
            q,  # 10D
            dq,  # 10D
            self.actions,  # 10D
            self.base_ang_vel * self.obs_scales.ang_vel,  # 3
            self.base_euler_xyz * self.obs_scales.quat,  # 3
            self.measured_heights.mean(dim=1, keepdim=True) * self.obs_scales.height_measurements,  # 1
            self.terrain_difficulty.unsqueeze(1),  # 1
        ),
        dim=-1,
    )

    # NOTE(review): measure_heights and root_states are not defined in the
    # visible code; also self.obs_buf here is the value from the PREVIOUS
    # call (stale), since this call assigns it only at the end — verify.
    if self.measure_heights:
        heights = (
            torch.clip(
                self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights,
                -1,
                1.0,
            )
            * self.obs_scales.height_measurements
        )
        self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1)

    # Optionally perturb the actor observation with scaled Gaussian noise.
    if self.add_noise:
        noise_level = self.obs_cfg.get("noise_level", 0.1)
        obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * noise_level
    else:
        obs_now = obs_buf.clone()
    self.obs_history.append(obs_now)
    self.critic_history.append(self.privileged_obs_buf)

    # NOTE(review): indexing obs_history[i] for i < maxlen raises IndexError
    # until the deque holds frame_stack entries — presumably reset pre-fills
    # the history; confirm.
    obs_buf_all = torch.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], dim=1)  # N,T,K

    self.obs_buf = obs_buf_all.reshape(self.num_envs, -1)  # N, T*K
    self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.c_frame_stack)], dim=1)
230387

231388
def reset_idx(self, envs_idx):
232389
if len(envs_idx) == 0:
@@ -325,3 +482,47 @@ def _reward_energy_efficiency(self):
325482
def _reward_orientation(self):
326483
# Penalize base orientation away from upright
327484
return torch.exp(-torch.abs(self.base_euler[:, 0]) - torch.abs(self.base_euler[:, 1])) * self.reward_cfg["reward_scales"]["orientation"]
485+
486+
def _reward_terrain_adaptation(self):
487+
"""Reward for adapting to different terrain types"""
488+
# Calculate foot clearance
489+
foot_clearance = torch.zeros((self.num_envs, 2), device=self.device)
490+
for i, foot_idx in enumerate(self.feet_indices):
491+
foot_pos = self.robot.get_body_pos(foot_idx)
492+
terrain_height = self.terrain.get_height(foot_pos[:, :2])
493+
foot_clearance[:, i] = foot_pos[:, 2] - terrain_height
494+
495+
# Reward for maintaining appropriate foot clearance
496+
target_clearance = 0.05 # 5cm
497+
clearance_error = torch.abs(foot_clearance - target_clearance)
498+
return torch.exp(-torch.mean(clearance_error, dim=1) / 0.02)
499+
500+
def _reward_terrain_stability(self):
    """Reward for maintaining stability on uneven terrain."""
    # Exponential penalty on the magnitude of base roll and pitch.
    euler = quat_to_xyz(self.base_quat)
    tilt = torch.abs(euler[:, 0]) + torch.abs(euler[:, 1])
    return torch.exp(-tilt)
505+
506+
def _reward_terrain_progress(self):
507+
"""Reward for making progress across different terrain types"""
508+
# Calculate forward progress relative to terrain difficulty
509+
forward_vel = self.base_lin_vel[:, 0]
510+
terrain_difficulty = self.terrain.get_difficulty(self.base_pos[:, :2])
511+
512+
# Difficulty factors based on terrain type
513+
difficulty_factors = {
514+
"flat_terrain": 0.1,
515+
"random_uniform_terrain": 0.3,
516+
"stepping_stones_terrain": 0.5,
517+
"pyramid_sloped_terrain": 0.7,
518+
"discrete_obstacles_terrain": 0.8,
519+
"wave_terrain": 0.6,
520+
"pyramid_stairs_terrain": 0.9,
521+
"sloped_terrain": 0.4
522+
}
523+
524+
# Get current terrain type
525+
terrain_type = self.terrain.get_type(self.base_pos[:, :2])
526+
difficulty = difficulty_factors.get(terrain_type, 0.5)
527+
528+
return forward_vel / (difficulty + 0.1)

0 commit comments

Comments
 (0)