33import isaaclab .sim as sim_utils
44from isaaclab .assets import ArticulationCfg , AssetBaseCfg
55from isaaclab .envs import ManagerBasedRLEnvCfg
6- from isaaclab .managers import ActionTermCfg as ActionTerm
76from isaaclab .managers import CurriculumTermCfg as CurrTerm
87from isaaclab .managers import EventTermCfg as EventTerm
98from isaaclab .managers import ObservationGroupCfg as ObsGroup
1312from isaaclab .managers import TerminationTermCfg as DoneTerm
1413from isaaclab .scene import InteractiveSceneCfg
1514from isaaclab .utils import configclass
16- from isaaclab .utils .assets import ISAAC_NUCLEUS_DIR
1715from isaaclab .utils .noise import AdditiveUniformNoiseCfg as Unoise
1816
1917import HumanoidRLPackage .HumanoidRLSetup .tasks .manipulation .mdp as mdp
2018
2119@configclass
2220class ReachSceneCfg (InteractiveSceneCfg ):
23- """Configuration for the scene with a robotic arm."""
24-
25- # world
2621 ground = AssetBaseCfg (
2722 prim_path = "/World/ground" ,
2823 spawn = sim_utils .GroundPlaneCfg (),
2924 init_state = AssetBaseCfg .InitialStateCfg (pos = (0.0 , 0.0 , - 1.05 )),
3025 )
3126
32- # robots
3327 robot : ArticulationCfg = MISSING
3428
35- # lights
3629 light = AssetBaseCfg (
3730 prim_path = "/World/light" ,
3831 spawn = sim_utils .DomeLightCfg (color = (0.75 , 0.75 , 0.75 ), intensity = 2500.0 ),
3932 )
4033
4134
42- ##
43- # MDP settings
44- ##
45-
46-
4735@configclass
4836class CommandsCfg :
49- """Command terms for the MDP."""
50-
5137 ee_pose = mdp .UniformPoseCommandCfg (
5238 asset_name = "robot" ,
5339 body_name = "DIP_INDEX_v1_.*" ,
54- resampling_time_range = (4.0 , 4.0 ),
40+ resampling_time_range = (4.0 , 4.0 ), # target pose changes every sampling time
5541 debug_vis = True ,
5642 ranges = mdp .UniformPoseCommandCfg .Ranges (
57- pos_x = (- 0.5 , 0.5 ),
58- pos_y = (0.62 , 0.66 ),
59- pos_z = (0.3 , 0.7 ),
43+ pos_x = (- 0.8 , - 0.7 ),
44+ pos_y = (- 0.4 , 0.4 ),
45+ pos_z = (0.0 , 0.3 ),
6046 roll = (0.0 , 0.0 ),
6147 pitch = (0.0 , 0.0 ),
6248 yaw = (0.0 , 0.0 ),
63- ),
49+ ), # range of position/rotation that the target pose can appear in
6450 )
6551
6652 # ee_pose_2 = mdp.UniformPoseCommandCfg(
@@ -126,20 +112,15 @@ class CommandsCfg:
126112
127113@configclass
128114class ActionsCfg :
129- """Action specifications for the MDP."""
130-
131- arm_action : ActionTerm = MISSING
132- gripper_action : ActionTerm | None = None
115+ arm_action = mdp .JointPositionActionCfg (
116+ asset_name = "robot" , joint_names = [".*" ], scale = 0.5 , use_default_offset = True
117+ )
133118
134119
135120@configclass
136121class ObservationsCfg :
137- """Observation specifications for the MDP."""
138-
139122 @configclass
140123 class PolicyCfg (ObsGroup ):
141- """Observations for policy group."""
142-
143124 # observation terms (order preserved)
144125 joint_pos = ObsTerm (func = mdp .joint_pos_rel , noise = Unoise (n_min = - 0.01 , n_max = 0.01 ))
145126 joint_vel = ObsTerm (func = mdp .joint_vel_rel , noise = Unoise (n_min = - 0.01 , n_max = 0.01 ))
@@ -181,7 +162,7 @@ class RewardsCfg:
181162 # task terms
182163 end_effector_position_tracking = RewTerm (
183164 func = mdp .position_command_error ,
184- weight = - 0.2 ,
165+ weight = - 1.0 ,
185166 params = {"asset_cfg" : SceneEntityCfg ("robot" , body_names = "DIP_INDEX_v1_.*" ), "command_name" : "ee_pose" },
186167 )
187168 # end_effector_2_position_tracking = RewTerm(
@@ -207,7 +188,7 @@ class RewardsCfg:
207188
208189 end_effector_position_tracking_fine_grained = RewTerm (
209190 func = mdp .position_command_error_tanh ,
210- weight = 0.2 ,
191+ weight = 0.5 ,
211192 params = {"asset_cfg" : SceneEntityCfg ("robot" , body_names = "DIP_INDEX_v1_.*" ), "std" : 0.1 , "command_name" : "ee_pose" },
212193 )
213194 # end_effector_2_position_tracking_fine_grained = RewTerm(
@@ -258,43 +239,32 @@ class RewardsCfg:
258239 # )
259240
260241 # action penalty
261- action_rate = RewTerm (func = mdp .action_rate_l2 , weight = - 0.00001 )
242+ action_rate = RewTerm (func = mdp .action_rate_l2 , weight = - 0.000002 )
262243 joint_vel = RewTerm (
263244 func = mdp .joint_vel_l2 ,
264- weight = - 0.0000001 ,
245+ weight = - 0.005 ,
265246 params = {"asset_cfg" : SceneEntityCfg ("robot" )},
266247 )
267248
268249
269250@configclass
270251class TerminationsCfg :
271- """Termination terms for the MDP."""
272-
273252 time_out = DoneTerm (func = mdp .time_out , time_out = True )
274253
275254
276255@configclass
277256class CurriculumCfg :
278- """Curriculum terms for the MDP."""
279-
280257 action_rate = CurrTerm (
281- func = mdp .modify_reward_weight , params = {"term_name" : "action_rate" , "weight" : - 0.0001 , "num_steps" : 4500 }
258+ func = mdp .modify_reward_weight , params = {"term_name" : "action_rate" , "weight" : - 0.00005 , "num_steps" : 18000 }
282259 )
283260
284261 joint_vel = CurrTerm (
285- func = mdp .modify_reward_weight , params = {"term_name" : "joint_vel" , "weight" : - 0.001 , "num_steps" : 4500 }
262+ func = mdp .modify_reward_weight , params = {"term_name" : "joint_vel" , "weight" : - 0.05 , "num_steps" : 18000 }
286263 )
287264
288265
289- ##
290- # Environment configuration
291- ##
292-
293-
294266@configclass
295267class ReachEnvCfg (ManagerBasedRLEnvCfg ):
296- """Configuration for the reach end-effector pose tracking environment."""
297-
298268 # Scene settings
299269 scene : ReachSceneCfg = ReachSceneCfg (num_envs = 4096 , env_spacing = 2.5 )
300270 # Basic settings
@@ -309,10 +279,8 @@ class ReachEnvCfg(ManagerBasedRLEnvCfg):
309279
310280 def __post_init__ (self ):
311281 """Post initialization."""
312- # general settings
313- self .decimation = 4 # policy every 4 physics steps (was 2) → smoother target updates
282+ self .decimation = 4
314283 self .sim .render_interval = self .decimation
315284 self .episode_length_s = 12.0
316285 self .viewer .eye = (3.5 , 3.5 , 3.5 )
317- # simulation settings
318286 self .sim .dt = 1.0 / 60.0
0 commit comments