Skip to content

Commit d031c65

Browse files
Merge pull request #34 from WATonomous/improve-arm-hand-rl
Improve arm hand RL
2 parents 7ddadc8 + 2c0be78 commit d031c65

8 files changed

Lines changed: 33 additions & 60 deletions

File tree

autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from isaaclab.assets.articulation import ArticulationCfg
66

77
_HUMANOID_WATO_ROOT = os.path.abspath(
8-
os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..")
8+
os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
99
)
1010
_MODEL_ASSETS = os.path.join(_HUMANOID_WATO_ROOT, "ModelAssets")
1111

@@ -40,10 +40,9 @@
4040
actuators={
4141
"arm": ImplicitActuatorCfg(
4242
joint_names_expr=[".*"],
43-
# velocity_limit=100.0,
44-
# effort_limit=87.0,
4543
stiffness=0.5,
4644
damping=0.5,
45+
velocity_limit_sim=3.0,
4746
),
4847
},
4948
)
@@ -93,72 +92,84 @@
9392
joint_names_expr=["shoulder_flexion_extension"],
9493
stiffness=757.6,
9594
damping=60.3,
95+
velocity_limit_sim=3.0,
9696
),
9797
# J≈0.95 kg·m², f=4.0 Hz, ζ=1.0 → k=600.0, d=47.7
9898
"shoulder_aa": ImplicitActuatorCfg(
9999
joint_names_expr=["shoulder_abduction_adduction"],
100100
stiffness=600.0,
101101
damping=47.7,
102+
velocity_limit_sim=3.0,
102103
),
103104
# J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5
104105
"shoulder_rot": ImplicitActuatorCfg(
105106
joint_names_expr=["shoulder_rotation"],
106107
stiffness=615.5,
107108
damping=43.5,
109+
velocity_limit_sim=3.0,
108110
),
109111
# J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5
110112
"elbow": ImplicitActuatorCfg(
111113
joint_names_expr=["elbow_flexion_extension"],
112114
stiffness=615.5,
113115
damping=43.5,
116+
velocity_limit_sim=3.0,
114117
),
115118
# J≈0.45 kg·m², f=5.0 Hz, ζ=1.0 → k=444.1, d=28.3
116119
"forearm": ImplicitActuatorCfg(
117120
joint_names_expr=["forearm_rotation"],
118121
stiffness=444.1,
119122
damping=28.3,
123+
velocity_limit_sim=3.0,
120124
),
121125
# J≈0.12 kg·m², f=6.0 Hz, ζ=1.0 → k=170.5, d=9.0
122126
"wrist": ImplicitActuatorCfg(
123127
joint_names_expr=["wrist_extension"],
124128
stiffness=170.5,
125129
damping=9.0,
130+
velocity_limit_sim=3.0,
126131
),
127132
# J≈0.0012 kg·m², f=8.0 Hz, ζ=1.0 → k=3.0, d=0.12
128133
"finger_mcp": ImplicitActuatorCfg(
129134
joint_names_expr=["mcp_index", "mcp_middle", "mcp_ring", "mcp_pinky"],
130135
stiffness=3.0,
131136
damping=0.12,
137+
velocity_limit_sim=3.0,
132138
),
133139
# J≈0.0006 kg·m², f=8.0 Hz, ζ=1.0 → k=1.5, d=0.06
134140
"finger_pip": ImplicitActuatorCfg(
135141
joint_names_expr=["pip_index", "pip_middle", "pip_ring", "pip_pinky"],
136142
stiffness=1.5,
137143
damping=0.06,
144+
velocity_limit_sim=3.0,
138145
),
139146
# J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02
140147
"finger_dip": ImplicitActuatorCfg(
141148
joint_names_expr=["dip_index", "dip_middle", "dip_ring", "dip_pinky"],
142149
stiffness=0.5,
143150
damping=0.02,
151+
velocity_limit_sim=3.0,
144152
),
145153
# J≈0.003 kg·m², f=8.0 Hz, ζ=1.0 → k=7.6, d=0.30
146154
"thumb_cmc": ImplicitActuatorCfg(
147155
joint_names_expr=["cmc_thumb"],
148156
stiffness=7.6,
149157
damping=0.30,
158+
velocity_limit_sim=3.0,
150159
),
151160
# J≈0.001 kg·m², f=8.0 Hz, ζ=1.0 → k=2.5, d=0.10
152161
"thumb_mcp": ImplicitActuatorCfg(
153162
joint_names_expr=["mcp_thumb"],
154163
stiffness=2.5,
155164
damping=0.10,
165+
velocity_limit_sim=3.0,
156166
),
157167
# J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02
158168
"thumb_ip": ImplicitActuatorCfg(
159169
joint_names_expr=["ip_thumb"],
160170
stiffness=0.5,
161171
damping=0.02,
172+
velocity_limit_sim=3.0,
162173
),
163174
},
164175
)

autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
import math
22
from isaaclab.utils import configclass
3-
import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp
43
from HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.reach_env_cfg import ReachEnvCfg
5-
# from HumanoidRLPackage.HumanoidRLSetup.modelCfg.universal_robots import UR10_CFG
64
from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
75

86

7+
# Config on top of the base ReachEnvCfg, can override any settings
98
@configclass
109
class HumanoidArmReachEnvCfg(ReachEnvCfg):
1110
def __post_init__(self):
1211
super().__post_init__()
1312

14-
# self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
1513
self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
1614

1715
marker_scale = 0.02
@@ -54,10 +52,6 @@ def __post_init__(self):
5452
# self.rewards.end_effector_5_position_tracking_fine_grained.params["asset_cfg"].body_names = ["IP_THUMB_v1_.*"]
5553
# self.rewards.end_effector_5_orientation_tracking.params["asset_cfg"].body_names = ["TIP_B_5"]
5654

57-
# override actions
58-
self.actions.arm_action = mdp.JointPositionActionCfg(
59-
asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
60-
)
6155
# override command generator body
6256
# end-effector is along x-direction
6357
self.commands.ee_pose.body_name = "DIP_INDEX_v1_.*"
@@ -82,13 +76,13 @@ def __post_init__(self):
8276
@configclass
8377
class HumanoidArmReachEnvCfg_PLAY(HumanoidArmReachEnvCfg):
8478
def __post_init__(self):
85-
# post init of parent
8679
super().__post_init__()
80+
8781
# make a smaller scene for play
8882
self.scene.num_envs = 50
8983
self.scene.env_spacing = 2.5
90-
# disable randomization for play
91-
self.observations.policy.enable_corruption = False
84+
self.observations.policy.enable_corruption = False # disable randomization for play
85+
9286

9387
# (env_isaaclab) hy@hy-LOQ-15IRX9:~/Downloads/Humanoid_Wato/HumanoidRL$ PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Reach-Humanoid-Arm-v0 --headless
9488

autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py

Lines changed: 15 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import isaaclab.sim as sim_utils
44
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
55
from isaaclab.envs import ManagerBasedRLEnvCfg
6-
from isaaclab.managers import ActionTermCfg as ActionTerm
76
from isaaclab.managers import CurriculumTermCfg as CurrTerm
87
from isaaclab.managers import EventTermCfg as EventTerm
98
from isaaclab.managers import ObservationGroupCfg as ObsGroup
@@ -13,54 +12,41 @@
1312
from isaaclab.managers import TerminationTermCfg as DoneTerm
1413
from isaaclab.scene import InteractiveSceneCfg
1514
from isaaclab.utils import configclass
16-
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
1715
from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
1816

1917
import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp
2018

2119
@configclass
2220
class ReachSceneCfg(InteractiveSceneCfg):
23-
"""Configuration for the scene with a robotic arm."""
24-
25-
# world
2621
ground = AssetBaseCfg(
2722
prim_path="/World/ground",
2823
spawn=sim_utils.GroundPlaneCfg(),
2924
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
3025
)
3126

32-
# robots
3327
robot: ArticulationCfg = MISSING
3428

35-
# lights
3629
light = AssetBaseCfg(
3730
prim_path="/World/light",
3831
spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
3932
)
4033

4134

42-
##
43-
# MDP settings
44-
##
45-
46-
4735
@configclass
4836
class CommandsCfg:
49-
"""Command terms for the MDP."""
50-
5137
ee_pose = mdp.UniformPoseCommandCfg(
5238
asset_name="robot",
5339
body_name="DIP_INDEX_v1_.*",
54-
resampling_time_range=(4.0, 4.0),
40+
resampling_time_range=(4.0, 4.0), # target pose changes every sampling time
5541
debug_vis=True,
5642
ranges=mdp.UniformPoseCommandCfg.Ranges(
57-
pos_x=(-0.5, 0.5),
58-
pos_y=(0.62, 0.66),
59-
pos_z=(0.3, 0.7),
43+
pos_x=(-0.8, -0.7),
44+
pos_y=(-0.4, 0.4),
45+
pos_z=(0.0, 0.3),
6046
roll=(0.0, 0.0),
6147
pitch=(0.0, 0.0),
6248
yaw=(0.0, 0.0),
63-
),
49+
), # range of position/rotation that the target pose can appear in
6450
)
6551

6652
# ee_pose_2 = mdp.UniformPoseCommandCfg(
@@ -126,20 +112,15 @@ class CommandsCfg:
126112

127113
@configclass
128114
class ActionsCfg:
129-
"""Action specifications for the MDP."""
130-
131-
arm_action: ActionTerm = MISSING
132-
gripper_action: ActionTerm | None = None
115+
arm_action = mdp.JointPositionActionCfg(
116+
asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
117+
)
133118

134119

135120
@configclass
136121
class ObservationsCfg:
137-
"""Observation specifications for the MDP."""
138-
139122
@configclass
140123
class PolicyCfg(ObsGroup):
141-
"""Observations for policy group."""
142-
143124
# observation terms (order preserved)
144125
joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
145126
joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
@@ -181,7 +162,7 @@ class RewardsCfg:
181162
# task terms
182163
end_effector_position_tracking = RewTerm(
183164
func=mdp.position_command_error,
184-
weight=-0.2,
165+
weight=-1.0,
185166
params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "command_name": "ee_pose"},
186167
)
187168
# end_effector_2_position_tracking = RewTerm(
@@ -207,7 +188,7 @@ class RewardsCfg:
207188

208189
end_effector_position_tracking_fine_grained = RewTerm(
209190
func=mdp.position_command_error_tanh,
210-
weight=0.2,
191+
weight=0.5,
211192
params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "std": 0.1, "command_name": "ee_pose"},
212193
)
213194
# end_effector_2_position_tracking_fine_grained = RewTerm(
@@ -258,43 +239,32 @@ class RewardsCfg:
258239
# )
259240

260241
# action penalty
261-
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.00001)
242+
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.000002)
262243
joint_vel = RewTerm(
263244
func=mdp.joint_vel_l2,
264-
weight=-0.0000001,
245+
weight=-0.005,
265246
params={"asset_cfg": SceneEntityCfg("robot")},
266247
)
267248

268249

269250
@configclass
270251
class TerminationsCfg:
271-
"""Termination terms for the MDP."""
272-
273252
time_out = DoneTerm(func=mdp.time_out, time_out=True)
274253

275254

276255
@configclass
277256
class CurriculumCfg:
278-
"""Curriculum terms for the MDP."""
279-
280257
action_rate = CurrTerm(
281-
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.0001, "num_steps": 4500}
258+
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.00005, "num_steps": 18000}
282259
)
283260

284261
joint_vel = CurrTerm(
285-
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500}
262+
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.05, "num_steps": 18000}
286263
)
287264

288265

289-
##
290-
# Environment configuration
291-
##
292-
293-
294266
@configclass
295267
class ReachEnvCfg(ManagerBasedRLEnvCfg):
296-
"""Configuration for the reach end-effector pose tracking environment."""
297-
298268
# Scene settings
299269
scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5)
300270
# Basic settings
@@ -309,10 +279,8 @@ class ReachEnvCfg(ManagerBasedRLEnvCfg):
309279

310280
def __post_init__(self):
311281
"""Post initialization."""
312-
# general settings
313-
self.decimation = 4 # policy every 4 physics steps (was 2) → smoother target updates
282+
self.decimation = 4
314283
self.sim.render_interval = self.decimation
315284
self.episode_length_s = 12.0
316285
self.viewer.eye = (3.5, 3.5, 3.5)
317-
# simulation settings
318286
self.sim.dt = 1.0 / 60.0
-40 Bytes
Loading
1.04 KB
Binary file not shown.

0 commit comments

Comments
 (0)