From c47e9e86479ea777c182d56ad0128fd1685506bf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 23 Sep 2025 13:20:53 +0000
Subject: [PATCH 1/2] Initial plan


From ca6e66e0bb94e6e5543dd227e0d39fca0441d116 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 23 Sep 2025 13:28:53 +0000
Subject: [PATCH 2/2] Fix reward function logic in locomotion environment

Co-authored-by: mihirk284 <27280479+mihirk284@users.noreply.github.com>
---
 .../isaaclab_tasks/direct/locomotion/locomotion_env.py   | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/locomotion/locomotion_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/locomotion/locomotion_env.py
index a049354d3b41..79a00480e2c2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/locomotion/locomotion_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/locomotion/locomotion_env.py
@@ -193,12 +193,11 @@ def compute_rewards(
     alive_reward_scale: float,
     motor_effort_ratio: torch.Tensor,
 ):
-    heading_weight_tensor = torch.ones_like(heading_proj) * heading_weight
-    heading_reward = torch.where(heading_proj > 0.8, heading_weight_tensor, heading_weight * heading_proj / 0.8)
+    # reward for heading towards the target: full weight above 0.8 alignment, scaled linearly below
+    heading_reward = torch.where(heading_proj > 0.8, heading_weight, heading_weight * heading_proj / 0.8)
 
-    # aligning up axis of robot and environment
-    up_reward = torch.zeros_like(heading_reward)
-    up_reward = torch.where(up_proj > 0.93, up_reward + up_weight, up_reward)
+    # reward an upright posture: flat up_weight bonus when up_proj exceeds 0.93, otherwise zero
+    up_reward = torch.where(up_proj > 0.93, up_weight, 0.0)
 
     # energy penalty for movement
     actions_cost = torch.sum(actions**2, dim=-1)