From e6e35b8148f5ba7d6f726d1802bbcb9919faa77d Mon Sep 17 00:00:00 2001 From: OutisLi Date: Thu, 1 Jan 2026 20:33:47 +0800 Subject: [PATCH 1/3] fix(pt): pairtab --- .../pt/model/atomic_model/pairtab_atomic_model.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 6933fdc19a..96256c972e 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -392,6 +392,13 @@ def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tenso ------- torch.Tensor The pairwise distance between the atoms (nframes, nloc, nnei). + + Notes + ----- + When nlist contains padding indices that have been masked to 0, the + corresponding diff vectors may become zero (if atom 0 happens to be the + center atom itself). To avoid NaN gradients during backpropagation, + Use a safe norm computation with an epsilon floor on the squared sum. """ nframes, nloc, nnei = nlist.shape coord_l = coords[:, :nloc].view(nframes, -1, 1, 3) @@ -399,7 +406,13 @@ def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tenso coord_r = torch.gather(coords, 1, index) coord_r = coord_r.view(nframes, nloc, nnei, 3) diff = coord_r - coord_l - pairwise_rr = torch.linalg.norm(diff, dim=-1, keepdim=True).squeeze(-1) + # Use safe norm to avoid NaN gradients when diff is zero (e.g., for + # padding entries where masked nlist index 0 points to self). + # The epsilon 1e-14 is small enough to not affect physical distances + # (atomic distances are typically > 0.1 Å) while preventing NaN. + pairwise_rr = torch.sqrt( + torch.sum(diff * diff, dim=-1, keepdim=True).clamp(min=1e-14) + ).squeeze(-1) return pairwise_rr @staticmethod From 5a210230581bb924f1d557615ce24606d550dd1b Mon Sep 17 00:00:00 2001 From: OutisLi <137472077+OutisLi@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:50:53 +0800 Subject: [PATCH 2/3] Update deepmd/pt/model/atomic_model/pairtab_atomic_model.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: OutisLi <137472077+OutisLi@users.noreply.github.com> --- deepmd/pt/model/atomic_model/pairtab_atomic_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 96256c972e..5f1cbdb44a 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -398,7 +398,7 @@ def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tenso When nlist contains padding indices that have been masked to 0, the corresponding diff vectors may become zero (if atom 0 happens to be the center atom itself). To avoid NaN gradients during backpropagation, - Use a safe norm computation with an epsilon floor on the squared sum. + use a safe norm computation with an epsilon floor on the squared sum. """ nframes, nloc, nnei = nlist.shape coord_l = coords[:, :nloc].view(nframes, -1, 1, 3) From b646fa159226a51fe946a51c2e8bc2a1a3bf51a3 Mon Sep 17 00:00:00 2001 From: OutisLi Date: Mon, 5 Jan 2026 14:37:50 +0800 Subject: [PATCH 3/3] fix --- .../atomic_model/pairtab_atomic_model.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py index 5f1cbdb44a..a77e5391f8 100644 --- a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -395,10 +395,8 @@ def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tenso Notes ----- - When nlist contains padding indices that have been masked to 0, the - corresponding diff vectors may become zero (if atom 0 happens to be the - center atom itself). To avoid NaN gradients during backpropagation, - use a safe norm computation with an epsilon floor on the squared sum. + Safe gradient implementation: when diff is zero (padding entries), + both distance and gradient are zero. """ nframes, nloc, nnei = nlist.shape coord_l = coords[:, :nloc].view(nframes, -1, 1, 3) @@ -406,12 +404,16 @@ def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tenso coord_r = torch.gather(coords, 1, index) coord_r = coord_r.view(nframes, nloc, nnei, 3) diff = coord_r - coord_l - # Use safe norm to avoid NaN gradients when diff is zero (e.g., for - # padding entries where masked nlist index 0 points to self). - # The epsilon 1e-14 is small enough to not affect physical distances - # (atomic distances are typically > 0.1 Å) while preventing NaN. - pairwise_rr = torch.sqrt( - torch.sum(diff * diff, dim=-1, keepdim=True).clamp(min=1e-14) + diff_sq = torch.sum(diff * diff, dim=-1, keepdim=True) + + # When diff is zero, output is zero and gradient is also zero + mask = diff_sq.squeeze(-1) > 0 + pairwise_rr = torch.where( + mask.unsqueeze(-1), + torch.sqrt( + torch.where(mask.unsqueeze(-1), diff_sq, torch.ones_like(diff_sq)) + ), + torch.zeros_like(diff_sq), ).squeeze(-1) return pairwise_rr