Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions deepmd/infer/deep_pot.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,11 @@ def eval(
aparam=aparam,
**kwargs,
)
# TODO: if the grid is requested, we can directly return it without reshaping to energy, force and virial. We can also consider to return the grid in a separate key in the results dict, instead of reshaping it to energy, force and virial.
if "grid" in kwargs:
result = results["density"].reshape(nframes, -1)
return result
Comment on lines +216 to +218
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Guard against grid=None before density extraction.

if "grid" in kwargs also matches grid=None, but the backend density branch only runs when grid is not None; this can make results["density"] missing and raise at runtime. Use the same predicate here (kwargs.get("grid") is not None).

Suggested fix
-        if "grid" in kwargs:
+        if kwargs.get("grid") is not None:
             result = results["density"].reshape(nframes, -1)
             return result
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/infer/deep_pot.py` around lines 216 - 218, The condition that returns
density currently uses if "grid" in kwargs which also matches grid=None and can
access missing results["density"]; change the predicate to check the actual
value (e.g., kwargs.get("grid") is not None) before reshaping results["density"]
in the function where nframes and results are used so the density branch only
runs when grid is not None.


energy = results["energy_redu"].reshape(nframes, 1)
force = results["energy_derv_r"].reshape(nframes, natoms, 3)
virial = results["energy_derv_c_redu"].reshape(nframes, 9)
Expand Down
7 changes: 6 additions & 1 deletion deepmd/pt/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,12 @@ def train(

# Initialize DDP
if os.environ.get("LOCAL_RANK") is not None:
dist.init_process_group(backend="cuda:nccl,cpu:gloo")
import datetime

timeout = datetime.timedelta(
seconds=18000
) # set a longer timeout for for large datasets or slow file systems
dist.init_process_group(backend="cuda:nccl,cpu:gloo", timeout=timeout)

trainer = get_trainer(
config,
Expand Down
95 changes: 90 additions & 5 deletions deepmd/pt/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,7 @@ def eval(
coords, atom_types, len(atom_types.shape) > 1
)
request_defs = self._get_request_defs(atomic)
if "spin" not in kwargs or kwargs["spin"] is None:
out = self._eval_func(self._eval_model, numb_test, natoms)(
coords, cells, atom_types, fparam, aparam, request_defs, charge_spin
)
else:
if "spin" in kwargs and kwargs["spin"] is not None:
out = self._eval_func(self._eval_model_spin, numb_test, natoms)(
coords,
cells,
Expand All @@ -421,6 +417,21 @@ def eval(
request_defs,
charge_spin,
)
elif "grid" in kwargs and kwargs["grid"] is not None:
out = self._eval_func(self._eval_model_density, numb_test, natoms)(
coords,
cells,
atom_types,
np.array(kwargs["grid"]),
fparam,
aparam,
request_defs,
)
return {"density": out}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Return ndarray instead of tuple for "density" payload.

out here is a tuple from _eval_model_density (currently tuple(results)), so {"density": out} makes downstream callers receive a tuple, not an array. DeepPot.eval then calls .reshape(...) on this value and will fail.

Suggested fix
-            return {"density": out}
+            return {"density": out[0]}

Also applies to: 769-775

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/infer/deep_eval.py` at line 430, The "density" payload is returning
a tuple (from _eval_model_density/tuple(results)) which breaks DeepPot.eval when
it calls .reshape; convert the tuple to a NumPy array before returning. Locate
the return sites (the dict with "density": out around the return in deep_eval.py
and the similar block at lines ~769-775) and replace the tuple with a NumPy
ndarray (e.g., wrap results with numpy.array or ensure tuple(results) is created
as np.array(results)); add or use the existing import for numpy (np) so callers
receive an ndarray instead of a tuple.

else:
out = self._eval_func(self._eval_model, numb_test, natoms)(
coords, cells, atom_types, fparam, aparam, request_defs, charge_spin
)
Comment on lines +432 to +434
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Pass charge_spin in the default eval path.

_eval_model(...) requires charge_spin, but the non-spin/non-grid call omits it. This will raise TypeError for normal inference.

Suggested fix
-            out = self._eval_func(self._eval_model, numb_test, natoms)(
-                coords, cells, atom_types, fparam, aparam, request_defs
-            )
+            out = self._eval_func(self._eval_model, numb_test, natoms)(
+                coords, cells, atom_types, fparam, aparam, request_defs, charge_spin
+            )

Also applies to: 527-536

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/infer/deep_eval.py` around lines 432 - 434, The default
non-spin/non-grid evaluation call to self._eval_func(...)(...) omits the
required charge_spin argument for _eval_model; update the call site (the
invocation that constructs out via self._eval_func(self._eval_model, numb_test,
natoms)(...)) to include the charge_spin parameter (e.g., add charge_spin to the
argument list alongside coords, cells, atom_types, fparam, aparam,
request_defs), and apply the same change to the other similar block around the
527-536 region so both calls pass charge_spin into _eval_model.

return dict(
zip(
[x.name for x in request_defs],
Expand Down Expand Up @@ -688,6 +699,80 @@ def _eval_model_spin(
) # this is kinda hacky
return tuple(results)

def _eval_model_density(
self,
coords: np.ndarray,
cells: np.ndarray | None,
atom_types: np.ndarray,
grid: np.ndarray,
fparam: np.ndarray | None,
aparam: np.ndarray | None,
request_defs: list[OutputVariableDef],
) -> tuple[np.ndarray, ...]:
model = self.dp.to(DEVICE)

nframes = coords.shape[0]
if len(atom_types.shape) == 1:
natoms = len(atom_types)
atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
else:
natoms = len(atom_types[0])

coord_input = torch.tensor(
coords.reshape([nframes, natoms, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
grid_input = torch.tensor(
grid.reshape([nframes, -1, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
ngrid = grid_input.shape[1]
if cells is not None:
box_input = torch.tensor(
cells.reshape([nframes, 3, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
else:
box_input = None
if fparam is not None:
fparam_input = to_torch_tensor(
fparam.reshape(nframes, self.get_dim_fparam())
)
else:
fparam_input = None
if aparam is not None:
aparam_input = to_torch_tensor(
aparam.reshape(nframes, natoms, self.get_dim_aparam())
)
else:
aparam_input = None

do_atomic_virial = any(
x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
)
batch_output = model(
coord_input,
type_input,
grid=grid_input,
box=box_input,
do_atomic_virial=do_atomic_virial,
fparam=fparam_input,
aparam=aparam_input,
)
if isinstance(batch_output, tuple):
batch_output = batch_output[0]

results = []
pt_name = "density"
density_shape = [nframes, ngrid]
out = batch_output[pt_name].reshape(density_shape).detach().cpu().numpy()
results.append(out)
return tuple(results)

def _get_output_shape(
self, odef: OutputVariableDef, nframes: int, natoms: int
) -> list[int]:
Expand Down
4 changes: 4 additions & 0 deletions deepmd/pt/loss/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from .charge import (
GridDensityLoss,
)
from .denoise import (
DenoiseLoss,
)
Expand Down Expand Up @@ -28,6 +31,7 @@
"EnergyHessianStdLoss",
"EnergySpinLoss",
"EnergyStdLoss",
"GridDensityLoss",
"PropertyLoss",
"TaskLoss",
"TensorLoss",
Expand Down
137 changes: 137 additions & 0 deletions deepmd/pt/loss/charge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from typing import (
Any,
)

import torch

Comment thread
coderabbitai[bot] marked this conversation as resolved.
from deepmd.pt.loss.loss import (
TaskLoss,
)
from deepmd.pt.utils import (
env,
)
from deepmd.pt.utils.env import (
GLOBAL_PT_FLOAT_PRECISION,
)
from deepmd.utils.data import (
DataRequirementItem,
)


class GridDensityLoss(TaskLoss):
def __init__(
self,
starter_learning_rate: float = 1.0,
start_pref_d: float = 0.0,
limit_pref_d: float = 0.0,
inference: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on grid density.

Parameters
----------
starter_learning_rate : float
The learning rate at the start of the training.
start_pref_d : float
The prefactor of charge density loss at the start of the training.
limit_pref_d : float
The prefactor of charge density loss at the end of the training.
inference : bool
If true, it will output all losses found in output, ignoring the pre-factors.
**kwargs
Other keyword arguments.
"""
super().__init__()
self.starter_learning_rate = starter_learning_rate
self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Fix density-loss activation logic on Line 44.

Using and here disables density loss whenever only one prefactor endpoint is zero (common for ramp-up/ramp-down schedules), so training can silently skip this objective.

Suggested fix
-        self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
+        self.has_d = (start_pref_d != 0.0 or limit_pref_d != 0.0) or inference
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
self.has_d = (start_pref_d != 0.0 or limit_pref_d != 0.0) or inference
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/loss/charge.py` at line 44, The density-loss activation logic in
charge.py sets self.has_d incorrectly using AND between start_pref_d and
limit_pref_d so density loss is disabled when either endpoint is zero; change
the condition in the initializer that assigns self.has_d to use OR between the
endpoint checks (i.e., treat density active if either start_pref_d or
limit_pref_d is nonzero) while still preserving the existing inference flag
check (keep the overall OR with inference); locate the assignment to self.has_d
in charge.py and update the boolean expression accordingly.


self.start_pref_d = start_pref_d
self.limit_pref_d = limit_pref_d
self.inference = inference

def forward(
self,
input_dict: dict[str, torch.Tensor],
model: torch.nn.Module,
label: dict[str, torch.Tensor],
natoms: int,
learning_rate: float,
mae: bool = False,
) -> tuple[dict[str, torch.Tensor], torch.Tensor, dict[str, torch.Tensor]]:
"""Return loss on energy and force.

Parameters
----------
input_dict : dict[str, torch.Tensor]
Model inputs.
model : torch.nn.Module
Model to be used to output the predictions.
label : dict[str, torch.Tensor]
Labels.
natoms : int
The local atom number.

Returns
-------
model_pred: dict[str, torch.Tensor]
Model predictions.
loss: torch.Tensor
Loss for model to minimize.
more_loss: dict[str, torch.Tensor]
Other losses for display.
"""
model_pred = model(**input_dict)
coef = learning_rate / self.starter_learning_rate
pref_d = self.limit_pref_d + (self.start_pref_d - self.limit_pref_d) * coef

loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
more_loss = {}
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
if self.has_d and "density" in model_pred and "density" in label:
density_pred = model_pred["density"]
density_label = label["density"]
find_density = label.get("find_density", 0.0)
pref_d = pref_d * find_density
density_pred_reshape = density_pred.reshape(-1)
density_label_reshape = density_label.reshape(-1)
l2_density_loss = torch.square(
density_label_reshape - density_pred_reshape
).mean()
rmse_d = l2_density_loss.sqrt()
more_loss["rmse_d"] = self.display_if_exist(rmse_d.detach(), find_density)
l1_density_loss = torch.abs(
density_label_reshape - density_pred_reshape
).mean()
loss += (pref_d * l1_density_loss).to(GLOBAL_PT_FLOAT_PRECISION)
mae_d = l1_density_loss
more_loss["mae_d"] = self.display_if_exist(mae_d.detach(), find_density)
return model_pred, loss, more_loss

@property
def label_requirement(self) -> list[DataRequirementItem]:
"""Return data label requirements needed for this loss calculation."""
label_requirement = []
label_requirement.append(
DataRequirementItem(
"grid",
ndof=3,
atomic=True, # the grid is defined for each atom, so it is atomic
must=True,
high_prec=True,
)
)
if self.has_d:
label_requirement.append(
DataRequirementItem(
"density",
ndof=1,
atomic=True,
must=False,
high_prec=True,
)
)
return label_requirement
4 changes: 4 additions & 0 deletions deepmd/pt/model/atomic_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from .base_atomic_model import (
BaseAtomicModel,
)
from .density_atomic_model import (
DPDensityAtomicModel,
)
from .dipole_atomic_model import (
DPDipoleAtomicModel,
)
Expand Down Expand Up @@ -47,6 +50,7 @@
"BaseAtomicModel",
"DPAtomicModel",
"DPDOSAtomicModel",
"DPDensityAtomicModel",
"DPDipoleAtomicModel",
"DPEnergyAtomicModel",
"DPPolarAtomicModel",
Expand Down
Loading