diff --git a/deepmd/calculator.py b/deepmd/calculator.py index 356bfeb9ce..dad2862ee5 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -49,6 +49,13 @@ class DP(Calculator): will infer this information from model, by default None neighbor_list : ase.neighborlist.NeighborList, optional The neighbor list object. If None, then build the native neighbor list. + nlist_backend : str, default: "auto" + Which algorithm builds the neighbor list. ``"auto"`` uses the optional + O(N) ``vesin`` cell list when applicable (much faster for large systems) + and silently falls back to the native O(N^2) builder otherwise. + ``"vesin"`` strictly requires the vesin path (raises if it is missing or + the model is spin / hessian); ``"native"`` forces the built-in + all-pairs builder. Ignored when an explicit ``neighbor_list`` is given. head : Union[str, None], optional a specific model branch choosing from pretrained model, by default None @@ -90,6 +97,7 @@ def __init__( label: str = "DP", type_dict: dict[str, int] | None = None, neighbor_list: Optional["NeighborList"] = None, + nlist_backend: str = "auto", head: str | None = None, **kwargs: Any, ) -> None: @@ -97,6 +105,7 @@ def __init__( self.dp = DeepPot( str(Path(model).resolve()), neighbor_list=neighbor_list, + nlist_backend=nlist_backend, head=head, ) if type_dict: diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index 0887ceb9df..949e8b6792 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -46,6 +46,9 @@ from deepmd.pt.model.model import ( get_model, ) +from deepmd.pt.model.model.transform_output import ( + communicate_extended_output, +) from deepmd.pt.model.network.network import ( TypeEmbedNetConsistent, ) @@ -67,6 +70,10 @@ to_numpy_array, to_torch_tensor, ) +from deepmd.pt_expt.utils.nlist import ( + build_neighbor_list_vesin_torch, + is_vesin_torch_available, +) from deepmd.utils.batch_size import ( RetrySignal, ) @@ -116,6 +123,14 @@ class DeepEval(DeepEvalBackend): neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional The ASE neighbor list class to produce the neighbor list. If None, the neighbor list will be built natively in the model. + nlist_backend : str, default: "auto" + Which algorithm builds the neighbor list on the Python inference path. + ``"auto"`` uses the O(N) ``vesin`` cell list when it is applicable + (``vesin`` installed and a non-spin, non-hessian model) and silently + falls back to the native all-pairs builder otherwise. ``"vesin"`` + strictly requires the vesin path and raises ``ValueError`` if it cannot + be used (vesin missing, or a spin / hessian model). ``"native"`` forces + the built-in all-pairs O(N^2) builder. **kwargs : dict Keyword arguments. """ @@ -127,6 +142,7 @@ def __init__( *args: Any, auto_batch_size: bool | int | AutoBatchSize = True, neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + nlist_backend: str = "auto", head: str | int | None = None, no_jit: bool = False, **kwargs: Any, @@ -239,6 +255,48 @@ def __init__( if callable(self._has_spin): self._has_spin = self._has_spin() self._has_hessian = self.model_def_script.get("hessian_mode", False) + self._setup_nlist_backend(nlist_backend) + + def _setup_nlist_backend(self, nlist_backend: str) -> None: + """Resolve the requested neighbor-list backend for the inference path. + + ``"auto"`` uses the O(N) vesin cell list when applicable and silently + falls back to native otherwise; ``"vesin"`` is strict and raises if it + cannot be honored; ``"native"`` forces the native builder. + """ + if nlist_backend not in ("auto", "vesin", "native"): + raise ValueError( + f"Unknown nlist_backend {nlist_backend!r}; " + "expected 'auto', 'vesin', or 'native'." + ) + self.nlist_backend = nlist_backend + # The vesin O(N) builder only handles the standard (non-spin, + # non-hessian) energy path; report why it is unavailable, if so. + unsupported = None + if self._has_spin: + unsupported = "spin" + elif self._has_hessian: + unsupported = "hessian" + if nlist_backend == "vesin": + # explicit request: fail loudly if it cannot be honored + if not is_vesin_torch_available(): + raise ValueError( + "nlist_backend='vesin' was requested but the 'vesin.torch' " + "package is not installed; install it (`pip install " + "vesin[torch]`) or use nlist_backend='native' (or 'auto')." + ) + if unsupported is not None: + raise ValueError( + f"nlist_backend='vesin' is not supported for {unsupported} " + "models; use nlist_backend='native' (or 'auto')." + ) + self._use_vesin = True + elif nlist_backend == "native": + self._use_vesin = False + else: # auto: use vesin when possible, otherwise fall back silently + self._use_vesin = is_vesin_torch_available() and unsupported is None + if self._use_vesin: + self._nsel = self.dp.model["Default"].get_nsel() def get_rcut(self) -> float: """Get the cutoff radius of this model.""" @@ -539,6 +597,16 @@ def _eval_model( request_defs: list[OutputVariableDef], charge_spin: np.ndarray | None, ) -> tuple[np.ndarray, ...]: + if self._use_vesin: + return self._eval_model_vesin( + coords, + cells, + atom_types, + fparam, + aparam, + request_defs, + charge_spin, + ) model = self.dp.to(DEVICE) prec = NP_PRECISION_DICT[RESERVED_PRECISION_DICT[GLOBAL_PT_FLOAT_PRECISION]] @@ -612,6 +680,111 @@ def _eval_model( ) # this is kinda hacky return tuple(results) + def _eval_model_vesin( + self, + coords: np.ndarray, + cells: np.ndarray | None, + atom_types: np.ndarray, + fparam: np.ndarray | None, + aparam: np.ndarray | None, + request_defs: list[OutputVariableDef], + charge_spin: np.ndarray | None, + ) -> tuple[np.ndarray, ...]: + """Evaluate using an O(N) ``vesin`` neighbor list built on the host. + + The neighbor list is built outside the model graph and fed to the + exported ``forward_common_lower`` interface; the extended-region output + is mapped back to local atoms with :func:`communicate_extended_output`, + exactly mirroring the native ``forward_common`` path. This avoids the + native all-pairs O(N^2) neighbor-list build, which dominates runtime for + large systems on the Python / ASE inference path. + """ + model = self.dp.model["Default"].to(DEVICE) + prec = NP_PRECISION_DICT[RESERVED_PRECISION_DICT[GLOBAL_PT_FLOAT_PRECISION]] + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coords = coords.reshape([nframes, natoms, 3]) + # Build the neighbor list on the model's device with the vesin.torch + # cell list (CPU or CUDA). The lower interface re-formats (distance-sort, + # truncate, type-split) the candidate list, so a single mixed list of the + # nearest sum(sel) neighbors is sufficient here (matches forward_common, + # which builds the nlist with mixed_types=True and distinguishes in the + # lower interface). + coord_t = torch.tensor( + coords.astype(prec), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + atype_t = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + box_t = ( + torch.tensor( + cells.reshape([nframes, 3, 3]).astype(prec), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + if cells is not None + else None + ) + ext_coord_input, ext_atype_input, nlist_input, mapping_input = ( + build_neighbor_list_vesin_torch( + coord_t, box_t, atype_t, self.rcut, [self._nsel], False + ) + ) + + if fparam is not None: + fparam_input = to_torch_tensor( + fparam.reshape(nframes, self.get_dim_fparam()) + ) + else: + fparam_input = None + if aparam is not None: + aparam_input = to_torch_tensor( + aparam.reshape(nframes, natoms, self.get_dim_aparam()) + ) + else: + aparam_input = None + if charge_spin is not None: + charge_spin_input = to_torch_tensor(charge_spin.reshape(nframes, 2)) + else: + charge_spin_input = None + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C for x in request_defs + ) + + model_ret = model.forward_common_lower( + ext_coord_input, + ext_atype_input, + nlist_input, + mapping_input, + fparam=fparam_input, + aparam=aparam_input, + do_atomic_virial=do_atomic_virial, + charge_spin=charge_spin_input, + ) + batch_output = communicate_extended_output( + model_ret, + self.output_def, + mapping_input, + do_atomic_virial=do_atomic_virial, + ) + + results = [] + for odef in request_defs: + # communicate_extended_output keys are the internal output names, + # which match odef.name (e.g. "energy_redu", "energy_derv_r"). + if odef.name in batch_output and batch_output[odef.name] is not None: + shape = self._get_output_shape(odef, nframes, natoms) + out = batch_output[odef.name].reshape(shape).detach().cpu().numpy() + results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append(np.full(np.abs(shape), np.nan, dtype=prec)) + return tuple(results) + def _eval_model_spin( self, coords: np.ndarray, diff --git a/deepmd/pt_expt/infer/deep_eval.py b/deepmd/pt_expt/infer/deep_eval.py index 5af63024b4..f8af7bf55a 100644 --- a/deepmd/pt_expt/infer/deep_eval.py +++ b/deepmd/pt_expt/infer/deep_eval.py @@ -54,6 +54,10 @@ from deepmd.pt.utils.auto_batch_size import ( AutoBatchSize, ) +from deepmd.pt_expt.utils.nlist import ( + build_neighbor_list_vesin_torch, + is_vesin_torch_available, +) if TYPE_CHECKING: import ase.neighborlist @@ -103,6 +107,7 @@ def __init__( *args: Any, auto_batch_size: bool | int | AutoBatchSize = True, neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + nlist_backend: str = "auto", **kwargs: Any, ) -> None: self.output_def = output_def @@ -122,6 +127,7 @@ def __init__( "backend: expected `.pt2` / `.pte` (deployable archives) or " "`.pt` (training checkpoint)." ) + self._setup_nlist_backend(nlist_backend) if isinstance(auto_batch_size, bool): if auto_batch_size: @@ -135,6 +141,48 @@ def __init__( else: raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + def _setup_nlist_backend(self, nlist_backend: str) -> None: + """Resolve the requested neighbor-list backend for the inference path. + + ``"auto"`` uses the O(N) vesin cell list when applicable and silently + falls back to native otherwise; ``"vesin"`` is strict and raises if it + cannot be honored; ``"native"`` forces the native builder. An explicitly + supplied ASE ``neighbor_list`` takes precedence over the vesin path. + """ + if nlist_backend not in ("auto", "vesin", "native"): + raise ValueError( + f"Unknown nlist_backend {nlist_backend!r}; " + "expected 'auto', 'vesin', or 'native'." + ) + self.nlist_backend = nlist_backend + ase_provided = self.neighbor_list is not None + unsupported = "spin" if self._is_spin else None + if nlist_backend == "vesin": + # explicit request: fail loudly if it cannot be honored + if not is_vesin_torch_available(): + raise ValueError( + "nlist_backend='vesin' was requested but the 'vesin.torch' " + "package is not installed; install it (`pip install " + "vesin[torch]`) or use nlist_backend='native' (or 'auto')." + ) + if unsupported is not None: + raise ValueError( + f"nlist_backend='vesin' is not supported for {unsupported} " + "models; use nlist_backend='native' (or 'auto')." + ) + if ase_provided: + raise ValueError( + "nlist_backend='vesin' conflicts with an explicitly supplied " + "neighbor_list; pass only one." + ) + self._use_vesin = True + elif nlist_backend == "native": + self._use_vesin = False + else: # auto: use vesin when possible, otherwise fall back silently + self._use_vesin = ( + is_vesin_torch_available() and unsupported is None and not ase_provided + ) + def _init_from_model_json(self, model_json_str: str) -> None: """Deserialize model.json and derive model API from the dpmodel instance.""" from deepmd.pt_expt.model.model import ( @@ -1053,7 +1101,30 @@ def _prepare_inputs( ) coord_input = coords.reshape(nframes, natoms, 3) - if self.neighbor_list is not None: + if self._use_vesin: + # device-resident vesin.torch build: on GPU the neighbor search stays + # on the GPU. forward_common_lower re-formats the candidate list, so + # the mixed/distinguished choice here only mirrors the ASE path. + coord_t = torch.tensor(coord_input, dtype=torch.float64, device=DEVICE) + atype_t = torch.tensor(atom_types, dtype=torch.int64, device=DEVICE) + box_t = ( + torch.tensor( + cells.reshape(nframes, 3, 3), dtype=torch.float64, device=DEVICE + ) + if cells is not None + else None + ) + ext_coord_t, ext_atype_t, nlist_t, mapping_t = ( + build_neighbor_list_vesin_torch( + coord_t, + box_t, + atype_t, + self._rcut, + self._sel, + distinguish_types=not self._mixed_types, + ) + ) + elif self.neighbor_list is not None: # ASE path: builds nlist in numpy, then convert to tensors extended_coord, extended_atype, nlist, mapping = self._build_nlist_ase( coord_input, diff --git a/deepmd/pt_expt/utils/nlist.py b/deepmd/pt_expt/utils/nlist.py new file mode 100644 index 0000000000..6b4d6e4980 --- /dev/null +++ b/deepmd/pt_expt/utils/nlist.py @@ -0,0 +1,182 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Device-resident neighbor-list builders for the pt_expt inference path. + +These are torch-specific helpers (kept out of the array-API ``dpmodel`` +backend) that build the extended system and neighbor list with the +``vesin.torch`` cell list. The neighbor search runs on the device of the +input coordinates (CPU or CUDA), so on a GPU the whole build stays on the GPU +and avoids a host round-trip. The neighbor *search* is non-differentiable, so +using an external library here does not affect the model's autograd graph. +""" + +import torch + +from deepmd.dpmodel.utils.nlist import ( + nlist_distinguish_types, +) + + +def is_vesin_torch_available() -> bool: + """Whether the device-capable ``vesin.torch`` neighbor list is importable.""" + try: + import vesin.torch # noqa: F401 + except ImportError: + return False + return True + + +def build_neighbor_list_vesin_torch( + coord: torch.Tensor, + box: torch.Tensor | None, + atype: torch.Tensor, + rcut: float, + sel: list[int], + distinguish_types: bool, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Build the extended system and neighbor list with ``vesin.torch``. + + Parameters + ---------- + coord : torch.Tensor + local atom coordinates, shape (nframes, nloc, 3). + box : torch.Tensor or None + simulation cell, shape (nframes, 3, 3). ``None`` for non-periodic. + atype : torch.Tensor + atom types, shape (nframes, nloc). + rcut : float + cutoff radius. + sel : list[int] + maximal number of selected neighbors (summed over types). + distinguish_types : bool + whether to reorder the neighbor list per atom type. + + Returns + ------- + extended_coord : torch.Tensor, shape (nframes, nall, 3) + extended_atype : torch.Tensor, shape (nframes, nall) + nlist : torch.Tensor, shape (nframes, nloc, sum(sel)) + mapping : torch.Tensor, shape (nframes, nall) + """ + device = coord.device + nframes = coord.shape[0] + frame_results = [ + _build_neighbor_list_vesin_torch_single( + coord[ff], + box[ff] if box is not None else None, + atype[ff], + rcut, + sel, + distinguish_types, + ) + for ff in range(nframes) + ] + max_nall = max(ec.shape[0] for ec, _, _, _ in frame_results) + ext_coords, ext_atypes, nlists, mappings = [], [], [], [] + for ec, ea, nl, mp in frame_results: + pad = max_nall - ec.shape[0] + if pad > 0: + ec = torch.cat( + [ec, torch.zeros((pad, 3), dtype=ec.dtype, device=device)], dim=0 + ) + ea = torch.cat( + [ea, torch.full((pad,), -1, dtype=ea.dtype, device=device)], dim=0 + ) + mp = torch.cat( + [mp, torch.zeros((pad,), dtype=mp.dtype, device=device)], dim=0 + ) + ext_coords.append(ec) + ext_atypes.append(ea) + nlists.append(nl) + mappings.append(mp) + return ( + torch.stack(ext_coords, dim=0), + torch.stack(ext_atypes, dim=0), + torch.stack(nlists, dim=0), + torch.stack(mappings, dim=0), + ) + + +def _build_neighbor_list_vesin_torch_single( + positions: torch.Tensor, + cell: torch.Tensor | None, + atype: torch.Tensor, + rcut: float, + sel: list[int], + distinguish_types: bool, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Single-frame variant of :func:`build_neighbor_list_vesin_torch`.""" + import vesin.torch + + device = positions.device + nsel = sum(sel) + nloc = positions.shape[0] + periodic = cell is not None + box = ( + cell if periodic else torch.zeros((3, 3), dtype=positions.dtype, device=device) + ) + + # Pin the default device to the input's device: vesin.torch allocates some + # internal tensors on the ambient default device, which may be a fake/other + # device in some contexts (e.g. tests set a placeholder CUDA default). + nl = vesin.torch.NeighborList(cutoff=rcut, full_list=True) + with torch.device(device): + ii, jj, ss = nl.compute( + points=positions, box=box, periodic=periodic, quantities="ijS" + ) + ii = ii.to(torch.int64) + jj = jj.to(torch.int64) + ss = ss.to(positions.dtype) + + # ghost atoms: neighbors reached through a non-zero periodic shift + out_mask = torch.any(ss != 0, dim=1) + out_idx = jj[out_mask] + out_coords = positions[out_idx] + ss[out_mask] @ box + nghost = int(out_idx.shape[0]) + + extended_coord = torch.cat([positions, out_coords], dim=0) + extended_atype = torch.cat([atype, atype[out_idx]], dim=0) + mapping = torch.cat( + [torch.arange(nloc, dtype=torch.int64, device=device), out_idx], dim=0 + ) + + # remap neighbor column indices: ghosts -> [nloc, nloc + nghost) + neigh_idx = jj.clone() + neigh_idx[out_mask] = torch.arange( + nloc, nloc + nghost, dtype=torch.int64, device=device + ) + + # group pairs by center atom (vesin does not guarantee CSR ordering) + counts = torch.bincount(ii, minlength=nloc) + max_nn = int(counts.max()) if counts.numel() > 0 else 0 + order = torch.argsort(ii, stable=True) + rows = ii[order] + cols = torch.arange(ii.shape[0], dtype=torch.int64, device=device) - ( + torch.repeat_interleave(torch.cumsum(counts, 0) - counts, counts) + ) + dense_idx = torch.full((nloc, max_nn), -1, dtype=torch.int64, device=device) + if ii.shape[0] > 0: + dense_idx[rows, cols] = neigh_idx[order] + + # sort candidates by distance, keep the nsel nearest within rcut, pad with -1 + valid = dense_idx >= 0 + lookup = torch.where(valid, dense_idx, torch.zeros_like(dense_idx)) + dists = torch.linalg.norm(extended_coord[lookup] - positions[:, None, :], dim=-1) + valid &= dists <= rcut + dists = torch.where(valid, dists, torch.full_like(dists, float("inf"))) + sort_order = torch.argsort(dists, dim=-1) + sorted_idx = torch.take_along_dim(dense_idx, sort_order, dim=-1) + sorted_valid = torch.take_along_dim(valid, sort_order, dim=-1) + + nlist = torch.full((nloc, nsel), -1, dtype=torch.int64, device=device) + keep = min(nsel, max_nn) + if keep > 0: + nlist[:, :keep] = torch.where( + sorted_valid[:, :keep], + sorted_idx[:, :keep], + torch.full_like(sorted_idx[:, :keep], -1), + ) + + if distinguish_types: + nlist = nlist_distinguish_types(nlist[None], extended_atype[None], sel)[0] + + return extended_coord, extended_atype, nlist, mapping diff --git a/pyproject.toml b/pyproject.toml index 35fc0fdb18..430e27e98b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,9 @@ dependencies = [ 'array-api-compat', 'lmdb', 'msgpack', + # O(N) neighbor-list backend for the Python / ASE inference path; the + # [torch] extra adds vesin.torch for a device-resident (GPU) build. + 'vesin[torch]', ] requires-python = ">=3.10" keywords = ["deepmd"] diff --git a/source/tests/pt/model/test_deeppot.py b/source/tests/pt/model/test_deeppot.py index 0ae5b5f3a3..9a1cda2fb5 100644 --- a/source/tests/pt/model/test_deeppot.py +++ b/source/tests/pt/model/test_deeppot.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import importlib.util import json import os import unittest @@ -20,6 +21,9 @@ from deepmd.pt.infer.deep_eval import ( DeepPot, ) +from deepmd.pt.model.model import ( + get_model, +) class TestDeepPot(unittest.TestCase): @@ -115,6 +119,152 @@ def test_eval_typeebd(self) -> None: ) np.testing.assert_allclose(eval_typeebd[-1], np.zeros_like(eval_typeebd[-1])) + # --- nlist_backend (vesin) option --------------------------------------- + + _cell = np.array( + [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + ).reshape(1, 3, 3) + _coord = np.array( + [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + ).reshape(1, -1, 3) + _atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1) + + def test_nlist_backend_default_is_auto(self) -> None: + # default "auto" uses vesin for this (non-spin energy) model + dp = DeepPot(str(self.model)) + self.assertEqual(dp.deep_eval.nlist_backend, "auto") + self.assertEqual( + dp.deep_eval._use_vesin, + importlib.util.find_spec("vesin.torch") is not None, + ) + + def test_nlist_backend_native_disables_vesin(self) -> None: + dp = DeepPot(str(self.model), nlist_backend="native") + self.assertEqual(dp.deep_eval.nlist_backend, "native") + self.assertFalse(dp.deep_eval._use_vesin) + + def test_nlist_backend_invalid_raises(self) -> None: + with self.assertRaises(ValueError): + DeepPot(str(self.model), nlist_backend="bogus") + + def test_nlist_backend_vesin_unavailable(self) -> None: + # "auto" silently falls back; explicit "vesin" raises. + import deepmd.pt.infer.deep_eval as deep_eval_mod + + original = deep_eval_mod.is_vesin_torch_available + deep_eval_mod.is_vesin_torch_available = lambda: False + try: + dp = DeepPot(str(self.model), nlist_backend="auto") + self.assertFalse(dp.deep_eval._use_vesin) + with self.assertRaises(ValueError): + DeepPot(str(self.model), nlist_backend="vesin") + finally: + deep_eval_mod.is_vesin_torch_available = original + + # spin gate-off is covered end-to-end on a real spin model in + # TestDeepPotSpinNlistBackend below. + + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None, "vesin not installed" + ) + def test_nlist_backend_hessian(self) -> None: + # hessian models: "auto" falls back to native, explicit "vesin" raises. + dp = DeepPot(str(self.model), nlist_backend="auto") + dp.deep_eval._has_hessian = True + dp.deep_eval._setup_nlist_backend("auto") + self.assertFalse(dp.deep_eval._use_vesin) + with self.assertRaises(ValueError): + dp.deep_eval._setup_nlist_backend("vesin") + + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None, "vesin not installed" + ) + def test_nlist_backend_vesin_consistency(self) -> None: + """Vesin O(N) nlist must match the native builder (PBC + non-PBC).""" + dp_native = DeepPot(str(self.model), nlist_backend="native") + dp_vesin = DeepPot(str(self.model), nlist_backend="vesin") + self.assertFalse(dp_native.deep_eval._use_vesin) + self.assertTrue(dp_vesin.deep_eval._use_vesin) + + for cell in (self._cell, None): + e1, f1, v1, ae1, av1 = dp_native.eval( + self._coord, cell, self._atype, atomic=True + ) + e2, f2, v2, ae2, av2 = dp_vesin.eval( + self._coord, cell, self._atype, atomic=True + ) + np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10, err_msg="energy") + np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10, err_msg="force") + np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10, err_msg="virial") + np.testing.assert_allclose( + ae1, ae2, rtol=1e-10, atol=1e-10, err_msg="atom_energy" + ) + np.testing.assert_allclose( + av1, av2, rtol=1e-10, atol=1e-10, err_msg="atom_virial" + ) + + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None, "vesin not installed" + ) + def test_nlist_backend_vesin_multiframe(self) -> None: + """Vesin nlist with multiple frames must match native, for both + auto_batch_size disabled and enabled (the latter with a small batch + size forcing the AutoBatchSize loop to split frames into batches). + """ + nframes = 5 + coord = np.tile(self._coord, (nframes, 1, 1)) + cell = np.tile(self._cell, (nframes, 1, 1)) + # auto_batch_size=False: single call; =2: forces 3 batches over 5 frames. + for auto_batch_size in (False, 2): + dp_native = DeepPot( + str(self.model), + nlist_backend="native", + auto_batch_size=auto_batch_size, + ) + dp_vesin = DeepPot( + str(self.model), + nlist_backend="vesin", + auto_batch_size=auto_batch_size, + ) + e1, f1, v1 = dp_native.eval(coord, cell, self._atype) + e2, f2, v2 = dp_vesin.eval(coord, cell, self._atype) + self.assertEqual(e2.shape, (nframes, 1)) + np.testing.assert_allclose( + e1, e2, rtol=1e-10, atol=1e-10, err_msg=f"energy abs={auto_batch_size}" + ) + np.testing.assert_allclose( + f1, f2, rtol=1e-10, atol=1e-10, err_msg=f"force abs={auto_batch_size}" + ) + np.testing.assert_allclose( + v1, v2, rtol=1e-10, atol=1e-10, err_msg=f"virial abs={auto_batch_size}" + ) + class TestDeepPotFrozen(TestDeepPot): def setUp(self) -> None: @@ -136,4 +286,85 @@ def test_dp_test_cpu(self) -> None: self.test_dp_test() +_SPIN_CONFIG = { + "type_map": ["Ni", "O"], + "descriptor": { + "type": "se_atten", + "sel": 30, + "rcut_smth": 2.0, + "rcut": 6.0, + "neuron": [2, 4, 8], + "axis_neuron": 4, + "attn": 5, + "attn_layer": 2, + "attn_dotr": True, + "attn_mask": False, + "activation_function": "tanh", + "scaling_factor": 1.0, + "normalize": True, + "temperature": 1.0, + "type_one_side": True, + "seed": 1, + }, + "fitting_net": {"neuron": [5, 5, 5], "resnet_dt": True, "seed": 1}, + "spin": {"use_spin": [True, False], "virtual_scale": [0.3140, 0.0]}, +} + + +class TestDeepPotSpinNlistBackend(unittest.TestCase): + """Real spin model: nlist_backend='vesin' must gate off to the native + builder and the spin eval path must run end-to-end with identical results. + """ + + @classmethod + def setUpClass(cls) -> None: + torch.manual_seed(1) + model = get_model(deepcopy(_SPIN_CONFIG)) + cls.model_file = "spin_model_nlist_backend.pth" + torch.jit.script(model).save(cls.model_file) + cls.coord = np.array( + [12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 0.25, 3.32, 1.68, + 3.36, 3.00, 1.81, 3.51, 2.51, 2.60, 4.27, 3.22, 1.56] + ).reshape(1, -1) # fmt: skip + cls.spin = np.array( + [0.13, 0.02, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.14, 0.10, 0.12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + ).reshape(1, -1) # fmt: skip + cls.atype = [0, 1, 1, 0, 1, 1] + cls.box = (np.eye(3) * 13.0).reshape(1, -1) + + @classmethod + def tearDownClass(cls) -> None: + if os.path.isfile(cls.model_file): + os.remove(cls.model_file) + + def test_spin_model_explicit_vesin_raises(self) -> None: + # a real spin model: explicit "vesin" must fail loudly... + self.assertTrue(DeepPot(self.model_file).deep_eval._has_spin) + with self.assertRaises(ValueError): + DeepPot(self.model_file, nlist_backend="vesin") + # ...while "auto" silently keeps the native builder. + dp_auto = DeepPot(self.model_file, nlist_backend="auto") + self.assertFalse(dp_auto.deep_eval._use_vesin) + + def test_spin_eval_auto_matches_native(self) -> None: + """A spin model under "auto" runs the native spin eval path and gives + identical results to nlist_backend='native'. + """ + dp_native = DeepPot(self.model_file, nlist_backend="native") + dp_auto = DeepPot(self.model_file, nlist_backend="auto") + self.assertFalse(dp_auto.deep_eval._use_vesin) + + rn = dp_native.eval( + self.coord, self.box, self.atype, atomic=True, spin=self.spin + ) + ra = dp_auto.eval(self.coord, self.box, self.atype, atomic=True, spin=self.spin) + # e, f, v, ae, av, fm are float outputs; mm (mask_mag) is integer. + for idx, name in enumerate(["e", "f", "v", "ae", "av", "fm"]): + np.testing.assert_allclose( + rn[idx], ra[idx], rtol=1e-10, atol=1e-10, err_msg=name + ) + np.testing.assert_array_equal(rn[6], ra[6]) # mask_mag + + # TestFparamAparamPT: moved to infer/test_models.py diff --git a/source/tests/pt/test_calculator.py b/source/tests/pt/test_calculator.py index 7458117ca3..8e78a17dbd 100644 --- a/source/tests/pt/test_calculator.py +++ b/source/tests/pt/test_calculator.py @@ -4,6 +4,9 @@ from copy import ( deepcopy, ) +from importlib.util import ( + find_spec, +) from pathlib import ( Path, ) @@ -194,3 +197,79 @@ def test_calculator(self) -> None: np.testing.assert_allclose(f0[idx_perm, :], f1, rtol=low_prec, atol=prec) np.testing.assert_allclose(s0, s1, rtol=low_prec, atol=prec) np.testing.assert_allclose(v0, v1, rtol=low_prec, atol=prec) + + +_CALC_CONFIG = { + "type_map": ["O", "H"], + "descriptor": { + "type": "se_e2_a", + "sel": [20, 20], + "rcut_smth": 0.5, + "rcut": 4.0, + "neuron": [8], + "resnet_dt": False, + "axis_neuron": 4, + "seed": 1, + }, + "fitting_net": {"neuron": [8], "resnet_dt": True, "seed": 1}, +} + + +@unittest.skipUnless(find_spec("vesin.torch") is not None, "vesin not installed") +class TestCalculatorNlistBackend(unittest.TestCase): + """The ASE DP calculator must thread nlist_backend to DeepPot and give + backend-independent results. + """ + + @classmethod + def setUpClass(cls) -> None: + from deepmd.pt.model.model import ( + get_model, + ) + + torch.manual_seed(1) + cls.model_file = "calc_model_nlist_backend.pth" + torch.jit.script(get_model(deepcopy(_CALC_CONFIG))).save(cls.model_file) + + @classmethod + def tearDownClass(cls) -> None: + import os + + if os.path.isfile(cls.model_file): + os.remove(cls.model_file) + + def test_calculator_nlist_backend(self) -> None: + from ase import ( + Atoms, + ) + + from deepmd.calculator import ( + DP, + ) + + rng = np.random.default_rng(7) + atoms = Atoms( + numbers=[8, 1, 1, 8, 1, 1], + positions=rng.random((6, 3)) * 8.0, + cell=np.eye(3) * 8.0, + pbc=True, + ) + calc_native = DP(self.model_file, nlist_backend="native") + calc_vesin = DP(self.model_file, nlist_backend="vesin") + # the kwarg must reach the underlying DeepPot backend + self.assertEqual(calc_native.dp.deep_eval.nlist_backend, "native") + self.assertTrue(calc_vesin.dp.deep_eval._use_vesin) + + a_native = atoms.copy() + a_native.calc = calc_native + a_vesin = atoms.copy() + a_vesin.calc = calc_vesin + np.testing.assert_allclose( + a_native.get_potential_energy(), + a_vesin.get_potential_energy(), + rtol=1e-10, + atol=1e-10, + ) + np.testing.assert_allclose( + a_native.get_forces(), a_vesin.get_forces(), rtol=1e-10, atol=1e-10 + ) diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py index 7537575f1a..1fcff6684f 100644 --- a/source/tests/pt_expt/infer/test_deep_eval.py +++ b/source/tests/pt_expt/infer/test_deep_eval.py @@ -632,6 +632,166 @@ def test_ase_nlist_multiple_frames(self) -> None: np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10, err_msg="force") np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10, err_msg="virial") + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None, "vesin not installed" + ) + def test_vesin_neighbor_list_consistency(self) -> None: + """The vesin O(N) nlist must match the native builder (PBC + non-PBC).""" + rng = np.random.default_rng(GLOBAL_SEED + 19) + natoms = 5 + coords = rng.random((1, natoms, 3)) * 8.0 + atom_types = np.array([i % self.nt for i in range(natoms)], dtype=np.int32) + + dp_native = DeepPot(self.tmpfile.name, nlist_backend="native") + dp_vesin = DeepPot(self.tmpfile.name, nlist_backend="vesin") + self.assertFalse(dp_native.deep_eval._use_vesin) + self.assertTrue(dp_vesin.deep_eval._use_vesin) + + for cells in (np.eye(3).reshape(1, 9) * 10.0, None): + e1, f1, v1, ae1, av1 = dp_native.eval( + coords, cells, atom_types, atomic=True + ) + e2, f2, v2, ae2, av2 = dp_vesin.eval(coords, cells, atom_types, atomic=True) + np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10, err_msg="energy") + np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10, err_msg="force") + np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10, err_msg="virial") + np.testing.assert_allclose( + ae1, ae2, rtol=1e-10, atol=1e-10, err_msg="atom_energy" + ) + np.testing.assert_allclose( + av1, av2, rtol=1e-10, atol=1e-10, err_msg="atom_virial" + ) + + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None, "vesin not installed" + ) + def test_vesin_nlist_multiple_frames(self) -> None: + """Vesin nlist with multiple frames and auto_batch_size=False.""" + rng = np.random.default_rng(GLOBAL_SEED + 23) + natoms = 4 + nframes = 3 + coords = rng.random((nframes, natoms, 3)) * 8.0 + cells = np.tile(np.eye(3).reshape(1, 9) * 10.0, (nframes, 1)) + atom_types = np.array([i % self.nt for i in range(natoms)], dtype=np.int32) + + dp_native = DeepPot( + self.tmpfile.name, nlist_backend="native", auto_batch_size=False + ) + dp_vesin = DeepPot( + self.tmpfile.name, nlist_backend="vesin", auto_batch_size=False + ) + e1, f1, v1 = dp_native.eval(coords, cells, atom_types) + e2, f2, v2 = dp_vesin.eval(coords, cells, atom_types) + + np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10, err_msg="energy") + np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10, err_msg="force") + np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10, err_msg="virial") + + def test_nlist_backend_default_is_auto(self) -> None: + dp = DeepPot(self.tmpfile.name) + self.assertEqual(dp.deep_eval.nlist_backend, "auto") + self.assertEqual( + dp.deep_eval._use_vesin, + importlib.util.find_spec("vesin.torch") is not None, + ) + + def test_nlist_backend_native_disables_vesin(self) -> None: + dp = DeepPot(self.tmpfile.name, nlist_backend="native") + self.assertEqual(dp.deep_eval.nlist_backend, "native") + self.assertFalse(dp.deep_eval._use_vesin) + + def test_nlist_backend_invalid_raises(self) -> None: + with self.assertRaises(ValueError): + DeepPot(self.tmpfile.name, nlist_backend="bogus") + + def test_nlist_backend_vesin_unavailable(self) -> None: + # "auto" silently falls back; explicit "vesin" raises. + import deepmd.pt_expt.infer.deep_eval as deep_eval_mod + + original = deep_eval_mod.is_vesin_torch_available + deep_eval_mod.is_vesin_torch_available = lambda: False + try: + dp = DeepPot(self.tmpfile.name, nlist_backend="auto") + self.assertFalse(dp.deep_eval._use_vesin) + with self.assertRaises(ValueError): + DeepPot(self.tmpfile.name, nlist_backend="vesin") + finally: + deep_eval_mod.is_vesin_torch_available = original + + def test_nlist_backend_vesin_conflicts_with_neighbor_list(self) -> None: + import ase.neighborlist + + with self.assertRaises(ValueError): + DeepPot( + self.tmpfile.name, + nlist_backend="vesin", + neighbor_list=ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=self.rcut, bothways=True + ), + ) + + @unittest.skipUnless( + importlib.util.find_spec("ase") is not None, "ase not installed" + ) + def test_nlist_backend_auto_yields_to_neighbor_list(self) -> None: + # an explicit ASE neighbor_list takes precedence over the auto vesin path + import ase.neighborlist + + dp = DeepPot( + self.tmpfile.name, + nlist_backend="auto", + neighbor_list=ase.neighborlist.NewPrimitiveNeighborList( + cutoffs=self.rcut, bothways=True + ), + ) + self.assertFalse(dp.deep_eval._use_vesin) + + @unittest.skipUnless( + importlib.util.find_spec("vesin.torch") is not None + and importlib.util.find_spec("ase") is not None, + "vesin or ase not installed", + ) + def test_calculator_nlist_backend(self) -> None: + # the ASE DP calculator must thread nlist_backend to the pt_expt backend + # and give backend-independent results for a .pte model. + from ase import ( + Atoms, + ) + + from deepmd.calculator import ( + DP, + ) + + rng = np.random.default_rng(GLOBAL_SEED + 51) + atoms = Atoms( + numbers=[8, 1, 1, 8, 1, 1], + positions=rng.random((6, 3)) * 8.0, + cell=np.eye(3) * 8.0, + pbc=True, + ) + type_dict = {"O": 0, "H": 1} + calc_native = DP(self.tmpfile.name, type_dict=type_dict, nlist_backend="native") + calc_vesin = DP(self.tmpfile.name, type_dict=type_dict, nlist_backend="vesin") + self.assertEqual(calc_native.dp.deep_eval.nlist_backend, "native") + self.assertTrue(calc_vesin.dp.deep_eval._use_vesin) + + a_native = atoms.copy() + a_native.calc = calc_native + a_vesin = atoms.copy() + a_vesin.calc = calc_vesin + np.testing.assert_allclose( + a_native.get_potential_energy(), + a_vesin.get_potential_energy(), + rtol=1e-10, + atol=1e-10, + ) + np.testing.assert_allclose( + a_native.get_forces(), a_vesin.get_forces(), rtol=1e-10, atol=1e-10 + ) + + # spin gate-off is covered end-to-end on a real spin model in + # test_deep_eval_spin.py::TestSpinInference. + class TestDeepEvalEnerPt2(unittest.TestCase): """Test pt_expt inference for energy models via .pt2 (AOTInductor).""" diff --git a/source/tests/pt_expt/infer/test_deep_eval_spin.py b/source/tests/pt_expt/infer/test_deep_eval_spin.py index 6a3be99319..3d450dfbe2 100644 --- a/source/tests/pt_expt/infer/test_deep_eval_spin.py +++ b/source/tests/pt_expt/infer/test_deep_eval_spin.py @@ -375,6 +375,39 @@ def test_eval_nopbc_nonatomic(self, spin_model_files, ext) -> None: atol=1e-10, ) + def test_nlist_backend_vesin_gated_off_for_spin( + self, spin_model_files, ext + ) -> None: + """A real spin model: explicit "vesin" must raise, while "auto" silently + keeps the native builder and the spin eval path still matches the ref. + """ + from deepmd.infer import ( + DeepPot, + ) + + files, ref, _ = spin_model_files + # explicit "vesin" is unsupported for spin -> fail loudly + with pytest.raises(ValueError): + DeepPot(files[ext], nlist_backend="vesin") + + # "auto" falls back to native for the spin model + dp = DeepPot(files[ext], nlist_backend="auto") + assert dp.deep_eval._use_vesin is False + + e, f, v, ae, av, fm, mm = dp.eval(COORD, BOX, ATYPE, atomic=True, spin=SPIN) + np.testing.assert_allclose( + e.reshape(-1), ref["energy"].reshape(-1), rtol=1e-10, atol=1e-10 + ) + np.testing.assert_allclose( + f.reshape(-1), ref["force"].reshape(-1), rtol=1e-10, atol=1e-10 + ) + np.testing.assert_allclose( + fm.reshape(-1), ref["force_mag"].reshape(-1), rtol=1e-10, atol=1e-10 + ) + np.testing.assert_allclose( + v.reshape(-1), ref["virial"].reshape(-1), rtol=1e-10, atol=1e-10 + ) + class TestSpinMetadataOnly: """Test metadata-only spin model inference through DeepPot.""" diff --git a/source/tests/pt_expt/utils/test_nlist.py b/source/tests/pt_expt/utils/test_nlist.py new file mode 100644 index 0000000000..2570d5c857 --- /dev/null +++ b/source/tests/pt_expt/utils/test_nlist.py @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Tests for the device-resident vesin.torch neighbor-list builder.""" + +import unittest + +import numpy as np + +from deepmd.dpmodel.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pt_expt.utils.nlist import ( + build_neighbor_list_vesin_torch, + is_vesin_torch_available, +) + + +def _per_atom_neighbor_dists(ext_coord, nlist, coord): + """Sorted, rounded valid-neighbor distances for each local atom.""" + ext_coord = np.asarray(ext_coord).reshape(-1, 3) + coord = np.asarray(coord).reshape(-1, 3) + out = [] + for i in range(coord.shape[0]): + ds = [ + round(float(np.linalg.norm(ext_coord[j] - coord[i])), 6) + for j in nlist[i] + if j >= 0 + ] + out.append(sorted(ds)) + return out + + +@unittest.skipIf(not is_vesin_torch_available(), "vesin.torch is not installed") +class TestNeighListVesinTorch(unittest.TestCase): + """The vesin.torch builder must produce the same neighbor relationships as + the native all-pairs builder, on whatever device the input lives. + """ + + def setUp(self) -> None: + import torch + + rng = np.random.default_rng(20240602) + self.nloc = 40 + self.rcut = 2.5 + self.sel = [30, 30] + box_len = 6.0 + self.box_np = (np.eye(3) * box_len).reshape(1, 9) + coord = (rng.random((self.nloc, 3)) * box_len).reshape(1, self.nloc, 3) + atype = rng.integers(0, 2, self.nloc).reshape(1, self.nloc) + self.coord_np = coord + self.atype_np = atype + self.coord_t = torch.tensor(coord, dtype=torch.float64) + self.box_t = torch.tensor( + (np.eye(3) * box_len).reshape(1, 3, 3), dtype=torch.float64 + ) + self.atype_t = torch.tensor(atype, dtype=torch.int64) + + def _native_dists(self, box_np, distinguish): + ec, _, _, nl = extend_input_and_build_neighbor_list( + self.coord_np, + self.atype_np, + self.rcut, + self.sel, + mixed_types=not distinguish, + box=box_np, + ) + return _per_atom_neighbor_dists( + np.asarray(ec).reshape(-1, 3), np.asarray(nl)[0], self.coord_np[0] + ) + + def _vesin_dists(self, ect, nlt): + return _per_atom_neighbor_dists( + ect[0].cpu().numpy(), nlt[0].cpu().numpy(), self.coord_np[0] + ) + + def test_pbc_matches_native(self) -> None: + ect, _, nlt, _ = build_neighbor_list_vesin_torch( + self.coord_t, self.box_t, self.atype_t, self.rcut, self.sel, False + ) + self.assertEqual( + self._native_dists(self.box_np, False), self._vesin_dists(ect, nlt) + ) + + def test_nopbc_matches_native(self) -> None: + ect, _, nlt, _ = build_neighbor_list_vesin_torch( + self.coord_t, None, self.atype_t, self.rcut, self.sel, False + ) + self.assertEqual(self._native_dists(None, False), self._vesin_dists(ect, nlt)) + + def test_distinguish_types_matches_native(self) -> None: + ect, _, nlt, _ = build_neighbor_list_vesin_torch( + self.coord_t, self.box_t, self.atype_t, self.rcut, self.sel, True + ) + self.assertEqual( + self._native_dists(self.box_np, True), self._vesin_dists(ect, nlt) + ) + + def test_outputs_on_input_device(self) -> None: + import torch + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + ect, eat, nlt, mpt = build_neighbor_list_vesin_torch( + self.coord_t.to(device), + self.box_t.to(device), + self.atype_t.to(device), + self.rcut, + self.sel, + False, + ) + for t in (ect, eat, nlt, mpt): + self.assertEqual(t.device.type, device.type) + + +if __name__ == "__main__": + unittest.main()