-
Notifications
You must be signed in to change notification settings - Fork 13
add gromacs to frames node #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,260 @@ | ||
| import logging | ||
| import typing | ||
| import warnings | ||
| from pathlib import Path | ||
|
|
||
| import h5py | ||
| import MDAnalysis as mda | ||
| import numpy as np | ||
| import tqdm | ||
| import znh5md | ||
| import zntrack | ||
| from ase import Atoms | ||
| from ase.calculators.singlepoint import SinglePointCalculator | ||
| from ase.units import kJ, mol | ||
| from MDAnalysis.auxiliary.EDR import EDRReader | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| _TYPE_TO_ELEMENT = { | ||
| "CL": "Cl", | ||
| "NA": "Na", | ||
| "MG": "Mg", | ||
| "ZN": "Zn", | ||
| "FE": "Fe", | ||
| "CA": "Ca", | ||
| "MN": "Mn", | ||
| "CU": "Cu", | ||
| "LI": "Li", | ||
| "AL": "Al", | ||
| "SI": "Si", | ||
| "BR": "Br", | ||
| "SE": "Se", | ||
| } | ||
|
|
||
|
|
||
def _symbol_from_type(atom_type: str, valid: frozenset[str]) -> str | None:
    """Map a single atom-type string to a chemical symbol.

    Returns ``None`` when no real element can be derived, so the caller can
    decide how to represent the atom.
    """
    from itertools import dropwhile, takewhile

    stripped = atom_type.strip()

    # Explicit overrides (ions and other two-letter elements) win.
    mapped = _TYPE_TO_ELEMENT.get(stripped.upper())
    if mapped is not None:
        return mapped

    # First run of letters, skipping leading digits/symbols ("1HB" -> "HB").
    letters = "".join(
        takewhile(str.isalpha, dropwhile(lambda ch: not ch.isalpha(), stripped))
    )
    if not letters:
        return None

    # A bare two-letter type is read as a two-letter element only if that
    # element exists; "OW" must not become the non-element "Ow".
    if len(stripped) == 2 and len(letters) == 2:
        candidate = letters.capitalize()
        if candidate in valid:
            return candidate

    first = letters[0].upper()
    return first if first in valid else None


def _get_symbols(u: mda.Universe) -> list[str]:
    """Extract one chemical symbol per atom from a Universe.

    Strategies, in order:

    1. ``u.atoms.elements`` when the topology provides it (returned as-is).
    2. ``u.atoms.types``: an explicit ``_TYPE_TO_ELEMENT`` override, then a
       two-letter element if the type is exactly two letters naming a real
       element, then the first letter of the type.

    Every symbol derived from a type is checked against ASE's element table.
    Types that cannot be resolved (empty strings, purely numeric types,
    virtual sites such as ``MW``) are mapped to ASE's dummy symbol ``"X"``
    and reported once in a warning, instead of raising ``IndexError`` here or
    producing a string that ``ase.Atoms`` rejects later.

    NOTE(review): two-letter force-field types that happen to spell an
    element (e.g. ``CD``, ``HG``, ``OS``) are still read as that element,
    exactly as before; confirm whether that is intended for the force fields
    in use.

    Parameters
    ----------
    u : mda.Universe
        Universe whose atoms should be labelled.

    Returns
    -------
    list[str]
        One symbol per atom, in atom order.
    """
    from ase.data import chemical_symbols

    # 1. Use the elements attribute if available.
    try:
        return list(u.atoms.elements)
    except (mda.exceptions.NoDataError, AttributeError):
        pass

    # 2. Derive symbols from atom types. Index 0 of ASE's table is the dummy
    #    "X", which is excluded so that it is only ever used as the fallback.
    valid = frozenset(chemical_symbols[1:])
    symbols = []
    unresolved = set()
    for atom_type in u.atoms.types:
        type_str = str(atom_type)
        symbol = _symbol_from_type(type_str, valid)
        if symbol is None:
            unresolved.add(type_str)
            symbol = "X"
        symbols.append(symbol)

    if unresolved:
        logger.warning(
            "Could not infer an element for atom types %s; using dummy symbol 'X'",
            sorted(unresolved),
        )
    return symbols
|
|
||
|
|
||
| def _match_edr_frame( | ||
| edr_times: np.ndarray, traj_time: float, tolerance: float = 0.1 | ||
| ) -> int: | ||
| """Find the EDR index closest to a trajectory time, warning on large gaps.""" | ||
| idx = int(np.argmin(np.abs(edr_times - traj_time))) | ||
| time_diff = abs(edr_times[idx] - traj_time) | ||
| if time_diff > tolerance: | ||
| logger.warning( | ||
| "EDR time %.3f ps does not match trajectory time %.3f ps (diff=%.3f ps)", | ||
| edr_times[idx], | ||
| traj_time, | ||
| time_diff, | ||
| ) | ||
| return idx | ||
|
Comment on lines
+60
to
+73
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid quadratic EDR matching and reject missed timestamps.
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def gmx_to_ase( | ||
| topology: str, | ||
| trajectory: str | None = None, | ||
| edr: str | None = None, | ||
| start: int | None = None, | ||
| stop: int | None = None, | ||
| step: int | None = None, | ||
| ) -> list[Atoms]: | ||
| """Convert a GROMACS trajectory to a list of ASE Atoms objects. | ||
|
|
||
| Extracts all available information: positions, velocities, forces, | ||
| and (via the .edr file) energies and stress. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| topology : str | ||
| Path to a GROMACS topology/structure file (.gro, .tpr). | ||
| trajectory : str | None | ||
| Path to a trajectory file (.xtc, .trr). If None, only the single | ||
| structure from the topology file is returned. | ||
| edr : str | None | ||
| Path to a GROMACS energy file (.edr). If given, per-frame energies | ||
| and stress tensors are attached via SinglePointCalculator. | ||
| start, stop, step : int | None | ||
| Slice parameters for selecting a subset of frames. | ||
|
|
||
| Returns | ||
| ------- | ||
| list[Atoms] | ||
| One ASE Atoms object per frame. Each Atoms has: | ||
| - positions (always) | ||
| - cell and pbc (always) | ||
| - velocities (if present in trajectory) | ||
| - forces (if present in trajectory, e.g. .trr) | ||
| - calculator with energy/stress/forces (if .edr provided or forces | ||
| present), plus all EDR terms stored in calc.results | ||
| """ | ||
| if trajectory is not None: | ||
| u = mda.Universe(topology, trajectory) | ||
| else: | ||
| u = mda.Universe(topology) | ||
|
|
||
| symbols = _get_symbols(u) | ||
|
|
||
| # Load EDR data if provided | ||
| edr_data = None | ||
| if edr is not None: | ||
| with warnings.catch_warnings(): | ||
| warnings.simplefilter("ignore") | ||
| reader = EDRReader(edr) | ||
| edr_all = reader.get_data(list(reader.terms)) | ||
| edr_times = edr_all.pop("Time") | ||
| edr_data = dict(edr_all) | ||
| edr_terms = list(edr_data.keys()) | ||
|
|
||
| frames = [] | ||
| for ts in tqdm.tqdm(u.trajectory[start:stop:step]): | ||
| positions = ts.positions.copy() | ||
| box = ts.dimensions | ||
|
|
||
| atoms = Atoms(symbols=symbols, positions=positions, pbc=True) | ||
|
|
||
| if box is not None and all(box[:3] > 0): | ||
| atoms.set_cell(box, scale_atoms=False) | ||
|
|
||
| # Velocities (e.g. from .gro or .trr) | ||
| if ts.has_velocities: | ||
| # MDAnalysis: Å/ps, ASE: Å/fs -> divide by 1000 | ||
| atoms.set_velocities(ts.velocities / 1000.0) | ||
|
|
||
| # Forces and energies via SinglePointCalculator | ||
| forces = ts.forces.copy() if ts.has_forces else None | ||
| energy = None | ||
| stress = None | ||
| extra_results = {} | ||
|
|
||
| if edr_data is not None: | ||
| idx = _match_edr_frame(edr_times, ts.time) | ||
| energy = float(edr_data["Potential"][idx]) * (kJ / mol) # convert to eV | ||
|
|
||
| # Build Voigt stress from pressure tensor if available | ||
| try: | ||
| pxx = edr_data["Pres-XX"][idx] | ||
| pyy = edr_data["Pres-YY"][idx] | ||
| pzz = edr_data["Pres-ZZ"][idx] | ||
| pyz = edr_data["Pres-YZ"][idx] | ||
| pxz = edr_data["Pres-XZ"][idx] | ||
| pxy = edr_data["Pres-XY"][idx] | ||
| # GROMACS pressure in bar -> store as-is (not ASE native eV/ų) | ||
| stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy]) | ||
|
Comment on lines
+164
to
+165
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Stress units mismatch: GROMACS pressure is in bar, ASE expects eV/ų. The code comment acknowledges the unit discrepancy, but storing stress in bar breaks ASE's stress conventions: any code that reads the stress from the calculator will receive values in bar rather than eV/ų. Convert bar to eV/ų: 1 bar = 1e-4 GPa ≈ 6.2415e-7 eV/ų (use `ase.units`; `units.bar` is already this factor because ASE's base units are eV and Å). Proposed fix:
+from ase import units
+
+# bar -> eV/ų
+BAR_TO_EV_ANG3 = units.bar / (units.eV / units.Ang**3)
+
...
- # GROMACS pressure in bar -> store as-is (not ASE native eV/ų)
- stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy])
+ # Convert GROMACS pressure (bar) to ASE stress (eV/ų)
+ stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy]) * BAR_TO_EV_ANG3 |
||
| except KeyError: | ||
| pass | ||
|
|
||
| # Store all EDR terms for this frame | ||
| for term in edr_terms: | ||
| extra_results[term] = float(edr_data[term][idx]) | ||
|
|
||
| if energy is not None or forces is not None: | ||
| calc = SinglePointCalculator( | ||
| atoms, | ||
| energy=energy, | ||
| forces=forces, | ||
| stress=stress, | ||
| ) | ||
|
Comment on lines
+147
to
+179
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's look at the full file context
cd /tmp && find . -name "add_data_gromacs.py" -type f 2>/dev/null | head -5Repository: zincware/IPSuite Length of output: 42 🏁 Script executed: # Find the file in the repository
find . -name "add_data_gromacs.py" -type f 2>/dev/nullRepository: zincware/IPSuite Length of output: 102 🏁 Script executed: # Read the relevant file section to understand context
cat -n ipsuite/data_loading/add_data_gromacs.py | head -200Repository: zincware/IPSuite Length of output: 7743 🏁 Script executed: # Check imports to see how kJ/mol is defined
head -50 ipsuite/data_loading/add_data_gromacs.pyRepository: zincware/IPSuite Length of output: 1203 🌐 Web query:
💡 Result: In MDAnalysis, the base unit for forces in trajectories is kJ/(mol·Å). This applies when forces are present in the trajectory file format (e.g., via Universe.atoms.forces). MDAnalysis automatically converts forces to this unit upon reading trajectories, regardless of the original MD simulation format. Other units like kcal/(mol·Å) or N can be converted using MDAnalysis.units.convert. Citations:
🌐 Web query:
💡 Result: In the Atomic Simulation Environment (ASE), the SinglePointCalculator stores pre-computed results (energy, forces, stress, etc.) for a specific atomic configuration from an external calculation. It does not perform new calculations but returns the stored values via methods like get_forces. ASE uses consistent internal units: energies in eV, lengths in Angstroms (Å). Consequently, forces are in eV/Å. The SinglePointCalculator preserves whatever units the input data had when created (typically from I/O readers that convert external code outputs to ASE units). When reading results from formats like CASTEP .geom or ABINIT output, forces are explicitly converted to eV/Angstrom. For example: - CASTEP .geom reader: "Note that everything in the .geom file is in atomic units, which has been conversed to commonly used unit angstrom(length) and eV (energy)." - ABINIT reader: forces from 'cartesian_forces' converted using Hartree / Bohr = eV/Å. All ASE calculators, including those populating SinglePointCalculator, follow this convention (eV for energy, eV/Å for forces), as confirmed by documentation, tutorials (e.g., EMT calculator forces in eV/Å), and community sources. Thus, forces from ASE SinglePointCalculator are in eV/Å. Citations:
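Convert forces to eV/Å before handing them to `SinglePointCalculator`. Line 147 forwards `ts.forces` unchanged, i.e. in the MDAnalysis unit kJ/(mol·Å), while ASE expects eV/Å. Suggested fix:
- forces = ts.forces.copy() if ts.has_forces else None
+ forces = ts.forces.copy() * (kJ / mol) if ts.has_forces else None
🤖 Prompt for AI Agents |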
||
| calc.results.update(extra_results) | ||
| atoms.calc = calc | ||
|
|
||
| frames.append(atoms) | ||
|
|
||
| return frames | ||
|
|
||
|
|
||
class Gmx2Frames(zntrack.Node):
    """Node that converts GROMACS output files into stored ASE frames.

    The topology, plus an optional trajectory and an optional energy (.edr)
    file, are handed to ``gmx_to_ase``. The resulting list of ASE Atoms
    (positions, and where available velocities, forces, energies and stress)
    is written to ``frames_path`` through ``znh5md.IO``.

    Parameters
    ----------
    topology : Path
        Path to a GROMACS topology/structure file (.gro, .tpr).
    trajectory : Path, optional
        Path to a trajectory file (.xtc, .trr).
    edr : Path, optional
        Path to a GROMACS energy file (.edr).
    start : int, optional
        First frame index to read.
    stop : int, optional
        Last frame index (exclusive) to read.
    step : int, optional
        Step size for frame selection.

    Examples
    --------
    >>> with project:
    ...     md = ips.Gmx2Frames(
    ...         topology="gromacs/system.gro",
    ...         trajectory="gromacs/production.xtc",
    ...         edr="gromacs/production.edr",
    ...         start=1,
    ...     )
    """

    # Input files tracked as dependencies.
    topology: Path = zntrack.deps_path()
    trajectory: Path | None = zntrack.deps_path(None)
    edr: Path | None = zntrack.deps_path(None)

    # Frame-slice parameters, forwarded unchanged to gmx_to_ase.
    start: int | None = zntrack.params(None)
    stop: int | None = zntrack.params(None)
    step: int | None = zntrack.params(None)

    # Output file that receives the converted frames.
    frames_path: Path = zntrack.outs_path(zntrack.nwd / "frames.h5")

    def run(self) -> None:
        """Convert the configured files and append the frames to ``frames_path``."""
        topology_arg = str(self.topology)
        # Unset optional paths are passed on as None rather than "None".
        trajectory_arg = None if not self.trajectory else str(self.trajectory)
        edr_arg = None if not self.edr else str(self.edr)

        converted = gmx_to_ase(
            topology=topology_arg,
            trajectory=trajectory_arg,
            edr=edr_arg,
            start=self.start,
            stop=self.stop,
            step=self.step,
        )
        znh5md.IO(self.frames_path).extend(converted)

    @property
    def frames(self) -> typing.List[Atoms]:
        """All stored frames, read back from ``frames_path`` via the node's filesystem."""
        with self.state.fs.open(self.frames_path, "rb") as f, h5py.File(f) as file:
            return znh5md.IO(file_handle=file)[:]
|
|
||
|
|
||
if __name__ == "__main__":
    # Demo run: convert the production trajectory together with its energies
    # and print a short summary of what was loaded.
    frames = gmx_to_ase(
        "gromacs/system.gro",
        "gromacs/production.xtc",
        edr="gromacs/production.edr",
    )
    n_frames = len(frames)
    print(f"Loaded {n_frames} frames, {len(frames[0])} atoms per frame")
    print(f"Cell: {frames[0].cell.cellpar()}")
    print(f"Potential energy (frame 0): {frames[0].get_potential_energy()} eV")
    # The per-term listing uses the second frame, so it needs at least two.
    if n_frames > 1:
        print(f"All EDR terms on frame 1: {list(frames[1].calc.results.keys())}")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
cat -n ipsuite/data_loading/add_data_gromacs.py | head -100Repository: zincware/IPSuite
Length of output: 3699
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 1268
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 335
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 191
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 160
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 42
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 100
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 230
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 2179
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 647
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 100
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 42
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 303
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 842
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 137
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 42
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 1415
🏁 Script executed:
# Let's check what the MDAnalysis documentation actually says about element guessing web_search MDAnalysis element guessing default guesser names typesRepository: zincware/IPSuite
Length of output: 159
🌐 Web query:
MDAnalysis element guessing default guesser documentation💡 Result:
The default guesser for element guessing in MDAnalysis is DefaultGuesser from MDAnalysis.guesser.default_guesser. It is the generic, default guessing class used for most topologies, particularly following biological naming conventions (e.g., "CA" guessed as carbon, not calcium). Element guessing behavior: The guess_atom_element(atomname) method strips numbers and symbols from the atom name, looks it up in MDAnalysis.guesser.tables.atomelements (derived from CHARMM/AMBER tables), removes the last character if not found, or falls back to the first character of the name if still not found. Empty names return empty string. When guessing types, they represent the element using the same method. It is used automatically as the default context (context='default') when creating a Universe with to_guess=['types', 'elements'] or via u.guess_TopologyAttrs(to_guess=['elements']). Pre-2.8.0, standalone functions like guess_atom_element in MDAnalysis.topology.guessers were used (now deprecated). Example: import MDAnalysis as mda u = mda.Universe('topology.gro', context='default', to_guess=['elements']) # or u.guess_TopologyAttrs(to_guess=['elements']) Official documentation: https://docs.mdanalysis.org/stable/documentation_pages/guesser_modules/default_guesser.html and https://userguide.mdanalysis.org/stable/formats/guessers/default.html (MDAnalysis 2.10.0 as of latest docs).
Citations:
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 1823
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 42
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 168
🏁 Script executed:
Repository: zincware/IPSuite
Length of output: 928
Use MDAnalysis's proper element guessing from atom names, not types.
The current approach using `u.atoms.types` creates semantic bugs:
- `OW` (water oxygen in CHARMM-GUI) becomes the invalid symbol `Ow`.
- `CA` (carbon alpha) is hardcoded to `Ca` (calcium) instead of being guessed as carbon.

MDAnalysis's DefaultGuesser operates on atom names using domain-standard rules (stripping digits/symbols, lookups in CHARMM/AMBER tables). Replace the manual type-based logic with `u.atoms.names` and leverage MDAnalysis's built-in element inference, or call `u.guess_TopologyAttrs(to_guess=['elements'])` to populate `u.atoms.elements` properly.
🤖 Prompt for AI Agents