Hashan-Peiris · Hashan-Peiris · Jun 5, 2025
diff --git a/Cumulative RDF Calculator/Readme.md b/Cumulative RDF Calculator/Readme.md
@@ -1,3 +1,31 @@
-This calculates the RDF for a given atom summed up over the length of the trajectory.
-All runtime parameters are provided through a JSON file named `rdf_config.json`.
-Edit this file to set the trajectory filename, atom indices and binning options.
+The scripts in this folder compute cumulative radial distribution functions (RDF) from
+extended XYZ trajectories.
+
+`atomRDF.py` is the original implementation which operates on explicit atom index lists. A newer script `group_rdf.py` provides a more flexible interface allowing atom groups to be defined for each molecule. Groups can be selectively included or excluded and specific group-to-group interactions may be enabled or disabled via pair filters. The results can optionally be plotted individually.
+
+All runtime parameters are read from a JSON configuration file (default
+`rdf_config.json`). See the example below for the new options used by
+`group_rdf.py`.
+
+Pair filters can restrict which neighbour groups contribute to the RDF for each
+reference group.
+
+```json
+{
+  "filename": "trajectory.xyz",
+  "Range": 8.0,
+  "Limit": 0.05,
+  "Type": ["All"],
+  "groups": {
+    "mol1": [1,2,3],
+    "mol2": [4,5,6]
+  },
+  "include_groups": ["mol1"],
+  "exclude_groups": [],
+  "pair_include": {
+    "mol1": ["mol2"]
+  },
+  "pair_exclude": {},
+  "plot_individual": true
+}
+```
diff --git a/Cumulative RDF Calculator/group_rdf.py b/Cumulative RDF Calculator/group_rdf.py
@@ -0,0 +1,181 @@
+import argparse
+import json
+import numpy as np
+import re
+from pathlib import Path
+import matplotlib.pyplot as plt
+
+
+def parse_xyz(path):
+    """Parse an extended XYZ trajectory.
+
+    Returns
+    -------
+    positions : list of (N, 3) arrays
+        Cartesian coordinates for each time step.
+    elements : list of lists
+        Atomic symbols for each time step.
+    box : ndarray shape (3,)
+        Box lengths assuming an orthogonal cell.
+    """
+    positions = []
+    elements = []
+    box = None
+    with open(path) as fh:
+        while True:
+            line = fh.readline()
+            if not line:
+                break
+            natoms = int(line.strip())
+            comment = fh.readline()
+            if box is None:
+                match = re.search(r'Lattice="([^"]+)"', comment)
+                if not match:
+                    raise ValueError("Lattice information missing in XYZ header")
+                vals = [float(x) for x in match.group(1).split()]
+                box = np.array([abs(vals[0]), abs(vals[4]), abs(vals[8])])
+            step_pos = []
+            step_ele = []
+            for _ in range(natoms):
+                atom_line = fh.readline()
+                if not atom_line:
+                    break
+                parts = atom_line.split()
+                step_ele.append(parts[1])
+                step_pos.append([float(x) for x in parts[2:5]])
+            if len(step_pos) != natoms:
+                break
+            positions.append(np.asarray(step_pos, dtype=float))
+            elements.append(step_ele)
+    if not positions:
+        raise ValueError("No frames parsed from XYZ file")
+    return positions, elements, box
+
+
+def compute_rdf(
+    positions,
+    elements,
+    box,
+    groups,
+    types,
+    bins,
+    include=None,
+    exclude=None,
+    pair_include=None,
+    pair_exclude=None,
+):
+    """Calculate cumulative RDF for atom groups with optional pair filters."""
+
+    include = set(include or groups.keys())
+    exclude = set(exclude or [])
+    pair_include = pair_include or {}
+    pair_exclude = pair_exclude or {}
+
+    # Pre-convert group indices to zero-based arrays for efficiency
+    groups_idx = {g: np.array(idxs, dtype=int) - 1 for g, idxs in groups.items()}
+    hist = {g: np.zeros(len(bins) - 1, dtype=int) for g in groups}
+
+    for step_pos, step_ele in zip(positions, elements):
+        step_ele = np.array(step_ele)
+        for gname, idxs in groups_idx.items():
+            if gname not in include or gname in exclude:
+                continue
+
+            sel_pos = step_pos[idxs]
+
+            allowed = pair_include.get(gname)
+            if allowed is None:
+                allowed = list(groups_idx.keys())
+            allowed = [a for a in allowed if a not in pair_exclude.get(gname, [])]
+
+            # gather indices of allowed neighbour groups
+            target_indices = []
+            for aname in allowed:
+                if aname not in groups_idx:
+                    raise ValueError(f"Unknown group '{aname}' in pair filter")
+                target_indices.append(groups_idx[aname])
+            if not target_indices:
+                continue
+            target_indices = np.concatenate(target_indices)
+
+            if "All" in types:
+                mask = np.ones(len(target_indices), dtype=bool)
+            else:
+                mask = np.isin(step_ele[target_indices], types)
+            target_pos = step_pos[target_indices][mask]
+
+            for p in sel_pos:
+                delta = np.abs(target_pos - p)
+                delta = np.where(delta < 0.5 * box, delta, np.abs(delta - box))
+                dist = np.sqrt((delta ** 2).sum(axis=1))
+                hist[gname] += np.histogram(dist, bins=bins)[0]
+
+    return hist
+
+
+def save_results(hist, bins, prefix="group"):
+    for name, counts in hist.items():
+        with open(f"{prefix}_{name}.txt", "w") as fh:
+            fh.write("Bin\tCount\n")
+            for b, c in zip(bins[:-1], counts):
+                fh.write(f"{b:.4f}\t{c}\n")
+
+
+def plot_results(hist, bins, plot_individual=False):
+    if plot_individual:
+        for name, counts in hist.items():
+            plt.figure()
+            plt.plot(bins[:-1], np.cumsum(counts))
+            plt.xlabel("Distance")
+            plt.ylabel("Cumulative count")
+            plt.title(f"RDF for {name}")
+            plt.tight_layout()
+            plt.savefig(f"rdf_{name}.png")
+            plt.close()
+    else:
+        plt.figure()
+        for name, counts in hist.items():
+            plt.plot(bins[:-1], np.cumsum(counts), label=name)
+        plt.xlabel("Distance")
+        plt.ylabel("Cumulative count")
+        plt.legend()
+        plt.tight_layout()
+        plt.savefig("rdf_combined.png")
+        plt.close()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Compute cumulative RDF for atom groups")
+    parser.add_argument("config", nargs="?", default="rdf_config.json", help="Path to config JSON")
+    args = parser.parse_args()
+    with open(args.config) as fh:
+        cfg = json.load(fh)
+    filename = cfg["filename"]
+    groups = cfg["groups"]
+    bins = np.arange(cfg["Limit"], cfg["Range"] + cfg["Limit"], cfg["Limit"])
+    types = cfg.get("Type", ["All"])
+    include = cfg.get("include_groups")
+    exclude = cfg.get("exclude_groups")
+    pair_in = cfg.get("pair_include")
+    pair_ex = cfg.get("pair_exclude")
+    plot_individual = cfg.get("plot_individual", False)
+
+    positions, elements, box = parse_xyz(filename)
+    hist = compute_rdf(
+        positions,
+        elements,
+        box,
+        groups,
+        types,
+        bins,
+        include,
+        exclude,
+        pair_in,
+        pair_ex,
+    )
+    save_results(hist, bins, prefix="rdf")
+    plot_results(hist, bins, plot_individual)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Cumulative RDF Calculator/rdf_config.json b/Cumulative RDF Calculator/rdf_config.json
@@ -1,9 +1,17 @@
 {
-  "filename": "Distances5.xyz",
-  "AtmA": [380,384,389,396,407,409,414,420,423,427,432,438,446,450],
-  "AtmB": ["O","O"],
+  "filename": "trajectory.xyz",
   "Range": 8.0,
   "Limit": 0.05,
-  "Type": ["All","O","C","Li","H","B","F","Ni"],
-  "multiply": 25
+  "Type": ["All"],
+  "groups": {
+    "mol1": [1,2,3],
+    "mol2": [4,5,6]
+  },
+  "include_groups": [],
+  "exclude_groups": [],
+  "pair_include": {
+    "mol1": ["mol2"]
+  },
+  "pair_exclude": {},
+  "plot_individual": false
 }