From 8f2cb99d0ce407fee664dbf56b637aa790c33440 Mon Sep 17 00:00:00 2001
From: Mechiel Nieuwoudt <m.nieuwoudt@instadeep.com>
Date: Wed, 18 Dec 2024 11:27:59 +0200
Subject: [PATCH] feat: Add interface_atom_pairs and interface_residue_pairs
 enhancements to Interface class

- Implemented `interface_atom_pairs` to return interacting atom pairs within a specified cutoff.
  - Atoms are annotated with their interacting partners via the `interacting_atoms` attribute when `assign_attribute=True`.
- Enhanced `interface_residue_pairs` to return interacting residue pairs within a specified cutoff.
  - Residues are annotated with their interacting partners via the `interacting_residues` attribute when `assign_attribute=True`.
- Improved usability by making assigned attributes directly relevant to the function, enabling downstream analysis of interactions.
- Updated both functions to ensure efficient use of the `SpaceQuery` KD-tree structure.
---
 protkit/properties/interface.py | 143 +++++++++++++++++++++++++++++++-
 tests/quick_start_guide.py      |  33 +++++++-
 2 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/protkit/properties/interface.py b/protkit/properties/interface.py
index 7801008..270d6a1 100644
--- a/protkit/properties/interface.py
+++ b/protkit/properties/interface.py
@@ -15,7 +15,8 @@
 It uses a SpaceQuery data structure to calculate interacting atoms and residues fast.
 """
 
-from typing import List
+from typing import List, Tuple
+
 
 from protkit.structure.residue import Residue
 from protkit.structure.atom import Atom
@@ -161,3 +162,143 @@ def interface_residues_from_alpha_carbon(residues1: List[Residue],
                 residue.set_attribute(key, True)
 
         return interface_residues1, interface_residues2
+
+    @staticmethod
+    def interface_atom_pairs(atoms1: List[Atom],
+                             atoms2: List[Atom],
+                             cutoff: float = 5.0,
+                             assign_attribute: bool = False,
+                             key: str = "interacting_atoms") -> List[Tuple[Atom, Atom]]:
+        """
+        Returns a list of atom pairs (a1, a2) that are within a specified distance of each other.
+
+        Two atoms are considered to be interacting if the distance between them
+        is less than the specified cutoff.
+
+        If assign_attribute is True, each atom will have the `key` attribute
+        updated/created as a list of interacting partner atoms.
+
+        Args:
+            atoms1 (List[Atom]): A list of atoms (e.g., from one protein or selection).
+            atoms2 (List[Atom]): Another list of atoms (e.g., from another protein or selection).
+            cutoff (float): The distance cutoff for considering atoms to be interacting.
+            assign_attribute (bool): If True, adds the interacting partner atoms as attributes to each atom.
+            key (str): Attribute name to store the list of partner atoms.
+
+        Returns:
+            List[(Atom, Atom)]: A list of tuples, each containing an atom from atoms1 and an atom
+                                 from atoms2 that are interacting.
+        """
+
+        # Extract coordinates from atoms2 for KD-tree construction
+        atoms2_coords = [(a.x, a.y, a.z) for a in atoms2]
+
+        # Create KD-tree from atoms2
+        tree = SpaceQuery(atoms2_coords)
+
+        interacting_pairs = []
+
+        # For each atom in atoms1, query the KD-tree
+        for a1 in atoms1:
+            coords1 = [(a1.x, a1.y, a1.z)]
+            idx1, idx2 = tree.query_partners(coords1, cutoff)
+            # idx1 corresponds to matches in atoms2_coords
+            for partner_index in idx1:
+                a2 = atoms2[partner_index]
+                interacting_pairs.append((a1, a2))
+
+        # Assign attributes if requested
+        if assign_attribute:
+            for (atom_a, atom_b) in interacting_pairs:
+                # Update atom_a's attribute
+                if atom_a.has_attribute(key):
+                    partners = atom_a.get_attribute(key)
+                else:
+                    partners = []
+                partners.append(atom_b)
+                atom_a.set_attribute(key, partners)
+
+                # Update atom_b's attribute
+                if atom_b.has_attribute(key):
+                    partners = atom_b.get_attribute(key)
+                else:
+                    partners = []
+                partners.append(atom_a)
+                atom_b.set_attribute(key, partners)
+
+        return interacting_pairs
+
+    @staticmethod
+    def interface_residue_pairs(residues1: List[Residue],
+                                residues2: List[Residue],
+                                cutoff: float = 5.0,
+                                assign_attribute: bool = False,
+                                key: str = "interacting_residues") -> List[Tuple[Residue, Residue]]:
+        """
+        Returns a list of residue pairs (r1, r2) that are within a specified distance of each other.
+
+        Two residues are considered interacting if any atom of a residue in `residues1` is
+        within `cutoff` distance of any atom of a residue in `residues2`.
+
+        If assign_attribute is True, each residue will have the `key` attribute
+        updated/created as a list of interacting partner residues.
+
+        Args:
+            residues1 (List[Residue]): A list of residues (e.g., from one protein or selection).
+            residues2 (List[Residue]): Another list of residues (e.g., from another protein or selection).
+            cutoff (float): The distance cutoff for considering residues to be interacting.
+            assign_attribute (bool): If True, adds the interacting partner residues as attributes to each residue.
+            key (str): Attribute name to store the list of partner residues.
+
+        Returns:
+            List[(Residue, Residue)]: A list of tuples, each containing a residue from residues1 and a residue
+                                      from residues2 that are interacting.
+        """
+
+        # Flatten atoms from residues2 and keep a mapping to their residues
+        atoms2_coords = []
+        atoms2_res_map = []
+        for r2 in residues2:
+            for a2 in r2.atoms:
+                atoms2_coords.append((a2.x, a2.y, a2.z))
+                atoms2_res_map.append(r2)
+
+        # Create a KD-tree for the second set of residues
+        tree = SpaceQuery(atoms2_coords)
+
+        interacting_pairs = []
+
+        # For each residue in residues1, find interacting residues in residues2
+        for r1 in residues1:
+            found_partner_residues = set()
+
+            for a1 in r1.atoms:
+                # Query the KD-tree with the coordinates of a single atom from residue1
+                idx1, idx2 = tree.query_partners([(a1.x, a1.y, a1.z)], cutoff)
+                # idx1 are indices in atoms2_coords
+                for partner_index in idx1:
+                    found_partner_residues.add(atoms2_res_map[partner_index])
+
+            for r2 in found_partner_residues:
+                interacting_pairs.append((r1, r2))
+
+        # Assign attributes if requested
+        if assign_attribute:
+            for (res_a, res_b) in interacting_pairs:
+                # Update res_a's attribute
+                if res_a.has_attribute(key):
+                    partners = res_a.get_attribute(key)
+                else:
+                    partners = []
+                partners.append(res_b)
+                res_a.set_attribute(key, partners)
+
+                # Update res_b's attribute
+                if res_b.has_attribute(key):
+                    partners = res_b.get_attribute(key)
+                else:
+                    partners = []
+                partners.append(res_a)
+                res_b.set_attribute(key, partners)
+
+        return interacting_pairs
\ No newline at end of file
diff --git a/tests/quick_start_guide.py b/tests/quick_start_guide.py
index 232223a..3bdcd64 100644
--- a/tests/quick_start_guide.py
+++ b/tests/quick_start_guide.py
@@ -662,6 +662,33 @@ def properties_interface_residues():
         if residue.get_attribute("ca_in_interface"):
             print(residue.id)
 
+def properties_interface_atom_pairs():
+    from protkit.file_io import PDBIO
+    from protkit.properties import Interface
+
+    protein = PDBIO.load("1ahw.pdb")[0]
+    atoms1 = list(protein.filter_atoms(chain_criteria=[("chain_id", ["A", "B"])]))
+    atoms2 = list(protein.filter_atoms(chain_criteria=[("chain_id", ["C"])]))
+
+    pairs = Interface.interface_atom_pairs(atoms1, atoms2, cutoff=5.0, assign_attribute=True)
+
+    # pairs is now a list of (atom_from_A, atom_from_B) that interact.
+    for aA, aB in pairs:
+        print(f"{aA.id} interacts with {aB.id}")
+
+def properties_interface_residue_pairs():
+    from protkit.file_io import PDBIO
+    from protkit.properties import Interface
+
+    protein = PDBIO.load("1ahw.pdb")[0]
+    residues1 = list(protein.filter_residues(chain_criteria=[("chain_id", ["A", "B"])]))
+    residues2 = list(protein.filter_residues(chain_criteria=[("chain_id", ["C"])]))
+
+    pairs = Interface.interface_residue_pairs(residues1, residues2, cutoff=5.0, assign_attribute=True)
+
+    for rA, rB in pairs:
+        print(f"{rA.id} interacts with {rB.id}")
+
 def tools_reduce():
     from protkit.tools.reduce_adaptor import ReduceAdaptor
     from protkit.file_io import ProtIO
@@ -762,10 +789,12 @@ def ml_dataframe3():
 # properties_circular_variance()  # -> double check first residue
 # # properties_interface_atoms()
 # # properties_interface_residues()
-#
+properties_interface_atom_pairs()
+properties_interface_residue_pairs()
+
 # tools_reduce()
 # tools_freesasa()
 
 # ml_dataframe()
 # ml_dataframe2()
-ml_dataframe3()
\ No newline at end of file
+# ml_dataframe3()
\ No newline at end of file