From 8f2cb99d0ce407fee664dbf56b637aa790c33440 Mon Sep 17 00:00:00 2001 From: Mechiel Nieuwoudt Date: Wed, 18 Dec 2024 11:27:59 +0200 Subject: [PATCH] feat: Add interface_atom_pairs and interface_residue_pairs enhancements to Interface class - Implemented `interface_atom_pairs` to return interacting atom pairs within a specified cutoff. - Atoms are annotated with their interacting partners via the `interacting_atoms` attribute when `assign_attribute=True`. - Enhanced `interface_residue_pairs` to return interacting residue pairs within a specified cutoff. - Residues are annotated with their interacting partners via the `interacting_residues` attribute when `assign_attribute=True`. - Improved usability by making assigned attributes directly relevant to the function, enabling downstream analysis of interactions. - Updated both functions to ensure efficient use of the `SpaceQuery` KD-tree structure. --- protkit/properties/interface.py | 143 +++++++++++++++++++++++++++++++- tests/quick_start_guide.py | 33 +++++++- 2 files changed, 173 insertions(+), 3 deletions(-) diff --git a/protkit/properties/interface.py b/protkit/properties/interface.py index 7801008..270d6a1 100644 --- a/protkit/properties/interface.py +++ b/protkit/properties/interface.py @@ -15,7 +15,8 @@ It uses a SpaceQuery data structure to calculate interacting atoms and residues fast. """ -from typing import List +from typing import List, Tuple + from protkit.structure.residue import Residue from protkit.structure.atom import Atom @@ -161,3 +162,143 @@ def interface_residues_from_alpha_carbon(residues1: List[Residue], residue.set_attribute(key, True) return interface_residues1, interface_residues2 + + @staticmethod + def interface_atom_pairs(atoms1: List[Atom], + atoms2: List[Atom], + cutoff: float = 5.0, + assign_attribute: bool = False, + key: str = "interacting_atoms") -> List[Tuple[Atom, Atom]]: + """ + Returns a list of atom pairs (a1, a2) that are within a specified distance of each other. + + Two atoms are considered to be interacting if the distance between them + is less than the specified cutoff. + + If assign_attribute is True, each atom will have the `key` attribute + updated/created as a list of interacting partner atoms. + + Args: + atoms1 (List[Atom]): A list of atoms (e.g., from one protein or selection). + atoms2 (List[Atom]): Another list of atoms (e.g., from another protein or selection). + cutoff (float): The distance cutoff for considering atoms to be interacting. + assign_attribute (bool): If True, adds the interacting partner atoms as attributes to each atom. + key (str): Attribute name to store the list of partner atoms. + + Returns: + List[(Atom, Atom)]: A list of tuples, each containing an atom from atoms1 and an atom + from atoms2 that are interacting. + """ + + # Extract coordinates from atoms2 for KD-tree construction + atoms2_coords = [(a.x, a.y, a.z) for a in atoms2] + + # Create KD-tree from atoms2 + tree = SpaceQuery(atoms2_coords) + + interacting_pairs = [] + + # For each atom in atoms1, query the KD-tree + for a1 in atoms1: + coords1 = [(a1.x, a1.y, a1.z)] + idx1, idx2 = tree.query_partners(coords1, cutoff) + # idx1 corresponds to matches in atoms2_coords + for partner_index in idx1: + a2 = atoms2[partner_index] + interacting_pairs.append((a1, a2)) + + # Assign attributes if requested + if assign_attribute: + for (atom_a, atom_b) in interacting_pairs: + # Update atom_a's attribute + if atom_a.has_attribute(key): + partners = atom_a.get_attribute(key) + else: + partners = [] + partners.append(atom_b) + atom_a.set_attribute(key, partners) + + # Update atom_b's attribute + if atom_b.has_attribute(key): + partners = atom_b.get_attribute(key) + else: + partners = [] + partners.append(atom_a) + atom_b.set_attribute(key, partners) + + return interacting_pairs + + @staticmethod + def interface_residue_pairs(residues1: List[Residue], + residues2: List[Residue], + cutoff: float = 5.0, + assign_attribute: bool = False, + key: str = "interacting_residues") -> List[Tuple[Residue, Residue]]: + """ + Returns a list of residue pairs (r1, r2) that are within a specified distance of each other. + + Two residues are considered interacting if any atom of a residue in `residues1` is + within `cutoff` distance of any atom of a residue in `residues2`. + + If assign_attribute is True, each residue will have the `key` attribute + updated/created as a list of interacting partner residues. + + Args: + residues1 (List[Residue]): A list of residues (e.g., from one protein or selection). + residues2 (List[Residue]): Another list of residues (e.g., from another protein or selection). + cutoff (float): The distance cutoff for considering residues to be interacting. + assign_attribute (bool): If True, adds the interacting partner residues as attributes to each residue. + key (str): Attribute name to store the list of partner residues. + + Returns: + List[(Residue, Residue)]: A list of tuples, each containing a residue from residues1 and a residue + from residues2 that are interacting. + """ + + # Flatten atoms from residues2 and keep a mapping to their residues + atoms2_coords = [] + atoms2_res_map = [] + for r2 in residues2: + for a2 in r2.atoms: + atoms2_coords.append((a2.x, a2.y, a2.z)) + atoms2_res_map.append(r2) + + # Create a KD-tree for the second set of residues + tree = SpaceQuery(atoms2_coords) + + interacting_pairs = [] + + # For each residue in residues1, find interacting residues in residues2 + for r1 in residues1: + found_partner_residues = set() + + for a1 in r1.atoms: + # Query the KD-tree with the coordinates of a single atom from residue1 + idx1, idx2 = tree.query_partners([(a1.x, a1.y, a1.z)], cutoff) + # idx1 are indices in atoms2_coords + for partner_index in idx1: + found_partner_residues.add(atoms2_res_map[partner_index]) + + for r2 in found_partner_residues: + interacting_pairs.append((r1, r2)) + + # Assign attributes if requested + if assign_attribute: + for (res_a, res_b) in interacting_pairs: + # Update res_a's attribute + if res_a.has_attribute(key): + partners = res_a.get_attribute(key) + else: + partners = [] + partners.append(res_b) + res_a.set_attribute(key, partners) + + # Update res_b's attribute + if res_b.has_attribute(key): + partners = res_b.get_attribute(key) + else: + partners = [] + partners.append(res_a) + res_b.set_attribute(key, partners) + + return interacting_pairs \ No newline at end of file diff --git a/tests/quick_start_guide.py b/tests/quick_start_guide.py index 232223a..3bdcd64 100644 --- a/tests/quick_start_guide.py +++ b/tests/quick_start_guide.py @@ -662,6 +662,33 @@ def properties_interface_residues(): if residue.get_attribute("ca_in_interface"): print(residue.id) +def properties_interface_atom_pairs(): + from protkit.file_io import PDBIO + from protkit.properties import Interface + + protein = PDBIO.load("1ahw.pdb")[0] + atoms1 = list(protein.filter_atoms(chain_criteria=[("chain_id", ["A", "B"])])) + atoms2 = list(protein.filter_atoms(chain_criteria=[("chain_id", ["C"])])) + + pairs = Interface.interface_atom_pairs(atoms1, atoms2, cutoff=5.0, assign_attribute=True) + + # pairs is now a list of (atom_from_A, atom_from_B) that interact. + for aA, aB in pairs: + print(f"{aA.id} interacts with {aB.id}") + +def properties_interface_residue_pairs(): + from protkit.file_io import PDBIO + from protkit.properties import Interface + + protein = PDBIO.load("1ahw.pdb")[0] + residues1 = list(protein.filter_residues(chain_criteria=[("chain_id", ["A", "B"])])) + residues2 = list(protein.filter_residues(chain_criteria=[("chain_id", ["C"])])) + + pairs = Interface.interface_residue_pairs(residues1, residues2, cutoff=5.0, assign_attribute=True) + + for rA, rB in pairs: + print(f"{rA.id} interacts with {rB.id}") + def tools_reduce(): from protkit.tools.reduce_adaptor import ReduceAdaptor from protkit.file_io import ProtIO @@ -762,10 +789,12 @@ def ml_dataframe3(): # properties_circular_variance() # -> double check first residue # # properties_interface_atoms() # # properties_interface_residues() -# +properties_interface_atom_pairs() +properties_interface_residue_pairs() + # tools_reduce() # tools_freesasa() # ml_dataframe() # ml_dataframe2() -ml_dataframe3() \ No newline at end of file +# ml_dataframe3() \ No newline at end of file