From 251ba90d7e0aa089aa7401ba265dd1b68b3d4d71 Mon Sep 17 00:00:00 2001
From: Mike Henry <11765982+mikemhenry@users.noreply.github.com>
Date: Wed, 21 May 2025 08:07:15 -0700
Subject: [PATCH 1/2] first pass on a QA script

---
 devtools/qa/qa.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 devtools/qa/qa.py

diff --git a/devtools/qa/qa.py b/devtools/qa/qa.py
new file mode 100644
index 000000000..4c2b420a2
--- /dev/null
+++ b/devtools/qa/qa.py
@@ -0,0 +1,124 @@
+import pathlib
+import gufe
+from openfe.protocols.openmm_rfe import RelativeHybridTopologyProtocol
+from rdkit import Chem
+import openfe
+from openff.units import unit
+
+
+
+def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: str, system_group: str, system_name: str) -> None:
+
+    # find the system
+    system_dir = dataset_dir.joinpath(system_group, system_name)
+    # define the protocol to use
+    settings = RelativeHybridTopologyProtocol.default_settings()
+    settings.protocol_repeats = 1
+    settings.engine_settings.compute_platform = "CUDA"
+    settings.forcefield_settings.small_molecule_forcefield = "openff-2.2.1"
+    settings.simulation_settings.time_per_iteration = 2.5 * unit.picosecond
+    settings.simulation_settings.real_time_analysis_interval = 1 * unit.nanosecond
+    settings.output_settings.checkpoint_interval = 1 * unit.nanosecond
+    settings.solvation_settings.box_shape = 'dodecahedron'
+    settings.forcefield_settings.nonbonded_cutoff = 0.9 * unit.nanometer
+
+
+    # load the network
+    network = gufe.LigandNetwork.from_graphml(system_dir.joinpath("lomap_network.graphml").read_text())
+
+    # load the ligands with charges
+    supplier = Chem.SDMolSupplier(system_dir.joinpath(f"ligands_{charge_model}.sdf"), removeHs=False)
+    ligands_by_name = {}
+    for mol in supplier:
+        ofe_mol = gufe.SmallMoleculeComponent.from_rdkit(mol)
+        ligands_by_name[ofe_mol.name] = ofe_mol
+
+    # load the protein
+    protein = gufe.ProteinComponent.from_pdb_file(system_dir.joinpath("protein.pdb").as_posix())
+
+    # check for cofactors
+    co_file = system_dir.joinpath(f"cofactors_{charge_model}.sdf")
+    cofactors = None
+    if co_file.exists():
+        cofactors = []
+        supplier = Chem.SDMolSupplier(co_file, removeHs=False)
+        for mol in supplier:
+            cofactors.append(gufe.SmallMoleculeComponent.from_rdkit(mol))
+
+    # define the standard solvent
+    solvent = gufe.SolventComponent()
+
+    # build the transforms
+    transformations = []
+    for edge in network.edges:
+        # make the edge again using ligands with charges
+        new_edge = gufe.LigandAtomMapping(
+            componentA=ligands_by_name[edge.componentA.name],
+            componentB=ligands_by_name[edge.componentB.name],
+            componentA_to_componentB=edge.componentA_to_componentB
+        )
+
+        # create the transformations for the bound and solvent legs
+        for leg in ["solvent", "complex"]:
+            system_a_dict = {
+                "ligand": new_edge.componentA, "solvent": solvent
+            }
+            system_b_dict = {
+                "ligand": new_edge.componentB, "solvent": solvent
+            }
+            if leg == "complex":
+                system_a_dict["protein"] = protein
+                system_b_dict["protien"] = protein
+                settings.solvation_settings.solvent_padding = 1.0 * unit.nanometer
+
+                if cofactors is not None:
+                    for i, cofactor in enumerate(cofactors):
+                        cofactor_name = f"cofactor_{i}"
+                        system_a_dict[cofactor_name] = cofactor
+                        system_b_dict[cofactor_name] = cofactor
+            elif leg == "solvent":
+                settings.solvation_settings.solvent_padding = 1.5 * unit.nanometer
+
+
+            system_a = gufe.ChemicalSystem(system_a_dict)
+            system_b = gufe.ChemicalSystem(system_b_dict)
+
+            name = f"{leg}_{new_edge.componentA.name}_{new_edge.componentB.name}"
+            protocol = RelativeHybridTopologyProtocol(settings=settings)
+            transformation = openfe.Transformation(
+                stateA=system_a,
+                stateB=system_b,
+                mapping=new_edge,
+                protocol=protocol,
+                name=name
+            )
+
+            transformations.append(transformation)
+
+    # create the network, profit!
+    alchemical_network = gufe.AlchemicalNetwork(edges=transformations)
+    prefix = pathlib.Path(prefix)
+    system_dir = prefix / pathlib.Path(system_name)
+    transform_dir = system_dir / pathlib.Path(f"transformations_{system_name}")
+    transform_dir.mkdir(parents=True, exist_ok=True)
+    for transformation in alchemical_network.edges:
+        transformation_name = transformation.name or transformation.key
+        filename = pathlib.Path(f"{transformation_name}.json")
+        transformation.to_json(transform_dir / filename)
+    alchemical_network.to_json(system_dir / pathlib.Path("alchemical_network.json"))
+
+
+
+# Make a function that will pull this data down or point to a local path
+dataset_dir = pathlib.Path("/home/mmh/Projects/openfe-benchmarks/openfe_benchmarks/data/industry_benchmark_systems")
+# declare the charge settings and the system you want to generate the network for
+#CHARGE_MODEL = "antechamber_am1bcc"
+# or
+PREFIX = "QA-1.4.0-OpenMM-8.2"
+CHARGE_MODEL = "openeye_elf10"
+SYSTEM_GROUP = "jacs_set"
+SYSTEM_NAME = "tyk2"
+
+gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="jacs_set", system_name="tyk2")
+gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="jacs_set", system_name="p38")
+gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="mcs_docking_set", system_name="hne")

From d3e60487aa91901c4241fb8cd5db1f3346c47d3a Mon Sep 17 00:00:00 2001
From: Mike Henry <11765982+mikemhenry@users.noreply.github.com>
Date: Wed, 21 May 2025 12:01:44 -0700
Subject: [PATCH 2/2] now we handle systems where there is a charge change

---
 devtools/qa/qa.py | 103 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 23 deletions(-)

diff --git a/devtools/qa/qa.py b/devtools/qa/qa.py
index 4c2b420a2..bde867b7f 100644
--- a/devtools/qa/qa.py
+++ b/devtools/qa/qa.py
@@ -6,9 +6,34 @@
 from openff.units import unit
 
 
-
-def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: str, system_group: str, system_name: str) -> None:
-
+def get_alchemical_charge_difference(mapping) -> int:
+    """
+    Checks and returns the difference in formal charge between state A and B.
+
+    Parameters
+    ----------
+    mapping : dict[str, ComponentMapping]
+      Dictionary of mappings between transforming components.
+
+    Returns
+    -------
+    int
+      The formal charge difference between states A and B.
+      This is defined as sum(charge state A) - sum(charge state B)
+    """
+    chg_A = Chem.rdmolops.GetFormalCharge(mapping.componentA.to_rdkit())
+    chg_B = Chem.rdmolops.GetFormalCharge(mapping.componentB.to_rdkit())
+
+    return chg_A - chg_B
+
+
+def gen_transformations(
+    prefix: str,
+    dataset_dir: pathlib.Path,
+    charge_model: str,
+    system_group: str,
+    system_name: str,
+) -> None:
     # find the system
     system_dir = dataset_dir.joinpath(system_group, system_name)
     # define the protocol to use
@@ -19,22 +44,27 @@ def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: st
     settings.simulation_settings.time_per_iteration = 2.5 * unit.picosecond
     settings.simulation_settings.real_time_analysis_interval = 1 * unit.nanosecond
     settings.output_settings.checkpoint_interval = 1 * unit.nanosecond
-    settings.solvation_settings.box_shape = 'dodecahedron'
+    settings.solvation_settings.box_shape = "dodecahedron"
     settings.forcefield_settings.nonbonded_cutoff = 0.9 * unit.nanometer
 
-
     # load the network
-    network = gufe.LigandNetwork.from_graphml(system_dir.joinpath("lomap_network.graphml").read_text())
+    network = gufe.LigandNetwork.from_graphml(
+        system_dir.joinpath("lomap_network.graphml").read_text()
+    )
 
     # load the ligands with charges
-    supplier = Chem.SDMolSupplier(system_dir.joinpath(f"ligands_{charge_model}.sdf"), removeHs=False)
+    supplier = Chem.SDMolSupplier(
+        system_dir.joinpath(f"ligands_{charge_model}.sdf"), removeHs=False
+    )
     ligands_by_name = {}
     for mol in supplier:
         ofe_mol = gufe.SmallMoleculeComponent.from_rdkit(mol)
         ligands_by_name[ofe_mol.name] = ofe_mol
 
     # load the protein
-    protein = gufe.ProteinComponent.from_pdb_file(system_dir.joinpath("protein.pdb").as_posix())
+    protein = gufe.ProteinComponent.from_pdb_file(
+        system_dir.joinpath("protein.pdb").as_posix()
+    )
 
     # check for cofactors
     co_file = system_dir.joinpath(f"cofactors_{charge_model}.sdf")
@@ -55,17 +85,25 @@ def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: st
         new_edge = gufe.LigandAtomMapping(
             componentA=ligands_by_name[edge.componentA.name],
             componentB=ligands_by_name[edge.componentB.name],
-            componentA_to_componentB=edge.componentA_to_componentB
+            componentA_to_componentB=edge.componentA_to_componentB,
         )
 
+        # Check if we need to use charge changeing settings
+        if get_alchemical_charge_difference(new_edge) == 0:
+            settings.alchemical_settings.explicit_charge_correction = False
+            settings.simulation_settings.production_length = 5.0 * unit.nanosecond
+            settings.simulation_settings.n_replicas = 11
+            settings.lambda_settings.lambda_windows = 11
+        else:
+            settings.alchemical_settings.explicit_charge_correction = True
+            settings.simulation_settings.production_length = 20 * unit.nanosecond
+            settings.simulation_settings.n_replicas = 22
+            settings.lambda_settings.lambda_windows = 22
+
         # create the transformations for the bound and solvent legs
         for leg in ["solvent", "complex"]:
-            system_a_dict = {
-                "ligand": new_edge.componentA, "solvent": solvent
-            }
-            system_b_dict = {
-                "ligand": new_edge.componentB, "solvent": solvent
-            }
+            system_a_dict = {"ligand": new_edge.componentA, "solvent": solvent}
+            system_b_dict = {"ligand": new_edge.componentB, "solvent": solvent}
             if leg == "complex":
                 system_a_dict["protein"] = protein
                 system_b_dict["protien"] = protein
@@ -79,18 +117,18 @@ def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: st
             elif leg == "solvent":
                 settings.solvation_settings.solvent_padding = 1.5 * unit.nanometer
 
-
             system_a = gufe.ChemicalSystem(system_a_dict)
             system_b = gufe.ChemicalSystem(system_b_dict)
 
             name = f"{leg}_{new_edge.componentA.name}_{new_edge.componentB.name}"
+
             protocol = RelativeHybridTopologyProtocol(settings=settings)
             transformation = openfe.Transformation(
                 stateA=system_a,
                 stateB=system_b,
                 mapping=new_edge,
                 protocol=protocol,
-                name=name
+                name=name,
             )
 
             transformations.append(transformation)
@@ -108,17 +146,36 @@ def gen_transformations(prefix: str, dataset_dir: pathlib.Path, charge_model: st
     alchemical_network.to_json(system_dir / pathlib.Path("alchemical_network.json"))
 
 
-
 # Make a function that will pull this data down or point to a local path
-dataset_dir = pathlib.Path("/home/mmh/Projects/openfe-benchmarks/openfe_benchmarks/data/industry_benchmark_systems")
+dataset_dir = pathlib.Path(
+    "/home/mmh/Projects/openfe-benchmarks/openfe_benchmarks/data/industry_benchmark_systems"
+)
 # declare the charge settings and the system you want to generate the network for
-#CHARGE_MODEL = "antechamber_am1bcc"
+# CHARGE_MODEL = "antechamber_am1bcc"
 # or
 PREFIX = "QA-1.4.0-OpenMM-8.2"
 CHARGE_MODEL = "openeye_elf10"
 SYSTEM_GROUP = "jacs_set"
 SYSTEM_NAME = "tyk2"
 
-gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="jacs_set", system_name="tyk2")
-gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="jacs_set", system_name="p38")
-gen_transformations(prefix=PREFIX, dataset_dir=dataset_dir, charge_model=CHARGE_MODEL, system_group="mcs_docking_set", system_name="hne")
+gen_transformations(
+    prefix=PREFIX,
+    dataset_dir=dataset_dir,
+    charge_model=CHARGE_MODEL,
+    system_group="jacs_set",
+    system_name="tyk2",
+)
+gen_transformations(
+    prefix=PREFIX,
+    dataset_dir=dataset_dir,
+    charge_model=CHARGE_MODEL,
+    system_group="jacs_set",
+    system_name="p38",
+)
+gen_transformations(
+    prefix=PREFIX,
+    dataset_dir=dataset_dir,
+    charge_model=CHARGE_MODEL,
+    system_group="mcs_docking_set",
+    system_name="hne",
+)