From 7f2e60795fb7a9c5cb34ced0bf47648658bbd4f8 Mon Sep 17 00:00:00 2001 From: AJPreto Date: Thu, 30 Apr 2026 15:58:01 +0000 Subject: [PATCH] fix: resolve breaking issues in docking pipeline - Makefile: expand LD_LIBRARY_PATH with nvidia CUDA library paths to fix DiffDock NVRTC crash at runtime - scripts/run_guild.py: add --no-decoys flag to allow running without a decoy file present - guild/bulk.py: add PROTEINS_FOLDER import, prefer single-chain PDB as Boltz2 template, and retry without template on empty manifest (fixes Boltz2 template parsing IndexError) --- Makefile | 4 +-- guild/bulk.py | 63 +++++++++++++++++++++++++++++++++++++++++--- scripts/run_guild.py | 7 +++++ 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 46353ee..9806320 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ run-boltz: _prepare-passwd $(DOCKER_COMMON) \ --gpus all \ --shm-size=8g \ - -e LD_LIBRARY_PATH=/opt/localcolabfold/.pixi/envs/default/lib:/usr/local/lib \ + -e LD_LIBRARY_PATH=/opt/localcolabfold/.pixi/envs/default/lib:/usr/local/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cu13/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cuda_nvrtc/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cudnn/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cublas/lib \ guild:latest \ python $(MASTER_SCRIPT) \ --project $(PROJECT) \ @@ -124,7 +124,7 @@ run-guild: _prepare-passwd $(DOCKER_COMMON) \ --gpus all \ --shm-size=8g \ - -e LD_LIBRARY_PATH=/opt/localcolabfold/.pixi/envs/default/lib:/usr/local/lib \ + -e LD_LIBRARY_PATH=/opt/localcolabfold/.pixi/envs/default/lib:/usr/local/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cu13/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cuda_nvrtc/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cudnn/lib:/app/.venv/lib/python3.10/site-packages/nvidia/cublas/lib \ guild:latest \ python $(MASTER_SCRIPT) \ --project $(PROJECT) \ diff --git a/guild/bulk.py b/guild/bulk.py index 8c4a549..aaeeb5d 100644 --- a/guild/bulk.py +++ b/guild/bulk.py @@ -59,6 +59,7 @@ PROTEIN_CONF_ID, PROTEIN_ID, PROTEIN_PATH, + PROTEINS_FOLDER, # Scores lists RP_SCORES_COLUMNS, # Dictionaries @@ -554,23 +555,77 @@ def run_docking(self): continue os.makedirs(f"{current_batch_folder}/{BOLTZ_FOLDER}", exist_ok=True) + + # Use single-chain PDB as template to avoid Boltz multi-chain parsing errors + boltz_template_file = ( + f"{current_batch_folder}/{PROTEINS_FOLDER}/" + f"{unique_protein_configuration_id}_single_chain_clean.pdb" + ) + if not os.path.exists(boltz_template_file): + # Fallback to original protein path if single-chain not available + boltz_template_file = current_protein_path + + yaml_file = f"{current_batch_folder}/{BOLTZ_FOLDER}/{run_id}_boltz.yaml" + boltz_out_dir = f"{current_batch_folder}/{BOLTZ_FOLDER}" + generate_boltz_yaml( protein_sequence=current_protein_sequence, protein_chain=current_protein_chain, ligand_sequences=[ligand_smiles], ligand_ids=["L"], - output_file=f"{current_batch_folder}/{BOLTZ_FOLDER}/{run_id}_boltz.yaml", - template_file=current_protein_path, + output_file=yaml_file, + template_file=boltz_template_file, pocket_contacts=pocket_contacts if pocket_contacts else None, msa_file=msa_file, ) deploy_boltz( - f"{current_batch_folder}/{BOLTZ_FOLDER}/{run_id}_boltz.yaml", - out_dir=f"{current_batch_folder}/{BOLTZ_FOLDER}", + yaml_file, + out_dir=boltz_out_dir, use_gpu=self.use_gpu, ) + # Check if Boltz produced valid output (manifest with records). + # Template PDB parsing can fail silently in Boltz2, resulting + # in an empty manifest. If that happens, retry without the template. + manifest_path = ( + f"{boltz_out_dir}/boltz_results_{run_id}_boltz/processed/manifest.json" + ) + if os.path.exists(manifest_path): + import json as _json + + with open(manifest_path) as _mf: + _manifest = _json.load(_mf) + if not _manifest.get("records"): + logger.warning( + f"Boltz2 produced empty manifest for {run_id} " + "(likely template parsing failure). Retrying without template..." + ) + # Remove the failed output directory + import shutil + + failed_dir = f"{boltz_out_dir}/boltz_results_{run_id}_boltz" + if os.path.isdir(failed_dir): + shutil.rmtree(failed_dir) + + # Regenerate YAML without template + generate_boltz_yaml( + protein_sequence=current_protein_sequence, + protein_chain=current_protein_chain, + ligand_sequences=[ligand_smiles], + ligand_ids=["L"], + output_file=yaml_file, + template_file=None, + pocket_contacts=pocket_contacts if pocket_contacts else None, + msa_file=msa_file, + ) + + deploy_boltz( + yaml_file, + out_dir=boltz_out_dir, + use_gpu=self.use_gpu, + ) + logger.info( f"Boltz docking completed for {current_batch}: " f"protein {unique_protein_configuration_id}, ligand {current_ligand_id}" diff --git a/scripts/run_guild.py b/scripts/run_guild.py index de89c78..3b5a32a 100644 --- a/scripts/run_guild.py +++ b/scripts/run_guild.py @@ -96,6 +96,12 @@ def parse_args() -> argparse.Namespace: default="chembl_36", help="ChEMBL version string.", ) + parser.add_argument( + "--no-decoys", + action="store_true", + default=False, + help="Disable decoy generation (useful when decoy file is not available).", + ) parser.add_argument( "--clean", action="store_true", @@ -192,6 +198,7 @@ def main() -> None: max_mol_wt=args.max_mol_wt, chembl_version=args.chembl_version, decoys=decoys_path, + use_decoys=not args.no_decoys, use_known_binders=args.use_known_binders, n_workers=1, )