From 401e1677a22a2a24399f2cecdd60aa0b004517da Mon Sep 17 00:00:00 2001 From: Goosang-Yu Date: Fri, 28 Jun 2024 17:36:08 +0900 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=99=88=20add=20gitignore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..effece49 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +## Git Ignore List +## Please DO NOT commit any unnecessary files! + +## About python +*.pyc +*.ipynb +__pycache__/ +.pytest_cache/ +.ipynb_checkpoints/ + +## about build +/build +/dist \ No newline at end of file From 379edbf407e82525612a26ab94c23b5d815e960f Mon Sep 17 00:00:00 2001 From: Goosang-Yu Date: Tue, 2 Jul 2024 13:02:31 +0900 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=94=A5=20Remove=20gitignore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index effece49..00000000 --- a/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -## Git Ignore List -## Please DO NOT commit any unnecessary files! - -## About python -*.pyc -*.ipynb -__pycache__/ -.pytest_cache/ -.ipynb_checkpoints/ - -## about build -/build -/dist \ No newline at end of file From 0d8a3df0098f008e5699f40a9d92f462810f3085 Mon Sep 17 00:00:00 2001 From: Goosang-Yu Date: Tue, 2 Jul 2024 13:04:51 +0900 Subject: [PATCH 3/3] =?UTF-8?q?=E2=9C=A8=20Add=20pdb=20fixer=20option?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm/utils/structure/protein_chain.py | 26 +++++++++++++++++++++++++- pyproject.toml | 2 ++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/esm/utils/structure/protein_chain.py b/esm/utils/structure/protein_chain.py index ab5ea6d6..535b0068 100644 --- a/esm/utils/structure/protein_chain.py +++ b/esm/utils/structure/protein_chain.py @@ -7,6 +7,8 @@ from typing import Sequence, TypeVar, Union import biotite.structure as bs +import openmm +import pdbfixer import brotli import msgpack import msgpack_numpy @@ -643,8 +645,30 @@ def from_rcsb( cls, pdb_id: str, chain_id: str = "detect", + fix_pdb: bool = False, ): - f: io.StringIO = rcsb.fetch(pdb_id, "pdb") # type: ignore + f: io.StringIO = rcsb.fetch(pdb_id, "pdb") # type: ignore (_io.StringIO) + + if fix_pdb: + fixer = pdbfixer.PDBFixer(pdbfile=f) + + # PDBFixer operations + fixer.findNonstandardResidues() + fixer.replaceNonstandardResidues() + fixer.findMissingResidues() + fixer.findMissingAtoms() + fixer.addMissingAtoms(seed=0) + fixer.addMissingHydrogens() + + # Create a StringIO object + f = io.StringIO() + + # Write the PDBFixer object to the StringIO object + openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions, f, keepIds=True) + + # Reset StringIO pointer to the beginning + f.seek(0) + return cls.from_pdb(f, chain_id=chain_id, id=pdb_id) @classmethod diff --git a/pyproject.toml b/pyproject.toml index b9b4c67b..d7078e0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ dependencies = [ "brotli", "attrs", "pandas", + "openmm", + "pdbfixer", ] [tool.setuptools]