Vendor the IUPAC protein letter tables into pori_python/ipr/util.py so that
pori_python/ipr/inputs.py no longer imports Bio.Data.IUPACData, and remove
biopython from install_requires in setup.cfg.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts match the @@ headers, but the
4-space indents, the position of blank context lines, and the empty section
headers on the inputs.py hunks are reconstructed - confirm against the tree.

diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py
index 3cfe526..3212f3f 100644
--- a/pori_python/ipr/inputs.py
+++ b/pori_python/ipr/inputs.py
@@ -9,7 +9,6 @@
 import os
 import pandas as pd
 import re
-from Bio.Data.IUPACData import protein_letters_3to1
 from numpy import nan
 from typing import Any, Callable, Dict, Iterable, List, Set, Tuple, cast
 
@@ -31,7 +30,7 @@
     TMB_SIGNATURE,
     TMB_SIGNATURE_VARIANT_TYPE,
 )
-from .util import hash_key, logger, pandas_falsy
+from .util import hash_key, logger, pandas_falsy, protein_letters_3to1
 
 protein_letters_3to1.setdefault("Ter", "*")
 
diff --git a/pori_python/ipr/util.py b/pori_python/ipr/util.py
index d2aca07..86b0e0f 100644
--- a/pori_python/ipr/util.py
+++ b/pori_python/ipr/util.py
@@ -171,3 +171,37 @@ def get_preferred_gene_name(
 def pandas_falsy(field: Any) -> bool:
     """Check if a field is python falsy or pandas null."""
     return bool(pd.isnull(field) or not field)
+
+
+# the below is copied from
+# https://github.com/biopython/biopython/blob/master/Bio/Data/IUPACData.py
+# to allow us to remove otherwise unnecessary biopython dependency
+
+protein_letters_1to3 = {
+    "A": "Ala",
+    "C": "Cys",
+    "D": "Asp",
+    "E": "Glu",
+    "F": "Phe",
+    "G": "Gly",
+    "H": "His",
+    "I": "Ile",
+    "K": "Lys",
+    "L": "Leu",
+    "M": "Met",
+    "N": "Asn",
+    "P": "Pro",
+    "Q": "Gln",
+    "R": "Arg",
+    "S": "Ser",
+    "T": "Thr",
+    "V": "Val",
+    "W": "Trp",
+    "Y": "Tyr",
+}
+protein_letters_1to3_extended = {
+    **protein_letters_1to3,
+    **{"B": "Asx", "X": "Xaa", "Z": "Glx", "J": "Xle", "U": "Sec", "O": "Pyl"},
+}
+
+protein_letters_3to1 = {value: key for key, value in protein_letters_1to3.items()}
diff --git a/setup.cfg b/setup.cfg
index b5d3df0..68efe3d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -31,7 +31,6 @@ python_requires = >=3.9
 dependency_links = []
 include_package_data = true
 install_requires =
-    biopython
     jsonschema
     pandas>=1.1.0
     requests