From 48038c9d4f4719646371824508308e1db4776194 Mon Sep 17 00:00:00 2001 From: Melanie Vollmar Date: Mon, 17 Mar 2025 10:22:53 +0000 Subject: [PATCH 1/5] adding API specifications for LLM annotations to YAML file --- oas3.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/oas3.yaml b/oas3.yaml index 3c78ee5..f25d70e 100644 --- a/oas3.yaml +++ b/oas3.yaml @@ -257,6 +257,30 @@ paths: '400': description: bad input parameter + '/llm_annotations/summary/{pdbid}.json': + get: + parameters: + - name: pdb_id + description: A PDB identifier + in: path + required: true + example: '1cbs' + in: path + schema: + type: string + responses: + '200': + description: >- + Response is a collection of LLM annotations for a given PDB entry. + content: + application/json: + schema: + $ref: '#/components/schemas/llm_annotations' + '501': + description: If endpoint is not implemented + '400': + description: bad input parameter + servers: - url: 'https://www.ebi.ac.uk/pdbe/api/v2' description: PDBe API RESTful API service From 3b111efa4e2fd9ce8a6a0d8581861e4dcd6c4692 Mon Sep 17 00:00:00 2001 From: Melanie Vollmar Date: Mon, 17 Mar 2025 15:58:25 +0000 Subject: [PATCH 2/5] added specifications for LLM annotations --- oas3.yaml | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/oas3.yaml b/oas3.yaml index f25d70e..bda1da6 100644 --- a/oas3.yaml +++ b/oas3.yaml @@ -257,7 +257,7 @@ paths: '400': description: bad input parameter - '/llm_annotations/summary/{pdbid}.json': + '/llm_annotations/summary/{pdb_id}.json': get: parameters: - name: pdb_id @@ -1853,3 +1853,90 @@ components: type: integer example: 1 description: "Remark R350" + + LLM_annotations: + title: llm_annotations + type: object + description: "LLM-derived annotations" + properties: + pdb_id: + title: PDB_id + type: object + properties: + datatype: + type: string + example: "ANNOTATIONS" + description: "The type of data represented" + data: + title: data + type: object + description: "The actual annotation data" + properties: + name: + type: string + example: "IUCr" + description: "Name of the resource, annotation, etc." + accession: + type: string + example: "IUCr" + description: "A unique identifier for the resource, annotation, etc." + dataType: + type: string + example: "IUCr" + description: "The data type provided" + residues: + title: residues + description: "A list of residue objects." + type: object + items: + startIndex: + type: integer + example: 42 + description: "(start) residue sequence position" + endIndex: + type: integer + example: 42 + description: "(end) residue sequence position" + indexType: + type: string + example: "PDB" + description: "source used for sequence indexing/counting" + startCode: + type: string + example: "ALA" + description: "(start) residue amino acid name" + endCode: + type: string + example: "ALA" + description: "(end) residue amino acid name" + additionalData: + type: array + items: + type: object + description: "List of annottaions for residue" + properties: + pubmedId: + type: integer + description: "A valid PubMed identifier" + example: 1234567 + pmcId: + type: string + description: "A valid PubMedCentral identifier" + example: "PMC1234567" + pdbResidue: + type: integer + description: "A PDB residue" + example: 42 + pdbChain: + type: string + description: "A PDB chain" + example: "A" + uniprotAccession: + type: string + description: "A UniProt accession" + example: "P12345" + uniprotResidue: + type: integer + description: "A UniProt residue" + example: 42 + From ef7f9be28be64ee4ec7eb11e9b750245e63343da Mon Sep 17 00:00:00 2001 From: Melanie Vollmar Date: Tue, 22 Apr 2025 14:19:21 +0100 Subject: [PATCH 3/5] adding updated description YAML file and generated model.py for API design --- model.py | 1382 +++++++++++++++++++++++++++++++++++++++++++++++++++++ oas3.yaml | 242 ++++++---- 2 files changed, 1536 insertions(+), 88 deletions(-) create mode 100644 model.py diff --git a/model.py b/model.py new file mode 100644 index 0000000..25b4fb4 --- /dev/null +++ b/model.py @@ -0,0 +1,1382 @@ +# generated by datamodel-codegen: +# filename: oas3.yaml +# timestamp: 2025-04-22T13:16:57+00:00 + +from __future__ import annotations + +from enum import Enum +from typing import List, Optional, Union + +from pydantic import BaseModel, Field + + +class UniprotEntry(BaseModel): + ac: str = Field(..., description='UniProt accession', example='P00520') + id: Optional[str] = Field( + None, description='UniProt identifier', example='ABL1_MOUSE' + ) + uniprot_checksum: Optional[str] = Field( + None, + description='CRC64 checksum of the UniProt sequence', + example='5F9BA1D4C7DE6925', + ) + sequence_length: Optional[int] = Field( + None, description='Length of the UniProt sequence', example=76 + ) + segment_start: Optional[int] = Field( + None, + description='1-indexed first residue of the UniProt sequence segment', + example=1, + ) + segment_end: Optional[int] = Field( + None, + description='1-indexed last residue of the UniProt sequence segment', + example=86, + ) + + +class PdbEntry(BaseModel): + entry_id: str = Field(..., description='PDB entry identifier', example='3bow') + chain_id: str = Field(..., description='PDB chain identifier', example='A') + mapped_uniprot: Optional[str] = Field( + None, description='UniProt accession mapped to the PDB entry', example='P12345' + ) + uniprot_start: int = Field( + ..., description='1-indexed first residue in the mapped UniProt', example=1 + ) + uniprot_end: int = Field( + ..., description='1-indexed last residue in the mapped UniProt', example=100 + ) + + +class ModelCategory(Enum): + EXPERIMENTALLY_DETERMINED = 'EXPERIMENTALLY DETERMINED' + TEMPLATE_BASED = 'TEMPLATE-BASED' + AB_INITIO = 'AB-INITIO' + CONFORMATIONAL_ENSEMBLE = 'CONFORMATIONAL ENSEMBLE' + + +class ModelFormat(Enum): + PDB = 'PDB' + MMCIF = 'MMCIF' + BCIF = 'BCIF' + + +class ModelType(Enum): + ATOMIC = 'ATOMIC' + DUMMY = 'DUMMY' + MIX = 'MIX' + + +class EnsembleSampleFormat(Enum): + PDB = 'PDB' + MMCIF = 'MMCIF' + BCIF = 'BCIF' + + +class ExperimentalMethod(Enum): + ELECTRON_CRYSTALLOGRAPHY = 'ELECTRON CRYSTALLOGRAPHY' + ELECTRON_MICROSCOPY = 'ELECTRON MICROSCOPY' + EPR = 'EPR' + FIBER_DIFFRACTION = 'FIBER DIFFRACTION' + FLUORESCENCE_TRANSFER = 'FLUORESCENCE TRANSFER' + INFRARED_SPECTROSCOPY = 'INFRARED SPECTROSCOPY' + NEUTRON_DIFFRACTION = 'NEUTRON DIFFRACTION' + X_RAY_POWDER_DIFFRACTION = 'X-RAY POWDER DIFFRACTION' + SOLID_STATE_NMR = 'SOLID-STATE NMR' + SOLUTION_NMR = 'SOLUTION NMR' + X_RAY_SOLUTION_SCATTERING = 'X-RAY SOLUTION SCATTERING' + THEORETICAL_MODEL = 'THEORETICAL MODEL' + X_RAY_DIFFRACTION = 'X-RAY DIFFRACTION' + HYBRID = 'HYBRID' + + +class ConfidenceType(Enum): + pLDDT = 'pLDDT' + QMEANDisCo = 'QMEANDisCo' + + +class OligomericState(Enum): + MONOMER = 'MONOMER' + HOMODIMER = 'HOMODIMER' + HETERODIMER = 'HETERODIMER' + HOMO_OLIGOMER = 'HOMO-OLIGOMER' + HETERO_OLIGOMER = 'HETERO-OLIGOMER' + + +class EntityType(Enum): + BRANCHED = 'BRANCHED' + MACROLIDE = 'MACROLIDE' + NON_POLYMER = 'NON-POLYMER' + POLYMER = 'POLYMER' + WATER = 'WATER' + + +class EntityPolyType(Enum): + CYCLIC_PSEUDO_PEPTIDE = 'CYCLIC-PSEUDO-PEPTIDE' + PEPTIDE_NUCLEIC_ACID = 'PEPTIDE NUCLEIC ACID' + POLYDEOXYRIBONUCLEOTIDE = 'POLYDEOXYRIBONUCLEOTIDE' + POLYDEOXYRIBONUCLEOTIDE_POLYRIBONUCLEOTIDE_HYBRID = ( + 'POLYDEOXYRIBONUCLEOTIDE/POLYRIBONUCLEOTIDE HYBRID' + ) + POLYPEPTIDE_D_ = 'POLYPEPTIDE(D)' + POLYPEPTIDE_L_ = 'POLYPEPTIDE(L)' + POLYRIBONUCLEOTIDE = 'POLYRIBONUCLEOTIDE' + OTHER = 'OTHER' + + +class IdentifierCategory(Enum): + UNIPROT = 'UNIPROT' + RFAM = 'RFAM' + CCD = 'CCD' + SMILES = 'SMILES' + INCHI = 'INCHI' + INCHIKEY = 'INCHIKEY' + + +class Entity(BaseModel): + entity_type: EntityType = Field( + ..., + description='The type of the molecular entity; similar to _entity.type in mmCIF', + example='POLYMER', + ) + entity_poly_type: Optional[EntityPolyType] = Field( + None, + description='The type of the molecular entity; similar to _entity_poly.type in mmCIF', + example='PEPTIDE NUCLEIC ACID', + ) + identifier: Optional[str] = Field( + None, description='Identifier of the molecule', example='Q13033' + ) + identifier_category: Optional[IdentifierCategory] = Field( + None, description='Category of the identifier', example='UNIPROT' + ) + description: str = Field( + ..., description='A textual label of the molecule', example='Striatin-3' + ) + chain_ids: List[str] + + +class SummaryItems(BaseModel): + model_identifier: str = Field( + ..., description='Identifier of the model, such as PDB id', example='8kfa' + ) + model_category: ModelCategory = Field( + ..., description='Category of the model', example='TEMPLATE-BASED' + ) + model_url: str = Field( + ..., + description='URL of the model coordinates', + example='https://www.ebi.ac.uk/pdbe/static/entry/1t29_updated.cif', + ) + model_format: ModelFormat = Field( + ..., description='File format of the coordinates', example='MMCIF' + ) + model_type: Optional[ModelType] = Field( + None, + description='Defines if the coordinates are atomic-level or contains dummy atoms (e.g. SAXS models), or a mix of both (e.g. hybrid models)\n', + example='ATOMIC', + ) + model_page_url: Optional[str] = Field( + None, + description='URL of a web page of the data provider that show the model', + example='https://alphafold.ebi.ac.uk/entry/Q5VSL9', + ) + provider: str = Field( + ..., description='Name of the model provider', example='SWISS-MODEL' + ) + number_of_conformers: Optional[float] = Field( + None, + description='The number of conformers in a conformational ensemble', + example=42, + ) + ensemble_sample_url: Optional[str] = Field( + None, + description='URL of a sample of conformations from a conformational ensemble', + example='https://proteinensemble.org/api/ensemble_sample/PED00001e001', + ) + ensemble_sample_format: Optional[EnsembleSampleFormat] = Field( + None, + description='File format of the sample coordinates, e.g. PDB', + example='PDB', + ) + created: str = Field( + ..., + description='Date of release of model generation in the format of YYYY-MM-DD', + example='2021-12-21', + ) + sequence_identity: float = Field( + ..., + description='Sequence identity in the range of [0,1] of the model to the UniProt sequence\n', + example=0.97, + ) + uniprot_start: int = Field( + ..., + description='1-indexed first residue of the model according to UniProt sequence numbering\n', + example=1, + ) + uniprot_end: int = Field( + ..., + description='1-indexed last residue of the model according to UniProt sequence numbering\n', + example=142, + ) + coverage: float = Field( + ..., + description='Fraction in range of [0, 1] of the UniProt sequence covered by the model. This is calculated as (uniprot_end - uniprot_start + 1) / uniprot_sequence_length\n', + example=0.4, + ) + experimental_method: Optional[ExperimentalMethod] = Field( + None, + description='Experimental method used to determine the structure, if applicable', + ) + resolution: Optional[float] = Field( + None, + description='The resolution of the model in Angstrom, if applicable', + example=1.4, + ) + confidence_type: Optional[ConfidenceType] = Field( + None, + description='Type of the confidence measure. This is required for theoretical models.\n', + example='QMEANDisCo', + ) + confidence_version: Optional[str] = Field( + None, + description='Version of confidence measure software used to calculate quality. This is required for theoretical models.\n', + example='v1.0.2', + ) + confidence_avg_local_score: Optional[float] = Field( + None, + description='Average of the confidence measures in the range of [0,1] for QMEANDisCo and [0,100] for pLDDT. Please contact 3D-Beacons developers if other estimates are to be added. This is required for theoretical models.\n', + example=0.95, + ) + oligomeric_state: Optional[OligomericState] = Field( + None, description='Oligomeric state of the model', example='MONOMER' + ) + preferred_assembly_id: Optional[str] = Field( + None, + description='Identifier of the preferred assembly in the model', + example='1A', + ) + entities: List[Entity] = Field( + ..., description='A list of molecular entities in the model' + ) + + +class ExperimentalMethod1(Enum): + ELECTRON_CRYSTALLOGRAPHY = 'ELECTRON CRYSTALLOGRAPHY' + ELECTRON_MICROSCOPY = 'ELECTRON MICROSCOPY' + EPR = 'EPR' + FIBER_DIFFRACTION = 'FIBER DIFFRACTION' + FLUORESCENCE_TRANSFER = 'FLUORESCENCE TRANSFER' + INFRARED_SPECTROSCOPY = 'INFRARED SPECTROSCOPY' + NEUTRON_DIFFRACTION = 'NEUTRON DIFFRACTION' + POWDER_DIFFRACTION = 'POWDER DIFFRACTION' + SOLID_STATE_NMR = 'SOLID-STATE NMR' + SOLUTION_NMR = 'SOLUTION NMR' + SOLUTION_SCATTERING = 'SOLUTION SCATTERING' + THEORETICAL_MODEL = 'THEORETICAL MODEL' + X_RAY_DIFFRACTION = 'X-RAY DIFFRACTION' + HYBRID = 'HYBRID' + + +class Template(BaseModel): + template_id: str = Field( + ..., description='Identifier of the template', example='2aqa' + ) + chain_id: str = Field( + ..., + description='Identifier of the chain of the template; this is label_asym_id in mmCIF', + example='C', + ) + template_sequence_identity: float = Field( + ..., + description='Sequence identity of the template with the UniProt accession, in the range of [0,1]\n', + example=0.97, + ) + last_updated: str = Field( + ..., + description='Date of release of the last update in the format of YYYY-MM-DD\n', + example='2021-08-06', + ) + provider: str = Field(..., description='Provider of the template', example='PDB') + experimental_method: ExperimentalMethod1 = Field( + ..., + description='Experimental method used to determine the template', + example='HYBRID', + ) + resolution: float = Field( + ..., description='Resolution of the template, in Angstrom', example=2.1 + ) + preferred_assembly_id: Optional[str] = Field( + None, + description='Identifier of the preferred assembly of the template', + example='1', + ) + + +class Seqres(BaseModel): + aligned_sequence: str = Field( + ..., description='Sequence of the model', example='AAGTGHLKKKYT...' + ) + from_: int = Field( + ..., alias='from', description='1-indexed first residue', example=32 + ) + to: int = Field(..., description='1-indexed last residue', example=976) + + +class Uniprot(BaseModel): + aligned_sequence: str = Field( + ..., + description='Sequence of the UniProt accession', + example='AAGTGHLKKKYTAAGTGHLKKKYT...', + ) + from_: int = Field( + ..., alias='from', description='1-indexed first residue', example=23 + ) + to: int = Field(..., description='1-indexed last residue', example=868) + + +class Residue(BaseModel): + confidence: Optional[float] = Field( + None, description='Confidence score in the range of [0,1]', example=0.99 + ) + model_residue_label: int = Field(..., description='Model residue index', example=1) + uniprot_residue_number: int = Field( + ..., description='UniProt residue index', example=1 + ) + + +class Segment(BaseModel): + templates: Optional[List[Template]] = Field( + None, description='Information on the template(s) used for the model' + ) + seqres: Seqres = Field(..., description='Information on the sequence of the model') + uniprot: Uniprot + residues: List[Residue] + + +class Chain(BaseModel): + chain_id: str + segments: Optional[List[Segment]] = None + + +class Chains(BaseModel): + __root__: List[Chain] + + +class LigandItem(BaseModel): + id: str = Field(..., description='Three-letter code of the ligand', example='IHP') + name: str = Field( + ..., description='Name of the small ligand', example='INOSITOL HEXAKISPHOSPHATE' + ) + formula: str = Field( + ..., + description='Chemical composition formula of the ligand', + example='C6 H18 O24 P6', + ) + inchikey: str = Field( + ..., description='InChIKey of the ligand', example='IMQLKJBTEOYOSI-GPIVLXJGSA-N' + ) + + +class Type(Enum): + HELIX = 'HELIX' + SHEET = 'SHEET' + COIL = 'COIL' + + +class RegionItem(BaseModel): + start: int = Field( + ..., description='The first position of the annotation', example=23 + ) + end: int = Field(..., description='The last position of the annotation', example=42) + + +class SecondaryStructureItem(BaseModel): + type: Type = Field( + ..., description='Type of the secondary structure element', example='HELIX' + ) + region: Optional[List[RegionItem]] = None + + +class Type1(Enum): + CARBOHYD = 'CARBOHYD' + DOMAIN = 'DOMAIN' + CA_BIND = 'CA_BIND' + DNA_BIND = 'DNA_BIND' + NP_BIND = 'NP_BIND' + ACT_SITE = 'ACT_SITE' + METAL = 'METAL' + BINDING = 'BINDING' + NON_STD = 'NON_STD' + MOD_RES = 'MOD_RES' + DISULFID = 'DISULFID' + MUTAGEN = 'MUTAGEN' + + +class Region(BaseModel): + start: int = Field( + ..., description='The first position of the annotation', example=23 + ) + end: int = Field(..., description='The last position of the annotation', example=42) + + +class FeatureItem(BaseModel): + type: Type1 = Field(..., description='Type of the annotation', example='ACT_SITE') + description: str = Field( + ..., + description='Description/Label of the annotation', + example='Pfam N1221 (PF07923)', + ) + residues: Optional[List[int]] = Field( + None, description='An array of residue indices' + ) + regions: Optional[List[Region]] = None + + +class Annotations(BaseModel): + accession: str = Field(..., description='A UniProt accession', example='P00734') + id: Optional[str] = Field( + None, description='A UniProt identifier', example='FGFR2_HUMAN' + ) + sequence: str = Field( + ..., description='The sequence of the protein', example='AFFGVAATRKL' + ) + ligand: Optional[List[LigandItem]] = Field( + None, description='Contains ligand annotations' + ) + secondary_structure: Optional[List[SecondaryStructureItem]] = None + feature: Optional[List[FeatureItem]] = None + + +class MappingAccessionType(Enum): + uniprot = 'uniprot' + pfam = 'pfam' + + +class ModelCategory1(Enum): + EXPERIMENTALLY_DETERMINED = 'EXPERIMENTALLY DETERMINED' + TEMPLATE_BASED = 'TEMPLATE-BASED' + AB_INITIO = 'AB-INITIO' + CONFORMATIONAL_ENSEMBLE = 'CONFORMATIONAL ENSEMBLE' + DEEP_LEARNING = 'DEEP-LEARNING' + + +class ModelType1(Enum): + single = 'single' + complex = 'complex' + + +class Metadata(BaseModel): + mappingAccession: str = Field( + ..., + description='Accession/identifier of the entity the model is mapped to', + example='P38398', + ) + mappingAccessionType: MappingAccessionType = Field( + ..., + description='The name of the data provider the model is mapped to', + example='uniprot', + ) + start: int = Field( + ..., + description='The index of the first residue of the model according to the mapping', + example=1, + ) + end: int = Field( + ..., + description='The index of the last residue of the model according to the mapping', + example=103, + ) + modelCategory: ModelCategory1 = Field( + ..., description='Category of the model', example='TEMPLATE-BASED' + ) + modelType: ModelType1 = Field( + ..., description='Monomeric or complex strutures', example='single' + ) + + +class ModificationFlag(Enum): + Y = 'Y' + N = 'N' + + +class WwPdbInfo(BaseModel): + defined_at: Optional[str] = Field( + None, + description='The date the chemical component was defined in wwPDB', + example='1999-07-08 00:00:00', + ) + modified: Optional[str] = Field( + None, + description='The modified date of the chemical componenet in wwPDB', + example='2011-06-04 00:00:00', + ) + modification_flag: Optional[ModificationFlag] = Field( + None, + description='Y/N denoting the modification status of the chemical component in wwPDB', + example='N', + ) + polymer_type: Optional[str] = Field( + None, + description='This flag denotes if the chemical component is a polymer or non-polymer in wwPDB', + example='NON-POLYMER', + ) + standard_parent: Optional[str] = Field( + None, + description='The standard chemical component defined in wwPDB', + example='null', + ) + + +class FunctionalAnnotations(Enum): + Drug_like = 'Drug-like' + Co_factor_like = 'Co-factor-like' + Reactant_like = 'Reactant-like' + + +class CrossLink(BaseModel): + resource: Optional[str] = Field( + None, description='The external resource name', example='BindingDb' + ) + resource_id: Optional[str] = Field( + None, description='The external resource id', example='50366480' + ) + + +class Synonyms(BaseModel): + origin: Optional[str] = Field( + None, + description='The resource which provides synonym for the chemical component', + example='DrugBank', + ) + value: Optional[str] = Field( + None, description='The synonym provided by the resource', example='Striadyne' + ) + + +class PhysChemProperties(BaseModel): + crippen_mr: Optional[float] = Field( + None, + description='Wildman-Crippen molar refractivity is a common descriptor accounting for molecular size and polarizability', + example=92.446, + ) + num_atom_stereo_centers: Optional[int] = Field( + None, + description='Number of atoms with four attachments different from each other', + example=6, + ) + crippen_clog_p: Optional[float] = Field( + None, + description='Octanol/Water partition coeficient predicted using Wildman-Crippen method', + example=-2.438, + ) + num_rings: Optional[int] = Field(None, description='Number of rings', example=3) + num_rotatable_bonds: Optional[int] = Field( + None, + description='Number of single bonds, not part of a ring bound to a nonterminal heavy atom', + example=15, + ) + num_heteroatoms: Optional[int] = Field( + None, description='Number of non oxygen and non carbon atoms' + ) + fraction_csp3: Optional[float] = Field( + None, description='Fraction of C atoms that are SP3 hybridized', example=5.9 + ) + num_aromatic_rings: Optional[int] = Field( + None, description='Number of aromatic rings for the molecule', example=2 + ) + exactmw: Optional[float] = Field( + None, description='Total mass of the molecule', example=506.996 + ) + num_spiro_atoms: Optional[int] = Field( + None, + description='Atoms shared between rings that share exactly one atom', + example=0, + ) + num_heavy_atoms: Optional[int] = Field( + None, description='Number of non hydrogen atoms', example=31 + ) + num_aliphatic_rings: Optional[int] = Field( + None, description='Niumber of aliphatic rings', example=1 + ) + num_hbd: Optional[int] = Field( + None, description='Number of hydrogen bond donors', example=7 + ) + num_saturated_heterocycles: Optional[int] = Field( + None, description='Number of saturated heterocycles', example=1 + ) + tpsa: Optional[float] = Field( + None, description='Topological surface area', example=279.13 + ) + num_bridgehead_atoms: Optional[int] = Field( + None, + description='Number of atoms shared between rings that share at least two bonds', + example=0, + ) + num_aromatic_heterocycles: Optional[int] = Field( + None, + description='Number or aromatic rings with at least two different elements', + example=2, + ) + labute_asa: Optional[float] = Field( + None, + description="Accessible surface area accorging to the Labute' definition", + example=194.334, + ) + num_hba: Optional[int] = Field( + None, description='Number of hydrogen bond acceptors', example=18 + ) + num_amide_bonds: Optional[int] = Field( + None, description='Number of amide bonds', example=0 + ) + num_saturated_rings: Optional[int] = Field( + None, description='Number of saturated rings', example=1 + ) + lipinski_hba: Optional[float] = Field( + None, + description='Number of hydrogen bond acceptors according to Lipinsky definition', + example=18, + ) + num_unspec_atom_stereo_centers: Optional[int] = Field( + None, description='Number of unsuspected stereocenters' + ) + lipinski_hbd: Optional[float] = Field( + None, + description='Number of hydrogen bond donors according to Lipinsky definition', + example=8, + ) + num_heterocycles: Optional[int] = Field( + None, + description='Number or rings with at least two different elements', + example=3, + ) + num_aliphatic_heterocycles: Optional[int] = Field( + None, description='Number of aliphatic heterocycles', example=1 + ) + + +class CompoundSummary(BaseModel): + name: str = Field( + ..., + description='The name of the chemical component', + example="ADENOSINE-5'-TRIPHOSPHATE", + ) + released: Optional[bool] = Field( + None, + description='A flag denoting if the hetcode is released or not', + example=True, + ) + superseded_by: Optional[str] = Field( + None, + description='A hetcode which superseeds the hetcode in query', + example='null', + ) + formula: str = Field( + ..., + description='The chemical formula of the component', + example='C10 H16 N5 O13 P3', + ) + inchi: str = Field( + ..., + description='The full INCHI of the component', + example='InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1', + ) + inchi_key: str = Field( + ..., + description='INCHI Key of the component', + example='ZKHQWZAMYRWXGA-KQYNXXCUSA-N', + ) + smiles: Optional[List[str]] = Field( + None, + description='The SMILES representation of the component (could be multiple)', + example=['c1nc(c2c(n1)n(cn2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N'], + ) + ww_pdb_info: Optional[WwPdbInfo] = Field( + None, + description='An info object which provides details of the chemical component from wwPDB', + ) + functional_annotations: Optional[Union[List[str], FunctionalAnnotations]] = Field( + None, description='A list of functional annotations for the chemical component' + ) + cross_link: Optional[CrossLink] = Field( + None, + alias='cross-link', + description='Cross references for this chemical component from other resources', + ) + synonyms: Optional[Synonyms] = Field( + None, + description='A list of synomyms for the chemical component from other sources', + ) + phys_chem_properties: Optional[PhysChemProperties] = Field( + None, description='An object of physical chemical properties' + ) + + +class Stereoisomer(BaseModel): + chem_comp_id: Optional[str] = Field( + None, + description='Chemical component identifier, the so-called 3-letter code, but it need not be 3-letter long!', + example='GL0', + ) + name: Optional[str] = Field( + None, description='A name for the hetcode', example='beta-D-gulopyranose' + ) + + +class SameScaffoldItem(BaseModel): + chem_comp_id: Optional[str] = Field( + None, + description='Chemical component identifier, the so-called 3-letter code, but it need not be 3-letter long!', + example='9YW', + ) + name: Optional[str] = Field( + None, + description='A name for the hetcode', + example='(alpha-D-glucopyranosyloxy)acetic acid', + ) + substructure_match: Optional[List[str]] = Field( + None, + description='A list of atom names in the hetcode that match with the hetcode in the query', + example=['C1', 'C4', 'O5', 'C2', 'C3', 'C5'], + ) + similarity_score: Optional[float] = Field( + None, + description='Similarity score in the range of 0 to 1 defined by the PARITY method.', + example=0.75, + ) + + +class CompoundSimilarity(BaseModel): + stereoisomers: Optional[List[Stereoisomer]] = Field( + None, description='A list of stereoisomer hetcode objects' + ) + same_scaffold: Optional[List[SameScaffoldItem]] = Field( + None, description='A list of hetcode objects which is part of the same scaffold' + ) + + +class LigandRoleEnum(Enum): + reactant = 'reactant' + cofactor = 'cofactor' + drug_like = 'drug-like' + unknown = 'unknown' + + +class EnzymeClass(Enum): + oxidoreductases = 'oxidoreductases' + transferases = 'transferases' + hydrolases = 'hydrolases' + lyases = 'lyases' + isomerases = 'isomerases' + ligases = 'ligases' + and_translocases = 'and translocases' + + +class BoundDatum(BaseModel): + uniprot_accession: Optional[str] = Field( + None, description='UniProt accession number (AC)', example='D6D1V7' + ) + protein_name: Optional[str] = Field( + None, + description='Name of the protein for the given uniprot accession', + example='Glycosyl hydrolase family 71', + ) + ligand_role: Optional[List[LigandRoleEnum]] = Field( + None, description='Functional role of ligand' + ) + pdb_entries: Optional[List[str]] = Field( + None, + description='list of PDB structures corresponding to the protein (uniprot accession) to which a ligand bound', + example=[ + '4ad4', + '6far', + '5lyr', + '6hmh', + '4ad4', + '4v28', + '4ad5', + '6fam', + '6fwp', + ], + ) + representative_structure: Optional[str] = Field( + None, + description='Best representative structure. This is the structure with best (lowest) resolution with all/maximum number of ligand atoms resolved.', + example='6fwp', + ) + ec_number: Optional[List[str]] = Field( + None, + description='The Enzyme Commission Number based on the chemical reactions an enzyme catalyze.', + example=['3.2.1.130', '3.2.1.198'], + ) + enzyme_class: Optional[EnzymeClass] = Field( + None, + description='The Enzyme class name based on the chemical reactions an enzyme catalyze.', + example='hydrolases', + ) + + +class Structure(BaseModel): + total_pdb_structures: int = Field( + ..., + description='Total number of PDB structures to which given ligand is bound', + example=2400, + ) + total_unp_proteins: int = Field( + ..., + description='Total number of unique proteins (UniProt accession) to which a given ligand is bound', + example=1050, + ) + bound_data: Optional[List[BoundDatum]] = None + + +class AtomDetail(BaseModel): + ligand_atom: Optional[str] = None + protein_residue_atom: Optional[str] = None + interaction_type: Optional[str] = None + distance: Optional[float] = None + + +class InteractingLigandDetail(BaseModel): + entity_id: Optional[int] = None + chain_id: Optional[str] = None + residue_number: Optional[int] = None + author_residue_number: Optional[int] = None + author_insertion_code: Optional[str] = None + chem_comp_id: Optional[str] = None + atom_details: Optional[List[AtomDetail]] = None + + +class InteractingProteinResidue(BaseModel): + entity_id: Optional[int] = None + chain_id: Optional[str] = None + residue_number: Optional[int] = None + author_residue_number: Optional[int] = None + author_insertion_code: Optional[str] = None + chem_comp_id: Optional[str] = None + uniprot_accession: Optional[str] = None + interacting_ligand_details: Optional[List[InteractingLigandDetail]] = None + + +class InteractionDatum(BaseModel): + pdb_id: Optional[str] = None + interacting_protein_residues: Optional[List[InteractingProteinResidue]] = None + + +class Interaction(BaseModel): + total_interactions: Optional[int] = None + interaction_data: Optional[List[InteractionDatum]] = None + + +class TargetResidueName(Enum): + ALA = 'ALA' + ARG = 'ARG' + ASN = 'ASN' + ASP = 'ASP' + CYS = 'CYS' + GLU = 'GLU' + GLN = 'GLN' + GLY = 'GLY' + HIS = 'HIS' + ILE = 'ILE' + LEU = 'LEU' + LYS = 'LYS' + MET = 'MET' + PHE = 'PHE' + PRO = 'PRO' + SER = 'SER' + THR = 'THR' + TRP = 'TRP' + TYR = 'TYR' + VAL = 'VAL' + DA = 'DA' + DT = 'DT' + DG = 'DG' + DC = 'DC' + A = 'A' + G = 'G' + C = 'C' + U = 'U' + DU = 'DU' + + +class InteractionTypeEnum(Enum): + polar = 'polar' + hbond = 'hbond' + vdw_clash = 'vdw_clash' + vdw = 'vdw' + weak_hbond = 'weak_hbond' + carbonpi = 'carbonpi' + cationpi = 'cationpi' + weak_polar = 'weak_polar' + aromatic = 'aromatic' + + +class Interactions(BaseModel): + target_residue_atom_name: Optional[str] = Field( + None, description='Atom name of the interacting target residue', example='O1G' + ) + ligand_uniq_id: Optional[str] = Field( + None, + description='Ligand unique ID which is pdbid, entitiy_id, ?, and ? seperated by underscore.', + example='7esb_2_B_1', + ) + distance: Optional[float] = Field( + None, + description='Distance between the two interacting atoms of ligand and target', + example=3.49, + ) + interaction_type: Optional[List[InteractionTypeEnum]] = Field( + None, + description='Interaction type as given by Arpeggio.', + example=['polar', 'hbond', 'vdw_clash'], + ) + + +class Detail(BaseModel): + target_uniqid: Optional[str] = Field( + None, + description='Target unique ID which is pdbid, entitiy_id and residue_number seperated by underscore.', + example='7esb_1_242', + ) + interactions: Optional[Interactions] = Field( + None, description='Details of interactions.' + ) + + +class InteractingTargetResidueItem(BaseModel): + target_residue_name: Optional[TargetResidueName] = Field( + None, + description='Name of target residue which is interacting with the given ligand atom', + example='ARG', + ) + interaction_strength: Optional[float] = Field(None, example=2.1) + details: Optional[List[Detail]] = None + + +class InteractionOverview(BaseModel): + ligand_atom: Optional[str] = Field( + None, + description='Ligand atom name which is interacting with the target. This target is a polymer and can be either nucleic acid or protein.', + example='C1', + ) + interacting_target_residue: Optional[List[InteractingTargetResidueItem]] = Field( + None, + description='Details of all the target residue which are interacting with the given ligand atom', + ) + + +class InteractionType(Enum): + polar = 'polar' + hbond = 'hbond' + vdw_clash = 'vdw_clash' + vdw = 'vdw' + weak_hbond = 'weak_hbond' + carbonpi = 'carbonpi' + cationpi = 'cationpi' + weak_polar = 'weak_polar' + aromatic = 'aromatic' + + +class Interaction1(BaseModel): + interaction_type: Optional[InteractionType] = Field( + None, description='Interaction type as given by Arpeggio.', example='polar' + ) + interaction_count: Optional[int] = Field(None, example=1500) + interaction_strength: Optional[float] = Field(None, example=1.5) + target_uniqid: Optional[List[str]] = Field( + None, + description='Target unique ID which is pdbid, entitiy_id, residue_number and residue_atom_name seperated by underscore.', + example=['7esb_1_242_OG1', '7esb_1_300_C1', '1cbs_2_240_N1'], + ) + + +class InteractingTargetResidueItem1(BaseModel): + target_residue_name: Optional[TargetResidueName] = Field( + None, + description='Name of target residue which is interacting with the given ligand atom', + example='ARG', + ) + interaction_strength: Optional[float] = Field(None, example=2.1) + interaction_count: Optional[float] = Field(None, example=2500) + interactions: Optional[List[Interaction1]] = None + + +class InteractionStrength(BaseModel): + ligand_atom: Optional[str] = Field( + None, + description='Ligand atom name which is interacting with the target. This target is a polymer and can be either nucleic acid or protein.', + example='C1', + ) + interacting_target_residue: Optional[List[InteractingTargetResidueItem1]] = Field( + None, + description='Details of all the target residue which are interacting with the given ligand atom', + ) + + +class BoundDatum1(BaseModel): + uniprot_accession: Optional[str] = Field( + None, description='UniProt accession number (AC)', example='D6D1V7' + ) + protein_name: Optional[str] = Field( + None, + description='Name of the protein for the given uniprot accession', + example='Glycosyl hydrolase family 71', + ) + ligand_role: Optional[List[LigandRoleEnum]] = Field( + None, description='Functional role of ligand' + ) + pdb_entries: Optional[List[str]] = Field( + None, + description='list of PDB structures corresponding to the protein (uniprot accession) to which a ligand bound', + example=[ + '4ad4', + '6far', + '5lyr', + '6hmh', + '4ad4', + '4v28', + '4ad5', + '6fam', + '6fwp', + ], + ) + representative_structure: Optional[str] = Field( + None, + description='Best representative structure. This is the structure with best (lowest) resolution with all/maximum number of ligand atoms resolved.', + example='6fwp', + ) + ec_number: Optional[List[str]] = Field( + None, + description='The Enzyme Commission Number based on the chemical reactions an enzyme catalyze.', + example=['3.2.1.130', '3.2.1.198'], + ) + + +class SummaryCount(BaseModel): + total_pdb_structures: int = Field( + ..., + description='Total number of PDB structures to which given ligand is bound', + example=2400, + ) + total_unp_proteins: int = Field( + ..., + description='Total number of unique proteins (UniProt accession) to which a given ligand is bound', + example=1050, + ) + bound_data: Optional[List[BoundDatum1]] = None + + +class Bonds(BaseModel): + bond_distances: Optional[List[float]] = None + atom_site_1_chains: Optional[List[str]] = None + atom_site_1_residues: Optional[List[str]] = None + atom_site_1_label_asym_ids: Optional[List[str]] = None + atom_site_1_orig_label_asym_ids: Optional[List[str]] = None + atom_site_1_upn_accs: Optional[List[str]] = None + atom_site_1_upn_nums: Optional[List[str]] = None + atom_site_1_seq_nums: Optional[List[int]] = None + atom_site_1_label_seq_ids: Optional[List[int]] = None + atom_site_1_label_atom_ids: Optional[List[str]] = None + atom_site_1_inscodes: Optional[List[str]] = None + atom_site_2_chains: Optional[List[str]] = None + atom_site_2_residues: Optional[List[str]] = None + atom_site_2_label_asym_ids: Optional[List[str]] = None + atom_site_2_orig_label_asym_ids: Optional[List[str]] = None + atom_site_2_upn_accs: Optional[List[str]] = None + atom_site_2_upn_nums: Optional[List[str]] = None + atom_site_2_seq_nums: Optional[List[int]] = None + atom_site_2_label_seq_ids: Optional[List[int]] = None + atom_site_2_label_atom_ids: Optional[List[str]] = None + atom_site_2_inscodes: Optional[List[str]] = None + + +class MoleculeClass(Enum): + Protein = 'Protein' + Ligand = 'Ligand' + RNA = 'RNA' + DNA = 'DNA' + + +class Molecule(BaseModel): + molecule_id: Optional[str] = Field( + None, description='Identifier of the molecule on the interface', example='1' + ) + molecule_class: Optional[MoleculeClass] = Field( + None, description='Molecule type/class', example='Protein' + ) + chain_id: Optional[str] = Field( + None, description='PDB chain identifier of the molecule', example='A' + ) + residue_label_comp_ids: Optional[List[str]] = None + residue_seq_ids: Optional[List[str]] = None + residue_label_seq_ids: Optional[List[str]] = None + residue_ins_codes: Optional[List[str]] = None + residue_bonds: Optional[List[str]] = None + solvation_energies: Optional[List[float]] = None + accessible_surface_areas: Optional[List[float]] = None + buried_surface_areas: Optional[List[float]] = None + + +class Interface(BaseModel): + interface_id: Optional[int] = Field( + None, description='Interface identifier', example=1 + ) + interface_area: Optional[float] = Field( + None, description='[Area of interface (A^2)]', example=1427.5 + ) + solvation_energy: Optional[float] = Field( + None, description='[Solvation energy (kcal/mol)]', example=-18.22 + ) + stabilization_energy: Optional[float] = Field( + None, description='[Stabilisation energy (Kcal/mol)]', example=-28.59 + ) + p_value: Optional[float] = Field( + None, + description='[Probability that solvation energy gain for interface atom may be greater than binding energy]', + example=0.095, + ) + number_interface_residues: Optional[int] = Field( + None, description='Number of interface residues', example=1 + ) + number_hydrogen_bonds: Optional[int] = Field( + None, description='Number of interface hydrogen bonds', example=1 + ) + number_salt_bridges: Optional[int] = Field( + None, description='Number of interface salt bridges', example=1 + ) + number_covalent_bonds: Optional[int] = Field( + None, description='Number of interface covalent bonds', example=1 + ) + number_disulfide_bonds: Optional[int] = Field( + None, description='Number of interface disulfide bonds', example=1 + ) + number_other_bonds: Optional[int] = Field( + None, + description='Number of other contacts within a distance of 4 A and not classified as any of the other bonds ', + example=1, + ) + hydrogen_bonds: Optional[Bonds] = None + salt_bridges: Optional[Bonds] = None + disulfide_bonds: Optional[Bonds] = None + covalent_bonds: Optional[Bonds] = None + other_bonds: Optional[Bonds] = None + molecules: Optional[List[Molecule]] = None + + +class Assembly2(BaseModel): + id: Optional[str] = Field(None, description='Assembly identifier', example='1') + size: Optional[int] = Field(None, description='Size of the assembly', example=6) + interface_count: Optional[int] = Field( + None, description='[Number of interfaces]', example=1 + ) + score: Optional[str] = Field( + None, description='Macromolecular size of the assembly', example='2' + ) + macromolecular_size: Optional[int] = Field( + None, description='Macromolecular size of the assembly', example=2 + ) + dissociation_energy: Optional[float] = Field( + None, + description='[Maximal free energy of dissociation (kcal/mol)]', + example=15.61, + ) + accessible_surface_area: Optional[float] = Field( + None, description='[Accessible surface area (A^2)]', example=19395.3 + ) + buried_surface_area: Optional[float] = Field( + None, description='[Buried surface area (A^2)]', example=31514.17 + ) + entropy: Optional[float] = Field( + None, description='[Entropy change at dissociation ]', example=12.98 + ) + dissociation_area: Optional[float] = Field( + None, description='[Dissociation Interface Area (A^2)]', example=1427.5 + ) + solvation_energy_gain: Optional[float] = Field( + None, description='[Solvation energy gain (Kcal/mol)]', example=-35.28 + ) + number_of_uc: Optional[int] = Field( + None, description='number of assemblies in unit cells', example=1 + ) + number_of_dissociated_elements: Optional[int] = Field( + None, description='Number of dissociating parts', example=2 + ) + symmetry_number: Optional[int] = Field( + None, description='Symmetry number', example=2 + ) + formula: Optional[str] = Field( + None, description='[Formula]', example='A(2)a(2)b(2)' + ) + composition: Optional[str] = Field( + None, description='[Composition]', example='AAA[NA](2)[GOL](2)' + ) + R350: Optional[int] = Field(None, description='Remark R350', example=1) + + +class PdbId1(BaseModel): + assembly_id: Optional[str] = Field( + None, description='Assembly identifier', example='1' + ) + pisa_version: Optional[str] = Field( + None, description='The semantic version number of PISA', example='2.0.0' + ) + assembly: Optional[Assembly2] = Field(None, title='Assembly') + + +class Assembly1(BaseModel): + pdb_id: Optional[PdbId1] = Field(None, title='PDB_entry') + + +class IndexType(Enum): + PDB = 'PDB' + UNIPROT = 'UNIPROT' + + +class ResidueModel(BaseModel): + startIndex: Optional[int] = Field( + None, description='(start) residue sequence position', example=42 + ) + endIndex: Optional[int] = Field( + None, description='(end) residue sequence position', example=42 + ) + indexType: Optional[IndexType] = Field( + None, description='Type of index (PDB or UniProt).' + ) + startCode: Optional[str] = Field( + None, + description='Amino acid three-letter code for the start residue.', + example='PRO', + ) + endCode: Optional[str] = Field( + None, + description='Amino acid three-letter code for the end residue.', + example='PRO', + ) + + +class AdditionalData(BaseModel): + pubmedId: Optional[int] = Field( + None, description='A valid PubMed identifier', example=1234567 + ) + pmcId: Optional[str] = Field( + None, description='A valid PubMedCentral identifier', example='PMC1234567' + ) + pdbResidue: Optional[int] = Field(None, description='A PDB residue', example=42) + pdbChain: Optional[str] = Field(None, description='A PDB chain', example='A') + uniprotAccession: Optional[str] = Field( + None, description='A UniProt accession', example='P12345' + ) + uniprotResidue: Optional[int] = Field( + None, description='A UniProt residue', example=42 + ) + + +class Detailed(BaseModel): + summary: SummaryItems + chains: Chains + + +class Overview(BaseModel): + summary: SummaryItems + + +class Assembly(BaseModel): + mmsize: Optional[int] = Field(None, description='Size of the assembly', example=2) + dissociation_energy: Optional[float] = Field( + None, + description='[Maximal free energy of dissociation (kcal/mol)]', + example=15.61, + ) + accessible_surface_area: Optional[float] = Field( + None, description='[Accessible surface area (A^2)]', example=19395.3 + ) + buried_surface_area: Optional[float] = Field( + None, description='[Buried surface area (A^2)]', example=31514.17 + ) + entropy: Optional[float] = Field( + None, description='[Entropy change at dissociation ]', example=12.98 + ) + dissociation_area: Optional[float] = Field( + None, description='[Dissociation Interface Area (A^2)]', example=1427.5 + ) + solvation_energy_gain: Optional[float] = Field( + None, description='[Solvation energy gain (Kcal/mol)]', example=-35.28 + ) + formula: Optional[str] = Field( + None, description='[Formula]', example='A(2)a(2)b(2)' + ) + composition: Optional[str] = Field( + None, description='[Composition]', example='AAA[NA](2)[GOL](2)' + ) + interface_count: Optional[int] = Field( + None, description='[Number of interfaces]', example=1 + ) + interfaces: Optional[List[Interface]] = None + + +class PdbId(BaseModel): + assembly_id: Optional[str] = Field( + None, description='Assembly identifier', example='1' + ) + pisa_version: Optional[str] = Field( + None, description='The semantic version number of PISA', example='2.0.0' + ) + assembly: Optional[Assembly] = Field(None, title='Assembly') + + +class PISAInterfaces(BaseModel): + pdb_id: Optional[PdbId] = Field(None, title='PDB_id') + + +class Data(BaseModel): + name: Optional[str] = Field( + None, description='Name annotation provider', example='IUCr' + ) + accession: Optional[str] = Field( + None, + description='A unique identifier for the resource, annotation, etc.', + example='IUCr', + ) + residues: Optional[List[ResidueModel]] = Field( + None, description='List of residue objects.' + ) + additionalData: Optional[AdditionalData] = None + dataType: Optional[str] = Field( + None, description='Type of data provided in the section.', example='ANNOTATIONS' + ) + + +class UniprotSummary(BaseModel): + uniprot_entry: Optional[UniprotEntry] = None + structures: Optional[List[Overview]] = None + + +class UniprotDetails(BaseModel): + uniprot_entry: Optional[UniprotEntry] = None + structures: Optional[List[Detailed]] = None + + +class PdbSummary(BaseModel): + uniprot_entry: Optional[PdbEntry] = None + structures: Optional[List[Overview]] = None + + +class LlmAnnotationsSummary(BaseModel): + datatype: Optional[str] = Field( + None, description='The type of data represented', example='ANNOTATIONS' + ) + data: Optional[List[Data]] = Field(None, description='List of data annotations.') + + +class LlmAnnotationsChainResidue(BaseModel): + pdb_id: Optional[str] = Field(None, description='PDB identifier', example='1a2b') + chain_id: Optional[str] = Field(None, description='Chain identifier', example='A') + residue_id: Optional[str] = Field( + None, description='Residue identifier', example='42' + ) + datatype: Optional[str] = Field( + None, description='The type of data represented', example='ANNOTATIONS' + ) + data: Optional[List[Data]] = Field(None, description='List of data annotations.') diff --git a/oas3.yaml b/oas3.yaml index bda1da6..b4bb3c2 100644 --- a/oas3.yaml +++ b/oas3.yaml @@ -138,7 +138,6 @@ paths: $ref: '#/components/schemas/interaction_strength' '501': description: If endpoint is not implemented - '/summary_count/{hetcode}.json': get: parameters: @@ -158,7 +157,6 @@ paths: $ref: '#/components/schemas/summary_count' '501': description: If endpoint is not implemented - '/pisa/assembly/{pdbid}/{assemblyid}': get: summary: Assembly data per PDB assembly id @@ -256,7 +254,6 @@ paths: $ref: '#/components/schemas/Interface' '400': description: bad input parameter - '/llm_annotations/summary/{pdb_id}.json': get: parameters: @@ -265,7 +262,6 @@ paths: in: path required: true example: '1cbs' - in: path schema: type: string responses: @@ -275,7 +271,46 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/llm_annotations' + $ref: '#/components/schemas/llm_annotations_summary' + '501': + description: If endpoint is not implemented + '400': + description: bad input parameter + '/llm_annotations/summary/{pdb_id}/{chain_id}/{residue_id}.json': + get: + parameters: + - name: pdb_id + description: A PDB identifier + in: path + required: true + example: '1cbs' + in: path + schema: + type: string + - name: chain_id + description: A chain identifier for structure + in: path + required: true + example: 'A' + in: path + schema: + type: string + - name: residue_id + description: A chain identifier for structure + in: path + required: true + example: 'ARG11' + in: path + schema: + type: string + responses: + '200': + description: >- + Response is a collection of LLM annotations for specific residue in a selected chain for a given PDB entry. + content: + application/json: + schema: + $ref: '#/components/schemas/llm_annotations_chain_residue' '501': description: If endpoint is not implemented '400': @@ -1854,89 +1889,120 @@ components: example: 1 description: "Remark R350" - LLM_annotations: - title: llm_annotations + llm_annotations_summary: + title: llm_annotations_summary type: object - description: "LLM-derived annotations" + description: all LLM-derived annotations for a given PDB entry properties: - pdb_id: - title: PDB_id - type: object - properties: - datatype: - type: string - example: "ANNOTATIONS" - description: "The type of data represented" - data: - title: data - type: object - description: "The actual annotation data" - properties: - name: - type: string - example: "IUCr" - description: "Name of the resource, annotation, etc." - accession: - type: string - example: "IUCr" - description: "A unique identifier for the resource, annotation, etc." - dataType: - type: string - example: "IUCr" - description: "The data type provided" - residues: - title: residues - description: "A list of residue objects." - type: object - items: - startIndex: - type: integer - example: 42 - description: "(start) residue sequence position" - endIndex: - type: integer - example: 42 - description: "(end) residue sequence position" - indexType: - type: string - example: "PDB" - description: "source used for sequence indexing/counting" - startCode: - type: string - example: "ALA" - description: "(start) residue amino acid name" - endCode: - type: string - example: "ALA" - description: "(end) residue amino acid name" - additionalData: - type: array - items: - type: object - description: "List of annottaions for residue" - properties: - pubmedId: - type: integer - description: "A valid PubMed identifier" - example: 1234567 - pmcId: - type: string - description: "A valid PubMedCentral identifier" - example: "PMC1234567" - pdbResidue: - type: integer - description: "A PDB residue" - example: 42 - pdbChain: - type: string - description: "A PDB chain" - example: "A" - uniprotAccession: - type: string - description: "A UniProt accession" - example: "P12345" - uniprotResidue: - type: integer - description: "A UniProt residue" - example: 42 + datatype: + type: string + example: "ANNOTATIONS" + description: "The type of data represented" + data: + type: array + description: List of data annotations. + items: + $ref: "#/components/schemas/data" + + llm_annotations_chain_residue: + title: llm_annotations_chain_residue + type: object + description: all LLM-derived annotations for a specific residue in a selected chain for a given PDB entry + properties: + pdb_id: + type: string + example: "1a2b" + description: "PDB identifier" + chain_id: + type: string + example: "A" + description: "Chain identifier" + residue_id: + type: string + example: "42" + description: "Residue identifier" + datatype: + type: string + example: "ANNOTATIONS" + description: "The type of data represented" + data: + type: array + description: List of data annotations. + items: + $ref: "#/components/schemas/data" + data: + type: object + properties: + name: + type: string + example: "IUCr" + description: "Name annotation provider" + accession: + type: string + example: "IUCr" + description: "A unique identifier for the resource, annotation, etc." + residues: + type: array + description: List of residue objects. + items: + $ref: "#/components/schemas/residue" + additionalData: + $ref: "#/components/schemas/additional_data" + dataType: + type: string + description: Type of data provided in the section. + example: "ANNOTATIONS" + + residue: + type: object + properties: + startIndex: + type: integer + example: 42 + description: "(start) residue sequence position" + endIndex: + type: integer + example: 42 + description: "(end) residue sequence position" + indexType: + type: string + nullable: true + description: Type of index (PDB or UniProt). + enum: ["PDB", "UNIPROT"] + startCode: + type: string + description: Amino acid three-letter code for the start residue. + example: "PRO" + endCode: + type: string + description: Amino acid three-letter code for the end residue. + example: "PRO" + + additional_data: + type: object + properties: + pubmedId: + type: integer + description: "A valid PubMed identifier" + example: 1234567 + pmcId: + type: string + description: "A valid PubMedCentral identifier" + example: "PMC1234567" + pdbResidue: + type: integer + description: "A PDB residue" + example: 42 + pdbChain: + type: string + description: "A PDB chain" + example: "A" + uniprotAccession: + type: string + description: "A UniProt accession" + example: "P12345" + uniprotResidue: + type: integer + description: "A UniProt residue" + example: 42 From da2a26f40013f3e5a5541760fd4776a595470eeb Mon Sep 17 00:00:00 2001 From: Melanie Vollmar Date: Thu, 8 May 2025 18:34:43 +0100 Subject: [PATCH 4/5] removing model.py from repo as should not have been included in the first place --- model.py | 1382 ------------------------------------------------------ 1 file changed, 1382 deletions(-) delete mode 100644 model.py diff --git a/model.py b/model.py deleted file mode 100644 index 25b4fb4..0000000 --- a/model.py +++ /dev/null @@ -1,1382 +0,0 @@ -# generated by datamodel-codegen: -# filename: oas3.yaml -# timestamp: 2025-04-22T13:16:57+00:00 - -from __future__ import annotations - -from enum import Enum -from typing import List, Optional, Union - -from pydantic import BaseModel, Field - - -class UniprotEntry(BaseModel): - ac: str = Field(..., description='UniProt accession', example='P00520') - id: Optional[str] = Field( - None, description='UniProt identifier', example='ABL1_MOUSE' - ) - uniprot_checksum: Optional[str] = Field( - None, - description='CRC64 checksum of the UniProt sequence', - example='5F9BA1D4C7DE6925', - ) - sequence_length: Optional[int] = Field( - None, description='Length of the UniProt sequence', example=76 - ) - segment_start: Optional[int] = Field( - None, - description='1-indexed first residue of the UniProt sequence segment', - example=1, - ) - segment_end: Optional[int] = Field( - None, - description='1-indexed last residue of the UniProt sequence segment', - example=86, - ) - - -class PdbEntry(BaseModel): - entry_id: str = Field(..., description='PDB entry identifier', example='3bow') - chain_id: str = Field(..., description='PDB chain identifier', example='A') - mapped_uniprot: Optional[str] = Field( - None, description='UniProt accession mapped to the PDB entry', example='P12345' - ) - uniprot_start: int = Field( - ..., description='1-indexed first residue in the mapped UniProt', example=1 - ) - uniprot_end: int = Field( - ..., description='1-indexed last residue in the mapped UniProt', example=100 - ) - - -class ModelCategory(Enum): - EXPERIMENTALLY_DETERMINED = 'EXPERIMENTALLY DETERMINED' - TEMPLATE_BASED = 'TEMPLATE-BASED' - AB_INITIO = 'AB-INITIO' - CONFORMATIONAL_ENSEMBLE = 'CONFORMATIONAL ENSEMBLE' - - -class ModelFormat(Enum): - PDB = 'PDB' - MMCIF = 'MMCIF' - BCIF = 'BCIF' - - -class ModelType(Enum): - ATOMIC = 'ATOMIC' - DUMMY = 'DUMMY' - MIX = 'MIX' - - -class EnsembleSampleFormat(Enum): - PDB = 'PDB' - MMCIF = 'MMCIF' - BCIF = 'BCIF' - - -class ExperimentalMethod(Enum): - ELECTRON_CRYSTALLOGRAPHY = 'ELECTRON CRYSTALLOGRAPHY' - ELECTRON_MICROSCOPY = 'ELECTRON MICROSCOPY' - EPR = 'EPR' - FIBER_DIFFRACTION = 'FIBER DIFFRACTION' - FLUORESCENCE_TRANSFER = 'FLUORESCENCE TRANSFER' - INFRARED_SPECTROSCOPY = 'INFRARED SPECTROSCOPY' - NEUTRON_DIFFRACTION = 'NEUTRON DIFFRACTION' - X_RAY_POWDER_DIFFRACTION = 'X-RAY POWDER DIFFRACTION' - SOLID_STATE_NMR = 'SOLID-STATE NMR' - SOLUTION_NMR = 'SOLUTION NMR' - X_RAY_SOLUTION_SCATTERING = 'X-RAY SOLUTION SCATTERING' - THEORETICAL_MODEL = 'THEORETICAL MODEL' - X_RAY_DIFFRACTION = 'X-RAY DIFFRACTION' - HYBRID = 'HYBRID' - - -class ConfidenceType(Enum): - pLDDT = 'pLDDT' - QMEANDisCo = 'QMEANDisCo' - - -class OligomericState(Enum): - MONOMER = 'MONOMER' - HOMODIMER = 'HOMODIMER' - HETERODIMER = 'HETERODIMER' - HOMO_OLIGOMER = 'HOMO-OLIGOMER' - HETERO_OLIGOMER = 'HETERO-OLIGOMER' - - -class EntityType(Enum): - BRANCHED = 'BRANCHED' - MACROLIDE = 'MACROLIDE' - NON_POLYMER = 'NON-POLYMER' - POLYMER = 'POLYMER' - WATER = 'WATER' - - -class EntityPolyType(Enum): - CYCLIC_PSEUDO_PEPTIDE = 'CYCLIC-PSEUDO-PEPTIDE' - PEPTIDE_NUCLEIC_ACID = 'PEPTIDE NUCLEIC ACID' - POLYDEOXYRIBONUCLEOTIDE = 'POLYDEOXYRIBONUCLEOTIDE' - POLYDEOXYRIBONUCLEOTIDE_POLYRIBONUCLEOTIDE_HYBRID = ( - 'POLYDEOXYRIBONUCLEOTIDE/POLYRIBONUCLEOTIDE HYBRID' - ) - POLYPEPTIDE_D_ = 'POLYPEPTIDE(D)' - POLYPEPTIDE_L_ = 'POLYPEPTIDE(L)' - POLYRIBONUCLEOTIDE = 'POLYRIBONUCLEOTIDE' - OTHER = 'OTHER' - - -class IdentifierCategory(Enum): - UNIPROT = 'UNIPROT' - RFAM = 'RFAM' - CCD = 'CCD' - SMILES = 'SMILES' - INCHI = 'INCHI' - INCHIKEY = 'INCHIKEY' - - -class Entity(BaseModel): - entity_type: EntityType = Field( - ..., - description='The type of the molecular entity; similar to _entity.type in mmCIF', - example='POLYMER', - ) - entity_poly_type: Optional[EntityPolyType] = Field( - None, - description='The type of the molecular entity; similar to _entity_poly.type in mmCIF', - example='PEPTIDE NUCLEIC ACID', - ) - identifier: Optional[str] = Field( - None, description='Identifier of the molecule', example='Q13033' - ) - identifier_category: Optional[IdentifierCategory] = Field( - None, description='Category of the identifier', example='UNIPROT' - ) - description: str = Field( - ..., description='A textual label of the molecule', example='Striatin-3' - ) - chain_ids: List[str] - - -class SummaryItems(BaseModel): - model_identifier: str = Field( - ..., description='Identifier of the model, such as PDB id', example='8kfa' - ) - model_category: ModelCategory = Field( - ..., description='Category of the model', example='TEMPLATE-BASED' - ) - model_url: str = Field( - ..., - description='URL of the model coordinates', - example='https://www.ebi.ac.uk/pdbe/static/entry/1t29_updated.cif', - ) - model_format: ModelFormat = Field( - ..., description='File format of the coordinates', example='MMCIF' - ) - model_type: Optional[ModelType] = Field( - None, - description='Defines if the coordinates are atomic-level or contains dummy atoms (e.g. SAXS models), or a mix of both (e.g. hybrid models)\n', - example='ATOMIC', - ) - model_page_url: Optional[str] = Field( - None, - description='URL of a web page of the data provider that show the model', - example='https://alphafold.ebi.ac.uk/entry/Q5VSL9', - ) - provider: str = Field( - ..., description='Name of the model provider', example='SWISS-MODEL' - ) - number_of_conformers: Optional[float] = Field( - None, - description='The number of conformers in a conformational ensemble', - example=42, - ) - ensemble_sample_url: Optional[str] = Field( - None, - description='URL of a sample of conformations from a conformational ensemble', - example='https://proteinensemble.org/api/ensemble_sample/PED00001e001', - ) - ensemble_sample_format: Optional[EnsembleSampleFormat] = Field( - None, - description='File format of the sample coordinates, e.g. PDB', - example='PDB', - ) - created: str = Field( - ..., - description='Date of release of model generation in the format of YYYY-MM-DD', - example='2021-12-21', - ) - sequence_identity: float = Field( - ..., - description='Sequence identity in the range of [0,1] of the model to the UniProt sequence\n', - example=0.97, - ) - uniprot_start: int = Field( - ..., - description='1-indexed first residue of the model according to UniProt sequence numbering\n', - example=1, - ) - uniprot_end: int = Field( - ..., - description='1-indexed last residue of the model according to UniProt sequence numbering\n', - example=142, - ) - coverage: float = Field( - ..., - description='Fraction in range of [0, 1] of the UniProt sequence covered by the model. This is calculated as (uniprot_end - uniprot_start + 1) / uniprot_sequence_length\n', - example=0.4, - ) - experimental_method: Optional[ExperimentalMethod] = Field( - None, - description='Experimental method used to determine the structure, if applicable', - ) - resolution: Optional[float] = Field( - None, - description='The resolution of the model in Angstrom, if applicable', - example=1.4, - ) - confidence_type: Optional[ConfidenceType] = Field( - None, - description='Type of the confidence measure. This is required for theoretical models.\n', - example='QMEANDisCo', - ) - confidence_version: Optional[str] = Field( - None, - description='Version of confidence measure software used to calculate quality. This is required for theoretical models.\n', - example='v1.0.2', - ) - confidence_avg_local_score: Optional[float] = Field( - None, - description='Average of the confidence measures in the range of [0,1] for QMEANDisCo and [0,100] for pLDDT. Please contact 3D-Beacons developers if other estimates are to be added. This is required for theoretical models.\n', - example=0.95, - ) - oligomeric_state: Optional[OligomericState] = Field( - None, description='Oligomeric state of the model', example='MONOMER' - ) - preferred_assembly_id: Optional[str] = Field( - None, - description='Identifier of the preferred assembly in the model', - example='1A', - ) - entities: List[Entity] = Field( - ..., description='A list of molecular entities in the model' - ) - - -class ExperimentalMethod1(Enum): - ELECTRON_CRYSTALLOGRAPHY = 'ELECTRON CRYSTALLOGRAPHY' - ELECTRON_MICROSCOPY = 'ELECTRON MICROSCOPY' - EPR = 'EPR' - FIBER_DIFFRACTION = 'FIBER DIFFRACTION' - FLUORESCENCE_TRANSFER = 'FLUORESCENCE TRANSFER' - INFRARED_SPECTROSCOPY = 'INFRARED SPECTROSCOPY' - NEUTRON_DIFFRACTION = 'NEUTRON DIFFRACTION' - POWDER_DIFFRACTION = 'POWDER DIFFRACTION' - SOLID_STATE_NMR = 'SOLID-STATE NMR' - SOLUTION_NMR = 'SOLUTION NMR' - SOLUTION_SCATTERING = 'SOLUTION SCATTERING' - THEORETICAL_MODEL = 'THEORETICAL MODEL' - X_RAY_DIFFRACTION = 'X-RAY DIFFRACTION' - HYBRID = 'HYBRID' - - -class Template(BaseModel): - template_id: str = Field( - ..., description='Identifier of the template', example='2aqa' - ) - chain_id: str = Field( - ..., - description='Identifier of the chain of the template; this is label_asym_id in mmCIF', - example='C', - ) - template_sequence_identity: float = Field( - ..., - description='Sequence identity of the template with the UniProt accession, in the range of [0,1]\n', - example=0.97, - ) - last_updated: str = Field( - ..., - description='Date of release of the last update in the format of YYYY-MM-DD\n', - example='2021-08-06', - ) - provider: str = Field(..., description='Provider of the template', example='PDB') - experimental_method: ExperimentalMethod1 = Field( - ..., - description='Experimental method used to determine the template', - example='HYBRID', - ) - resolution: float = Field( - ..., description='Resolution of the template, in Angstrom', example=2.1 - ) - preferred_assembly_id: Optional[str] = Field( - None, - description='Identifier of the preferred assembly of the template', - example='1', - ) - - -class Seqres(BaseModel): - aligned_sequence: str = Field( - ..., description='Sequence of the model', example='AAGTGHLKKKYT...' - ) - from_: int = Field( - ..., alias='from', description='1-indexed first residue', example=32 - ) - to: int = Field(..., description='1-indexed last residue', example=976) - - -class Uniprot(BaseModel): - aligned_sequence: str = Field( - ..., - description='Sequence of the UniProt accession', - example='AAGTGHLKKKYTAAGTGHLKKKYT...', - ) - from_: int = Field( - ..., alias='from', description='1-indexed first residue', example=23 - ) - to: int = Field(..., description='1-indexed last residue', example=868) - - -class Residue(BaseModel): - confidence: Optional[float] = Field( - None, description='Confidence score in the range of [0,1]', example=0.99 - ) - model_residue_label: int = Field(..., description='Model residue index', example=1) - uniprot_residue_number: int = Field( - ..., description='UniProt residue index', example=1 - ) - - -class Segment(BaseModel): - templates: Optional[List[Template]] = Field( - None, description='Information on the template(s) used for the model' - ) - seqres: Seqres = Field(..., description='Information on the sequence of the model') - uniprot: Uniprot - residues: List[Residue] - - -class Chain(BaseModel): - chain_id: str - segments: Optional[List[Segment]] = None - - -class Chains(BaseModel): - __root__: List[Chain] - - -class LigandItem(BaseModel): - id: str = Field(..., description='Three-letter code of the ligand', example='IHP') - name: str = Field( - ..., description='Name of the small ligand', example='INOSITOL HEXAKISPHOSPHATE' - ) - formula: str = Field( - ..., - description='Chemical composition formula of the ligand', - example='C6 H18 O24 P6', - ) - inchikey: str = Field( - ..., description='InChIKey of the ligand', example='IMQLKJBTEOYOSI-GPIVLXJGSA-N' - ) - - -class Type(Enum): - HELIX = 'HELIX' - SHEET = 'SHEET' - COIL = 'COIL' - - -class RegionItem(BaseModel): - start: int = Field( - ..., description='The first position of the annotation', example=23 - ) - end: int = Field(..., description='The last position of the annotation', example=42) - - -class SecondaryStructureItem(BaseModel): - type: Type = Field( - ..., description='Type of the secondary structure element', example='HELIX' - ) - region: Optional[List[RegionItem]] = None - - -class Type1(Enum): - CARBOHYD = 'CARBOHYD' - DOMAIN = 'DOMAIN' - CA_BIND = 'CA_BIND' - DNA_BIND = 'DNA_BIND' - NP_BIND = 'NP_BIND' - ACT_SITE = 'ACT_SITE' - METAL = 'METAL' - BINDING = 'BINDING' - NON_STD = 'NON_STD' - MOD_RES = 'MOD_RES' - DISULFID = 'DISULFID' - MUTAGEN = 'MUTAGEN' - - -class Region(BaseModel): - start: int = Field( - ..., description='The first position of the annotation', example=23 - ) - end: int = Field(..., description='The last position of the annotation', example=42) - - -class FeatureItem(BaseModel): - type: Type1 = Field(..., description='Type of the annotation', example='ACT_SITE') - description: str = Field( - ..., - description='Description/Label of the annotation', - example='Pfam N1221 (PF07923)', - ) - residues: Optional[List[int]] = Field( - None, description='An array of residue indices' - ) - regions: Optional[List[Region]] = None - - -class Annotations(BaseModel): - accession: str = Field(..., description='A UniProt accession', example='P00734') - id: Optional[str] = Field( - None, description='A UniProt identifier', example='FGFR2_HUMAN' - ) - sequence: str = Field( - ..., description='The sequence of the protein', example='AFFGVAATRKL' - ) - ligand: Optional[List[LigandItem]] = Field( - None, description='Contains ligand annotations' - ) - secondary_structure: Optional[List[SecondaryStructureItem]] = None - feature: Optional[List[FeatureItem]] = None - - -class MappingAccessionType(Enum): - uniprot = 'uniprot' - pfam = 'pfam' - - -class ModelCategory1(Enum): - EXPERIMENTALLY_DETERMINED = 'EXPERIMENTALLY DETERMINED' - TEMPLATE_BASED = 'TEMPLATE-BASED' - AB_INITIO = 'AB-INITIO' - CONFORMATIONAL_ENSEMBLE = 'CONFORMATIONAL ENSEMBLE' - DEEP_LEARNING = 'DEEP-LEARNING' - - -class ModelType1(Enum): - single = 'single' - complex = 'complex' - - -class Metadata(BaseModel): - mappingAccession: str = Field( - ..., - description='Accession/identifier of the entity the model is mapped to', - example='P38398', - ) - mappingAccessionType: MappingAccessionType = Field( - ..., - description='The name of the data provider the model is mapped to', - example='uniprot', - ) - start: int = Field( - ..., - description='The index of the first residue of the model according to the mapping', - example=1, - ) - end: int = Field( - ..., - description='The index of the last residue of the model according to the mapping', - example=103, - ) - modelCategory: ModelCategory1 = Field( - ..., description='Category of the model', example='TEMPLATE-BASED' - ) - modelType: ModelType1 = Field( - ..., description='Monomeric or complex strutures', example='single' - ) - - -class ModificationFlag(Enum): - Y = 'Y' - N = 'N' - - -class WwPdbInfo(BaseModel): - defined_at: Optional[str] = Field( - None, - description='The date the chemical component was defined in wwPDB', - example='1999-07-08 00:00:00', - ) - modified: Optional[str] = Field( - None, - description='The modified date of the chemical componenet in wwPDB', - example='2011-06-04 00:00:00', - ) - modification_flag: Optional[ModificationFlag] = Field( - None, - description='Y/N denoting the modification status of the chemical component in wwPDB', - example='N', - ) - polymer_type: Optional[str] = Field( - None, - description='This flag denotes if the chemical component is a polymer or non-polymer in wwPDB', - example='NON-POLYMER', - ) - standard_parent: Optional[str] = Field( - None, - description='The standard chemical component defined in wwPDB', - example='null', - ) - - -class FunctionalAnnotations(Enum): - Drug_like = 'Drug-like' - Co_factor_like = 'Co-factor-like' - Reactant_like = 'Reactant-like' - - -class CrossLink(BaseModel): - resource: Optional[str] = Field( - None, description='The external resource name', example='BindingDb' - ) - resource_id: Optional[str] = Field( - None, description='The external resource id', example='50366480' - ) - - -class Synonyms(BaseModel): - origin: Optional[str] = Field( - None, - description='The resource which provides synonym for the chemical component', - example='DrugBank', - ) - value: Optional[str] = Field( - None, description='The synonym provided by the resource', example='Striadyne' - ) - - -class PhysChemProperties(BaseModel): - crippen_mr: Optional[float] = Field( - None, - description='Wildman-Crippen molar refractivity is a common descriptor accounting for molecular size and polarizability', - example=92.446, - ) - num_atom_stereo_centers: Optional[int] = Field( - None, - description='Number of atoms with four attachments different from each other', - example=6, - ) - crippen_clog_p: Optional[float] = Field( - None, - description='Octanol/Water partition coeficient predicted using Wildman-Crippen method', - example=-2.438, - ) - num_rings: Optional[int] = Field(None, description='Number of rings', example=3) - num_rotatable_bonds: Optional[int] = Field( - None, - description='Number of single bonds, not part of a ring bound to a nonterminal heavy atom', - example=15, - ) - num_heteroatoms: Optional[int] = Field( - None, description='Number of non oxygen and non carbon atoms' - ) - fraction_csp3: Optional[float] = Field( - None, description='Fraction of C atoms that are SP3 hybridized', example=5.9 - ) - num_aromatic_rings: Optional[int] = Field( - None, description='Number of aromatic rings for the molecule', example=2 - ) - exactmw: Optional[float] = Field( - None, description='Total mass of the molecule', example=506.996 - ) - num_spiro_atoms: Optional[int] = Field( - None, - description='Atoms shared between rings that share exactly one atom', - example=0, - ) - num_heavy_atoms: Optional[int] = Field( - None, description='Number of non hydrogen atoms', example=31 - ) - num_aliphatic_rings: Optional[int] = Field( - None, description='Niumber of aliphatic rings', example=1 - ) - num_hbd: Optional[int] = Field( - None, description='Number of hydrogen bond donors', example=7 - ) - num_saturated_heterocycles: Optional[int] = Field( - None, description='Number of saturated heterocycles', example=1 - ) - tpsa: Optional[float] = Field( - None, description='Topological surface area', example=279.13 - ) - num_bridgehead_atoms: Optional[int] = Field( - None, - description='Number of atoms shared between rings that share at least two bonds', - example=0, - ) - num_aromatic_heterocycles: Optional[int] = Field( - None, - description='Number or aromatic rings with at least two different elements', - example=2, - ) - labute_asa: Optional[float] = Field( - None, - description="Accessible surface area accorging to the Labute' definition", - example=194.334, - ) - num_hba: Optional[int] = Field( - None, description='Number of hydrogen bond acceptors', example=18 - ) - num_amide_bonds: Optional[int] = Field( - None, description='Number of amide bonds', example=0 - ) - num_saturated_rings: Optional[int] = Field( - None, description='Number of saturated rings', example=1 - ) - lipinski_hba: Optional[float] = Field( - None, - description='Number of hydrogen bond acceptors according to Lipinsky definition', - example=18, - ) - num_unspec_atom_stereo_centers: Optional[int] = Field( - None, description='Number of unsuspected stereocenters' - ) - lipinski_hbd: Optional[float] = Field( - None, - description='Number of hydrogen bond donors according to Lipinsky definition', - example=8, - ) - num_heterocycles: Optional[int] = Field( - None, - description='Number or rings with at least two different elements', - example=3, - ) - num_aliphatic_heterocycles: Optional[int] = Field( - None, description='Number of aliphatic heterocycles', example=1 - ) - - -class CompoundSummary(BaseModel): - name: str = Field( - ..., - description='The name of the chemical component', - example="ADENOSINE-5'-TRIPHOSPHATE", - ) - released: Optional[bool] = Field( - None, - description='A flag denoting if the hetcode is released or not', - example=True, - ) - superseded_by: Optional[str] = Field( - None, - description='A hetcode which superseeds the hetcode in query', - example='null', - ) - formula: str = Field( - ..., - description='The chemical formula of the component', - example='C10 H16 N5 O13 P3', - ) - inchi: str = Field( - ..., - description='The full INCHI of the component', - example='InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1', - ) - inchi_key: str = Field( - ..., - description='INCHI Key of the component', - example='ZKHQWZAMYRWXGA-KQYNXXCUSA-N', - ) - smiles: Optional[List[str]] = Field( - None, - description='The SMILES representation of the component (could be multiple)', - example=['c1nc(c2c(n1)n(cn2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N'], - ) - ww_pdb_info: Optional[WwPdbInfo] = Field( - None, - description='An info object which provides details of the chemical component from wwPDB', - ) - functional_annotations: Optional[Union[List[str], FunctionalAnnotations]] = Field( - None, description='A list of functional annotations for the chemical component' - ) - cross_link: Optional[CrossLink] = Field( - None, - alias='cross-link', - description='Cross references for this chemical component from other resources', - ) - synonyms: Optional[Synonyms] = Field( - None, - description='A list of synomyms for the chemical component from other sources', - ) - phys_chem_properties: Optional[PhysChemProperties] = Field( - None, description='An object of physical chemical properties' - ) - - -class Stereoisomer(BaseModel): - chem_comp_id: Optional[str] = Field( - None, - description='Chemical component identifier, the so-called 3-letter code, but it need not be 3-letter long!', - example='GL0', - ) - name: Optional[str] = Field( - None, description='A name for the hetcode', example='beta-D-gulopyranose' - ) - - -class SameScaffoldItem(BaseModel): - chem_comp_id: Optional[str] = Field( - None, - description='Chemical component identifier, the so-called 3-letter code, but it need not be 3-letter long!', - example='9YW', - ) - name: Optional[str] = Field( - None, - description='A name for the hetcode', - example='(alpha-D-glucopyranosyloxy)acetic acid', - ) - substructure_match: Optional[List[str]] = Field( - None, - description='A list of atom names in the hetcode that match with the hetcode in the query', - example=['C1', 'C4', 'O5', 'C2', 'C3', 'C5'], - ) - similarity_score: Optional[float] = Field( - None, - description='Similarity score in the range of 0 to 1 defined by the PARITY method.', - example=0.75, - ) - - -class CompoundSimilarity(BaseModel): - stereoisomers: Optional[List[Stereoisomer]] = Field( - None, description='A list of stereoisomer hetcode objects' - ) - same_scaffold: Optional[List[SameScaffoldItem]] = Field( - None, description='A list of hetcode objects which is part of the same scaffold' - ) - - -class LigandRoleEnum(Enum): - reactant = 'reactant' - cofactor = 'cofactor' - drug_like = 'drug-like' - unknown = 'unknown' - - -class EnzymeClass(Enum): - oxidoreductases = 'oxidoreductases' - transferases = 'transferases' - hydrolases = 'hydrolases' - lyases = 'lyases' - isomerases = 'isomerases' - ligases = 'ligases' - and_translocases = 'and translocases' - - -class BoundDatum(BaseModel): - uniprot_accession: Optional[str] = Field( - None, description='UniProt accession number (AC)', example='D6D1V7' - ) - protein_name: Optional[str] = Field( - None, - description='Name of the protein for the given uniprot accession', - example='Glycosyl hydrolase family 71', - ) - ligand_role: Optional[List[LigandRoleEnum]] = Field( - None, description='Functional role of ligand' - ) - pdb_entries: Optional[List[str]] = Field( - None, - description='list of PDB structures corresponding to the protein (uniprot accession) to which a ligand bound', - example=[ - '4ad4', - '6far', - '5lyr', - '6hmh', - '4ad4', - '4v28', - '4ad5', - '6fam', - '6fwp', - ], - ) - representative_structure: Optional[str] = Field( - None, - description='Best representative structure. This is the structure with best (lowest) resolution with all/maximum number of ligand atoms resolved.', - example='6fwp', - ) - ec_number: Optional[List[str]] = Field( - None, - description='The Enzyme Commission Number based on the chemical reactions an enzyme catalyze.', - example=['3.2.1.130', '3.2.1.198'], - ) - enzyme_class: Optional[EnzymeClass] = Field( - None, - description='The Enzyme class name based on the chemical reactions an enzyme catalyze.', - example='hydrolases', - ) - - -class Structure(BaseModel): - total_pdb_structures: int = Field( - ..., - description='Total number of PDB structures to which given ligand is bound', - example=2400, - ) - total_unp_proteins: int = Field( - ..., - description='Total number of unique proteins (UniProt accession) to which a given ligand is bound', - example=1050, - ) - bound_data: Optional[List[BoundDatum]] = None - - -class AtomDetail(BaseModel): - ligand_atom: Optional[str] = None - protein_residue_atom: Optional[str] = None - interaction_type: Optional[str] = None - distance: Optional[float] = None - - -class InteractingLigandDetail(BaseModel): - entity_id: Optional[int] = None - chain_id: Optional[str] = None - residue_number: Optional[int] = None - author_residue_number: Optional[int] = None - author_insertion_code: Optional[str] = None - chem_comp_id: Optional[str] = None - atom_details: Optional[List[AtomDetail]] = None - - -class InteractingProteinResidue(BaseModel): - entity_id: Optional[int] = None - chain_id: Optional[str] = None - residue_number: Optional[int] = None - author_residue_number: Optional[int] = None - author_insertion_code: Optional[str] = None - chem_comp_id: Optional[str] = None - uniprot_accession: Optional[str] = None - interacting_ligand_details: Optional[List[InteractingLigandDetail]] = None - - -class InteractionDatum(BaseModel): - pdb_id: Optional[str] = None - interacting_protein_residues: Optional[List[InteractingProteinResidue]] = None - - -class Interaction(BaseModel): - total_interactions: Optional[int] = None - interaction_data: Optional[List[InteractionDatum]] = None - - -class TargetResidueName(Enum): - ALA = 'ALA' - ARG = 'ARG' - ASN = 'ASN' - ASP = 'ASP' - CYS = 'CYS' - GLU = 'GLU' - GLN = 'GLN' - GLY = 'GLY' - HIS = 'HIS' - ILE = 'ILE' - LEU = 'LEU' - LYS = 'LYS' - MET = 'MET' - PHE = 'PHE' - PRO = 'PRO' - SER = 'SER' - THR = 'THR' - TRP = 'TRP' - TYR = 'TYR' - VAL = 'VAL' - DA = 'DA' - DT = 'DT' - DG = 'DG' - DC = 'DC' - A = 'A' - G = 'G' - C = 'C' - U = 'U' - DU = 'DU' - - -class InteractionTypeEnum(Enum): - polar = 'polar' - hbond = 'hbond' - vdw_clash = 'vdw_clash' - vdw = 'vdw' - weak_hbond = 'weak_hbond' - carbonpi = 'carbonpi' - cationpi = 'cationpi' - weak_polar = 'weak_polar' - aromatic = 'aromatic' - - -class Interactions(BaseModel): - target_residue_atom_name: Optional[str] = Field( - None, description='Atom name of the interacting target residue', example='O1G' - ) - ligand_uniq_id: Optional[str] = Field( - None, - description='Ligand unique ID which is pdbid, entitiy_id, ?, and ? seperated by underscore.', - example='7esb_2_B_1', - ) - distance: Optional[float] = Field( - None, - description='Distance between the two interacting atoms of ligand and target', - example=3.49, - ) - interaction_type: Optional[List[InteractionTypeEnum]] = Field( - None, - description='Interaction type as given by Arpeggio.', - example=['polar', 'hbond', 'vdw_clash'], - ) - - -class Detail(BaseModel): - target_uniqid: Optional[str] = Field( - None, - description='Target unique ID which is pdbid, entitiy_id and residue_number seperated by underscore.', - example='7esb_1_242', - ) - interactions: Optional[Interactions] = Field( - None, description='Details of interactions.' - ) - - -class InteractingTargetResidueItem(BaseModel): - target_residue_name: Optional[TargetResidueName] = Field( - None, - description='Name of target residue which is interacting with the given ligand atom', - example='ARG', - ) - interaction_strength: Optional[float] = Field(None, example=2.1) - details: Optional[List[Detail]] = None - - -class InteractionOverview(BaseModel): - ligand_atom: Optional[str] = Field( - None, - description='Ligand atom name which is interacting with the target. This target is a polymer and can be either nucleic acid or protein.', - example='C1', - ) - interacting_target_residue: Optional[List[InteractingTargetResidueItem]] = Field( - None, - description='Details of all the target residue which are interacting with the given ligand atom', - ) - - -class InteractionType(Enum): - polar = 'polar' - hbond = 'hbond' - vdw_clash = 'vdw_clash' - vdw = 'vdw' - weak_hbond = 'weak_hbond' - carbonpi = 'carbonpi' - cationpi = 'cationpi' - weak_polar = 'weak_polar' - aromatic = 'aromatic' - - -class Interaction1(BaseModel): - interaction_type: Optional[InteractionType] = Field( - None, description='Interaction type as given by Arpeggio.', example='polar' - ) - interaction_count: Optional[int] = Field(None, example=1500) - interaction_strength: Optional[float] = Field(None, example=1.5) - target_uniqid: Optional[List[str]] = Field( - None, - description='Target unique ID which is pdbid, entitiy_id, residue_number and residue_atom_name seperated by underscore.', - example=['7esb_1_242_OG1', '7esb_1_300_C1', '1cbs_2_240_N1'], - ) - - -class InteractingTargetResidueItem1(BaseModel): - target_residue_name: Optional[TargetResidueName] = Field( - None, - description='Name of target residue which is interacting with the given ligand atom', - example='ARG', - ) - interaction_strength: Optional[float] = Field(None, example=2.1) - interaction_count: Optional[float] = Field(None, example=2500) - interactions: Optional[List[Interaction1]] = None - - -class InteractionStrength(BaseModel): - ligand_atom: Optional[str] = Field( - None, - description='Ligand atom name which is interacting with the target. This target is a polymer and can be either nucleic acid or protein.', - example='C1', - ) - interacting_target_residue: Optional[List[InteractingTargetResidueItem1]] = Field( - None, - description='Details of all the target residue which are interacting with the given ligand atom', - ) - - -class BoundDatum1(BaseModel): - uniprot_accession: Optional[str] = Field( - None, description='UniProt accession number (AC)', example='D6D1V7' - ) - protein_name: Optional[str] = Field( - None, - description='Name of the protein for the given uniprot accession', - example='Glycosyl hydrolase family 71', - ) - ligand_role: Optional[List[LigandRoleEnum]] = Field( - None, description='Functional role of ligand' - ) - pdb_entries: Optional[List[str]] = Field( - None, - description='list of PDB structures corresponding to the protein (uniprot accession) to which a ligand bound', - example=[ - '4ad4', - '6far', - '5lyr', - '6hmh', - '4ad4', - '4v28', - '4ad5', - '6fam', - '6fwp', - ], - ) - representative_structure: Optional[str] = Field( - None, - description='Best representative structure. This is the structure with best (lowest) resolution with all/maximum number of ligand atoms resolved.', - example='6fwp', - ) - ec_number: Optional[List[str]] = Field( - None, - description='The Enzyme Commission Number based on the chemical reactions an enzyme catalyze.', - example=['3.2.1.130', '3.2.1.198'], - ) - - -class SummaryCount(BaseModel): - total_pdb_structures: int = Field( - ..., - description='Total number of PDB structures to which given ligand is bound', - example=2400, - ) - total_unp_proteins: int = Field( - ..., - description='Total number of unique proteins (UniProt accession) to which a given ligand is bound', - example=1050, - ) - bound_data: Optional[List[BoundDatum1]] = None - - -class Bonds(BaseModel): - bond_distances: Optional[List[float]] = None - atom_site_1_chains: Optional[List[str]] = None - atom_site_1_residues: Optional[List[str]] = None - atom_site_1_label_asym_ids: Optional[List[str]] = None - atom_site_1_orig_label_asym_ids: Optional[List[str]] = None - atom_site_1_upn_accs: Optional[List[str]] = None - atom_site_1_upn_nums: Optional[List[str]] = None - atom_site_1_seq_nums: Optional[List[int]] = None - atom_site_1_label_seq_ids: Optional[List[int]] = None - atom_site_1_label_atom_ids: Optional[List[str]] = None - atom_site_1_inscodes: Optional[List[str]] = None - atom_site_2_chains: Optional[List[str]] = None - atom_site_2_residues: Optional[List[str]] = None - atom_site_2_label_asym_ids: Optional[List[str]] = None - atom_site_2_orig_label_asym_ids: Optional[List[str]] = None - atom_site_2_upn_accs: Optional[List[str]] = None - atom_site_2_upn_nums: Optional[List[str]] = None - atom_site_2_seq_nums: Optional[List[int]] = None - atom_site_2_label_seq_ids: Optional[List[int]] = None - atom_site_2_label_atom_ids: Optional[List[str]] = None - atom_site_2_inscodes: Optional[List[str]] = None - - -class MoleculeClass(Enum): - Protein = 'Protein' - Ligand = 'Ligand' - RNA = 'RNA' - DNA = 'DNA' - - -class Molecule(BaseModel): - molecule_id: Optional[str] = Field( - None, description='Identifier of the molecule on the interface', example='1' - ) - molecule_class: Optional[MoleculeClass] = Field( - None, description='Molecule type/class', example='Protein' - ) - chain_id: Optional[str] = Field( - None, description='PDB chain identifier of the molecule', example='A' - ) - residue_label_comp_ids: Optional[List[str]] = None - residue_seq_ids: Optional[List[str]] = None - residue_label_seq_ids: Optional[List[str]] = None - residue_ins_codes: Optional[List[str]] = None - residue_bonds: Optional[List[str]] = None - solvation_energies: Optional[List[float]] = None - accessible_surface_areas: Optional[List[float]] = None - buried_surface_areas: Optional[List[float]] = None - - -class Interface(BaseModel): - interface_id: Optional[int] = Field( - None, description='Interface identifier', example=1 - ) - interface_area: Optional[float] = Field( - None, description='[Area of interface (A^2)]', example=1427.5 - ) - solvation_energy: Optional[float] = Field( - None, description='[Solvation energy (kcal/mol)]', example=-18.22 - ) - stabilization_energy: Optional[float] = Field( - None, description='[Stabilisation energy (Kcal/mol)]', example=-28.59 - ) - p_value: Optional[float] = Field( - None, - description='[Probability that solvation energy gain for interface atom may be greater than binding energy]', - example=0.095, - ) - number_interface_residues: Optional[int] = Field( - None, description='Number of interface residues', example=1 - ) - number_hydrogen_bonds: Optional[int] = Field( - None, description='Number of interface hydrogen bonds', example=1 - ) - number_salt_bridges: Optional[int] = Field( - None, description='Number of interface salt bridges', example=1 - ) - number_covalent_bonds: Optional[int] = Field( - None, description='Number of interface covalent bonds', example=1 - ) - number_disulfide_bonds: Optional[int] = Field( - None, description='Number of interface disulfide bonds', example=1 - ) - number_other_bonds: Optional[int] = Field( - None, - description='Number of other contacts within a distance of 4 A and not classified as any of the other bonds ', - example=1, - ) - hydrogen_bonds: Optional[Bonds] = None - salt_bridges: Optional[Bonds] = None - disulfide_bonds: Optional[Bonds] = None - covalent_bonds: Optional[Bonds] = None - other_bonds: Optional[Bonds] = None - molecules: Optional[List[Molecule]] = None - - -class Assembly2(BaseModel): - id: Optional[str] = Field(None, description='Assembly identifier', example='1') - size: Optional[int] = Field(None, description='Size of the assembly', example=6) - interface_count: Optional[int] = Field( - None, description='[Number of interfaces]', example=1 - ) - score: Optional[str] = Field( - None, description='Macromolecular size of the assembly', example='2' - ) - macromolecular_size: Optional[int] = Field( - None, description='Macromolecular size of the assembly', example=2 - ) - dissociation_energy: Optional[float] = Field( - None, - description='[Maximal free energy of dissociation (kcal/mol)]', - example=15.61, - ) - accessible_surface_area: Optional[float] = Field( - None, description='[Accessible surface area (A^2)]', example=19395.3 - ) - buried_surface_area: Optional[float] = Field( - None, description='[Buried surface area (A^2)]', example=31514.17 - ) - entropy: Optional[float] = Field( - None, description='[Entropy change at dissociation ]', example=12.98 - ) - dissociation_area: Optional[float] = Field( - None, description='[Dissociation Interface Area (A^2)]', example=1427.5 - ) - solvation_energy_gain: Optional[float] = Field( - None, description='[Solvation energy gain (Kcal/mol)]', example=-35.28 - ) - number_of_uc: Optional[int] = Field( - None, description='number of assemblies in unit cells', example=1 - ) - number_of_dissociated_elements: Optional[int] = Field( - None, description='Number of dissociating parts', example=2 - ) - symmetry_number: Optional[int] = Field( - None, description='Symmetry number', example=2 - ) - formula: Optional[str] = Field( - None, description='[Formula]', example='A(2)a(2)b(2)' - ) - composition: Optional[str] = Field( - None, description='[Composition]', example='AAA[NA](2)[GOL](2)' - ) - R350: Optional[int] = Field(None, description='Remark R350', example=1) - - -class PdbId1(BaseModel): - assembly_id: Optional[str] = Field( - None, description='Assembly identifier', example='1' - ) - pisa_version: Optional[str] = Field( - None, description='The semantic version number of PISA', example='2.0.0' - ) - assembly: Optional[Assembly2] = Field(None, title='Assembly') - - -class Assembly1(BaseModel): - pdb_id: Optional[PdbId1] = Field(None, title='PDB_entry') - - -class IndexType(Enum): - PDB = 'PDB' - UNIPROT = 'UNIPROT' - - -class ResidueModel(BaseModel): - startIndex: Optional[int] = Field( - None, description='(start) residue sequence position', example=42 - ) - endIndex: Optional[int] = Field( - None, description='(end) residue sequence position', example=42 - ) - indexType: Optional[IndexType] = Field( - None, description='Type of index (PDB or UniProt).' - ) - startCode: Optional[str] = Field( - None, - description='Amino acid three-letter code for the start residue.', - example='PRO', - ) - endCode: Optional[str] = Field( - None, - description='Amino acid three-letter code for the end residue.', - example='PRO', - ) - - -class AdditionalData(BaseModel): - pubmedId: Optional[int] = Field( - None, description='A valid PubMed identifier', example=1234567 - ) - pmcId: Optional[str] = Field( - None, description='A valid PubMedCentral identifier', example='PMC1234567' - ) - pdbResidue: Optional[int] = Field(None, description='A PDB residue', example=42) - pdbChain: Optional[str] = Field(None, description='A PDB chain', example='A') - uniprotAccession: Optional[str] = Field( - None, description='A UniProt accession', example='P12345' - ) - uniprotResidue: Optional[int] = Field( - None, description='A UniProt residue', example=42 - ) - - -class Detailed(BaseModel): - summary: SummaryItems - chains: Chains - - -class Overview(BaseModel): - summary: SummaryItems - - -class Assembly(BaseModel): - mmsize: Optional[int] = Field(None, description='Size of the assembly', example=2) - dissociation_energy: Optional[float] = Field( - None, - description='[Maximal free energy of dissociation (kcal/mol)]', - example=15.61, - ) - accessible_surface_area: Optional[float] = Field( - None, description='[Accessible surface area (A^2)]', example=19395.3 - ) - buried_surface_area: Optional[float] = Field( - None, description='[Buried surface area (A^2)]', example=31514.17 - ) - entropy: Optional[float] = Field( - None, description='[Entropy change at dissociation ]', example=12.98 - ) - dissociation_area: Optional[float] = Field( - None, description='[Dissociation Interface Area (A^2)]', example=1427.5 - ) - solvation_energy_gain: Optional[float] = Field( - None, description='[Solvation energy gain (Kcal/mol)]', example=-35.28 - ) - formula: Optional[str] = Field( - None, description='[Formula]', example='A(2)a(2)b(2)' - ) - composition: Optional[str] = Field( - None, description='[Composition]', example='AAA[NA](2)[GOL](2)' - ) - interface_count: Optional[int] = Field( - None, description='[Number of interfaces]', example=1 - ) - interfaces: Optional[List[Interface]] = None - - -class PdbId(BaseModel): - assembly_id: Optional[str] = Field( - None, description='Assembly identifier', example='1' - ) - pisa_version: Optional[str] = Field( - None, description='The semantic version number of PISA', example='2.0.0' - ) - assembly: Optional[Assembly] = Field(None, title='Assembly') - - -class PISAInterfaces(BaseModel): - pdb_id: Optional[PdbId] = Field(None, title='PDB_id') - - -class Data(BaseModel): - name: Optional[str] = Field( - None, description='Name annotation provider', example='IUCr' - ) - accession: Optional[str] = Field( - None, - description='A unique identifier for the resource, annotation, etc.', - example='IUCr', - ) - residues: Optional[List[ResidueModel]] = Field( - None, description='List of residue objects.' - ) - additionalData: Optional[AdditionalData] = None - dataType: Optional[str] = Field( - None, description='Type of data provided in the section.', example='ANNOTATIONS' - ) - - -class UniprotSummary(BaseModel): - uniprot_entry: Optional[UniprotEntry] = None - structures: Optional[List[Overview]] = None - - -class UniprotDetails(BaseModel): - uniprot_entry: Optional[UniprotEntry] = None - structures: Optional[List[Detailed]] = None - - -class PdbSummary(BaseModel): - uniprot_entry: Optional[PdbEntry] = None - structures: Optional[List[Overview]] = None - - -class LlmAnnotationsSummary(BaseModel): - datatype: Optional[str] = Field( - None, description='The type of data represented', example='ANNOTATIONS' - ) - data: Optional[List[Data]] = Field(None, description='List of data annotations.') - - -class LlmAnnotationsChainResidue(BaseModel): - pdb_id: Optional[str] = Field(None, description='PDB identifier', example='1a2b') - chain_id: Optional[str] = Field(None, description='Chain identifier', example='A') - residue_id: Optional[str] = Field( - None, description='Residue identifier', example='42' - ) - datatype: Optional[str] = Field( - None, description='The type of data represented', example='ANNOTATIONS' - ) - data: Optional[List[Data]] = Field(None, description='List of data annotations.') From a4b927f78e066f5d5e57669c7dc0a2b05099e53e Mon Sep 17 00:00:00 2001 From: Melanie Vollmar Date: Wed, 14 May 2025 15:32:29 +0100 Subject: [PATCH 5/5] udating definition yml to create python input for API --- oas3.yaml | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/oas3.yaml b/oas3.yaml index b4bb3c2..aa672af 100644 --- a/oas3.yaml +++ b/oas3.yaml @@ -1949,10 +1949,6 @@ components: $ref: "#/components/schemas/residue" additionalData: $ref: "#/components/schemas/additional_data" - dataType: - type: string - description: Type of data provided in the section. - example: "ANNOTATIONS" residue: type: object @@ -1990,10 +1986,30 @@ components: type: string description: "A valid PubMedCentral identifier" example: "PMC1234567" + doi: + type: string + description: "A valid Digital Object Identifier" + example: "10.1234/abcd.efgh" + primaryCitation: + type: string + description: "A valid primary citation" + example: "Y" + openAccess: + type: string + description: "A valid open access identifier" + example: "Y" + pdbId: + type: string + description: "A valid PDB identifier" + example: "1a2b" pdbResidue: type: integer description: "A PDB residue" example: 42 + authorResidueNumber: + type: integer + description: "An author residue number" + example: 42 pdbChain: type: string description: "A PDB chain" @@ -2006,3 +2022,27 @@ components: type: integer description: "A UniProt residue" example: 42 + sentence: + type: string + description: "A sentence containing the annotation" + example: "This is a sample sentence." + section: + type: string + description: "Section in publication annotation was found" + example: "This is a sample section." + exact: + type: string + description: "Exact annotation text" + example: "This is a sample exact annotation." + entityType: + type: string + description: "Type of entity (e.g., protein, nucleic acid, etc.)" + example: "protein" + annotator: + type: string + description: "Name of the annotator" + example: "John Doe" + aiScore: + type: float + description: "AI confidence score for the annotation" + example: 0.95 \ No newline at end of file