diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index e69de29..b9f1e14 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "87bdb42e", + "metadata": {}, + "outputs": [], + "source": [ + "import sbol2\n", + "from buildcompiler.buildcompiler import BuildCompiler\n", + "from buildcompiler.abstract_translator import extract_toplevel_definition" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e60a9c84", + "metadata": {}, + "outputs": [], + "source": [ + "design_doc = sbol2.Document()\n", + "design_doc.read(\"../tests/test_files/moclo_parts_circuit.xml\")\n", + "design = extract_toplevel_definition(design_doc)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "90648527", + "metadata": {}, + "outputs": [], + "source": [ + "auth = \"51102d98-f852-4386-9ae8-7c5814d679c1\"\n", + "collections = [\n", + " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", + " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", + "]\n", + "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c12e504", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[, ]\n", + "[]\n", + "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", + "matched pB0034_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", + "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", + "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n", + "[Plasmid:\n", + " Name: composite_1_A_B_C_D_E\n", + " Plasmid Definition: 
https://SBOL2Build.org/composite_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/composite_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'B', 'C', 'D', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + "]\n" + ] + } + ], + "source": [ + "print(buildcompiler.restriction_enzyme_implementations)\n", + "print(buildcompiler.ligase_implementations)\n", + "\n", + "composite_plasmids = buildcompiler.assembly_lvl1(design, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "79fd0cb5", + "metadata": {}, + "outputs": [], + "source": [ + "# Pull chassis from sbh\n", + "chassis_doc = sbol2.Document()\n", + "\n", + "buildcompiler.sbh.pull(\n", + " \"https://synbiohub.org/user/Gon/Chassis/Ecoli_DH5a/1/52b575c09496ebb3e2ef9e4c272c9e733134a874/share\",\n", + " chassis_doc,\n", + ")\n", + "\n", + "\n", + "chassis = chassis_doc.moduleDefinitions[0]\n", + "\n", + "dummy_activity = sbol2.Activity(\"chassis_domestication\")\n", + "dummy_activity.name = \"acquisistion of chassis strain\"\n", + "dummy_activity.types = \"http://sbols.org/v2#build\"\n", + "\n", + "chassis_implementation = sbol2.Implementation(f\"{chassis.name}_impl\")\n", + "chassis_implementation.built = chassis.identity\n", + "chassis_implementation.wasGeneratedBy = dummy_activity\n", + "\n", + "chassis_doc.add(chassis_implementation)\n", + "chassis_doc.add(dummy_activity)\n", + "\n", + "# chassis_doc.write(\"chassis_impl.xml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f4ea67c", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'composite_plasmids' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, 
line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m bacterial_transformation(\u001b[43mcomposite_plasmids\u001b[49m, chassis_implementation, chassis, final_doc)\n", + "\u001b[31mNameError\u001b[39m: name 'composite_plasmids' is not defined" + ] + } + ], + "source": [ + "# bacterial_transformation(composite_plasmids, chassis_implementation, chassis, final_doc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5553bfc3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Valid.'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# final_doc.write(\"fullbuild.xml\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (buildplanner)", + "language": "python", + "name": "buildplanner" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index 5d51e10..5c2ef02 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -1,18 +1,30 @@ import sbol2 import itertools +import re from typing import Dict, List -from .constants import FUSION_SITES +from .constants import ( + ANTIBIOTIC_MAP, + ENGINEERED_PLASMID, + FUSION_SITES, + ANTIBIOTIC_RESISTANCE, + RESTRICTION_ENZYME_ASSEMBLY_SCAR, +) -class MocloPlasmid: +class Plasmid: def __init__( - self, name: str, definition: sbol2.ComponentDefinition, doc: sbol2.document + self, + definition: sbol2.ComponentDefinition, + strain_definition: sbol2.ModuleDefinition, + doc: sbol2.document, ): self.definition = definition - self.fusion_sites = self.match_fusion_sites(doc) - self.name = name + "".join(f"_{s}" for 
s in self.fusion_sites) + self.strain_definition = strain_definition + self.fusion_sites = self._match_fusion_sites(doc) + self.name = definition.displayId + "".join(f"_{s}" for s in self.fusion_sites) + self.antibiotic_resistance = self._get_antibiotic_resistance(doc) - def match_fusion_sites(self, doc: sbol2.document) -> List[str]: + def _match_fusion_sites(self, doc: sbol2.document) -> List[str]: fusion_site_definitions = extract_fusion_sites(self.definition, doc) fusion_sites = [] for site in fusion_site_definitions: @@ -26,16 +38,37 @@ def match_fusion_sites(self, doc: sbol2.document) -> List[str]: fusion_sites.sort() return fusion_sites + def _get_antibiotic_resistance(self, doc: sbol2.Document) -> str: + for component in ( + self.definition.components + ): # go a level deeper, within the backbone core component + definition = doc.get(component.definition) + for subcomponent in definition.components: + subcomponent_def = doc.get(subcomponent.definition) + if ANTIBIOTIC_RESISTANCE in subcomponent_def.roles: + match = re.search( + r"\b(" + "|".join(ANTIBIOTIC_MAP) + r")_", + subcomponent_def.displayId, + re.IGNORECASE, + ) + if match: + return ANTIBIOTIC_MAP[match.group(1).lower()] + return "Unknown" + + return None + def __repr__(self) -> str: return ( - f"MocloPlasmid:\n" + f"Plasmid:\n" f" Name: {self.name}\n" f" Definition: {self.definition.identity}\n" + f" Strain: {getattr(self.strain_definition, 'identity', 'None')}\n" f" Fusion Sites: {self.fusion_sites or 'Not found'}" + f" Antibiotic Resistance: {self.antibiotic_resistance}\n" ) def __eq__(self, other): - if not isinstance(other, MocloPlasmid): + if not isinstance(other, Plasmid): return False return self.definition == other.definition @@ -59,7 +92,7 @@ def extract_fusion_sites( fusion_sites = [] for component in plasmid.components: definition = doc.getComponentDefinition(component.definition) - if "http://identifiers.org/so/SO:0001953" in definition.roles: + if RESTRICTION_ENZYME_ASSEMBLY_SCAR in 
definition.roles: fusion_sites.append(definition) return fusion_sites @@ -102,6 +135,16 @@ def copy_sequences(component_definition, target_doc, collection_doc): seq_obj.copy(target_doc) +def get_or_pull(doc, sbh, uri): + """ + Get an SBOL object from a Document. + If missing, pull it from SynBioHub and retry. + """ + if uri not in doc: + sbh.pull(uri, doc) + return doc.get(uri) + + def extract_combinatorial_design_parts( design: sbol2.ComponentDefinition, doc: sbol2.Document, plasmid_doc ) -> Dict[str, List[sbol2.ComponentDefinition]]: @@ -166,13 +209,13 @@ def enumerate_design_variants(component_dict): def construct_plasmid_dict( part_list: List[sbol2.ComponentDefinition], plasmid_collection: sbol2.Document -) -> Dict[str, List[MocloPlasmid]]: +) -> Dict[str, List[Plasmid]]: """ - Builds a mapping from part display IDs to lists of compatible MoCloPlasmid objects. + Builds a mapping from part display IDs to lists of compatible Plasmid objects. For each part in the given list, this function searches the provided plasmid collection for plasmids that contain the part as a component. - Each matching plasmid is wrapped in a `MocloPlasmid` object and added to the + Each matching plasmid is wrapped in a `Plasmid` object and added to the dictionary under the part's display ID. Args: @@ -183,14 +226,14 @@ def construct_plasmid_dict( The :class:`sbol2.Document` containing plasmids to search through. Returns: - Dict[str, List[MocloPlasmid]]: + Dict[str, List[Plasmid]]: A dictionary mapping each part display ID to a list of corresponding - `MocloPlasmid` objects found in the collection. + `Plasmid` objects found in the collection. 
""" plasmid_dict = {} for part in part_list: for plasmid in plasmid_collection.componentDefinitions: - if "http://identifiers.org/so/SO:0000637" in plasmid.roles: + if ENGINEERED_PLASMID in plasmid.roles: for component in plasmid.components: if ( component.definition == str(part) @@ -211,34 +254,36 @@ def construct_plasmid_dict( ).name plasmid_dict[part.displayId].append( - MocloPlasmid(componentName, plasmid, plasmid_collection) + Plasmid(componentName, plasmid, plasmid_collection) ) return plasmid_dict def get_compatible_plasmids( - plasmid_dict: Dict[str, List[MocloPlasmid]], backbone: MocloPlasmid -) -> List[MocloPlasmid]: + plasmid_dict: Dict[str, List[Plasmid]], backbone: Plasmid +) -> List[Plasmid]: """ - Returns a list of MocloPlasmid objects that can form a compatible assembly + Returns a list of Plasmid objects that can form a compatible assembly with the given backbone plasmid. The function selects one plasmid from each entry in the dictionary, ensuring that adjacent plasmids have matching MoClo fusion sites, and that the first and last plasmids are compatible with the backbone. Args: plasmid_dict: A dictionary mapping assembly positions or categories to lists - of MocloPlasmid objects. - backbone: The backbone MocloPlasmid whose fusion sites define compatibility. + of Plasmid objects. + backbone: The backbone Plasmid whose fusion sites define compatibility. Returns: - A list of compatible MocloPlasmid objects forming a sequential assembly. + A list of compatible Plasmid objects forming a sequential assembly. """ selected_plasmids = [] match_to = backbone match_idx = 0 for i, key in enumerate(plasmid_dict): + found = False + for plasmid in plasmid_dict[key]: if ( i == len(plasmid_dict) - 1 @@ -249,20 +294,24 @@ def get_compatible_plasmids( f"matched final component {plasmid.name} with {match_to.name} and {backbone.name} on fusion sites ({plasmid.fusion_sites[0]}, {plasmid.fusion_sites[1]})!" 
) selected_plasmids.append(plasmid) + found = True break elif ( i < len(plasmid_dict) - 1 and plasmid.fusion_sites[0] == match_to.fusion_sites[match_idx] - ): # TODO add error handling if no compatible plasmid found + ): print( f"matched {plasmid.name} with {match_to.name} on fusion site {plasmid.fusion_sites[0]}!" ) selected_plasmids.append(plasmid) + found = True match_to = plasmid match_idx = 1 break # TODO edge case where second fusion site does not match terminator fusion site will not be caught by current logic - # 10/14: rethink implementation, will likely need to be different for combinatorial designs + + if not found: + raise ValueError(f"No compatible plasmid found for part {key}") return selected_plasmids @@ -271,7 +320,7 @@ def translate_abstract_to_plasmids( abstract_design_doc: sbol2.Document, plasmid_collection: sbol2.Document, backbone_doc: sbol2.Document, -) -> List[MocloPlasmid]: +) -> List[Plasmid]: """ Translates an abstract SBOLCanvas design into a set of compatible MoClo plasmid assemblies. @@ -293,14 +342,14 @@ def translate_abstract_to_plasmids( parts are assembled. Returns: - List[MocloPlasmid]: + List[Plasmid]: - For combinatorial designs: a list of unique compatible plasmids - (`MocloPlasmid` objects) representing all enumerated design variants. + (`Plasmid` objects) representing all enumerated design variants. - For generic designs: a list of compatible plasmids for the single design instance. 
""" backbone_def = extract_toplevel_definition(backbone_doc) - backbone_plasmid = MocloPlasmid(backbone_def.displayId, backbone_def, backbone_doc) + backbone_plasmid = Plasmid(backbone_def.displayId, backbone_def, backbone_doc) # combinatorial design if len(abstract_design_doc.combinatorialderivations) > 0: diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index ef80ae1..5b610b9 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -1,11 +1,23 @@ import sbol2 -from typing import Union -import zipfile -from .abstract_translator import translate_abstract_to_plasmids -from .sbol2build import golden_gate_assembly_plan -from .robotutils import assembly_plan_RDF_to_JSON, run_opentrons_script_with_json_to_zip +from typing import List, Dict -Plasmid = "Plasmid" # Placeholder for the actual Plasmid class definition +from buildcompiler.plasmid import Plasmid +from buildcompiler.sbol2build import Assembly +from .abstract_translator import ( + get_or_pull, + get_compatible_plasmids, +) +from .constants import ( + AMP, + KAN, + LIGASE, + PART_ROLES, + RESTRICTION_ENZYME, + RESTRICTION_ENZYME_ASSEMBLY_SCAR, + ENGINEERED_PLASMID, + PLASMID_CLONING_VECTOR, + ORGANISM_STRAIN, +) class BuildCompiler: @@ -21,36 +33,91 @@ class BuildCompiler: :type plasmids: list[Plasmid] """ - def __init__(self, abstract_design: Union[sbol2.ComponentDefinition, sbol2.ModuleDefinition, sbol2.CombinatorialDerivation], *,sbol_doc: sbol2.Document): - self.abstract_design = abstract_design - self.sbol_doc = sbol_doc - self.collections = None - self.indexed_plasmids = list[Plasmid] - self.indexced_backbones = list[Plasmid] + def __init__( + self, + collections: List[str], + sbh_registry: str, + auth_token: str, + sbol_doc: sbol2.Document, + ): + self.sbh = sbol2.PartShop(sbh_registry) + self.sbh.key = auth_token + self.sbol_doc = sbol_doc or sbol2.Document() + self.indexed_plasmids = [] + self.indexed_backbones = [] + 
self.restriction_enzyme_implementations = [] + self.ligase_implementations = [] + self._index_collections(collections) - def index_collections(self, collections: list[sbol2.Collection]) -> dict[str, sbol2.Collection]: + def _index_collections(self, collections: List[str]): """Index input collections into plasmids and backbones. - Parses the provided collections (which may contain plasmids, backbones, or strains) - and normalizes them into internal Plasmid/Backbone records that remain linked to - their originating strain definitions. + Parses the provided collections (which may contain plasmids, backbones, strains, and enzymes) + and normalizes them into internal Plasmid/enzyme records that remain linked to + their originating strain and implementation definitions. :param collections: Iterable of user-provided collections/documents. :type collections: Iterable :returns: None. Updates ``self.indexed_plasmids`` in place. :rtype: None - :raises ValueError: If collection elements cannot be interpreted as plasmids. """ - self.collections = collections + for uri in collections: + self.sbh.pull(uri, self.sbol_doc) - #TODO: Iterate thorugh the Collections and create a set of indexed plasmids, linking them to their originating definitions. 
- # Updates indexed_plasmids + for implementation in self.sbol_doc.implementations: + built_object = get_or_pull(self.sbol_doc, self.sbh, implementation.built) + if ( + type(built_object) is sbol2.ModuleDefinition + and ORGANISM_STRAIN in built_object.roles + ): + self._extract_plasmids_from_strain( + built_object, implementation, self.sbol_doc + ) + elif ( + type(built_object) is sbol2.ComponentDefinition + and len(built_object.components) > 1 + ): + if ENGINEERED_PLASMID in built_object.roles: + existing_plasmid = self._get_indexed_plasmid( + self.indexed_plasmids, built_object + ) + if existing_plasmid: + existing_plasmid.plasmid_implementations.append(implementation) + else: + self.indexed_plasmids.append( + Plasmid( + built_object, None, [implementation], [], self.sbol_doc + ) + ) + elif PLASMID_CLONING_VECTOR in built_object.roles: + existing_backbone = self._get_indexed_plasmid( + self.indexed_backbones, built_object + ) + if existing_backbone: + existing_backbone.plasmid_implementations.append(implementation) + else: + self.indexed_backbones.append( + Plasmid( + built_object, None, [implementation], [], self.sbol_doc + ) + ) + elif sbol2.BIOPAX_PROTEIN in built_object.types: + if RESTRICTION_ENZYME in built_object.roles: + self.restriction_enzyme_implementations.append(implementation) + elif LIGASE in built_object.roles: + self.ligase_implementations.append(implementation) - - return "Success" - - def domestication(self,) -> list[sbol2.ComponentDefinition]: + for strain in self.sbol_doc.moduleDefinitions: + if ORGANISM_STRAIN in strain.roles: + self._extract_plasmids_from_strain(strain, None, self.sbol_doc) + + for definition in self.sbol_doc.componentDefinitions: + self._sort_plasmid_components(definition, self.sbol_doc) + + def domestication( + self, + ) -> list[sbol2.ComponentDefinition]: """Domesticate the indexed plasmids for Golden Gate assembly. 
For each indexed plasmid, this method identifies the necessary domestication @@ -61,16 +128,17 @@ def domestication(self,) -> list[sbol2.ComponentDefinition]: :rtype: list[sbol2.ComponentDefinition] """ - #TODO: Check which parts from the abstract design are not present in the indexed plasmids with the appropiate fusion sites and need to be domesticated. - #TODO: Create a SBOL representation of the domestication process, updating the SBOL Document. - #TODO: Generate a protocol for the domestication process. + # TODO: Check which parts from the abstract design are not present in the indexed plasmids with the appropiate fusion sites and need to be domesticated. + # TODO: Create a SBOL representation of the domestication process, updating the SBOL Document. + # TODO: Generate a protocol for the domestication process. protocol = "To be implemented by PUDU" - #TODO: Updates indexed plasmids with domesticated versions. + # TODO: Updates indexed plasmids with domesticated versions. - return protocol - - def assembly_lvl1(self,) -> list[sbol2.ComponentDefinition]: + + def assembly_lvl1( + self, abstract_design: sbol2.ComponentDefinition, backbone: Plasmid = None + ) -> list[sbol2.ComponentDefinition]: """Assemble level-1 plasmids for each gene/transcriptional unit. Uses indexed plasmids/backbones and the current design to assemble @@ -81,18 +149,57 @@ def assembly_lvl1(self,) -> list[sbol2.ComponentDefinition]: :raises LookupError: If compatible plasmids or backbones cannot be found. """ - #TODO: Identify parts from the abstract design needed for lvl1 assembly and find compatible indexed plasmids/backbones. - # if bacbckbone provided then use it.Then look for parts constraind by the backbone fusion sites. + # TODO: Identify parts from the abstract design needed for lvl1 assembly and find compatible indexed plasmids/backbones. + # if backbone provided then use it.Then look for parts constraind by the backbone fusion sites. 
# else, run an algorithm to try a backbone from 4 the choices. If it fails on the 4 raise an error. - #TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. + + plasmid_dict = self._get_input_plasmids( + design=abstract_design, antibiotic_resistance=AMP + ) + + if not backbone: + backbone, compatible_plasmids = self._get_backbone( + plasmid_dict, antibiotic_resistance=KAN + ) + else: + compatible_plasmids = get_compatible_plasmids(plasmid_dict, self.backbone) + + bsaI_impl = next( + impl + for impl in self.restriction_enzyme_implementations + if self.sbol_doc.find(impl.built).displayId == "BsaI" + ) + if bsaI_impl is None: + raise ValueError( + "BsaI Restriction enzyme not found in provided collections. Terminating assembly." + ) + + ligase_impl = self.ligase_implementations[0] + if bsaI_impl is None: + raise ValueError( + "No appropriate ligase found in provided collections. Terminating assembly." + ) + + assembly = Assembly( + compatible_plasmids, backbone, bsaI_impl, ligase_impl, self.sbol_doc + ) + composite_plasmids, product_doc = assembly.run() + + self.indexed_plasmids.extend(composite_plasmids) + + return composite_plasmids + + # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. # Using he selected parts create the representation, you need Plasmids, BsaI and T4 Ligase. - #TODO: Updates indexed plasmids with assembled versions. - #TODO: Generate a protocol for the assembly process. + # TODO: Updates indexed plasmids with assembled versions. + # TODO: Generate a protocol for the assembly process. protocol = "To be implemented by PUDU" return protocol - - def assembly_lvl2(self,) -> list[sbol2.ComponentDefinition]: + + def assembly_lvl2( + self, + ) -> list[sbol2.ComponentDefinition]: """Assemble level-2 plasmids for the full design. 
Uses the assembled lvl1 plasmids and the current design to assemble @@ -101,12 +208,215 @@ def assembly_lvl2(self,) -> list[sbol2.ComponentDefinition]: :returns: List of assembled lvl2 plasmids. :rtype: list[Plasmid] :raises LookupError: If compatible plasmids or backbones cannot be found. - """ + """ - #TODO: Identify parts from the abstract design needed for lvl2 assembly and find compatible indexed plasmids/backbones. - #TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. - #TODO: Generate a protocol for the assembly process. + # TODO: Identify parts from the abstract design needed for lvl2 assembly and find compatible indexed plasmids/backbones. + # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. + # TODO: Generate a protocol for the assembly process. protocol = "To be implemented by PUDU" - #TODO: Updates indexed plasmids with assembled versions. + # TODO: Updates indexed plasmids with assembled versions. return protocol + + def _extract_plasmids_from_strain( + self, + strain: sbol2.ModuleDefinition, + strain_implementation: sbol2.Implementation, + doc: sbol2.Document, + ): + # strain_implementation = optional param + for plasmid in strain.functionalComponents: + plasmid_definition = get_or_pull(doc, self.sbh, plasmid.definition) + + if ENGINEERED_PLASMID in plasmid_definition.roles: + existing = self._get_indexed_plasmid( + self.indexed_plasmids, plasmid_definition + ) + + if existing: + # Add strain if not already recorded, else do nothing + if all( + s.identity != strain.identity + for s in existing.strain_definitions + ): + existing.strain_definitions.append(strain) + + if strain_implementation: + existing.strain_implementations.append(strain_implementation) + else: + # Create new Plasmid entry + self.indexed_plasmids.append( + Plasmid( + plasmid_definition, + strain, + [], + [strain_implementation] if strain_implementation else [], + doc, + ) + ) + elif PLASMID_CLONING_VECTOR in 
plasmid_definition.roles: + existing = self._get_indexed_plasmid( + self.indexed_backbones, plasmid_definition + ) + if existing: + # Add strain if not already recorded, else do nothing + if all( + s.identity != strain.identity + for s in existing.strain_definitions + ): + existing.strain_definitions.append(strain) + + if strain_implementation: + existing.strain_implementations.append(strain_implementation) + else: + # Create new backbone entry + self.indexed_backbones.append( + Plasmid( + plasmid_definition, + strain, + [], + [strain_implementation] if strain_implementation else [], + doc, + ) + ) + + def _get_indexed_plasmid(self, plasmid_list, plasmid_definition): + return next( + ( + p + for p in plasmid_list + if p.plasmid_definition.identity == plasmid_definition.identity + ), + None, + ) + + def _sort_plasmid_components( + self, definition: sbol2.ComponentDefinition, doc: sbol2.Document + ): + if len(definition.components) > 1: + if ENGINEERED_PLASMID in definition.roles and not self._get_indexed_plasmid( + self.indexed_plasmids, definition + ): + self.indexed_plasmids.append(Plasmid(definition, None, [], [], doc)) + elif ( + PLASMID_CLONING_VECTOR in definition.roles + and not self._get_indexed_plasmid(self.indexed_backbones, definition) + ): + self.indexed_backbones.append(Plasmid(definition, None, [], [], doc)) + + def _get_input_plasmids( + self, design: sbol2.ComponentDefinition, antibiotic_resistance: str + ) -> Dict[str, List[Plasmid]]: + """ + with AR=ampicillin. + """ + + parts = self._extract_design_parts(design) + plasmid_dictionary = self._construct_plasmid_dict(parts, antibiotic_resistance) + return plasmid_dictionary + + def _get_backbone( + self, plasmid_dict: Dict[str, List[Plasmid]], antibiotic_resistance: str + ): + """ + with AR=kanamycin. 
+ """ + sorted_backbones = sorted( + self.indexed_backbones, key=lambda p: p.fusion_sites[0] + ) + + for backbone in sorted_backbones: + if backbone.antibiotic_resistance == antibiotic_resistance: + # check for compatibility + # also, if we find a hit here we may not need to run get_compatible plasmids later, work is already done + try: + compatible_plasmids = get_compatible_plasmids( + plasmid_dict, backbone + ) + print( + f"Success with backbone: {backbone.name} and plasmids: {[plas.name for plas in compatible_plasmids]}" + ) + return backbone, compatible_plasmids + except ValueError as e: + print(f"{e} and backbone {backbone}") + compatible_plasmids = None + + return None, None + + def _extract_design_parts( + self, design: sbol2.ComponentDefinition + ) -> List[sbol2.ComponentDefinition]: + """ + Returns definitions of parts in a design in sequential order. + + Args: + design: :class:`sbol2.ComponentDefinition` to extract parts from. + doc: :class:`sbol2.Document` containing all component definitions. + + Returns: + A list of component definitions in sequential order. + """ + component_list = [c for c in design.getInSequentialOrder()] + return [ + get_or_pull(self.sbol_doc, self.sbh, component.definition) + for component in component_list + ] + + def _construct_plasmid_dict( + self, part_list: List[sbol2.ComponentDefinition], antibiotic_resistance: str + ) -> Dict[str, List[Plasmid]]: + """ + For each part in the given list, this function searches for plasmids that contain the part as a component. + + Args: + part_list: + List of :class:`sbol2.ComponentDefinition` objects representing + the parts to match. + + Returns: + Dict[str, List[Plasmid]]: + A dictionary mapping each part display ID to a list of corresponding + `Plasmid` objects found in the collection. 
+ """ + plasmid_dict = {} + for part in part_list: + for plasmid in self.indexed_plasmids: + if ( + ENGINEERED_PLASMID in plasmid.plasmid_definition.roles + ): # TODO only grab implemented plasmids + for component in plasmid.plasmid_definition.components: + if ( + component.definition == str(part) + and self._is_single_part(plasmid.plasmid_definition) + and plasmid.antibiotic_resistance == antibiotic_resistance + ): + plasmid_dict.setdefault(part.displayId, []) + plasmid_dict[part.displayId].append(plasmid) + + return plasmid_dict + + def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: + num_components = len(plasmid.components) + + if num_components != 4: # TODO subject to change for more complex L0s? + return False + else: + component_definitions = [ + get_or_pull(self.sbol_doc, self.sbh, comp.definition) + for comp in plasmid.getInSequentialOrder() + ] + + for index, comp in enumerate(component_definitions): + if bool(set(comp.roles) & set(PART_ROLES)): # identify part index + previous_component = component_definitions[ + (index - 1) % num_components + ] + next_component = component_definitions[(index + 1) % num_components] + + if ( + RESTRICTION_ENZYME_ASSEMBLY_SCAR in previous_component.roles + and RESTRICTION_ENZYME_ASSEMBLY_SCAR in next_component.roles + ): + return True + + return False diff --git a/src/buildcompiler/constants.py b/src/buildcompiler/constants.py index e8a77cf..19c25a2 100644 --- a/src/buildcompiler/constants.py +++ b/src/buildcompiler/constants.py @@ -9,6 +9,40 @@ "H": "ACTA", } +PART_ROLES = { + "http://identifiers.org/so/SO:0000167", # promoter + "http://identifiers.org/so/SO:0000139", # RBS + "http://identifiers.org/so/SO:0000316", # CDS + "http://identifiers.org/so/SO:0000141", # terminator +} + +KAN = "Kanamycin" +AMP = "Ampicillin" + +ANTIBIOTIC_MAP = { + "kan": KAN, + "amp": AMP, +} +# TODO http or https for identifiers? 
+ +ENGINEERED_PLASMID = "http://identifiers.org/so/SO:0000637" +ENGINEERED_INSERT = "https://identifiers.org/so/SO:0000915" +ENGINEERED_REGION = "http://identifiers.org/so/SO:0000804" +PLASMID_VECTOR = "https://identifiers.org/so/SO:0000755" +PLASMID_CLONING_VECTOR = "https://identifiers.org/ncit/NCIT:C1919" +ANTIBIOTIC_RESISTANCE = "https://identifiers.org/ncit/NCIT:C17449" +LIGASE = "http://identifiers.org/ncit/NCIT:C16796" +RESTRICTION_ENZYME = "http://identifiers.org/obi/OBI_0000732" +RESTRICTION_ENZYME_ASSEMBLY_SCAR = "http://identifiers.org/so/SO:0001953" +ORGANISM_STRAIN = "https://identifiers.org/ncit/NCIT:C14419" + +FIVE_PRIME_OVERHANG = "http://identifiers.org/so/SO:0001932" +THREE_PRIME_OVERHANG = "http://identifiers.org/so/SO:0001933" + +SINGLE_STRANDED = "http://identifiers.org/so/SO:0000984" +CIRCULAR = "http://identifiers.org/so/SO:0000988" +LINEAR = "http://identifiers.org/so/SO:0000987" + DNA_TYPES = { # TODO see about restricting dna types to only accept dna "http://www.biopax.org/release/biopax-level3.owl#Dna", "http://www.biopax.org/release/biopax-level3.owl#DnaRegion", diff --git a/src/buildcompiler/plasmid.py b/src/buildcompiler/plasmid.py new file mode 100644 index 0000000..9b81dfe --- /dev/null +++ b/src/buildcompiler/plasmid.py @@ -0,0 +1,119 @@ +from typing import List + +import sbol2 +import re + +from buildcompiler.abstract_translator import extract_fusion_sites +from buildcompiler.constants import ANTIBIOTIC_MAP, ANTIBIOTIC_RESISTANCE, FUSION_SITES + + +class Plasmid: + def __init__( + self, + definition: sbol2.ComponentDefinition, + strain_definition: sbol2.ModuleDefinition, + plasmid_implementations: sbol2.Implementation, + strain_implementations: sbol2.Implementation, + doc: sbol2.document, + ): + self.plasmid_definition = definition + self.strain_definitions = [strain_definition] + self.plasmid_implementations = plasmid_implementations + self.strain_implementations = strain_implementations + self.fusion_sites = 
self._match_fusion_sites(doc) + self.name = definition.displayId + "".join(f"_{s}" for s in self.fusion_sites) + self.antibiotic_resistance = self._get_antibiotic_resistance(doc) + + def _match_fusion_sites(self, doc: sbol2.document) -> List[str]: + fusion_site_definitions = extract_fusion_sites(self.plasmid_definition, doc) + fusion_sites = [] + for site in fusion_site_definitions: + sequence_obj = doc.getSequence(site.sequences[0]) + sequence = sequence_obj.elements + + for key, seq in FUSION_SITES.items(): + if seq == sequence.upper(): + fusion_sites.append(key) + + fusion_sites.sort() + return fusion_sites + + def _get_antibiotic_resistance(self, doc: sbol2.Document) -> str: + for component in ( + self.plasmid_definition.components + ): # go a level deeper, within the backbone core component + definition = doc.get(component.definition) + for subcomponent in definition.components: + subcomponent_def = doc.get(subcomponent.definition) + if ANTIBIOTIC_RESISTANCE in subcomponent_def.roles: + match = re.search( + r"\b(" + "|".join(ANTIBIOTIC_MAP) + r")_", + subcomponent_def.displayId, + re.IGNORECASE, + ) + if match: + return ANTIBIOTIC_MAP[match.group(1).lower()] + return "Unknown" + + return None + + def __repr__(self) -> str: + strain_ids = ( + [getattr(s, "identity", None) for s in self.strain_definitions] + if self.strain_definitions + else [] + ) + + plasmid_impl_ids = ( + [getattr(p, "identity", None) for p in self.plasmid_implementations] + if self.plasmid_implementations + else [] + ) + + strain_impl_ids = ( + [getattr(s, "identity", None) for s in self.strain_implementations] + if self.strain_implementations + else [] + ) + + return ( + f"Plasmid:\n" + f" Name: {self.name}\n" + f" Plasmid Definition: {getattr(self.plasmid_definition, 'identity', 'None')}\n" + f" Strain Definitions: {strain_ids}\n" + f" Plasmid Implementations: {plasmid_impl_ids or 'None'}\n" + f" Strain Implementations: {strain_impl_ids or 'None'}\n" + f" Fusion Sites: {self.fusion_sites 
or 'Not found'}\n" + f" Antibiotic Resistance: {self.antibiotic_resistance or 'None'}\n" + ) + + def __eq__(self, other): + if not isinstance(other, Plasmid): + return False + return self.plasmid_definition == other.plasmid_definition + + def __hash__(self): + return hash(self.plasmid_definition) + + +# def _extract_fusion_sites( +# plasmid: sbol2.ComponentDefinition, +# doc: sbol2.Document, +# sbh: sbol2.PartShop +# ) -> List[sbol2.ComponentDefinition]: +# """ +# Returns all fusion site component definitions from a plasmid. + +# Args: +# plasmid: :class:`sbol2.ComponentDefinition` representing the plasmid. + +# Returns: +# A list of fusion site component definitions. +# """ +# fusion_sites = [] +# for component in plasmid.components: +# definition = get_or_pull(doc, sbh, component.definition) +# if RESTRICTION_ENZYME_ASSEMBLY_SCAR in definition.roles: +# fusion_sites.append(definition) + +# return fusion_sites diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index db6a755..fc731bf 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -3,14 +3,120 @@ from Bio.Seq import Seq from pydna.dseqrecord import Dseqrecord from itertools import product +from buildcompiler.plasmid import Plasmid from typing import List, Union, Tuple -from .constants import DNA_TYPES +from .constants import ( + CIRCULAR, + DNA_TYPES, + ENGINEERED_INSERT, + ENGINEERED_PLASMID, + ENGINEERED_REGION, + FIVE_PRIME_OVERHANG, + FUSION_SITES, + LINEAR, + PLASMID_VECTOR, + RESTRICTION_ENZYME, + RESTRICTION_ENZYME_ASSEMBLY_SCAR, + SINGLE_STRANDED, + THREE_PRIME_OVERHANG, +) sbol2.Config.setHomespace("https://SBOL2Build.org") sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True) sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False) +class Assembly: + """Creates an Assembly Plan. + + :param name: Name of the assembly plan ModuleDefinition. + :param part_plasmids: Parts in backbone to be assembled. 
+ :param plasmid_acceptor_backbone: Backbone in which parts are inserted on the assembly. + :param restriction_enzyme: Restriction enzyme name used by PyDNA. Case sensitive, follow standard restriction enzyme nomenclature, i.e. 'BsaI' + :param document: SBOL Document where the assembly plan will be created. + """ + + def __init__( # TODO add fields for activity/agent/plan + self, + part_plasmids: List[Plasmid], + backbone_plasmid: Plasmid, + restriction_enzyme: sbol2.Implementation, # TODO search for implementation in document, or domesticate the RE + ligase: sbol2.Implementation, + document: sbol2.Document, + ): + self.part_plasmids = part_plasmids + self.backbone = backbone_plasmid + self.restriction_enzyme = restriction_enzyme + self.ligase = ligase + self.extracted_parts = [] # list of tuples [ComponentDefinition, Sequence] + self.source_document = document + self.final_document = sbol2.Document() + self.assembly_activity = initialize_assembly_activity() + self.composites = [] + + def run( + self, include_extracted_parts=False + ) -> List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]: + """Runs full assembly simulation. + + `document` parameter of golden_gate_assembly_plan object is updated by reference to include assembly plan ModuleDefinition and all related information. + + Runs :func:`part_digestion` for all `part_plasmids` and :func:`backbone_digestion` for `plasmid_acceptor_backbone` with `restriction_enzyme`. Then runs :func:`ligation` with these parts to form composites. + + :return: List of all composites generated, in the form of tuples of ComponentDefinition and Sequence. + """ + for plasmid in self.part_plasmids: + plasmid_impl = plasmid.plasmid_implementations[ + 0 + ] # TODO update with more sophisticated selection process? 
+ extracts_tuple_list, _ = part_digestion( + plasmid_impl, + [self.restriction_enzyme], + self.assembly_activity, + self.source_document, + ) + append_extracts_to_doc(extracts_tuple_list, self.source_document) + if include_extracted_parts: + append_extracts_to_doc(extracts_tuple_list, self.final_document) + self.extracted_parts.append(extracts_tuple_list[0][0]) + + backbone_impl = self.backbone.plasmid_implementations[0] + extracts_tuple_list, _ = backbone_digestion( + backbone_impl, + [self.restriction_enzyme], + self.assembly_activity, + self.source_document, + ) + + append_extracts_to_doc(extracts_tuple_list, self.source_document) + if include_extracted_parts: + append_extracts_to_doc(extracts_tuple_list, self.final_document) + self.extracted_parts.append(extracts_tuple_list[0][0]) + + self.composites = ligation( + self.extracted_parts, + self.assembly_activity, + self.source_document, + self.final_document, + self.ligase, + ) + + self.final_document.add(self.assembly_activity) + + composite_plasmid_objs = [ + Plasmid( + self.final_document.get(impl.built), + None, + [impl], + [None], + self.source_document, + ) + for impl in self.composites + ] + + return composite_plasmid_objs, self.final_document + + def rebase_restriction_enzyme(name: str, **kwargs) -> sbol2.ComponentDefinition: """Creates an ComponentDefinition Restriction Enzyme Component from rebase. @@ -20,9 +126,9 @@ def rebase_restriction_enzyme(name: str, **kwargs) -> sbol2.ComponentDefinition: """ definition = f"http://rebase.neb.com/rebase/enz/{name}.html" # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.8 cd = sbol2.ComponentDefinition(name) - cd.types = sbol2.BIOPAX_PROTEIN + cd.types = [sbol2.BIOPAX_PROTEIN] cd.name = name - cd.roles = ["http://identifiers.org/obi/OBI:0000732"] + cd.roles = [RESTRICTION_ENZYME] cd.wasDerivedFrom = definition cd.description = f"Restriction enzyme {name} from REBASE." 
return cd @@ -145,12 +251,8 @@ def part_in_backbone_from_sbol( locations=[open_backbone_location1, open_backbone_location2] ) else: - part_in_backbone_component.addRole( - "http://identifiers.org/so/SO:0000988" - ) # circular - part_in_backbone_component.addRole( - "https://identifiers.org/so/SO:0000755" - ) # plasmid vector + part_in_backbone_component.addRole(CIRCULAR) + part_in_backbone_component.addRole(PLASMID_VECTOR) # creating backbone feature open_backbone_location1 = sbol2.Range( uri="backboneloc1", start=1, end=part_location[0] + fusion_site_length - 1 @@ -191,41 +293,31 @@ def is_circular(obj: sbol2.ComponentDefinition) -> bool: :return: true if circular """ return any(n == sbol2.SO_CIRCULAR for n in obj.types) or any( - n == "http://identifiers.org/so/SO:0000637" for n in obj.roles + n == ENGINEERED_PLASMID for n in obj.roles ) # temporarily allowing 'engineered plasmid' role to qualify as circular def part_digestion( - reactant: Union[sbol2.ComponentDefinition, sbol2.ModuleDefinition], - restriction_enzymes: List[sbol2.ComponentDefinition], - assembly_plan: sbol2.ModuleDefinition, + reactant: sbol2.Implementation, + restriction_enzymes: List[sbol2.Implementation], + assembly_activity: sbol2.Activity, document: sbol2.Document, - **kwargs, -) -> Tuple[ - List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]], sbol2.ModuleDefinition -]: +) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: """Runs a simulated digestion on the top level sequence in the reactant ComponentDefinition or ModuleDefinition with the given restriciton enzymes, creating a extracted part ComponentDefinition, a digestion Interaction, and converts existing scars to 5' and 3' overhangs. - The product ComponentDefinition is assumed the open backbone in this case. + The product ComponentDefinition is assumed the digested part in this case. 
Written for use with the SBOL2.3 output of https://sbolcanvas.org - :param reactant: DNA to be digested as SBOL ComponentDefinition or ModuleDefinition, usually a part_in_backbone. ComponentDefinition is the best-practice type for plasmids.. + :param reactant: Plasmid to be digested, as an SBOL Implementation whose built ComponentDefinition is digested :param restriction_enzymes: Restriction enzymes as :class:`sbol2.ComponentDefinition` (generate with :func:`rebase_restriction_enzyme`). :param assembly_plan: SBOL ModuleDefinition to contain the functional components, interactions, and participations :param document: original SBOL2 document to be used to extract referenced objects. :return: A tuple of a list ComponentDefinitions and Sequences, and an assembly plan ModuleDefinition. """ - if type(reactant) is sbol2.ModuleDefinition: - # extract component definition from module - reactant_displayId = reactant.functionalComponents[0].displayId - reactant_def_URI = reactant.functionalComponents[0].definition - reactant_component_definition = document.getComponentDefinition( - reactant_def_URI - ) - else: - reactant_displayId = reactant.displayId - reactant_component_definition = reactant + + reactant_component_definition = document.get(reactant.built) + reactant_displayId = reactant_component_definition.displayId types = set(reactant_component_definition.types or []) @@ -237,33 +329,40 @@ def part_digestion( raise ValueError( f"The reactant needs to have precisely one sequence. 
The input reactant has {len(reactant.sequences)} sequences" ) - participations = [] extracts_list = [] restriction_enzymes_pydna = [] - for re in restriction_enzymes: - enzyme = Restriction.__dict__[re.name] + assembly_activity.usages.add( + sbol2.Usage( + uri=f"{reactant.displayId}", + entity=reactant.identity, + role="http://sbols.org/v2#build", + ) + ) + + for enzyme_implmentation in restriction_enzymes: + enzyme_definition = document.get(enzyme_implmentation.built) + + enzyme = Restriction.__dict__[enzyme_definition.name] restriction_enzymes_pydna.append(enzyme) - enzyme_component = sbol2.FunctionalComponent(uri=f"{re.name}_enzyme") - enzyme_component.definition = re - enzyme_component.displayID = f"{re.name}_enzyme" - enzyme_in_module = False + enzyme_in_activity = False - for comp in assembly_plan.functionalComponents: - if comp.displayId == enzyme_component.displayID: - enzyme_component = comp - enzyme_in_module = True + for usage in assembly_activity.usages: + entity_URI = usage.entity + # entity = document.get(entity_URI) - if not enzyme_in_module: - assembly_plan.functionalComponents.add(enzyme_component) + if entity_URI == enzyme_implmentation.identity: + enzyme_in_activity = True - modifier_participation = sbol2.Participation(uri="restriction") - modifier_participation.participant = enzyme_component - modifier_participation.roles = [ - "http://identifiers.org/biomodels.sbo/SBO:0000019" - ] - participations.append(modifier_participation) + if not enzyme_in_activity: + assembly_activity.usages.add( + sbol2.Usage( + uri=f"{enzyme_definition.name}_enzyme", + entity=enzyme_implmentation.identity, + role="http://sbols.org/v2#build", + ) + ) # Inform topology to PyDNA, if not found assuming linear. if is_circular(reactant_component_definition): @@ -284,9 +383,9 @@ def part_digestion( f"Not supported number of products. 
Found{len(digested_reactant)}" ) elif circular and len(digested_reactant) == 2: - part_extract, backbone = sorted(digested_reactant, key=len) + part_extract, _ = sorted(digested_reactant, key=len) elif linear and len(digested_reactant) == 3: - prefix, part_extract, suffix = digested_reactant + _, part_extract, _ = digested_reactant else: raise ValueError( f"Reactant {reactant_component_definition.displayId} has no valid topology type, with {len(digested_reactant)} digested products, types: {reactant_component_definition.types}, and roles: {reactant_component_definition.roles}" @@ -301,18 +400,19 @@ def part_digestion( ) product_sequence = str(part_extract.seq) prod_component_definition, prod_seq = dna_componentdefinition_with_sequence( - identity=f"{reactant.displayId if isinstance(reactant, sbol2.ComponentDefinition) else reactant.functionalComponents[0].displayId}_extracted_part", + identity=f"{reactant_component_definition.displayId}_extracted_part", sequence=product_sequence, - **kwargs, ) prod_component_definition.wasDerivedFrom = reactant_component_definition.identity extracts_list.append((prod_component_definition, prod_seq)) + # TODO explore how much granularity in overhang representation is needed to preserve final composite annotations/components + # five prime overhang five_prime_oh_definition = sbol2.ComponentDefinition( uri=f"{reactant_displayId}_five_prime_oh" - ) # TODO: ensure circular type is preserved for sbh visualization - five_prime_oh_definition.addRole("http://identifiers.org/so/SO:0001932") + ) + five_prime_oh_definition.addRole(FIVE_PRIME_OVERHANG) five_prime_oh_location = sbol2.Range( uri="five_prime_oh_location", start=1, end=len(product_5_prime_ss_end) ) @@ -323,7 +423,7 @@ def part_digestion( five_prime_overhang_annotation = sbol2.SequenceAnnotation(uri="five_prime_overhang") five_prime_overhang_annotation.locations.add(five_prime_oh_location) - # extracted part => point straight to part from sbolcanvas + # extracted part => point 
straight to part of interest part_location = sbol2.Range( uri=f"{reactant_displayId}_part_location", start=len(product_5_prime_ss_end) + 1, @@ -336,7 +436,7 @@ def part_digestion( three_prime_oh_definition = sbol2.ComponentDefinition( uri=f"{reactant_displayId}_three_prime_oh" ) - three_prime_oh_definition.addRole("http://identifiers.org/so/SO:0001933") + three_prime_oh_definition.addRole(THREE_PRIME_OVERHANG) three_prime_oh_location = sbol2.Range( uri="three_prime_oh_location", start=len(product_sequence) - len(product_3_prime_ss_end) + 1, @@ -360,7 +460,7 @@ def part_digestion( original_part_def_URI = "" - # enccode ontologies of overhangs + # encode ontologies of overhangs (may no longer be necessary) for definition in document.componentDefinitions: for seqURI in definition.sequences: seq = document.getSequence(seqURI) @@ -372,9 +472,7 @@ def part_digestion( ) three_prime_sequence.wasDerivedFrom = seq.identity three_prime_oh_definition.sequences = [three_prime_sequence] - three_prime_oh_definition.types.append( - "http://identifiers.org/so/SO:0000984" - ) # single-stranded for overhangs + three_prime_oh_definition.types.append(SINGLE_STRANDED) extracts_list.append((three_prime_oh_definition, three_prime_sequence)) extracts_list.append((definition, seq)) # add scars to list @@ -391,9 +489,7 @@ def part_digestion( ) five_prime_sequence.wasDerivedFrom = seq.identity five_prime_oh_definition.sequences = [five_prime_sequence] - five_prime_oh_definition.types.append( - "http://identifiers.org/so/SO:0000984" - ) # single-stranded for overhangs + five_prime_oh_definition.types.append(SINGLE_STRANDED) extracts_list.append((five_prime_oh_definition, five_prime_sequence)) extracts_list.append((definition, seq)) @@ -407,53 +503,18 @@ def part_digestion( prod_component_definition.sequenceAnnotations.add(three_prime_overhang_annotation) prod_component_definition.sequenceAnnotations.add(five_prime_overhang_annotation) 
prod_component_definition.sequenceAnnotations.add(part_extract_annotation) - prod_component_definition.addRole( - "https://identifiers.org/so/SO:0000915" - ) # engineered insert - prod_component_definition.addType("http://identifiers.org/so/SO:0000987") # linear - - # Add reference to part in backbone - reactant_component = sbol2.FunctionalComponent(uri=f"{reactant_displayId}_reactant") - reactant_component.definition = reactant_component_definition - assembly_plan.functionalComponents.add(reactant_component) - - # Create reactant Participation. - reactant_participation = sbol2.Participation(uri=f"{reactant_displayId}_reactant") - reactant_participation.participant = reactant_component - reactant_participation.roles = [sbol2.SBO_REACTANT] - participations.append(reactant_participation) - - prod_component = sbol2.FunctionalComponent( - uri=f"{reactant_displayId}_digestion_product" - ) - prod_component.definition = prod_component_definition - assembly_plan.functionalComponents.add(prod_component) - - product_participation = sbol2.Participation(uri=f"{reactant_displayId}_product") - product_participation.participant = prod_component - product_participation.roles = [sbol2.SBO_PRODUCT] - participations.append(product_participation) - - # Make Interaction - interaction = sbol2.Interaction( - uri=f"{reactant_displayId}_digestion_interaction", - interaction_type="http://identifiers.org/biomodels.sbo/SBO:0000178", - ) - interaction.participations = participations - assembly_plan.interactions.add(interaction) + prod_component_definition.addRole(ENGINEERED_INSERT) + prod_component_definition.addType(LINEAR) - return extracts_list, assembly_plan + return extracts_list, assembly_activity def backbone_digestion( - reactant: Union[sbol2.ComponentDefinition, sbol2.ModuleDefinition], - restriction_enzymes: List[sbol2.ComponentDefinition], - assembly_plan: sbol2.ModuleDefinition, + reactant: sbol2.Implementation, + restriction_enzymes: List[sbol2.Implementation], + 
assembly_activity: sbol2.Activity, document: sbol2.Document, - **kwargs, -) -> Tuple[ - List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]], sbol2.ModuleDefinition -]: +) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: """Runs a simulated digestion on the top level sequence in the reactant ComponentDefinition or ModuleDefinition with the given restriciton enzymes, creating an open backbone ComponentDefinition, a digestion Interaction, and converts existing scars to 5' and 3' overhangs. The product ComponentDefinition is assumed the open backbone in this case. @@ -466,16 +527,8 @@ def backbone_digestion( :param document: original SBOL2 document to be used to extract referenced objects. :return: A tuple of a list ComponentDefinitions and Sequences, and an assembly plan ModuleDefinition. """ - if type(reactant) is sbol2.ModuleDefinition: - # extract component definition from module - reactant_displayId = reactant.functionalComponents[0].displayId - reactant_def_URI = reactant.functionalComponents[0].definition - reactant_component_definition = document.getComponentDefinition( - reactant_def_URI - ) - else: - reactant_displayId = reactant.displayId - reactant_component_definition = reactant + reactant_component_definition = document.get(reactant.built) + reactant_displayId = reactant_component_definition.displayId types = set(reactant_component_definition.types or []) @@ -487,33 +540,40 @@ def backbone_digestion( raise ValueError( f"The reactant needs to have precisely one sequence. 
The input reactant has {len(reactant.sequences)} sequences" ) - participations = [] extracts_list = [] restriction_enzymes_pydna = [] - for re in restriction_enzymes: - enzyme = Restriction.__dict__[re.name] + assembly_activity.usages.add( + sbol2.Usage( + uri=f"{reactant.displayId}", + entity=reactant.identity, + role="http://sbols.org/v2#build", + ) + ) + + for enzyme_implmentation in restriction_enzymes: + enzyme_definition = document.get(enzyme_implmentation.built) + + enzyme = Restriction.__dict__[enzyme_definition.name] restriction_enzymes_pydna.append(enzyme) - enzyme_component = sbol2.FunctionalComponent(uri=f"{re.name}_enzyme") - enzyme_component.definition = re - enzyme_component.displayID = f"{re.name}_enzyme" - enzyme_in_module = False + enzyme_in_activity = False - for comp in assembly_plan.functionalComponents: - if comp.displayId == enzyme_component.displayID: - enzyme_component = comp - enzyme_in_module = True + for usage in assembly_activity.usages: + entity_URI = usage.entity + # entity = document.get(entity_URI) - if not enzyme_in_module: - assembly_plan.functionalComponents.add(enzyme_component) + if entity_URI == enzyme_implmentation.identity: + enzyme_in_activity = True - modifier_participation = sbol2.Participation(uri="restriction") - modifier_participation.participant = enzyme_component - modifier_participation.roles = [ - "http://identifiers.org/biomodels.sbo/SBO:0000019" - ] # modifier - participations.append(modifier_participation) + if not enzyme_in_activity: + assembly_activity.usages.add( + sbol2.Usage( + uri=f"{enzyme_definition.name}_enzyme", + entity=enzyme_implmentation.identity, + role="http://sbols.org/v2#build", + ) + ) # Inform topology to PyDNA, if not found assuming linear. 
if is_circular(reactant_component_definition): @@ -527,21 +587,21 @@ def backbone_digestion( reactant_seq = document.getSequence(reactant_seq).elements # Dseqrecord is from PyDNA package with reactant sequence ds_reactant = Dseqrecord(reactant_seq, circular=circular) - digested_reactant = ds_reactant.cut( - restriction_enzymes_pydna - ) # TODO see if ds_reactant.cut is working, causing problems downstream + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) if len(digested_reactant) < 2 or len(digested_reactant) > 3: raise ValueError( f"Not supported number of products. Found: {len(digested_reactant)}" - ) # TODO make more specific for buildplanner + ) # TODO select them based on content rather than size. elif circular and len(digested_reactant) == 2: - part_extract, backbone = sorted(digested_reactant, key=len) + _, backbone = sorted(digested_reactant, key=len) elif linear and len(digested_reactant) == 3: prefix, part_extract, suffix = digested_reactant else: - raise ValueError("The reactant has no valid topology type") + raise ValueError( + f"Reactant {reactant_component_definition.displayId} has no valid topology type, with {len(digested_reactant)} digested products, types: {reactant_component_definition.types}, and roles: {reactant_component_definition.roles}" + ) # Compute the length of single strand sticky ends or fusion sites product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end() @@ -550,7 +610,6 @@ def backbone_digestion( prod_backbone_definition, prod_seq = dna_componentdefinition_with_sequence( identity=f"{reactant_component_definition.displayId}_extracted_backbone", sequence=product_sequence, - **kwargs, ) prod_backbone_definition.wasDerivedFrom = reactant_component_definition.identity extracts_list.append((prod_backbone_definition, prod_seq)) @@ -558,10 +617,8 @@ def backbone_digestion( # five prime overhang five_prime_oh_definition = sbol2.ComponentDefinition( uri=f"{reactant_displayId}_five_prime_oh" - ) # TODO: 
ensure circular type is preserved for sbh visualization - five_prime_oh_definition.addRole( - "http://identifiers.org/so/SO:0001932" - ) # overhang 5 prime + ) + five_prime_oh_definition.addRole(FIVE_PRIME_OVERHANG) five_prime_oh_location = sbol2.Range( uri="five_prime_oh_location", start=1, end=len(product_5_prime_ss_end) ) @@ -587,9 +644,7 @@ def backbone_digestion( three_prime_oh_definition = sbol2.ComponentDefinition( uri=f"{reactant_displayId}_three_prime_oh" ) - three_prime_oh_definition.addRole( - "http://identifiers.org/so/SO:0001933" - ) # overhang 3 prime + three_prime_oh_definition.addRole(THREE_PRIME_OVERHANG) three_prime_oh_location = sbol2.Range( uri="three_prime_oh_location", start=len(product_sequence) - len(product_3_prime_ss_end) + 1, @@ -626,9 +681,7 @@ def backbone_digestion( ) three_prime_sequence.wasDerivedFrom = seq.identity three_prime_oh_definition.sequences = [three_prime_sequence] - three_prime_oh_definition.types.append( - "http://identifiers.org/so/SO:0000984" - ) # single-stranded for overhangs + three_prime_oh_definition.types.append(SINGLE_STRANDED) extracts_list.append((three_prime_oh_definition, three_prime_sequence)) extracts_list.append((definition, seq)) # add scars to list @@ -645,9 +698,7 @@ def backbone_digestion( ) five_prime_sequence.wasDerivedFrom = seq.identity five_prime_oh_definition.sequences = [five_prime_sequence] - five_prime_oh_definition.types.append( - "http://identifiers.org/so/SO:0000984" - ) # single-stranded for overhangs + five_prime_oh_definition.types.append(SINGLE_STRANDED) extracts_list.append((five_prime_oh_definition, five_prime_sequence)) extracts_list.append((definition, seq)) @@ -661,45 +712,9 @@ def backbone_digestion( prod_backbone_definition.sequenceAnnotations.add(three_prime_overhang_annotation) prod_backbone_definition.sequenceAnnotations.add(five_prime_overhang_annotation) prod_backbone_definition.sequenceAnnotations.add(backbone_extract_annotation) - 
prod_backbone_definition.addRole("https://identifiers.org/so/SO:0000755") - - # Add reference to part in backbone - reactant_component = sbol2.FunctionalComponent( - uri=f"{reactant_component_definition.displayId}_backbone_reactant" - ) - reactant_component.definition = reactant_component_definition - assembly_plan.functionalComponents.add(reactant_component) - - # Create reactant Participation. - reactant_participation = sbol2.Participation( - uri=f"{reactant_component_definition.displayId}_backbone_reactant" - ) - reactant_participation.participant = reactant_component - reactant_participation.roles = [sbol2.SBO_REACTANT] - participations.append(reactant_participation) - - prod_component = sbol2.FunctionalComponent( - uri=f"{reactant_component_definition.displayId}_backbone_digestion_product" - ) - prod_component.definition = prod_backbone_definition - assembly_plan.functionalComponents.add(prod_component) - - product_participation = sbol2.Participation( - uri=f"{reactant_component_definition.displayId}_backbone_product" - ) - product_participation.participant = prod_component - product_participation.roles = [sbol2.SBO_PRODUCT] - participations.append(product_participation) - - # Make Interaction - interaction = sbol2.Interaction( - uri=f"{reactant_component_definition.displayId}_digestion_interaction", - interaction_type="http://identifiers.org/biomodels.sbo/SBO:0000178", - ) - interaction.participations = participations - assembly_plan.interactions.add(interaction) + prod_backbone_definition.addRole(PLASMID_VECTOR) - return extracts_list, assembly_plan + return extracts_list, assembly_activity def number_to_suffix(n): @@ -719,34 +734,28 @@ def number_to_suffix(n): def ligation( reactants: List[sbol2.ComponentDefinition], - assembly_plan: sbol2.ModuleDefinition, - document: sbol2.Document, - ligase: sbol2.ComponentDefinition = None, -) -> List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]: + assembly_activity: sbol2.Activity, + source_document: 
sbol2.Document, + final_document: sbol2.Document, + ligase: sbol2.Implementation, +) -> List[sbol2.Implementation]: """Ligates Components using base complementarity and creates product Components and a ligation Interaction. :param reactants: DNA parts to be ligated as SBOL ModuleDefinition. - :param assembly_plan: SBOL ModuleDefinition to contain the functional components, interactions, and participants + :param assembly_activity: SBOL activity to track assembly inputs & outputs :param document: SBOL2 document containing all reactant ComponentDefinitions. - :param ligase: as SBOL ComponentDefinition, optional (defaults to T4 ligase) + :param ligase: as SBOL Implementation :return: List of all composites generated, in the form of tuples of ComponentDefinition and Sequence. """ - if ligase is None: - ligase = sbol2.ComponentDefinition(uri="T4_Ligase") - ligase.name = "T4_Ligase" - ligase.types = sbol2.BIOPAX_PROTEIN - document.add(ligase) - - ligase_component = sbol2.FunctionalComponent(uri="T4_Ligase") - ligase_component.definition = ligase - ligase_component.roles = ["http://identifiers.org/ncit/NCIT:C16796"] - assembly_plan.functionalComponents.add(ligase_component) - - modifier_participation = sbol2.Participation(uri="ligation") - modifier_participation.participant = ligase_component - modifier_participation.roles = [ - "http://identifiers.org/biomodels.sbo/SBO:0000019" - ] # modifier + enzyme_definition = source_document.get(ligase.built) + + assembly_activity.usages.add( + sbol2.Usage( + uri=f"{enzyme_definition.name}", + entity=ligase.identity, + role="http://sbols.org/v2#build", + ) + ) # Create a dictionary that maps each first and last 4 letters to a list of strings that have those letters. reactant_parts = [] @@ -773,9 +782,9 @@ def ligation( raise ValueError( f"Fusion sites of different length within the same part. 
Check {reactant.identity}" ) - if "https://identifiers.org/so/SO:0000755" in reactant.roles: + if PLASMID_VECTOR in reactant.roles: reactant_parts.append(reactant) - elif "https://identifiers.org/so/SO:0000915" in reactant.roles: + elif ENGINEERED_INSERT in reactant.roles: reactant_parts.append(reactant) else: raise ValueError(f"Part {reactant.identity} does not have a valid role") @@ -785,10 +794,14 @@ def ligation( for reactant in reactant_parts: reactant_seq = reactant.sequences[0] first_four_letters = ( - document.getSequence(reactant_seq).elements[:fusion_site_length].lower() + source_document.getSequence(reactant_seq) + .elements[:fusion_site_length] + .lower() ) last_four_letters = ( - document.getSequence(reactant_seq).elements[-fusion_site_length:].lower() + source_document.getSequence(reactant_seq) + .elements[-fusion_site_length:] + .lower() ) part_syntax = f"{first_four_letters}_{last_four_letters}" if part_syntax not in groups: @@ -804,7 +817,7 @@ def ligation( for combination in list_of_parts_per_combination: list_of_parts_per_composite = [combination[0]] insert_sequence_uri = combination[0].sequences[0] - insert_sequence = document.getSequence(insert_sequence_uri).elements + insert_sequence = source_document.getSequence(insert_sequence_uri).elements remaining_parts = list(combination[1:]) it = 1 while remaining_parts: @@ -813,14 +826,14 @@ def ligation( # match insert sequence 5' to part 3' part_sequence_uri = part.sequences[0] if ( - document.getSequence(part_sequence_uri) + source_document.getSequence(part_sequence_uri) .elements[:fusion_site_length] .lower() == insert_sequence[-fusion_site_length:].lower() ): insert_sequence = ( insert_sequence[:-fusion_site_length] - + document.getSequence(part_sequence_uri).elements + + source_document.getSequence(part_sequence_uri).elements ) list_of_parts_per_composite.append( part @@ -828,13 +841,13 @@ def ligation( remaining_parts.remove(part) # match insert sequence 3' to part 5' elif ( - 
document.getSequence(part_sequence_uri) + source_document.getSequence(part_sequence_uri) .elements[-fusion_site_length:] .lower() == insert_sequence[:fusion_site_length].lower() ): insert_sequence = ( - document.getSequence(part_sequence_uri).elements + source_document.getSequence(part_sequence_uri).elements + insert_sequence[fusion_site_length:] ) list_of_parts_per_composite.insert(0, part) @@ -851,85 +864,84 @@ def ligation( list_of_composites_per_assembly.append(list_of_parts_per_composite) # transform list_of_parts_per_assembly into list of composites - products_list = [] - participations = [] + product_impl_list = [] composite_number = 1 - participations.append(modifier_participation) # TODO: use componentinstances to append "subcomponents" to each definition that is a composite component. all composites share the "subcomponents" for composite in list_of_composites_per_assembly: # a composite of the form [A,B,C] # calculate sequence composite_sequence_str = "" - participations = [] prev_three_prime = ( composite[len(composite) - 1].components[1].definition ) # componentdefinitionuri - prev_three_prime_definition = document.getComponentDefinition(prev_three_prime) - scar_index = 1 + prev_three_prime_definition = source_document.getComponentDefinition( + prev_three_prime + ) anno_list = [] part_extract_definitions = [] for part_extract in composite: part_extract_sequence_uri = part_extract.sequences[0] - part_extract_sequence = document.getSequence( + part_extract_sequence = source_document.getSequence( part_extract_sequence_uri ).elements temp_extract_components = [] - reactant_component = sbol2.FunctionalComponent( - uri=f"{part_extract.displayId}_reactant" - ) - reactant_component.definition = part_extract # TODO do not make new components, instead derive product functionalcomponents from the assembly_plan moduledefinition to add to the ligation interaction/participation - for fc in assembly_plan.functionalComponents: - if fc.definition == 
reactant_component.definition: - reactant_component = fc - - reactant_participation = sbol2.Participation( - uri=f"{part_extract.displayId}_ligation" - ) - reactant_participation.participant = reactant_component - reactant_participation.roles = [sbol2.SBO_REACTANT] - participations.append(reactant_participation) for comp in part_extract.components: if ( - "http://identifiers.org/so/SO:0001932" - in document.getComponentDefinition(comp.definition).roles - ): # five prime + FIVE_PRIME_OVERHANG + in source_document.getComponentDefinition(comp.definition).roles + ): + sequence = source_document.getSequence( + prev_three_prime_definition.sequences[0] + ).elements + + fusion_site = None + + for ( + key, + seq, + ) in ( + FUSION_SITES.items() + ): # TODO error handling for fusion site not found? + if seq == sequence.upper(): + fusion_site = key + scar_definition = sbol2.ComponentDefinition( - uri=f"Ligation_Scar_{number_to_suffix(scar_index)}" + uri=f"Ligation_Scar_{fusion_site}" ) scar_sequence = sbol2.Sequence( - uri=f"Ligation_Scar_{number_to_suffix(scar_index)}_sequence", - elements=document.getSequence( - prev_three_prime_definition.sequences[0] - ).elements, + uri=f"Ligation_Scar_{fusion_site}_sequence", + elements=sequence, ) scar_definition.sequences = [scar_sequence] scar_definition.wasDerivedFrom = [comp.definition, prev_three_prime] - scar_definition.roles = ["http://identifiers.org/so/SO:0001953"] + scar_definition.roles = [RESTRICTION_ENZYME_ASSEMBLY_SCAR] temp_extract_components.append(scar_definition.identity) - add_object_to_doc(scar_definition, document) - add_object_to_doc(scar_sequence, document) + add_object_to_doc(scar_definition, source_document) + add_object_to_doc(scar_sequence, source_document) + + add_object_to_doc(scar_definition, final_document) + add_object_to_doc(scar_sequence, final_document) scar_location = sbol2.Range( - uri=f"Ligation_Scar_{number_to_suffix(scar_index)}_location", + uri=f"Ligation_Scar_{fusion_site}_location", 
start=len(composite_sequence_str) + 1, end=len(composite_sequence_str) + fusion_site_length, ) scar_anno = sbol2.SequenceAnnotation( - uri=f"Ligation_Scar_{number_to_suffix(scar_index)}_annotation" + uri=f"Ligation_Scar_{fusion_site}_annotation" ) scar_anno.locations.add(scar_location) anno_list.append(scar_anno) - scar_index += 1 elif ( - "http://identifiers.org/so/SO:0001933" - in document.getComponentDefinition(comp.definition).roles + THREE_PRIME_OVERHANG + in source_document.getComponentDefinition(comp.definition).roles ): # three prime prev_three_prime = comp.definition - prev_three_prime_definition = document.getComponentDefinition( - prev_three_prime + prev_three_prime_definition = ( + source_document.getComponentDefinition(prev_three_prime) ) else: temp_extract_components.append(comp.definition) @@ -958,45 +970,44 @@ def ligation( ) ) composite_component_definition.name = f"composite_{composite_number}" - composite_component_definition.addRole( - "http://identifiers.org/so/SO:0000804" - ) # engineered region - composite_component_definition.addType("http://identifiers.org/so/SO:0000988") + composite_component_definition.addRole(ENGINEERED_REGION) + composite_component_definition.addType(CIRCULAR) + + prev_part_extract = None for i, definition in enumerate(part_extract_definitions): - def_object = document.getComponentDefinition(definition) + def_object = source_document.getComponentDefinition(definition) comp = sbol2.Component(uri=def_object.displayId) comp.definition = definition - composite_component_definition.components.add(comp) + composite_component_definition.components.add(comp) anno_list[i].component = comp + if prev_part_extract: + _create_precedes_restriction( + composite_component_definition, prev_part_extract, comp + ) + + prev_part_extract = comp + + # _create_precedes_restriction(composite_component_definition, prev_part_extract, composite_component_definition.components[0]) # final component precedes first component; defining circular order 
+ composite_component_definition.sequenceAnnotations = anno_list - prod_functional_component = sbol2.FunctionalComponent( - uri=f"{composite_component_definition.name}" + composite_implementation = sbol2.Implementation( + f"{composite_component_definition.displayId}_impl" ) - prod_functional_component.definition = composite_component_definition - assembly_plan.functionalComponents.add(prod_functional_component) + composite_implementation.built = composite_component_definition.identity + composite_implementation.wasGeneratedBy = assembly_activity.identity - product_participation = sbol2.Participation( - uri=f"{composite_component_definition.name}_product" - ) - product_participation.participant = prod_functional_component - product_participation.roles = [sbol2.SBO_PRODUCT] - participations.append(product_participation) - - # Make Interaction - interaction = sbol2.Interaction( - uri=f"{composite_component_definition.name}_ligation_interaction", - interaction_type="http://identifiers.org/biomodels.sbo/SBO:0000695", + final_document.add_list( + [composite_component_definition, composite_seq, composite_implementation] ) - interaction.participations = participations - assembly_plan.interactions.add(interaction) - products_list.append([composite_component_definition, composite_seq]) + product_impl_list.append(composite_implementation) composite_number += 1 - return products_list + + return product_impl_list # TODO instead of returning list of products CDs to append to doc, append all CDs and return list of their implementations def append_extracts_to_doc( @@ -1010,7 +1021,6 @@ def append_extracts_to_doc( """ for extract, sequence in extract_tuples: try: - print("adding: " + extract.displayId) add_object_to_doc(extract, doc) add_object_to_doc(sequence, doc) except Exception as e: @@ -1033,78 +1043,36 @@ def add_object_to_doc( raise e -class golden_gate_assembly_plan: - """Creates an Assembly Plan. - - :param name: Name of the assembly plan ModuleDefinition. 
def initialize_assembly_activity():
    """Build the provenance Activity that describes a DNA assembly run.

    Creates an ``sbol2.Activity`` with displayId ``assembly`` typed as a build
    step, carrying one Association that references a newly created Plan
    (``assembly_plan``) and a newly created Agent (``BuildCompiler``).
    Nothing is added to a Document here.

    :return: The configured ``sbol2.Activity``.
    """
    plan = sbol2.Plan("assembly_plan")
    plan.description = "MoClo DNA Assembly With Opentrons OT2"

    agent = sbol2.Agent("BuildCompiler")

    # Wire plan and agent onto the association before attaching it to the
    # activity, mirroring the order objects are linked in the rest of the file.
    association = sbol2.Association("assemble_")
    association.plan = plan
    association.agent = agent

    assembly_activity = sbol2.Activity("assembly")
    assembly_activity.name = "DNA Assembly"
    assembly_activity.types = "http://sbols.org/v2#build"
    assembly_activity.associations = [association]

    return assembly_activity


def _create_precedes_restriction(
    parent_definition: sbol2.ComponentDefinition,
    subject: sbol2.Component,
    object: sbol2.Component,
):
    """Record that ``subject`` precedes ``object`` inside ``parent_definition``.

    A SequenceConstraint is created on ``parent_definition`` whose displayId is
    ``<object displayId>_<subject displayId>`` and whose restriction is
    ``sbol2.SBOL_RESTRICTION_PRECEDES``.

    :param parent_definition: ComponentDefinition that owns the new constraint.
    :param subject: Component placed first by the constraint.
    :param object: Component that ``subject`` precedes.
    """
    constraint_id = f"{object.displayId}_{subject.displayId}"
    precedes = parent_definition.sequenceConstraints.create(constraint_id)
    precedes.restriction = sbol2.SBOL_RESTRICTION_PRECEDES
    precedes.subject = subject
    precedes.object = object
import sbol2
from typing import List

from buildcompiler.buildcompiler import Plasmid


def bacterial_transformation(
    plasmids: List[Plasmid],
    chassis_impl: sbol2.Implementation,
    chassis_md: sbol2.ModuleDefinition,  # TODO change to impl
    transformation_doc: sbol2.Document,
) -> List[sbol2.Implementation]:
    """Describe the transformation of a chassis strain with each plasmid.

    For every plasmid, the following objects are created and added to
    ``transformation_doc``:

    * a build ``Activity`` that uses the chassis implementation and the
      plasmid's first implementation, with an Association to a Plan and a
      "BuildCompiler" Agent;
    * a ``ModuleDefinition`` for the new strain, containing the chassis as a
      Module and the plasmid as a FunctionalComponent;
    * an ``Implementation`` of the new strain, generated by that Activity.

    :param plasmids: Plasmids to transform into the chassis. Each must expose
        ``plasmid_definition`` and a non-empty ``plasmid_implementations``
        (the first entry is used; indexing fails on an empty list).
    :param chassis_impl: Implementation of the chassis strain being used.
    :param chassis_md: ModuleDefinition of the chassis strain.
    :param transformation_doc: Document that receives the generated objects.
    :return: The Implementations of the new strains, one per plasmid, in the
        same order as ``plasmids``.
    """
    build_type = "http://sbols.org/v2#build"
    strain_implementations = []

    for plasmid in plasmids:
        plasmid_impl = plasmid.plasmid_implementations[
            0
        ]  # TODO update with more sophisticated selection process?
        plasmid_cd = plasmid.plasmid_definition

        # The activity is a top-level object, so its URI must differ per
        # plasmid; a shared "transform_<chassis>" URI collides on the second
        # plasmid when it is added to the document.
        transformation_activity = sbol2.Activity(
            f"transform_{chassis_md.name}_with_{plasmid_cd.name}"
        )
        transformation_activity.name = "Bacterial Transformation"
        transformation_activity.types = build_type

        chassis_usage = sbol2.Usage(
            uri=f"{chassis_md.name}_chassis",
            entity=chassis_impl.identity,
            role=build_type,
        )
        plasmid_usage = sbol2.Usage(
            uri=f"{plasmid_cd.name}_plasmid_source",
            entity=plasmid_impl.identity,
            role=build_type,
        )
        transformation_activity.usages = [chassis_usage, plasmid_usage]

        # New strain = chassis module + plasmid functional component.
        new_strain = sbol2.ModuleDefinition(f"{chassis_md.name}_with_{plasmid_cd.name}")
        chassis_module = sbol2.Module(uri=f"{chassis_md.name}_chassis")
        chassis_module.definition = chassis_md.identity
        plasmid_functional_component = sbol2.FunctionalComponent(
            uri=f"{plasmid_cd.name}_engineered_plasmid"
        )
        plasmid_functional_component.definition = plasmid_cd.identity

        new_strain.modules = [chassis_module]
        new_strain.functionalComponents = [plasmid_functional_component]

        transformation_activity_association = sbol2.Association(
            f"transform_{chassis_md.name}"
        )
        transformation_activity_plan = sbol2.Plan(
            f"{new_strain.displayId}_transformation_plan"
        )
        transformation_activity_plan.description = (
            "TODO: generate accurate description of transformation"
        )
        transformation_activity_association.plan = transformation_activity_plan

        transformation_activity_agent = sbol2.Agent("BuildCompiler")
        transformation_activity_association.agent = transformation_activity_agent

        transformation_activity.associations = [transformation_activity_association]

        new_strain_impl = sbol2.Implementation(f"{new_strain.displayId}_impl")
        new_strain_impl.built = new_strain.identity
        new_strain_impl.wasGeneratedBy = transformation_activity.identity

        new_objects = [new_strain_impl, transformation_activity]
        # The chassis definition is shared by every strain: register it only
        # if the document does not already hold an object with its identity.
        if transformation_doc.find(chassis_md.identity) is None:
            new_objects.append(chassis_md)
        # NOTE(review): the usage, module and functional component are child
        # objects already attached to their parents above; they are kept in
        # the list to match the previous behaviour -- confirm they are needed.
        new_objects.extend(
            [
                chassis_usage,
                chassis_module,
                new_strain,
                plasmid_functional_component,
                transformation_activity_plan,
            ]
        )
        transformation_doc.add_list(new_objects)

        strain_implementations.append(new_strain_impl)

    return strain_implementations