From de398edafd83317edb2e132661df466988af18c5 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Mon, 16 Mar 2026 15:43:12 +0100
Subject: [PATCH 01/15] deleted last previous mapping file of open_targets

---
 .../adapters/open_targets_diseases.yaml       | 30 -------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 oncodashkb/adapters/open_targets_diseases.yaml

diff --git a/oncodashkb/adapters/open_targets_diseases.yaml b/oncodashkb/adapters/open_targets_diseases.yaml
deleted file mode 100644
index e290608..0000000
--- a/oncodashkb/adapters/open_targets_diseases.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-row:
-   rowIndex:
-      to_subject: id
-transformers:
-    - map:
-        columns:
-            - id
-        to_object: disease
-        via_relation: disease_to_id
-    - map:
-        columns:
-            - name
-        to_property:
-            - name
-        for_objects:
-            - disease
-    - map:
-        columns:
-            - description
-        to_property:
-            - description
-        for_objects:
-            - disease
-    - map:
-        columns:
-            - code
-        to_property:
-            - code
-        for_objects:
-            - disease

From 8f7530dbc101c75fd90cb6e0225767809f0ab85c Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Mon, 16 Mar 2026 16:07:20 +0100
Subject: [PATCH 02/15] remove unnecessary files

---
 oncodashkb/adapters/Ensembl_genes.conf     | 67 ----------------------
 oncodashkb/adapters/Hugo_Symbol_genes.conf |  9 ---
 2 files changed, 76 deletions(-)
 delete mode 100644 oncodashkb/adapters/Ensembl_genes.conf
 delete mode 100644 oncodashkb/adapters/Hugo_Symbol_genes.conf

diff --git a/oncodashkb/adapters/Ensembl_genes.conf b/oncodashkb/adapters/Ensembl_genes.conf
deleted file mode 100644
index c595cc8..0000000
--- a/oncodashkb/adapters/Ensembl_genes.conf
+++ /dev/null
@@ -1,67 +0,0 @@
-'ENSG00000100311',
-                        'ENSG00000140538',
-                        'ENSG00000101972',
-                        'ENSG00000107485',
-                        'ENSG00000141510',
-                        'ENSG00000171456',
-                        'ENSG00000136997',
-                        'ENSG00000099956',
-                        'ENSG00000157168',
-                        'ENSG00000104884',
-                        'ENSG00000112679',
-                        'ENSG00000169032',
-                        'ENSG00000115524',
-                        'ENSG00000187266',
-                        'ENSG00000119772',
-                        'ENSG00000139083',
-                        'ENSG00000172175',
-                        'ENSG00000113916',
-                        'ENSG00000171094',
-                        'ENSG00000121879',
-                        'ENSG00000141736',
-                        'ENSG00000109670',
-                        'ENSG00000073282',
-                        'ENSG00000127528',
-                        'ENSG00000133703',
-                        'ENSG00000138376',
-                        'ENSG00000066468',
-                        'ENSG00000179218',
-                        'ENSG00000156531',
-                        'ENSG00000183765',
-                        'ENSG00000149311',
-                        'ENSG00000169249',
-                        'ENSG00000120217',
-                        'ENSG00000245848',
-                        'ENSG00000096968',
-                        'ENSG00000023445',
-                        'ENSG00000105976',
-                        'ENSG00000068078',
-                        'ENSG00000147889',
-                        'ENSG00000178573',
-                        'ENSG00000182054',
-                        'ENSG00000139163',
-                        'ENSG00000097007',
-                        'ENSG00000174775',
-                        'ENSG00000012048',
-                        'ENSG00000157764',
-                        'ENSG00000100393',
-                        'ENSG00000157873',
-                        'ENSG00000168685',
-                        'ENSG00000183337',
-                        'ENSG00000085224',
-                        'ENSG00000071564',
-                        'ENSG00000105397',
-                        'ENSG00000152217',
-                        'ENSG00000185920',
-                        'ENSG00000106462',
-                        'ENSG00000205755',
-                        'ENSG00000197646',
-                        'ENSG00000091831',
-                        'ENSG00000292363',
-                        'ENSG00000148400',
-                        'ENSG00000135679',
-                        'ENSG00000138413',
-                        'ENSG00000171791',
-                        'ENSG00000077782',
-                        'ENSG00000137265',
-                        'ENSG00000187741'
\ No newline at end of file
diff --git a/oncodashkb/adapters/Hugo_Symbol_genes.conf b/oncodashkb/adapters/Hugo_Symbol_genes.conf
deleted file mode 100644
index 32e8631..0000000
--- a/oncodashkb/adapters/Hugo_Symbol_genes.conf
+++ /dev/null
@@ -1,9 +0,0 @@
-'MET', 'BRAF', 'EZH2', 'CDKN2A', 'ETV6', 'ETNK1', 'KRAS', 'NTRK3',
-'IDH2', 'MAF', 'BRCA1', 'TP53', 'BCOR', 'FGFR1', 'MYC', 'JAK2',
-'CD274', 'PDCD1LG2', 'PIK3CA', 'BCL6', 'TP63', 'IL7R', 'MDM2',
-'SETBP1', 'FBXW7', 'ABL1', 'MAP2K1', 'TYK2', 'EPOR', 'ERCC2',
-'SMARCB1', 'CHEK2', 'PDGFB', 'EP300', 'STAG2', 'PHF6', 'FGFR2',
-'FGFR3', 'NRG1', 'GATA3', 'HRAS', 'ERBB2', 'BCL2', 'TCF3', 'CEBPA',
-'CRLF2', 'ZRSR2', 'NOTCH1', 'TNFRSF14', 'BARD1', 'ESR1', 'PTCH1',
-'FANCA', 'KLF2', 'MALT1', 'CALR', 'DNMT3A', 'ALK', 'SF3B1', 'IDH1',
-'DUSP22', 'IRF4', 'BIRC3', 'ATM', 'ASXL1', 'ATRX'
\ No newline at end of file

From ba5e83b856725ea0383d1ad9f31cc38b8da313ba Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Mon, 16 Mar 2026 16:31:38 +0100
Subject: [PATCH 03/15] remove old integration of gene ontology

---
 README.md                                     |  53 -------
 config/schema.yaml                            |  25 +---
 oncodashkb/adapters/README.md                 |  29 ----
 oncodashkb/adapters/gene_ontology.py          | 139 ------------------
 oncodashkb/adapters/gene_ontology.yaml        |  36 -----
 .../adapters/gene_ontology_reverse.yaml       |  36 -----
 weave.py                                      |  73 ---------
 7 files changed, 6 insertions(+), 385 deletions(-)
 delete mode 100644 oncodashkb/adapters/README.md
 delete mode 100644 oncodashkb/adapters/gene_ontology.py
 delete mode 100644 oncodashkb/adapters/gene_ontology.yaml
 delete mode 100644 oncodashkb/adapters/gene_ontology_reverse.yaml

diff --git a/README.md b/README.md
index 7e78fb4..73ab27f 100644
--- a/README.md
+++ b/README.md
@@ -224,59 +224,6 @@ the data that you want to integrate.
 ./weave.py –oncokb /path_to_file/test_genomics_oncokbannotation.csv
 ```
 
-
-### Gene Ontology adapter
-
-**Gene Ontology** is one of the biggest biomedical databases. The described
-adapter helps to integrate the data about the molecular function of the gene
-product, as well as the biological process in which these genes are involved.
-
-- Molecular function: GO annotations that have relation type `enabled`
-  or `contributes_to`.
-- Biological process: GO annotations that have relation type `involved_in`.
-
-**To integrate the data, three files are necessary:**
-- `--gene_ontology` option for GO annotations in GAF format  [Download GO annotations](http://current.geneontology.org/products/pages/downloads.html)
-- `--gene_ontology_owl` option for GO ontology in OWL format [Download GO ontology](https://geneontology.org/docs/download-ontology/)
-- `--gene_ontology_genes` option for the list of genes for which we want to
-  integrate the GO annotations (example in adapters/Hugo_Symbol_genes.conf file,
-  by default = list of genes from OncoKB database).
-
-**Example of use:**
-
-``` sh
-./weave.py --gene_ontology /path_to_file/goa_human.gaf --gene_ontology_owl /path_to_file/go.owl --gene_ontology_genes /path_to_file/Hugo_Symbol_genes.conf
-```
-
-If you want to integrate annotations with another type of relations, you can
-modify the `adapters/gene_ontology.py` file by adding the next code in the
-**class Gene_ontology** (example for the `involved_in` edge type):
-
-``` python
-# Create new columns that depends on edge type.
-df['GO_involved_in'] = None
-
-# Cut df to include only edge type that we have chosen and annotations
-# for genes from OncoKB.
-df = df[((df['Qualifier'].isin(['enables', 'involved_in', 'contributes_to'])) &
-         (df['DB_Object_Symbol'].isin(included_genes)))]
-```
-Also, you need to add code in `separate_edges_types` method:
-
-``` sh
-# Function to copy GO_term to related column for future ontoweaver mapping
-# based on Qualifier column (relation type).
-   def separate_edges_types(row):
-        if row['Qualifier'] == 'enables':
-            row['GO_enables'] = row['GO_term']
-        elif row['Qualifier'] == 'involved_in':
-            row['GO_involved_in'] = row['GO_term']
-```
-
-Finally, you need to specify the node and edge types in the `gene_ontology.yaml`
-for `GO_involved_in` column.
-
-
 ### Open Targets adapter
 
 Open Targets is a public database that aims to systematically identify and
diff --git a/config/schema.yaml b/config/schema.yaml
index e6ef1d1..a28150b 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -302,10 +302,6 @@ gene status affects gene:
 
 
 ### GO -- TO BE FIXED
-# annotation: 
-#     is_a: named thing
-#     represented_as: node
-#     label_in_input: annotation
 
 biological process:
     is_a: named thing
@@ -314,20 +310,6 @@ biological process:
     properties:
         data_source: str
 
-# annotation for gene: 
-#     is_a: association
-#     represented_as: edge
-#     label_in_input: annotation_for_gene
-#     source: annotation
-#     target: gene
-
-# involved in: 
-#     is_a: association
-#     represented_as: edge
-#     label_in_input: involved_in
-#     source: annotation
-#     target: biological process
-
 gene to biological process:
     is_a: association
     represented_as: edge
@@ -345,9 +327,10 @@ biological process to gene:
     source: biological process
     target: gene
     properties:
-        # edglelabel: str
         data_source: str
 
+### FUNCTIONAL PROTEIN PROTEIN INTERACTIONS
+
 undirected molecular interaction:
     is_a: pairwise molecular interaction
     represented_as: edge
@@ -447,6 +430,8 @@ inhibition:
         extra_attrs: str
         evidences: str
 
+### TRANSCRIPT TO GENE RELATIONSHIP
+
 transcript to gene relationship:
     # is_a: transcript to gene relationship
     represented_as: edge
@@ -454,6 +439,8 @@ transcript to gene relationship:
     properties:
         data_source: str
 
+### DRUG HAS TARGET
+
 drug has target:
     is_a: drug to gene association
     represented_as: edge
diff --git a/oncodashkb/adapters/README.md b/oncodashkb/adapters/README.md
deleted file mode 100644
index b79f2c3..0000000
--- a/oncodashkb/adapters/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-## Gene Ontology Data Preparation
-
-**Gene Ontology** (GO) is one of the biggest biomedical databases for the annotation of genes and their products across different species. To integrate the data in the Semantic Knowledge Graph (SKG), we use the `GO Annotations file` for Homo Sapiens in `GAF format` [Download page](https://geneontology.org/docs/download-go-annotations/) . Each line in GAF file represents **one annotation** for a gene product and contains **17 columns** (you can read a detailed description of each column [here](https://geneontology.org/docs/go-annotation-file-gaf-format-2.2/])).
-
-Compared to the integration of the CGI and OncoKB databases, where each column represents a concrete data type from Biolink ontology, the GO annotations file contains data type for each annotation (row) in the column 'Qualifier'. For further details regarding different types of relationships, please refer to the following [link](https://wiki.geneontology.org/Annotation_Relations).
-
-To solve the issue concerning data types represented in one column and to make the integrated data in the SKG more clear and easy to understand, the following steps were implemented in the GO adapter:
-- [Download](https://geneontology.org/docs/download-ontology/) the **GO ontology OWL file** to create a dictionary that can map **GO_ID** to **GO_term** cause there is only a **GO_ID** column in the GAF file. 
-- Create a new column **GO_term** using a dictionary and `create_id_term_dict` method.
-- For the chosen type of the relation from the **column 'Qualifier'** (in our case, `enables`, `involved_in`, `contributes_to` relation types) create an additional column (in our case, `GO_enables`, `GO_involved_in`, `GO_contributes_to` columns) and copy the **GO_term** in the related column (see illustration below)
-
-![Schema_columns_GO_adapter](https://github.com/kgaydukova/oncodashkb/assets/23275374/37b23c98-17b6-45bd-ab34-bc4d7fdf72f9)
-
-- Declare data type and relation type in the mapping file `gene_ontology.yaml` for each synthetic additional column (`GO_enables`, `GO_involved_in`, `GO_contributes_to`). 
-
-```yaml
-subject: annotation # Type for each entry (e.g. line).
-
-columns:
-    GO_enables:
-        to_object: molecular_function
-        via_relation: enables
-    GO_involved_in:
-        to_object: biological_process
-        via_relation: involved_in
-    GO_contributes_to:
-        to_object: molecular_function
-        via_relation: contributes_to
-```
diff --git a/oncodashkb/adapters/gene_ontology.py b/oncodashkb/adapters/gene_ontology.py
deleted file mode 100644
index 4b6b7af..0000000
--- a/oncodashkb/adapters/gene_ontology.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import types as pytypes
-import logging
-import ontoweaver
-
-from typing import Optional
-from collections.abc import Iterable
-
-import pandas as pd
-
-from owlready2 import get_ontology
-
-
-class Gene_ontology(ontoweaver.tabular.PandasAdapter):
-
-    def __init__(self,
-                 df: pd.DataFrame,
-                 ontology: str,
-                 genes_list: str,
-                 config: dict,
-                 type_affix=ontoweaver.base.TypeAffixes.none
-                 ):
-
-        # logging.info(" | | In Gene_ontology adapter init")
-        self.ontology = ontology
-        self.genes_list = genes_list
-        assert self.genes_list != None
-
-        # define column names based on the GAF specification
-        columns = ['DB', 'DB_Object_ID', 'DB_Object_Symbol', 'Qualifier', 'GO_ID', 'DB_Reference', 'Evidence_Code',
-                   'With_or_From', 'Aspect', 'DB_Object_Name', 'DB_Object_Synonym', 'DB_Object_Type', 'Taxon', 'Date',
-                   'Assigned_By', 'Annotation_Extension', 'Gene_Product_Form_ID']
-
-        # assign column names to the DataFrame
-        df.columns = columns
-
-        # create dict with GO_id:GO_term
-        logging.info(" | | Load GO taxonomy")
-        dict_go_plus = self.create_id_term_dict()
-
-        # logging.info(" | | Sanitize keys")
-        # DELETE ; and , from terms (values in dictionary) to avoid future errors in CSV for neo4j import
-        for key in dict_go_plus.keys():
-            if ',' in dict_go_plus[key]:
-                dict_go_plus[key] = dict_go_plus[key].replace(',', '')
-            if ';' in dict_go_plus[key]:
-                dict_go_plus[key] = dict_go_plus[key].replace(';', '')
-            if '\'' in dict_go_plus[key]:
-                dict_go_plus[key] = dict_go_plus[key].replace('\'', '')
-
-        # logging.info(" | | Expand data")
-        # create additional column with GO terms (mapped from GO_id)
-        df['GO_term'] = df['GO_ID'].map(lambda go_id: dict_go_plus[go_id])
-
-        # create new columns that depends on edge type
-        df['GO_involved_in'] = None
-        df['GO_enables'] = None
-        df['GO_contributes_to'] = None
-
-        '''
-        List of genes the annotation for which we will integrate from Gene Ontology data,
-        Reading from Hugo_Symbol_genes.conf file
-        By default = genes from OncoKB database
-        '''
-        # logging.info(" | | Read genes list")
-        included_genes = self.read_genes_list()
-        assert len(included_genes) > 0
-
-        # logging.info(" | | Filter out useless edges")
-        # cut df to include only edge type that we have chosen and annotations for genes from OncoKB
-        df = df[((df['Qualifier'].isin(['enables', 'involved_in', 'contributes_to'])) &
-                 (df['DB_Object_Symbol'].isin(included_genes)))]
-        assert len(df) > 0
-
-        # add the GO_term in GO_involved_in, GO_enables, GO_contributes_to columns depending on the edge type in
-        # Qualifier column
-        # logging.info(" | | Separate edge types")
-        df = df.apply(self.separate_edges_types, axis=1)
-        assert len(df) > 0
-
-        # Default mapping as a simple config.
-        # logging.info(" | | Parse data")
-        from . import types
-        parser = ontoweaver.tabular.YamlParser(config, types)
-        mapping = parser()
-
-        # logging.info(" | | Declare types")
-        # Declare types defined in the config.
-        super().__init__(
-            df,
-            *mapping,
-        )
-
-        logging.info(" | | Done Gene_ontology init")
-
-    # function to create a dictionary with GO_id:GO_term for gene ontology, input - OWL file, output - dictionary
-    def create_id_term_dict(self):
-        dict_id_term = {}
-
-        logging.debug(f"Load ontology: {self.ontology}")
-
-        ont = get_ontology(self.ontology).load()
-
-        # iterate through all classes in the ontology
-        for cls in ont.classes():
-            # get the class ID and label (term)
-            class_id = cls.iri # read class_id like http://purl.obolibrary.org/obo/GO_0003674'
-            class_label = cls.label.first() if cls.label else cls.name
-
-            # make the same key as we have in GO annotation files
-            class_id_key = class_id.replace("http://purl.obolibrary.org/obo/GO_", "GO:")
-            # add to dictionary like GO:0003674': 'molecular_function'
-            dict_id_term[class_id_key] = class_label
-
-        return dict_id_term
-
-    def read_genes_list(self):
-
-        # print(self.genes_list=='o')
-
-        with open(self.genes_list, 'r') as file:
-
-            content = file.read()
-            genes = content.replace('\n', '').split(',')
-            genes = [gene.strip().strip("'") for gene in genes]
-            genes = list(filter(None, genes))
-
-        return genes
-
-    # function to copy GO_term to related column for future ontoweaver mapping based on Qualifier column (relation type)
-
-    @staticmethod
-    def separate_edges_types(row):
-        if row['Qualifier'] == 'enables':
-            row['GO_enables'] = row['GO_term']
-        elif row['Qualifier'] == 'involved_in':
-            row['GO_involved_in'] = row['GO_term']
-        elif row['Qualifier'] == 'contributes_to':
-            row['GO_contributes_to'] = row['GO_term']
-        return row
diff --git a/oncodashkb/adapters/gene_ontology.yaml b/oncodashkb/adapters/gene_ontology.yaml
deleted file mode 100644
index d8d4184..0000000
--- a/oncodashkb/adapters/gene_ontology.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-row:
-   map:
-        column: DB_Object_Symbol
-        to_subject: gene
-transformers:
-    # - map:
-    #     column: DB_Object_Symbol
-    #     to_object: gene
-    #     via_relation: annotation_for_gene
-    # - map:
-    #     column: GO_enables
-    #     to_object: molecular_function
-    #     via_relation: enables
-    # - map:
-    #     columns: GO_involved_in
-    #     to_object: biological_process
-    #     via_relation: involved_in
-    # - map:
-    #     columns: GO_contributes_to
-    #     to_object: molecular_function
-    #     via_relation: contributes_to
-    - map:
-        columns: GO_involved_in
-        to_object: biological_process
-        via_relation: gene_to_biological_process
-    # - map:
-    #     column: GO_contributes_to
-    #     from_subject: gene
-    #     to_object: molecular_function
-    #     via_relation: gene_to_molecular_function
-    - string:
-        value: " "
-        to_property: edgelabel
-        for_objects: gene_to_biological_process
-metadata:
-        - data_source: gene_ontology
diff --git a/oncodashkb/adapters/gene_ontology_reverse.yaml b/oncodashkb/adapters/gene_ontology_reverse.yaml
deleted file mode 100644
index 1c72430..0000000
--- a/oncodashkb/adapters/gene_ontology_reverse.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-row:
-   map:
-        column: GO_involved_in
-        to_subject: biological_process
-transformers:
-    # - map:
-    #     column: DB_Object_Symbol
-    #     to_object: gene
-    #     via_relation: annotation_for_gene
-    # - map:
-    #     column: GO_enables
-    #     to_object: molecular_function
-    #     via_relation: enables
-    # - map:
-    #     columns: GO_involved_in
-    #     to_object: biological_process
-    #     via_relation: involved_in
-    # - map:
-    #     columns: GO_contributes_to
-    #     to_object: molecular_function
-    #     via_relation: contributes_to
-    - map:
-        columns: DB_Object_Symbol
-        to_object: gene
-        via_relation: biological_process_to_gene
-    # - map:
-    #     column: GO_contributes_to
-    #     from_subject: gene
-    #     to_object: molecular_function
-    #     via_relation: gene_to_molecular_function
-    - string:
-        value: " "
-        to_property: edgelabel
-        for_objects: biological_process_to_gene
-metadata:
-        - data_source: gene_ontology
diff --git a/weave.py b/weave.py
index 89fc028..cd0190b 100755
--- a/weave.py
+++ b/weave.py
@@ -14,7 +14,6 @@
 import biocypher
 
 import ontoweaver
-# import oncodashkb.adapters as od
 from alive_progress import alive_bar
 
 error_codes = {
@@ -129,39 +128,6 @@ def process_OT(directory, name):
 
     return local_nodes, local_edges
 
-
-def process_GO(name):
-    logging.info(f" | Weave {name} data...")
-    # Table input data.
-    logging.info(f" |  | Load {name} data...")
-    df = progress_read(asked.gene_ontology[0], sep='\t', comment='!', header=None, dtype={15: str}, hint=969214)
-
-    logging.info(f" |  | Read {name} mapping...")
-    # Extraction mapping configuration.
-    try:
-        with open(f"./oncodashkb/adapters/{name}.yaml") as fd:
-            conf = yaml.full_load(fd)
-    except Exception as e:
-        logging.error(e)
-        sys.exit(error_codes["CannotAccessFile"])
-
-    logging.info(f" |  | Preprocess {name} data...")
-    manager = od.gene_ontology.Gene_ontology(df, asked.gene_ontology_owl, asked.gene_ontology_genes, conf)
-
-    logging.info(f" |  | Transform {name} data...")
-    local_nodes = []
-    local_edges = []
-    # Use manager.df because Gene_ontology does filter the input dataframe
-    with alive_bar(len(manager.df), file=sys.stderr) as progress:
-        for n,e in manager():
-            local_nodes += n
-            local_edges += e
-            progress()
-
-    return local_nodes, local_edges
-
-
-
 if __name__ == "__main__":
     # TODO add adapter for parquet, one for csv and one that automatically checks filetype.
 
@@ -205,18 +171,6 @@ def process_GO(name):
     parser.add_argument("-c", "--cgi", metavar="CSV", nargs="+",
                         help="Extract from a CGI CSV file.")
 
-    parser.add_argument("-g", "--gene-ontology", metavar="CSV", nargs="+",
-                        help="Extract from a Gene_Ontology_Annotation GAF file.")
-
-    parser.add_argument("-n", "--gene-ontology-owl", metavar="OWL",
-                        help="Download Gene_Ontology owl file.")
-
-    parser.add_argument("-G", "--gene-ontology-genes", metavar="TXT",
-                        help="List of genes for which we integrate Gene Ontology annotations (by default genes from OncoKB).")
-
-    parser.add_argument("-r", "--gene-ontology-reverse", action='store_true',
-                        help="Extract from a Gene_Ontology_Annotation GAF file.")
-
     parser.add_argument("-s", "--separator", metavar="STRING", default=", ",
                         help="Separator in exported data files.")
 
@@ -270,10 +224,6 @@ def process_GO(name):
         "open_targets_drug_mechanism_of_action",
         "open_targets_drug_molecule",
         "cgi",
-        "gene_ontology",
-        "gene_ontology_owl",
-        "gene_ontology_genes",
-        "gene_ontology_reverse",
     ]
     opt_total = 0
     for opt in all_options:
@@ -446,29 +396,6 @@ def process_GO(name):
         edges += local_edges
         logging.info(f"Done adapter {opt_loaded}/{opt_total}")
 
-    ## GeneOntology
-
-    ### GO
-    if asked.gene_ontology:
-        opt_loaded += 1
-        logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
-        local_nodes, local_edges = process_GO("gene_ontology")
-        logging.info(f" | Save data...")
-        nodes += local_nodes
-        edges += local_edges
-        logging.info(f"OK, wove Gene Ontology data: {len(local_nodes)} nodes, {len(local_edges)} edges.")
-        logging.info(f"Done adapter {opt_loaded}/{opt_total}")
-
-    ### GO reversed
-    if asked.gene_ontology_reverse:
-        opt_loaded += 1
-        logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
-        local_nodes, local_edges = process_GO("gene_ontology_reverse")
-        nodes += local_nodes
-        edges += local_edges
-        logging.info(f"OK, reverse-wove Gene Ontology: {len(local_nodes)} nodes, {len(local_edges)} edges.")
-        logging.info(f"Done adapter {opt_loaded}/{opt_total}")
-
     ###################################################
     # Map the data not requiring special loadings.    #
     ###################################################

From 51988fe855e22b9efdd48b1fc0e4de743162b9c7 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Mon, 16 Mar 2026 18:27:14 +0100
Subject: [PATCH 04/15] remove last marks of old integration of open targets
 and gene ontology

---
 make.sh                         | 4 ----
 oncodashkb/adapters/__init__.py | 9 ---------
 2 files changed, 13 deletions(-)
 delete mode 100644 oncodashkb/adapters/__init__.py

diff --git a/make.sh b/make.sh
index 8e5d64a..68eb7da 100755
--- a/make.sh
+++ b/make.sh
@@ -90,11 +90,7 @@ cmd="uv run python3 ${py_args} $script_dir/weave.py \
     --open-targets-target                   $data_dir/OT/target/
     ${weave_args}" # \
     # --clinical                              $data_dir/DECIDER/clinical/clinical_export.xlsx \
-    # --gene_ontology_genes        $data_dir/DECIDER/$data_version/OncoKB_gene_symbols.conf \
     # --oncokb                     $data_dir/DECIDER/$data_version/treatments.csv \
-    # --gene_ontology              $data_dir/GO/goa_human.gaf.gz \
-    # --gene_ontology_owl          $data_dir/GO/go.owl \
-    # --gene_ontology_reverse
 
 
 echo "Weaving command:" >&2
diff --git a/oncodashkb/adapters/__init__.py b/oncodashkb/adapters/__init__.py
deleted file mode 100644
index 582dac2..0000000
--- a/oncodashkb/adapters/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-
-# from . import types
-from . import gene_ontology
-from . import open_targets
-from . import open_targets_evidences
-from . import open_targets_drugs
-from . import open_targets_diseases
-__all__ = ['types', 'gene_ontology', 'open_targets', 'open_targets_evidences', 'open_targets_drugs', 'open_targets_diseases']
-

From 41e3f3988a520323a98b4146039640afff024ea6 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Tue, 17 Mar 2026 17:21:22 +0100
Subject: [PATCH 05/15] feat: integration of the structural variants adapter

---
 make.sh                                      |   1 +
 oncodashkb/adapters/structural_variants.yaml | 115 +++++++++++++++++++
 weave.py                                     |  57 ++++++++-
 3 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 oncodashkb/adapters/structural_variants.yaml

diff --git a/make.sh b/make.sh
index 68eb7da..a519657 100755
--- a/make.sh
+++ b/make.sh
@@ -84,6 +84,7 @@ cmd="uv run python3 ${py_args} $script_dir/weave.py \
     --short-mutations-external              $decider_dir/short_mutations_external.csv  \
     --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
     --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
+    --structural-variants                     $data_dir/DECIDER/$data_version/structural_variants.xlsx  \
     --omnipath-networks                     $data_dir/omnipath_networks/omnipath_webservice_interactions__latest.tsv.gz \
     --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
     --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
diff --git a/oncodashkb/adapters/structural_variants.yaml b/oncodashkb/adapters/structural_variants.yaml
new file mode 100644
index 0000000..1b96645
--- /dev/null
+++ b/oncodashkb/adapters/structural_variants.yaml
@@ -0,0 +1,115 @@
+row:
+    translate:
+        column: patient
+        to_subject: patient
+        translations_file: ./data/DECIDER/clinical/clinical_export.xlsx
+        translate_from: Patient card::Patient cohort code_Patient Card
+        translate_to: Patient card::Publication code
+        index_col: 0
+        usecols: [0,1,2]
+transformers:
+    # Nodes
+    ## Samples
+    - translate_sample_ids :
+        column: sample
+        to_object: sample
+        via_relation: patient_carries_sample
+        translations_file: ./data/DECIDER/clinical/clinical_export.xlsx
+        translate_from: Patient card::Patient cohort code_Patient Card
+        translate_to: Patient card::Publication code
+        index_col: 0
+        usecols: [0,1,2]
+    ## Alterations
+    - cat_format:
+        columns:
+            - primary_gene
+            - effect
+        from_subject: sample
+        to_object: alteration
+        via_relation: sample_carries_alteration
+        format_string: "{primary_gene}:{effect}"
+    ## Gene status
+    - translate_cat_format:
+        columns:
+          - primary_gene
+          - Gene_type
+        from_subject: alteration
+        to_object: gene_status
+        format_string: "{primary_gene}:{Gene_type}"
+        via_relation: alteration_causes_gene_status
+        column_to_translate: 
+            - primary_gene
+        translations_file: data/HGNC/hgnc_complete_set.txt
+        translate_from: symbol
+        translate_to: ensembl_gene_id
+        sep: "\t"
+    ## Genes
+    - translate:
+        column: primary_gene
+        from_subject: gene_status
+        to_object: gene
+        via_relation: gene_status_affects_gene
+        translations_file: data/HGNC/hgnc_complete_set.txt
+        translate_from: symbol
+        translate_to: ensembl_gene_id
+        sep: "\t"
+    # Properties
+    ## Alterations
+    - translate_cat_format:
+        columns:
+            - primary_gene
+            - effect
+        to_property: ensembl_id_alteration
+        for_object: alteration
+        format_string: "{primary_gene}:{effect}"
+        column_to_translate: 
+            - primary_gene
+        translations_file: data/HGNC/hgnc_complete_set.txt
+        translate_from: symbol
+        translate_to: ensembl_gene_id
+        sep: "\t"
+    ## Gene status
+    - cat_format:
+        columns:
+            - primary_gene
+            - Gene_type
+        to_property: gene_symbol_gene_status
+        for_object: gene_status
+        format_string: "{primary_gene}:{Gene_type}"
+    - map:
+        columns: Gene_type
+        to_property: gene_role
+        for_object: gene_status
+    ## Genes
+    - map: 
+        column: primary_gene
+        to_property: gene_symbol
+        for_object: gene
+    ## Alterations
+    - map:
+        column: pathogenic
+        to_property: oncogenic
+        for_object: alteration
+    - map:
+        column: effect
+        to_property: consequence
+        for_object: alteration
+    - map:
+        column: Homogeneous
+        to_property: homogenous
+        for_object: alteration
+    - map:
+        column: expressed
+        to_property: expressed
+        for_object: alteration
+    - string:
+        value: " "
+        to_property: edgelabel
+        for_objects:
+            - patient_carries_sample
+            - sample_carries_alteration
+            - alteration_causes_gene_status
+            - gene_status_affects_gene
+
+metadata:
+        - data_source: structural_variants_placeholder
\ No newline at end of file
diff --git a/weave.py b/weave.py
index cd0190b..38dcbda 100755
--- a/weave.py
+++ b/weave.py
@@ -34,8 +34,9 @@
 ontoweaver.transformer.register(OmniPath_directed)
 
 # Importing custom transformer for translating sample ids with publication code and registering it.
-from oncodashkb.transformers.specific_translate_transformers import translate_sample_ids
+from oncodashkb.transformers.specific_translate_transformers import translate_sample_ids, translate_cat_format
 ontoweaver.transformer.register(translate_sample_ids)
+ontoweaver.transformer.register(translate_cat_format)
 
 # Importing OpenTargets custom transformer and registering it.
 from oncodashkb.transformers.ot_transformers import access_proteins, urls_to_prop
@@ -153,6 +154,9 @@ def process_OT(directory, name):
     parser.add_argument("-cnae", "--copy-number-amplifications-external", metavar="CSV", nargs="+",
                         help="Extract from a CSV file with copy number amplifications' external annotations.")
 
+    parser.add_argument("-sv", "--structural-variants", metavar="XLSX", nargs="+",
+                        help="Extract from an Excel (XLSX) file with structural variants' annotations.")
+
     parser.add_argument("-o", "--oncokb", metavar="CSV", nargs="+",
                         help="Extract from an OncoKB CSV file.")
 
@@ -218,6 +222,7 @@ def process_OT(directory, name):
         "short_mutations_external",
         "copy_number_amplifications_local",
         "copy_number_amplifications_external",
+        "structural_variants",
         "oncokb",
         "omnipath_networks",
         "open_targets_target",
@@ -282,6 +287,53 @@ def process_OT(directory, name):
         edges += local_edges
         logging.info(f"Done adapter {opt_loaded}/{opt_total}")
 
+    if asked.structural_variants:
+        opt_loaded += 1
+        logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
+        data_file = asked.structural_variants[0]
+        mapping_file = "./oncodashkb/adapters/structural_variants.yaml"
+
+        # logging.info(f"Weave structural variants...")
+        logging.info(f" | Weave `{data_file}:{mapping_file}`...")
+        logging.info(f" |  | Load data `{data_file}`...")
+        table = pd.read_excel(data_file)
+
+        table = table.rename(columns={"Gene.type":"Gene_type"})
+
+        try:
+            with open(mapping_file) as fd:
+                ymapping = yaml.full_load(fd)
+        except Exception as e:
+            logging.error(e)
+            sys.exit(error_codes["CannotAccessFile"])
+
+        logging.info(f" |  | Process {mapping_file}...")
+
+        yparser = ontoweaver.mapping.YamlParser(ymapping)
+        mapping = yparser()
+
+        adapter = ontoweaver.tabular.PandasAdapter(
+            table,
+            *mapping,
+            type_affix="suffix",
+            type_affix_sep=":",
+            raise_errors = True
+        )
+
+        local_nodes = []
+        local_edges = []
+        with alive_bar(len(table), file=sys.stderr) as progress:
+            for n,e in adapter():
+                # NOTE: here, n & e are ontoweaver.base.Element, not BioCypher tuples.
+                local_nodes += n
+                local_edges += e
+                progress()
+
+        logging.info(f" |  | OK, wove: {len(local_nodes)} nodes, {len(local_edges)} edges.")
+        nodes += local_nodes
+        edges += local_edges
+        logging.info(f"Done adapter {opt_loaded}/{opt_total}")
+
     if asked.omnipath_networks:
         opt_loaded += 1
         logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
@@ -414,9 +466,8 @@ def process_OT(directory, name):
         "short_mutations_external",
         "copy_number_amplifications_local",
         "copy_number_amplifications_external",
+        # "structural_variants",
         "oncokb",
-        # "omnipath_networks",
-        # "ot-"
         "cgi",
     ]
     for name in direct_mappings:

From cf5e37299cf6c675958cf61ecd27a0ffc9339c5a Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Wed, 18 Mar 2026 19:15:41 +0100
Subject: [PATCH 06/15] feat: distinguished structural variants, short
 mutations and copy number amplifications

---
 config/schema.yaml                            | 69 +++++++++++++++----
 .../copy_number_amplifications_external.yaml  | 26 +++----
 .../copy_number_amplifications_local.yaml     |  8 +--
 .../adapters/short_mutations_external.yaml    | 26 +++----
 .../adapters/short_mutations_local.yaml       | 21 +++---
 oncodashkb/adapters/structural_variants.yaml  | 22 +++---
 6 files changed, 105 insertions(+), 67 deletions(-)

diff --git a/config/schema.yaml b/config/schema.yaml
index a28150b..bd0f079 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -4,12 +4,11 @@
 
 # Defined in alphabetical order
 
-alteration:
+short mutation:
     is_a: sequence variant
     represented_as: node
-    label_in_input: alteration
+    label_in_input: short_mutation
     properties:
-        gene_symbol_alteration: str
         citation_PM_ids: str
         consequence: str
         homogenous: str
@@ -23,7 +22,47 @@ alteration:
         refCount: int64
         altCount: int64
         expressed: bool
-        ensembl_id_alteration: str
+        # ensembl_id_alteration: str
+
+copy number amplification:
+    is_a: sequence variant
+    represented_as: node
+    label_in_input: copy_number_amplification
+    properties:
+        citation_PM_ids: str
+        consequence: str
+        homogenous: str
+        mutation_effect_description: str
+        data_source: str
+        oncogenic: str
+        reference_genome: str
+        tumor_type: str
+        tumor_type_summary: str
+        variant_summary: str
+        refCount: int64
+        altCount: int64
+        expressed: bool
+        # ensembl_id_alteration: str
+
+structural variant:
+    is_a: sequence variant
+    represented_as: node
+    label_in_input: structural_variant
+    properties:
+        citation_PM_ids: str
+        consequence: str
+        homogenous: str
+        mutation_effect_description: str
+        data_source: str
+        oncogenic: str
+        reference_genome: str
+        tumor_type: str
+        tumor_type_summary: str
+        variant_summary: str
+        refCount: int64
+        altCount: int64
+        expressed: bool
+        # ensembl_id_alteration: str
 
 disease:
     represented_as: node
@@ -212,7 +251,7 @@ protein:
 
 ### CARRIES
 
-# To allow queries for patient carrying samples, and samples carrying alterations,
+# To allow queries for patient carrying samples, and samples carrying variants,
 # without mixing with "effects" causes.
 carries:
     is_a: causes
@@ -232,12 +271,12 @@ patient carries sample:
         data_source: str
         edglelabel: str
 
-sample carries alteration:
+sample carries variant:
     is_a: carries
     represented_as: edge
-    label_in_input: sample_carries_alteration
+    label_in_input: sample_carries_variant
     source: sample
-    target: alteration
+    target: sequence variant
     properties:
         data_source: str
         edglelabel: str
@@ -246,16 +285,16 @@ sample carries alteration:
 
 # A gene is linked to its gene status (gain or loss of function),
 # which are represented as nodes, so as to allow a causal path
-# to go through alteration -> gene status -> transcript activity.
+# to go through variant -> gene status -> transcript activity.
 # Hence, outcomes have at least two instances:
 # - Gene:GoF, and
 # - Gene:LoF.
 
-alteration causes gene status:
+variant causes gene status:
     is_a: causes
     represented_as: edge
-    label_in_input: alteration_causes_gene_status
-    source: alteration
+    label_in_input: variant_causes_gene_status
+    source: sequence variant
     target: gene status
     properties:
         data_source: str
@@ -267,11 +306,11 @@ alteration causes gene status:
 # as predictive markers for treatment response, 
 # based on clinical evidence categorized by evidence levels. 
 
-alteration biomarker for drug:
+variant biomarker for drug:
     is_a: biomarker for
     represented_as: edge
-    label_in_input: alteration_biomarker_for_drug
-    source: alteration
+    label_in_input: variant_biomarker_for_drug
+    source: sequence variant
     target: drug
     properties:
         data_source: str
diff --git a/oncodashkb/adapters/copy_number_amplifications_external.yaml b/oncodashkb/adapters/copy_number_amplifications_external.yaml
index f0ef571..5fee1b5 100644
--- a/oncodashkb/adapters/copy_number_amplifications_external.yaml
+++ b/oncodashkb/adapters/copy_number_amplifications_external.yaml
@@ -29,18 +29,18 @@ transformers:
             - hugoSymbol
             - alteration
         from_subject: sample
-        to_object: alteration
-        via_relation: sample_carries_alteration
+        to_object: copy_number_amplification
+        via_relation: sample_carries_variant
         format_string: "{hugoSymbol}:{alteration}"
     ## Gene status
     - cat_format:
         columns:
           - ensembl_id
           - gene_role
-        from_subject: alteration
+        from_subject: copy_number_amplification
         to_object: gene_status
         format_string: "{ensembl_id}:{gene_role}"
-        via_relation: alteration_causes_gene_status
+        via_relation: variant_causes_gene_status
         # column_to_translate: 
         #     - hugoSymbol
         # translations_file: data/HGNC/hgnc_complete_set.txt
@@ -64,7 +64,7 @@ transformers:
             - ensembl_id
             - alteration
         to_property: ensembl_id_alteration
-        for_object: alteration
+        for_object: copy_number_amplification
         format_string: "{ensembl_id}:{alteration}"
         # column_to_translate: 
         #     - hugoSymbol
@@ -89,19 +89,19 @@ transformers:
     - map:
         column: tumorType
         to_property: tumor_type
-        for_object: alteration
+        for_object: copy_number_amplification
     - map:
         column: oncogenic
         to_property: oncogenic
-        for_object: alteration
+        for_object: copy_number_amplification
     - replace:
         column: mutationEffectDescription
         to_property: mutation_effect_description
-        for_object: alteration
+        for_object: copy_number_amplification
     - map:
         column: citationPMids
         to_property: citation_PM_ids
-        for_object: alteration
+        for_object: copy_number_amplification
     - replace:
         column: geneSummary
         to_property: gene_summary
@@ -109,18 +109,18 @@ transformers:
     - map:
         column: variantSummary
         to_property: variant_summary
-        for_object: alteration
+        for_object: copy_number_amplification
     - map:
         column: tumorTypeSummary
         to_property: tumor_type_summary
-        for_object: alteration
+        for_object: copy_number_amplification
     - string:
         value: " "
         to_property: edgelabel
         for_objects:
             - patient_carries_sample
-            - sample_carries_alteration
-            - alteration_causes_gene_status
+            - sample_carries_variant
+            - variant_causes_gene_status
             - gene_status_affects_gene
 
 metadata:
diff --git a/oncodashkb/adapters/copy_number_amplifications_local.yaml b/oncodashkb/adapters/copy_number_amplifications_local.yaml
index 87a4fb4..d7a46f8 100644
--- a/oncodashkb/adapters/copy_number_amplifications_local.yaml
+++ b/oncodashkb/adapters/copy_number_amplifications_local.yaml
@@ -22,16 +22,16 @@ transformers:
             - hugoSymbol
             - alteration
         from_subject: sample
-        to_object: alteration
+        to_object: copy_number_amplification
         format_string: "{hugoSymbol}:{alteration}"
-        via_relation: sample_carries_alteration
+        via_relation: sample_carries_variant
     - map:
         column: referenceGenome
         to_property: reference_genome
-        for_object: alteration
+        for_object: copy_number_amplification
     - map:
         column: tumorType
         to_property: tumor_type
-        for_object: alteration
+        for_object: copy_number_amplification
 metadata:
         - data_source: copy_number_amplifications_local
\ No newline at end of file
diff --git a/oncodashkb/adapters/short_mutations_external.yaml b/oncodashkb/adapters/short_mutations_external.yaml
index b4e4bfc..626f981 100644
--- a/oncodashkb/adapters/short_mutations_external.yaml
+++ b/oncodashkb/adapters/short_mutations_external.yaml
@@ -23,17 +23,17 @@ transformers:
     - map:
         column: alteration
         from_subject: sample
-        to_object: alteration
-        via_relation: sample_carries_alteration
+        to_object: short_mutation
+        via_relation: sample_carries_variant
     ## Gene Stauts
     - cat_format:
         columns:
           - ensembl_id
           - gene_role
-        from_subject: alteration
+        from_subject: short_mutation
         to_object: gene_status
         format_string: "{ensembl_id}:{gene_role}"
-        via_relation: alteration_causes_gene_status
+        via_relation: variant_causes_gene_status
     ## Genes
     - map:
         column: ensembl_id
@@ -63,23 +63,23 @@ transformers:
     - map:
         column: tumorType
         to_property: tumor_type
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: consequence
         to_property: consequence
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: oncogenic
         to_property: oncogenic
-        for_object: alteration
+        for_object: short_mutation
     - replace:
         column: mutationEffectDescription
         to_property: mutation_effect_description
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: citationPMids
         to_property: citation_PM_ids
-        for_object: alteration
+        for_object: short_mutation
     - replace:
         column: geneSummary
         to_property: gene_summary
@@ -87,18 +87,18 @@ transformers:
     - map:
         column: variantSummary
         to_property: variant_summary
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: tumorTypeSummary
         to_property: tumor_type_summary
-        for_object: alteration
+        for_object: short_mutation
     - string:
         value: " "
         to_property: edgelabel
         for_objects:
             - patient_carries_sample
-            - sample_carries_alteration
-            - alteration_causes_gene_status
+            - sample_carries_variant
+            - variant_causes_gene_status
             - gene_status_affects_gene
 metadata:
         - data_source: short_mutations_external
\ No newline at end of file
diff --git a/oncodashkb/adapters/short_mutations_local.yaml b/oncodashkb/adapters/short_mutations_local.yaml
index 31c4c31..dfa514f 100644
--- a/oncodashkb/adapters/short_mutations_local.yaml
+++ b/oncodashkb/adapters/short_mutations_local.yaml
@@ -20,42 +20,41 @@ transformers:
     - map:
         column: alteration
         from_subject: sample
-        to_object: alteration
-        via_relation: sample_carries_alteration
+        to_object: short_mutation
+        via_relation: sample_carries_variant
     - map:
         column: referenceGenome
         to_property: reference_genome
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: tumorType
         to_property: tumor_type
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: consequence
         to_property: consequence
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: homogenous
         to_property: homogenous
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: refCount
         to_property: refCount
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: altCount
         to_property: altCount
-        for_object: alteration
+        for_object: short_mutation
     - map:
         column: expressed
         to_property: expressed
-        for_object: alteration
+        for_object: short_mutation
     - string:
         value: " "
         to_property: edgelabel
         for_objects:
             - patient_carries_sample
-            - sample_carries_alteration
-            - alteration_affects_gene
+            - sample_carries_variant
 metadata:
         - data_source: short_mutations_local
\ No newline at end of file
diff --git a/oncodashkb/adapters/structural_variants.yaml b/oncodashkb/adapters/structural_variants.yaml
index 1b96645..754ce68 100644
--- a/oncodashkb/adapters/structural_variants.yaml
+++ b/oncodashkb/adapters/structural_variants.yaml
@@ -25,18 +25,18 @@ transformers:
             - primary_gene
             - effect
         from_subject: sample
-        to_object: alteration
-        via_relation: sample_carries_alteration
+        to_object: structural_variant
+        via_relation: sample_carries_variant
         format_string: "{primary_gene}:{effect}"
     ## Gene status
     - translate_cat_format:
         columns:
           - primary_gene
           - Gene_type
-        from_subject: alteration
+        from_subject: structural_variant
         to_object: gene_status
         format_string: "{primary_gene}:{Gene_type}"
-        via_relation: alteration_causes_gene_status
+        via_relation: variant_causes_gene_status
         column_to_translate: 
             - primary_gene
         translations_file: data/HGNC/hgnc_complete_set.txt
@@ -60,7 +60,7 @@ transformers:
             - primary_gene
             - effect
         to_property: ensembl_id_alteration
-        for_object: alteration
+        for_object: structural_variant
         format_string: "{primary_gene}:{effect}"
         column_to_translate: 
             - primary_gene
@@ -89,26 +89,26 @@ transformers:
     - map:
         column: pathogenic
         to_property: oncogenic
-        for_object: alteration
+        for_object: structural_variant
     - map:
         column: effect
         to_property: consequence
-        for_object: alteration
+        for_object: structural_variant
     - map:
         column: Homogeneous
         to_property: homogenous
-        for_object: alteration
+        for_object: structural_variant
     - map:
         column: expressed
         to_property: expressed
-        for_object: alteration
+        for_object: structural_variant
     - string:
         value: " "
         to_property: edgelabel
         for_objects:
             - patient_carries_sample
-            - sample_carries_alteration
-            - alteration_causes_gene_status
+            - sample_carries_variant
+            - variant_causes_gene_status
             - gene_status_affects_gene
 
 metadata:

From 932db67d57fc72e9a331a221b9e3e7e8db27cd71 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Thu, 19 Mar 2026 16:05:10 +0100
Subject: [PATCH 07/15] first integration of cgi annotations

---
 config/schema.yaml           | 26 ++++++++--
 make.sh                      |  3 +-
 oncodashkb/adapters/cgi.yaml | 96 +++++++++++++++++++++++-------------
 weave.py                     | 49 +++++++++++++++++-
 4 files changed, 135 insertions(+), 39 deletions(-)

diff --git a/config/schema.yaml b/config/schema.yaml
index bd0f079..45d5e38 100644
--- a/config/schema.yaml
+++ b/config/schema.yaml
@@ -241,6 +241,12 @@ protein:
         ncbi_tax_id: str
         data_source: str
 
+# Treatment
+
+treatment:
+    represented_as: node
+    input_label: treatment
+
 ########################
 # EDGES
 ########################
@@ -306,13 +312,17 @@ variant causes gene status:
 # as predictive markers for treatment response, 
 # based on clinical evidence categorized by evidence levels. 
 
-variant biomarker for drug:
-    is_a: biomarker for
+variant biomarker for treatment:
+    is_a: sequence variant modulates treatment association
     represented_as: edge
-    label_in_input: variant_biomarker_for_drug
+    label_in_input: variant_biomarker_for_treatment
     source: sequence variant
-    target: drug
+    target: treatment
     properties:
+        level_of_evidence: str
+        cgi_level: str
+        citations: str
+        tumorType: str
         data_source: str
         edglelabel: str
 
@@ -486,3 +496,11 @@ drug has target:
     label_in_input: drug_has_target
     properties:
         data_source: str
+
+treatment has part drug:
+    is_a: association
+    represented_as: edge
+    label_in_input: treatment_has_part_drug
+    properties:
+        data_source: str
+
diff --git a/make.sh b/make.sh
index a519657..9356eaf 100755
--- a/make.sh
+++ b/make.sh
@@ -85,10 +85,11 @@ cmd="uv run python3 ${py_args} $script_dir/weave.py \
     --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
     --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
     --structural-variants                     $data_dir/DECIDER/$data_version/structural_variants.xlsx  \
-    --omnipath-networks                     $data_dir/omnipath_networks/omnipath_webservice_interactions__latest.tsv.gz \
+    --omnipath-networks                       $data_dir/omnipath_networks/omnipath_webservice_interactions__latest.tsv.gz \
     --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
     --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
     --open-targets-target                   $data_dir/OT/target/
+    --cgi                                     $data_dir/DECIDER/$data_version/treatments_cgi.csv \
     ${weave_args}" # \
     # --clinical                              $data_dir/DECIDER/clinical/clinical_export.xlsx \
     # --oncokb                     $data_dir/DECIDER/$data_version/treatments.csv \
diff --git a/oncodashkb/adapters/cgi.yaml b/oncodashkb/adapters/cgi.yaml
index 35b2ad3..8c47412 100644
--- a/oncodashkb/adapters/cgi.yaml
+++ b/oncodashkb/adapters/cgi.yaml
@@ -1,39 +1,69 @@
 row:
-   rowIndex:
-      to_subject: variant
+    map:
+        id_from_column: alteration
+        match_type_from_column: alteration_type
+        match:
+            - SNV:
+                to_subject: short_mutation
+            - CNA:
+                to_subject: copy_number_amplification
 transformers:
+    - replace:  # treatment node label: the `treatment` column with ';' swapped for ','
+        column: treatment
+        to_object: treatment
+        via_relation: variant_biomarker_for_treatment
+        forbidden: ';'
+        substitute: ','
+    - split_translate:
+        column: treatment
+        from_subject: treatment
+        to_object: drug
+        via_relation: treatment_has_part_drug
+        separator: "[,|;|+]"
+        translations_file: ./data/OT/drug_molecule/part-00000-871f412e-aec4-4d33-a50d-feee532ddcd2-c000.snappy.parquet
+        translate_from: name
+        translate_to: id
     - map:
-        columns:
-            - patient_id
-        to_object: patient
-        via_relation: patient_has_variant
+        column: level_of_evidence
+        to_property: level_of_evidence
+        for_object: variant_biomarker_for_treatment
     - map:
-        columns:
-            - gene
-        to_object: gene_hugo
-        via_relation: variant_in_gene
-    - split:
-        columns:
-            - sample
-        to_object: sample
-        via_relation: variant_in_sample
-        separator: ";"
+        column: cgi_level
+        to_property: cgi_level
+        for_object: variant_biomarker_for_treatment
     - map:
-        columns:
-            - transcript
-        from_subject: gene_hugo
-        to_object: transcript
-        via_relation: transcript_to_gene_relationship
+        column: citations
+        to_property: citations
+        for_object: variant_biomarker_for_treatment
     - map:
-        columns:
-            - oncogenic_summary
-        from_subject: variant
-        to_object: disease
-        via_relation: variant_to_disease
-    - map:
-        columns:
-            - consequence
-        to_property:
-            - consequence
-        for_objects:
-            - variant
+        column: tumorType
+        to_property: tumorType
+        for_object: variant_biomarker_for_treatment
+        # separator: "+"
+    # node type: DRUG
+    # upper
+    # split for ;,+
+    # remove inside parenthesis
+    # translate to CHEMBLid 
+    # node type: DRUG CATEGORY
+    # upper
+    # split for ;,+
+    # remove inside parenthesis
+    # match if inhibitor blablabla : drug category
+    # - map:
+    #     column: treatment
+    #     from_subject: treatment
+    #     to_object: drug
+    # - replace:
+    #     columns:
+    #         - treatment
+    #     to_object: drug
+    #     via_relation: variant_biomarker_for_drug
+    #     substitute: "_"
+    # - string:
+    #     value: "."
+    #     to_property: edgelabel
+    #     for_objects:
+    #         - variant_biomarker_for_drug
+metadata:
+        - data_source: cgi_annotation
diff --git a/weave.py b/weave.py
index 38dcbda..51e1bb6 100755
--- a/weave.py
+++ b/weave.py
@@ -334,6 +334,53 @@ def process_OT(directory, name):
         edges += local_edges
         logging.info(f"Done adapter {opt_loaded}/{opt_total}")
 
+    if asked.cgi:
+        opt_loaded += 1
+        logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
+        data_file = asked.cgi[0]
+        mapping_file = "./oncodashkb/adapters/cgi.yaml"
+
+        # logging.info(f"Weave CGI annotations...")
+        logging.info(f" | Weave `{data_file}:{mapping_file}`...")
+        logging.info(f" |  | Load data `{data_file}`...")
+        table = progress_read(data_file, hint=72648)
+
+        table["treatment"] = table.treatment.str.upper().str.replace(r'\([^()]*\)', '', regex=True)
+
+        try:
+            with open(mapping_file) as fd:
+                ymapping = yaml.full_load(fd)
+        except Exception as e:
+            logging.error(e)
+            sys.exit(error_codes["CannotAccessFile"])
+
+        logging.info(f" |  | Process {mapping_file}...")
+
+        yparser = ontoweaver.mapping.YamlParser(ymapping)
+        mapping = yparser()
+
+        adapter = ontoweaver.tabular.PandasAdapter(
+            table,
+            *mapping,
+            type_affix="suffix",
+            type_affix_sep=":",
+            raise_errors = True
+        )
+
+        local_nodes = []
+        local_edges = []
+        with alive_bar(len(table), file=sys.stderr) as progress:
+            for n,e in adapter():
+                # NOTE: here, n & e are ontoweaver.base.Element, not BioCypher tuples.
+                local_nodes += n
+                local_edges += e
+                progress()
+
+        logging.info(f" |  | OK, wove: {len(local_nodes)} nodes, {len(local_edges)} edges.")
+        nodes += local_nodes
+        edges += local_edges
+        logging.info(f"Done adapter {opt_loaded}/{opt_total}")
+
     if asked.omnipath_networks:
         opt_loaded += 1
         logging.info(f"########## Adapter #{opt_loaded}/{opt_total} ##########")
@@ -468,7 +515,7 @@ def process_OT(directory, name):
         "copy_number_amplifications_external",
         # "structural_variants",
         "oncokb",
-        "cgi",
+        # "cgi",
     ]
     for name in direct_mappings:
         option = getattr(asked, name)

From f4f771bef807f907193747a21cadcd2ed0ea2747 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Thu, 19 Mar 2026 17:54:28 +0100
Subject: [PATCH 08/15] added gene_role as property for CNA

---
 oncodashkb/adapters/copy_number_amplifications_external.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/oncodashkb/adapters/copy_number_amplifications_external.yaml b/oncodashkb/adapters/copy_number_amplifications_external.yaml
index 5fee1b5..074f1f3 100644
--- a/oncodashkb/adapters/copy_number_amplifications_external.yaml
+++ b/oncodashkb/adapters/copy_number_amplifications_external.yaml
@@ -80,6 +80,10 @@ transformers:
         to_property: gene_symbol_gene_status
         for_object: gene_status
         format_string: "{hugoSymbol}:{gene_role}"
+    - map:
+        columns: gene_role
+        to_property: gene_role
+        for_object: gene_status
     ## Genes
     - map: 
         column: hugoSymbol

From b03c67267f58cbb361bfec863761997a296e65a7 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Thu, 19 Mar 2026 17:55:31 +0100
Subject: [PATCH 09/15] fix: starting the neo4j database when no second
 argument is given

---
 make.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/make.sh b/make.sh
index 9356eaf..5aafc7b 100755
--- a/make.sh
+++ b/make.sh
@@ -65,7 +65,7 @@ echo "Activate virtual environment..." >&2
 source $(dirname $(uv python find))/activate
 
 
-if [[ "$2" == "config/neo4j.yaml" ]] ; then
+if [[ "$CONFIG" == "config/neo4j.yaml" ]] ; then
     echo "Stop Neo4j server..." >&2
     neo_version=$(neo4j-admin --version | cut -d. -f 1)
     if [[ "$neo_version" -eq 4 ]]; then

From efc25b3693cdd419bd6484a4aa24839fa3d6bc23 Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Thu, 19 Mar 2026 17:56:04 +0100
Subject: [PATCH 10/15] changing effect to mutation when defining structural
 variant id

---
 oncodashkb/adapters/structural_variants.yaml | 12 ++++++++----
 weave.py                                     |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/oncodashkb/adapters/structural_variants.yaml b/oncodashkb/adapters/structural_variants.yaml
index 754ce68..278ec62 100644
--- a/oncodashkb/adapters/structural_variants.yaml
+++ b/oncodashkb/adapters/structural_variants.yaml
@@ -23,11 +23,11 @@ transformers:
     - cat_format:
         columns:
             - primary_gene
-            - effect
+            - mutation
         from_subject: sample
         to_object: structural_variant
         via_relation: sample_carries_variant
-        format_string: "{primary_gene}:{effect}"
+        format_string: "{primary_gene}:{mutation}"
     ## Gene status
     - translate_cat_format:
         columns:
@@ -55,13 +55,17 @@ transformers:
         sep: "\t"
     # Properties
     ## Alterations
+    - map:
+        column: effect
+        to_property: consequence
+        for_object: structural_variant
     - translate_cat_format:
         columns:
             - primary_gene
-            - effect
+            - mutation
         to_property: ensembl_id_alteration
         for_object: structural_variant
-        format_string: "{primary_gene}:{effect}"
+        format_string: "{primary_gene}:{mutation}"
         column_to_translate: 
             - primary_gene
         translations_file: data/HGNC/hgnc_complete_set.txt
diff --git a/weave.py b/weave.py
index 51e1bb6..1044015 100755
--- a/weave.py
+++ b/weave.py
@@ -299,6 +299,7 @@ def process_OT(directory, name):
         table = pd.read_excel(data_file)
 
         table = table.rename(columns={"Gene.type":"Gene_type"})
+        table["mutation"] = table.mutation.str.replace(r';', ',', regex=True)
 
         try:
             with open(mapping_file) as fd:

From d8ecc54be3c30ad0977f5e41decacf54b74e7fae Mon Sep 17 00:00:00 2001
From: Matthieu NAJM <matthieu.najm@pasteur.fr>
Date: Mon, 23 Mar 2026 10:28:05 +0100
Subject: [PATCH 11/15] upgrade ontoweaver and set up for debugging omnipath
 adapter

---
 make.sh        | 20 ++++++++++----------
 pyproject.toml |  5 ++---
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/make.sh b/make.sh
index 5aafc7b..5cf3e70 100755
--- a/make.sh
+++ b/make.sh
@@ -79,18 +79,18 @@ fi
 echo "Weave data..." >&2
 
 cmd="uv run python3 ${py_args} $script_dir/weave.py \
+    --omnipath-networks                     $data_dir/omnipath_networks/subset_omnipath_networks_different_type_entity_type_source_and_entity_type_target_shorter.tsv \
     --config $CONFIG \
-    --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
-    --short-mutations-external              $decider_dir/short_mutations_external.csv  \
-    --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
-    --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
-    --structural-variants                     $data_dir/DECIDER/$data_version/structural_variants.xlsx  \
-    --omnipath-networks                       $data_dir/omnipath_networks/omnipath_webservice_interactions__latest.tsv.gz \
-    --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
-    --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
-    --open-targets-target                   $data_dir/OT/target/
-    --cgi                                     $data_dir/DECIDER/$data_version/treatments_cgi.csv \
     ${weave_args}" # \
+    # --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
+    # --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
+    # --short-mutations-external              $decider_dir/short_mutations_external.csv  \
+    # --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
+    # --structural-variants                   $decider_dir/structural_variants.xlsx  \
+    # --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
+    # --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
+    # --open-targets-target                   $data_dir/OT/target/
+    # --cgi                                   $decider_dir/treatments_cgi.csv \
     # --clinical                              $data_dir/DECIDER/clinical/clinical_export.xlsx \
     # --oncokb                     $data_dir/DECIDER/$data_version/treatments.csv \
 
diff --git a/pyproject.toml b/pyproject.toml
index c0b074c..98bffa4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ dependencies = [
   "matplotlib>=3.10.0,<4.0",
   "polars>=1.22.0,<2.0",
   "seaborn>=0.13.2,<0.14",
-  "ontoweaver>=1.3.0,<1.4.0",
+  "ontoweaver>=1.4.0,<1.5.0",
   "openpyxl>=3.1.5",
   "pyarrow<21.0.0",
   "fastparquet<2026.3.0",
@@ -32,5 +32,4 @@ dependencies = [
 dev = [
   "pre-commit>=4.5.0",
   "pytest>=8.4.1",
-]
-
+]
\ No newline at end of file

From d0c20f18bc7063d5ee4d9c58d916d19f628b8123 Mon Sep 17 00:00:00 2001
From: Claire Laudy <claire.laudy@pasteur.fr>
Date: Tue, 31 Mar 2026 15:14:05 +0200
Subject: [PATCH 12/15] fix(config for BPN): adds parameters to the biopathnet
 config file + fixes the pyproject for macOS/ARM computers.

---
 config/biopathnet.yaml | 4 +++-
 make.sh                | 2 ++
 pyproject.toml         | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/config/biopathnet.yaml b/config/biopathnet.yaml
index 6c3cc50..18a7c40 100644
--- a/config/biopathnet.yaml
+++ b/config/biopathnet.yaml
@@ -9,9 +9,11 @@ biocypher:
         root_node: entity
 
 biopathnet:
-  file_format: txt
+  file_format: txt:bn 
   entity_types_file_stem: entity_types
   entity_names_file_stem: entity_names
   background_graph_file_stem: brg
   skg_file_stem: skg
+  targeted_relation: "(alteration, variant_biomarker_for_treatment, drug)"
+  include_properties: False
 
diff --git a/make.sh b/make.sh
index 8e5d64a..3c267b7 100755
--- a/make.sh
+++ b/make.sh
@@ -78,6 +78,8 @@ fi
 
 echo "Weave data..." >&2
 
+echo "CONFIG = $CONFIG" >&2
+
 cmd="uv run python3 ${py_args} $script_dir/weave.py \
     --config $CONFIG \
     --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
diff --git a/pyproject.toml b/pyproject.toml
index c0b074c..9060199 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
   "seaborn>=0.13.2,<0.14",
   "ontoweaver>=1.3.0,<1.4.0",
   "openpyxl>=3.1.5",
-  "pyarrow<21.0.0",
+  "pyarrow>20.0.0",
   "fastparquet<2026.3.0",
 ]
 

From ef7744f014b90363fa63c0a9606404ed1dd281ec Mon Sep 17 00:00:00 2001
From: Claire Laudy <claire.laudy@pasteur.fr>
Date: Tue, 31 Mar 2026 20:06:30 +0200
Subject: [PATCH 13/15] feat(fetch all datasources): Adds all the datasources to
 the make.sh script.

---
 make.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/make.sh b/make.sh
index 2d5df70..fec908a 100755
--- a/make.sh
+++ b/make.sh
@@ -81,17 +81,17 @@ echo "Weave data..." >&2
 echo "CONFIG = $CONFIG" >&2
 
 cmd="uv run python3 ${py_args} $script_dir/weave.py \
-    --omnipath-networks                     $data_dir/omnipath_networks/subset_omnipath_networks_different_type_entity_type_source_and_entity_type_target_shorter.tsv \
+    --omnipath-networks                     $data_dir/omnipath_networks/omnipath_networks_different_type_entity_type_source_and_entity_type_target_shorter.tsv \
+    --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
+    --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
+    --short-mutations-external              $decider_dir/short_mutations_external.csv  \
+    --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
+    --structural-variants                   $decider_dir/structural_variants.xlsx  \
+    --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
+    --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
+    --open-targets-target                   $data_dir/OT/target/
     --config $CONFIG \
     ${weave_args}" # \
-    # --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
-    # --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
-    # --short-mutations-external              $decider_dir/short_mutations_external.csv  \
-    # --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
-    # --structural-variants                   $decider_dir/structural_variants.xlsx  \
-    # --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
-    # --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
-    # --open-targets-target                   $data_dir/OT/target/
     # --cgi                                   $decider_dir/treatments_cgi.csv \
     # --clinical                              $data_dir/DECIDER/clinical/clinical_export.xlsx \
     # --oncokb                     $data_dir/DECIDER/$data_version/treatments.csv \

From 71a55d86ec55df6926dcca7c37f92ca1857f69b2 Mon Sep 17 00:00:00 2001
From: Claire Laudy <claire.laudy@pasteur.fr>
Date: Wed, 1 Apr 2026 17:36:32 +0200
Subject: [PATCH 14/15] fix(config & make): Fixes the biopathnet config file
 and make.sh script to enable the export of oncodashkb into a BioPathNet
 set of input files.

---
 config/biopathnet.yaml | 2 +-
 make.sh                | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/biopathnet.yaml b/config/biopathnet.yaml
index 18a7c40..7d87237 100644
--- a/config/biopathnet.yaml
+++ b/config/biopathnet.yaml
@@ -14,6 +14,6 @@ biopathnet:
   entity_names_file_stem: entity_names
   background_graph_file_stem: brg
   skg_file_stem: skg
-  targeted_relation: "(alteration, variant_biomarker_for_treatment, drug)"
+  targeted_relation: "(alteration, variant biomarker for treatment, drug)"
   include_properties: False
 
diff --git a/make.sh b/make.sh
index fec908a..a7b5ad3 100755
--- a/make.sh
+++ b/make.sh
@@ -81,18 +81,18 @@ echo "Weave data..." >&2
 echo "CONFIG = $CONFIG" >&2
 
 cmd="uv run python3 ${py_args} $script_dir/weave.py \
-    --omnipath-networks                     $data_dir/omnipath_networks/omnipath_networks_different_type_entity_type_source_and_entity_type_target_shorter.tsv \
     --copy-number-amplifications-external   $decider_dir/cnas_external.csv  \
     --short-mutations-local                 $decider_dir/short_mutations_local.csv  \
     --short-mutations-external              $decider_dir/short_mutations_external.csv  \
     --copy-number-amplifications-local      $decider_dir/cnas_local.csv \
-    --structural-variants                   $decider_dir/structural_variants.xlsx  \
     --open-targets-drug-molecule            $data_dir/OT/drug_molecule/
     --open-targets-drug_mechanism_of_action $data_dir/OT/drug_mechanism_of_action/
     --open-targets-target                   $data_dir/OT/target/
+    --cgi                                   $decider_dir/treatments_cgi.csv \
     --config $CONFIG \
     ${weave_args}" # \
-    # --cgi                                   $decider_dir/treatments_cgi.csv \
+    # --omnipath-networks                     $data_dir/omnipath_networks/omnipath_networks_different_type_entity_type_source_and_entity_type_target_shorter.tsv \
+    # --structural-variants                   $decider_dir/structural_variants.xlsx  \
     # --clinical                              $data_dir/DECIDER/clinical/clinical_export.xlsx \
     # --oncokb                     $data_dir/DECIDER/$data_version/treatments.csv \
 

From 9453d91917204b1abcb630eed992edce4b4b9ab3 Mon Sep 17 00:00:00 2001
From: Claire Laudy <claire.laudy@pasteur.fr>
Date: Fri, 3 Apr 2026 10:53:56 +0200
Subject: [PATCH 15/15] feat(multiple export back-ends): Adds the possibility
 to export the SKG to several back-ends.

---
 config/owl.yaml | 16 ++++++++++++++++
 weave.py        | 38 ++++++++++++++++++++++----------------
 2 files changed, 38 insertions(+), 16 deletions(-)
 create mode 100644 config/owl.yaml

diff --git a/config/owl.yaml b/config/owl.yaml
new file mode 100644
index 0000000..60a2b17
--- /dev/null
+++ b/config/owl.yaml
@@ -0,0 +1,16 @@
+biocypher:
+    debug: false
+    offline: true
+    dbms: owl
+
+    # Ontology configuration
+    head_ontology:
+        url: https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl
+        root_node: entity
+
+owl:
+    edge_model: ObjectProperty
+    file_format: turtle
+    labels_order: "Ascending" # Default: From more specific to more generic.
+    node_labels_order: "Ascending" # Default: use labels_order.
+    edge_labels_order: "Leaves"
diff --git a/weave.py b/weave.py
index 1044015..278730e 100755
--- a/weave.py
+++ b/weave.py
@@ -136,7 +136,8 @@ def process_OT(directory, name):
     parser = argparse.ArgumentParser(
         description=usage)
 
-    parser.add_argument("-C", "--config", metavar="FILE", default="config/neo4j.yaml",
+    parser.add_argument("-C", "--config", metavar="FILE", default=["config/neo4j.yaml"],
+                        action="append",
                         help="The BioCypher configuration to load [default: config/neo4j.yaml].")
 
     parser.add_argument("-i", "--clinical", metavar="CSV", nargs="+",
@@ -197,10 +198,6 @@ def process_OT(directory, name):
                         help="Set the verbose level (default: %(default)s).")
 
     asked = parser.parse_args()
-    bc = biocypher.BioCypher(
-        biocypher_config_path = asked.config,
-        schema_config_path = "config/schema.yaml"
-    )
 
     logging.basicConfig()
     logging.getLogger().setLevel(asked.verbose)
@@ -670,17 +667,26 @@ def process_OT(directory, name):
     # Export the final SKG.
     ###################################################
 
-    logging.info(f"Write the final SKG into files...")
-    if fnodes:
-        bc.write_nodes(n.as_tuple() for n in fnodes)
-    if fedges:
-        bc.write_edges(e.as_tuple() for e in fedges)
-    #bc.summary()
-    import_file = bc.write_import_call()
-    logging.info(f"OK, wrote files.")
-
-    # Print on stdout for other scripts to get.
-    print(import_file)
+    configs = asked.config
+
+    for config in configs:
+        logging.info(f"Write the final SKG into {config} files...")
+        
+        bc = biocypher.BioCypher(
+            biocypher_config_path = config,
+            schema_config_path = "config/schema.yaml"
+        )
+        
+        if fnodes:
+            bc.write_nodes(n.as_tuple() for n in fnodes)
+        if fedges:
+            bc.write_edges(e.as_tuple() for e in fedges)
+        #bc.summary()
+        import_file = bc.write_import_call()
+        logging.info(f"OK, wrote files.")
+
+        # Print on stdout for other scripts to get.
+        print(import_file)
 
     if asked.import_script_run:
         shell = os.environ["SHELL"]