From 45929ec402fb46856518a8cd0eb1b3228733e0b6 Mon Sep 17 00:00:00 2001
From: Eleanor Lewis
diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py
--- a/pori_python/ipr/summary.py
+++ b/pori_python/ipr/summary.py
- output = [f"{gene_name}"]
+ output = [f'{gene_name}']
sentence_categories: Dict[str, str] = {}
for statement_id, sentence in sentences_by_statement_id.items():
- relevance = statements[statement_id]["relevance"]["@rid"]
+ relevance = statements[statement_id]['relevance']['@rid']
category = categorize_relevance(
graphkb_conn,
relevance,
- RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
+ RELEVANCE_BASE_TERMS + [('resistance', ['no sensitivity'])],
)
sentence_categories[sentence] = category
# get the entrez gene descriptive hugo name
genes = graphkb_conn.query(
{
- "target": "Feature",
- "filters": {
- "AND": [
+ 'target': 'Feature',
+ 'filters': {
+ 'AND': [
{
- "source": {
- "target": "Source",
- "filters": {"name": "entrez gene"},
+ 'source': {
+ 'target': 'Source',
+ 'filters': {'name': 'entrez gene'},
}
},
- {"name": gene_name},
- {"biotype": "gene"},
+ {'name': gene_name},
+ {'biotype': 'gene'},
]
},
}
@@ -300,10 +300,10 @@ def create_section_html(
variants_text = display_variants(gene_name, exp_variants)
if not variants_text:
# exclude sections where they are not linked to an experimental variant. this can occur when there are co-occurent statements collected
- return ""
- if genes and genes[0].get("description", ""):
- description = ". ".join(genes[0]["description"].split(". ")[:2]) # type: ignore
- sourceId = genes[0].get("sourceId", "")
+ return ''
+ if genes and genes[0].get('description', ''):
+ description = '. '.join(genes[0]['description'].split('. ')[:2]) # type: ignore
+ sourceId = genes[0].get('sourceId', '')
output.append(
f"""
@@ -319,27 +319,27 @@ def create_section_html(
sentences_used: Set[str] = set()
for section in [
- {s for (s, v) in sentence_categories.items() if v == "diagnostic"},
- {s for (s, v) in sentence_categories.items() if v == "biological"},
- {s for (s, v) in sentence_categories.items() if v in ["therapeutic", "prognostic"]},
+ {s for (s, v) in sentence_categories.items() if v == 'diagnostic'},
+ {s for (s, v) in sentence_categories.items() if v == 'biological'},
+ {s for (s, v) in sentence_categories.items() if v in ['therapeutic', 'prognostic']},
{
s
for (s, v) in sentence_categories.items()
if v
not in [
- "diagnostic",
- "biological",
- "therapeutic",
- "prognostic",
- "resistance",
+ 'diagnostic',
+ 'biological',
+ 'therapeutic',
+ 'prognostic',
+ 'resistance',
]
},
- {s for (s, v) in sentence_categories.items() if v == "resistance"},
+ {s for (s, v) in sentence_categories.items() if v == 'resistance'},
]:
- content = ". ".join(sorted(list(section - sentences_used)))
+ content = '. '.join(sorted(list(section - sentences_used)))
sentences_used.update(section)
- section.append(f"{variant_text['variantName']}{cancer_type}")
+ section.append(f'{variant_text["variantName"]}{cancer_type}')
- output_header = "The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed"
- no_comments_found_output = "No comments found in IPR for variants in this report"
+ output_header = 'The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed'
+ no_comments_found_output = 'No comments found in IPR for variants in this report'
output = []
# get the list of variants to check for custom text for
- match_set = list(set([item["kbVariant"] for item in matches]))
+ match_set = list(set([item['kbVariant'] for item in matches]))
for variant in match_set:
data = {
- "variantName": variant,
+ 'variantName': variant,
}
itemlist: list[dict] = []
- itemlist = ipr_conn.get("variant-text", data=data) # type: ignore
+ itemlist = ipr_conn.get('variant-text', data=data) # type: ignore
if itemlist:
project_matches = [
item
for item in itemlist
- if "project" in item.keys() and item["project"]["name"] == project_name
+ if 'project' in item.keys() and item['project']['name'] == project_name
]
if project_matches:
itemlist = project_matches
elif include_nonspecific_project:
- itemlist = [item for item in itemlist if "project" not in item.keys()]
+ itemlist = [item for item in itemlist if 'project' not in item.keys()]
else:
itemlist = []
template_matches = [
item
for item in itemlist
- if "template" in item.keys() and item["template"]["name"] == report_type
+ if 'template' in item.keys() and item['template']['name'] == report_type
]
if template_matches:
itemlist = template_matches
elif include_nonspecific_template:
- itemlist = [item for item in itemlist if "template" not in item.keys()]
+ itemlist = [item for item in itemlist if 'template' not in item.keys()]
else:
itemlist = []
- disease_matches = [item for item in itemlist if disease_name in item["cancerType"]]
+ disease_matches = [item for item in itemlist if disease_name in item['cancerType']]
if disease_matches:
itemlist = disease_matches
elif include_nonspecific_disease:
- itemlist = [item for item in itemlist if not item["cancerType"]]
+ itemlist = [item for item in itemlist if not item['cancerType']]
else:
itemlist = []
@@ -466,7 +466,7 @@ def get_ipr_analyst_comments(
if not output:
return no_comments_found_output
output.insert(0, output_header)
- return "\n".join(output)
+ return '\n'.join(output)
def auto_analyst_comments(
@@ -478,12 +478,12 @@ def auto_analyst_comments(
"""Given a list of GraphKB matches, generate a text summary to add to the report."""
templates: Dict[str, List[Statement]] = {}
statements: Dict[str, Statement] = {}
- variants_by_keys = {v["key"]: v for v in variants}
+ variants_by_keys = {v['key']: v for v in variants}
variant_keys_by_statement_ids: Dict[str, Set[str]] = {}
for match in matches:
- rid = match["kbStatementId"]
- exp_variant = match["variant"]
+ rid = match['kbStatementId']
+ exp_variant = match['variant']
variant_keys_by_statement_ids.setdefault(rid, set()).add(exp_variant)
exp_variants_by_statements: Dict[str, List[IprVariant]] = {}
@@ -491,16 +491,16 @@ def auto_analyst_comments(
try:
exp_variants_by_statements[rid] = [variants_by_keys[key] for key in keys]
except KeyError as err:
- logger.warning(f"No specific variant matched for {rid}:{keys} - {err}")
+ logger.warning(f'No specific variant matched for {rid}:{keys} - {err}')
exp_variants_by_statements[rid] = []
# get details for statements
for match in matches:
- rid = match["kbStatementId"].replace("#", "")
- result = graphkb_conn.request(f"/statements/{rid}?neighbors=1")["result"]
+ rid = match['kbStatementId'].replace('#', '')
+ result = graphkb_conn.request(f'/statements/{rid}?neighbors=1')['result']
- templates.setdefault(result["displayNameTemplate"], []).append(result)
- statements[result["@rid"]] = result
+ templates.setdefault(result['displayNameTemplate'], []).append(result)
+ statements[result['@rid']] = result
# aggregate similar sentences
sentences = {}
@@ -511,7 +511,7 @@ def auto_analyst_comments(
statements_by_genes = section_statements_by_genes(graphkb_conn, list(statements.values()))
output: List[str] = [
- "The comments below were automatically generated from matches to GraphKB and have not been manually reviewed
"
+ 'The comments below were automatically generated from matches to GraphKB and have not been manually reviewed
'
]
for section, statement_rids in sorted(
@@ -520,7 +520,7 @@ def auto_analyst_comments(
exp_variants = {}
for variant_list in [exp_variants_by_statements[r] for r in statement_rids]:
for variant in variant_list:
- exp_variants[variant["key"]] = variant
+ exp_variants[variant['key']] = variant
output.append(
create_section_html(
@@ -532,4 +532,4 @@ def auto_analyst_comments(
)
)
- return "\n".join(output)
+ return '\n'.join(output)
diff --git a/pori_python/ipr/therapeutic_options.py b/pori_python/ipr/therapeutic_options.py
index 9ca8c3b..7dc38ce 100644
--- a/pori_python/ipr/therapeutic_options.py
+++ b/pori_python/ipr/therapeutic_options.py
@@ -27,57 +27,57 @@ def create_therapeutic_options(
Generate therapeutic options summary from the list of kb-matches
"""
options: List[Dict[str, Any]] = []
- resistance_markers = get_terms_set(graphkb_conn, ["no sensitivity"])
+ resistance_markers = get_terms_set(graphkb_conn, ['no sensitivity'])
for match in kb_matches:
- row_type = "therapeutic"
- if match["category"] != "therapeutic" or match["relevance"] == "eligibility":
+ row_type = 'therapeutic'
+ if match['category'] != 'therapeutic' or match['relevance'] == 'eligibility':
continue
- if match["kbRelevanceId"] in resistance_markers:
- row_type = "chemoresistance"
- variant = find_variant(variants, match["variantType"], match["variant"])
- drug = get_preferred_drug_representation(graphkb_conn, match["kbContextId"])
+ if match['kbRelevanceId'] in resistance_markers:
+ row_type = 'chemoresistance'
+ variant = find_variant(variants, match['variantType'], match['variant'])
+ drug = get_preferred_drug_representation(graphkb_conn, match['kbContextId'])
gene, variant_string = create_variant_name_tuple(variant)
options.append(
{
- "gene": gene,
- "type": row_type,
- "therapy": drug["displayName"],
- "therapyGraphkbId": drug["@rid"],
- "context": match["relevance"],
- "contextGraphkbId": match["kbRelevanceId"],
- "variantGraphkbId": match["kbVariantId"],
- "variant": variant_string,
- "evidenceLevel": match["evidenceLevel"],
- "kbStatementIds": match["kbStatementId"],
- "notes": "",
+ 'gene': gene,
+ 'type': row_type,
+ 'therapy': drug['displayName'],
+ 'therapyGraphkbId': drug['@rid'],
+ 'context': match['relevance'],
+ 'contextGraphkbId': match['kbRelevanceId'],
+ 'variantGraphkbId': match['kbVariantId'],
+ 'variant': variant_string,
+ 'evidenceLevel': match['evidenceLevel'],
+ 'kbStatementIds': match['kbStatementId'],
+ 'notes': '',
}
)
if not options:
return options
options_df = pandas.DataFrame.from_records(options)
- def delimited_list(inputs: List, delimiter: str = " / ") -> str:
+ def delimited_list(inputs: List, delimiter: str = ' / ') -> str:
return delimiter.join(sorted(list({i for i in inputs if i})))
- options_df = options_df.groupby(["gene", "type", "therapy", "variant"]).agg(
+ options_df = options_df.groupby(['gene', 'type', 'therapy', 'variant']).agg(
{
- "evidenceLevel": delimited_list,
- "context": delimited_list,
- "notes": lambda x: delimited_list(x, " "),
+ 'evidenceLevel': delimited_list,
+ 'context': delimited_list,
+ 'notes': lambda x: delimited_list(x, ' '),
}
)
options_df = options_df.reset_index()
- options = options_df.to_dict("records") # type: ignore
+ options = options_df.to_dict('records') # type: ignore
therapeutic_rank = 0
chemoresistance_rank = 0
for option in options:
- if option["type"] == "therapeutic":
- option["rank"] = therapeutic_rank
+ if option['type'] == 'therapeutic':
+ option['rank'] = therapeutic_rank
therapeutic_rank += 1
else:
- option["rank"] = chemoresistance_rank
+ option['rank'] = chemoresistance_rank
chemoresistance_rank += 1
return options
diff --git a/pori_python/ipr/util.py b/pori_python/ipr/util.py
index d2aca07..78932e3 100644
--- a/pori_python/ipr/util.py
+++ b/pori_python/ipr/util.py
@@ -12,12 +12,12 @@
GENE_NEIGHBORS_MAX = 3
# name the logger after the package to make it simple to disable for packages using this one as a dependency
-logger = logging.getLogger("ipr")
+logger = logging.getLogger('ipr')
LOG_LEVELS = {
- "info": logging.INFO,
- "debug": logging.DEBUG,
- "warn": logging.WARN,
- "error": logging.ERROR,
+ 'info': logging.INFO,
+ 'debug': logging.DEBUG,
+ 'warn': logging.WARN,
+ 'error': logging.ERROR,
}
@@ -31,17 +31,17 @@ def get_terms_set(graphkb_conn: GraphKBConnection, base_terms: List[str]) -> Set
def hash_key(key: Tuple[str]) -> str:
- body = json.dumps({"key": key}, sort_keys=True)
- hash_code = hashlib.md5(body.encode("utf-8")).hexdigest()
+ body = json.dumps({'key': key}, sort_keys=True)
+ hash_code = hashlib.md5(body.encode('utf-8')).hexdigest()
return hash_code
def convert_to_rid_set(records: Sequence[Record]) -> Set[str]:
- return {r["@rid"] for r in records}
+ return {r['@rid'] for r in records}
-def trim_empty_values(obj: IprVariant, empty_values: Sequence = ("", None, nan)):
- blacklist = ("gene1", "gene2") # allow null for sv genes
+def trim_empty_values(obj: IprVariant, empty_values: Sequence = ('', None, nan)):
+ blacklist = ('gene1', 'gene2') # allow null for sv genes
keys = list(obj.keys())
for key in keys:
@@ -55,19 +55,19 @@ def create_variant_name_tuple(variant: IprVariant) -> Tuple[str, str]:
Given an IPR variant row, create the variant representation to be used as the name
of the variant
"""
- variant_type = variant["variantType"]
- gene = str(variant.get("gene", variant.get("gene1", "")))
- if variant_type == "exp":
- return (gene, str(variant.get("expressionState", "")))
- elif variant_type == "cnv":
- return (gene, str(variant.get("cnvState", "")))
+ variant_type = variant['variantType']
+ gene = str(variant.get('gene', variant.get('gene1', '')))
+ if variant_type == 'exp':
+ return (gene, str(variant.get('expressionState', '')))
+ elif variant_type == 'cnv':
+ return (gene, str(variant.get('cnvState', '')))
variant_split = (
- variant["variant"].split(":", 1)[1] if ":" in variant["variant"] else variant["variant"]
+ variant['variant'].split(':', 1)[1] if ':' in variant['variant'] else variant['variant']
)
- gene2 = str(variant.get("gene2", ""))
+ gene2 = str(variant.get('gene2', ''))
if gene and gene2:
- gene = f"{gene}, {gene2}"
+ gene = f'{gene}, {gene2}'
elif gene2:
gene = gene2
@@ -81,28 +81,28 @@ def find_variant(
Find a variant in a list of variants by its key and type
"""
for variant in all_variants:
- if variant["key"] == variant_key and variant["variantType"] == variant_type:
+ if variant['key'] == variant_key and variant['variantType'] == variant_type:
return variant
- raise KeyError(f"expected variant ({variant_key}, {variant_type}) does not exist")
+ raise KeyError(f'expected variant ({variant_key}, {variant_type}) does not exist')
def generate_ontology_preference_key(record: Ontology, sources_sort: Dict[str, int] = {}) -> Tuple:
"""Generate a tuple key for comparing preferred ontology terms."""
return (
- record.get("name") == record.get("sourceId"),
- record.get("deprecated", False),
- record.get("alias", False),
- bool(record.get("dependency", "")),
- sources_sort.get(str(record.get("source")), 99999),
- record["sourceId"],
- record.get("sourceIdVersion", ""),
- record["name"],
+ record.get('name') == record.get('sourceId'),
+ record.get('deprecated', False),
+ record.get('alias', False),
+ bool(record.get('dependency', '')),
+ sources_sort.get(str(record.get('source')), 99999),
+ record['sourceId'],
+ record.get('sourceIdVersion', ''),
+ record['name'],
)
def get_alternatives(graphkb_conn: GraphKBConnection, record_id: str) -> List[Ontology]:
rec_list = graphkb_conn.query(
- {"target": [record_id], "queryType": "similarTo", "treeEdges": []}
+ {'target': [record_id], 'queryType': 'similarTo', 'treeEdges': []}
)
return [cast(Ontology, rec) for rec in rec_list]
@@ -115,8 +115,8 @@ def get_preferred_drug_representation(
"""
source_preference = {
- r["@rid"]: r["sort"] # type: ignore
- for r in graphkb_conn.query({"target": "Source", "returnProperties": ["sort", "@rid"]})
+ r['@rid']: r['sort'] # type: ignore
+ for r in graphkb_conn.query({'target': 'Source', 'returnProperties': ['sort', '@rid']})
}
drugs = sorted(
get_alternatives(graphkb_conn, drug_record_id),
@@ -130,42 +130,42 @@ def get_preferred_gene_name(
) -> str:
"""Given some Feature record ID return the preferred gene name."""
record = graphkb_conn.get_record_by_id(record_id)
- biotype = record.get("biotype", "")
+ biotype = record.get('biotype', '')
genes = []
- expanded_gene_names = graphkb_conn.query({"target": [record_id], "neighbors": neighbors})
- assert len(expanded_gene_names) == 1, "get_preferred_gene_name should have single result"
+ expanded_gene_names = graphkb_conn.query({'target': [record_id], 'neighbors': neighbors})
+ assert len(expanded_gene_names) == 1, 'get_preferred_gene_name should have single result'
expanded: Dict[str, List] = expanded_gene_names[0] # type: ignore
- if biotype != "gene":
- for edge in expanded.get("out_ElementOf", []):
- target = edge["in"]
- if target.get("biotype") == "gene":
+ if biotype != 'gene':
+ for edge in expanded.get('out_ElementOf', []):
+ target = edge['in']
+ if target.get('biotype') == 'gene':
genes.append(target)
for edge_type in [
- "out_AliasOf",
- "in_AliasOf",
- "in_DeprecatedBy",
- "out_CrossReferenceOf",
- "in_CrossReferenceOf",
+ 'out_AliasOf',
+ 'in_AliasOf',
+ 'in_DeprecatedBy',
+ 'out_CrossReferenceOf',
+ 'in_CrossReferenceOf',
]:
- target_name = "out" if edge_type.startswith("in") else "in"
+ target_name = 'out' if edge_type.startswith('in') else 'in'
for edge in expanded.get(edge_type, []):
target = edge[target_name]
- if target.get("biotype") == "gene":
+ if target.get('biotype') == 'gene':
genes.append(target)
genes = sorted(
genes,
key=lambda gene: (
- gene["deprecated"],
- bool(gene["dependency"]),
- "_" in gene["name"],
- gene["name"].startswith("ens"),
+ gene['deprecated'],
+ bool(gene['dependency']),
+ '_' in gene['name'],
+ gene['name'].startswith('ens'),
),
)
if genes:
- return genes[0]["displayName"]
+ return genes[0]['displayName']
# fallback to the input displayName
- return str(record.get("displayName", ""))
+ return str(record.get('displayName', ''))
def pandas_falsy(field: Any) -> bool:
diff --git a/pori_python/types.py b/pori_python/types.py
index faec8f2..dd1ab7e 100644
--- a/pori_python/types.py
+++ b/pori_python/types.py
@@ -5,8 +5,8 @@
# TODO: Can constants in inputs.py like COPY_REQ, SMALL_MUT_REQ, just be replaced by types?
CategoryBaseTermMapping = List[Tuple[str, List[str]]]
-Record = TypedDict("Record", {"@rid": str, "@class": str, "name": str})
-EmbeddedRecord = TypedDict("EmbeddedRecord", {"@class": str})
+Record = TypedDict('Record', {'@rid': str, '@class': str, 'name': str})
+EmbeddedRecord = TypedDict('EmbeddedRecord', {'@class': str})
class DisplayedRecord(Record):
diff --git a/pyproject.toml b/pyproject.toml
index 9e5a984..6213fb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1 +1,7 @@
build-backend = "setuptools.build_meta"
+
+[tool.ruff]
+line-length = 100
+
+[tool.ruff.format]
+quote-style = "single"
diff --git a/setup.cfg b/setup.cfg
index e466a48..444f882 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -53,10 +53,7 @@ dev =
mkdocs-material
mkdocs-redirects
markdown-refdocs
- flake8
- black
- flake8-annotations
- isort
+ ruff
mypy
[options.package_data]
diff --git a/tests/test_graphkb/data.py b/tests/test_graphkb/data.py
index d628ff2..955e391 100644
--- a/tests/test_graphkb/data.py
+++ b/tests/test_graphkb/data.py
@@ -7,72 +7,72 @@
#
structuralVariants = {
# Unambiguous structural variations
- "(FGFR3,BRCA2):fusion(g.1234567,g.1234567)": {
- "matches": {
- "displayName": ["FGFR3 fusion", "FGFR3 rearrangement"],
- "type": ["fusion", "rearrangement"],
+ '(FGFR3,BRCA2):fusion(g.1234567,g.1234567)': {
+ 'matches': {
+ 'displayName': ['FGFR3 fusion', 'FGFR3 rearrangement'],
+ 'type': ['fusion', 'rearrangement'],
}
},
# ambiguous structural variations -> structural
- "FGFR3:c.1200_1300dup": {
- "matches": {
- "displayName": ["FGFR3 mutation", "FGFR3 rearrangement"],
- "type": ["mutation", "rearrangement"],
+ 'FGFR3:c.1200_1300dup': {
+ 'matches': {
+ 'displayName': ['FGFR3 mutation', 'FGFR3 rearrangement'],
+ 'type': ['mutation', 'rearrangement'],
}
},
- "FGFR3:c.1200_1201insACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT": {
- "matches": {
- "displayName": ["FGFR3 mutation", "FGFR3 rearrangement"],
- "type": ["mutation", "rearrangement"],
+ 'FGFR3:c.1200_1201insACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT': {
+ 'matches': {
+ 'displayName': ['FGFR3 mutation', 'FGFR3 rearrangement'],
+ 'type': ['mutation', 'rearrangement'],
}
},
- "FGFR3:g.5000_5100del": {
- "matches": {
- "displayName": ["FGFR3 mutation", "FGFR3 rearrangement"],
- "type": ["mutation", "rearrangement"],
+ 'FGFR3:g.5000_5100del': {
+ 'matches': {
+ 'displayName': ['FGFR3 mutation', 'FGFR3 rearrangement'],
+ 'type': ['mutation', 'rearrangement'],
}
},
- "FGFR3:c.1200_1300delinsA": {
- "matches": {
- "displayName": ["FGFR3 mutation", "FGFR3 rearrangement"],
- "type": ["mutation", "rearrangement"],
+ 'FGFR3:c.1200_1300delinsA': {
+ 'matches': {
+ 'displayName': ['FGFR3 mutation', 'FGFR3 rearrangement'],
+ 'type': ['mutation', 'rearrangement'],
}
},
- "FGFR3:c.1200delinsACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT": {
- "matches": {
- "displayName": ["FGFR3 mutation", "FGFR3 rearrangement"],
- "type": ["mutation", "rearrangement"],
+ 'FGFR3:c.1200delinsACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT': {
+ 'matches': {
+ 'displayName': ['FGFR3 mutation', 'FGFR3 rearrangement'],
+ 'type': ['mutation', 'rearrangement'],
}
},
# ambiguous structural variations -> non-structural
- "FGFR3:c.1200dup": {
- "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]},
+ 'FGFR3:c.1200dup': {
+ 'matches': {'displayName': ['FGFR3 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['FGFR3 rearrangement'], 'type': ['rearrangement']},
},
- "FGFR3:c.1200_1201insA": {
- "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]},
+ 'FGFR3:c.1200_1201insA': {
+ 'matches': {'displayName': ['FGFR3 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['FGFR3 rearrangement'], 'type': ['rearrangement']},
},
- "FGFR3:g.5000del": {
- "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]},
+ 'FGFR3:g.5000del': {
+ 'matches': {'displayName': ['FGFR3 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['FGFR3 rearrangement'], 'type': ['rearrangement']},
},
- "FGFR3:c.1200delinsA": {
- "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]},
+ 'FGFR3:c.1200delinsA': {
+ 'matches': {'displayName': ['FGFR3 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['FGFR3 rearrangement'], 'type': ['rearrangement']},
},
- "STK11:e.1_100del": {
- "matches": {"displayName": ["STK11 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["STK11 deletion"], "type": ["deletion"]},
+ 'STK11:e.1_100del': {
+ 'matches': {'displayName': ['STK11 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['STK11 deletion'], 'type': ['deletion']},
},
- "STK11:i.1_100del": {
- "matches": {"displayName": ["STK11 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["STK11 deletion"], "type": ["deletion"]},
+ 'STK11:i.1_100del': {
+ 'matches': {'displayName': ['STK11 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['STK11 deletion'], 'type': ['deletion']},
},
# non-structural variations
- "FGFR3:c.1200C>A": {
- "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]},
- "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]},
+ 'FGFR3:c.1200C>A': {
+ 'matches': {'displayName': ['FGFR3 mutation'], 'type': ['mutation']},
+ 'does_not_matches': {'displayName': ['FGFR3 rearrangement'], 'type': ['rearrangement']},
},
}
@@ -81,118 +81,118 @@
# pos 1: expected equivalences
ensemblProteinSample = [
(
- "EGFR",
+ 'EGFR',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "ENST00000275493",
- "ENST00000275493.6",
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826",
- "NP_001333826.1",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'ENST00000275493',
+ 'ENST00000275493.6',
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826',
+ 'NP_001333826.1',
],
),
(
- "NM_001346897",
+ 'NM_001346897',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826",
- "NP_001333826.1",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826',
+ 'NP_001333826.1',
],
),
(
- "NM_001346897.2",
+ 'NM_001346897.2',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826",
- "NP_001333826.1",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826',
+ 'NP_001333826.1',
],
),
(
- "NP_001333826",
+ 'NP_001333826',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648", # Warn: Versionized ENSG won't be returned due to API limitations
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826",
- "NP_001333826.1",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648', # Warn: Versionized ENSG won't be returned due to API limitations
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826',
+ 'NP_001333826.1',
],
),
(
- "NP_001333826.1",
+ 'NP_001333826.1',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648", # Warn: Versionized ENSG won't be returned due to API limitations
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826",
- "NP_001333826.1",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648', # Warn: Versionized ENSG won't be returned due to API limitations
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826',
+ 'NP_001333826.1',
],
),
(
- "ENSG00000146648",
+ 'ENSG00000146648',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "ENST00000275493",
- "ENST00000275493.6",
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826", # Warn: Versionized NP won't be returned due to API limitations
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'ENST00000275493',
+ 'ENST00000275493.6',
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826', # Warn: Versionized NP won't be returned due to API limitations
],
),
(
- "ENSG00000146648.17",
+ 'ENSG00000146648.17',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "ENST00000275493",
- "ENST00000275493.6",
- "NM_001346897",
- "NM_001346897.2",
- "NP_001333826", # Warn: Versionized NP won't be returned due to API limitations
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'ENST00000275493',
+ 'ENST00000275493.6',
+ 'NM_001346897',
+ 'NM_001346897.2',
+ 'NP_001333826', # Warn: Versionized NP won't be returned due to API limitations
],
),
(
- "ENST00000275493",
+ 'ENST00000275493',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "ENST00000275493",
- "ENST00000275493.6",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'ENST00000275493',
+ 'ENST00000275493.6',
],
),
(
- "ENST00000275493.6",
+ 'ENST00000275493.6',
[
- "EGFR",
- "ERBB",
- "ENSG00000146648",
- "ENSG00000146648.17",
- "ENST00000275493",
- "ENST00000275493.6",
+ 'EGFR',
+ 'ERBB',
+ 'ENSG00000146648',
+ 'ENSG00000146648.17',
+ 'ENST00000275493',
+ 'ENST00000275493.6',
],
),
]
diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py
index fd97ffc..90efe5d 100644
--- a/tests/test_graphkb/test_genes.py
+++ b/tests/test_graphkb/test_genes.py
@@ -21,100 +21,100 @@
)
from pori_python.graphkb.util import get_rid
-EXCLUDE_INTEGRATION_TESTS = os.environ.get("EXCLUDE_INTEGRATION_TESTS") == "1"
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
-EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1"
+EXCLUDE_INTEGRATION_TESTS = os.environ.get('EXCLUDE_INTEGRATION_TESTS') == '1'
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
+EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1'
-CANONICAL_ONCOGENES = ["kras", "nras", "alk"]
-CANONICAL_TS = ["cdkn2a", "tp53"]
-CANONICAL_CG = ["alb"]
-CANONICAL_FUSION_GENES = ["alk", "ewsr1", "fli1"]
-CANONICAL_STRUCTURAL_VARIANT_GENES = ["brca1", "dpyd", "pten"]
-CANNONICAL_THERAPY_GENES = ["erbb2", "brca2", "egfr"]
+CANONICAL_ONCOGENES = ['kras', 'nras', 'alk']
+CANONICAL_TS = ['cdkn2a', 'tp53']
+CANONICAL_CG = ['alb']
+CANONICAL_FUSION_GENES = ['alk', 'ewsr1', 'fli1']
+CANONICAL_STRUCTURAL_VARIANT_GENES = ['brca1', 'dpyd', 'pten']
+CANNONICAL_THERAPY_GENES = ['erbb2', 'brca2', 'egfr']
PHARMACOGENOMIC_INITIAL_GENES = [
- "ACYP2",
- "CEP72",
+ 'ACYP2',
+ 'CEP72',
# 'CYP26B1', # defined as hgvsGenomic chr2:g.233760235_233760235nc_000002.12:g.233760235ta[7]>ta[8]
- "DPYD",
- "NUDT15",
- "RARG",
- "SLC28A3",
- "TPMT",
- "UGT1A6",
+ 'DPYD',
+ 'NUDT15',
+ 'RARG',
+ 'SLC28A3',
+ 'TPMT',
+ 'UGT1A6',
]
CANCER_PREDISP_INITIAL_GENES = [
- "AKT1",
- "APC",
- "ATM",
- "AXIN2",
- "BAP1",
- "BLM",
- "BMPR1A",
- "BRCA1",
- "BRCA2",
- "BRIP1",
- "CBL",
- "CDH1",
- "CDK4",
- "CDKN2A",
- "CHEK2",
- "DICER1",
- "EGFR",
- "EPCAM",
- "ETV6",
- "EZH2",
- "FH",
- "FLCN",
- "GATA2",
- "HRAS",
- "KIT",
- "MEN1",
- "MET",
- "MLH1",
- "MSH2",
- "MSH6",
- "MUTYH",
- "NBN",
- "NF1",
- "PALB2",
- "PDGFRA",
- "PMS2",
- "PTCH1",
- "PTEN",
- "PTPN11",
- "RAD51C",
- "RAD51D",
- "RB1",
- "RET",
- "RUNX1",
- "SDHA",
- "SDHB",
- "SDHC",
- "SDHD",
- "SMAD4",
- "SMARCA4",
- "STK11",
- "TP53",
- "TSC1",
- "TSC2",
- "VHL",
- "WT1",
+ 'AKT1',
+ 'APC',
+ 'ATM',
+ 'AXIN2',
+ 'BAP1',
+ 'BLM',
+ 'BMPR1A',
+ 'BRCA1',
+ 'BRCA2',
+ 'BRIP1',
+ 'CBL',
+ 'CDH1',
+ 'CDK4',
+ 'CDKN2A',
+ 'CHEK2',
+ 'DICER1',
+ 'EGFR',
+ 'EPCAM',
+ 'ETV6',
+ 'EZH2',
+ 'FH',
+ 'FLCN',
+ 'GATA2',
+ 'HRAS',
+ 'KIT',
+ 'MEN1',
+ 'MET',
+ 'MLH1',
+ 'MSH2',
+ 'MSH6',
+ 'MUTYH',
+ 'NBN',
+ 'NF1',
+ 'PALB2',
+ 'PDGFRA',
+ 'PMS2',
+ 'PTCH1',
+ 'PTEN',
+ 'PTPN11',
+ 'RAD51C',
+ 'RAD51D',
+ 'RB1',
+ 'RET',
+ 'RUNX1',
+ 'SDHA',
+ 'SDHB',
+ 'SDHC',
+ 'SDHD',
+ 'SMAD4',
+ 'SMARCA4',
+ 'STK11',
+ 'TP53',
+ 'TSC1',
+ 'TSC2',
+ 'VHL',
+ 'WT1',
]
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn():
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
-@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data")
+@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_oncogene(conn):
result = get_oncokb_oncogenes(conn)
- names = {row["name"] for row in result}
+ names = {row['name'] for row in result}
for gene in CANONICAL_ONCOGENES:
assert gene in names
for gene in CANONICAL_TS:
@@ -123,10 +123,10 @@ def test_oncogene(conn):
assert gene not in names
-@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data")
+@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_tumour_supressors(conn):
result = get_oncokb_tumour_supressors(conn)
- names = {row["name"] for row in result}
+ names = {row['name'] for row in result}
for gene in CANONICAL_TS:
assert gene in names
for gene in CANONICAL_ONCOGENES:
@@ -135,13 +135,13 @@ def test_tumour_supressors(conn):
assert gene not in names
-@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data")
+@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (tso500 not available)"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (tso500 not available)'
)
def test_cancer_genes(conn):
result = get_cancer_genes(conn)
- names = {row["name"] for row in result}
+ names = {row['name'] for row in result}
for gene in CANONICAL_CG:
assert gene in names
for gene in CANONICAL_TS:
@@ -151,104 +151,104 @@ def test_cancer_genes(conn):
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (requires CGL loader))'
)
def test_get_pharmacogenomic_info(conn):
genes, matches = get_pharmacogenomic_info(conn)
for gene in PHARMACOGENOMIC_INITIAL_GENES:
- assert gene in genes, f"{gene} not found in get_pharmacogenomic_info"
+ assert gene in genes, f'{gene} not found in get_pharmacogenomic_info'
for rid, variant_display in matches.items():
if variant_display.startswith(gene):
break
else: # no break called
# failing on this version of the func; addressed in 'new' version
- if gene == "ACYP2":
+ if gene == 'ACYP2':
continue
- assert False, f"No rid found for a pharmacogenomic with {gene}"
+ assert False, f'No rid found for a pharmacogenomic with {gene}'
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (requires CGL loader))'
)
def test_get_gene_linked_pharmacogenomic_info(conn):
genes, matches = get_gene_linked_pharmacogenomic_info(conn)
for gene in PHARMACOGENOMIC_INITIAL_GENES:
- assert gene in genes, f"{gene} not found in get_pharmacogenomic_info"
+ assert gene in genes, f'{gene} not found in get_pharmacogenomic_info'
for rid, variant_info in matches.items():
variant_gene_assoc = variant_info[1]
if gene in variant_gene_assoc:
break
else: # no break called
- assert False, f"No rid found for a pharmacogenomic with {gene}"
+ assert False, f'No rid found for a pharmacogenomic with {gene}'
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (requires CGL loader))'
)
def test_get_cancer_predisposition_info(conn):
genes, matches = get_cancer_predisposition_info(conn)
for gene in CANCER_PREDISP_INITIAL_GENES:
- assert gene in genes, f"{gene} not found in get_cancer_predisposition_info"
+ assert gene in genes, f'{gene} not found in get_cancer_predisposition_info'
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (requires CGL loader))'
)
def test_get_gene_linked_cancer_predisposition_info(conn):
genes, matches = get_gene_linked_cancer_predisposition_info(conn)
for gene in CANCER_PREDISP_INITIAL_GENES:
- assert gene in genes, f"{gene} not found in get_cancer_predisposition_info"
+ assert gene in genes, f'{gene} not found in get_cancer_predisposition_info'
@pytest.mark.parametrize(
- "alt_rep", ("NM_033360.4", "NM_033360", "ENSG00000133703.11", "ENSG00000133703")
+ 'alt_rep', ('NM_033360.4', 'NM_033360', 'ENSG00000133703.11', 'ENSG00000133703')
)
def test_get_preferred_gene_name_kras(alt_rep, conn):
gene_name = get_preferred_gene_name(conn, alt_rep)
- assert (
- "KRAS" == gene_name
- ), f"Expected KRAS as preferred gene name for {alt_rep}, not '{gene_name}'"
+ assert 'KRAS' == gene_name, (
+ f"Expected KRAS as preferred gene name for {alt_rep}, not '{gene_name}'"
+ )
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests (requires CGL loader))"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests (requires CGL loader))'
)
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
def test_find_genes_by_variant_type_structural_variant(conn):
- result = get_genes_from_variant_types(conn, ["structural variant"])
- names = {row["name"] for row in result}
+ result = get_genes_from_variant_types(conn, ['structural variant'])
+ names = {row['name'] for row in result}
for gene in CANONICAL_STRUCTURAL_VARIANT_GENES:
- assert gene in names, f"{gene} was not identified as a structural variant gene."
+ assert gene in names, f'{gene} was not identified as a structural variant gene.'
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
def test_find_no_genes_by_variant_type_with_nonmatching_source_record_id(conn):
- refseq_id = get_rid(conn, target="source", name="refseq")
+ refseq_id = get_rid(conn, target='source', name='refseq')
result = get_genes_from_variant_types(
- conn, ["structural variant"], source_record_ids=[refseq_id]
+ conn, ['structural variant'], source_record_ids=[refseq_id]
)
assert not result
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
def test_get_therapeutic_associated_genes(conn):
gene_list = get_therapeutic_associated_genes(graphkb_conn=conn)
- assert gene_list, "No get_therapeutic_associated_genes found"
- assert (
- len(gene_list) > 300
- ), f"Expected over 300 get_therapeutic_associated_genes but found {len(gene_list)}"
- names = {row["name"] for row in gene_list}
+ assert gene_list, 'No get_therapeutic_associated_genes found'
+ assert len(gene_list) > 300, (
+ f'Expected over 300 get_therapeutic_associated_genes but found {len(gene_list)}'
+ )
+ names = {row['name'] for row in gene_list}
for gene in CANNONICAL_THERAPY_GENES + CANONICAL_ONCOGENES + CANONICAL_TS:
- assert gene in names, f"{gene} not found by get_therapeutic_associated_genes"
+ assert gene in names, f'{gene} not found by get_therapeutic_associated_genes'
@pytest.mark.skipif(
EXCLUDE_BCGSC_TESTS,
- reason="excluding BCGSC-specific tests (requires oncokb and other loaders))",
+ reason='excluding BCGSC-specific tests (requires oncokb and other loaders))',
)
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
def test_get_gene_information(conn):
gene_info = get_gene_information(
conn,
@@ -258,36 +258,36 @@ def test_get_gene_information(conn):
+ CANONICAL_FUSION_GENES
+ CANONICAL_STRUCTURAL_VARIANT_GENES
+ CANNONICAL_THERAPY_GENES
- + ["notagenename"],
+ + ['notagenename'],
)
assert gene_info
- nongene_flagged = [g["name"] for g in gene_info if g["name"] == "notagenename"]
- assert not nongene_flagged, f"Improper gene category: {nongene_flagged}"
+ nongene_flagged = [g['name'] for g in gene_info if g['name'] == 'notagenename']
+ assert not nongene_flagged, f'Improper gene category: {nongene_flagged}'
for gene in CANONICAL_ONCOGENES:
- assert gene in [
- g["name"] for g in gene_info if g.get("oncogene")
- ], f"Missed oncogene {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('oncogene')], (
+ f'Missed oncogene {gene}'
+ )
for gene in CANONICAL_TS:
- assert gene in [
- g["name"] for g in gene_info if g.get("tumourSuppressor")
- ], f"Missed 'tumourSuppressor' {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('tumourSuppressor')], (
+ f"Missed 'tumourSuppressor' {gene}"
+ )
for gene in CANONICAL_FUSION_GENES:
- assert gene in [
- g["name"] for g in gene_info if g.get("knownFusionPartner")
- ], f"Missed knownFusionPartner {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('knownFusionPartner')], (
+ f'Missed knownFusionPartner {gene}'
+ )
for gene in CANONICAL_STRUCTURAL_VARIANT_GENES:
- assert gene in [
- g["name"] for g in gene_info if g.get("knownSmallMutation")
- ], f"Missed knownSmallMutation {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('knownSmallMutation')], (
+ f'Missed knownSmallMutation {gene}'
+ )
for gene in CANNONICAL_THERAPY_GENES:
- assert gene in [
- g["name"] for g in gene_info if g.get("therapeuticAssociated")
- ], f"Missed therapeuticAssociated {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('therapeuticAssociated')], (
+ f'Missed therapeuticAssociated {gene}'
+ )
for gene in (
CANONICAL_ONCOGENES
@@ -296,11 +296,11 @@ def test_get_gene_information(conn):
+ CANONICAL_STRUCTURAL_VARIANT_GENES
+ CANNONICAL_THERAPY_GENES
):
- assert gene in [
- g["name"] for g in gene_info if g.get("kbStatementRelated")
- ], f"Missed kbStatementRelated {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('kbStatementRelated')], (
+ f'Missed kbStatementRelated {gene}'
+ )
for gene in CANONICAL_CG:
- assert gene in [
- g["name"] for g in gene_info if g.get("cancerGeneListMatch")
- ], f"Missed cancerGeneListMatch {gene}"
+ assert gene in [g['name'] for g in gene_info if g.get('cancerGeneListMatch')], (
+ f'Missed cancerGeneListMatch {gene}'
+ )
diff --git a/tests/test_graphkb/test_graphkb.py b/tests/test_graphkb/test_graphkb.py
index 5a7e48f..88158ac 100644
--- a/tests/test_graphkb/test_graphkb.py
+++ b/tests/test_graphkb/test_graphkb.py
@@ -7,26 +7,26 @@
def test_login_ok():
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
assert conn.token is not None
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn():
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
class TestPaginate:
- @mock.patch("pori_python.graphkb.GraphKBConnection.request")
+ @mock.patch('pori_python.graphkb.GraphKBConnection.request')
def test_does_not_paginate_when_false(self, graphkb_request, conn):
- graphkb_request.side_effect = [{"result": [1, 2, 3]}, {"result": [4, 5]}]
+ graphkb_request.side_effect = [{'result': [1, 2, 3]}, {'result': [4, 5]}]
result = conn.query({}, paginate=False, limit=3)
assert result == [1, 2, 3]
- @mock.patch("pori_python.graphkb.GraphKBConnection.request")
+ @mock.patch('pori_python.graphkb.GraphKBConnection.request')
def test_paginates_by_default(self, graphkb_request, conn):
- graphkb_request.side_effect = [{"result": [1, 2, 3]}, {"result": [4, 5]}]
+ graphkb_request.side_effect = [{'result': [1, 2, 3]}, {'result': [4, 5]}]
result = conn.query({}, paginate=True, limit=3)
assert result == [1, 2, 3, 4, 5]
diff --git a/tests/test_graphkb/test_match.py b/tests/test_graphkb/test_match.py
index d4ee567..7a34b90 100644
--- a/tests/test_graphkb/test_match.py
+++ b/tests/test_graphkb/test_match.py
@@ -15,53 +15,53 @@
# Test datasets
from .data import ensemblProteinSample, structuralVariants
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
-EXCLUDE_INTEGRATION_TESTS = os.environ.get("EXCLUDE_INTEGRATION_TESTS") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
+EXCLUDE_INTEGRATION_TESTS = os.environ.get('EXCLUDE_INTEGRATION_TESTS') == '1'
-INCREASE_PREFIXES = ["up", "increase", "over", "gain", "amp"]
-DECREASE_PREFIXES = ["down", "decrease", "reduce", "under", "loss", "delet"]
-GENERAL_MUTATION = "mutation"
+INCREASE_PREFIXES = ['up', 'increase', 'over', 'gain', 'amp']
+DECREASE_PREFIXES = ['down', 'decrease', 'reduce', 'under', 'loss', 'delet']
+GENERAL_MUTATION = 'mutation'
def has_prefix(word: str, prefixes: List[str]) -> bool:
for prefix in prefixes:
- if re.search(r"\b" + prefix, word):
+ if re.search(r'\b' + prefix, word):
return True
return False
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn() -> GraphKBConnection:
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
-@pytest.fixture(scope="class")
+@pytest.fixture(scope='class')
def kras(conn):
- return [f["displayName"] for f in match.get_equivalent_features(conn, "kras")]
+ return [f['displayName'] for f in match.get_equivalent_features(conn, 'kras')]
"""
version found in the db for ENSG00000133703 will vary depending on which
version of ensembl was loaded. checking for any . version
"""
-kras_ensg_version = r"ENSG00000133703\..*"
+kras_ensg_version = r'ENSG00000133703\..*'
class TestGetEquivalentFeatures:
def test_kras_has_self(self, kras):
- assert "KRAS" in kras
+ assert 'KRAS' in kras
def test_expands_aliases(self, kras):
- assert "KRAS2" in kras
+ assert 'KRAS2' in kras
def test_expands_elements(self, kras):
- assert "NM_033360" in kras
- assert "ENST00000311936" in kras
+ assert 'NM_033360' in kras
+ assert 'ENST00000311936' in kras
def test_expands_generalizations(self, kras):
- assert "NM_033360.4" in kras
+ assert 'NM_033360.4' in kras
ensg_version_found = False
for item in kras:
if re.match(kras_ensg_version, item):
@@ -69,47 +69,47 @@ def test_expands_generalizations(self, kras):
assert ensg_version_found
def test_expands_generalizations_kras(self, kras):
- assert "NM_033360.4" in kras
- assert "NM_033360" in kras
+ assert 'NM_033360.4' in kras
+ assert 'NM_033360' in kras
ensg_version_found = False
for item in kras:
if re.match(kras_ensg_version, item):
ensg_version_found = True
assert ensg_version_found
- assert "ENSG00000133703" in kras
+ assert 'ENSG00000133703' in kras
@pytest.mark.parametrize(
- "alt_rep", ("NM_033360.4", "NM_033360", "ENSG00000133703.11", "ENSG00000133703")
+ 'alt_rep', ('NM_033360.4', 'NM_033360', 'ENSG00000133703.11', 'ENSG00000133703')
)
def test_expands_generalizations_refseq(self, alt_rep, conn):
- kras = [f["displayName"] for f in match.get_equivalent_features(conn, alt_rep)]
- assert "NM_033360.4" in kras
- assert "NM_033360" in kras
+ kras = [f['displayName'] for f in match.get_equivalent_features(conn, alt_rep)]
+ assert 'NM_033360.4' in kras
+ assert 'NM_033360' in kras
ensg_version_found = False
for item in kras:
if re.match(kras_ensg_version, item):
ensg_version_found = True
assert ensg_version_found
- assert "ENSG00000133703" in kras
+ assert 'ENSG00000133703' in kras
def test_checks_by_source_id_kras(self, conn):
kras = [
- f["displayName"]
+ f['displayName']
for f in match.get_equivalent_features(
- conn, "nm_033360", source="refseq", source_id_version="4", is_source_id=True
+ conn, 'nm_033360', source='refseq', source_id_version='4', is_source_id=True
)
]
- assert "KRAS" in kras
+ assert 'KRAS' in kras
# KBDEV-1163
# Testing if the addition of Ensembl protein Features are limiting results
# returned by get_equivalent_features() since SimilatTo queryType queries
# aren't traversing the graph to it's whole depth.
- @pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding data-specific test")
+ @pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding data-specific test')
def test_ensembl_protein(self, conn):
for feature, expected in ensemblProteinSample:
equivalent_features = match.get_equivalent_features(conn, feature)
- equivalent_features = [el["displayName"] for el in equivalent_features]
+ equivalent_features = [el['displayName'] for el in equivalent_features]
for equivalent_feature in expected:
assert equivalent_feature in equivalent_features
@@ -117,46 +117,46 @@ def test_ensembl_protein(self, conn):
class TestMatchCopyVariant:
def test_bad_category(self, conn):
with pytest.raises(ValueError):
- match.match_copy_variant(conn, "kras", "not a copy number")
+ match.match_copy_variant(conn, 'kras', 'not a copy number')
def test_bad_gene_name(self, conn):
with pytest.raises(FeatureNotFoundError):
- match.match_copy_variant(conn, "not a real gene name", match.INPUT_COPY_CATEGORIES.AMP)
+ match.match_copy_variant(conn, 'not a real gene name', match.INPUT_COPY_CATEGORIES.AMP)
@pytest.mark.skipif(
EXCLUDE_BCGSC_TESTS,
- reason="excluding BCGSC-specific tests - no copy loss variants in other data",
+ reason='excluding BCGSC-specific tests - no copy loss variants in other data',
)
def test_known_loss(self, conn):
- matches = match.match_copy_variant(conn, "CDKN2A", match.INPUT_COPY_CATEGORIES.ANY_LOSS)
+ matches = match.match_copy_variant(conn, 'CDKN2A', match.INPUT_COPY_CATEGORIES.ANY_LOSS)
assert matches
- types_selected = {record["type"]["name"] for record in matches}
- zygositys = {record["zygosity"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
+ zygositys = {record['zygosity'] for record in matches}
assert match.INPUT_COPY_CATEGORIES.ANY_LOSS in types_selected
assert match.INPUT_COPY_CATEGORIES.AMP not in types_selected
assert GENERAL_MUTATION not in types_selected
- assert "homozygous" in zygositys
+ assert 'homozygous' in zygositys
for variant_type in types_selected:
assert not has_prefix(variant_type, INCREASE_PREFIXES)
@pytest.mark.skipif(
EXCLUDE_BCGSC_TESTS,
- reason="excluding BCGSC-specific tests - no copy loss variants in other data",
+ reason='excluding BCGSC-specific tests - no copy loss variants in other data',
)
def test_known_loss_zygosity_filtered(self, conn):
matches = match.match_copy_variant(
- conn, "CDKN2A", match.INPUT_COPY_CATEGORIES.ANY_LOSS, True
+ conn, 'CDKN2A', match.INPUT_COPY_CATEGORIES.ANY_LOSS, True
)
assert matches
- types_selected = {record["type"]["name"] for record in matches}
- zygositys = {record["zygosity"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
+ zygositys = {record['zygosity'] for record in matches}
- assert "homozygous" not in zygositys
+ assert 'homozygous' not in zygositys
assert GENERAL_MUTATION not in types_selected
assert match.INPUT_COPY_CATEGORIES.ANY_LOSS in types_selected
@@ -166,10 +166,10 @@ def test_known_loss_zygosity_filtered(self, conn):
assert not has_prefix(variant_type, INCREASE_PREFIXES)
def test_known_gain(self, conn):
- matches = match.match_copy_variant(conn, "KRAS", "copy gain")
+ matches = match.match_copy_variant(conn, 'KRAS', 'copy gain')
assert matches
- types_selected = {record["type"]["name"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
assert GENERAL_MUTATION not in types_selected
assert match.INPUT_COPY_CATEGORIES.AMP in types_selected
@@ -179,12 +179,12 @@ def test_known_gain(self, conn):
assert not has_prefix(variant_type, DECREASE_PREFIXES)
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
def test_low_gain_excludes_amplification(self, conn):
- matches = match.match_copy_variant(conn, "KRAS", match.INPUT_COPY_CATEGORIES.GAIN)
+ matches = match.match_copy_variant(conn, 'KRAS', match.INPUT_COPY_CATEGORIES.GAIN)
- types_selected = {record["type"]["name"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
assert match.INPUT_COPY_CATEGORIES.AMP not in types_selected
assert match.INPUT_COPY_CATEGORIES.LOSS not in types_selected
@@ -194,49 +194,49 @@ def test_low_gain_excludes_amplification(self, conn):
assert not has_prefix(variant_type, DECREASE_PREFIXES)
-@pytest.mark.parametrize("pos1,pos2_start,pos2_end", [[3, 2, 5], [2, None, 5], [3, 2, None]])
+@pytest.mark.parametrize('pos1,pos2_start,pos2_end', [[3, 2, 5], [2, None, 5], [3, 2, None]])
def test_range_overlap(pos1, pos2_start, pos2_end):
- assert match.positions_overlap({"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end})
+ assert match.positions_overlap({'pos': pos1}, {'pos': pos2_start}, {'pos': pos2_end})
@pytest.mark.parametrize(
- "pos1,pos2_start,pos2_end",
+ 'pos1,pos2_start,pos2_end',
[[2, 4, 5], [5, 2, 3], [10, None, 9], [10, 11, None], [1, 2, 2], [2, 1, 1]],
)
def test_range_not_overlap(pos1, pos2_start, pos2_end):
- assert not match.positions_overlap({"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end})
+ assert not match.positions_overlap({'pos': pos1}, {'pos': pos2_start}, {'pos': pos2_end})
-@pytest.mark.parametrize("pos1", [None, 1])
-@pytest.mark.parametrize("pos2", [None, 1])
+@pytest.mark.parametrize('pos1', [None, 1])
+@pytest.mark.parametrize('pos2', [None, 1])
def test_position_match(pos1, pos2):
- assert match.positions_overlap({"pos": pos1}, {"pos": pos2})
+ assert match.positions_overlap({'pos': pos1}, {'pos': pos2})
class TestMatchExpressionVariant:
def test_bad_category(self, conn):
with pytest.raises(ValueError):
- match.match_expression_variant(conn, "PTEN", "not a expression category")
+ match.match_expression_variant(conn, 'PTEN', 'not a expression category')
def test_bad_gene_name(self, conn):
with pytest.raises(FeatureNotFoundError):
match.match_expression_variant(
- conn, "not a real gene name", match.INPUT_EXPRESSION_CATEGORIES.UP
+ conn, 'not a real gene name', match.INPUT_EXPRESSION_CATEGORIES.UP
)
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="variants in db for this test are from IPRKB and ESMO"
+ EXCLUDE_BCGSC_TESTS, reason='variants in db for this test are from IPRKB and ESMO'
)
def test_known_reduced_expression(self, conn):
matches = match.match_expression_variant(
- conn, "PTEN", match.INPUT_EXPRESSION_CATEGORIES.DOWN
+ conn, 'PTEN', match.INPUT_EXPRESSION_CATEGORIES.DOWN
)
assert matches
- types_selected = {record["type"]["name"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
assert match.INPUT_EXPRESSION_CATEGORIES.UP not in types_selected
assert GENERAL_MUTATION not in types_selected
@@ -245,16 +245,16 @@ def test_known_reduced_expression(self, conn):
assert not has_prefix(variant_type, INCREASE_PREFIXES)
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding BCGSC-specific tests - no applicable variants"
+ EXCLUDE_BCGSC_TESTS, reason='excluding BCGSC-specific tests - no applicable variants'
)
def test_known_reduced_expression_gene_id(self, conn):
- gene_id = conn.query({"target": "Feature", "filters": [{"name": "PTEN"}]})[0]["@rid"]
+ gene_id = conn.query({'target': 'Feature', 'filters': [{'name': 'PTEN'}]})[0]['@rid']
matches = match.match_expression_variant(
conn, gene_id, match.INPUT_EXPRESSION_CATEGORIES.DOWN
)
assert matches
- types_selected = {record["type"]["name"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
assert match.INPUT_EXPRESSION_CATEGORIES.UP not in types_selected
assert GENERAL_MUTATION not in types_selected
@@ -263,13 +263,13 @@ def test_known_reduced_expression_gene_id(self, conn):
assert not has_prefix(variant_type, INCREASE_PREFIXES)
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
def test_known_increased_expression(self, conn):
- matches = match.match_expression_variant(conn, "CA9", match.INPUT_EXPRESSION_CATEGORIES.UP)
+ matches = match.match_expression_variant(conn, 'CA9', match.INPUT_EXPRESSION_CATEGORIES.UP)
assert matches
- types_selected = {record["type"]["name"] for record in matches}
+ types_selected = {record['type']['name'] for record in matches}
assert match.INPUT_EXPRESSION_CATEGORIES.DOWN not in types_selected
assert GENERAL_MUTATION not in types_selected
@@ -281,115 +281,115 @@ def test_known_increased_expression(self, conn):
class TestComparePositionalVariants:
def test_nonspecific_altseq(self):
assert match.compare_positional_variants(
- conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}}
+ conn, {'break1Start': {'pos': 1}}, {'break1Start': {'pos': 1}}
)
# null matches anything
assert match.compare_positional_variants(
- conn, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, {"break1Start": {"pos": 1}}
+ conn, {'break1Start': {'pos': 1}, 'untemplatedSeq': 'T'}, {'break1Start': {'pos': 1}}
)
assert match.compare_positional_variants(
- conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}
+ conn, {'break1Start': {'pos': 1}}, {'break1Start': {'pos': 1}, 'untemplatedSeq': 'T'}
)
- @pytest.mark.parametrize("seq1", ["T", "X", "?"])
- @pytest.mark.parametrize("seq2", ["T", "X", "?"])
+ @pytest.mark.parametrize('seq1', ['T', 'X', '?'])
+ @pytest.mark.parametrize('seq2', ['T', 'X', '?'])
def test_ambiguous_altseq(self, seq1, seq2):
# ambiguous AA matches anything the same length
assert match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "untemplatedSeq": seq1},
- {"break1Start": {"pos": 1}, "untemplatedSeq": seq2},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': seq1},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': seq2},
)
def test_altseq_length_mismatch(self):
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "untemplatedSeq": "??"},
- {"break1Start": {"pos": 1}, "untemplatedSeq": "T"},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': '??'},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'T'},
)
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "untemplatedSeq": "?"},
- {"break1Start": {"pos": 1}, "untemplatedSeq": "TT"},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': '?'},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'TT'},
)
def test_nonspecific_refseq(self):
# null matches anything
assert match.compare_positional_variants(
- conn, {"break1Start": {"pos": 1}, "refSeq": "T"}, {"break1Start": {"pos": 1}}
+ conn, {'break1Start': {'pos': 1}, 'refSeq': 'T'}, {'break1Start': {'pos': 1}}
)
assert match.compare_positional_variants(
- conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "refSeq": "T"}
+ conn, {'break1Start': {'pos': 1}}, {'break1Start': {'pos': 1}, 'refSeq': 'T'}
)
- @pytest.mark.parametrize("seq1", ["T", "X", "?"])
- @pytest.mark.parametrize("seq2", ["T", "X", "?"])
+ @pytest.mark.parametrize('seq1', ['T', 'X', '?'])
+ @pytest.mark.parametrize('seq2', ['T', 'X', '?'])
def test_ambiguous_refseq(self, seq1, seq2):
# ambiguous AA matches anything the same length
assert match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "refSeq": seq1},
- {"break1Start": {"pos": 1}, "refSeq": seq2},
+ {'break1Start': {'pos': 1}, 'refSeq': seq1},
+ {'break1Start': {'pos': 1}, 'refSeq': seq2},
)
def test_refseq_length_mismatch(self):
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "refSeq": "??"},
- {"break1Start": {"pos": 1}, "refSeq": "T"},
+ {'break1Start': {'pos': 1}, 'refSeq': '??'},
+ {'break1Start': {'pos': 1}, 'refSeq': 'T'},
)
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "refSeq": "?"},
- {"break1Start": {"pos": 1}, "refSeq": "TT"},
+ {'break1Start': {'pos': 1}, 'refSeq': '?'},
+ {'break1Start': {'pos': 1}, 'refSeq': 'TT'},
)
def test_diff_altseq(self):
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "untemplatedSeq": "M"},
- {"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'M'},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'R'},
)
def test_same_altseq_matches(self):
assert match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
- {"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'R'},
+ {'break1Start': {'pos': 1}, 'untemplatedSeq': 'R'},
)
def test_diff_refseq(self):
assert not match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "refSeq": "M"},
- {"break1Start": {"pos": 1}, "refSeq": "R"},
+ {'break1Start': {'pos': 1}, 'refSeq': 'M'},
+ {'break1Start': {'pos': 1}, 'refSeq': 'R'},
)
def test_same_refseq_matches(self):
assert match.compare_positional_variants(
conn,
- {"break1Start": {"pos": 1}, "refSeq": "R"},
- {"break1Start": {"pos": 1}, "refSeq": "R"},
+ {'break1Start': {'pos': 1}, 'refSeq': 'R'},
+ {'break1Start': {'pos': 1}, 'refSeq': 'R'},
)
def test_range_vs_sub(self):
sub = {
- "break1Repr": "p.G776",
- "break1Start": {"@Class": "ProteinPosition", "pos": 776, "refAA": "G"},
- "break2Repr": "p.V777",
- "break2Start": {"@Class": "ProteinPosition", "pos": 777, "refAA": "V"},
- "reference1": "ERBB2",
- "type": "insertion",
- "untemplatedSeq": "YVMA",
- "untemplatedSeqSize": 4,
+ 'break1Repr': 'p.G776',
+ 'break1Start': {'@Class': 'ProteinPosition', 'pos': 776, 'refAA': 'G'},
+ 'break2Repr': 'p.V777',
+ 'break2Start': {'@Class': 'ProteinPosition', 'pos': 777, 'refAA': 'V'},
+ 'reference1': 'ERBB2',
+ 'type': 'insertion',
+ 'untemplatedSeq': 'YVMA',
+ 'untemplatedSeqSize': 4,
}
range_variant = {
- "break1Repr": "p.G776",
- "break1Start": {"@Class": "ProteinPosition", "pos": 776, "refAA": "G"},
- "break2Repr": "p.?776",
- "break2Start": None,
- "refSeq": "G",
- "untemplatedSeq": "VV",
+ 'break1Repr': 'p.G776',
+ 'break1Start': {'@Class': 'ProteinPosition', 'pos': 776, 'refAA': 'G'},
+ 'break2Repr': 'p.?776',
+ 'break2Start': None,
+ 'refSeq': 'G',
+ 'untemplatedSeq': 'VV',
}
assert not match.compare_positional_variants(conn, sub, range_variant)
assert not match.compare_positional_variants(conn, range_variant, sub)
@@ -398,64 +398,64 @@ def test_range_vs_sub(self):
class TestMatchPositionalVariant:
def test_error_on_duplicate_reference1(self, conn):
with pytest.raises(ValueError):
- match.match_positional_variant(conn, "KRAS:p.G12D", "#123:34")
+ match.match_positional_variant(conn, 'KRAS:p.G12D', '#123:34')
def test_error_on_bad_reference2(self, conn):
with pytest.raises(ValueError):
- match.match_positional_variant(conn, "KRAS:p.G12D", reference2="#123:34")
+ match.match_positional_variant(conn, 'KRAS:p.G12D', reference2='#123:34')
def test_error_on_duplicate_reference2(self, conn):
with pytest.raises(ValueError):
match.match_positional_variant(
- conn, "(BCR,ABL1):fusion(e.13,e.3)", reference2="#123:34"
+ conn, '(BCR,ABL1):fusion(e.13,e.3)', reference2='#123:34'
)
def test_uncertain_position_not_supported(self, conn):
with pytest.raises(NotImplementedError):
- match.match_positional_variant(conn, "(BCR,ABL1):fusion(e.13_24,e.3)")
+ match.match_positional_variant(conn, '(BCR,ABL1):fusion(e.13_24,e.3)')
def test_bad_gene_name(self, conn):
with pytest.raises(FeatureNotFoundError):
- match.match_positional_variant(conn, "ME-AS-A-GENE:p.G12D")
+ match.match_positional_variant(conn, 'ME-AS-A-GENE:p.G12D')
def test_bad_gene2_name(self, conn):
with pytest.raises(FeatureNotFoundError):
- match.match_positional_variant(conn, "(BCR,ME-AS-A-GENE):fusion(e.13,e.3)")
+ match.match_positional_variant(conn, '(BCR,ME-AS-A-GENE):fusion(e.13,e.3)')
def test_match_explicit_reference1(self, conn):
- reference1 = conn.query({"target": "Feature", "filters": {"name": "KRAS"}})[0]["@rid"]
- matches = match.match_positional_variant(conn, "p.G12D", reference1=reference1)
+ reference1 = conn.query({'target': 'Feature', 'filters': {'name': 'KRAS'}})[0]['@rid']
+ matches = match.match_positional_variant(conn, 'p.G12D', reference1=reference1)
assert matches
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
def test_match_explicit_references(self, conn):
- reference1 = conn.query({"target": "Feature", "filters": {"name": "BCR"}})[0]["@rid"]
- reference2 = conn.query({"target": "Feature", "filters": {"name": "ABL1"}})[0]["@rid"]
+ reference1 = conn.query({'target': 'Feature', 'filters': {'name': 'BCR'}})[0]['@rid']
+ reference2 = conn.query({'target': 'Feature', 'filters': {'name': 'ABL1'}})[0]['@rid']
matches = match.match_positional_variant(
- conn, "fusion(e.13,e.3)", reference1=reference1, reference2=reference2
+ conn, 'fusion(e.13,e.3)', reference1=reference1, reference2=reference2
)
assert matches
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
@pytest.mark.parametrize(
- "known_variant,related_variants,unrelated_variants",
+ 'known_variant,related_variants,unrelated_variants',
[
- ["KRAS:p.G12D", ["KRAS:p.G12X", "chr12:g.25398284C>T"], ["KRAS:p.G12V"]],
- ["KRAS:p.G13D", ["KRAS:p.?13mut"], []],
- ["chr12:g.25398284C>T", ["KRAS:p.G12D"], ["KRAS:p.G12V"]],
- ["EGFR:p.E746_S752delinsI", ["EGFR mutation"], ["EGFR copy variant"]],
+ ['KRAS:p.G12D', ['KRAS:p.G12X', 'chr12:g.25398284C>T'], ['KRAS:p.G12V']],
+ ['KRAS:p.G13D', ['KRAS:p.?13mut'], []],
+ ['chr12:g.25398284C>T', ['KRAS:p.G12D'], ['KRAS:p.G12V']],
+ ['EGFR:p.E746_S752delinsI', ['EGFR mutation'], ['EGFR copy variant']],
],
)
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="TODO: fix loader for vars ending in X, p.?, copy variant"
+ EXCLUDE_BCGSC_TESTS, reason='TODO: fix loader for vars ending in X, p.?, copy variant'
)
def test_known_variants(self, conn, known_variant, related_variants, unrelated_variants):
matches = match.match_positional_variant(conn, known_variant)
- names = {m["displayName"] for m in matches}
+ names = {m['displayName'] for m in matches}
assert matches
assert known_variant in names
for variant in related_variants:
@@ -464,103 +464,103 @@ def test_known_variants(self, conn, known_variant, related_variants, unrelated_v
assert variant not in names
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="TODO: add nonIPRKB fusion tests; source for these is IPRKB"
+ EXCLUDE_BCGSC_TESTS, reason='TODO: add nonIPRKB fusion tests; source for these is IPRKB'
)
@pytest.mark.parametrize(
- "known_variant,related_variants",
+ 'known_variant,related_variants',
[
- ["(BCR,ABL1):fusion(e.13,e.3)", ["BCR and ABL1 fusion"]],
- ["(ATP1B1,NRG1):fusion(e.2,e.2)", ["NRG1 fusion", "ATP1B1 and NRG1 fusion"]],
+ ['(BCR,ABL1):fusion(e.13,e.3)', ['BCR and ABL1 fusion']],
+ ['(ATP1B1,NRG1):fusion(e.2,e.2)', ['NRG1 fusion', 'ATP1B1 and NRG1 fusion']],
],
)
def test_known_fusions(self, conn, known_variant, related_variants):
matches = match.match_positional_variant(conn, known_variant)
- types_selected = [m["type"]["name"] for m in matches]
+ types_selected = [m['type']['name'] for m in matches]
assert GENERAL_MUTATION not in types_selected
- names = {m["displayName"] for m in matches}
+ names = {m['displayName'] for m in matches}
assert matches
assert known_variant in names
for variant in related_variants:
assert variant in names
def test_known_fusion_single_gene_no_match(self, conn):
- known = "(TERT,?):fusion(e.1,e.?)"
+ known = '(TERT,?):fusion(e.1,e.?)'
matches = match.match_positional_variant(conn, known)
assert not matches
def test_novel_specific_matches_general(self, conn):
- novel_specific = "CDKN2A:p.T18888888888888888888M"
+ novel_specific = 'CDKN2A:p.T18888888888888888888M'
matches = match.match_positional_variant(conn, novel_specific)
- names = {m["displayName"] for m in matches}
+ names = {m['displayName'] for m in matches}
assert matches
assert novel_specific not in names
- assert "CDKN2A mutation" in names
+ assert 'CDKN2A mutation' in names
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
def test_genomic_coordinates(self, conn):
- genomic = "X:g.100611165A>T"
+ genomic = 'X:g.100611165A>T'
x = match.match_positional_variant(conn, genomic)
assert x != []
# primarily checking that no error is raised rather than a specific result, but also confirm some result is returned
@pytest.mark.skipif(
- EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests"
+ EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests'
)
- @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason="source for this variant is IPRKB")
+ @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason='source for this variant is IPRKB')
def test_tert_promoter(self, conn):
- assert match.match_positional_variant(conn, "TERT:c.-124C>T")
+ assert match.match_positional_variant(conn, 'TERT:c.-124C>T')
def test_wildtype_match_error(self, conn):
- for gkb_match in match.match_positional_variant(conn, "TP53:p.E285K"):
- assert (
- "wildtype" not in gkb_match["displayName"]
- ), f"TP53:p.E285K should not match {gkb_match['displayName']}"
+ for gkb_match in match.match_positional_variant(conn, 'TP53:p.E285K'):
+ assert 'wildtype' not in gkb_match['displayName'], (
+ f'TP53:p.E285K should not match {gkb_match["displayName"]}'
+ )
@pytest.mark.skipif(
- True, reason="GERO-303 - technically incorrect notation for GSC backwards compatibility."
+ True, reason='GERO-303 - technically incorrect notation for GSC backwards compatibility.'
)
def test_tert_promoter_leading_one_alt_notation(self, conn):
# GERO-303 - technically this format is incorrect.
- assert match.match_positional_variant(conn, "TERT:c.1-124C>T")
+ assert match.match_positional_variant(conn, 'TERT:c.1-124C>T')
def test_missense_is_not_nonsense(self, conn):
"""GERO-299 - nonsense mutation creates a stop codon and is usually more severe."""
# equivalent TP53 notations
- genomic = "chr17:g.7674252C>T"
- cds = "ENST00000269305:c.711G>A"
- protein = "TP53:p.M237I"
+ genomic = 'chr17:g.7674252C>T'
+ cds = 'ENST00000269305:c.711G>A'
+ protein = 'TP53:p.M237I'
for mut in (protein, genomic, cds):
matches = match.match_positional_variant(conn, mut)
- nonsense = [m for m in matches if "nonsense" in m["displayName"]]
- assert (
- not nonsense
- ), f"Missense {mut} is not a nonsense variant: {((m['displayName'], m['@rid']) for m in nonsense)}"
+ nonsense = [m for m in matches if 'nonsense' in m['displayName']]
+ assert not nonsense, (
+ f'Missense {mut} is not a nonsense variant: {[(m["displayName"], m["@rid"]) for m in nonsense]}'
+ )
- @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason="TODO: missing record for FGFR3 rearrangement")
+ @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason='TODO: missing record for FGFR3 rearrangement')
def test_structural_variants(self, conn):
"""KBDEV-1056"""
for variant_string, expected in structuralVariants.items():
print(variant_string)
# Querying matches for variant_string
m = match.match_positional_variant(conn, variant_string)
- MatchingDisplayNames = [el["displayName"] for el in m]
- MatchingTypes = [el["type"]["name"] for el in m]
+ MatchingDisplayNames = [el['displayName'] for el in m]
+ MatchingTypes = [el['type']['name'] for el in m]
# Match
- for displayName in expected.get("matches", {}).get("displayName", []):
+ for displayName in expected.get('matches', {}).get('displayName', []):
assert displayName in MatchingDisplayNames
- for type in expected.get("matches", {}).get("type", []):
+ for type in expected.get('matches', {}).get('type', []):
assert type in MatchingTypes
# Does not match
for displayName in MatchingDisplayNames:
- assert displayName not in expected.get("does_not_matches", {}).get(
- "displayName", []
+ assert displayName not in expected.get('does_not_matches', {}).get(
+ 'displayName', []
)
for type in MatchingTypes:
- assert type not in expected.get("does_not_matches", {}).get("type", [])
+ assert type not in expected.get('does_not_matches', {}).get('type', [])
class TestCacheMissingFeatures:
@@ -568,14 +568,14 @@ def test_filling_cache(self):
mock_conn = MagicMock(
query=MagicMock(
return_value=[
- {"name": "bob", "sourceId": "alice"},
- {"name": "KRAS", "sourceId": "1234"},
+ {'name': 'bob', 'sourceId': 'alice'},
+ {'name': 'KRAS', 'sourceId': '1234'},
]
)
)
match.cache_missing_features(mock_conn)
- assert "kras" in match.FEATURES_CACHE
- assert "alice" in match.FEATURES_CACHE
+ assert 'kras' in match.FEATURES_CACHE
+ assert 'alice' in match.FEATURES_CACHE
match.FEATURES_CACHE = None
@@ -583,9 +583,9 @@ class TestTypeScreening:
# Types as class variables
default_type = DEFAULT_NON_STRUCTURAL_VARIANT_TYPE
threshold = STRUCTURAL_VARIANT_SIZE_THRESHOLD
- unambiguous_structural = ["fusion", "translocation"]
- ambiguous_structural = ["duplication", "deletion", "insertion", "indel"]
- non_structural = ["substitution", "missense", "nonsense", "frameshift", "truncating"]
+ unambiguous_structural = ['fusion', 'translocation']
+ ambiguous_structural = ['duplication', 'deletion', 'insertion', 'indel']
+ non_structural = ['substitution', 'missense', 'nonsense', 'frameshift', 'truncating']
def test_type_screening_update(self, conn, monkeypatch):
# Monkey-patching get_terms_set()
@@ -594,44 +594,44 @@ def mock_get_terms_set(graphkb_conn, base_terms):
called = True
return set()
- monkeypatch.setattr("pori_python.graphkb.match.get_terms_set", mock_get_terms_set)
+ monkeypatch.setattr('pori_python.graphkb.match.get_terms_set', mock_get_terms_set)
# Assert get_terms_set() has been called
called = False
- pori_python.graphkb.match.type_screening(conn, {"type": ""}, updateStructuralTypes=True)
+ pori_python.graphkb.match.type_screening(conn, {'type': ''}, updateStructuralTypes=True)
assert called
# Assert get_terms_set() has not been called (default behavior)
called = False
- pori_python.graphkb.match.type_screening(conn, {"type": ""})
+ pori_python.graphkb.match.type_screening(conn, {'type': ''})
assert not called
def test_type_screening_non_structural(self, conn):
for type in TestTypeScreening.non_structural:
# type substitution and the like
- assert match.type_screening(conn, {"type": type}) == type
+ assert match.type_screening(conn, {'type': type}) == type
def test_type_screening_structural(self, conn):
for type in TestTypeScreening.unambiguous_structural:
# type fusion and the like
- assert match.type_screening(conn, {"type": type}) == type
+ assert match.type_screening(conn, {'type': type}) == type
for type in TestTypeScreening.ambiguous_structural:
# w/ reference2
- assert match.type_screening(conn, {"type": type, "reference2": "#123:45"}) == type
+ assert match.type_screening(conn, {'type': type, 'reference2': '#123:45'}) == type
# w/ cytoband coordinates
- assert match.type_screening(conn, {"type": type, "prefix": "y"}) == type
+ assert match.type_screening(conn, {'type': type, 'prefix': 'y'}) == type
def test_type_screening_structural_ambiguous_size(self, conn):
for type in TestTypeScreening.ambiguous_structural:
# coordinate system with ambiguous size
- for prefix in ["e", "i"]:
+ for prefix in ['e', 'i']:
assert (
match.type_screening(
conn,
{
- "type": type,
- "break2Start": {"pos": TestTypeScreening.threshold},
- "prefix": prefix,
+ 'type': type,
+ 'break2Start': {'pos': TestTypeScreening.threshold},
+ 'prefix': prefix,
},
)
== TestTypeScreening.default_type
@@ -642,14 +642,14 @@ def test_type_screening_structural_untemplatedSeqSize(self, conn):
# Variation length too small (< threshold)
assert (
match.type_screening(
- conn, {"type": type, "untemplatedSeqSize": TestTypeScreening.threshold - 1}
+ conn, {'type': type, 'untemplatedSeqSize': TestTypeScreening.threshold - 1}
)
== TestTypeScreening.default_type
)
# Variation length big enough (>= threshold)
assert (
match.type_screening(
- conn, {"type": type, "untemplatedSeqSize": TestTypeScreening.threshold}
+ conn, {'type': type, 'untemplatedSeqSize': TestTypeScreening.threshold}
)
== type
)
@@ -658,32 +658,32 @@ def test_type_screening_structural_positions(self, conn):
for type in TestTypeScreening.ambiguous_structural:
# Variation length too small (< threshold)
for opt in [
- {"break2Start": {"pos": TestTypeScreening.threshold - 1}},
- {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "c"},
- {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "g"},
- {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "n"},
- {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "r"},
- {"break2Start": {"pos": int(TestTypeScreening.threshold / 3) - 1}, "prefix": "p"},
+ {'break2Start': {'pos': TestTypeScreening.threshold - 1}},
+ {'break2Start': {'pos': TestTypeScreening.threshold - 1}, 'prefix': 'c'},
+ {'break2Start': {'pos': TestTypeScreening.threshold - 1}, 'prefix': 'g'},
+ {'break2Start': {'pos': TestTypeScreening.threshold - 1}, 'prefix': 'n'},
+ {'break2Start': {'pos': TestTypeScreening.threshold - 1}, 'prefix': 'r'},
+ {'break2Start': {'pos': int(TestTypeScreening.threshold / 3) - 1}, 'prefix': 'p'},
{
- "break1Start": {"pos": 1 + 99},
- "break2Start": {"pos": TestTypeScreening.threshold + 99 - 1},
+ 'break1Start': {'pos': 1 + 99},
+ 'break2Start': {'pos': TestTypeScreening.threshold + 99 - 1},
},
]:
assert (
- match.type_screening(conn, {"type": type, **opt})
+ match.type_screening(conn, {'type': type, **opt})
== TestTypeScreening.default_type
)
# Variation length big enough (>= threshold)
for opt in [
- {"break2Start": {"pos": TestTypeScreening.threshold}},
- {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "c"},
- {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "g"},
- {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "n"},
- {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "r"},
- {"break2Start": {"pos": int(TestTypeScreening.threshold / 3) + 1}, "prefix": "p"},
+ {'break2Start': {'pos': TestTypeScreening.threshold}},
+ {'break2Start': {'pos': TestTypeScreening.threshold}, 'prefix': 'c'},
+ {'break2Start': {'pos': TestTypeScreening.threshold}, 'prefix': 'g'},
+ {'break2Start': {'pos': TestTypeScreening.threshold}, 'prefix': 'n'},
+ {'break2Start': {'pos': TestTypeScreening.threshold}, 'prefix': 'r'},
+ {'break2Start': {'pos': int(TestTypeScreening.threshold / 3) + 1}, 'prefix': 'p'},
{
- "break1Start": {"pos": 1 + 99},
- "break2Start": {"pos": TestTypeScreening.threshold + 99},
+ 'break1Start': {'pos': 1 + 99},
+ 'break2Start': {'pos': TestTypeScreening.threshold + 99},
},
]:
- assert match.type_screening(conn, {"type": type, **opt}) == type
+ assert match.type_screening(conn, {'type': type, **opt}) == type
diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py
index fcf0bef..89faafd 100644
--- a/tests/test_graphkb/test_statement.py
+++ b/tests/test_graphkb/test_statement.py
@@ -4,36 +4,36 @@
from pori_python.graphkb import GraphKBConnection, statement
-EXCLUDE_INTEGRATION_TESTS = os.environ.get("EXCLUDE_INTEGRATION_TESTS") == "1"
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
+EXCLUDE_INTEGRATION_TESTS = os.environ.get('EXCLUDE_INTEGRATION_TESTS') == '1'
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn() -> GraphKBConnection:
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
@pytest.fixture()
def graphkb_conn():
def make_rid_list(*values):
- return [{"@rid": v} for v in values]
+ return [{'@rid': v} for v in values]
def term_tree_calls(*final_values):
# this function makes 2 calls to conn.query here
- sets = [["fake"], final_values]
+ sets = [['fake'], final_values]
return [make_rid_list(*s) for s in sets]
return_values = [
- *term_tree_calls("1"), # therapeutic
- *term_tree_calls("2"), # therapeutic (2nd base term)
- *term_tree_calls("3"), # diagnostic
- *term_tree_calls("4"), # prognostic
- *term_tree_calls("5"), # pharmacogenomic ['metabolism']
- *term_tree_calls("6"), # pharmacogenomic ['toxicity']
- *term_tree_calls("7"), # pharmacogenomic ['dosage']
- *term_tree_calls("8"), # cancer predisposition
+ *term_tree_calls('1'), # therapeutic
+ *term_tree_calls('2'), # therapeutic (2nd base term)
+ *term_tree_calls('3'), # diagnostic
+ *term_tree_calls('4'), # prognostic
+ *term_tree_calls('5'), # pharmacogenomic ['metabolism']
+ *term_tree_calls('6'), # pharmacogenomic ['toxicity']
+ *term_tree_calls('7'), # pharmacogenomic ['dosage']
+ *term_tree_calls('8'), # cancer predisposition
*term_tree_calls(), # biological
*term_tree_calls(), # biological (2nd base term)
*term_tree_calls(), # biological (3rd base term)
@@ -46,55 +46,55 @@ def term_tree_calls(*final_values):
class TestCategorizeRelevance:
def test_default_categories(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "1")
- assert category == "therapeutic"
+ category = statement.categorize_relevance(graphkb_conn, '1')
+ assert category == 'therapeutic'
def test_first_match_returns(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "2")
- assert category == "therapeutic"
+ category = statement.categorize_relevance(graphkb_conn, '2')
+ assert category == 'therapeutic'
def test_second_category(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "3")
- assert category == "diagnostic"
+ category = statement.categorize_relevance(graphkb_conn, '3')
+ assert category == 'diagnostic'
def test_third_category(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "4")
- assert category == "prognostic"
+ category = statement.categorize_relevance(graphkb_conn, '4')
+ assert category == 'prognostic'
def test_fourth_category(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "5")
- assert category == "pharmacogenomic"
+ category = statement.categorize_relevance(graphkb_conn, '5')
+ assert category == 'pharmacogenomic'
def test_fifth_category(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "6")
- assert category == "pharmacogenomic"
+ category = statement.categorize_relevance(graphkb_conn, '6')
+ assert category == 'pharmacogenomic'
def test_predisposition_category(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "8")
- assert category == "cancer predisposition"
+ category = statement.categorize_relevance(graphkb_conn, '8')
+ assert category == 'cancer predisposition'
def test_no_match(self, graphkb_conn):
- category = statement.categorize_relevance(graphkb_conn, "x")
- assert category == ""
+ category = statement.categorize_relevance(graphkb_conn, 'x')
+ assert category == ''
def test_custom_categories(self, graphkb_conn):
category = statement.categorize_relevance(
- graphkb_conn, "x", [("blargh", ["some", "blargh"])]
+ graphkb_conn, 'x', [('blargh', ['some', 'blargh'])]
)
- assert category == ""
+ assert category == ''
category = statement.categorize_relevance(
- graphkb_conn, "1", [("blargh", ["some", "blargh"])]
+ graphkb_conn, '1', [('blargh', ['some', 'blargh'])]
)
- assert category == "blargh"
+ assert category == 'blargh'
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="db-specific rid; requires Inferred Functional Annotation source"
+ EXCLUDE_BCGSC_TESTS, reason='db-specific rid; requires Inferred Functional Annotation source'
)
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
class TestStatementMatch:
def test_truncating_categories(self, conn): # noqa - pytest fixture, not redefinition
- variant = {"@class": "CategoryVariant", "@rid": "#161:429", "displayName": "RB1 truncating"}
+ variant = {'@class': 'CategoryVariant', '@rid': '#161:429', 'displayName': 'RB1 truncating'}
statements = statement.get_statements_from_variants(conn, [variant])
assert statements
diff --git a/tests/test_graphkb/test_util.py b/tests/test_graphkb/test_util.py
index 319df57..36760b2 100644
--- a/tests/test_graphkb/test_util.py
+++ b/tests/test_graphkb/test_util.py
@@ -3,7 +3,7 @@
from pori_python.graphkb import GraphKBConnection, util
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
class OntologyTerm:
@@ -13,34 +13,34 @@ def __init__(self, name, sourceId, displayName):
self.displayName = displayName
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn() -> GraphKBConnection:
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
class TestLooksLikeRid:
- @pytest.mark.parametrize("rid", ["#3:4", "#50:04", "#-3:4", "#-3:-4", "#3:-4"])
+ @pytest.mark.parametrize('rid', ['#3:4', '#50:04', '#-3:4', '#-3:-4', '#3:-4'])
def test_valid(self, rid):
assert util.looks_like_rid(rid)
- @pytest.mark.parametrize("rid", ["-3:4", "KRAS"])
+ @pytest.mark.parametrize('rid', ['-3:4', 'KRAS'])
def test_invalid(self, rid):
assert not util.looks_like_rid(rid)
@pytest.mark.parametrize(
- "input,result",
+ 'input,result',
[
- ["GP5:p.Leu113His", "GP5:p.L113H"],
- ["GP5:p.Lys113His", "GP5:p.K113H"],
- ["CDK11A:p.Arg536Gln", "CDK11A:p.R536Q"],
- ["APC:p.Cys1405*", "APC:p.C1405*"],
- ["ApcTer:p.Cys1405*", "ApcTer:p.C1405*"],
- ["GP5:p.Leu113_His114insLys", "GP5:p.L113_H114insK"],
- ["NP_003997.1:p.Lys23_Val25del", "NP_003997.1:p.K23_V25del"],
- ["LRG_199p1:p.Val7del", "LRG_199p1:p.V7del"],
+ ['GP5:p.Leu113His', 'GP5:p.L113H'],
+ ['GP5:p.Lys113His', 'GP5:p.K113H'],
+ ['CDK11A:p.Arg536Gln', 'CDK11A:p.R536Q'],
+ ['APC:p.Cys1405*', 'APC:p.C1405*'],
+ ['ApcTer:p.Cys1405*', 'ApcTer:p.C1405*'],
+ ['GP5:p.Leu113_His114insLys', 'GP5:p.L113_H114insK'],
+ ['NP_003997.1:p.Lys23_Val25del', 'NP_003997.1:p.K23_V25del'],
+ ['LRG_199p1:p.Val7del', 'LRG_199p1:p.V7del'],
],
)
def test_convert_aa_3to1(input, result):
@@ -49,12 +49,12 @@ def test_convert_aa_3to1(input, result):
class TestStripParentheses:
@pytest.mark.parametrize(
- "breakRepr,StrippedBreakRepr",
+ 'breakRepr,StrippedBreakRepr',
[
- ["p.(E2015_Q2114)", "p.E2015_Q2114"],
- ["p.(?572_?630)", "p.?572_?630"],
- ["g.178916854", "g.178916854"],
- ["e.10", "e.10"],
+ ['p.(E2015_Q2114)', 'p.E2015_Q2114'],
+ ['p.(?572_?630)', 'p.?572_?630'],
+ ['g.178916854', 'g.178916854'],
+ ['e.10', 'e.10'],
],
)
def test_stripParentheses(self, breakRepr, StrippedBreakRepr):
@@ -63,10 +63,10 @@ def test_stripParentheses(self, breakRepr, StrippedBreakRepr):
class TestStripRefSeq:
@pytest.mark.parametrize(
- "breakRepr,StrippedBreakRepr",
+ 'breakRepr,StrippedBreakRepr',
[
- ["p.L2209", "p.2209"],
- ["p.?891", "p.891"],
+ ['p.L2209', 'p.2209'],
+ ['p.?891', 'p.891'],
# TODO: ['p.?572_?630', 'p.572_630'],
],
)
@@ -76,31 +76,31 @@ def test_stripRefSeq(self, breakRepr, StrippedBreakRepr):
class TestStripDisplayName:
@pytest.mark.parametrize(
- "opt,stripDisplayName",
+ 'opt,stripDisplayName',
[
- [{"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": True}, "ABL1:p.T315I"],
- [{"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": True}, "p.T315I"],
- [{"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": False}, "ABL1:p.315I"],
- [{"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": False}, "p.315I"],
+ [{'displayName': 'ABL1:p.T315I', 'withRef': True, 'withRefSeq': True}, 'ABL1:p.T315I'],
+ [{'displayName': 'ABL1:p.T315I', 'withRef': False, 'withRefSeq': True}, 'p.T315I'],
+ [{'displayName': 'ABL1:p.T315I', 'withRef': True, 'withRefSeq': False}, 'ABL1:p.315I'],
+ [{'displayName': 'ABL1:p.T315I', 'withRef': False, 'withRefSeq': False}, 'p.315I'],
[
- {"displayName": "chr3:g.41266125C>T", "withRef": False, "withRefSeq": False},
- "g.41266125>T",
+ {'displayName': 'chr3:g.41266125C>T', 'withRef': False, 'withRefSeq': False},
+ 'g.41266125>T',
],
[
{
- "displayName": "chrX:g.99662504_99662505insG",
- "withRef": False,
- "withRefSeq": False,
+ 'displayName': 'chrX:g.99662504_99662505insG',
+ 'withRef': False,
+ 'withRefSeq': False,
},
- "g.99662504_99662505insG",
+ 'g.99662504_99662505insG',
],
[
{
- "displayName": "chrX:g.99662504_99662505dup",
- "withRef": False,
- "withRefSeq": False,
+ 'displayName': 'chrX:g.99662504_99662505dup',
+ 'withRef': False,
+ 'withRefSeq': False,
},
- "g.99662504_99662505dup",
+ 'g.99662504_99662505dup',
],
# TODO: [{'displayName': 'VHL:c.330_331delCAinsTT', 'withRef': False, 'withRefSeq': False}, 'c.330_331delinsTT'],
# TODO: [{'displayName': 'VHL:c.464-2G>A', 'withRef': False, 'withRefSeq': False}, 'c.464-2>A'],
@@ -112,40 +112,40 @@ def test_stripDisplayName(self, opt, stripDisplayName):
class TestStringifyVariant:
@pytest.mark.parametrize(
- "hgvs_string,opt,stringifiedVariant",
+ 'hgvs_string,opt,stringifiedVariant',
[
- ["VHL:c.345C>G", {"withRef": True, "withRefSeq": True}, "VHL:c.345C>G"],
- ["VHL:c.345C>G", {"withRef": False, "withRefSeq": True}, "c.345C>G"],
- ["VHL:c.345C>G", {"withRef": True, "withRefSeq": False}, "VHL:c.345>G"],
- ["VHL:c.345C>G", {"withRef": False, "withRefSeq": False}, "c.345>G"],
+ ['VHL:c.345C>G', {'withRef': True, 'withRefSeq': True}, 'VHL:c.345C>G'],
+ ['VHL:c.345C>G', {'withRef': False, 'withRefSeq': True}, 'c.345C>G'],
+ ['VHL:c.345C>G', {'withRef': True, 'withRefSeq': False}, 'VHL:c.345>G'],
+ ['VHL:c.345C>G', {'withRef': False, 'withRefSeq': False}, 'c.345>G'],
[
- "(LMNA,NTRK1):fusion(e.10,e.12)",
- {"withRef": False, "withRefSeq": False},
- "fusion(e.10,e.12)",
+ '(LMNA,NTRK1):fusion(e.10,e.12)',
+ {'withRef': False, 'withRefSeq': False},
+ 'fusion(e.10,e.12)',
],
- ["ABCA12:p.N1671Ifs*4", {"withRef": False, "withRefSeq": False}, "p.1671Ifs*4"],
- ["x:y.p22.33copyloss", {"withRef": False, "withRefSeq": False}, "y.p22.33copyloss"],
+ ['ABCA12:p.N1671Ifs*4', {'withRef': False, 'withRefSeq': False}, 'p.1671Ifs*4'],
+ ['x:y.p22.33copyloss', {'withRef': False, 'withRefSeq': False}, 'y.p22.33copyloss'],
# TODO: ['MED12:p.(?34_?68)mut', {'withRef': False, 'withRefSeq': False}, 'p.(34_68)mut'],
# TODO: ['FLT3:p.(?572_?630)_(?572_?630)ins', {'withRef': False, 'withRefSeq': False}, 'p.(572_630)_(572_630)ins'],
],
)
def test_stringifyVariant_parsed(self, conn, hgvs_string, opt, stringifiedVariant):
- opt["variant"] = conn.parse(hgvs_string)
+ opt['variant'] = conn.parse(hgvs_string)
assert util.stringifyVariant(**opt) == stringifiedVariant
# Based on the assumption that these variants are in the database.
# the createdAt date helps avoid errors if that assumption turns out to be false
@pytest.mark.parametrize(
- "rid,createdAt,stringifiedVariant",
+ 'rid,createdAt,stringifiedVariant',
[
- ["#157:0", 1565627324397, "p.315I"],
- ["#157:79", 1565627683602, "p.776_777insVGC"],
- ["#158:35317", 1652734056311, "c.1>G"],
+ ['#157:0', 1565627324397, 'p.315I'],
+ ['#157:79', 1565627683602, 'p.776_777insVGC'],
+ ['#158:35317', 1652734056311, 'c.1>G'],
],
)
- @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason="db-dependent rids")
+ @pytest.mark.skipif(EXCLUDE_BCGSC_TESTS, reason='db-dependent rids')
def test_stringifyVariant_positional(self, conn, rid, createdAt, stringifiedVariant):
- opt = {"withRef": False, "withRefSeq": False}
+ opt = {'withRef': False, 'withRefSeq': False}
variant = conn.get_record_by_id(rid)
- if variant and variant.get("createdAt", None) == createdAt:
+ if variant and variant.get('createdAt', None) == createdAt:
assert util.stringifyVariant(variant=variant, **opt) == stringifiedVariant
diff --git a/tests/test_graphkb/test_vocab.py b/tests/test_graphkb/test_vocab.py
index fc8497f..ff64b7a 100644
--- a/tests/test_graphkb/test_vocab.py
+++ b/tests/test_graphkb/test_vocab.py
@@ -7,79 +7,79 @@
from pori_python.graphkb import GraphKBConnection, genes, vocab
-BASE_EXPRESSION = "expression variant"
-BASE_INCREASED_EXPRESSION = "increased expression"
-BASE_REDUCED_EXPRESSION = "reduced expression"
+BASE_EXPRESSION = 'expression variant'
+BASE_INCREASED_EXPRESSION = 'increased expression'
+BASE_REDUCED_EXPRESSION = 'reduced expression'
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def conn():
conn = GraphKBConnection()
- conn.login(os.environ["GRAPHKB_USER"], os.environ["GRAPHKB_PASS"])
+ conn.login(os.environ['GRAPHKB_USER'], os.environ['GRAPHKB_PASS'])
return conn
def test_expression_vocabulary(conn):
result = vocab.get_term_tree(conn, BASE_EXPRESSION)
- names = [row["name"] for row in result]
+ names = [row['name'] for row in result]
assert BASE_EXPRESSION in names
- assert "increased rna expression" in names
+ assert 'increased rna expression' in names
def test_indel_vocabulary(conn):
- result = vocab.get_term_tree(conn, "indel")
+ result = vocab.get_term_tree(conn, 'indel')
- names = {row["name"] for row in result}
- assert "indel" in names
- assert "copy variant" not in names
- assert "copy number variant" not in names
+ names = {row['name'] for row in result}
+ assert 'indel' in names
+ assert 'copy variant' not in names
+ assert 'copy number variant' not in names
def test_expression_up(conn):
result = vocab.get_term_tree(conn, BASE_INCREASED_EXPRESSION)
- names = [row["name"] for row in result]
+ names = [row['name'] for row in result]
assert BASE_EXPRESSION in names
assert BASE_INCREASED_EXPRESSION in names
- assert "increased rna expression" in names
- assert "reduced rna expression" not in names
+ assert 'increased rna expression' in names
+ assert 'reduced rna expression' not in names
assert BASE_REDUCED_EXPRESSION not in names
def test_expression_down(conn):
result = vocab.get_term_tree(conn, BASE_REDUCED_EXPRESSION)
- names = [row["name"] for row in result]
+ names = [row['name'] for row in result]
assert BASE_EXPRESSION in names
assert BASE_REDUCED_EXPRESSION in names
assert BASE_INCREASED_EXPRESSION not in names
- assert "increased rna expression" not in names
- assert "reduced rna expression" in names
+ assert 'increased rna expression' not in names
+ assert 'reduced rna expression' in names
class TestGetEquivalentTerms:
def test_gain_excludes_amplification(self, conn):
- result = vocab.get_equivalent_terms(conn, "copy gain")
- names = {row["name"] for row in result}
- assert "copy gain" in names
- assert "amplification" not in names
+ result = vocab.get_equivalent_terms(conn, 'copy gain')
+ names = {row['name'] for row in result}
+ assert 'copy gain' in names
+ assert 'amplification' not in names
def test_amplification_includes_gain(self, conn):
- result = vocab.get_equivalent_terms(conn, "amplification")
- names = {row["name"] for row in result}
- assert "copy gain" in names
- assert "amplification" in names
+ result = vocab.get_equivalent_terms(conn, 'amplification')
+ names = {row['name'] for row in result}
+ assert 'copy gain' in names
+ assert 'amplification' in names
def test_oncogenic(conn):
result = vocab.get_term_by_name(conn, genes.ONCOGENE)
- assert result["name"] == genes.ONCOGENE
+ assert result['name'] == genes.ONCOGENE
def test_get_terms_set(conn):
- terms = vocab.get_terms_set(conn, ["copy variant"])
+ terms = vocab.get_terms_set(conn, ['copy variant'])
assert terms
- more_terms = vocab.get_terms_set(conn, ["copy variant", "expression variant"])
+ more_terms = vocab.get_terms_set(conn, ['copy variant', 'expression variant'])
assert more_terms
assert len(more_terms) > len(terms)
diff --git a/tests/test_ipr/constants.py b/tests/test_ipr/constants.py
index b4edeab..8b211ff 100644
--- a/tests/test_ipr/constants.py
+++ b/tests/test_ipr/constants.py
@@ -1,3 +1,3 @@
import os
-EXCLUDE_INTEGRATION_TESTS = os.environ.get("EXCLUDE_INTEGRATION_TESTS") == "1"
+EXCLUDE_INTEGRATION_TESTS = os.environ.get('EXCLUDE_INTEGRATION_TESTS') == '1'
diff --git a/tests/test_ipr/test_annotate.py b/tests/test_ipr/test_annotate.py
index 0695deb..43de421 100644
--- a/tests/test_ipr/test_annotate.py
+++ b/tests/test_ipr/test_annotate.py
@@ -15,81 +15,81 @@
from .test_ipr import DISEASE_RIDS
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
# TP53 examples from https://www.bcgsc.ca/jira/browse/SDEV-3122
# Mutations are actually identical but on alternate transcripts.
TP53_MUT_DICT = {
- "pref": IprSmallMutationVariant( # type: ignore
+ 'pref': IprSmallMutationVariant( # type: ignore
{
- "key": "SDEV-3122_preferred",
- "gene": "TP53",
- "hgvsGenomic": "chr17:g.7674252C>T",
- "hgvsCds": "ENST00000269305:c.711G>A",
- "hgvsProtein": "TP53:p.M237I",
+ 'key': 'SDEV-3122_preferred',
+ 'gene': 'TP53',
+ 'hgvsGenomic': 'chr17:g.7674252C>T',
+ 'hgvsCds': 'ENST00000269305:c.711G>A',
+ 'hgvsProtein': 'TP53:p.M237I',
}
),
- "intersect": IprSmallMutationVariant( # type: ignore
+ 'intersect': IprSmallMutationVariant( # type: ignore
{
- "key": "SDEV-3122_alt",
- "gene": "TP53",
- "hgvsGenomic": "chr17:g.7674252C>T",
- "hgvsCds": "ENST00000610292:c.594G>A",
- "hgvsProtein": "TP53:p.M198I",
+ 'key': 'SDEV-3122_alt',
+ 'gene': 'TP53',
+ 'hgvsGenomic': 'chr17:g.7674252C>T',
+ 'hgvsCds': 'ENST00000610292:c.594G>A',
+ 'hgvsProtein': 'TP53:p.M198I',
}
),
- "prot_only": IprSmallMutationVariant( # type: ignore
- {"key": "prot_only", "gene": "TP53", "hgvsProtein": "TP53:p.M237I"}
+ 'prot_only': IprSmallMutationVariant( # type: ignore
+ {'key': 'prot_only', 'gene': 'TP53', 'hgvsProtein': 'TP53:p.M237I'}
),
- "cds_only": IprSmallMutationVariant( # type: ignore
- {"key": "cds_only", "gene": "TP53", "hgvsCds": "ENST00000269305:c.711G>A"}
+ 'cds_only': IprSmallMutationVariant( # type: ignore
+ {'key': 'cds_only', 'gene': 'TP53', 'hgvsCds': 'ENST00000269305:c.711G>A'}
),
- "genome_only": IprSmallMutationVariant( # type: ignore
- {"key": "genome_only", "gene": "TP53", "hgvsGenomic": "chr17:g.7674252C>T"}
+ 'genome_only': IprSmallMutationVariant( # type: ignore
+ {'key': 'genome_only', 'gene': 'TP53', 'hgvsGenomic': 'chr17:g.7674252C>T'}
),
}
KBDEV1231_TP53_ERR_MATCH_WT = {
- "altSeq": "",
- "chromosome": "chr17",
- "comments": "",
- "endPosition": "",
- "gene": "TP53",
- "germline": False,
- "hgvsCds": "ENST00000269305:c.853G>A",
- "hgvsGenomic": "chr17:g.7673767C>T",
- "hgvsProtein": "TP53:p.E285K",
- "key": "c23a7b0387335e7a5ed6c1081a1822ae",
- "library": "F145233;F145265",
- "ncbiBuild": "GRCh38",
- "normalAltCount": "",
- "normalDepth": "",
- "normalRefCount": "",
- "proteinChange": "p.E285K",
- "refSeq": "",
- "rnaAltCount": 311,
- "rnaDepth": 370,
- "rnaRefCount": 59,
- "startPosition": "",
- "transcript": "ENST00000269305",
- "tumourAltCopies": "",
- "tumourAltCount": 64,
- "tumourDepth": 100,
- "tumourRefCopies": "",
- "tumourRefCount": 36,
- "variant": "TP53:p.E285K",
- "variantType": "mut",
- "zygosity": "",
+ 'altSeq': '',
+ 'chromosome': 'chr17',
+ 'comments': '',
+ 'endPosition': '',
+ 'gene': 'TP53',
+ 'germline': False,
+ 'hgvsCds': 'ENST00000269305:c.853G>A',
+ 'hgvsGenomic': 'chr17:g.7673767C>T',
+ 'hgvsProtein': 'TP53:p.E285K',
+ 'key': 'c23a7b0387335e7a5ed6c1081a1822ae',
+ 'library': 'F145233;F145265',
+ 'ncbiBuild': 'GRCh38',
+ 'normalAltCount': '',
+ 'normalDepth': '',
+ 'normalRefCount': '',
+ 'proteinChange': 'p.E285K',
+ 'refSeq': '',
+ 'rnaAltCount': 311,
+ 'rnaDepth': 370,
+ 'rnaRefCount': 59,
+ 'startPosition': '',
+ 'transcript': 'ENST00000269305',
+ 'tumourAltCopies': '',
+ 'tumourAltCount': 64,
+ 'tumourDepth': 100,
+ 'tumourRefCopies': '',
+ 'tumourRefCount': 36,
+ 'variant': 'TP53:p.E285K',
+ 'variantType': 'mut',
+ 'zygosity': '',
}
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def graphkb_conn():
- username = os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"])
- password = os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"])
- graphkb_url = os.environ.get("GRAPHKB_URL", False)
+ username = os.environ.get('GRAPHKB_USER', os.environ['IPR_USER'])
+ password = os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS'])
+ graphkb_url = os.environ.get('GRAPHKB_URL', False)
if graphkb_url:
graphkb_conn = GraphKBConnection(graphkb_url)
else:
@@ -99,77 +99,77 @@ def graphkb_conn():
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding tests that depend on BCGSC-specific data"
+ EXCLUDE_BCGSC_TESTS, reason='excluding tests that depend on BCGSC-specific data'
)
class TestAnnotation:
def test_annotate_nonsense_vs_missense(self, graphkb_conn):
"""Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation."""
- for key in ("prot_only", "cds_only", "genome_only", "pref"):
+ for key in ('prot_only', 'cds_only', 'genome_only', 'pref'):
matched = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[key]], DISEASE_RIDS)
# nonsense - stop codon - should not match. This is missense not nonsense (#164:933).
- nonsense = [a for a in matched if a["kbVariant"] == "TP53 nonsense"]
- assert not nonsense, f"nonsense matched to {key}: {TP53_MUT_DICT[key]}"
- assert matched, f"should have matched in {key}: {TP53_MUT_DICT[key]}"
+ nonsense = [a for a in matched if a['kbVariant'] == 'TP53 nonsense']
+ assert not nonsense, f'nonsense matched to {key}: {TP53_MUT_DICT[key]}'
+ assert matched, f'should have matched in {key}: {TP53_MUT_DICT[key]}'
def test_annotate_nonsense_vs_missense_protein(self, graphkb_conn):
"""Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation."""
- for key in ("prot_only", "pref"):
+ for key in ('prot_only', 'pref'):
matched = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[key]], DISEASE_RIDS)
# nonsense - stop codon - should not match. This is missense not nonsense (#164:933).
- nonsense = [a for a in matched if "nonsense" in a["kbVariant"]]
- assert not nonsense, f"nonsense matched to {key}: {TP53_MUT_DICT[key]}"
- assert matched, f"should have matched in {key}: {TP53_MUT_DICT[key]}"
+ nonsense = [a for a in matched if 'nonsense' in a['kbVariant']]
+ assert not nonsense, f'nonsense matched to {key}: {TP53_MUT_DICT[key]}'
+ assert matched, f'should have matched in {key}: {TP53_MUT_DICT[key]}'
def test_annotate_signature_variants_cosmic(self, graphkb_conn):
"""Test a Cosmic Signature CVs with known GKB statements"""
- signature = "SBS10B"
+ signature = 'SBS10B'
cosmic = annotate_signature_variants(
graphkb_conn,
DISEASE_RIDS,
preprocess_signature_variants(
[
{
- "displayName": f"{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}",
- "signatureName": signature,
- "variantTypeName": COSMIC_SIGNATURE_VARIANT_TYPE,
+ 'displayName': f'{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}',
+ 'signatureName': signature,
+ 'variantTypeName': COSMIC_SIGNATURE_VARIANT_TYPE,
}
]
),
)
assert len(cosmic) != 0
- @pytest.mark.skip(reason="no GKB statement for dMMR Signature CVs yet")
+ @pytest.mark.skip(reason='no GKB statement for dMMR Signature CVs yet')
def test_annotate_signature_variants_dmmr(self, graphkb_conn):
"""Test a dMMR (from Cosmic) Signature CVs with known GKB statements"""
- signature = "DMMR"
+ signature = 'DMMR'
dmmr = annotate_signature_variants(
graphkb_conn,
DISEASE_RIDS,
preprocess_signature_variants(
[
{
- "displayName": f"{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}",
- "signatureName": signature,
- "variantTypeName": COSMIC_SIGNATURE_VARIANT_TYPE,
+ 'displayName': f'{signature} {COSMIC_SIGNATURE_VARIANT_TYPE}',
+ 'signatureName': signature,
+ 'variantTypeName': COSMIC_SIGNATURE_VARIANT_TYPE,
}
]
),
)
assert len(dmmr) != 0
- @pytest.mark.skip(reason="no GKB statement for HLA Signature CVs yet")
+ @pytest.mark.skip(reason='no GKB statement for HLA Signature CVs yet')
def test_annotate_signature_variants_hla(self, graphkb_conn):
"""Test an HLA Signature CVs with known GKB statements"""
- signature = "HLA-A*02:01"
+ signature = 'HLA-A*02:01'
hla = annotate_signature_variants(
graphkb_conn,
DISEASE_RIDS,
preprocess_signature_variants(
[
{
- "displayName": f"{signature} {HLA_SIGNATURE_VARIANT_TYPE}",
- "signatureName": signature,
- "variantTypeName": HLA_SIGNATURE_VARIANT_TYPE,
+ 'displayName': f'{signature} {HLA_SIGNATURE_VARIANT_TYPE}',
+ 'signatureName': signature,
+ 'variantTypeName': HLA_SIGNATURE_VARIANT_TYPE,
}
]
),
@@ -184,9 +184,9 @@ def test_annotate_signature_variants_tmb(self, graphkb_conn):
preprocess_signature_variants(
[
{
- "displayName": f"{TMB_SIGNATURE} {TMB_SIGNATURE_VARIANT_TYPE}",
- "signatureName": TMB_SIGNATURE,
- "variantTypeName": TMB_SIGNATURE_VARIANT_TYPE,
+ 'displayName': f'{TMB_SIGNATURE} {TMB_SIGNATURE_VARIANT_TYPE}',
+ 'signatureName': TMB_SIGNATURE,
+ 'variantTypeName': TMB_SIGNATURE_VARIANT_TYPE,
}
]
),
@@ -199,39 +199,39 @@ def test_annotate_signature_variants_msi(self, graphkb_conn):
msi = annotate_signature_variants(
graphkb_conn,
DISEASE_RIDS,
- preprocess_signature_variants([MSI_MAPPING.get("microsatellite instability")]),
+ preprocess_signature_variants([MSI_MAPPING.get('microsatellite instability')]),
)
assert len(msi) != 0
def test_annotate_structural_variants_tp53(self, graphkb_conn):
"""Verify alternate TP53 variants match."""
- ref_key = "prot_only"
+ ref_key = 'prot_only'
pref = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[ref_key]], DISEASE_RIDS)
# GERO-299 - nonsense - stop codon - should not match. This is missense not nonsense (#164:933).
- nonsense = [a for a in pref if a["kbVariant"] == "TP53 nonsense"]
+ nonsense = [a for a in pref if a['kbVariant'] == 'TP53 nonsense']
assert not nonsense
- pref_vars = set([m["kbVariant"] for m in pref])
- assert pref_vars, f"No matches to {TP53_MUT_DICT[pref]}"
+ pref_vars = set([m['kbVariant'] for m in pref])
+ assert pref_vars, f'No matches to {TP53_MUT_DICT[ref_key]}'
print(pref_vars)
for key, alt_rep in TP53_MUT_DICT.items():
if key == ref_key:
continue
- if key in ("cds_only", "genome_only"):
+ if key in ('cds_only', 'genome_only'):
# KBDEV-1259. Temporarily disabled until the issue is resolved.
continue
alt = annotate_positional_variants(graphkb_conn, [alt_rep], DISEASE_RIDS)
- alt_vars = set([m["kbVariant"] for m in alt])
+ alt_vars = set([m['kbVariant'] for m in alt])
diff = pref_vars.symmetric_difference(alt_vars)
missing = pref_vars.difference(alt_vars)
known_issues = set()
- if key == "genome_only":
+ if key == 'genome_only':
# genome_only matched to more precise type 'TP53 deleterious mutation' but not 'TP53 mutation'
- known_issues.add("TP53 mutation")
+ known_issues.add('TP53 mutation')
missing = pref_vars.difference(alt_vars).difference(known_issues)
print(alt_vars)
- assert not missing, f"{key} missing{missing}: {diff}"
+ assert not missing, f'{key} missing {missing}: {diff}'
def test_wt_not_matched(self, graphkb_conn):
"""Verify wildtypes are not matched to mutations."""
@@ -239,5 +239,5 @@ def test_wt_not_matched(self, graphkb_conn):
graphkb_conn, [KBDEV1231_TP53_ERR_MATCH_WT], DISEASE_RIDS
)
# KBDEV-1231 - wildtype - should not match. A mutation is not wildtype
- wt_matches = sorted(set([m["kbVariant"] for m in matches if "wildtype" in m["kbVariant"]]))
+ wt_matches = sorted(set([m['kbVariant'] for m in matches if 'wildtype' in m['kbVariant']]))
assert not wt_matches, f"Mutation 'TP53:p.E285K' should NOT match {wt_matches}"
diff --git a/tests/test_ipr/test_connection.py b/tests/test_ipr/test_connection.py
index 611afb2..d83ac79 100644
--- a/tests/test_ipr/test_connection.py
+++ b/tests/test_ipr/test_connection.py
@@ -4,37 +4,37 @@
from pori_python.ipr.connection import IprConnection
-IMAGE_DIR = os.path.join(os.path.dirname(__file__), "../../docs/images")
+IMAGE_DIR = os.path.join(os.path.dirname(__file__), '../../docs/images')
class TestPostImages:
def test_no_images_ok(self):
def request(*args, **kwargs):
m = mock.MagicMock(
- json=lambda: [{"upload": "successful"}], raise_for_status=lambda: None
+ json=lambda: [{'upload': 'successful'}], raise_for_status=lambda: None
)
return m
- with mock.patch("pori_python.ipr.connection.requests.request", request):
- conn = IprConnection("user", "pass")
- result = conn.post_images("report_id", files={}, data={})
+ with mock.patch('pori_python.ipr.connection.requests.request', request):
+ conn = IprConnection('user', 'pass')
+ result = conn.post_images('report_id', files={}, data={})
assert result is None
def test_images_load_ok(self):
def request(*args, **kwargs):
m = mock.MagicMock(
- json=lambda: [{"upload": "successful"}], raise_for_status=lambda: None
+ json=lambda: [{'upload': 'successful'}], raise_for_status=lambda: None
)
return m
- with mock.patch("pori_python.ipr.connection.requests.request", request):
- conn = IprConnection("user", "pass")
+ with mock.patch('pori_python.ipr.connection.requests.request', request):
+ conn = IprConnection('user', 'pass')
result = conn.post_images(
- "report_id",
+ 'report_id',
files={
- "expression.correlation": os.path.join(IMAGE_DIR, "expression_correlation.png"),
- "mixcr.circos_trb_vj_gene_usage": os.path.join(
- IMAGE_DIR, "mixcr.circos_trb_vj_gene_usage.png"
+ 'expression.correlation': os.path.join(IMAGE_DIR, 'expression_correlation.png'),
+ 'mixcr.circos_trb_vj_gene_usage': os.path.join(
+ IMAGE_DIR, 'mixcr.circos_trb_vj_gene_usage.png'
),
},
data={},
@@ -44,54 +44,54 @@ def request(*args, **kwargs):
def test_images_with_data_load_ok(self):
def request(*args, **kwargs):
m = mock.MagicMock(
- json=lambda: [{"upload": "successful"}], raise_for_status=lambda: None
+ json=lambda: [{'upload': 'successful'}], raise_for_status=lambda: None
)
return m
- with mock.patch("pori_python.ipr.connection.requests.request", request):
- conn = IprConnection("user", "pass")
+ with mock.patch('pori_python.ipr.connection.requests.request', request):
+ conn = IprConnection('user', 'pass')
result = conn.post_images(
- "report_id",
+ 'report_id',
files={
- "expression.correlation": os.path.join(IMAGE_DIR, "expression_correlation.png"),
- "mixcr.circos_trb_vj_gene_usage": os.path.join(
- IMAGE_DIR, "mixcr.circos_trb_vj_gene_usage.png"
+ 'expression.correlation': os.path.join(IMAGE_DIR, 'expression_correlation.png'),
+ 'mixcr.circos_trb_vj_gene_usage': os.path.join(
+ IMAGE_DIR, 'mixcr.circos_trb_vj_gene_usage.png'
),
},
- data={"expression.correlation.title": "this is a title"},
+ data={'expression.correlation.title': 'this is a title'},
)
assert result is None
def test_bad_file(self):
def request(*args, **kwargs):
m = mock.MagicMock(
- json=lambda: [{"upload": "successful"}], raise_for_status=lambda: None
+ json=lambda: [{'upload': 'successful'}], raise_for_status=lambda: None
)
return m
- with mock.patch("pori_python.ipr.connection.requests.request", request):
- conn = IprConnection("user", "pass")
+ with mock.patch('pori_python.ipr.connection.requests.request', request):
+ conn = IprConnection('user', 'pass')
with pytest.raises(FileNotFoundError):
conn.post_images(
- "report_id", files={"expression.correlation": "thing/that/does/not/exist.png"}
+ 'report_id', files={'expression.correlation': 'thing/that/does/not/exist.png'}
)
def test_failed_image_load(self):
def request(*args, **kwargs):
m = mock.MagicMock(
- json=lambda: [{"upload": "anything else", "key": "thing"}],
+ json=lambda: [{'upload': 'anything else', 'key': 'thing'}],
raise_for_status=lambda: None,
)
return m
- with mock.patch("pori_python.ipr.connection.requests.request", request):
- conn = IprConnection("user", "pass")
+ with mock.patch('pori_python.ipr.connection.requests.request', request):
+ conn = IprConnection('user', 'pass')
with pytest.raises(ValueError):
conn.post_images(
- "report_id",
+ 'report_id',
{
- "expression.correlation": os.path.join(
- IMAGE_DIR, "expression_correlation.png"
+ 'expression.correlation': os.path.join(
+ IMAGE_DIR, 'expression_correlation.png'
)
},
)
diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py
index e99d170..56e0c4d 100644
--- a/tests/test_ipr/test_inputs.py
+++ b/tests/test_ipr/test_inputs.py
@@ -32,27 +32,27 @@
from pori_python.ipr.util import logger
from pori_python.types import IprFusionVariant, IprGeneVariant
-DATA_DIR = os.path.join(os.path.dirname(__file__), "test_data")
-NON_EMPTY_STRING_NULLS = ["", None, np.nan, pd.NA]
-EXPECTED_COSMIC = {"DBS9", "DBS11", "ID2", "ID7", "ID10", "SBS2", "SBS5", "DMMR"}
+DATA_DIR = os.path.join(os.path.dirname(__file__), 'test_data')
+NON_EMPTY_STRING_NULLS = ['', None, np.nan, pd.NA]
+EXPECTED_COSMIC = {'DBS9', 'DBS11', 'ID2', 'ID7', 'ID10', 'SBS2', 'SBS5', 'DMMR'}
EXPECTED_HLA = {
- "HLA-A*02:01",
- "HLA-A*02",
- "HLA-A*30:01",
- "HLA-A*30",
- "HLA-B*27:01",
- "HLA-B*27",
- "HLA-B*15:01",
- "HLA-B*15",
- "HLA-C*03:03",
- "HLA-C*03",
- "HLA-C*06:02",
- "HLA-C*06",
+ 'HLA-A*02:01',
+ 'HLA-A*02',
+ 'HLA-A*30:01',
+ 'HLA-A*30',
+ 'HLA-B*27:01',
+ 'HLA-B*27',
+ 'HLA-B*15:01',
+ 'HLA-B*15',
+ 'HLA-C*03:03',
+ 'HLA-C*03',
+ 'HLA-C*06:02',
+ 'HLA-C*06',
}
EXPECTED_TMB = {TMB_SIGNATURE}
-EXPECTED_MSI = {MSI_MAPPING.get("microsatellite instability")["signatureName"]}
+EXPECTED_MSI = {MSI_MAPPING.get('microsatellite instability')['signatureName']}
EXPECTED_HRD = {
- HRD_MAPPING.get("homologous recombination deficiency strong signature")["signatureName"]
+ HRD_MAPPING.get('homologous recombination deficiency strong signature')['signatureName']
}
@@ -63,187 +63,186 @@ def read_data_file(filename):
class TestPreProcessSmallMutations:
def test_load_test_file(self) -> None:
records = preprocess_small_mutations(
- pd.read_csv(os.path.join(DATA_DIR, "small_mutations.tab"), sep="\t").to_dict("records")
+ pd.read_csv(os.path.join(DATA_DIR, 'small_mutations.tab'), sep='\t').to_dict('records')
)
assert records
assert len(records) == 2614
def test_maintains_optional_fields(self):
original = {
- "gene": "A1BG",
- "proteinChange": "p.V460M",
- "zygosity": "het",
- "tumourAltCount": 42,
- "tumourRefCount": 48,
- "hgvsProtein": "",
- "transcript": "ENST1000",
- "hgvsCds": "",
- "hgvsGenomic": "",
- "key": "02fe85a3477784b5ac0f8ecffb300d10",
- "variant": "blargh",
- "chromosome": "2",
- "startPosition": 1234,
+ 'gene': 'A1BG',
+ 'proteinChange': 'p.V460M',
+ 'zygosity': 'het',
+ 'tumourAltCount': 42,
+ 'tumourRefCount': 48,
+ 'hgvsProtein': '',
+ 'transcript': 'ENST1000',
+ 'hgvsCds': '',
+ 'hgvsGenomic': '',
+ 'key': '02fe85a3477784b5ac0f8ecffb300d10',
+ 'variant': 'blargh',
+ 'chromosome': '2',
+ 'startPosition': 1234,
}
records = preprocess_small_mutations([original])
record = records[0]
- assert record["variantType"] == "mut"
+ assert record['variantType'] == 'mut'
for col in original:
assert col in record
- assert record["variant"] == "A1BG:p.V460M"
- assert "endPosition" in record
- assert record["endPosition"] == record["startPosition"]
- assert "tumourDepth" in record
- assert record["tumourDepth"] == 90
+ assert record['variant'] == 'A1BG:p.V460M'
+ assert 'endPosition' in record
+ assert record['endPosition'] == record['startPosition']
+ assert 'tumourDepth' in record
+ assert record['tumourDepth'] == 90
def test_null(self):
original = {
- "gene": "A1BG",
- "proteinChange": "p.V460M",
- "tumourAltCount": 42,
- "tumourRefCount": 48,
- "startPosition": 1234,
+ 'gene': 'A1BG',
+ 'proteinChange': 'p.V460M',
+ 'tumourAltCount': 42,
+ 'tumourRefCount': 48,
+ 'startPosition': 1234,
}
# Make sure TEST_KEYS are appropriate.
# For some fields, like 'ref' and 'alt', NA is _not_ equivalent to a null string.
- TEST_KEYS = ["startPosition", "endPosition", "tumourAltCount", "tumourRefCount"]
+ TEST_KEYS = ['startPosition', 'endPosition', 'tumourAltCount', 'tumourRefCount']
for key in TEST_KEYS:
for null in NON_EMPTY_STRING_NULLS:
small_mut = original.copy()
small_mut[key] = null
records = preprocess_small_mutations([small_mut])
record = records[0]
- assert record["variantType"] == "mut"
+ assert record['variantType'] == 'mut'
for col in original:
assert col in record
- assert record["variant"] == "A1BG:p.V460M"
- assert "endPosition" in record
+ assert record['variant'] == 'A1BG:p.V460M'
+ assert 'endPosition' in record
def test_load_small_mutations_probe(self) -> None:
records = preprocess_small_mutations(
- pd.read_csv(os.path.join(DATA_DIR, "small_mutations_probe.tab"), sep="\t").to_dict(
- "records"
+ pd.read_csv(os.path.join(DATA_DIR, 'small_mutations_probe.tab'), sep='\t').to_dict(
+ 'records'
)
)
assert records
assert len(records) == 4
- assert records[0]["variantType"] == "mut"
- assert "variant" in records[0]
+ assert records[0]['variantType'] == 'mut'
+ assert 'variant' in records[0]
class TestPreProcessCopyVariants:
def test_load_copy_variants(self) -> None:
records = preprocess_copy_variants(
- pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t").to_dict("records")
+ pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t').to_dict('records')
)
assert records
assert len(records) == 4603
- assert records[0]["variantType"] == "cnv"
- assert "variant" in records[0]
+ assert records[0]['variantType'] == 'cnv'
+ assert 'variant' in records[0]
def test_add_chr_to_chrband(self) -> None:
- df1 = pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t")
- df1 = df1.to_dict("records")
+ df1 = pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t')
+ df1 = df1.to_dict('records')
records = preprocess_copy_variants(df1)
assert records
assert len(records) == 4603
- assert records[0]["chromosomeBand"] == "1q22.1"
- assert "chromosome" not in records[0]
+ assert records[0]['chromosomeBand'] == '1q22.1'
+ assert 'chromosome' not in records[0]
def test_add_int_chr_to_chrband(self) -> None:
- df1 = pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t")
- df1["chromosome"] = df1["chromosome"].apply(lambda x: x.split("chr")[1])
- df1 = df1.to_dict("records")
+ df1 = pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t')
+ df1['chromosome'] = df1['chromosome'].apply(lambda x: x.split('chr')[1])
+ df1 = df1.to_dict('records')
records = preprocess_copy_variants(df1)
assert records
assert len(records) == 4603
- assert records[0]["chromosomeBand"] == "1q22.1"
- assert "chromosome" not in records[0]
+ assert records[0]['chromosomeBand'] == '1q22.1'
+ assert 'chromosome' not in records[0]
def test_add_chr_to_chrband_if_chromosome_not_present(self) -> None:
- df1 = pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t")
- df1["chr"] = df1["chromosome"].copy()
- df1.drop("chromosome", axis=1, inplace=True)
- df1 = df1.to_dict("records")
+ df1 = pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t')
+ df1['chr'] = df1['chromosome'].copy()
+ df1.drop('chromosome', axis=1, inplace=True)
+ df1 = df1.to_dict('records')
records = preprocess_copy_variants(df1)
assert records
assert len(records) == 4603
- assert records[0]["chromosomeBand"] == "1q22.1"
- assert "chr" not in records[0]
- assert "chromosome" not in records[0]
+ assert records[0]['chromosomeBand'] == '1q22.1'
+ assert 'chr' not in records[0]
+ assert 'chromosome' not in records[0]
def test_do_not_add_chr_if_chr_already_in_chrband(self) -> None:
- df1 = pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t")
+ df1 = pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t')
df2 = df1.copy()
- df1["chromosomeBand"] = df1["chromosomeBand"].apply(lambda x: "chr99" + x)
- df1 = df1.to_dict("records")
- df2["chromosomeBand"] = df2["chromosomeBand"].apply(lambda x: "99" + x)
- df2 = df2.to_dict("records")
+ df1['chromosomeBand'] = df1['chromosomeBand'].apply(lambda x: 'chr99' + x)
+ df1 = df1.to_dict('records')
+ df2['chromosomeBand'] = df2['chromosomeBand'].apply(lambda x: '99' + x)
+ df2 = df2.to_dict('records')
records = preprocess_copy_variants(df1)
assert records
assert len(records) == 4603
- assert records[0]["chromosomeBand"] == "chr99q22.1"
- assert "chr" not in records[0] # make sure these cols are still getting removed
- assert "chromosome" not in records[0]
+ assert records[0]['chromosomeBand'] == 'chr99q22.1'
+ assert 'chr' not in records[0] # make sure these cols are still getting removed
+ assert 'chromosome' not in records[0]
records2 = preprocess_copy_variants(df2)
- assert records2[0]["chromosomeBand"] == "99q22.1"
+ assert records2[0]['chromosomeBand'] == '99q22.1'
def test_no_error_if_chr_column_not_present(self) -> None:
- df1 = pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t")
- df1.drop("chromosome", axis=1, inplace=True)
- df1 = df1.to_dict("records")
+ df1 = pd.read_csv(os.path.join(DATA_DIR, 'copy_variants.tab'), sep='\t')
+ df1.drop('chromosome', axis=1, inplace=True)
+ df1 = df1.to_dict('records')
records = preprocess_copy_variants(df1)
assert records
assert len(records) == 4603
- assert records[0]["chromosomeBand"] == "q22.1"
+ assert records[0]['chromosomeBand'] == 'q22.1'
def test_null(self):
for kb_cat in list(INPUT_COPY_CATEGORIES.values()) + NON_EMPTY_STRING_NULLS:
- original = {"gene": "ERBB2", "kbCategory": kb_cat}
+ original = {'gene': 'ERBB2', 'kbCategory': kb_cat}
for key in COPY_OPTIONAL:
for null in NON_EMPTY_STRING_NULLS:
copy_var = original.copy()
copy_var[key] = null
records = preprocess_copy_variants([copy_var])
record = records[0]
- assert record["variantType"] == "cnv"
+ assert record['variantType'] == 'cnv'
class TestPreProcessSignatureVariants:
-
# Preprocessing records from file
cosmic = preprocess_cosmic(
[
- r["signature"]
- for r in pd.read_csv(os.path.join(DATA_DIR, "cosmic_variants.tab"), sep="\t").to_dict(
- "records"
+ r['signature']
+ for r in pd.read_csv(os.path.join(DATA_DIR, 'cosmic_variants.tab'), sep='\t').to_dict(
+ 'records'
)
]
)
hla = preprocess_hla(
- pd.read_csv(os.path.join(DATA_DIR, "hla_variants.tab"), sep="\t").to_dict("records")
+ pd.read_csv(os.path.join(DATA_DIR, 'hla_variants.tab'), sep='\t').to_dict('records')
)
tmb = preprocess_tmb(
tmb_high=TMB_SIGNATURE_HIGH_THRESHOLD,
tmburMutationBurden=pd.read_csv(
- os.path.join(DATA_DIR, "tmburMutationBurden.tab"), sep="\t"
- ).to_dict("records"),
- genomeTmb="11.430000000000001",
+ os.path.join(DATA_DIR, 'tmburMutationBurden.tab'), sep='\t'
+ ).to_dict('records'),
+ genomeTmb='11.430000000000001',
)
msi = preprocess_msi(
[
{
- "score": 27.55,
- "kbCategory": "microsatellite instability",
- "key": "microsatellite instability",
+ 'score': 27.55,
+ 'kbCategory': 'microsatellite instability',
+ 'key': 'microsatellite instability',
}
]
)
hrd = preprocess_hrd(
{
- "score": 9999,
- "kbCategory": "homologous recombination deficiency strong signature",
- "key": "homologous recombination deficiency strong signature",
+ 'score': 9999,
+ 'kbCategory': 'homologous recombination deficiency strong signature',
+ 'key': 'homologous recombination deficiency strong signature',
}
)
@@ -251,46 +250,46 @@ class TestPreProcessSignatureVariants:
def test_preprocess_cosmic(self) -> None:
assert self.cosmic
assert len(self.cosmic) == len(EXPECTED_COSMIC)
- assert "variantTypeName" in self.cosmic[0]
- assert "displayName" in self.cosmic[0]
+ assert 'variantTypeName' in self.cosmic[0]
+ assert 'displayName' in self.cosmic[0]
- signatureNames = {r.get("signatureName", "") for r in self.cosmic}
+ signatureNames = {r.get('signatureName', '') for r in self.cosmic}
assert len(EXPECTED_COSMIC.symmetric_difference(signatureNames)) == 0
def test_preprocess_hla(self) -> None:
assert self.hla
assert len(self.hla) == len(EXPECTED_HLA)
- assert "variantTypeName" in self.hla[0]
- assert "displayName" in self.hla[0]
+ assert 'variantTypeName' in self.hla[0]
+ assert 'displayName' in self.hla[0]
- signatureNames = {r.get("signatureName", "") for r in self.hla}
+ signatureNames = {r.get('signatureName', '') for r in self.hla}
assert len(EXPECTED_HLA.symmetric_difference(signatureNames)) == 0
def test_preprocess_tmb(self) -> None:
assert self.tmb
assert len(self.tmb) == len(EXPECTED_TMB)
- assert "variantTypeName" in self.tmb[0]
- assert "displayName" in self.tmb[0]
+ assert 'variantTypeName' in self.tmb[0]
+ assert 'displayName' in self.tmb[0]
- signatureNames = {r.get("signatureName", "") for r in self.tmb}
+ signatureNames = {r.get('signatureName', '') for r in self.tmb}
assert len(EXPECTED_TMB.symmetric_difference(signatureNames)) == 0
def test_preprocess_msi(self) -> None:
assert self.msi
assert len(self.msi) == len(EXPECTED_MSI)
- assert "variantTypeName" in self.msi[0]
- assert "displayName" in self.msi[0]
+ assert 'variantTypeName' in self.msi[0]
+ assert 'displayName' in self.msi[0]
- signatureNames = {r.get("signatureName", "") for r in self.msi}
+ signatureNames = {r.get('signatureName', '') for r in self.msi}
assert len(EXPECTED_MSI.symmetric_difference(signatureNames)) == 0
def test_preprocess_hrd(self) -> None:
assert self.hrd
assert len(self.hrd) == len(EXPECTED_HRD)
- assert "variantTypeName" in self.hrd[0]
- assert "displayName" in self.hrd[0]
+ assert 'variantTypeName' in self.hrd[0]
+ assert 'displayName' in self.hrd[0]
- signatureNames = {r.get("signatureName", "") for r in self.hrd}
+ signatureNames = {r.get('signatureName', '') for r in self.hrd}
assert len(EXPECTED_HRD.symmetric_difference(signatureNames)) == 0
def test_preprocess_signature_variants(self) -> None:
@@ -306,100 +305,100 @@ def test_preprocess_signature_variants(self) -> None:
assert len(records) == (
len(EXPECTED_COSMIC) + len(EXPECTED_HLA) + len(EXPECTED_TMB) + len(EXPECTED_MSI)
)
- assert "key" in records[0]
+ assert 'key' in records[0]
def test_load_structural_variants() -> None:
records = preprocess_structural_variants(
- pd.read_csv(os.path.join(DATA_DIR, "fusions.tab"), sep="\t").to_dict("records")
+ pd.read_csv(os.path.join(DATA_DIR, 'fusions.tab'), sep='\t').to_dict('records')
)
assert records
assert len(records) == 7
- assert records[0]["variantType"] == "sv"
- assert "variant" in records[0]
+ assert records[0]['variantType'] == 'sv'
+ assert 'variant' in records[0]
def test_load_expression_variants() -> None:
records = preprocess_expression_variants(
- pd.read_csv(os.path.join(DATA_DIR, "expression.tab"), sep="\t").to_dict("records")
+ pd.read_csv(os.path.join(DATA_DIR, 'expression.tab'), sep='\t').to_dict('records')
)
assert records
assert len(records) == 4603
- assert records[0]["variantType"] == "exp"
- assert "variant" in records[0]
+ assert records[0]['variantType'] == 'exp'
+ assert 'variant' in records[0]
class TestCheckVariantLinks:
def test_sm_missing_copy_empty_ok(self) -> None:
genes = check_variant_links(
- small_mutations=[IprGeneVariant({"gene": "KRAS"})], # type: ignore
+ small_mutations=[IprGeneVariant({'gene': 'KRAS'})], # type: ignore
copy_variants=[],
- expression_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ expression_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
structural_variants=[],
)
- assert genes == {"KRAS"}
+ assert genes == {'KRAS'}
def test_sm_missing_exp_empty_ok(self) -> None:
genes = check_variant_links(
- small_mutations=[IprGeneVariant({"gene": "KRAS"})], # type: ignore
- copy_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ small_mutations=[IprGeneVariant({'gene': 'KRAS'})], # type: ignore
+ copy_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
expression_variants=[],
structural_variants=[],
)
- assert genes == {"KRAS"}
+ assert genes == {'KRAS'}
def test_sm_missing_copy(self) -> None:
- with mock.patch.object(logger, "debug") as mock_debug:
+ with mock.patch.object(logger, 'debug') as mock_debug:
check_variant_links(
- small_mutations=[IprGeneVariant({"gene": "KRAS"})], # type: ignore
- copy_variants=[IprGeneVariant({"gene": "CDK", "variant": ""})], # type: ignore
- expression_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ small_mutations=[IprGeneVariant({'gene': 'KRAS'})], # type: ignore
+ copy_variants=[IprGeneVariant({'gene': 'CDK', 'variant': ''})], # type: ignore
+ expression_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
structural_variants=[],
)
assert mock_debug.called
def test_sm_missing_exp(self) -> None:
- with mock.patch.object(logger, "debug") as mock_debug:
+ with mock.patch.object(logger, 'debug') as mock_debug:
check_variant_links(
- small_mutations=[IprGeneVariant({"gene": "KRAS"})], # type: ignore
- copy_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
- expression_variants=[IprGeneVariant({"gene": "CDK", "variant": ""})], # type: ignore
+ small_mutations=[IprGeneVariant({'gene': 'KRAS'})], # type: ignore
+ copy_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
+ expression_variants=[IprGeneVariant({'gene': 'CDK', 'variant': ''})], # type: ignore
structural_variants=[],
)
assert mock_debug.called
def test_with_valid_inputs(self) -> None:
genes = check_variant_links(
- small_mutations=[IprGeneVariant({"gene": "KRAS"})], # type: ignore
+ small_mutations=[IprGeneVariant({'gene': 'KRAS'})], # type: ignore
copy_variants=[
- IprGeneVariant({"gene": "KRAS", "variant": ""}), # type: ignore
- IprGeneVariant({"gene": "CDK", "variant": ""}), # type: ignore
+ IprGeneVariant({'gene': 'KRAS', 'variant': ''}), # type: ignore
+ IprGeneVariant({'gene': 'CDK', 'variant': ''}), # type: ignore
],
- expression_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ expression_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
structural_variants=[],
)
- assert genes == {"KRAS"}
+ assert genes == {'KRAS'}
def test_copy_missing_exp(self) -> None:
- with mock.patch.object(logger, "debug") as mock_debug:
+ with mock.patch.object(logger, 'debug') as mock_debug:
check_variant_links(
small_mutations=[],
copy_variants=[
- IprGeneVariant({"gene": "BRAF", "variant": "copy gain"}), # type: ignore
- IprGeneVariant({"gene": "KRAS", "variant": ""}), # type: ignore
+ IprGeneVariant({'gene': 'BRAF', 'variant': 'copy gain'}), # type: ignore
+ IprGeneVariant({'gene': 'KRAS', 'variant': ''}), # type: ignore
],
- expression_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ expression_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
structural_variants=[],
)
assert mock_debug.called
def test_exp_missing_copy(self) -> None:
- with mock.patch.object(logger, "debug") as mock_debug:
+ with mock.patch.object(logger, 'debug') as mock_debug:
check_variant_links(
small_mutations=[],
- copy_variants=[IprGeneVariant({"gene": "KRAS", "variant": ""})], # type: ignore
+ copy_variants=[IprGeneVariant({'gene': 'KRAS', 'variant': ''})], # type: ignore
expression_variants=[
- IprGeneVariant({"gene": "BRAF", "variant": "increased expression"}) # type: ignore
+ IprGeneVariant({'gene': 'BRAF', 'variant': 'increased expression'}) # type: ignore
],
structural_variants=[],
)
@@ -409,67 +408,67 @@ def test_exp_missing_copy(self) -> None:
class TestCreateGraphkbSvNotation:
def test_both_genes_and_exons(self) -> None:
notation = create_graphkb_sv_notation(
- IprFusionVariant({"gene1": "A", "gene2": "B", "exon1": 1, "exon2": 2}) # type: ignore
+ IprFusionVariant({'gene1': 'A', 'gene2': 'B', 'exon1': 1, 'exon2': 2}) # type: ignore
)
- assert notation == "(A,B):fusion(e.1,e.2)"
+ assert notation == '(A,B):fusion(e.1,e.2)'
def test_one_exon_missing(self) -> None:
notation = create_graphkb_sv_notation(
- IprFusionVariant({"gene1": "A", "gene2": "B", "exon1": "", "exon2": 2}) # type: ignore
+ IprFusionVariant({'gene1': 'A', 'gene2': 'B', 'exon1': '', 'exon2': 2}) # type: ignore
)
- assert notation == "(A,B):fusion(e.?,e.2)"
+ assert notation == '(A,B):fusion(e.?,e.2)'
def test_one_gene_missing(self) -> None:
notation = create_graphkb_sv_notation(
- IprFusionVariant({"gene1": "A", "gene2": "", "exon1": 1, "exon2": 2}) # type: ignore
+ IprFusionVariant({'gene1': 'A', 'gene2': '', 'exon1': 1, 'exon2': 2}) # type: ignore
)
- assert notation == "(A,?):fusion(e.1,e.2)"
+ assert notation == '(A,?):fusion(e.1,e.2)'
def test_first_gene_missing(self) -> None:
notation = create_graphkb_sv_notation(
- IprFusionVariant({"gene1": "", "gene2": "B", "exon1": 1, "exon2": 2}) # type: ignore
+ IprFusionVariant({'gene1': '', 'gene2': 'B', 'exon1': 1, 'exon2': 2}) # type: ignore
)
- assert notation == "(B,?):fusion(e.2,e.1)"
+ assert notation == '(B,?):fusion(e.2,e.1)'
def test_no_genes_error(self) -> None:
with pytest.raises(ValueError):
create_graphkb_sv_notation(
- IprFusionVariant({"gene1": "", "gene2": "", "exon1": 1, "exon2": 2, "key": "x"}) # type: ignore
+ IprFusionVariant({'gene1': '', 'gene2': '', 'exon1': 1, 'exon2': 2, 'key': 'x'}) # type: ignore
)
class TestCheckComparators:
def test_missing_disease_expression_error(self):
- content = {"comparators": [{"analysisRole": "expression (primary site)"}]}
+ content = {'comparators': [{'analysisRole': 'expression (primary site)'}]}
variants = [{}]
with pytest.raises(ValueError):
check_comparators(content, variants)
def test_missing_primary_expression_error(self):
- content = {"comparators": [{"analysisRole": "expression (disease)"}]}
- variants = [{"primarySiteFoldChange": 1}]
+ content = {'comparators': [{'analysisRole': 'expression (disease)'}]}
+ variants = [{'primarySiteFoldChange': 1}]
with pytest.raises(ValueError):
check_comparators(content, variants)
def test_missing_biopsy_expression_error(self):
- content = {"comparators": [{"analysisRole": "expression (disease)"}]}
- variants = [{"biopsySitePercentile": 1}]
+ content = {'comparators': [{'analysisRole': 'expression (disease)'}]}
+ variants = [{'biopsySitePercentile': 1}]
with pytest.raises(ValueError):
check_comparators(content, variants)
def test_expression_not_required_without_variants(self):
- content = {"comparators": []}
+ content = {'comparators': []}
variants = []
assert check_comparators(content, variants) is None
def test_missing_mutation_burden(self):
content = {
- "comparators": [{"analysisRole": "mutation burden (secondary)"}],
- "images": [{"key": "mutationBurden.density_snv.primary"}],
+ 'comparators': [{'analysisRole': 'mutation burden (secondary)'}],
+ 'images': [{'key': 'mutationBurden.density_snv.primary'}],
}
variants = []
@@ -477,8 +476,8 @@ def test_missing_mutation_burden(self):
check_comparators(content, variants)
-@pytest.mark.parametrize("example_name", ["no_variants", "sm_and_exp", "sm_only"])
+@pytest.mark.parametrize('example_name', ['no_variants', 'sm_and_exp', 'sm_only'])
def test_valid_json_inputs(example_name: str):
- with open(os.path.join(DATA_DIR, "json_examples", f"{example_name}.json"), "r") as fh:
+ with open(os.path.join(DATA_DIR, 'json_examples', f'{example_name}.json'), 'r') as fh:
content = json.load(fh)
validate_report_content(content)
diff --git a/tests/test_ipr/test_ipr.py b/tests/test_ipr/test_ipr.py
index 9994cf4..fa31176 100644
--- a/tests/test_ipr/test_ipr.py
+++ b/tests/test_ipr/test_ipr.py
@@ -15,193 +15,193 @@
)
from pori_python.types import Statement
-DISEASE_RIDS = ["#138:12", "#138:13"]
-APPROVED_EVIDENCE_RIDS = ["approved1", "approved2"]
+DISEASE_RIDS = ['#138:12', '#138:13']
+APPROVED_EVIDENCE_RIDS = ['approved1', 'approved2']
GERMLINE_VARIANTS = [
{
- "key": "1",
- "germline": True,
- "hgvsCds": "SLC28A3:c.1381C>T",
- "hgvsGenomic": "chr9:g.84286011G>A",
- "hgvsProtein": "SLC28A3:p.L461L",
- "ncbiBuild": "GRCh38",
- "normalAltCount": 37,
- "normalDepth": 37,
- "normalRefCount": 0,
- "proteinChange": "p.L461L",
- "rnaAltCount": "",
- "rnaDepth": "",
- "rnaRefCount": "",
- "startPosition": 84286011,
- "transcript": "ENST00000376238",
- "tumourAltCount": "",
- "tumourDepth": "",
- "tumourRefCount": "",
- "variant": "SLC28A3:p.L461L",
- "variantType": "mut",
- "zygosity": "",
+ 'key': '1',
+ 'germline': True,
+ 'hgvsCds': 'SLC28A3:c.1381C>T',
+ 'hgvsGenomic': 'chr9:g.84286011G>A',
+ 'hgvsProtein': 'SLC28A3:p.L461L',
+ 'ncbiBuild': 'GRCh38',
+ 'normalAltCount': 37,
+ 'normalDepth': 37,
+ 'normalRefCount': 0,
+ 'proteinChange': 'p.L461L',
+ 'rnaAltCount': '',
+ 'rnaDepth': '',
+ 'rnaRefCount': '',
+ 'startPosition': 84286011,
+ 'transcript': 'ENST00000376238',
+ 'tumourAltCount': '',
+ 'tumourDepth': '',
+ 'tumourRefCount': '',
+ 'variant': 'SLC28A3:p.L461L',
+ 'variantType': 'mut',
+ 'zygosity': '',
},
{
- "key": "2",
- "germline": True,
- "hgvsCds": "BRCA1:c.4837A>",
- "hgvsGenomic": "chr17:g.43071077T>C",
- "hgvsProtein": "BRCA1:p.S1613G",
- "normalAltCount": 33,
- "normalDepth": 33,
- "normalRefCount": 0,
- "tumourAltCount": 37,
- "tumourDepth": 37,
- "tumourRefCount": 0,
+ 'key': '2',
+ 'germline': True,
+ 'hgvsCds': 'BRCA1:c.4837A>',
+ 'hgvsGenomic': 'chr17:g.43071077T>C',
+ 'hgvsProtein': 'BRCA1:p.S1613G',
+ 'normalAltCount': 33,
+ 'normalDepth': 33,
+ 'normalRefCount': 0,
+ 'tumourAltCount': 37,
+ 'tumourDepth': 37,
+ 'tumourRefCount': 0,
},
]
SOMATIC_VARIANTS = [
{
- "key": "1",
- "gene": "SLC28A3",
- "germline": False,
- "hgvsCds": "SLC28A3:c.1381C>T",
- "hgvsGenomic": "chr9:g.84286011G>A",
- "hgvsProtein": "SLC28A3:p.L461L",
- "ncbiBuild": "GRCh38",
- "normalAltCount": 0,
- "normalDepth": 37,
- "normalRefCount": 37,
- "tumourAltCount": 37,
- "tumourDepth": 37,
- "tumourRefCount": 0,
- "variant": "SLC28A3:p.L461L",
- "variantType": "mut",
- "zygosity": "",
+ 'key': '1',
+ 'gene': 'SLC28A3',
+ 'germline': False,
+ 'hgvsCds': 'SLC28A3:c.1381C>T',
+ 'hgvsGenomic': 'chr9:g.84286011G>A',
+ 'hgvsProtein': 'SLC28A3:p.L461L',
+ 'ncbiBuild': 'GRCh38',
+ 'normalAltCount': 0,
+ 'normalDepth': 37,
+ 'normalRefCount': 37,
+ 'tumourAltCount': 37,
+ 'tumourDepth': 37,
+ 'tumourRefCount': 0,
+ 'variant': 'SLC28A3:p.L461L',
+ 'variantType': 'mut',
+ 'zygosity': '',
},
{
- "key": "2",
- "germline": False,
- "hgvsCds": "BRCA1:c.4837A>",
- "hgvsGenomic": "chr17:g.43071077T>C",
- "hgvsProtein": "BRCA1:p.S1613G",
- "normalAltCount": 1,
- "normalDepth": 33,
- "normalRefCount": 32,
- "tumourAltCount": 37,
- "tumourDepth": 37,
- "tumourRefCount": 0,
+ 'key': '2',
+ 'germline': False,
+ 'hgvsCds': 'BRCA1:c.4837A>',
+ 'hgvsGenomic': 'chr17:g.43071077T>C',
+ 'hgvsProtein': 'BRCA1:p.S1613G',
+ 'normalAltCount': 1,
+ 'normalDepth': 33,
+ 'normalRefCount': 32,
+ 'tumourAltCount': 37,
+ 'tumourDepth': 37,
+ 'tumourRefCount': 0,
},
]
GERMLINE_KB_MATCHES = [
{
- "variant": "1",
- "approvedTherapy": False,
- "category": "pharmacogenomic",
- "context": "anthracyclines",
- "kbContextId": "#122:20944",
- "kbRelevanceId": "#147:38",
- "kbStatementId": "#154:13387",
- "kbVariant": "SLC28A3:c.1381C>T",
- "kbVariantId": "#159:5426",
- "matchedCancer": False,
- "reference": "PMID: 27197003",
- "relevance": "decreased toxicity",
- "reviewStatus": "initial",
+ 'variant': '1',
+ 'approvedTherapy': False,
+ 'category': 'pharmacogenomic',
+ 'context': 'anthracyclines',
+ 'kbContextId': '#122:20944',
+ 'kbRelevanceId': '#147:38',
+ 'kbStatementId': '#154:13387',
+ 'kbVariant': 'SLC28A3:c.1381C>T',
+ 'kbVariantId': '#159:5426',
+ 'matchedCancer': False,
+ 'reference': 'PMID: 27197003',
+ 'relevance': 'decreased toxicity',
+ 'reviewStatus': 'initial',
},
{
- "variant": "2",
- "approvedTherapy": True,
- "category": "cancer predisposition",
- "kbContextId": "#135:8764",
- "kbRelevanceId": "#147:32",
- "kbStatementId": "#155:13511",
- "kbVariant": "BRCA1 mutation",
- "kbVariantId": "#161:938",
- "matchedCancer": False,
- "reference": "MOAlmanac FDA-56",
- "relevance": "therapy",
- "reviewStatus": None,
+ 'variant': '2',
+ 'approvedTherapy': True,
+ 'category': 'cancer predisposition',
+ 'kbContextId': '#135:8764',
+ 'kbRelevanceId': '#147:32',
+ 'kbStatementId': '#155:13511',
+ 'kbVariant': 'BRCA1 mutation',
+ 'kbVariantId': '#161:938',
+ 'matchedCancer': False,
+ 'reference': 'MOAlmanac FDA-56',
+ 'relevance': 'therapy',
+ 'reviewStatus': None,
},
]
SOMATIC_KB_MATCHES = [
{
- "variant": "1",
- "approvedTherapy": False,
- "category": "prognostic",
- "kbContextId": "somatic_test",
- "kbRelevanceId": "#147:38",
- "kbStatementId": "#154:13387",
- "kbVariant": "SLC28A3:c.1381C>T",
- "kbVariantId": "#159:5426",
- "relevance": "prognostic",
- "reviewStatus": "initial",
+ 'variant': '1',
+ 'approvedTherapy': False,
+ 'category': 'prognostic',
+ 'kbContextId': 'somatic_test',
+ 'kbRelevanceId': '#147:38',
+ 'kbStatementId': '#154:13387',
+ 'kbVariant': 'SLC28A3:c.1381C>T',
+ 'kbVariantId': '#159:5426',
+ 'relevance': 'prognostic',
+ 'reviewStatus': 'initial',
},
{
- "variant": "2",
- "approvedTherapy": True,
- "category": "therapy",
- "kbContextId": "#135:8764",
- "kbRelevanceId": "#147:32",
- "kbStatementId": "#155:13511",
- "kbVariant": "BRCA1 mutation",
- "kbVariantId": "#161:938",
- "matchedCancer": False,
- "reference": "MOAlmanac FDA-56",
- "relevance": "therapy",
- "reviewStatus": None,
+ 'variant': '2',
+ 'approvedTherapy': True,
+ 'category': 'therapy',
+ 'kbContextId': '#135:8764',
+ 'kbRelevanceId': '#147:32',
+ 'kbStatementId': '#155:13511',
+ 'kbVariant': 'BRCA1 mutation',
+ 'kbVariantId': '#161:938',
+ 'matchedCancer': False,
+ 'reference': 'MOAlmanac FDA-56',
+ 'relevance': 'therapy',
+ 'reviewStatus': None,
},
]
KB_MATCHES_STATEMENTS = [
{
- "@rid": SOMATIC_KB_MATCHES[0]["kbStatementId"],
- "conditions": [
+ '@rid': SOMATIC_KB_MATCHES[0]['kbStatementId'],
+ 'conditions': [
{
- "@class": "PositionalVariant",
- "@rid": SOMATIC_KB_MATCHES[0]["kbVariantId"],
+ '@class': 'PositionalVariant',
+ '@rid': SOMATIC_KB_MATCHES[0]['kbVariantId'],
},
- {"@class": "CategoryVariant", "@rid": SOMATIC_KB_MATCHES[1]["kbVariantId"]},
- {"@class": "Disease", "@rid": ""},
+ {'@class': 'CategoryVariant', '@rid': SOMATIC_KB_MATCHES[1]['kbVariantId']},
+ {'@class': 'Disease', '@rid': ''},
],
},
{
- "@rid": SOMATIC_KB_MATCHES[1]["kbStatementId"],
- "conditions": [
- {"@class": "CategoryVariant", "@rid": SOMATIC_KB_MATCHES[1]["kbVariantId"]},
- {"@class": "PositionalVariant", "@rid": "157:0", "type": "#999:99"},
+ '@rid': SOMATIC_KB_MATCHES[1]['kbStatementId'],
+ 'conditions': [
+ {'@class': 'CategoryVariant', '@rid': SOMATIC_KB_MATCHES[1]['kbVariantId']},
+ {'@class': 'PositionalVariant', '@rid': '157:0', 'type': '#999:99'},
],
},
]
-def base_graphkb_statement(disease_id: str = "disease", relevance_rid: str = "other") -> Statement:
+def base_graphkb_statement(disease_id: str = 'disease', relevance_rid: str = 'other') -> Statement:
statement = Statement( # type: ignore
{
- "conditions": [
+ 'conditions': [
{
- "@class": "Disease",
- "@rid": disease_id,
- "displayName": "disease_display_name",
+ '@class': 'Disease',
+ '@rid': disease_id,
+ 'displayName': 'disease_display_name',
},
{
- "@class": "CategoryVariant",
- "@rid": "variant_rid",
- "displayName": "KRAS increased expression",
+ '@class': 'CategoryVariant',
+ '@rid': 'variant_rid',
+ 'displayName': 'KRAS increased expression',
},
],
- "evidence": [{"displayName": "pmid12345", "sourceId": "nct12345"}],
- "subject": {
- "@class": "dummy_value",
- "@rid": "101:010",
- "displayName": "dummy_display_name",
+ 'evidence': [{'displayName': 'pmid12345', 'sourceId': 'nct12345'}],
+ 'subject': {
+ '@class': 'dummy_value',
+ '@rid': '101:010',
+ 'displayName': 'dummy_display_name',
},
- "source": None,
- "sourceId": None,
- "relevance": {
- "@rid": relevance_rid,
- "displayName": "relevance_display_name",
- "name": "relevance_name",
+ 'source': None,
+ 'sourceId': None,
+ 'relevance': {
+ '@rid': relevance_rid,
+ 'displayName': 'relevance_display_name',
+ 'name': 'relevance_name',
},
- "@rid": "statement_rid",
+ '@rid': 'statement_rid',
}
)
return statement
@@ -211,25 +211,25 @@ def base_graphkb_statement(disease_id: str = "disease", relevance_rid: str = "ot
def graphkb_conn():
# Mock for the 'query' method
query_mock = Mock()
- query_return_values = [[{"@rid": v} for v in APPROVED_EVIDENCE_RIDS]]
- query_index = {"value": -1} # Mutable index for closure
+ query_return_values = [[{'@rid': v} for v in APPROVED_EVIDENCE_RIDS]]
+ query_index = {'value': -1} # Mutable index for closure
def query_side_effect(*args, **kwargs):
if args:
# for TestGetKbDiseaseMatches
- return [{"@rid": "#123:45"}]
- query_index["value"] += 1
- idx = query_index["value"]
+ return [{'@rid': '#123:45'}]
+ query_index['value'] += 1
+ idx = query_index['value']
return query_return_values[idx] if idx < len(query_return_values) else []
query_mock.side_effect = query_side_effect
# Mock for the 'post' method
- post_mock = Mock(return_value={"result": KB_MATCHES_STATEMENTS})
+ post_mock = Mock(return_value={'result': KB_MATCHES_STATEMENTS})
# 'get_source' remains a plain function
def mock_get_source(source):
- return {"@rid": 0}
+ return {'@rid': 0}
# Create the connection mock with attributes
conn = Mock()
@@ -245,8 +245,8 @@ def mock_get_source(source):
@pytest.fixture(autouse=True)
def mock_get_term_tree(monkeypatch):
- mock_func = Mock(return_value=[{"@rid": d} for d in DISEASE_RIDS])
- monkeypatch.setattr(gkb_vocab, "get_term_tree", mock_func)
+ mock_func = Mock(return_value=[{'@rid': d} for d in DISEASE_RIDS])
+ monkeypatch.setattr(gkb_vocab, 'get_term_tree', mock_func)
yield mock_func
mock_func.reset_mock()
@@ -254,9 +254,9 @@ def mock_get_term_tree(monkeypatch):
@pytest.fixture(autouse=True)
def get_terms_set(monkeypatch):
def mock_func(*pos, **kwargs):
- return {"#999:99"}
+ return {'#999:99'}
- monkeypatch.setattr(gkb_vocab, "get_terms_set", mock_func)
+ monkeypatch.setattr(gkb_vocab, 'get_terms_set', mock_func)
@pytest.fixture(autouse=True)
@@ -264,22 +264,22 @@ def mock_categorize_relevance(monkeypatch):
def mock_func(_, relevance_id):
return relevance_id
- monkeypatch.setattr(gkb_statement, "categorize_relevance", mock_func)
+ monkeypatch.setattr(gkb_statement, 'categorize_relevance', mock_func)
class TestGetKbDiseaseMatches:
def test_get_kb_disease_matches_default_disease(self, graphkb_conn) -> None:
get_kb_disease_matches(graphkb_conn) # default to 'cancer'
assert graphkb_conn.post.called
- assert graphkb_conn.post.call_args_list[0].args[0] == "/subgraphs/Disease"
+ assert graphkb_conn.post.call_args_list[0].args[0] == '/subgraphs/Disease'
def test_get_kb_disease_matches_disease_with_subgraphs(self, graphkb_conn) -> None:
- get_kb_disease_matches(graphkb_conn, "Breast Cancer")
+ get_kb_disease_matches(graphkb_conn, 'Breast Cancer')
assert graphkb_conn.post.called
- assert graphkb_conn.post.call_args_list[0].args[0] == "/subgraphs/Disease"
+ assert graphkb_conn.post.call_args_list[0].args[0] == '/subgraphs/Disease'
def test_get_kb_disease_matches_get_term_tree(self, graphkb_conn) -> None:
- get_kb_disease_matches(graphkb_conn, "Breast Cancer", useSubgraphsRoute=False)
+ get_kb_disease_matches(graphkb_conn, 'Breast Cancer', useSubgraphsRoute=False)
assert not graphkb_conn.post.called
@@ -287,238 +287,238 @@ class TestConvertStatementsToAlterations:
def test_disease_match(self, graphkb_conn) -> None:
statement = base_graphkb_statement(DISEASE_RIDS[0])
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["kbVariantId"] == "variant_rid"
- assert row["kbStatementId"] == "statement_rid"
- assert row["matchedCancer"]
- assert row["kbVariant"] == "KRAS increased expression"
- assert row["relevance"] == "relevance_display_name"
+ assert row['kbVariantId'] == 'variant_rid'
+ assert row['kbStatementId'] == 'statement_rid'
+ assert row['matchedCancer']
+ assert row['kbVariant'] == 'KRAS increased expression'
+ assert row['relevance'] == 'relevance_display_name'
def test_no_disease_match(self, graphkb_conn) -> None:
- statement = base_graphkb_statement("other")
+ statement = base_graphkb_statement('other')
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert not row["matchedCancer"]
+ assert not row['matchedCancer']
def test_multiple_disease_not_match(self, graphkb_conn) -> None:
- statement = base_graphkb_statement("disease")
- statement["conditions"].append(
- {"@class": "Disease", "@rid": "other", "displayName": "disease_display_name"} # type: ignore
+ statement = base_graphkb_statement('disease')
+ statement['conditions'].append(
+ {'@class': 'Disease', '@rid': 'other', 'displayName': 'disease_display_name'} # type: ignore
)
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert not row["matchedCancer"]
+ assert not row['matchedCancer']
def test_biological(self, graphkb_conn) -> None:
statement = base_graphkb_statement()
- statement["relevance"]["@rid"] = "biological"
+ statement['relevance']['@rid'] = 'biological'
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["category"] == "biological"
+ assert row['category'] == 'biological'
def test_prognostic_no_disease_match(self, graphkb_conn) -> None:
statement = base_graphkb_statement()
- statement["relevance"]["@rid"] = "prognostic"
+ statement['relevance']['@rid'] = 'prognostic'
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 0
def test_prognostic_disease_match(self, graphkb_conn) -> None:
statement = base_graphkb_statement(DISEASE_RIDS[0])
- statement["relevance"]["@rid"] = "prognostic"
+ statement['relevance']['@rid'] = 'prognostic'
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["category"] == "prognostic"
+ assert row['category'] == 'prognostic'
def test_diagnostic(self, graphkb_conn) -> None:
statement = base_graphkb_statement()
- statement["relevance"]["@rid"] = "diagnostic"
+ statement['relevance']['@rid'] = 'diagnostic'
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["category"] == "diagnostic"
+ assert row['category'] == 'diagnostic'
def test_reference_from_displayname_for_noneligibility_stmts(self, graphkb_conn) -> None:
statement = base_graphkb_statement()
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["reference"] == "pmid12345"
+ assert row['reference'] == 'pmid12345'
def test_reference_from_sourceid_for_eligibility_stmts(self, graphkb_conn) -> None:
statement = base_graphkb_statement()
- statement["relevance"]["name"] = "eligibility"
+ statement['relevance']['name'] = 'eligibility'
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["reference"] == "nct12345"
+ assert row['reference'] == 'nct12345'
- @patch("pori_python.ipr.ipr.get_evidencelevel_mapping")
+ @patch('pori_python.ipr.ipr.get_evidencelevel_mapping')
def test_unapproved_therapeutic(self, mock_get_evidencelevel_mapping, graphkb_conn) -> None:
- mock_get_evidencelevel_mapping.return_value = {"other": "test"}
+ mock_get_evidencelevel_mapping.return_value = {'other': 'test'}
statement = base_graphkb_statement()
- statement["relevance"]["@rid"] = "therapeutic"
- statement["evidenceLevel"] = [{"@rid": "other", "displayName": "level"}] # type: ignore
+ statement['relevance']['@rid'] = 'therapeutic'
+ statement['evidenceLevel'] = [{'@rid': 'other', 'displayName': 'level'}] # type: ignore
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["category"] == "therapeutic"
+ assert row['category'] == 'therapeutic'
- @patch("pori_python.ipr.ipr.get_evidencelevel_mapping")
+ @patch('pori_python.ipr.ipr.get_evidencelevel_mapping')
def test_approved_therapeutic(self, mock_get_evidencelevel_mapping, graphkb_conn) -> None:
- mock_get_evidencelevel_mapping.return_value = {APPROVED_EVIDENCE_RIDS[0]: "test"}
+ mock_get_evidencelevel_mapping.return_value = {APPROVED_EVIDENCE_RIDS[0]: 'test'}
statement = base_graphkb_statement()
- statement["relevance"]["@rid"] = "therapeutic"
- statement["evidenceLevel"] = [{"@rid": APPROVED_EVIDENCE_RIDS[0], "displayName": "level"}] # type: ignore
+ statement['relevance']['@rid'] = 'therapeutic'
+ statement['evidenceLevel'] = [{'@rid': APPROVED_EVIDENCE_RIDS[0], 'displayName': 'level'}] # type: ignore
result = convert_statements_to_alterations(
- graphkb_conn, [statement], DISEASE_RIDS, {"variant_rid"}
+ graphkb_conn, [statement], DISEASE_RIDS, {'variant_rid'}
)
assert len(result) == 1
row = result[0]
- assert row["category"] == "therapeutic"
+ assert row['category'] == 'therapeutic'
class TestKbmatchFilters:
def test_germline_kb_matches(self):
assert len(germline_kb_matches(GERMLINE_KB_MATCHES, GERMLINE_VARIANTS)) == len(
GERMLINE_KB_MATCHES
- ), "Germline variant not matched to germline KB statement."
- assert not germline_kb_matches(
- GERMLINE_KB_MATCHES, SOMATIC_VARIANTS
- ), "Somatic variant matched to KB germline statement."
+ ), 'Germline variant not matched to germline KB statement.'
+ assert not germline_kb_matches(GERMLINE_KB_MATCHES, SOMATIC_VARIANTS), (
+ 'Somatic variant matched to KB germline statement.'
+ )
assert len(germline_kb_matches(SOMATIC_KB_MATCHES, SOMATIC_VARIANTS)) == len(
SOMATIC_KB_MATCHES
- ), "Somatic variant not matched to somatic KB statement."
- assert not germline_kb_matches(
- SOMATIC_KB_MATCHES, GERMLINE_VARIANTS
- ), "Germline variant matched to KB somatic statement."
+ ), 'Somatic variant not matched to somatic KB statement.'
+ assert not germline_kb_matches(SOMATIC_KB_MATCHES, GERMLINE_VARIANTS), (
+ 'Germline variant matched to KB somatic statement.'
+ )
GKB_MATCHES = [
{
- "variant": "1",
- "approvedTherapy": False,
- "category": "prognostic",
- "kbContextId": "somatic_test",
- "kbRelevanceId": "#147:38",
- "kbStatementId": "#154:13387",
- "requiredKbMatches": ["#159:5426"],
- "kbVariant": "SLC28A3:c.1381C>T",
- "kbVariantId": "#159:5426",
- "relevance": "prognostic",
- "variantType": "mut",
- "reviewStatus": "initial",
+ 'variant': '1',
+ 'approvedTherapy': False,
+ 'category': 'prognostic',
+ 'kbContextId': 'somatic_test',
+ 'kbRelevanceId': '#147:38',
+ 'kbStatementId': '#154:13387',
+ 'requiredKbMatches': ['#159:5426'],
+ 'kbVariant': 'SLC28A3:c.1381C>T',
+ 'kbVariantId': '#159:5426',
+ 'relevance': 'prognostic',
+ 'variantType': 'mut',
+ 'reviewStatus': 'initial',
},
{
- "variant": "2",
- "approvedTherapy": True,
- "category": "therapy",
- "kbContextId": "#135:8764",
- "kbRelevanceId": "#147:32",
- "kbStatementId": "#155:13511",
- "requiredKbMatches": ["#161:938"],
- "kbVariant": "BRCA1 mutation",
- "kbVariantId": "#161:938",
- "matchedCancer": False,
- "reference": "MOAlmanac FDA-56",
- "relevance": "therapy",
- "variantType": "mut",
- "reviewStatus": None,
+ 'variant': '2',
+ 'approvedTherapy': True,
+ 'category': 'therapy',
+ 'kbContextId': '#135:8764',
+ 'kbRelevanceId': '#147:32',
+ 'kbStatementId': '#155:13511',
+ 'requiredKbMatches': ['#161:938'],
+ 'kbVariant': 'BRCA1 mutation',
+ 'kbVariantId': '#161:938',
+ 'matchedCancer': False,
+ 'reference': 'MOAlmanac FDA-56',
+ 'relevance': 'therapy',
+ 'variantType': 'mut',
+ 'reviewStatus': None,
},
{
- "variant": "3",
- "approvedTherapy": True,
- "category": "therapy",
- "kbContextId": "#135:8764",
- "kbRelevanceId": "#147:32",
- "kbStatementId": "#155:13511",
- "requiredKbMatches": ["#161:938"],
- "kbVariant": "BRCA1 mutation",
- "kbVariantId": "#161:938",
- "matchedCancer": False,
- "reference": "MOAlmanac FDA-56",
- "relevance": "therapy",
- "variantType": "mut",
- "reviewStatus": None,
+ 'variant': '3',
+ 'approvedTherapy': True,
+ 'category': 'therapy',
+ 'kbContextId': '#135:8764',
+ 'kbRelevanceId': '#147:32',
+ 'kbStatementId': '#155:13511',
+ 'requiredKbMatches': ['#161:938'],
+ 'kbVariant': 'BRCA1 mutation',
+ 'kbVariantId': '#161:938',
+ 'matchedCancer': False,
+ 'reference': 'MOAlmanac FDA-56',
+ 'relevance': 'therapy',
+ 'variantType': 'mut',
+ 'reviewStatus': None,
},
{
- "variant": "4",
- "approvedTherapy": True,
- "category": "therapy",
- "kbContextId": "#135:8764",
- "kbRelevanceId": "#147:32",
- "kbStatementId": "#155:13511",
- "requiredKbMatches": ["#159:54261", "#161:9381"],
- "kbVariant": "BRCA1 mutation",
- "kbVariantId": "#161:9381",
- "matchedCancer": False,
- "reference": "MOAlmanac FDA-56",
- "relevance": "therapy",
- "variantType": "mut",
- "reviewStatus": None,
+ 'variant': '4',
+ 'approvedTherapy': True,
+ 'category': 'therapy',
+ 'kbContextId': '#135:8764',
+ 'kbRelevanceId': '#147:32',
+ 'kbStatementId': '#155:13511',
+ 'requiredKbMatches': ['#159:54261', '#161:9381'],
+ 'kbVariant': 'BRCA1 mutation',
+ 'kbVariantId': '#161:9381',
+ 'matchedCancer': False,
+ 'reference': 'MOAlmanac FDA-56',
+ 'relevance': 'therapy',
+ 'variantType': 'mut',
+ 'reviewStatus': None,
},
]
ALL_VARIANTS = [
- {"variant": "var1", "key": '1', "variantType": 'mut'},
- {"variant": "var2", "key": '2', "variantType": 'mut'},
- {"variant": "var3", "key": '3', "variantType": 'mut'},
- {"variant": "var4", "key": '4', "variantType": 'mut'},
+ {'variant': 'var1', 'key': '1', 'variantType': 'mut'},
+ {'variant': 'var2', 'key': '2', 'variantType': 'mut'},
+ {'variant': 'var3', 'key': '3', 'variantType': 'mut'},
+ {'variant': 'var4', 'key': '4', 'variantType': 'mut'},
]
BASIC_GKB_MATCH = {
- "approvedTherapy": False,
- "category": "test",
- "context": "test",
- "kbContextId": "#124:24761",
- "disease": "test",
- "evidenceLevel": "test",
- "iprEvidenceLevel": "test",
- "matchedCancer": False,
- "reference": "test",
- "relevance": "test",
- "kbRelevanceId": "#148:31",
- "externalSource": "",
- "externalStatementId": "",
- "reviewStatus": "passed",
- "kbData": {},
+ 'approvedTherapy': False,
+ 'category': 'test',
+ 'context': 'test',
+ 'kbContextId': '#124:24761',
+ 'disease': 'test',
+ 'evidenceLevel': 'test',
+ 'iprEvidenceLevel': 'test',
+ 'matchedCancer': False,
+ 'reference': 'test',
+ 'relevance': 'test',
+ 'kbRelevanceId': '#148:31',
+ 'externalSource': '',
+ 'externalStatementId': '',
+ 'reviewStatus': 'passed',
+ 'kbData': {},
}
@@ -533,13 +533,13 @@ def create_gkb_matches(input_fields):
def get_condition_set_string_rep(condition_set):
for item in condition_set:
- item["observedKeysStrs"] = [
- "-".join([elem["kbVariantId"], elem["observedVariantKey"]])
- for elem in item["matchedConditions"]
+ item['observedKeysStrs'] = [
+ '-'.join([elem['kbVariantId'], elem['observedVariantKey']])
+ for elem in item['matchedConditions']
]
- item["observedKeysStrs"].sort()
- item["observedKeysStr"] = ",".join(item["observedKeysStrs"])
- condition_set = [f"{item['kbStatementId']},{item['observedKeysStr']}" for item in condition_set]
+ item['observedKeysStrs'].sort()
+ item['observedKeysStr'] = ','.join(item['observedKeysStrs'])
+ condition_set = [f'{item["kbStatementId"]},{item["observedKeysStr"]}' for item in condition_set]
condition_set.sort()
return condition_set
@@ -547,132 +547,132 @@ def get_condition_set_string_rep(condition_set):
class TestKbMatchSectionPrep:
def test_matched_variant_pairs_extracted_only_once_for_multiple_statements(self):
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "test1"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'test1'},
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "test2",
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'test2',
}, # diff statement
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1", "test2"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1', 'test2']
gkb_matches = create_gkb_matches(input_fields)
kb_variants = get_kb_variants(gkb_matches)
- found_variants = [f"{item['variant']},{item['kbVariantId']}" for item in kb_variants]
+ found_variants = [f'{item["variant"]},{item["kbVariantId"]}' for item in kb_variants]
found_variants.sort()
- assert found_variants == ["A,test1"]
+ assert found_variants == ['A,test1']
def test_all_distinct_observed_and_matched_variant_pairs_extracted(self):
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "test1"},
- {"variant": "A", "kbVariantId": "test2", "kbStatementId": "test1"},
- {"variant": "B", "kbVariantId": "test1", "kbStatementId": "test1"},
- {"variant": "B", "kbVariantId": "test1", "kbStatementId": "test1"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'test1'},
+ {'variant': 'A', 'kbVariantId': 'test2', 'kbStatementId': 'test1'},
+ {'variant': 'B', 'kbVariantId': 'test1', 'kbStatementId': 'test1'},
+ {'variant': 'B', 'kbVariantId': 'test1', 'kbStatementId': 'test1'},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1", "test2"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1', 'test2']
gkb_matches = create_gkb_matches(input_fields)
kb_variants = get_kb_variants(gkb_matches)
- found_variants = [f"{item['variant']},{item['kbVariantId']}" for item in kb_variants]
+ found_variants = [f'{item["variant"]},{item["kbVariantId"]}' for item in kb_variants]
found_variants.sort()
- assert found_variants == ["A,test1", "A,test2", "B,test1"]
+ assert found_variants == ['A,test1', 'A,test2', 'B,test1']
def test_statements_extracted_only_once(self):
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "A", "kbVariantId": "test2", "kbStatementId": "X"},
- {"variant": "B", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "B", "kbVariantId": "test2", "kbStatementId": "X"},
- {"variant": "C", "kbVariantId": "test1", "kbStatementId": "Y"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'A', 'kbVariantId': 'test2', 'kbStatementId': 'X'},
+ {'variant': 'B', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'B', 'kbVariantId': 'test2', 'kbStatementId': 'X'},
+ {'variant': 'C', 'kbVariantId': 'test1', 'kbStatementId': 'Y'},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1", "test2"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1', 'test2']
gkb_matches = create_gkb_matches(input_fields)
kb_stmts = get_kb_matched_statements(gkb_matches)
- kb_stmts = [item["kbStatementId"] for item in kb_stmts]
+ kb_stmts = [item['kbStatementId'] for item in kb_stmts]
kb_stmts.sort()
- assert kb_stmts == ["X", "Y"]
+ assert kb_stmts == ['X', 'Y']
def test_singlevar_statements_with_multiple_satisfying_condition_sets(self):
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "B", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "C", "kbVariantId": "test1", "kbStatementId": "X"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'B', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'C', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1']
gkb_matches = create_gkb_matches(input_fields)
kbcs = get_kb_statement_matched_conditions(gkb_matches)
kbcs_string_rep = get_condition_set_string_rep(kbcs)
- assert kbcs_string_rep == ["X,test1-A", "X,test1-B", "X,test1-C"]
+ assert kbcs_string_rep == ['X,test1-A', 'X,test1-B', 'X,test1-C']
def test_multivar_statements_with_multiple_satisfying_condition_sets(self):
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "B", "kbVariantId": "test2", "kbStatementId": "X"},
- {"variant": "C", "kbVariantId": "test2", "kbStatementId": "X"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'B', 'kbVariantId': 'test2', 'kbStatementId': 'X'},
+ {'variant': 'C', 'kbVariantId': 'test2', 'kbStatementId': 'X'},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1", "test2"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1', 'test2']
gkb_matches = create_gkb_matches(input_fields)
kbcs = get_kb_statement_matched_conditions(gkb_matches)
kbcs_string_rep = get_condition_set_string_rep(kbcs)
- assert kbcs_string_rep == ["X,test1-A,test2-B", "X,test1-A,test2-C"]
+ assert kbcs_string_rep == ['X,test1-A,test2-B', 'X,test1-A,test2-C']
def test_do_not_infer_possible_matches(self):
"""edge case - when infer_possible_matches is false, do not allow var/kbvar
pairs to satisfy conditions for statements they are not explicitly linked
to in the input"""
input_fields = [
- {"variant": "A", "kbVariantId": "test1", "kbStatementId": "X"},
- {"variant": "B", "kbVariantId": "test1", "kbStatementId": "Y"},
+ {'variant': 'A', 'kbVariantId': 'test1', 'kbStatementId': 'X'},
+ {'variant': 'B', 'kbVariantId': 'test1', 'kbStatementId': 'Y'},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["requiredKbMatches"] = ["test1"]
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['requiredKbMatches'] = ['test1']
gkb_matches = create_gkb_matches(input_fields)
kbcs = get_kb_statement_matched_conditions(gkb_matches)
kbcs_string_rep = get_condition_set_string_rep(kbcs)
- assert kbcs_string_rep == ["X,test1-A", "Y,test1-B"]
+ assert kbcs_string_rep == ['X,test1-A', 'Y,test1-B']
def test_no_dupes_when_requiredKbMatches_not_sorted(self):
input_fields = [
{
- "variant": "A",
- "kbVariantId": "test1",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "A",
- "kbVariantId": "test1",
- "requiredKbMatches": ["test2", "test1"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'requiredKbMatches': ['test2', 'test1'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "requiredKbMatches": ["test2", "test1"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'requiredKbMatches': ['test2', 'test1'],
},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
- item["kbStatementId"] = "X"
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
+ item['kbStatementId'] = 'X'
gkb_matches = create_gkb_matches(input_fields)
stmts = get_kb_matched_statements(gkb_matches)
kbcs = get_kb_statement_matched_conditions(gkb_matches)
@@ -684,27 +684,27 @@ def test_partial_matches_omitted(self):
are omitted when allow_partial_matches=False"""
input_fields = [
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "Y",
- "requiredKbMatches": ["test1", "test3"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'Y',
+ 'requiredKbMatches': ['test1', 'test3'],
},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
gkb_matches = create_gkb_matches(input_fields)
sections = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
@@ -713,7 +713,7 @@ def test_partial_matches_omitted(self):
kbcs = sections['kbStatementMatchedConditions']
assert len(stmts) == 2
assert len(kbcs) == 1 # X only
- assert kbcs[0]["kbStatementId"] == "X"
+ assert kbcs[0]['kbStatementId'] == 'X'
def test_partial_matches_omitted_even_when_var_used_elsewhere(self):
"""edge case -
@@ -726,39 +726,39 @@ def test_partial_matches_omitted_even_when_var_used_elsewhere(self):
satisfied for statement Z"""
input_fields = [
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "Y",
- "requiredKbMatches": ["test1", "test3"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'Y',
+ 'requiredKbMatches': ['test1', 'test3'],
},
{
- "variant": "C",
- "kbVariantId": "test3",
- "kbStatementId": "Z",
- "requiredKbMatches": ["test3"],
+ 'variant': 'C',
+ 'kbVariantId': 'test3',
+ 'kbStatementId': 'Z',
+ 'requiredKbMatches': ['test3'],
},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
gkb_matches = create_gkb_matches(input_fields)
stmts = get_kb_matched_statements(gkb_matches)
kbcs = get_kb_statement_matched_conditions(gkb_matches)
assert len(stmts) == 3
assert len(kbcs) == 2 # X and Z but not Y
- assert "Y" not in [item["kbStatementId"] for item in kbcs]
+ assert 'Y' not in [item['kbStatementId'] for item in kbcs]
def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(self):
"""When there is a variant that fulfills one part of a statement's condition set,
@@ -767,33 +767,33 @@ def test_kbvariants_removed_from_set_when_not_part_of_full_conditionset_match(se
"""
input_fields = [
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2", "test3"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2', 'test3'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2", "test3"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2', 'test3'],
},
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "Y",
- "requiredKbMatches": ["test4", "test1"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'Y',
+ 'requiredKbMatches': ['test4', 'test1'],
},
{
- "variant": "D",
- "kbVariantId": "test4",
- "kbStatementId": "Y",
- "requiredKbMatches": ["test4", "test1"],
+ 'variant': 'D',
+ 'kbVariantId': 'test4',
+ 'kbStatementId': 'Y',
+ 'requiredKbMatches': ['test4', 'test1'],
},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
gkb_matches = create_gkb_matches(input_fields)
sections1 = get_kb_matches_sections(gkb_matches, allow_partial_matches=False)
kbcs1 = sections1['kbStatementMatchedConditions']
@@ -812,27 +812,27 @@ def test_partial_matches_included(self):
are included when allow_partial_matches=True"""
input_fields = [
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "B",
- "kbVariantId": "test2",
- "kbStatementId": "X",
- "requiredKbMatches": ["test1", "test2"],
+ 'variant': 'B',
+ 'kbVariantId': 'test2',
+ 'kbStatementId': 'X',
+ 'requiredKbMatches': ['test1', 'test2'],
},
{
- "variant": "A",
- "kbVariantId": "test1",
- "kbStatementId": "Y",
- "requiredKbMatches": ["test1", "test3"],
+ 'variant': 'A',
+ 'kbVariantId': 'test1',
+ 'kbStatementId': 'Y',
+ 'requiredKbMatches': ['test1', 'test3'],
},
]
for item in input_fields: # we don't care about these for this test
- item["variantType"] = "test"
- item["kbVariant"] = "test"
+ item['variantType'] = 'test'
+ item['kbVariant'] = 'test'
gkb_matches = create_gkb_matches(input_fields)
stmts = get_kb_matched_statements(gkb_matches)
kbcs = get_kb_statement_matched_conditions(gkb_matches, allow_partial_matches=True)
diff --git a/tests/test_ipr/test_main.py b/tests/test_ipr/test_main.py
index 151b049..8fe585c 100644
--- a/tests/test_ipr/test_main.py
+++ b/tests/test_ipr/test_main.py
@@ -12,69 +12,69 @@
from .constants import EXCLUDE_INTEGRATION_TESTS
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
-EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
+EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1'
def get_test_spec():
- ipr_spec = {"components": {"schemas": {"genesCreate": {"properties": {}}}}}
+ ipr_spec = {'components': {'schemas': {'genesCreate': {'properties': {}}}}}
ipr_gene_keys = IprGene.__required_keys__ | IprGene.__optional_keys__
for key in ipr_gene_keys:
- ipr_spec["components"]["schemas"]["genesCreate"]["properties"][key] = ""
+ ipr_spec['components']['schemas']['genesCreate']['properties'][key] = ''
return ipr_spec
def get_test_file(name: str) -> str:
- return os.path.join(os.path.dirname(__file__), "test_data", name)
+ return os.path.join(os.path.dirname(__file__), 'test_data', name)
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def report_upload_content(tmp_path_factory) -> Dict:
mock = MagicMock()
- json_file = tmp_path_factory.mktemp("inputs") / "content.json"
+ json_file = tmp_path_factory.mktemp('inputs') / 'content.json'
json_file.write_text(
json.dumps(
{
- "blargh": "some fake content",
- "comparators": [
- {"analysisRole": "expression (disease)", "name": "1"},
- {"analysisRole": "expression (primary site)", "name": "2"},
- {"analysisRole": "expression (biopsy site)", "name": "3"},
+ 'blargh': 'some fake content',
+ 'comparators': [
+ {'analysisRole': 'expression (disease)', 'name': '1'},
+ {'analysisRole': 'expression (primary site)', 'name': '2'},
+ {'analysisRole': 'expression (biopsy site)', 'name': '3'},
{
- "analysisRole": "expression (internal pancancer cohort)",
- "name": "4",
+ 'analysisRole': 'expression (internal pancancer cohort)',
+ 'name': '4',
},
],
- "patientId": "PATIENT001",
- "project": "TEST",
- "expressionVariants": json.loads(
- pd.read_csv(get_test_file("expression.short.tab"), sep="\t").to_json(
- orient="records"
+ 'patientId': 'PATIENT001',
+ 'project': 'TEST',
+ 'expressionVariants': json.loads(
+ pd.read_csv(get_test_file('expression.short.tab'), sep='\t').to_json(
+ orient='records'
)
),
- "smallMutations": json.loads(
- pd.read_csv(get_test_file("small_mutations.short.tab"), sep="\t").to_json(
- orient="records"
+ 'smallMutations': json.loads(
+ pd.read_csv(get_test_file('small_mutations.short.tab'), sep='\t').to_json(
+ orient='records'
)
),
- "copyVariants": json.loads(
- pd.read_csv(get_test_file("copy_variants.short.tab"), sep="\t").to_json(
- orient="records"
+ 'copyVariants': json.loads(
+ pd.read_csv(get_test_file('copy_variants.short.tab'), sep='\t').to_json(
+ orient='records'
)
),
- "structuralVariants": json.loads(
- pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_json(orient="records")
+ 'structuralVariants': json.loads(
+ pd.read_csv(get_test_file('fusions.tab'), sep='\t').to_json(orient='records')
),
- "kbDiseaseMatch": "colorectal cancer",
- "msi": [
+ 'kbDiseaseMatch': 'colorectal cancer',
+ 'msi': [
{
- "score": 1000.0,
- "kbCategory": "microsatellite instability",
+ 'score': 1000.0,
+ 'kbCategory': 'microsatellite instability',
}
],
- "hrd": {
- "score": 9999.0,
- "kbCategory": "homologous recombination deficiency strong signature",
+ 'hrd': {
+ 'score': 9999.0,
+ 'kbCategory': 'homologous recombination deficiency strong signature',
},
},
allow_nan=False,
@@ -82,38 +82,38 @@ def report_upload_content(tmp_path_factory) -> Dict:
)
def side_effect_function(*args, **kwargs):
- if "templates" in args[0]:
- return [{"name": "genomic", "ident": "001"}]
- elif args[0] == "project":
- return [{"name": "TEST", "ident": "001"}]
+ if 'templates' in args[0]:
+ return [{'name': 'genomic', 'ident': '001'}]
+ elif args[0] == 'project':
+ return [{'name': 'TEST', 'ident': '001'}]
else:
return []
with patch.object(
sys,
- "argv",
+ 'argv',
[
- "ipr",
- "--username",
- os.environ.get("IPR_USER", os.environ["USER"]),
- "--password",
- os.environ["IPR_PASS"],
- "--ipr_url",
- "http://fake.url.ca",
- "--graphkb_username",
- os.environ.get("GRAPHKB_USER", os.environ["USER"]),
- "--graphkb_password",
- os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
- "--graphkb_url",
- os.environ.get("GRAPHKB_URL", False),
- "--content",
+ 'ipr',
+ '--username',
+ os.environ.get('IPR_USER', os.environ['USER']),
+ '--password',
+ os.environ['IPR_PASS'],
+ '--ipr_url',
+ 'http://fake.url.ca',
+ '--graphkb_username',
+ os.environ.get('GRAPHKB_USER', os.environ['USER']),
+ '--graphkb_password',
+ os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']),
+ '--graphkb_url',
+ os.environ.get('GRAPHKB_URL', False),
+ '--content',
str(json_file),
- "--therapeutics",
+ '--therapeutics',
],
):
- with patch.object(IprConnection, "upload_report", new=mock):
- with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
- with patch.object(IprConnection, "get", side_effect=side_effect_function):
+ with patch.object(IprConnection, 'upload_report', new=mock):
+ with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()):
+ with patch.object(IprConnection, 'get', side_effect=side_effect_function):
command_interface()
assert mock.called
@@ -122,57 +122,57 @@ def side_effect_function(*args, **kwargs):
return report_content
-@pytest.mark.skip(reason="KBDEV-1308; taking too long, getting canceled after reaching max delay")
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skip(reason='KBDEV-1308; taking too long, getting canceled after reaching max delay')
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
class TestCreateReport:
def test_main_sections_present(self, report_upload_content: Dict) -> None:
sections = set(report_upload_content.keys())
for section in [
- "structuralVariants",
- "expressionVariants",
- "copyVariants",
- "smallMutations",
- "kbMatches",
- "genes",
+ 'structuralVariants',
+ 'expressionVariants',
+ 'copyVariants',
+ 'smallMutations',
+ 'kbMatches',
+ 'genes',
]:
assert section in sections
def test_kept_low_quality_fusion(self, report_upload_content: Dict) -> None:
- fusions = [(sv["gene1"], sv["gene2"]) for sv in report_upload_content["structuralVariants"]]
+ fusions = [(sv['gene1'], sv['gene2']) for sv in report_upload_content['structuralVariants']]
if (
EXCLUDE_BCGSC_TESTS
): # may be missing statements assoc with SUZ12 if no access to bcgsc data
- assert ("SARM1", "CDKL2") in fusions
+ assert ('SARM1', 'CDKL2') in fusions
else:
- assert ("SARM1", "SUZ12") in fusions
+ assert ('SARM1', 'SUZ12') in fusions
def test_pass_through_content_added(self, report_upload_content: Dict) -> None:
# check the passthorough content was added
- assert "blargh" in report_upload_content
+ assert 'blargh' in report_upload_content
def test_found_fusion_partner_gene(self, report_upload_content: Dict) -> None:
- genes = report_upload_content["genes"]
+ genes = report_upload_content['genes']
# eg, A1BG
- assert any([g.get("knownFusionPartner", False) for g in genes])
+ assert any([g.get('knownFusionPartner', False) for g in genes])
- @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data")
+ @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_found_oncogene(self, report_upload_content: Dict) -> None:
- genes = report_upload_content["genes"]
+ genes = report_upload_content['genes']
# eg, ZBTB20
- assert any([g.get("oncogene", False) for g in genes])
+ assert any([g.get('oncogene', False) for g in genes])
- @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data)")
+ @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data)')
def test_found_tumour_supressor(self, report_upload_content: Dict) -> None:
- genes = report_upload_content["genes"]
+ genes = report_upload_content['genes']
# eg, ZNRF3
- assert any([g.get("tumourSuppressor", False) for g in genes])
+ assert any([g.get('tumourSuppressor', False) for g in genes])
def test_found_kb_statement_related_gene(self, report_upload_content: Dict) -> None:
- genes = report_upload_content["genes"]
- assert any([g.get("kbStatementRelated", False) for g in genes])
+ genes = report_upload_content['genes']
+ assert any([g.get('kbStatementRelated', False) for g in genes])
- @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason="excluding tests that depend on oncokb data")
+ @pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_found_cancer_gene_list_match_gene(self, report_upload_content: Dict) -> None:
- genes = report_upload_content["genes"]
- assert any([g.get("cancerGeneListMatch", False) for g in genes])
+ genes = report_upload_content['genes']
+ assert any([g.get('cancerGeneListMatch', False) for g in genes])
diff --git a/tests/test_ipr/test_probe.py b/tests/test_ipr/test_probe.py
index 4801c54..43ead9f 100644
--- a/tests/test_ipr/test_probe.py
+++ b/tests/test_ipr/test_probe.py
@@ -9,50 +9,50 @@
from .constants import EXCLUDE_INTEGRATION_TESTS
-EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
+EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1'
def get_test_file(name: str) -> str:
- return os.path.join(os.path.dirname(__file__), "test_data", name)
+ return os.path.join(os.path.dirname(__file__), 'test_data', name)
-@pytest.fixture(scope="module")
+@pytest.fixture(scope='module')
def probe_upload_content() -> Dict:
mock = MagicMock()
def side_effect_function(*args, **kwargs):
- if "templates" in args[0]:
- return [{"name": "genomic", "ident": "001"}]
- elif args[0] == "project":
- return [{"name": "TEST", "ident": "001"}]
+ if 'templates' in args[0]:
+ return [{'name': 'genomic', 'ident': '001'}]
+ elif args[0] == 'project':
+ return [{'name': 'TEST', 'ident': '001'}]
else:
return []
- with patch.object(IprConnection, "upload_report", new=mock):
- with patch.object(IprConnection, "get_spec", return_value={}):
- with patch.object(IprConnection, "get", side_effect=side_effect_function):
+ with patch.object(IprConnection, 'upload_report', new=mock):
+ with patch.object(IprConnection, 'get_spec', return_value={}):
+ with patch.object(IprConnection, 'get', side_effect=side_effect_function):
create_report(
content={
- "patientId": "PATIENT001",
- "project": "TEST",
- "smallMutations": pd.read_csv(
- get_test_file("small_mutations_probe.tab"),
- sep="\t",
- dtype={"chromosome": "string"},
- ).to_dict("records"),
- "structuralVariants": pd.read_csv(
- get_test_file("fusions.tab"), sep="\t"
- ).to_dict("records"),
- "blargh": "some fake content",
- "kbDiseaseMatch": "colorectal cancer",
+ 'patientId': 'PATIENT001',
+ 'project': 'TEST',
+ 'smallMutations': pd.read_csv(
+ get_test_file('small_mutations_probe.tab'),
+ sep='\t',
+ dtype={'chromosome': 'string'},
+ ).to_dict('records'),
+ 'structuralVariants': pd.read_csv(
+ get_test_file('fusions.tab'), sep='\t'
+ ).to_dict('records'),
+ 'blargh': 'some fake content',
+ 'kbDiseaseMatch': 'colorectal cancer',
},
- username=os.environ["IPR_USER"],
- password=os.environ["IPR_PASS"],
- log_level="info",
- ipr_url="http://fake.url.ca",
- graphkb_username=os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"]),
- graphkb_password=os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
- graphkb_url=os.environ.get("GRAPHKB_URL", False),
+ username=os.environ['IPR_USER'],
+ password=os.environ['IPR_PASS'],
+ log_level='info',
+ ipr_url='http://fake.url.ca',
+ graphkb_username=os.environ.get('GRAPHKB_USER', os.environ['IPR_USER']),
+ graphkb_password=os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']),
+ graphkb_url=os.environ.get('GRAPHKB_URL', False),
)
assert mock.called
@@ -61,22 +61,22 @@ def side_effect_function(*args, **kwargs):
return report_content
-@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests')
class TestCreateReport:
def test_found_probe_small_mutations(self, probe_upload_content: Dict) -> None:
- assert probe_upload_content["smallMutations"]
+ assert probe_upload_content['smallMutations']
@pytest.mark.skipif(
- EXCLUDE_BCGSC_TESTS, reason="excluding tests that depend on BCGSC-specific data"
+ EXCLUDE_BCGSC_TESTS, reason='excluding tests that depend on BCGSC-specific data'
)
def test_found_probe_small_mutations_match(self, probe_upload_content: Dict) -> None:
# verify each probe had a KB match
- for sm_probe in probe_upload_content["smallMutations"]:
+ for sm_probe in probe_upload_content['smallMutations']:
match_list = [
kb_match
- for kb_match in probe_upload_content["kbMatches"]
- if kb_match["variant"] == sm_probe["key"]
+ for kb_match in probe_upload_content['kbMatches']
+ if kb_match['variant'] == sm_probe['key']
]
- assert (
- match_list
- ), f"probe match failure: {sm_probe['gene']} {sm_probe['proteinChange']} key: {sm_probe['proteinChange']}"
+ assert match_list, (
+ f'probe match failure: {sm_probe["gene"]} {sm_probe["proteinChange"]} key: {sm_probe["proteinChange"]}'
+ )
diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py
index 083683e..99c91db 100644
--- a/tests/test_ipr/test_summary.py
+++ b/tests/test_ipr/test_summary.py
@@ -16,14 +16,14 @@ def test_prefers_non_alias(self):
side_effect=[
[],
[
- {"sourceId": "1", "alias": False, "source": "source", "name": "name"},
- {"sourceId": "2", "alias": True, "source": "source", "name": "name"},
+ {'sourceId': '1', 'alias': False, 'source': 'source', 'name': 'name'},
+ {'sourceId': '2', 'alias': True, 'source': 'source', 'name': 'name'},
],
]
)
)
- rec = get_preferred_drug_representation(api, "anything")
- assert rec["sourceId"] == "1"
+ rec = get_preferred_drug_representation(api, 'anything')
+ assert rec['sourceId'] == '1'
def test_prefers_non_deprecated(self):
api = MagicMock(
@@ -31,29 +31,29 @@ def test_prefers_non_deprecated(self):
side_effect=[
[],
[
- {"sourceId": "1", "deprecated": True, "source": "source", "name": "name"},
- {"sourceId": "2", "deprecated": False, "source": "source", "name": "name"},
+ {'sourceId': '1', 'deprecated': True, 'source': 'source', 'name': 'name'},
+ {'sourceId': '2', 'deprecated': False, 'source': 'source', 'name': 'name'},
],
]
)
)
- rec = get_preferred_drug_representation(api, "anything")
- assert rec["sourceId"] == "2"
+ rec = get_preferred_drug_representation(api, 'anything')
+ assert rec['sourceId'] == '2'
def test_prefers_lower_sort_source(self):
api = MagicMock(
query=MagicMock(
side_effect=[
- [{"@rid": "source2", "sort": 0}, {"@rid": "source1", "sort": 1}],
+ [{'@rid': 'source2', 'sort': 0}, {'@rid': 'source1', 'sort': 1}],
[
- {"sourceId": "1", "deprecated": False, "source": "source1", "name": "name"},
- {"sourceId": "2", "deprecated": False, "source": "source2", "name": "name"},
+ {'sourceId': '1', 'deprecated': False, 'source': 'source1', 'name': 'name'},
+ {'sourceId': '2', 'deprecated': False, 'source': 'source2', 'name': 'name'},
],
]
)
)
- rec = get_preferred_drug_representation(api, "anything")
- assert rec["sourceId"] == "2"
+ rec = get_preferred_drug_representation(api, 'anything')
+ assert rec['sourceId'] == '2'
def test_prefers_newer_version(self):
api = MagicMock(
@@ -62,46 +62,46 @@ def test_prefers_newer_version(self):
[],
[
{
- "sourceId": "2",
- "deprecated": True,
- "source": "source",
- "name": "name",
- "sourceIdVersion": "1",
+ 'sourceId': '2',
+ 'deprecated': True,
+ 'source': 'source',
+ 'name': 'name',
+ 'sourceIdVersion': '1',
},
{
- "sourceId": "2",
- "deprecated": True,
- "source": "source",
- "name": "name",
- "sourceIdVersion": "2",
+ 'sourceId': '2',
+ 'deprecated': True,
+ 'source': 'source',
+ 'name': 'name',
+ 'sourceIdVersion': '2',
},
],
]
)
)
- rec = get_preferred_drug_representation(api, "anything")
- assert rec["sourceIdVersion"] == "1"
+ rec = get_preferred_drug_representation(api, 'anything')
+ assert rec['sourceIdVersion'] == '1'
class TestSubstituteSentenceTemplate:
def test_multiple_diseases_no_matches(self):
- template = "{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})"
- relevance = {"displayName": "senitivity"}
- disease_matches = {"1"}
+ template = '{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})'
+ relevance = {'displayName': 'senitivity'}
+ disease_matches = {'1'}
diseases = [
- {"@class": "Disease", "@rid": "2", "displayName": "disease 1"},
- {"@class": "Disease", "@rid": "3", "displayName": "disease 2"},
+ {'@class': 'Disease', '@rid': '2', 'displayName': 'disease 1'},
+ {'@class': 'Disease', '@rid': '3', 'displayName': 'disease 2'},
]
variants = [
{
- "@class": "CategoryVariant",
- "displayName": "KRAS increased RNA expression",
- "@rid": "4",
+ '@class': 'CategoryVariant',
+ 'displayName': 'KRAS increased RNA expression',
+ '@rid': '4',
}
]
- subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}]
+ subjects = [{'@class': 'Therapy', 'displayName': 'some drug', '@rid': '5'}]
sentence = substitute_sentence_template(
- template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches
+ template, diseases + variants, subjects, relevance, [], ['6', '7'], disease_matches
)
assert (
sentence
@@ -109,24 +109,24 @@ def test_multiple_diseases_no_matches(self):
)
def test_multiple_diseases_some_matches(self):
- template = "{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})"
- relevance = {"displayName": "senitivity"}
- disease_matches = {"1"}
+ template = '{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})'
+ relevance = {'displayName': 'senitivity'}
+ disease_matches = {'1'}
diseases = [
- {"@class": "Disease", "@rid": "2", "displayName": "disease 2"},
- {"@class": "Disease", "@rid": "1", "displayName": "disease 1"},
- {"@class": "Disease", "@rid": "3", "displayName": "disease 3"},
+ {'@class': 'Disease', '@rid': '2', 'displayName': 'disease 2'},
+ {'@class': 'Disease', '@rid': '1', 'displayName': 'disease 1'},
+ {'@class': 'Disease', '@rid': '3', 'displayName': 'disease 3'},
]
variants = [
{
- "@class": "CategoryVariant",
- "displayName": "KRAS increased RNA expression",
- "@rid": "4",
+ '@class': 'CategoryVariant',
+ 'displayName': 'KRAS increased RNA expression',
+ '@rid': '4',
}
]
- subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}]
+ subjects = [{'@class': 'Therapy', 'displayName': 'some drug', '@rid': '5'}]
sentence = substitute_sentence_template(
- template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches
+ template, diseases + variants, subjects, relevance, [], ['6', '7'], disease_matches
)
assert (
sentence
@@ -134,24 +134,24 @@ def test_multiple_diseases_some_matches(self):
)
def test_multiple_diseases_only_matches(self):
- template = "{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})"
- relevance = {"displayName": "senitivity"}
- disease_matches = {"1", "2", "3"}
+ template = '{conditions:variant} is associated with {relevance} to {subject} in {conditions:disease} ({evidence})'
+ relevance = {'displayName': 'senitivity'}
+ disease_matches = {'1', '2', '3'}
diseases = [
- {"@class": "Disease", "@rid": "2", "displayName": "disease 2"},
- {"@class": "Disease", "@rid": "1", "displayName": "disease 1"},
- {"@class": "Disease", "@rid": "3", "displayName": "disease 3"},
+ {'@class': 'Disease', '@rid': '2', 'displayName': 'disease 2'},
+ {'@class': 'Disease', '@rid': '1', 'displayName': 'disease 1'},
+ {'@class': 'Disease', '@rid': '3', 'displayName': 'disease 3'},
]
variants = [
{
- "@class": "CategoryVariant",
- "displayName": "KRAS increased RNA expression",
- "@rid": "4",
+ '@class': 'CategoryVariant',
+ 'displayName': 'KRAS increased RNA expression',
+ '@rid': '4',
}
]
- subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}]
+ subjects = [{'@class': 'Therapy', 'displayName': 'some drug', '@rid': '5'}]
sentence = substitute_sentence_template(
- template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches
+ template, diseases + variants, subjects, relevance, [], ['6', '7'], disease_matches
)
assert (
sentence
@@ -162,74 +162,74 @@ def test_multiple_diseases_only_matches(self):
mock_ipr_results = [
[
{
- "text": "ERBB2 amplification (test1,test)
"
- assert summary_lines[2] == "
normal
' assert len(summary_lines) == 3 def test_omits_nonspecific_project_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="notfound", - report_type="test3", + disease_name='test1', + project_name='notfound', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=True, include_nonspecific_template=True, @@ -238,13 +238,13 @@ def test_omits_nonspecific_project_matches_when_specified(self): def test_omits_nonspecific_template_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="notfound", + disease_name='test1', + project_name='test2', + report_type='notfound', include_nonspecific_project=True, include_nonspecific_disease=True, include_nonspecific_template=False, @@ -253,13 +253,13 @@ def test_omits_nonspecific_template_matches_when_specified(self): def test_omits_nonspecific_disease_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="notfound", - project_name="test2", - report_type="test3", + disease_name='notfound', + project_name='test2', + report_type='test3', include_nonspecific_project=True, include_nonspecific_disease=False, include_nonspecific_template=True, @@ -268,86 +268,86 @@ def test_omits_nonspecific_disease_matches_when_specified(self): def test_includes_nonspecific_project_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="notfound", - report_type="test3", + disease_name='test1', + project_name='notfound', + report_type='test3', include_nonspecific_project=True, include_nonspecific_disease=False, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[2] == "no project
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[2] == 'no project
' assert len(summary_lines) == 3 def test_includes_nonspecific_template_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="notfound", + disease_name='test1', + project_name='test2', + report_type='notfound', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=True, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[2] == "no template
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[2] == 'no template
' assert len(summary_lines) == 3 def test_includes_nonspecific_disease_matches_when_specified(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) - matches = [{"kbVariant": "ERBB2 amplification"}] + matches = [{'kbVariant': 'ERBB2 amplification'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="notfound", - project_name="test2", - report_type="test3", + disease_name='notfound', + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=True, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") - assert summary_lines[1] == "no cancerType
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[1] == 'no cancerType
' assert len(summary_lines) == 3 def test_prepare_section_for_multiple_variants(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) # NB this test relies on matches being processed in this order - matches = [{"kbVariant": "ERBB2 amplification"}, {"kbVariant": "second variant"}] + matches = [{'kbVariant': 'ERBB2 amplification'}, {'kbVariant': 'second variant'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="test3", + disease_name='test1', + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=False, ) - summary_lines = ipr_summary.split("\n") + summary_lines = ipr_summary.split('\n') assert len(summary_lines) == 5 assert ( - "\n".join(summary_lines[1:]) - == "normal
\nnormal, second variant
" + '\n'.join(summary_lines[1:]) + == 'normal
\nnormal, second variant
' ) def test_empty_section_when_no_variant_match(self): ipr_conn = MagicMock(get=MagicMock(side_effect=[[], []])) - matches = [{"kbVariant": "notfound1"}, {"kbVariant": "notfound2"}] + matches = [{'kbVariant': 'notfound1'}, {'kbVariant': 'notfound2'}] ipr_summary = get_ipr_analyst_comments( ipr_conn, matches=matches, - disease_name="test1", - project_name="test2", - report_type="test3", + disease_name='test1', + project_name='test2', + report_type='test3', include_nonspecific_project=False, include_nonspecific_disease=False, include_nonspecific_template=False, diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py index a33997e..ea07fdb 100644 --- a/tests/test_ipr/test_upload.py +++ b/tests/test_ipr/test_upload.py @@ -13,97 +13,97 @@ from .constants import EXCLUDE_INTEGRATION_TESTS -EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1" -EXCLUDE_ONCOKB_TESTS = os.environ.get("EXCLUDE_ONCOKB_TESTS") == "1" -INCLUDE_UPLOAD_TESTS = os.environ.get("INCLUDE_UPLOAD_TESTS", "0") == "1" -DELETE_UPLOAD_TEST_REPORTS = os.environ.get("DELETE_UPLOAD_TEST_REPORTS", "1") == "1" +EXCLUDE_BCGSC_TESTS = os.environ.get('EXCLUDE_BCGSC_TESTS') == '1' +EXCLUDE_ONCOKB_TESTS = os.environ.get('EXCLUDE_ONCOKB_TESTS') == '1' +INCLUDE_UPLOAD_TESTS = os.environ.get('INCLUDE_UPLOAD_TESTS', '0') == '1' +DELETE_UPLOAD_TEST_REPORTS = os.environ.get('DELETE_UPLOAD_TEST_REPORTS', '1') == '1' def get_test_spec(): - ipr_spec = {"components": {"schemas": {"genesCreate": {"properties": {}}}}} + ipr_spec = {'components': {'schemas': {'genesCreate': {'properties': {}}}}} ipr_gene_keys = IprGene.__required_keys__ | IprGene.__optional_keys__ for key in ipr_gene_keys: - ipr_spec["components"]["schemas"]["genesCreate"]["properties"][key] = "" + ipr_spec['components']['schemas']['genesCreate']['properties'][key] = '' return ipr_spec def get_test_file(name: str) -> str: - return os.path.join(os.path.dirname(__file__), "test_data", name) + return os.path.join(os.path.dirname(__file__), 'test_data', name) -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def loaded_reports(tmp_path_factory) -> Generator: - json_file = tmp_path_factory.mktemp("inputs") / "content.json" - async_json_file = tmp_path_factory.mktemp("inputs") / "async_content.json" - patient_id = f"TEST_{str(uuid.uuid4())}" - async_patient_id = f"TEST_ASYNC_{str(uuid.uuid4())}" + json_file = tmp_path_factory.mktemp('inputs') / 'content.json' + async_json_file = tmp_path_factory.mktemp('inputs') / 'async_content.json' + patient_id = f'TEST_{str(uuid.uuid4())}' + async_patient_id = f'TEST_ASYNC_{str(uuid.uuid4())}' json_contents = { - "comparators": [ - {"analysisRole": "expression (disease)", "name": "1"}, - {"analysisRole": "expression (primary site)", "name": "2"}, - {"analysisRole": "expression (biopsy site)", "name": "3"}, + 'comparators': [ + {'analysisRole': 'expression (disease)', 'name': '1'}, + {'analysisRole': 'expression (primary site)', 'name': '2'}, + {'analysisRole': 'expression (biopsy site)', 'name': '3'}, { - "analysisRole": "expression (internal pancancer cohort)", - "name": "4", + 'analysisRole': 'expression (internal pancancer cohort)', + 'name': '4', }, ], - "patientId": patient_id, - "project": "TEST", - "sampleInfo": [ + 'patientId': patient_id, + 'project': 'TEST', + 'sampleInfo': [ { - "sample": "Constitutional", - "biopsySite": "Normal tissue", - "sampleName": "SAMPLE1-PB", - "primarySite": "Blood-Peripheral", - "collectionDate": "11-11-11", + 'sample': 'Constitutional', + 'biopsySite': 'Normal tissue', 
+ 'sampleName': 'SAMPLE1-PB', + 'primarySite': 'Blood-Peripheral', + 'collectionDate': '11-11-11', }, { - "sample": "Tumour", - "pathoTc": "90%", - "biopsySite": "hepatic", - "sampleName": "SAMPLE2-FF-1", - "primarySite": "Vena Cava-Hepatic", - "collectionDate": "12-12-12", + 'sample': 'Tumour', + 'pathoTc': '90%', + 'biopsySite': 'hepatic', + 'sampleName': 'SAMPLE2-FF-1', + 'primarySite': 'Vena Cava-Hepatic', + 'collectionDate': '12-12-12', }, ], - "msi": [ + 'msi': [ { - "score": 1000.0, - "kbCategory": "microsatellite instability", + 'score': 1000.0, + 'kbCategory': 'microsatellite instability', } ], - "hrd": { - "score": 9999.0, - "kbCategory": "homologous recombination deficiency strong signature", + 'hrd': { + 'score': 9999.0, + 'kbCategory': 'homologous recombination deficiency strong signature', }, - "expressionVariants": json.loads( - pd.read_csv(get_test_file("expression.short.tab"), sep="\t").to_json(orient="records") + 'expressionVariants': json.loads( + pd.read_csv(get_test_file('expression.short.tab'), sep='\t').to_json(orient='records') ), - "smallMutations": json.loads( - pd.read_csv(get_test_file("small_mutations.short.tab"), sep="\t").to_json( - orient="records" + 'smallMutations': json.loads( + pd.read_csv(get_test_file('small_mutations.short.tab'), sep='\t').to_json( + orient='records' ) ), - "copyVariants": json.loads( - pd.read_csv(get_test_file("copy_variants.short.tab"), sep="\t").to_json( - orient="records" + 'copyVariants': json.loads( + pd.read_csv(get_test_file('copy_variants.short.tab'), sep='\t').to_json( + orient='records' ) ), - "structuralVariants": json.loads( - pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_json(orient="records") + 'structuralVariants': json.loads( + pd.read_csv(get_test_file('fusions.tab'), sep='\t').to_json(orient='records') ), - "kbDiseaseMatch": "colorectal cancer", - "cosmicSignatures": pd.read_csv( - get_test_file("cosmic_variants.tab"), sep="\t" + 'kbDiseaseMatch': 'colorectal cancer', + 'cosmicSignatures': pd.read_csv( + get_test_file('cosmic_variants.tab'), sep='\t' ).signature.tolist(), - "hlaTypes": json.loads( - pd.read_csv(get_test_file("hla_variants.tab"), sep="\t").to_json(orient="records") + 'hlaTypes': json.loads( + pd.read_csv(get_test_file('hla_variants.tab'), sep='\t').to_json(orient='records') ), - "images": [ + 'images': [ { - "key": "cnvLoh.circos", - "path": "test/testData/images/cnvLoh.png", - "caption": "Test adding a caption to an image", + 'key': 'cnvLoh.circos', + 'path': 'test/testData/images/cnvLoh.png', + 'caption': 'Test adding a caption to an image', } ], } @@ -115,7 +115,7 @@ def loaded_reports(tmp_path_factory) -> Generator: ) ) - json_contents["patientId"] = async_patient_id + json_contents['patientId'] = async_patient_id async_json_file.write_text( json.dumps( json_contents, @@ -124,61 +124,61 @@ def loaded_reports(tmp_path_factory) -> Generator: ) argslist = [ - "ipr", - "--username", - os.environ.get("IPR_USER", os.environ["USER"]), - "--password", - os.environ["IPR_PASS"], - "--graphkb_username", - os.environ.get("GRAPHKB_USER", os.environ.get("IPR_USER", os.environ["USER"])), - "--graphkb_password", - os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]), - "--ipr_url", - os.environ["IPR_TEST_URL"], - "--graphkb_url", - os.environ.get("GRAPHKB_URL", False), - "--therapeutics", - "--allow_partial_matches", + 'ipr', + '--username', + os.environ.get('IPR_USER', os.environ['USER']), + '--password', + os.environ['IPR_PASS'], + '--graphkb_username', + os.environ.get('GRAPHKB_USER', 
os.environ.get('IPR_USER', os.environ['USER'])), + '--graphkb_password', + os.environ.get('GRAPHKB_PASS', os.environ['IPR_PASS']), + '--ipr_url', + os.environ['IPR_TEST_URL'], + '--graphkb_url', + os.environ.get('GRAPHKB_URL', False), + '--therapeutics', + '--allow_partial_matches', ] sync_argslist = argslist.copy() - sync_argslist.extend(["--content", str(json_file)]) - with patch.object(sys, "argv", sync_argslist): - with patch.object(IprConnection, "get_spec", return_value=get_test_spec()): + sync_argslist.extend(['--content', str(json_file)]) + with patch.object(sys, 'argv', sync_argslist): + with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()): command_interface() async_argslist = argslist.copy() - async_argslist.extend(["--content", str(async_json_file), "--async_upload"]) - with patch.object(sys, "argv", async_argslist): - with patch.object(IprConnection, "get_spec", return_value=get_test_spec()): + async_argslist.extend(['--content', str(async_json_file), '--async_upload']) + with patch.object(sys, 'argv', async_argslist): + with patch.object(IprConnection, 'get_spec', return_value=get_test_spec()): command_interface() ipr_conn = IprConnection( - username=os.environ.get("IPR_USER", os.environ["USER"]), - password=os.environ["IPR_PASS"], - url=os.environ["IPR_TEST_URL"], + username=os.environ.get('IPR_USER', os.environ['USER']), + password=os.environ['IPR_PASS'], + url=os.environ['IPR_TEST_URL'], ) - loaded_report = ipr_conn.get(uri=f"reports?searchText={patient_id}") - async_loaded_report = ipr_conn.get(uri=f"reports?searchText={async_patient_id}") + loaded_report = ipr_conn.get(uri=f'reports?searchText={patient_id}') + async_loaded_report = ipr_conn.get(uri=f'reports?searchText={async_patient_id}') loaded_reports_result = { - "sync": (patient_id, loaded_report), - "async": (async_patient_id, async_loaded_report), + 'sync': (patient_id, loaded_report), + 'async': (async_patient_id, async_loaded_report), } yield loaded_reports_result if DELETE_UPLOAD_TEST_REPORTS: - ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}") - ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}") + ipr_conn.delete(uri=f'reports/{loaded_report["reports"][0]["ident"]}') + ipr_conn.delete(uri=f'reports/{async_loaded_report["reports"][0]["ident"]}') def get_section(loaded_report, section_name): - ident = loaded_report[1]["reports"][0]["ident"] + ident = loaded_report[1]['reports'][0]['ident'] ipr_conn = IprConnection( - username=os.environ.get("IPR_USER", os.environ["USER"]), - password=os.environ["IPR_PASS"], - url=os.environ["IPR_TEST_URL"], + username=os.environ.get('IPR_USER', os.environ['USER']), + password=os.environ['IPR_PASS'], + url=os.environ['IPR_TEST_URL'], ) - return ipr_conn.get(uri=f"reports/{ident}/{section_name}") + return ipr_conn.get(uri=f'reports/{ident}/{section_name}') def stringify_sorted(obj): @@ -191,7 +191,7 @@ def stringify_sorted(obj): obj.sort() return str(obj) elif isinstance(obj, dict): - for key in ("ident", "updatedAt", "createdAt", "deletedAt"): + for key in ('ident', 'updatedAt', 'createdAt', 'deletedAt'): obj.pop(key, None) keys = obj.keys() for key in keys: @@ -207,50 +207,50 @@ def stringify_sorted(obj): @pytest.mark.skipif( - not INCLUDE_UPLOAD_TESTS, reason="excluding tests of upload to live ipr instance" + not INCLUDE_UPLOAD_TESTS, reason='excluding tests of upload to live ipr instance' ) -@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") 
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests') class TestCreateReport: def test_patient_id_loaded_once(self, loaded_reports) -> None: - sync_patient_id = loaded_reports["sync"][0] - assert loaded_reports["sync"][1]["total"] == 1 - assert loaded_reports["sync"][1]["reports"][0]["patientId"] == sync_patient_id - async_patient_id = loaded_reports["async"][0] - assert loaded_reports["async"][1]["total"] == 1 - assert loaded_reports["async"][1]["reports"][0]["patientId"] == async_patient_id + sync_patient_id = loaded_reports['sync'][0] + assert loaded_reports['sync'][1]['total'] == 1 + assert loaded_reports['sync'][1]['reports'][0]['patientId'] == sync_patient_id + async_patient_id = loaded_reports['async'][0] + assert loaded_reports['async'][1]['total'] == 1 + assert loaded_reports['async'][1]['reports'][0]['patientId'] == async_patient_id def test_expression_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "expression-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert "PTP4A3" in [item["gene"]["name"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "expression-variants") + section = get_section(loaded_reports['sync'], 'expression-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert 'PTP4A3' in [item['gene']['name'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'expression-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_structural_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "structural-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert "(EWSR1,FLI1):fusion(e.7,e.4)" in [item["displayName"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "structural-variants") + section = get_section(loaded_reports['sync'], 'structural-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert '(EWSR1,FLI1):fusion(e.7,e.4)' in [item['displayName'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'structural-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_small_mutations_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "small-mutations") - kbmatched = [item for item in section if item["kbMatches"]] - assert "FGFR2:p.R421C" in [item["displayName"] for item in kbmatched] - assert "CDKN2A:p.T18M" in [item["displayName"] for item in kbmatched] - async_section = get_section(loaded_reports["async"], "small-mutations") + section = get_section(loaded_reports['sync'], 'small-mutations') + kbmatched = [item for item in section if item['kbMatches']] + assert 'FGFR2:p.R421C' in [item['displayName'] for item in kbmatched] + assert 'CDKN2A:p.T18M' in [item['displayName'] for item in kbmatched] + async_section = get_section(loaded_reports['async'], 'small-mutations') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_copy_variants_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "copy-variants") - kbmatched = [item for item in section if item["kbMatches"]] - assert ("ERBB2", "amplification") in [ - (item["gene"]["name"], item["displayName"]) for item in 
kbmatched + section = get_section(loaded_reports['sync'], 'copy-variants') + kbmatched = [item for item in section if item['kbMatches']] + assert ('ERBB2', 'amplification') in [ + (item['gene']['name'], item['displayName']) for item in kbmatched ] - async_section = get_section(loaded_reports["async"], "copy-variants") + async_section = get_section(loaded_reports['async'], 'copy-variants') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync @@ -265,58 +265,58 @@ def test_copy_variants_loaded(self, loaded_reports) -> None: # assert compare_sections(section, async_section) def test_kb_matches_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "kb-matches") + section = get_section(loaded_reports['sync'], 'kb-matches') observed_and_matched = set( - [(item["kbVariant"], item["variant"]["displayName"]) for item in section] + [(item['kbVariant'], item['variant']['displayName']) for item in section] ) for pair in [ - ("ERBB2 amplification", "amplification"), - ("FGFR2 mutation", "FGFR2:p.R421C"), - ("PTP4A3 overexpression", "increased expression"), - ("EWSR1 and FLI1 fusion", "(EWSR1,FLI1):fusion(e.7,e.4)"), - ("CDKN2A mutation", "CDKN2A:p.T18M"), + ('ERBB2 amplification', 'amplification'), + ('FGFR2 mutation', 'FGFR2:p.R421C'), + ('PTP4A3 overexpression', 'increased expression'), + ('EWSR1 and FLI1 fusion', '(EWSR1,FLI1):fusion(e.7,e.4)'), + ('CDKN2A mutation', 'CDKN2A:p.T18M'), ]: assert pair in observed_and_matched - async_section = get_section(loaded_reports["async"], "kb-matches") + async_section = get_section(loaded_reports['async'], 'kb-matches') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_therapeutic_targets_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "therapeutic-targets") - therapeutic_target_genes = set([item["gene"] for item in section]) - for gene in ["CDKN2A", "ERBB2", "FGFR2", "PTP4A3"]: + section = get_section(loaded_reports['sync'], 'therapeutic-targets') + therapeutic_target_genes = set([item['gene'] for item in section]) + for gene in ['CDKN2A', 'ERBB2', 'FGFR2', 'PTP4A3']: assert gene in therapeutic_target_genes - async_section = get_section(loaded_reports["async"], "therapeutic-targets") + async_section = get_section(loaded_reports['async'], 'therapeutic-targets') async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_genomic_alterations_identified_loaded(self, loaded_reports) -> None: - section = get_section(loaded_reports["sync"], "summary/genomic-alterations-identified") - variants = set([item["geneVariant"] for item in section]) + section = get_section(loaded_reports['sync'], 'summary/genomic-alterations-identified') + variants = set([item['geneVariant'] for item in section]) for variant in [ - "FGFR2:p.R421C", - "PTP4A3 (high_percentile)", - "ERBB2 (Amplification)", - "(EWSR1,FLI1):fusion(e.7,e.4)", - "CDKN2A:p.T18M", + 'FGFR2:p.R421C', + 'PTP4A3 (high_percentile)', + 'ERBB2 (Amplification)', + '(EWSR1,FLI1):fusion(e.7,e.4)', + 'CDKN2A:p.T18M', ]: assert variant in variants async_section = get_section( - loaded_reports["async"], "summary/genomic-alterations-identified" + loaded_reports['async'], 'summary/genomic-alterations-identified' ) async_equals_sync = stringify_sorted(section) == stringify_sorted(async_section) assert async_equals_sync def test_analyst_comments_loaded(self, loaded_reports) -> None: - 
sync_section = get_section(loaded_reports["sync"], "summary/analyst-comments") - assert sync_section["comments"] - async_section = get_section(loaded_reports["async"], "summary/analyst-comments") - assert async_section["comments"] - assert sync_section["comments"] == async_section["comments"] + sync_section = get_section(loaded_reports['sync'], 'summary/analyst-comments') + assert sync_section['comments'] + async_section = get_section(loaded_reports['async'], 'summary/analyst-comments') + assert async_section['comments'] + assert sync_section['comments'] == async_section['comments'] def test_sample_info_loaded(self, loaded_reports) -> None: - sync_section = get_section(loaded_reports["sync"], "sample-info") - async_section = get_section(loaded_reports["async"], "sample-info") + sync_section = get_section(loaded_reports['sync'], 'sample-info') + async_section = get_section(loaded_reports['async'], 'sample-info') async_equals_sync = stringify_sorted(sync_section) == stringify_sorted(async_section) assert async_equals_sync @@ -332,31 +332,31 @@ def test_multivariant_multiconditionset_statements_loaded(self, loaded_reports) are met. This is also a test of multiple condition sets since there are two variants in the test data that satisfy one of the conditions (the APC mutation).""" - section = get_section(loaded_reports["sync"], "kb-matches/kb-matched-statements") - multivariant_stmts = [item for item in section if item["reference"] == "pmid:27302369"] + section = get_section(loaded_reports['sync'], 'kb-matches/kb-matched-statements') + multivariant_stmts = [item for item in section if item['reference'] == 'pmid:27302369'] # if this statement is entered more than once there may be multiple sets of records to # check, so to make sure the count checks work, go stmt_id by stmt_id: - stmt_ids = list(set([item["kbStatementId"] for item in multivariant_stmts])) + stmt_ids = list(set([item['kbStatementId'] for item in multivariant_stmts])) for stmt_id in stmt_ids: - stmts = [item for item in multivariant_stmts if item["kbStatementId"] == stmt_id] + stmts = [item for item in multivariant_stmts if item['kbStatementId'] == stmt_id] # we expect two stmts, one for each condition set assert len(stmts) == 3 # we expect each condition set to have two kb variants in it # we expect the two kb variants to be the same in each stmt - assert len(stmts[0]["kbMatches"]) == 2 - assert len(stmts[1]["kbMatches"]) == 2 - kbmatches1 = [item["kbVariant"] for item in stmts[0]["kbMatches"]] - kbmatches2 = [item["kbVariant"] for item in stmts[1]["kbMatches"]] + assert len(stmts[0]['kbMatches']) == 2 + assert len(stmts[1]['kbMatches']) == 2 + kbmatches1 = [item['kbVariant'] for item in stmts[0]['kbMatches']] + kbmatches2 = [item['kbVariant'] for item in stmts[1]['kbMatches']] kbmatches1.sort() kbmatches2.sort() - assert kbmatches1 == kbmatches2 == ["APC mutation", "KRAS mutation"] + assert kbmatches1 == kbmatches2 == ['APC mutation', 'KRAS mutation'] # we expect the two stmts to have different observed variant sets - observedVariants1 = [item["variant"]["ident"] for item in stmts[0]["kbMatches"]] - observedVariants2 = [item["variant"]["ident"] for item in stmts[1]["kbMatches"]] + observedVariants1 = [item['variant']['ident'] for item in stmts[0]['kbMatches']] + observedVariants2 = [item['variant']['ident'] for item in stmts[1]['kbMatches']] observedVariants1.sort() observedVariants2.sort() assert observedVariants1 != observedVariants2 diff --git a/tests/test_ipr/test_util.py b/tests/test_ipr/test_util.py index 7031881..bbae6d9 
100644 --- a/tests/test_ipr/test_util.py +++ b/tests/test_ipr/test_util.py @@ -4,8 +4,8 @@ @pytest.mark.parametrize( - "input,output_keys", - [[{"key": 0}, ["key"]], [{"key": None}, []], [{"key": ""}, []], [{"gene1": None}, ["gene1"]]], + 'input,output_keys', + [[{'key': 0}, ['key']], [{'key': None}, []], [{'key': ''}, []], [{'gene1': None}, ['gene1']]], ) def test_trim_empty_values(input, output_keys): modified_object = trim_empty_values(input) @@ -13,17 +13,17 @@ def test_trim_empty_values(input, output_keys): @pytest.mark.parametrize( - "variant,result", + 'variant,result', [ [ - {"variantType": "exp", "gene": "GENE", "expressionState": "increased expression"}, - "increased expression", + {'variantType': 'exp', 'gene': 'GENE', 'expressionState': 'increased expression'}, + 'increased expression', ], - [{"variantType": "cnv", "gene": "GENE", "cnvState": "amplification"}, "amplification"], - [{"variantType": "other", "gene2": "GENE", "variant": "GENE:anything"}, "anything"], + [{'variantType': 'cnv', 'gene': 'GENE', 'cnvState': 'amplification'}, 'amplification'], + [{'variantType': 'other', 'gene2': 'GENE', 'variant': 'GENE:anything'}, 'anything'], ], ) def test_create_variant_name_tuple(variant, result): gene, name = create_variant_name_tuple(variant) assert name == result - assert gene == "GENE" + assert gene == 'GENE' From 5c41c971289342fddb0ecd491f70bebf4cb37887 Mon Sep 17 00:00:00 2001 From: sshugscno cancerType
" assert len(summary_lines) == 3 + def test_includes_all_graphkb_disease_matches(self): + ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) + matches = [{"kbVariant": "ERBB2 amplification"}] + ipr_summary = get_ipr_analyst_comments( + ipr_conn, + matches=matches, + disease_name="notfound", + disease_match_names=["TEST1"], + project_name="test2", + report_type="test3", + include_nonspecific_project=False, + include_nonspecific_disease=False, + include_nonspecific_template=False, + ) + summary_lines = ipr_summary.split("\n") + assert summary_lines[1] == "normal
" + assert len(summary_lines) == 3 + def test_prepare_section_for_multiple_variants(self): ipr_conn = MagicMock(get=MagicMock(side_effect=copy(mock_ipr_results))) # NB this test relies on matches being processed in this order @@ -326,6 +352,7 @@ def test_prepare_section_for_multiple_variants(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, @@ -346,6 +373,7 @@ def test_empty_section_when_no_variant_match(self): ipr_conn, matches=matches, disease_name="test1", + disease_match_names=[], project_name="test2", report_type="test3", include_nonspecific_project=False, From a3715db2fbefff8daf4d68d5ff091785eaaf1cee Mon Sep 17 00:00:00 2001 From: Eleanor Lewisnormal
" + summary_lines = ipr_summary.split('\n') + assert summary_lines[1] == 'normal
' assert len(summary_lines) == 3 def test_prepare_section_for_multiple_variants(self): From 96449261d0f5974dd4dca0c81309abd66f00ac16 Mon Sep 17 00:00:00 2001 From: mathieulemieux