Skip to content

Commit 8f670d3

Browse files
committed
added jaccard index figure.
1 parent fb1ec16 commit 8f670d3

2 files changed

Lines changed: 51 additions & 3 deletions

File tree

Snakefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,13 +380,15 @@ rule evaluation:
380380
output:
381381
eval_file = SEP.join([out_dir, "{dataset_gold_standard_pairs}-evaluation.txt"]),
382382
pr_edge_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-per-pathway_edge.txt"]),
383-
pr_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway_edge.png'])
383+
pr_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway_edge.png']),
384+
heatmap_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'jaccard-heatmap.png'])
385+
384386
run:
385387
node_table = Evaluation.from_file(input.gold_standard_file).node_table
386388
edge_table = Evaluation.from_file(input.gold_standard_file).edge_table
387389
Evaluation.precision(input.pathways, node_table, output.eval_file)
388390
Evaluation.precision_and_recall_edge(input.pathways, edge_table, algorithms, output.pr_edge_file, output.pr_edge_png)
389-
391+
Evaluation.jaccard_edge_heatmap(input.pathways, edge_table, output.heatmap_edge_png)
390392
# Remove the output directory
391393
rule clean:
392394
shell: f'rm -rf {out_dir}'

spras/evaluation.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import pickle as pkl
33
from pathlib import Path
44
from typing import Dict, Iterable
5-
5+
import numpy as np
6+
import seaborn as sns
67
from matplotlib import pyplot as plt
78
import pandas as pd
89
from sklearn.metrics import precision_score, recall_score
@@ -169,3 +170,48 @@ def precision_and_recall_edge(file_paths: Iterable[Path], edge_table: pd.DataFra
169170
plt.plot([], [])
170171
plt.title("Empty Pathway Files")
171172
plt.savefig(output_png)
173+
174+
175+
@staticmethod
def jaccard_edge_heatmap(file_paths: Iterable[Path], edge_table: pd.DataFrame, output_png: str = None):
    """
    Takes in file paths of pathway reconstruction outputs and an associated gold standard edge table.
    Computes the Jaccard index (|intersection| / |union|) between the edges of every pathway file
    and the gold standard edges, and draws the values as a single-row heatmap with one column per file.
    Edges are compared as ordered (Node1, Node2) tuples, so (A, B) and (B, A) count as different edges.
    If both the gold standard and a pathway contain no edges, the index is reported as 0.0.
    If no pathway files are given, an empty placeholder figure is saved instead of a heatmap.
    Saves the figure to output_png
    @param file_paths: file paths of pathway reconstruction algorithm outputs; each file must be
    tab-separated with a header row containing Node1 and Node2 columns
    @param edge_table: the gold standard edges; the first two columns are used as the edge endpoints
    @param output_png (optional): the filename to plot the heatmap (not a PRC)
    NOTE(review): the None default is passed straight to plt.savefig; callers are expected to supply a filename
    """
    # The first two columns of the gold standard table define the edge endpoints
    gs_edges = {(row[0], row[1]) for row in edge_table.itertuples(index=False, name=None)}

    # calculate the jaccard edge index for each method against the gold standard
    jaccard_edge_indices_list = []
    algorithms = []
    for file in file_paths:
        df = pd.read_table(file, sep="\t", header=0, usecols=["Node1", "Node2"])
        method_edges = set(zip(df["Node1"], df["Node2"]))
        edge_union = gs_edges | method_edges
        edge_intersection = gs_edges & method_edges
        # An empty union means both edge sets are empty; avoid dividing by zero
        jaccard_edge_index = len(edge_intersection) / len(edge_union) if edge_union else 0.0
        jaccard_edge_indices_list.append(float(jaccard_edge_index))

        # The label is the second dash-separated token of the second path component,
        # e.g. <out_dir>/<dataset>-<algorithm>-<params>/pathway.txt -> <algorithm>.
        # str() lets both plain strings and Path objects through.
        file_name = str(file)
        try:
            algorithm = file_name.split("/")[1].split("-")[1]
        except IndexError:
            # Path does not follow the expected layout; label the column with the full path
            algorithm = file_name
        algorithms.append(algorithm)

    fig = plt.figure(figsize=(10, 8))
    try:
        if algorithms:
            # seaborn expects 2D data, so wrap the scores in a single row
            jaccard_edge_indices = np.asarray([jaccard_edge_indices_list])
            sns.heatmap(
                jaccard_edge_indices,
                annot=True,
                cmap="viridis",
                xticklabels=algorithms,
                yticklabels=[""],
            )
            plt.xlabel("Algorithms")
            plt.ylabel("Pathways")
            plt.title("Jaccard Index Edge Heatmap")
            plt.tick_params(axis='x', which='major', labelsize=7.5)
        else:
            # Nothing to compare: mirror the placeholder used by precision_and_recall_edge
            plt.plot([], [])
            plt.title("Empty Pathway Files")
        plt.savefig(output_png, format="png", dpi=300)
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

0 commit comments

Comments
 (0)