Skip to content

Commit 0795193

Browse files
committed
fix unit tests
1 parent d4cdf02 commit 0795193

File tree

3 files changed: 15 additions and 13 deletions.

tests/fixtures/sample.obo

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,4 +84,4 @@ xref: wikipedia.en:Starch {source="wikipedia.en"}
8484
is_a: CHEBI:37163 ! glucan
8585
relationship: BFO:0000051 CHEBI:28057 ! has part amylopectin
8686
relationship: BFO:0000051 CHEBI:28102 ! has part amylose
87-
relationship: RO:0000087 CHEBI:75771 ! has role mouse metabolite
87+
relationship: RO:0000087 CHEBI:75771 ! has role mouse metabolite

tests/test_obo_extractor.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,19 @@ def test_returns_directed_graph(self):
1818
assert isinstance(g, nx.DiGraph)
1919

2020
def test_correct_number_of_nodes(self):
21-
# CHEBI:27189 is obsolete -> excluded; 3 explicit + 1 implicit (24921) = 4
21+
# CHEBI:27189 is obsolete -> excluded;
22+
# 4 explicit + 5 implicit (superclasses and relation targets) = 9
2223
g = build_chebi_graph(SAMPLE_OBO)
23-
assert len(g.nodes) == 4
24+
assert len(g.nodes) == 9
2425

2526
def test_node_ids_are_strings(self):
2627
g = build_chebi_graph(SAMPLE_OBO)
2728
assert all(isinstance(n, str) for n in g.nodes)
2829

2930
def test_expected_nodes_present(self):
3031
g = build_chebi_graph(SAMPLE_OBO)
31-
assert set(g.nodes) == {"10", "133004", "22750", "24921"}
32+
assert set(g.nodes) == {"10", "133004", "22750", "24921",
33+
"28017", '75771', '28057', '28102', '37163'}
3234

3335
def test_obsolete_term_excluded(self):
3436
g = build_chebi_graph(SAMPLE_OBO)
@@ -71,8 +73,8 @@ def test_isa_chain(self):
7173

7274
def test_total_edge_count(self):
7375
g = build_chebi_graph(SAMPLE_OBO)
74-
# 10->133004 (is_a), 133004->22750 (is_a), 22750->24921 (is_a)
75-
assert len(g.edges) == 3
76+
# 10->133004 (is_a), 133004->22750 (is_a), 22750->24921 (is_a), ...
77+
assert len(g.edges) == 7
7678

7779
def test_xref_lines_do_not_break_parsing(self, tmp_path):
7880
obo_with_xrefs = tmp_path / "xref.obo"

tests/test_sdf_extractor.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def test_chebi_id_column_present(self):
2424

2525
def test_chebi_ids_correct(self):
2626
df = extract_molecules(SAMPLE_SDF)
27-
assert set(df["chebi_id"]) == {"CHEBI:1", "CHEBI:2"}
27+
assert set(df["chebi_id"]) == {"1", "2"}
2828

2929
def test_name_column_present(self):
3030
df = extract_molecules(SAMPLE_SDF)
@@ -57,8 +57,8 @@ def test_mol_objects_are_rdkit_mol(self):
5757

5858
def test_mol_atom_counts(self):
5959
df = extract_molecules(SAMPLE_SDF)
60-
row1 = df[df["chebi_id"] == "CHEBI:1"].iloc[0]
61-
row2 = df[df["chebi_id"] == "CHEBI:2"].iloc[0]
60+
row1 = df[df["chebi_id"] == "1"].iloc[0]
61+
row2 = df[df["chebi_id"] == "2"].iloc[0]
6262
assert row1["mol"].GetNumAtoms() == 1 # methane: 1 C
6363
assert row2["mol"].GetNumAtoms() == 2 # ethane: 2 C
6464

@@ -70,7 +70,7 @@ def test_mol_sanitized(self):
7070

7171
def test_molecule_properties(self):
7272
df = extract_molecules(SAMPLE_SDF)
73-
row = df[df["chebi_id"] == "CHEBI:1"].iloc[0]
73+
row = df[df["chebi_id"] == "1"].iloc[0]
7474
assert row["name"] == "compound A"
7575
assert row["smiles"] == "C"
7676
assert row["formula"] == "CH4"
@@ -81,7 +81,7 @@ def test_gzipped_sdf(self, tmp_path):
8181
f_out.write(f_in.read())
8282
df = extract_molecules(gz_path)
8383
assert len(df) == 2
84-
assert set(df["chebi_id"]) == {"CHEBI:1", "CHEBI:2"}
84+
assert set(df["chebi_id"]) == {"1", "2"}
8585
assert all(isinstance(m, rdchem.Mol) for m in df["mol"])
8686

8787
def test_empty_sdf_returns_empty_dataframe(self, tmp_path):
@@ -90,11 +90,11 @@ def test_empty_sdf_returns_empty_dataframe(self, tmp_path):
9090
df = extract_molecules(empty_sdf)
9191
assert df.empty
9292

93-
def test_unparseable_molblock_gives_none(self, tmp_path, recwarn):
93+
def test_unparseable_molblock_excluded(self, tmp_path, recwarn):
9494
bad_sdf = tmp_path / "bad.sdf"
9595
bad_sdf.write_text(
9696
"bad_mol\n\n 0 0 0 0 0 0 0 0 0 0999 V2000\nM END\n"
9797
"> <ChEBI ID>\nCHEBI:99\n\n$$$$\n"
9898
)
9999
df = extract_molecules(bad_sdf)
100-
assert df.iloc[0]["mol"] is None
100+
assert len(df) == 0

0 commit comments

Comments
 (0)