Fix unit tests: implicit OBO nodes, bare ChEBI IDs, bad molblocks dropped

sfluegel05 · sfluegel05 · commit 079519380437 · 2026-02-27T15:01:54.000+01:00
diff --git a/tests/fixtures/sample.obo b/tests/fixtures/sample.obo
@@ -84,4 +84,4 @@ xref: wikipedia.en:Starch {source="wikipedia.en"}
 is_a: CHEBI:37163 ! glucan
 relationship: BFO:0000051 CHEBI:28057 ! has part amylopectin
 relationship: BFO:0000051 CHEBI:28102 ! has part amylose
-relationship: RO:0000087 CHEBI:75771 ! has role mouse metabolite
+relationship: RO:0000087 CHEBI:75771 ! has role mouse metabolite
diff --git a/tests/test_obo_extractor.py b/tests/test_obo_extractor.py
@@ -18,17 +18,19 @@ def test_returns_directed_graph(self):
         assert isinstance(g, nx.DiGraph)
 
     def test_correct_number_of_nodes(self):
-        # CHEBI:27189 is obsolete -> excluded; 3 explicit + 1 implicit (24921) = 4
+        # CHEBI:27189 is obsolete -> excluded;
+        # 4 explicit + 5 implicit (superclasses and relation targets) = 9
         g = build_chebi_graph(SAMPLE_OBO)
-        assert len(g.nodes) == 4
+        assert len(g.nodes) == 9
 
     def test_node_ids_are_strings(self):
         g = build_chebi_graph(SAMPLE_OBO)
         assert all(isinstance(n, str) for n in g.nodes)
 
     def test_expected_nodes_present(self):
         g = build_chebi_graph(SAMPLE_OBO)
-        assert set(g.nodes) == {"10", "133004", "22750", "24921"}
+        assert set(g.nodes) == {"10", "133004", "22750", "24921",
+                                "28017", '75771', '28057', '28102', '37163'}
 
     def test_obsolete_term_excluded(self):
         g = build_chebi_graph(SAMPLE_OBO)
@@ -71,8 +73,8 @@ def test_isa_chain(self):
 
     def test_total_edge_count(self):
         g = build_chebi_graph(SAMPLE_OBO)
-        # 10->133004 (is_a), 133004->22750 (is_a), 22750->24921 (is_a)
-        assert len(g.edges) == 3
+        # 10->133004 (is_a), 133004->22750 (is_a), 22750->24921 (is_a), ...
+        assert len(g.edges) == 7
 
     def test_xref_lines_do_not_break_parsing(self, tmp_path):
         obo_with_xrefs = tmp_path / "xref.obo"
diff --git a/tests/test_sdf_extractor.py b/tests/test_sdf_extractor.py
@@ -24,7 +24,7 @@ def test_chebi_id_column_present(self):
 
     def test_chebi_ids_correct(self):
         df = extract_molecules(SAMPLE_SDF)
-        assert set(df["chebi_id"]) == {"CHEBI:1", "CHEBI:2"}
+        assert set(df["chebi_id"]) == {"1", "2"}
 
     def test_name_column_present(self):
         df = extract_molecules(SAMPLE_SDF)
@@ -57,8 +57,8 @@ def test_mol_objects_are_rdkit_mol(self):
 
     def test_mol_atom_counts(self):
         df = extract_molecules(SAMPLE_SDF)
-        row1 = df[df["chebi_id"] == "CHEBI:1"].iloc[0]
-        row2 = df[df["chebi_id"] == "CHEBI:2"].iloc[0]
+        row1 = df[df["chebi_id"] == "1"].iloc[0]
+        row2 = df[df["chebi_id"] == "2"].iloc[0]
         assert row1["mol"].GetNumAtoms() == 1  # methane: 1 C
         assert row2["mol"].GetNumAtoms() == 2  # ethane: 2 C
 
@@ -70,7 +70,7 @@ def test_mol_sanitized(self):
 
     def test_molecule_properties(self):
         df = extract_molecules(SAMPLE_SDF)
-        row = df[df["chebi_id"] == "CHEBI:1"].iloc[0]
+        row = df[df["chebi_id"] == "1"].iloc[0]
         assert row["name"] == "compound A"
         assert row["smiles"] == "C"
         assert row["formula"] == "CH4"
@@ -81,7 +81,7 @@ def test_gzipped_sdf(self, tmp_path):
             f_out.write(f_in.read())
         df = extract_molecules(gz_path)
         assert len(df) == 2
-        assert set(df["chebi_id"]) == {"CHEBI:1", "CHEBI:2"}
+        assert set(df["chebi_id"]) == {"1", "2"}
         assert all(isinstance(m, rdchem.Mol) for m in df["mol"])
 
     def test_empty_sdf_returns_empty_dataframe(self, tmp_path):
@@ -90,11 +90,11 @@ def test_empty_sdf_returns_empty_dataframe(self, tmp_path):
         df = extract_molecules(empty_sdf)
         assert df.empty
 
-    def test_unparseable_molblock_gives_none(self, tmp_path, recwarn):
+    def test_unparseable_molblock_excluded(self, tmp_path, recwarn):
         bad_sdf = tmp_path / "bad.sdf"
         bad_sdf.write_text(
             "bad_mol\n\n  0  0  0  0  0  0  0  0  0  0999 V2000\nM  END\n"
             "> <ChEBI ID>\nCHEBI:99\n\n$$$$\n"
         )
         df = extract_molecules(bad_sdf)
-        assert df.iloc[0]["mol"] is None
+        assert len(df) == 0