@@ -24,7 +24,7 @@ def test_chebi_id_column_present(self):
2424
2525 def test_chebi_ids_correct (self ):
2626 df = extract_molecules (SAMPLE_SDF )
27- assert set (df ["chebi_id" ]) == {"CHEBI: 1" , "CHEBI: 2" }
27+ assert set (df ["chebi_id" ]) == {"1" , "2" }
2828
2929 def test_name_column_present (self ):
3030 df = extract_molecules (SAMPLE_SDF )
@@ -57,8 +57,8 @@ def test_mol_objects_are_rdkit_mol(self):
5757
5858 def test_mol_atom_counts (self ):
5959 df = extract_molecules (SAMPLE_SDF )
60- row1 = df [df ["chebi_id" ] == "CHEBI: 1" ].iloc [0 ]
61- row2 = df [df ["chebi_id" ] == "CHEBI: 2" ].iloc [0 ]
60+ row1 = df [df ["chebi_id" ] == "1" ].iloc [0 ]
61+ row2 = df [df ["chebi_id" ] == "2" ].iloc [0 ]
6262 assert row1 ["mol" ].GetNumAtoms () == 1 # methane: 1 C
6363 assert row2 ["mol" ].GetNumAtoms () == 2 # ethane: 2 C
6464
@@ -70,7 +70,7 @@ def test_mol_sanitized(self):
7070
7171 def test_molecule_properties (self ):
7272 df = extract_molecules (SAMPLE_SDF )
73- row = df [df ["chebi_id" ] == "CHEBI: 1" ].iloc [0 ]
73+ row = df [df ["chebi_id" ] == "1" ].iloc [0 ]
7474 assert row ["name" ] == "compound A"
7575 assert row ["smiles" ] == "C"
7676 assert row ["formula" ] == "CH4"
@@ -81,7 +81,7 @@ def test_gzipped_sdf(self, tmp_path):
8181 f_out .write (f_in .read ())
8282 df = extract_molecules (gz_path )
8383 assert len (df ) == 2
84- assert set (df ["chebi_id" ]) == {"CHEBI: 1" , "CHEBI: 2" }
84+ assert set (df ["chebi_id" ]) == {"1" , "2" }
8585 assert all (isinstance (m , rdchem .Mol ) for m in df ["mol" ])
8686
8787 def test_empty_sdf_returns_empty_dataframe (self , tmp_path ):
@@ -90,11 +90,11 @@ def test_empty_sdf_returns_empty_dataframe(self, tmp_path):
9090 df = extract_molecules (empty_sdf )
9191 assert df .empty
9292
93- def test_unparseable_molblock_gives_none (self , tmp_path , recwarn ):
93+ def test_unparseable_molblock_excluded (self , tmp_path , recwarn ):
9494 bad_sdf = tmp_path / "bad.sdf"
9595 bad_sdf .write_text (
9696 "bad_mol\n \n 0 0 0 0 0 0 0 0 0 0999 V2000\n M END\n "
9797 "> <ChEBI ID>\n CHEBI:99\n \n $$$$\n "
9898 )
9999 df = extract_molecules (bad_sdf )
100- assert df . iloc [ 0 ][ "mol" ] is None
100+ assert len ( df ) == 0
0 commit comments