From 18fbac063fb561f45fccbc7ab6fdac6b483343bf Mon Sep 17 00:00:00 2001
From: Bortlesboat
Date: Fri, 8 May 2026 00:07:17 -0400
Subject: [PATCH] fix: avoid np.int in feature inf analysis

`np.int` was only an alias for the builtin `int`; it was deprecated in
NumPy 1.20 and removed in NumPy 1.24, so `FeaInfAna.calc_stat_values`
fails with AttributeError on current NumPy releases. Cast with the
builtin `int` instead, which yields the same integer dtype, and add a
regression test that checks the per-date inf counts.

---
 qlib/contrib/report/data/ana.py       |  2 +-
 tests/test_contrib_report_data_ana.py | 29 +++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_contrib_report_data_ana.py

diff --git a/qlib/contrib/report/data/ana.py b/qlib/contrib/report/data/ana.py
index 33545ec06b2..e4b5da06403 100644
--- a/qlib/contrib/report/data/ana.py
+++ b/qlib/contrib/report/data/ana.py
@@ -99,7 +99,7 @@ def calc_stat_values(self):
         self._inf_cnt = {}
         for col, item in self._dataset.items():
             if not super().skip(col):
-                self._inf_cnt[col] = item.apply(np.isinf).astype(np.int).groupby(DT_COL_NAME, group_keys=False).sum()
+                self._inf_cnt[col] = item.apply(np.isinf).astype(int).groupby(DT_COL_NAME, group_keys=False).sum()
         self._inf_cnt = pd.DataFrame(self._inf_cnt)
 
     def skip(self, col):
diff --git a/tests/test_contrib_report_data_ana.py b/tests/test_contrib_report_data_ana.py
new file mode 100644
index 00000000000..1deb611e162
--- /dev/null
+++ b/tests/test_contrib_report_data_ana.py
@@ -0,0 +1,29 @@
+import numpy as np
+import pandas as pd
+
+from qlib.contrib.report.data.ana import FeaInfAna
+
+
+def test_fea_inf_ana_counts_inf_values_with_numpy_2():
+    index = pd.MultiIndex.from_tuples(
+        [
+            (pd.Timestamp("2024-01-01"), "SH000001"),
+            (pd.Timestamp("2024-01-01"), "SH000002"),
+            (pd.Timestamp("2024-01-02"), "SH000001"),
+            (pd.Timestamp("2024-01-02"), "SH000002"),
+        ],
+        names=["datetime", "instrument"],
+    )
+    dataset = pd.DataFrame(
+        {
+            "feature": [1.0, np.inf, -np.inf, 2.0],
+            "clean": [1.0, 2.0, 3.0, 4.0],
+        },
+        index=index,
+    )
+
+    analyser = FeaInfAna(dataset)
+
+    assert analyser._inf_cnt.loc[pd.Timestamp("2024-01-01"), "feature"] == 1
+    assert analyser._inf_cnt.loc[pd.Timestamp("2024-01-02"), "feature"] == 1
+    assert analyser.skip("clean")