Skip to content

Commit 1e651eb

Browse files
author
Tonny@Home
committed
refactor: fix portfolio analyzer biases and extract shared utils
- portfolio_analyzer: apply shift(1) lag to eliminate lookahead bias, rename 'size' factor to 'liquidity', unify alpha to arithmetic annualization
- Extract ensemble_utils.py, search_utils.py, fusion_engine.py, backtest_report.py from duplicated logic across ensemble/brute-force scripts
- Remove hacky cross-script imports in minentropy_ensemble.py
- Reduce ensemble_fusion.py from ~1515 to ~1145 lines
- Update all affected tests (525 passed)
1 parent 9e0da15 commit 1e651eb

21 files changed

Lines changed: 944 additions & 895 deletions

quantpits/scripts/analysis/portfolio_analyzer.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,9 @@ def calculate_factor_exposure(self, market=None):
304304
X = sm.add_constant(aligned['Market'])
305305
model = sm.OLS(aligned['Portfolio'], X).fit()
306306

307-
# Use period-based years for consistency with metrics
308-
years = len(aligned) / self.periods_per_year
309-
alpha = (1.0 + model.params['const']) ** self.periods_per_year - 1.0
307+
# OLS intercept is an arithmetic daily mean; annualize arithmetically
308+
# to stay consistent with the multi-factor model.
309+
alpha = model.params['const'] * self.periods_per_year
310310
beta = model.params['Market']
311311

312312
return {
@@ -317,7 +317,10 @@ def calculate_factor_exposure(self, market=None):
317317

318318
def calculate_style_exposures(self, market=None):
319319
"""
320-
Regress daily returns against proxy style factors (Size, Momentum, Volatility).
320+
Regress daily returns against proxy style factors (Liquidity, Momentum, Volatility).
321+
Factor values are lagged by 1 day (T-1) to avoid lookahead bias.
322+
Note: 'Liquidity' uses log(close*volume) as a proxy; this is turnover/amount,
323+
not market capitalization (which requires total_shares data).
321324
market 默认从 model_config.json 读取。
322325
"""
323326
if market is None:
@@ -344,18 +347,23 @@ def calculate_style_exposures(self, market=None):
344347
features['datetime'] = pd.to_datetime(features['datetime'])
345348

346349
features = features.sort_values(['instrument', 'datetime'])
347-
features['size'] = np.log(features['close'] * features['volume'] + 1e-9)
348-
features['momentum'] = features.groupby('instrument')['close'].pct_change(20)
349350

351+
# Calculate daily returns first (no lag needed, this is the dependent variable)
350352
features['prev_close'] = features.groupby('instrument')['close'].shift(1)
351353
features['ret'] = (features['close'] - features['prev_close']) / features['prev_close']
352354

355+
# Factor values use T-1 data (shift(1)) to avoid lookahead bias:
356+
# We use yesterday's factor scores to explain today's returns.
357+
features['liquidity'] = np.log(features['close'] * features['volume'] + 1e-9)
358+
features['liquidity'] = features.groupby('instrument')['liquidity'].shift(1)
359+
features['momentum'] = features.groupby('instrument')['close'].pct_change(20).shift(1)
353360
features['volatility'] = features.groupby('instrument')['ret'].rolling(20, min_periods=5).std().reset_index(0, drop=True)
361+
features['volatility'] = features.groupby('instrument')['volatility'].shift(1)
354362

355-
features = features.dropna(subset=['ret', 'size', 'momentum', 'volatility'])
363+
features = features.dropna(subset=['ret', 'liquidity', 'momentum', 'volatility'])
356364

357365
factor_returns = {}
358-
for factor in ['size', 'momentum', 'volatility']:
366+
for factor in ['liquidity', 'momentum', 'volatility']:
359367
# top 20% minus bottom 20%
360368
def _factor_ret(df):
361369
if len(df) < 5:
@@ -397,13 +405,13 @@ def _factor_ret(df):
397405
if len(aligned) < 2:
398406
return {}
399407

400-
X = sm.add_constant(aligned[['Market', 'size', 'momentum', 'volatility']])
408+
X = sm.add_constant(aligned[['Market', 'liquidity', 'momentum', 'volatility']])
401409
model = sm.OLS(aligned.iloc[:, 0], X).fit()
402410

403411
return {
404412
'Multi_Factor_Intercept': float(model.params.get('const', 0)) * self.periods_per_year,
405413
'Multi_Factor_Beta': float(model.params.get('Market', 0)),
406-
'Barra_Size_Exp': float(model.params.get('size', 0)),
414+
'Barra_Liquidity_Exp': float(model.params.get('liquidity', 0)),
407415
'Barra_Momentum_Exp': float(model.params.get('momentum', 0)),
408416
'Barra_Volatility_Exp': float(model.params.get('volatility', 0)),
409417
'Barra_Style_R_Squared': float(model.rsquared),

quantpits/scripts/analyze_ensembles.py

Lines changed: 6 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -37,51 +37,12 @@ def run_single_backtest_oos(
3737
combo_models, norm_df, top_k, drop_n, benchmark, freq,
3838
trade_exchange, bt_start, bt_end, st_config=None, bt_config=None
3939
):
40-
"""单独运行一次标准回测,用于 OOS 精确验证"""
41-
if st_config is None:
42-
st_config = strategy.load_strategy_config()
43-
if bt_config is None:
44-
bt_config = strategy.get_backtest_config(st_config)
45-
46-
# 1. 合成信号
47-
combo_score = norm_df[list(combo_models)].dropna(how='any').mean(axis=1)
48-
49-
import copy
50-
st_config = copy.deepcopy(st_config)
51-
st_config["strategy"]["params"]["topk"] = top_k
52-
st_config["strategy"]["params"]["n_drop"] = drop_n
53-
54-
strategy_inst = strategy.create_backtest_strategy(combo_score, st_config)
55-
56-
# 2. 回测
57-
try:
58-
report, _ = run_backtest_with_strategy(
59-
strategy_inst=strategy_inst,
60-
trade_exchange=trade_exchange,
61-
freq=freq,
62-
account_cash=bt_config["account"],
63-
bt_start=bt_start,
64-
bt_end=bt_end
65-
)
66-
67-
st_config_inner = strategy.load_strategy_config()
68-
benchmark_col = st_config_inner.get('benchmark', 'SH000300')
69-
metrics = standard_evaluate_portfolio(report, benchmark_col, freq)
70-
71-
return {
72-
"models": ",".join(combo_models),
73-
"n_models": len(combo_models),
74-
"Ann_Ret": metrics.get("CAGR", 0),
75-
"Max_DD": metrics.get("Max_Drawdown", 0),
76-
"Excess_Ret": metrics.get("Absolute_Return", 0) - metrics.get("Benchmark_Absolute_Return", 0),
77-
"Ann_Excess": metrics.get("Excess_Return_CAGR", 0),
78-
"Total_Ret": metrics.get("Absolute_Return", 0),
79-
"Final_NAV": report.iloc[-1]["account"],
80-
"Calmar": metrics.get("Calmar", 0) if pd.notna(metrics.get("Calmar")) else 0,
81-
}
82-
except Exception as e:
83-
print(f" [ERROR] Combo {combo_models} failed: {e}")
84-
return None
40+
"""单独运行一次标准回测,用于 OOS 精确验证 (委托给 search_utils)"""
41+
from quantpits.utils.search_utils import run_single_backtest
42+
return run_single_backtest(
43+
combo_models, norm_df, top_k, drop_n, benchmark, freq,
44+
trade_exchange, bt_start, bt_end, st_config, bt_config
45+
)
8546

8647

8748

0 commit comments

Comments
 (0)