@@ -304,9 +304,9 @@ def calculate_factor_exposure(self, market=None):
304304 X = sm .add_constant (aligned ['Market' ])
305305 model = sm .OLS (aligned ['Portfolio' ], X ).fit ()
306306
307- # Use period-based years for consistency with metrics
308- years = len ( aligned ) / self . periods_per_year
309- alpha = ( 1.0 + model .params ['const' ]) ** self .periods_per_year - 1.0
307+ # OLS intercept is an arithmetic daily mean; annualize arithmetically
308+ # to stay consistent with the multi-factor model.
309+ alpha = model .params ['const' ] * self .periods_per_year
310310 beta = model .params ['Market' ]
311311
312312 return {
@@ -317,7 +317,10 @@ def calculate_factor_exposure(self, market=None):
317317
318318 def calculate_style_exposures (self , market = None ):
319319 """
320- Regress daily returns against proxy style factors (Size, Momentum, Volatility).
320+ Regress daily returns against proxy style factors (Liquidity, Momentum, Volatility).
321+ Factor values are lagged by 1 day (T-1) to avoid lookahead bias.
322+ Note: 'Liquidity' uses log(close*volume) as a proxy; this is turnover/amount,
323+ not market capitalization (which requires total_shares data).
321324 market 默认从 model_config.json 读取。
322325 """
323326 if market is None :
@@ -344,18 +347,23 @@ def calculate_style_exposures(self, market=None):
344347 features ['datetime' ] = pd .to_datetime (features ['datetime' ])
345348
346349 features = features .sort_values (['instrument' , 'datetime' ])
347- features ['size' ] = np .log (features ['close' ] * features ['volume' ] + 1e-9 )
348- features ['momentum' ] = features .groupby ('instrument' )['close' ].pct_change (20 )
349350
351+ # Calculate daily returns first (no lag needed, this is the dependent variable)
350352 features ['prev_close' ] = features .groupby ('instrument' )['close' ].shift (1 )
351353 features ['ret' ] = (features ['close' ] - features ['prev_close' ]) / features ['prev_close' ]
352354
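355+ # Factor values use T-1 data (shift(1) within each instrument) to avoid lookahead bias:
356+ # yesterday's factor scores explain today's returns. NOTE(review): the momentum .shift(1) below is not grouped by instrument, unlike liquidity/volatility, so it can carry a value across instrument boundaries -- confirm.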
357+ features ['liquidity' ] = np .log (features ['close' ] * features ['volume' ] + 1e-9 )
358+ features ['liquidity' ] = features .groupby ('instrument' )['liquidity' ].shift (1 )
359+ features ['momentum' ] = features .groupby ('instrument' )['close' ].pct_change (20 ).shift (1 )
353360 features ['volatility' ] = features .groupby ('instrument' )['ret' ].rolling (20 , min_periods = 5 ).std ().reset_index (0 , drop = True )
361+ features ['volatility' ] = features .groupby ('instrument' )['volatility' ].shift (1 )
354362
355- features = features .dropna (subset = ['ret' , 'size ' , 'momentum' , 'volatility' ])
363+ features = features .dropna (subset = ['ret' , 'liquidity ' , 'momentum' , 'volatility' ])
356364
357365 factor_returns = {}
358- for factor in ['size ' , 'momentum' , 'volatility' ]:
366+ for factor in ['liquidity ' , 'momentum' , 'volatility' ]:
359367 # top 20% minus bottom 20%
360368 def _factor_ret (df ):
361369 if len (df ) < 5 :
@@ -397,13 +405,13 @@ def _factor_ret(df):
397405 if len (aligned ) < 2 :
398406 return {}
399407
400- X = sm .add_constant (aligned [['Market' , 'size ' , 'momentum' , 'volatility' ]])
408+ X = sm .add_constant (aligned [['Market' , 'liquidity ' , 'momentum' , 'volatility' ]])
401409 model = sm .OLS (aligned .iloc [:, 0 ], X ).fit ()
402410
403411 return {
404412 'Multi_Factor_Intercept' : float (model .params .get ('const' , 0 )) * self .periods_per_year ,
405413 'Multi_Factor_Beta' : float (model .params .get ('Market' , 0 )),
406- 'Barra_Size_Exp ' : float (model .params .get ('size ' , 0 )),
414+ 'Barra_Liquidity_Exp ' : float (model .params .get ('liquidity ' , 0 )),
407415 'Barra_Momentum_Exp' : float (model .params .get ('momentum' , 0 )),
408416 'Barra_Volatility_Exp' : float (model .params .get ('volatility' , 0 )),
409417 'Barra_Style_R_Squared' : float (model .rsquared ),
0 commit comments