"""Momentum factors implemented with Polars.

Contains momentum, trend and moving-average related factor operators.
Every operator derives from ``StockWiseOperator`` and implements
``calc_factor``, which receives a ``pl.DataFrame`` and returns one
``pl.Series`` named after the factor.

NOTE(review): the operators use ``shift``/``rolling_*`` directly on the
frame they are given, so they presumably expect the rows of a single stock
in chronological order -- confirm against ``StockWiseOperator``.
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from tqdm import tqdm
from main.factor.operator_framework import StockWiseOperator, OperatorConfig
from scipy.stats import linregress


def _window_slope(window_vals) -> float:
    """Return the least-squares slope of one rolling window, or 0.0.

    Used as the callback of ``Series.rolling_map``.  The window values are
    regressed against their positions ``0..len-1``.

    Args:
        window_vals: Values of one window (anything ``pl.Series`` accepts).

    Returns:
        The regression slope, or ``0.0`` when the window has fewer than two
        values, contains nulls, the regression fails, or the slope is not
        finite (e.g. NaN input).
    """
    if len(window_vals) < 2:
        return 0.0
    values = pl.Series(window_vals)
    if values.is_null().any():
        return 0.0
    y = values.to_numpy()
    x = np.arange(len(y))
    try:
        slope = linregress(x, y).slope
    except Exception:
        # Best effort: an unusable window maps to a neutral slope instead of
        # aborting the whole factor run.  Unlike a bare ``except`` this does
        # not swallow KeyboardInterrupt / SystemExit.
        return 0.0
    return float(slope) if np.isfinite(slope) else 0.0


class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Close price minus the deduction price (the close ``n`` rows earlier)."""

    def __init__(self, n: int = 10):
        """Create the operator.

        Args:
            n: Look-back distance in rows; must be positive.

        Raises:
            ValueError: If ``n`` is not positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        config = OperatorConfig(
            name=f"price_minus_deduction_price_{n}",
            description=f"{n}日价格减抵扣价",
            required_columns=['close'],
            output_columns=[f'price_minus_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'price_minus_deduction_price_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``close - close.shift(n)`` named after the factor.

        The first ``n`` rows are null because no deduction price exists yet.
        """
        close = group_df['close']
        # The deduction price is the close n rows back (shift by n, not n-1).
        deduction_price = close.shift(self.n)
        return (close - deduction_price).alias(self.get_factor_name())


class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """(Close - deduction price) relative to the ``n``-row SMA of close."""

    def __init__(self, n: int = 10):
        """Create the operator.

        Args:
            n: Window / look-back distance in rows; must be positive.

        Raises:
            ValueError: If ``n`` is not positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        config = OperatorConfig(
            name=f"price_deduction_price_diff_ratio_to_sma_{n}",
            description=f"{n}日价格抵扣价差值相对SMA比率",
            required_columns=['close'],
            output_columns=[f'price_deduction_price_diff_ratio_to_sma_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'price_deduction_price_diff_ratio_to_sma_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``(close - close.shift(n)) / (SMA_n(close) + 1e-8)``."""
        close = group_df['close']
        sma = close.rolling_mean(window_size=self.n)
        diff = close - close.shift(self.n)
        # The small epsilon guards against division by a zero average.
        ratio = diff / (sma + 1e-8)
        return ratio.alias(self.get_factor_name())


class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Categorical factor: position of close and deduction price vs. SMA."""

    def __init__(self, n: int = 10):
        """Create the operator.

        Args:
            n: Window / look-back distance in rows; must be positive.

        Raises:
            ValueError: If ``n`` is not positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'cat_price_vs_sma_vs_deduction_price_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Classify each row into one of five categories.

        Categories (first matching rule wins):
            1: close >  SMA and deduction price >  SMA
            2: close <  SMA and deduction price <  SMA
            3: close >  SMA and deduction price <= SMA
            4: close <= SMA and deduction price >  SMA
            0: everything else, including rows where a comparison is null
               (warm-up rows without SMA / deduction price).

        Returns:
            An eagerly evaluated series.  The previous implementation
            returned the lazy ``pl.when(...)`` expression, which contradicts
            the declared ``pl.Series`` return type used by all siblings.
        """
        close = group_df['close']
        frame = pl.DataFrame({
            'close': close,
            'sma': close.rolling_mean(window_size=self.n),
            'deduction': close.shift(self.n),
        })
        price, sma, deduction = pl.col('close'), pl.col('sma'), pl.col('deduction')

        classification = (
            pl.when((price > sma) & (deduction > sma)).then(1)
            .when((price < sma) & (deduction < sma)).then(2)
            .when((price > sma) & (deduction <= sma)).then(3)
            .when((price <= sma) & (deduction > sma)).then(4)
            .otherwise(0)
        )
        return frame.select(
            classification.alias(self.get_factor_name())
        ).to_series()


class VolatilitySlopeOperator(StockWiseOperator):
    """Slope of the rolling volatility (std of ``pct_chg``)."""

    def __init__(self, long_window: int = 20, short_window: int = 5):
        """Create the operator.

        Args:
            long_window: Window of the rolling standard deviation.
            short_window: Window over which the slope of that volatility
                is fitted.

        Raises:
            ValueError: If either window is not positive.
        """
        if long_window <= 0 or short_window <= 0:
            raise ValueError("Windows must be positive")
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window}
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'volatility_slope_{self.long_window}_{self.short_window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return the rolling regression slope of the long-window volatility."""
        # Long-term volatility: rolling standard deviation of the returns.
        long_vol = group_df['pct_chg'].rolling_std(window_size=self.long_window)

        volatility_slope = long_vol.rolling_map(
            function=_window_slope,
            window_size=self.short_window,
            # A slope needs two points; never ask for more than the window
            # holds, which would be an invalid combination for window 1.
            min_periods=min(2, self.short_window),
        )
        return volatility_slope.alias(self.get_factor_name())


class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Trend strength of the turnover rate (rolling regression slope)."""

    def __init__(self, window: int = 5):
        """Create the operator.

        Args:
            window: Rolling window in rows; must be positive.

        Raises:
            ValueError: If ``window`` is not positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        config = OperatorConfig(
            name=f"turnover_trend_strength_{window}",
            description=f"{window}日换手率趋势强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_trend_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'turnover_trend_strength_{self.window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return the rolling regression slope of ``turnover_rate``."""
        trend_strength = group_df['turnover_rate'].rolling_map(
            function=_window_slope,
            window_size=self.window,
            # See VolatilitySlopeOperator: keep min_periods <= window size.
            min_periods=min(2, self.window),
        )
        return trend_strength.alias(self.get_factor_name())


class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Turnover surge: turnover rate relative to its rolling mean."""

    def __init__(self, window: int = 10):
        """Create the operator.

        Args:
            window: Rolling-mean window in rows; must be positive.

        Raises:
            ValueError: If ``window`` is not positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        config = OperatorConfig(
            name=f"ff_turnover_surge_{window}",
            description=f"{window}日自由流通股换手率激增",
            required_columns=['turnover_rate'],
            output_columns=[f'ff_turnover_surge_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'ff_turnover_surge_{self.window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``turnover_rate / (rolling_mean(turnover_rate) + 1e-8)``."""
        turnover = group_df['turnover_rate']
        avg_turnover = turnover.rolling_mean(window_size=self.window)
        # The small epsilon guards against division by a zero average.
        surge_ratio = turnover / (avg_turnover + 1e-8)
        return surge_ratio.alias(self.get_factor_name())


class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Coherence between rising prices and above-average volume."""

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        """Create the operator.

        Args:
            price_window: Window over which both ratios are averaged.
            volume_window: Window of the volume moving average that each
                row's volume is compared against.

        Raises:
            ValueError: If either window is not positive.
        """
        if price_window <= 0 or volume_window <= 0:
            raise ValueError("Windows must be positive")
        config = OperatorConfig(
            name=f"price_volume_coherence_{price_window}_{volume_window}",
            description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
            required_columns=['close', 'vol'],
            output_columns=[f'price_volume_coherence_{price_window}_{volume_window}'],
            parameters={'price_window': price_window, 'volume_window': volume_window}
        )
        super().__init__(config)
        self.price_window = price_window
        self.volume_window = volume_window

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'price_volume_coherence_{self.price_window}_{self.volume_window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return (share of up rows) * (share of above-average-volume rows).

        Both shares are rolling means over ``price_window`` rows; volume is
        compared with its own ``volume_window`` rolling mean.
        """
        price_up = (group_df['close'].diff() > 0).cast(pl.Int8)
        price_up_ratio = price_up.rolling_mean(window_size=self.price_window)

        volume = group_df['vol']
        vol_avg = volume.rolling_mean(window_size=self.volume_window)
        vol_above = (volume > vol_avg).cast(pl.Int8)
        # NOTE: the volume share is averaged over price_window as well, so
        # both ratios describe the same recent span.
        vol_above_ratio = vol_above.rolling_mean(window_size=self.price_window)

        coherence = price_up_ratio * vol_above_ratio
        return coherence.alias(self.get_factor_name())


class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Ratio of free-float turnover to total turnover (placeholder)."""

    def __init__(self):
        """Create the operator; it takes no parameters."""
        config = OperatorConfig(
            name="ff_to_total_turnover_ratio",
            description="自由流通股对总换手率比率",
            required_columns=['turnover_rate'],
            output_columns=['ff_to_total_turnover_ratio'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return 'ff_to_total_turnover_ratio'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return a constant ``1.0`` for every input row.

        TODO: a real implementation needs a total turnover rate column;
        until then the factor is a simplified constant.

        The result is a real series with one value per row.  The previous
        implementation returned ``pl.lit(1.0)``, a scalar expression that
        contradicts the declared ``pl.Series`` return type.
        """
        return pl.Series(
            'ff_to_total_turnover_ratio',
            np.ones(group_df.height, dtype=np.float64),
        )


class VarianceOperator(StockWiseOperator):
    """Rolling variance of ``pct_chg``."""

    def __init__(self, window: int):
        """Create the operator.

        Args:
            window: Rolling window in rows; must be positive.

        Raises:
            ValueError: If ``window`` is not positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        config = OperatorConfig(
            name=f"variance_{window}",
            description=f"{window}日方差",
            required_columns=['pct_chg'],
            output_columns=[f'variance_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'variance_{self.window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return the rolling variance of ``pct_chg`` over ``window`` rows."""
        variance = group_df['pct_chg'].rolling_var(window_size=self.window)
        return variance.alias(self.get_factor_name())


class LimitUpDownOperator(StockWiseOperator):
    """Limit-up flag.

    NOTE(review): the config declares four output columns
    (``cat_up_limit``, ``cat_down_limit``, ``up_limit_count_10d``,
    ``down_limit_count_10d``) but ``calc_factor`` only produces
    ``cat_up_limit`` -- confirm how the framework treats the others.
    """

    def __init__(self):
        """Create the operator; it takes no parameters."""
        config = OperatorConfig(
            name="limit_up_down",
            description="涨跌停因子",
            required_columns=['close', 'up_limit', 'down_limit'],
            output_columns=['cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return 'cat_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return 1 where ``close`` equals ``up_limit``, else 0 (Int8)."""
        up_limit = (group_df['close'] == group_df['up_limit']).cast(pl.Int8)
        return up_limit.alias('cat_up_limit')


class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Consecutive limit-up days (currently a placeholder)."""

    def __init__(self):
        """Create the operator; it takes no parameters."""
        config = OperatorConfig(
            name="consecutive_up_limit",
            description="连续涨停天数",
            required_columns=['cat_up_limit'],
            output_columns=['consecutive_up_limit'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return 'consecutive_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``cat_up_limit`` renamed to ``consecutive_up_limit``.

        TODO: simplified version -- this passes the raw limit-up flag
        through.  A real streak count needs a cumulative-sum / run-grouping
        step and is left as a future optimisation.
        """
        return group_df['cat_up_limit'].alias('consecutive_up_limit')


class MomentumFactorOperator(StockWiseOperator):
    """Momentum factor: volume change rate plus weighted turnover deviation."""

    def __init__(self, alpha: float = 0.5):
        """Create the operator.

        Args:
            alpha: Weight of ``turnover_deviation``; must lie in ``[0, 1]``.

        Raises:
            ValueError: If ``alpha`` is outside ``[0, 1]``.
        """
        if not (0 <= alpha <= 1):
            raise ValueError("alpha should be between 0 and 1")
        config = OperatorConfig(
            name=f"momentum_factor_{alpha}",
            description=f"动量因子(alpha={alpha})",
            required_columns=['volume_change_rate', 'turnover_deviation'],
            output_columns=[f'momentum_factor_{alpha}'],
            parameters={'alpha': alpha}
        )
        super().__init__(config)
        self.alpha = alpha

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return f'momentum_factor_{self.alpha}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``volume_change_rate + alpha * turnover_deviation``."""
        momentum = (
            group_df['volume_change_rate']
            + self.alpha * group_df['turnover_deviation']
        )
        return momentum.alias(self.get_factor_name())


class ResonanceFactorOperator(StockWiseOperator):
    """Resonance factor: volume ratio multiplied by the percentage change."""

    def __init__(self):
        """Create the operator; it takes no parameters."""
        config = OperatorConfig(
            name="resonance_factor",
            description="共振因子",
            required_columns=['volume_ratio', 'pct_chg'],
            output_columns=['resonance_factor'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the output column name of this factor."""
        return 'resonance_factor'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return ``volume_ratio * pct_chg``."""
        resonance = group_df['volume_ratio'] * group_df['pct_chg']
        return resonance.alias('resonance_factor')


# Default momentum operator set.  The commented-out entries were disabled in
# the original file and are kept that way here.
MOMENTUM_OPERATORS = [
    PriceMinusDeductionPriceOperator(10),
    PriceDeductionPriceDiffRatioToSMAOperator(10),
    CatPriceVsSmaVsDeductionPriceOperator(10),
    # VolatilitySlopeOperator(20, 5),
    # TurnoverRateTrendStrengthOperator(5),
    FreeFloatTurnoverSurgeOperator(10),
    PriceVolumeTrendCoherenceOperator(5, 20),
    FreeFloatToTotalTurnoverRatioOperator(),
    VarianceOperator(20),
    LimitUpDownOperator(),
    ConsecutiveUpLimitOperator(),
    # MomentumFactorOperator(0.5),
    ResonanceFactorOperator(),
]


def apply_momentum_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply momentum factor operators to ``df`` one after another.

    Args:
        df: Input frame; it must provide the columns the operators require.
        operators: Operators to apply, in order.  ``None`` (the default)
            selects ``MOMENTUM_OPERATORS``.

    Returns:
        The frame returned by the last operator's ``apply``; each operator
        receives the output of the previous one, so later operators can use
        columns produced by earlier ones (e.g. ``cat_up_limit``).
    """
    if operators is None:
        operators = MOMENTUM_OPERATORS

    result_df = df
    for op in tqdm(operators, desc="Applying momentum factors"):
        result_df = op.apply(result_df)
    return result_df