Files
NewStock/main/factor/polars_momentum_factors.py

411 lines
15 KiB
Python
Raw Normal View History

2025-10-13 21:42:35 +08:00
"""
动量因子 - 使用Polars实现
包含动量趋势均线等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
2025-10-14 09:44:46 +08:00
from tqdm import tqdm
from main.factor.operator_framework import StockWiseOperator, OperatorConfig
2025-10-13 21:42:35 +08:00
from scipy.stats import linregress
class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Factor: current close minus the close ``n`` rows earlier.

    The value ``n`` rows back is what this module calls the "deduction
    price". The frame passed to :meth:`calc_factor` is presumably a single
    stock's rows in chronological order -- confirm against
    ``StockWiseOperator.apply``.
    """

    def __init__(self, n: int = 10):
        """Set up the operator for an ``n``-row look-back.

        Args:
            n: Number of rows to look back for the deduction price.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")

        output_column = f'price_minus_deduction_price_{n}'
        super().__init__(
            OperatorConfig(
                name=f"price_minus_deduction_price_{n}",
                description=f"{n}日价格减抵扣价",
                required_columns=['close'],
                output_columns=[output_column],
                parameters={'n': n},
            )
        )
        self.n = n

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'price_minus_deduction_price_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``close - close.shift(n)`` for the given frame.

        Args:
            group_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`. The leading ``n``
            entries are null because the shifted value does not exist yet.
        """
        close = group_df['close']
        # The look-back uses n rows (not n - 1), as in the original code.
        return (close - close.shift(self.n)).alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """Factor: (close - deduction price) divided by the ``n``-row SMA of close.

    The deduction price is the close ``n`` rows earlier.
    """

    def __init__(self, n: int = 10):
        """Set up the operator for an ``n``-row window.

        Args:
            n: Window length used for both the SMA and the look-back shift.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")

        output_column = f'price_deduction_price_diff_ratio_to_sma_{n}'
        super().__init__(
            OperatorConfig(
                name=f"price_deduction_price_diff_ratio_to_sma_{n}",
                description=f"{n}日价格抵扣价差值相对SMA比率",
                required_columns=['close'],
                output_columns=[output_column],
                parameters={'n': n},
            )
        )
        self.n = n

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'price_deduction_price_diff_ratio_to_sma_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``(close - close.shift(n)) / (rolling_mean(close, n) + 1e-8)``.

        Args:
            group_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        close = group_df['close']
        price_gap = close - close.shift(self.n)
        # The small epsilon keeps the denominator away from exactly zero.
        denominator = close.rolling_mean(window_size=self.n) + 1e-8
        return (price_gap / denominator).alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Categorical factor comparing close and deduction price against the SMA.

    Categories (first matching rule wins):
        1 -- close > SMA and deduction price > SMA
        2 -- close < SMA and deduction price < SMA
        3 -- close > SMA and deduction price <= SMA
        4 -- close <= SMA and deduction price > SMA
        0 -- none of the above

    The deduction price is the close ``n`` rows earlier; the SMA is the
    ``n``-row rolling mean of close.
    """

    def __init__(self, n: int = 10):
        """Set up the operator for an ``n``-row window.

        Args:
            n: Window length used for both the SMA and the look-back shift.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'cat_price_vs_sma_vs_deduction_price_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Classify each row into one of the categories listed on the class.

        Args:
            group_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            An integer Series named after :meth:`get_factor_name`, with one
            entry per row of ``group_df``.
        """
        close = pl.col('close')
        sma = close.rolling_mean(window_size=self.n)
        deduction_price = close.shift(self.n)

        classification = (
            pl.when((close > sma) & (deduction_price > sma)).then(1)
            .when((close < sma) & (deduction_price < sma)).then(2)
            .when((close > sma) & (deduction_price <= sma)).then(3)
            .when((close <= sma) & (deduction_price > sma)).then(4)
            .otherwise(0)
        )
        # ``pl.when`` builds a lazy expression; evaluate it against the frame
        # so the method really returns a Series, as its signature promises and
        # as the sibling operators do.
        return group_df.select(
            classification.alias(self.get_factor_name())
        ).to_series()
2025-10-13 21:42:35 +08:00
2025-10-14 09:44:46 +08:00
# ✅ 修复:使用 rolling_map
2025-10-13 21:42:35 +08:00
class VolatilitySlopeOperator(StockWiseOperator):
    """Factor: linear-regression slope of rolling volatility.

    Volatility is the ``long_window``-row rolling standard deviation of
    ``pct_chg``; the slope is fitted over the trailing ``short_window``
    volatility values.
    """

    def __init__(self, long_window: int = 20, short_window: int = 5):
        """Set up the operator.

        Args:
            long_window: Window length for the rolling standard deviation.
            short_window: Window length over which the slope is fitted.

        Raises:
            ValueError: If either window is not strictly positive.
        """
        if long_window <= 0 or short_window <= 0:
            raise ValueError("Windows must be positive")
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window},
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'volatility_slope_{self.long_window}_{self.short_window}'

    @staticmethod
    def _window_slope(window_vals) -> float:
        """Return the least-squares slope of one window, or 0.0 if unusable.

        A window is unusable when it has fewer than two points, contains a
        null, makes the regression raise, or yields a non-finite slope.
        """
        if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
            return 0.0
        x = np.arange(len(window_vals))
        try:
            slope = linregress(x, window_vals).slope
        except Exception:
            # Best-effort: a failed fit counts as "no trend". Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            return 0.0
        return slope if np.isfinite(slope) else 0.0

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the rolling slope of the rolling standard deviation.

        Args:
            group_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        # Long-horizon volatility (rolling standard deviation).
        long_vol = group_df['pct_chg'].rolling_std(window_size=self.long_window)

        # At least two points are needed to fit a slope.
        # NOTE(review): newer Polars releases rename ``min_periods`` to
        # ``min_samples`` -- verify against the pinned Polars version.
        volatility_slope = long_vol.rolling_map(
            function=self._window_slope,
            window_size=self.short_window,
            min_periods=2,
        )
        return volatility_slope.alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
2025-10-14 09:44:46 +08:00
# ✅ 修复:使用 rolling_map
2025-10-13 21:42:35 +08:00
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Factor: linear-regression slope of ``turnover_rate`` over a rolling window."""

    def __init__(self, window: int = 5):
        """Set up the operator.

        Args:
            window: Window length over which the slope is fitted.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        config = OperatorConfig(
            name=f"turnover_trend_strength_{window}",
            description=f"{window}日换手率趋势强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_trend_strength_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'turnover_trend_strength_{self.window}'

    @staticmethod
    def _window_slope(window_vals) -> float:
        """Return the least-squares slope of one window, or 0.0 if unusable.

        A window is unusable when it has fewer than two points, contains a
        null, makes the regression raise, or yields a non-finite slope.
        """
        if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
            return 0.0
        x = np.arange(len(window_vals))
        try:
            slope = linregress(x, window_vals).slope
        except Exception:
            # Best-effort: a failed fit counts as "no trend". Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            return 0.0
        return slope if np.isfinite(slope) else 0.0

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the rolling regression slope of ``turnover_rate``.

        Args:
            group_df: Frame containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        # At least two points are needed to fit a slope.
        # NOTE(review): newer Polars releases rename ``min_periods`` to
        # ``min_samples`` -- verify against the pinned Polars version.
        trend_strength = group_df['turnover_rate'].rolling_map(
            function=self._window_slope,
            window_size=self.window,
            min_periods=2,
        )
        return trend_strength.alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Factor: ``turnover_rate`` relative to its own rolling mean."""

    def __init__(self, window: int = 10):
        """Set up the operator.

        Args:
            window: Window length of the rolling mean used as the baseline.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")

        output_column = f'ff_turnover_surge_{window}'
        super().__init__(
            OperatorConfig(
                name=f"ff_turnover_surge_{window}",
                description=f"{window}日自由流通股换手率激增",
                required_columns=['turnover_rate'],
                output_columns=[output_column],
                parameters={'window': window},
            )
        )
        self.window = window

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'ff_turnover_surge_{self.window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``turnover_rate / (rolling_mean(turnover_rate) + 1e-8)``.

        Args:
            group_df: Frame containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        turnover = group_df['turnover_rate']
        # The small epsilon keeps the denominator away from exactly zero.
        baseline = turnover.rolling_mean(window_size=self.window) + 1e-8
        return (turnover / baseline).alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Factor: share of up-closes times share of above-average-volume rows.

    Both shares are rolling means over ``price_window`` rows; the volume
    average that defines "above average" uses ``volume_window`` rows.
    """

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        """Set up the operator.

        Args:
            price_window: Window length for both rolling shares.
            volume_window: Window length of the volume rolling mean.

        Raises:
            ValueError: If either window is not strictly positive.
        """
        if price_window <= 0 or volume_window <= 0:
            raise ValueError("Windows must be positive")

        output_column = f'price_volume_coherence_{price_window}_{volume_window}'
        super().__init__(
            OperatorConfig(
                name=f"price_volume_coherence_{price_window}_{volume_window}",
                description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
                required_columns=['close', 'vol'],
                output_columns=[output_column],
                parameters={
                    'price_window': price_window,
                    'volume_window': volume_window,
                },
            )
        )
        self.price_window = price_window
        self.volume_window = volume_window

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'price_volume_coherence_{self.price_window}_{self.volume_window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Multiply the rolling up-close share by the rolling heavy-volume share.

        Args:
            group_df: Frame containing ``close`` and ``vol`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        close = group_df['close']
        volume = group_df['vol']

        # 1 where the close rose versus the previous row, else 0.
        rose = (close.diff() > 0).cast(pl.Int8)
        # 1 where volume exceeds its own volume_window rolling mean, else 0.
        heavy = (volume > volume.rolling_mean(window_size=self.volume_window)).cast(pl.Int8)

        # Both shares are deliberately averaged over price_window rows.
        rose_share = rose.rolling_mean(window_size=self.price_window)
        heavy_share = heavy.rolling_mean(window_size=self.price_window)

        return (rose_share * heavy_share).alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Placeholder factor: free-float to total turnover ratio.

    The current implementation emits the constant ``1.0`` for every row; a
    real ratio would presumably need a total-turnover column that is not
    part of ``required_columns`` here.
    """

    def __init__(self):
        """Set up the operator (it takes no parameters)."""
        config = OperatorConfig(
            name="ff_to_total_turnover_ratio",
            description="自由流通股对总换手率比率",
            required_columns=['turnover_rate'],
            output_columns=['ff_to_total_turnover_ratio'],
            parameters={},
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return 'ff_to_total_turnover_ratio'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return a constant ``1.0`` Series with one entry per input row.

        Args:
            group_df: Frame whose row count determines the output length.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Float64 Series named ``ff_to_total_turnover_ratio``.
        """
        # TODO: replace the constant with the real ratio once a total
        # turnover column is available.
        # Build a concrete Series (not a lazy ``pl.lit`` expression) so the
        # return value matches the declared ``pl.Series`` contract.
        return pl.Series(
            self.get_factor_name(),
            [1.0] * group_df.height,
            dtype=pl.Float64,
        )
2025-10-13 21:42:35 +08:00
class VarianceOperator(StockWiseOperator):
    """Factor: rolling variance of ``pct_chg``."""

    def __init__(self, window: int):
        """Set up the operator.

        Args:
            window: Window length of the rolling variance.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")

        output_column = f'variance_{window}'
        super().__init__(
            OperatorConfig(
                name=f"variance_{window}",
                description=f"{window}日方差",
                required_columns=['pct_chg'],
                output_columns=[output_column],
                parameters={'window': window},
            )
        )
        self.window = window

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'variance_{self.window}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the rolling variance of ``pct_chg`` over ``window`` rows.

        Args:
            group_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        returns = group_df['pct_chg']
        return returns.rolling_var(window_size=self.window).alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class LimitUpDownOperator(StockWiseOperator):
    """Limit-up flag factor.

    NOTE(review): the config declares four output columns
    (``cat_up_limit``, ``cat_down_limit``, ``up_limit_count_10d``,
    ``down_limit_count_10d``) but :meth:`calc_factor` only produces
    ``cat_up_limit``. Confirm how ``StockWiseOperator`` uses
    ``output_columns`` before relying on the other three.
    """

    def __init__(self):
        """Set up the operator (it takes no parameters)."""
        super().__init__(
            OperatorConfig(
                name="limit_up_down",
                description="涨跌停因子",
                required_columns=['close', 'up_limit', 'down_limit'],
                output_columns=[
                    'cat_up_limit',
                    'cat_down_limit',
                    'up_limit_count_10d',
                    'down_limit_count_10d',
                ],
                parameters={},
            )
        )

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return 'cat_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Flag rows whose close equals the limit-up price.

        Args:
            group_df: Frame containing ``close`` and ``up_limit`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            An Int8 Series named ``cat_up_limit``: 1 where ``close`` equals
            ``up_limit`` exactly, 0 otherwise.
        """
        at_up_limit = group_df['close'] == group_df['up_limit']
        return at_up_limit.cast(pl.Int8).alias('cat_up_limit')
2025-10-13 21:42:35 +08:00
class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Factor: length of the current run of consecutive limit-up rows.

    Reads the 0/1 ``cat_up_limit`` flag and, for each row, counts how many
    rows in a row (ending at that row) carry the flag. Rows without the flag
    get 0.
    """

    def __init__(self):
        """Set up the operator (it takes no parameters)."""
        config = OperatorConfig(
            name="consecutive_up_limit",
            description="连续涨停天数",
            required_columns=['cat_up_limit'],
            output_columns=['consecutive_up_limit'],
            parameters={},
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return 'consecutive_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the running streak length of the ``cat_up_limit`` flag.

        Args:
            group_df: Frame containing a ``cat_up_limit`` column of 0/1
                flags. Rows are assumed to be in chronological order, as
                for the other shift/rolling operators in this module --
                confirm against the caller.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            An integer Series named ``consecutive_up_limit``. For flags
            ``[0, 1, 1, 0, 1]`` the result is ``[0, 1, 2, 0, 1]``.
        """
        # A missing flag is treated as "not limit-up" so it breaks a streak.
        flag = pl.col('cat_up_limit').fill_null(0).cast(pl.Int32)
        running_total = flag.cum_sum()
        # Remember the running total at the most recent non-limit-up row;
        # subtracting it leaves the length of the streak since that row.
        total_at_last_break = (
            pl.when(flag == 0)
            .then(running_total)
            .forward_fill()
            .fill_null(0)
        )
        streak = running_total - total_at_last_break
        return group_df.select(streak.alias(self.get_factor_name())).to_series()
2025-10-13 21:42:35 +08:00
class MomentumFactorOperator(StockWiseOperator):
    """Factor: ``volume_change_rate + alpha * turnover_deviation``."""

    def __init__(self, alpha: float = 0.5):
        """Set up the operator.

        Args:
            alpha: Weight applied to ``turnover_deviation``; must lie in
                the closed interval [0, 1].

        Raises:
            ValueError: If ``alpha`` is outside [0, 1].
        """
        if not (0 <= alpha <= 1):
            raise ValueError("alpha should be between 0 and 1")

        output_column = f'momentum_factor_{alpha}'
        super().__init__(
            OperatorConfig(
                name=f"momentum_factor_{alpha}",
                description=f"动量因子(alpha={alpha})",
                required_columns=['volume_change_rate', 'turnover_deviation'],
                output_columns=[output_column],
                parameters={'alpha': alpha},
            )
        )
        self.alpha = alpha

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return f'momentum_factor_{self.alpha}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Combine volume change rate with the alpha-weighted turnover deviation.

        Args:
            group_df: Frame containing ``volume_change_rate`` and
                ``turnover_deviation`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named after :meth:`get_factor_name`.
        """
        weighted_deviation = self.alpha * group_df['turnover_deviation']
        combined = group_df['volume_change_rate'] + weighted_deviation
        return combined.alias(self.get_factor_name())
2025-10-13 21:42:35 +08:00
class ResonanceFactorOperator(StockWiseOperator):
    """Factor: element-wise product of ``volume_ratio`` and ``pct_chg``."""

    def __init__(self):
        """Set up the operator (it takes no parameters)."""
        super().__init__(
            OperatorConfig(
                name="resonance_factor",
                description="共振因子",
                required_columns=['volume_ratio', 'pct_chg'],
                output_columns=['resonance_factor'],
                parameters={},
            )
        )

    def get_factor_name(self) -> str:
        """Return the name of the column this operator produces."""
        return 'resonance_factor'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Multiply ``volume_ratio`` by ``pct_chg`` row by row.

        Args:
            group_df: Frame containing ``volume_ratio`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A Series named ``resonance_factor``.
        """
        volume_ratio = group_df['volume_ratio']
        pct_change = group_df['pct_chg']
        return (volume_ratio * pct_change).alias('resonance_factor')
2025-10-13 21:42:35 +08:00
# Default set of momentum operators, applied in this order.
# ConsecutiveUpLimitOperator reads the ``cat_up_limit`` column, so it must
# come after LimitUpDownOperator.
MOMENTUM_OPERATORS = [
    PriceMinusDeductionPriceOperator(n=10),
    PriceDeductionPriceDiffRatioToSMAOperator(n=10),
    CatPriceVsSmaVsDeductionPriceOperator(n=10),
    # Disabled: VolatilitySlopeOperator(20, 5)
    # Disabled: TurnoverRateTrendStrengthOperator(5)
    FreeFloatTurnoverSurgeOperator(window=10),
    PriceVolumeTrendCoherenceOperator(price_window=5, volume_window=20),
    FreeFloatToTotalTurnoverRatioOperator(),
    VarianceOperator(window=20),
    LimitUpDownOperator(),
    ConsecutiveUpLimitOperator(),
    # Disabled: MomentumFactorOperator(0.5)
    ResonanceFactorOperator(),
]
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply a sequence of momentum operators to ``df``, one after another.

    Args:
        df: Input frame; each operator's ``apply`` receives the output of
            the previous one.
        operators: Operators to run, in order. ``None`` selects
            ``MOMENTUM_OPERATORS``; an empty list runs nothing.

    Returns:
        The frame returned by the last operator's ``apply`` (or ``df``
        itself when there are no operators).
    """
    selected = MOMENTUM_OPERATORS if operators is None else operators
    frame = df
    for op in tqdm(selected, desc="Applying momentum factors"):
        frame = op.apply(frame)
    return frame