Files
NewStock/main/factor/polars_momentum_factors.py

411 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
动量因子 - 使用Polars实现
包含动量、趋势、均线等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from tqdm import tqdm
from main.factor.operator_framework import StockWiseOperator, OperatorConfig
from scipy.stats import linregress
class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Factor: current close minus the "deduction price".

    The deduction price is the close found ``n`` rows earlier in the same
    group (``close.shift(n)``).
    """

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Look-back distance, in rows, used to fetch the deduction price.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        factor_name = f"price_minus_deduction_price_{n}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{n}日价格减抵扣价",
                required_columns=['close'],
                output_columns=[factor_name],
                parameters={'n': n},
            )
        )
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds ``n``."""
        return 'price_minus_deduction_price_{}'.format(self.n)

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``close - close.shift(n)`` for one group.

        Args:
            group_df: Frame holding a ``close`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The difference series, named after the factor. The first ``n``
            rows are null because no earlier close exists to subtract.
        """
        close = group_df['close']
        # The deduction price is taken n rows back (not n - 1).
        return (close - close.shift(self.n)).alias(self.get_factor_name())
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """Factor: (close - deduction price) divided by the ``n``-row SMA of close."""

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Row count used both as the SMA window and as the look-back
                distance of the deduction price.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        factor_name = f"price_deduction_price_diff_ratio_to_sma_{n}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{n}日价格抵扣价差值相对SMA比率",
                required_columns=['close'],
                output_columns=[factor_name],
                parameters={'n': n},
            )
        )
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds ``n``."""
        return 'price_deduction_price_diff_ratio_to_sma_{}'.format(self.n)

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``(close - close.shift(n)) / (rolling_mean(close, n) + 1e-8)``.

        Args:
            group_df: Frame holding a ``close`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The ratio series, named after the factor.
        """
        close = group_df['close']
        price_gap = close - close.shift(self.n)
        moving_average = close.rolling_mean(window_size=self.n)
        # The small epsilon keeps the division defined when the SMA is zero.
        return (price_gap / (moving_average + 1e-8)).alias(self.get_factor_name())
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Categorical factor comparing close and deduction price against the SMA.

    Category codes (first matching rule wins):
        1: close > SMA  and deduction price > SMA
        2: close < SMA  and deduction price < SMA
        3: close > SMA  and deduction price <= SMA
        4: close <= SMA and deduction price > SMA
        0: anything else, including rows where the SMA or the deduction
           price is still null.
    """

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Row count used both as the SMA window and as the look-back
                distance of the deduction price.

        Raises:
            ValueError: If ``n`` is not strictly positive.
        """
        if n <= 0:
            raise ValueError("n must be positive")
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds ``n``."""
        return f'cat_price_vs_sma_vs_deduction_price_{self.n}'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Classify every row of ``group_df`` into one of the category codes.

        The ``when/then`` chain is a lazy Polars expression. It is evaluated
        against ``group_df`` here so that the method returns a materialised
        ``pl.Series``, as the signature promises and as the sibling
        operators do, instead of leaking an unevaluated ``pl.Expr``.

        Args:
            group_df: Frame holding a ``close`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Integer category series named after the factor.
        """
        close = pl.col('close')
        sma = close.rolling_mean(window_size=self.n)
        deduction_price = close.shift(self.n)

        classification = (
            pl.when((close > sma) & (deduction_price > sma)).then(1)
            .when((close < sma) & (deduction_price < sma)).then(2)
            .when((close > sma) & (deduction_price <= sma)).then(3)
            .when((close <= sma) & (deduction_price > sma)).then(4)
            # Null predicates (warm-up rows) match no branch and land here.
            .otherwise(0)
        )
        return group_df.select(
            classification.alias(self.get_factor_name())
        ).to_series()
# Fixed: the slope is computed with rolling_map.
class VolatilitySlopeOperator(StockWiseOperator):
    """Factor: linear-regression slope of rolling volatility.

    Volatility is the rolling standard deviation of ``pct_chg`` over
    ``long_window`` rows; the factor is the least-squares slope of that
    volatility over the trailing ``short_window`` rows.
    """

    def __init__(self, long_window: int = 20, short_window: int = 5):
        """Configure the operator.

        Args:
            long_window: Window of the rolling standard deviation.
            short_window: Window over which the slope is fitted.

        Raises:
            ValueError: If either window is not strictly positive.
        """
        if long_window <= 0 or short_window <= 0:
            raise ValueError("Windows must be positive")
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window}
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds both windows."""
        return f'volatility_slope_{self.long_window}_{self.short_window}'

    @staticmethod
    def _window_slope(window_vals) -> float:
        """Return the regression slope of one window, or 0.0 if unusable.

        0.0 is returned when the window has fewer than two points, contains
        a null, the fit raises, or the fitted slope is not finite.
        """
        if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
            return 0.0
        x = np.arange(len(window_vals))
        try:
            slope = linregress(x, window_vals)[0]
        except Exception:
            # Best-effort fallback. Deliberately not a bare ``except`` so
            # that KeyboardInterrupt / SystemExit still abort a long run.
            return 0.0
        return float(slope) if np.isfinite(slope) else 0.0

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the volatility slope for one group.

        Args:
            group_df: Frame holding a ``pct_chg`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The slope series, named after the factor.
        """
        # Long-horizon volatility (rolling standard deviation).
        long_vol = group_df['pct_chg'].rolling_std(window_size=self.long_window)
        # NOTE(review): newer Polars releases appear to rename ``min_periods``
        # to ``min_samples`` - confirm against the pinned Polars version.
        volatility_slope = long_vol.rolling_map(
            function=self._window_slope,
            window_size=self.short_window,
            min_periods=2  # a slope needs at least two points
        )
        return volatility_slope.alias(self.get_factor_name())
# Fixed: the slope is computed with rolling_map.
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Factor: linear-regression slope of ``turnover_rate`` over a window."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Number of trailing rows the slope is fitted over.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        config = OperatorConfig(
            name=f"turnover_trend_strength_{window}",
            description=f"{window}日换手率趋势强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_trend_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds the window."""
        return f'turnover_trend_strength_{self.window}'

    @staticmethod
    def _window_slope(window_vals) -> float:
        """Return the regression slope of one window, or 0.0 if unusable.

        0.0 is returned when the window has fewer than two points, contains
        a null, the fit raises, or the fitted slope is not finite.
        """
        if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
            return 0.0
        x = np.arange(len(window_vals))
        try:
            slope = linregress(x, window_vals)[0]
        except Exception:
            # Best-effort fallback. Deliberately not a bare ``except`` so
            # that KeyboardInterrupt / SystemExit still abort a long run.
            return 0.0
        return float(slope) if np.isfinite(slope) else 0.0

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the turnover-rate trend slope for one group.

        Args:
            group_df: Frame holding a ``turnover_rate`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The slope series, named after the factor.
        """
        # NOTE(review): newer Polars releases appear to rename ``min_periods``
        # to ``min_samples`` - confirm against the pinned Polars version.
        trend_strength = group_df['turnover_rate'].rolling_map(
            function=self._window_slope,
            window_size=self.window,
            min_periods=2  # a slope needs at least two points
        )
        return trend_strength.alias(self.get_factor_name())
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Factor: turnover rate relative to its own rolling mean.

    NOTE(review): the class name mentions free-float turnover, but the code
    reads the plain ``turnover_rate`` column - confirm which is intended.
    """

    def __init__(self, window: int = 10):
        """Configure the operator.

        Args:
            window: Number of rows in the rolling-mean baseline.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        factor_name = f"ff_turnover_surge_{window}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{window}日自由流通股换手率激增",
                required_columns=['turnover_rate'],
                output_columns=[factor_name],
                parameters={'window': window},
            )
        )
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds the window."""
        return 'ff_turnover_surge_{}'.format(self.window)

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute ``turnover_rate / (rolling_mean(turnover_rate) + 1e-8)``.

        Args:
            group_df: Frame holding a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The surge-ratio series, named after the factor.
        """
        turnover = group_df['turnover_rate']
        baseline = turnover.rolling_mean(window_size=self.window)
        # The small epsilon keeps the division defined when the mean is zero.
        return (turnover / (baseline + 1e-8)).alias(self.get_factor_name())
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Factor: share of up-closes times share of above-average-volume rows.

    Both shares are rolling means of 0/1 flags taken over ``price_window``
    rows; ``volume_window`` only sets the length of the volume average that
    each row's volume is compared with.
    """

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        """Configure the operator.

        Args:
            price_window: Window over which both flag series are averaged.
            volume_window: Window of the rolling mean volume baseline.

        Raises:
            ValueError: If either window is not strictly positive.
        """
        if price_window <= 0 or volume_window <= 0:
            raise ValueError("Windows must be positive")
        factor_name = f"price_volume_coherence_{price_window}_{volume_window}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
                required_columns=['close', 'vol'],
                output_columns=[factor_name],
                parameters={'price_window': price_window, 'volume_window': volume_window},
            )
        )
        self.price_window = price_window
        self.volume_window = volume_window

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds both windows."""
        return 'price_volume_coherence_{}_{}'.format(
            self.price_window, self.volume_window
        )

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the price/volume coherence score for one group.

        Args:
            group_df: Frame holding ``close`` and ``vol`` columns.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Product of the two rolling shares, named after the factor.
        """
        def rolling_share(mask: pl.Series) -> pl.Series:
            # Fraction of True flags within the trailing price_window rows.
            return mask.cast(pl.Int8).rolling_mean(window_size=self.price_window)

        close = group_df['close']
        volume = group_df['vol']

        up_share = rolling_share(close.diff() > 0)
        volume_baseline = volume.rolling_mean(window_size=self.volume_window)
        # NOTE(review): the volume flag is averaged over price_window, not
        # volume_window - looks intentional (same horizon for both shares),
        # but confirm.
        heavy_share = rolling_share(volume > volume_baseline)

        return (up_share * heavy_share).alias(self.get_factor_name())
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Placeholder factor: free-float to total turnover ratio.

    The current implementation emits a constant 1.0 for every row; a real
    ratio would need a total-turnover column that this operator does not
    read.
    """

    def __init__(self):
        """Register the operator's fixed configuration."""
        config = OperatorConfig(
            name="ff_to_total_turnover_ratio",
            description="自由流通股对总换手率比率",
            required_columns=['turnover_rate'],
            output_columns=['ff_to_total_turnover_ratio'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the fixed output column name."""
        return 'ff_to_total_turnover_ratio'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return a constant-1.0 series with one value per row of ``group_df``.

        A materialised ``pl.Series`` of the group's height is returned, as
        the signature promises and as the sibling operators do, rather than
        an unevaluated ``pl.lit`` expression.

        Args:
            group_df: The group frame; only its height is used.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Float64 series of ones, named after the factor.
        """
        # Simplified: a real implementation would presumably divide by a
        # total turnover rate - TODO replace once that column is available.
        return pl.Series(
            self.get_factor_name(),
            [1.0] * group_df.height,
            dtype=pl.Float64,
        )
class VarianceOperator(StockWiseOperator):
    """Factor: rolling variance of ``pct_chg``."""

    def __init__(self, window: int):
        """Configure the operator.

        Args:
            window: Number of rows in the rolling-variance window.

        Raises:
            ValueError: If ``window`` is not strictly positive.
        """
        if window <= 0:
            raise ValueError("Window must be positive")
        factor_name = f"variance_{window}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{window}日方差",
                required_columns=['pct_chg'],
                output_columns=[factor_name],
                parameters={'window': window},
            )
        )
        self.window = window

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds the window."""
        return 'variance_{}'.format(self.window)

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Compute the rolling variance of ``pct_chg`` for one group.

        Args:
            group_df: Frame holding a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The rolling-variance series, named after the factor.
        """
        returns = group_df['pct_chg']
        return returns.rolling_var(window_size=self.window).alias(
            self.get_factor_name()
        )
class LimitUpDownOperator(StockWiseOperator):
    """Factor: flag rows whose close equals the limit-up price.

    NOTE(review): the configuration declares four output columns and
    requires ``down_limit``, yet ``calc_factor`` only produces
    ``cat_up_limit`` and never reads ``down_limit`` - confirm whether the
    remaining outputs are produced elsewhere or are still to be written.
    """

    def __init__(self):
        """Register the operator's fixed configuration."""
        super().__init__(
            OperatorConfig(
                name="limit_up_down",
                description="涨跌停因子",
                required_columns=['close', 'up_limit', 'down_limit'],
                output_columns=[
                    'cat_up_limit',
                    'cat_down_limit',
                    'up_limit_count_10d',
                    'down_limit_count_10d',
                ],
                parameters={},
            )
        )

    def get_factor_name(self) -> str:
        """Return the name of the single column this operator computes."""
        return 'cat_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Return 1 where ``close == up_limit`` and 0 otherwise, as Int8.

        Args:
            group_df: Frame holding ``close`` and ``up_limit`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Int8 flag series named ``cat_up_limit``.
        """
        closed_at_limit = group_df['close'] == group_df['up_limit']
        return closed_at_limit.cast(pl.Int8).alias('cat_up_limit')
class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Factor: length of the current run of consecutive limit-up rows.

    Reads the 0/1 ``cat_up_limit`` flag and, for each row, reports how many
    rows in a row (ending at that row) carried the flag. Rows without the
    flag report 0.
    """

    def __init__(self):
        """Register the operator's fixed configuration."""
        config = OperatorConfig(
            name="consecutive_up_limit",
            description="连续涨停天数",
            required_columns=['cat_up_limit'],
            output_columns=['consecutive_up_limit'],
            parameters={}
        )
        super().__init__(config)

    def get_factor_name(self) -> str:
        """Return the fixed output column name."""
        return 'consecutive_up_limit'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Count consecutive limit-up rows for one group.

        Example: flags ``[1, 1, 0, 1, 1, 1]`` yield ``[1, 2, 0, 1, 2, 3]``.

        Args:
            group_df: Frame holding a ``cat_up_limit`` column.
                # assumes rows belong to one stock, oldest first - TODO confirm
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Int32 streak-length series named ``consecutive_up_limit``. A null
            flag is treated as "not limit-up" and therefore breaks a streak.
        """
        flags = (
            group_df['cat_up_limit'].cast(pl.Int64).fill_null(0).to_numpy() != 0
        )
        # Running count of all limit-up rows seen so far.
        running_total = np.cumsum(flags)
        # Value of that running count at the most recent non-limit-up row;
        # subtracting it leaves only the rows of the current streak.
        total_at_last_break = np.maximum.accumulate(
            np.where(flags, 0, running_total)
        )
        streak = (running_total - total_at_last_break).astype(np.int32)
        return pl.Series(self.get_factor_name(), streak)
class MomentumFactorOperator(StockWiseOperator):
    """Factor: ``volume_change_rate + alpha * turnover_deviation``."""

    def __init__(self, alpha: float = 0.5):
        """Configure the operator.

        Args:
            alpha: Weight applied to ``turnover_deviation``; must lie in
                the closed interval [0, 1].

        Raises:
            ValueError: If ``alpha`` is outside [0, 1].
        """
        if not 0 <= alpha <= 1:
            raise ValueError("alpha should be between 0 and 1")
        factor_name = f"momentum_factor_{alpha}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"动量因子(alpha={alpha})",
                required_columns=['volume_change_rate', 'turnover_deviation'],
                output_columns=[factor_name],
                parameters={'alpha': alpha},
            )
        )
        self.alpha = alpha

    def get_factor_name(self) -> str:
        """Return the output column name, which embeds ``alpha``."""
        return 'momentum_factor_{}'.format(self.alpha)

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Combine the two input columns into the momentum score.

        Args:
            group_df: Frame holding ``volume_change_rate`` and
                ``turnover_deviation`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The weighted-sum series, named after the factor.
        """
        weighted_deviation = self.alpha * group_df['turnover_deviation']
        score = group_df['volume_change_rate'] + weighted_deviation
        return score.alias(self.get_factor_name())
class ResonanceFactorOperator(StockWiseOperator):
    """Factor: element-wise product of ``volume_ratio`` and ``pct_chg``."""

    def __init__(self):
        """Register the operator's fixed configuration."""
        super().__init__(
            OperatorConfig(
                name="resonance_factor",
                description="共振因子",
                required_columns=['volume_ratio', 'pct_chg'],
                output_columns=['resonance_factor'],
                parameters={},
            )
        )

    def get_factor_name(self) -> str:
        """Return the fixed output column name."""
        return 'resonance_factor'

    def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
        """Multiply ``volume_ratio`` by ``pct_chg`` row by row.

        Args:
            group_df: Frame holding ``volume_ratio`` and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The product series named ``resonance_factor``.
        """
        volume_ratio = group_df['volume_ratio']
        pct_change = group_df['pct_chg']
        return (volume_ratio * pct_change).alias('resonance_factor')
# Default set of momentum operators, instantiated once at import time.
# apply_momentum_factors() applies them in list order when the caller does
# not pass its own list.
MOMENTUM_OPERATORS = [
    PriceMinusDeductionPriceOperator(10),
    PriceDeductionPriceDiffRatioToSMAOperator(10),
    CatPriceVsSmaVsDeductionPriceOperator(10),
    # Disabled - presumably because the per-window Python callback in
    # rolling_map is slow; confirm before re-enabling.
    # VolatilitySlopeOperator(20, 5),
    # TurnoverRateTrendStrengthOperator(5),
    FreeFloatTurnoverSurgeOperator(10),
    PriceVolumeTrendCoherenceOperator(5, 20),
    FreeFloatToTotalTurnoverRatioOperator(),
    VarianceOperator(20),
    # Order matters: ConsecutiveUpLimitOperator requires the 'cat_up_limit'
    # column that LimitUpDownOperator produces.
    LimitUpDownOperator(),
    ConsecutiveUpLimitOperator(),
    # Disabled - needs 'volume_change_rate' and 'turnover_deviation', which no
    # operator in this list produces; presumably they come from another
    # module - confirm.
    # MomentumFactorOperator(0.5),
    ResonanceFactorOperator(),
]
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of factor operators over ``df``, one after another.

    Each operator's ``apply`` receives the frame returned by the previous
    operator, so later operators can read what earlier ones produced. A
    tqdm progress bar tracks the loop.

    Args:
        df: Input frame handed to the first operator.
        operators: Operators to run, in order. When ``None``, the
            module-level ``MOMENTUM_OPERATORS`` list is used.

    Returns:
        The frame returned by the last operator, or ``df`` itself when the
        operator list is empty.
    """
    pipeline = MOMENTUM_OPERATORS if operators is None else operators
    frame = df
    progress = tqdm(pipeline, desc="Applying momentum factors")
    for step in progress:
        frame = step.apply(frame)
    return frame