factor优化,改为polars
This commit is contained in:
365
main/factor/polars_sentiment_factors.py
Normal file
365
main/factor/polars_sentiment_factors.py
Normal file
@@ -0,0 +1,365 @@
|
||||
"""
|
||||
情绪因子 - 使用Polars实现
|
||||
包含市场情绪、恐慌贪婪指数、反转因子等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
import talib
|
||||
|
||||
|
||||
class SentimentPanicGreedIndexOperator(StockWiseOperator):
    """Per-stock panic/greed index built from true range, ATR and opening gap.

    The raw signal is ``TR / ATR * sign(pct_chg) + 2 * gap`` and the output is
    its rolling mean over ``window_smooth`` rows.
    """

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        """Configure the operator.

        Args:
            window_atr: Number of rows averaged to obtain the ATR.
            window_smooth: Number of rows averaged to smooth the raw signal.
        """
        config = OperatorConfig(
            name=f"senti_panic_greed_{window_atr}_{window_smooth}",
            description=f"{window_atr}日ATR{window_smooth}日平滑恐慌贪婪指数",
            required_columns=['open', 'high', 'low', 'close', 'pct_chg', 'vol'],
            output_columns=[f'senti_panic_greed_{window_atr}_{window_smooth}'],
            parameters={'window_atr': window_atr, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_atr = window_atr
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the panic/greed index column to ``stock_df``.

        Args:
            stock_df: Frame holding one stock's rows.
                NOTE(review): ``shift(1)`` is used as "previous day", so rows
                are presumably sorted chronologically - confirm with caller.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_panic_greed_{window_atr}_{window_smooth}``.
        """
        prev_close = pl.col('close').shift(1)

        # True range: the largest of the intraday range and the two distances
        # from the previous close to today's high / low.
        tr = pl.max_horizontal(
            pl.col('high') - pl.col('low'),
            (pl.col('high') - prev_close).abs(),
            (pl.col('low') - prev_close).abs()
        )

        # Polars names the rolling parameter ``window_size``; ``window=`` is
        # rejected as an unexpected keyword argument.
        atr = tr.rolling_mean(window_size=self.window_atr)

        # Opening gap relative to the previous close; the first row has no
        # previous close, so its null gap is replaced by 0.
        gap = (pl.col('open') / prev_close - 1).fill_null(0)

        # Range relative to ATR, signed by the day's return, plus twice the
        # gap. The 1e-8 term guards against division by a zero ATR.
        raw_senti = (tr / (atr + 1e-8)) * pl.col('pct_chg').sign() + gap * 2

        sentiment = raw_senti.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(
            sentiment.alias(f'senti_panic_greed_{self.window_atr}_{self.window_smooth}')
        )
|
||||
|
||||
|
||||
class SentimentMarketBreadthProxyOperator(StockWiseOperator):
    """Per-stock breadth-sentiment proxy: return weighted by relative volume.

    The raw signal is ``pct_chg * vol / rolling_mean(vol)`` and the output is
    its rolling mean over ``window_smooth`` rows.
    """

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        """Configure the operator.

        Args:
            window_vol: Number of rows used for the average-volume baseline.
            window_smooth: Number of rows averaged to smooth the raw signal.
        """
        config = OperatorConfig(
            name=f"senti_breadth_proxy_{window_vol}_{window_smooth}",
            description=f"{window_vol}日成交量{window_smooth}日平滑市场宽度情绪代理",
            required_columns=['pct_chg', 'vol'],
            output_columns=[f'senti_breadth_proxy_{window_vol}_{window_smooth}'],
            parameters={'window_vol': window_vol, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_vol = window_vol
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the breadth proxy column to ``stock_df``.

        Args:
            stock_df: Frame holding one stock's rows with ``pct_chg`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_breadth_proxy_{window_vol}_{window_smooth}``.
        """
        # Polars names the rolling parameter ``window_size``; ``window=`` is
        # rejected as an unexpected keyword argument.
        rolling_avg_vol = pl.col('vol').rolling_mean(window_size=self.window_vol)

        # Return scaled by volume relative to its rolling average; the 1e-8
        # term guards against division by a zero average volume.
        raw_breadth = pl.col('pct_chg') * (pl.col('vol') / (rolling_avg_vol + 1e-8))

        breadth_proxy = raw_breadth.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(
            breadth_proxy.alias(f'senti_breadth_proxy_{self.window_vol}_{self.window_smooth}')
        )
|
||||
|
||||
|
||||
class SentimentReversalIndicatorOperator(StockWiseOperator):
    """Short-term sentiment reversal factor.

    The output is the negated ``window_ret``-row close-to-close return
    multiplied by the rolling standard deviation of ``pct_chg``.
    """

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        """Configure the operator.

        Args:
            window_ret: Row offset used for the cumulative return.
            window_vol: Number of rows used for the rolling volatility.
        """
        config = OperatorConfig(
            name=f"senti_reversal_{window_ret}_{window_vol}",
            description=f"{window_ret}日收益{window_vol}日波动短期情绪反转因子",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'senti_reversal_{window_ret}_{window_vol}'],
            parameters={'window_ret': window_ret, 'window_vol': window_vol}
        )
        super().__init__(config)
        self.window_ret = window_ret
        self.window_vol = window_vol

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the reversal factor column to ``stock_df``.

        Args:
            stock_df: Frame holding one stock's rows with ``close`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_reversal_{window_ret}_{window_vol}``.
        """
        # Return of ``close`` over ``window_ret`` rows.
        return_m = pl.col('close').pct_change(self.window_ret)

        # Polars names the rolling parameter ``window_size``; ``window=`` is
        # rejected as an unexpected keyword argument.
        volatility_m = pl.col('pct_chg').rolling_std(window_size=self.window_vol)

        # The leading minus sign turns recent gains into a negative score
        # (reversal expectation).
        reversal_factor = -return_m * volatility_m

        return stock_df.with_columns(
            reversal_factor.alias(f'senti_reversal_{self.window_ret}_{self.window_vol}')
        )
|
||||
|
||||
|
||||
class DailyMomentumBenchmarkOperator(StockWiseOperator):
    """Placeholder for the daily positive/negative momentum benchmarks."""

    def __init__(self):
        """Register the operator's name, inputs and the two output columns."""
        super().__init__(
            OperatorConfig(
                name="daily_momentum_benchmark",
                description="日级别动量基准",
                required_columns=['pct_chg'],
                output_columns=['daily_positive_benchmark', 'daily_negative_benchmark'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append both benchmark columns, filled with the constant 0.

        A real benchmark needs a cross-sectional (per-date, whole-market)
        computation, which a per-stock operator cannot do; the original note
        suggests a DateWiseOperator for that. Until then both columns are
        zero placeholders.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``daily_positive_benchmark`` and
            ``daily_negative_benchmark`` added.
        """
        placeholder = pl.lit(0)
        return stock_df.with_columns(
            daily_positive_benchmark=placeholder,
            daily_negative_benchmark=placeholder,
        )
|
||||
|
||||
|
||||
class DailyDeviationOperator(StockWiseOperator):
    """Deviation of a stock's daily return from the same-direction benchmark."""

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        config = OperatorConfig(
            name="daily_deviation",
            description="日级别偏离度",
            required_columns=['pct_chg', 'daily_positive_benchmark', 'daily_negative_benchmark'],
            output_columns=['daily_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``daily_deviation`` column to ``stock_df``.

        Rules, evaluated in order (first match wins):

        * ``pct_chg > 0`` and positive benchmark ``> 0``:
          ``pct_chg - daily_positive_benchmark``
        * ``pct_chg < 0`` and negative benchmark ``< 0``:
          ``pct_chg - daily_negative_benchmark``
        * otherwise: ``0``

        Args:
            stock_df: Frame holding ``pct_chg`` and both benchmark columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``daily_deviation`` added.
        """
        pct_chg = pl.col('pct_chg')
        positive_benchmark = pl.col('daily_positive_benchmark')
        negative_benchmark = pl.col('daily_negative_benchmark')

        # ``pl.select`` is not NumPy's ``np.select``: it evaluates expressions
        # and returns a DataFrame. The conditions/choices/default pattern is
        # expressed in Polars with a when/then/otherwise chain.
        deviation = (
            pl.when((pct_chg > 0) & (positive_benchmark > 0))
            .then(pct_chg - positive_benchmark)
            .when((pct_chg < 0) & (negative_benchmark < 0))
            .then(pct_chg - negative_benchmark)
            .otherwise(0)
        )

        return stock_df.with_columns(deviation.alias('daily_deviation'))
|
||||
|
||||
|
||||
class CatSentimentMomentumVolumeSpikeOperator(StockWiseOperator):
    """Categorical label combining momentum, volume-spike and daily-move flags."""

    def __init__(self, return_period: int = 3, return_threshold: float = 0.05,
                 volume_ratio_threshold: float = 1.5, current_pct_chg_min: float = -0.01,
                 current_pct_chg_max: float = 0.03):
        """Configure the operator.

        Args:
            return_period: Row offset used for the momentum return.
            return_threshold: Momentum flag is set when the return exceeds this.
            volume_ratio_threshold: Volume flag is set when ``volume_ratio``
                exceeds this.
            current_pct_chg_min: Exclusive lower bound on today's ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound on today's ``pct_chg``.
        """
        settings = {
            'return_period': return_period,
            'return_threshold': return_threshold,
            'volume_ratio_threshold': volume_ratio_threshold,
            'current_pct_chg_min': current_pct_chg_min,
            'current_pct_chg_max': current_pct_chg_max,
        }
        super().__init__(
            OperatorConfig(
                name=f"cat_senti_mom_vol_spike_{return_period}",
                description=f"{return_period}日情绪动量成交量激增分类",
                required_columns=['close', 'pct_chg', 'volume_ratio'],
                output_columns=[f'cat_senti_mom_vol_spike_{return_period}'],
                parameters=settings,
            )
        )
        self.return_period = return_period
        self.return_threshold = return_threshold
        self.volume_ratio_threshold = volume_ratio_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the categorical label column to ``stock_df``.

        Three boolean flags are each cast to a string and concatenated in the
        order momentum, volume, current-day price move.

        Args:
            stock_df: Frame holding ``close``, ``pct_chg`` and ``volume_ratio``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_senti_mom_vol_spike_{return_period}`` added.
        """
        today_move = pl.col('pct_chg')
        flags = (
            # Return over ``return_period`` rows is above the threshold.
            pl.col('close').pct_change(self.return_period) > self.return_threshold,
            # Volume ratio is above the threshold.
            pl.col('volume_ratio') > self.volume_ratio_threshold,
            # Today's move lies strictly inside (min, max).
            (today_move > self.current_pct_chg_min) & (today_move < self.current_pct_chg_max),
        )

        momentum_txt, volume_txt, price_txt = (flag.cast(str) for flag in flags)
        label = momentum_txt + volume_txt + price_txt

        return stock_df.with_columns(
            label.alias(f'cat_senti_mom_vol_spike_{self.return_period}')
        )
|
||||
|
||||
|
||||
class CatSentimentPreBreakoutOperator(StockWiseOperator):
    """Categorical label describing a pre-breakout consolidation pattern.

    Four boolean flags are cast to strings and concatenated: volatility
    contraction, volume atrophy, recent price stability, and today's
    moderate up-move on elevated volume.
    """

    def __init__(self, atr_short_n: int = 10, atr_long_m: int = 40,
                 vol_atrophy_n: int = 10, vol_atrophy_m: int = 40,
                 price_stab_n: int = 5, price_stab_threshold: float = 0.05,
                 current_pct_chg_min: float = 0.005, current_pct_chg_max: float = 0.07,
                 volume_ratio_threshold: float = 1.2):
        """Configure the operator.

        Args:
            atr_short_n: Short window for the mean high-low range.
            atr_long_m: Long window for the mean high-low range.
            vol_atrophy_n: Short window for mean volume.
            vol_atrophy_m: Long window for mean volume.
            price_stab_n: Window for the rolling high/low channel.
            price_stab_threshold: Upper bound on channel width relative to close.
            current_pct_chg_min: Exclusive lower bound on today's ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound on today's ``pct_chg``.
            volume_ratio_threshold: Lower bound on today's ``volume_ratio``.
        """
        config = OperatorConfig(
            name="cat_senti_pre_breakout",
            description="情绪突破前盘整分类",
            required_columns=['high', 'low', 'close', 'vol', 'pct_chg', 'volume_ratio'],
            output_columns=['cat_senti_pre_breakout'],
            parameters={'atr_short_n': atr_short_n, 'atr_long_m': atr_long_m,
                        'vol_atrophy_n': vol_atrophy_n, 'vol_atrophy_m': vol_atrophy_m,
                        'price_stab_n': price_stab_n, 'price_stab_threshold': price_stab_threshold,
                        'current_pct_chg_min': current_pct_chg_min, 'current_pct_chg_max': current_pct_chg_max,
                        'volume_ratio_threshold': volume_ratio_threshold}
        )
        super().__init__(config)
        self.atr_short_n = atr_short_n
        self.atr_long_m = atr_long_m
        self.vol_atrophy_n = vol_atrophy_n
        self.vol_atrophy_m = vol_atrophy_m
        self.price_stab_n = price_stab_n
        self.price_stab_threshold = price_stab_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max
        self.volume_ratio_threshold = volume_ratio_threshold

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``cat_senti_pre_breakout`` label column to ``stock_df``.

        Args:
            stock_df: Frame holding ``high``, ``low``, ``close``, ``vol``,
                ``pct_chg`` and ``volume_ratio``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_senti_pre_breakout`` added.
        """
        # NOTE: Polars names the rolling parameter ``window_size``; ``window=``
        # is rejected as an unexpected keyword argument.

        # 1. Volatility contraction: the high-low range stands in for ATR, and
        #    the short-window mean must be below 70% of the long-window mean.
        price_range = pl.col('high') - pl.col('low')
        atr_short = price_range.rolling_mean(window_size=self.atr_short_n)
        atr_long = price_range.rolling_mean(window_size=self.atr_long_m)
        cond_vol_contraction = atr_short < (0.7 * atr_long)

        # 2. Volume atrophy: short-window mean volume below 70% of long-window.
        vol_short = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_n)
        vol_long = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_m)
        cond_vol_atrophy = vol_short < (0.7 * vol_long)

        # 3. Price stability: recent high/low channel width relative to close.
        rolling_max_h = pl.col('high').rolling_max(window_size=self.price_stab_n)
        rolling_min_l = pl.col('low').rolling_min(window_size=self.price_stab_n)
        price_stability = (rolling_max_h - rolling_min_l) / pl.col('close')
        cond_price_stability = price_stability < self.price_stab_threshold

        # 4. Today's signal: move strictly inside (min, max) with a volume
        #    ratio above the threshold.
        cond_price_signal = (
            (pl.col('pct_chg') > self.current_pct_chg_min)
            & (pl.col('pct_chg') < self.current_pct_chg_max)
        )
        cond_vol_signal = pl.col('volume_ratio') > self.volume_ratio_threshold
        cond_current_day_signal = cond_price_signal & cond_vol_signal

        # Concatenate the four flags, each cast to a string, in a fixed order.
        result = (
            cond_vol_contraction.cast(str)
            + cond_vol_atrophy.cast(str)
            + cond_price_stability.cast(str)
            + cond_current_day_signal.cast(str)
        )

        return stock_df.with_columns(result.alias('cat_senti_pre_breakout'))
|
||||
|
||||
|
||||
class StrongInflowSignalOperator(StockWiseOperator):
    """Binary signal for strong large/extra-large order inflow.

    The signal is set when the average inflow intensity is above a threshold,
    every row in the recent window is a net buy, and the flow acceleration is
    positive on every row of its window.
    """

    def __init__(self, intensity_avg_n: int = 3, intensity_threshold: float = 0.01,
                 consecutive_buy_n: int = 2, accel_positive_m: int = 1):
        """Configure the operator.

        Args:
            intensity_avg_n: Window for the mean of ``flow_lg_elg_intensity``.
            intensity_threshold: Lower bound on that mean.
            consecutive_buy_n: Number of most recent rows that must all show
                positive intensity.
            accel_positive_m: Number of most recent rows that must all show
                positive ``flow_lg_elg_accel``.
        """
        config = OperatorConfig(
            name="senti_strong_inflow",
            description="强主力资金流入信号",
            required_columns=['flow_lg_elg_intensity', 'flow_lg_elg_accel'],
            output_columns=['senti_strong_inflow'],
            parameters={'intensity_avg_n': intensity_avg_n, 'intensity_threshold': intensity_threshold,
                        'consecutive_buy_n': consecutive_buy_n, 'accel_positive_m': accel_positive_m}
        )
        super().__init__(config)
        self.intensity_avg_n = intensity_avg_n
        self.intensity_threshold = intensity_threshold
        self.consecutive_buy_n = consecutive_buy_n
        self.accel_positive_m = accel_positive_m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``senti_strong_inflow`` column to ``stock_df``.

        Args:
            stock_df: Frame expected to hold ``flow_lg_elg_intensity`` and
                ``flow_lg_elg_accel``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``senti_strong_inflow`` added. When either flow
            column is missing the column is the constant 0.
        """
        required_cols = ['flow_lg_elg_intensity', 'flow_lg_elg_accel']
        if not all(col in stock_df.columns for col in required_cols):
            # Best-effort fallback: without flow data no signal can be raised.
            return stock_df.with_columns(pl.lit(0).alias('senti_strong_inflow'))

        # NOTE: Polars names the rolling parameter ``window_size``; ``window=``
        # is rejected as an unexpected keyword argument.

        # 1. Mean inflow intensity over the last N rows is above the threshold.
        avg_intensity = pl.col('flow_lg_elg_intensity').rolling_mean(
            window_size=self.intensity_avg_n
        )
        cond_avg_intensity = avg_intensity > self.intensity_threshold

        # 2. Every one of the last N rows is a net buy: the rolling count of
        #    positive-intensity rows equals the window length.
        is_net_buy = (pl.col('flow_lg_elg_intensity') > 0).cast(int)
        cond_consecutive_buy = (
            is_net_buy.rolling_sum(window_size=self.consecutive_buy_n)
            == self.consecutive_buy_n
        )

        # 3. Flow acceleration is positive on every one of the last M rows.
        is_accel_positive = (pl.col('flow_lg_elg_accel') > 0).cast(int)
        cond_accel_positive = (
            is_accel_positive.rolling_sum(window_size=self.accel_positive_m)
            == self.accel_positive_m
        )

        strong_inflow = cond_avg_intensity & cond_consecutive_buy & cond_accel_positive

        return stock_df.with_columns(strong_inflow.cast(int).alias('senti_strong_inflow'))
|
||||
|
||||
|
||||
# Default sentiment operator set: one instance of each operator class, built
# with its default parameters. Order matters: DailyDeviationOperator requires
# the benchmark columns that DailyMomentumBenchmarkOperator outputs.
SENTIMENT_OPERATORS = [
    operator_cls()
    for operator_cls in (
        SentimentPanicGreedIndexOperator,
        SentimentMarketBreadthProxyOperator,
        SentimentReversalIndicatorOperator,
        DailyMomentumBenchmarkOperator,
        DailyDeviationOperator,
        CatSentimentMomentumVolumeSpikeOperator,
        CatSentimentPreBreakoutOperator,
        StrongInflowSignalOperator,
    )
]
|
||||
|
||||
|
||||
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of sentiment operators over a DataFrame.

    Each operator is invoked as ``operator(frame)`` and its return value is
    fed to the next operator.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When ``None`` the
            module-level ``SENTIMENT_OPERATORS`` list is used.

    Returns:
        The result of the last operator, or ``df`` itself when the sequence
        is empty.
    """
    pipeline = SENTIMENT_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
||||
Reference in New Issue
Block a user