Files
NewStock/main/factor/polars_sentiment_factors.py

366 lines
15 KiB
Python
Raw Normal View History

2025-10-13 21:42:35 +08:00
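"""
Sentiment factors, implemented with Polars.

Covers the computation of market-sentiment factors such as the panic/greed
index, reversal factors, and related factors.
"""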
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class SentimentPanicGreedIndexOperator(StockWiseOperator):
    """Panic/greed sentiment index operator.

    The raw signal is the day's true range divided by its rolling mean (ATR),
    signed by the direction of ``pct_chg``, plus twice the opening gap versus
    the previous close. The raw signal is then smoothed with a rolling mean.
    """

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        """Configure the operator.

        Args:
            window_atr: Rolling window length used to average the true range.
            window_smooth: Rolling window length used to smooth the raw signal.
        """
        config = OperatorConfig(
            name=f"senti_panic_greed_{window_atr}_{window_smooth}",
            description=f"{window_atr}日ATR{window_smooth}日平滑恐慌贪婪指数",
            required_columns=['open', 'high', 'low', 'close', 'pct_chg', 'vol'],
            output_columns=[f'senti_panic_greed_{window_atr}_{window_smooth}'],
            parameters={'window_atr': window_atr, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_atr = window_atr
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the panic/greed index column to ``stock_df``.

        Args:
            stock_df: Frame with 'open', 'high', 'low', 'close' and 'pct_chg'
                columns. Presumably one stock's rows in chronological order
                (the shift/rolling logic relies on row order) - TODO confirm
                against the framework.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_panic_greed_{window_atr}_{window_smooth}``.
        """
        output_name = f'senti_panic_greed_{self.window_atr}_{self.window_smooth}'

        # Previous row's close.
        prev_close = pl.col('close').shift(1)

        # True range: the largest of the intraday range and the two
        # distances from the previous close.
        true_range = pl.max_horizontal(
            pl.col('high') - pl.col('low'),
            (pl.col('high') - prev_close).abs(),
            (pl.col('low') - prev_close).abs()
        )

        # ATR. Polars names the rolling window parameter `window_size`;
        # the previous `window=` keyword is not accepted.
        atr = true_range.rolling_mean(window_size=self.window_atr)

        # Opening gap relative to the previous close; rows without a
        # previous close contribute 0.
        gap = (pl.col('open') / prev_close - 1).fill_null(0)

        # Raw sentiment: ATR-relative range signed by the day's direction,
        # plus the gap with weight 2. The 1e-8 guards against division by zero.
        raw_senti = (true_range / (atr + 1e-8)) * pl.col('pct_chg').sign() + gap * 2

        # Smooth the raw signal.
        sentiment = raw_senti.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(sentiment.alias(output_name))
class SentimentMarketBreadthProxyOperator(StockWiseOperator):
    """Market-breadth sentiment proxy operator.

    Scales ``pct_chg`` by the ratio of the day's volume to its rolling mean
    volume, then smooths the product with a rolling mean.
    """

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        """Configure the operator.

        Args:
            window_vol: Rolling window length for the average volume.
            window_smooth: Rolling window length used to smooth the raw signal.
        """
        config = OperatorConfig(
            name=f"senti_breadth_proxy_{window_vol}_{window_smooth}",
            description=f"{window_vol}日成交量{window_smooth}日平滑市场宽度情绪代理",
            required_columns=['pct_chg', 'vol'],
            output_columns=[f'senti_breadth_proxy_{window_vol}_{window_smooth}'],
            parameters={'window_vol': window_vol, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_vol = window_vol
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the breadth-proxy column to ``stock_df``.

        Args:
            stock_df: Frame with 'pct_chg' and 'vol' columns. Presumably one
                stock's rows in chronological order - TODO confirm against
                the framework.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_breadth_proxy_{window_vol}_{window_smooth}``.
        """
        output_name = f'senti_breadth_proxy_{self.window_vol}_{self.window_smooth}'

        # Rolling mean volume. Polars names the window parameter
        # `window_size`; the previous `window=` keyword is not accepted.
        rolling_avg_vol = pl.col('vol').rolling_mean(window_size=self.window_vol)

        # Price/volume agreement: the return weighted by relative volume.
        # The 1e-8 guards against division by zero.
        raw_breadth = pl.col('pct_chg') * (pl.col('vol') / (rolling_avg_vol + 1e-8))

        # Smooth the raw signal.
        breadth_proxy = raw_breadth.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(breadth_proxy.alias(output_name))
class SentimentReversalIndicatorOperator(StockWiseOperator):
    """Short-term sentiment reversal factor operator.

    The factor is the negated ``window_ret``-row close-to-close return
    multiplied by the rolling standard deviation of ``pct_chg``.
    """

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        """Configure the operator.

        Args:
            window_ret: Number of rows over which the cumulative return is taken.
            window_vol: Rolling window length for the volatility of ``pct_chg``.
        """
        config = OperatorConfig(
            name=f"senti_reversal_{window_ret}_{window_vol}",
            description=f"{window_ret}日收益{window_vol}日波动短期情绪反转因子",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'senti_reversal_{window_ret}_{window_vol}'],
            parameters={'window_ret': window_ret, 'window_vol': window_vol}
        )
        super().__init__(config)
        self.window_ret = window_ret
        self.window_vol = window_vol

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the reversal factor column to ``stock_df``.

        Args:
            stock_df: Frame with 'close' and 'pct_chg' columns. Presumably one
                stock's rows in chronological order - TODO confirm against
                the framework.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column
            ``senti_reversal_{window_ret}_{window_vol}``.
        """
        output_name = f'senti_reversal_{self.window_ret}_{self.window_vol}'

        # Cumulative return over the lookback.
        return_m = pl.col('close').pct_change(self.window_ret)

        # Volatility of daily returns. Polars names the window parameter
        # `window_size`; the previous `window=` keyword is not accepted.
        volatility_m = pl.col('pct_chg').rolling_std(window_size=self.window_vol)

        # The minus sign expresses reversal: a positive past return gives a
        # negative factor value.
        reversal_factor = -return_m * volatility_m

        return stock_df.with_columns(reversal_factor.alias(output_name))
class DailyMomentumBenchmarkOperator(StockWiseOperator):
    """Daily momentum benchmark operator (placeholder implementation)."""

    def __init__(self):
        """Register the operator's configuration; it takes no parameters."""
        super().__init__(OperatorConfig(
            name="daily_momentum_benchmark",
            description="日级别动量基准",
            required_columns=['pct_chg'],
            output_columns=['daily_positive_benchmark', 'daily_negative_benchmark'],
            parameters={}
        ))

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append both benchmark columns, filled with the literal 0.

        The real benchmark is a cross-sectional quantity; per the original
        author's note it should be computed market-wide with a
        DateWiseOperator. Until then both columns are constant placeholders.

        Args:
            stock_df: Input frame; no column is read.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with 'daily_positive_benchmark' and
            'daily_negative_benchmark' added.
        """
        placeholder_names = ('daily_positive_benchmark', 'daily_negative_benchmark')
        placeholders = [pl.lit(0).alias(column) for column in placeholder_names]
        return stock_df.with_columns(placeholders)
class DailyDeviationOperator(StockWiseOperator):
    """Daily deviation operator.

    Measures how far ``pct_chg`` sits from the same-direction benchmark:
    the positive benchmark on up rows, the negative benchmark on down rows.
    """

    def __init__(self):
        """Register the operator's configuration; it takes no parameters."""
        config = OperatorConfig(
            name="daily_deviation",
            description="日级别偏离度",
            required_columns=['pct_chg', 'daily_positive_benchmark', 'daily_negative_benchmark'],
            output_columns=['daily_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the 'daily_deviation' column to ``stock_df``.

        Args:
            stock_df: Frame with 'pct_chg', 'daily_positive_benchmark' and
                'daily_negative_benchmark' columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with 'daily_deviation' added. The value is
            ``pct_chg - daily_positive_benchmark`` when both are > 0,
            ``pct_chg - daily_negative_benchmark`` when both are < 0,
            and 0 otherwise.
        """
        pct_chg = pl.col('pct_chg')
        positive_benchmark = pl.col('daily_positive_benchmark')
        negative_benchmark = pl.col('daily_negative_benchmark')

        # The original used a numpy.select-style call on `pl.select`, which
        # builds a DataFrame rather than an expression. A when/then chain is
        # the Polars equivalent: the first matching branch wins and rows
        # matching no branch take the `otherwise` value.
        deviation = (
            pl.when((pct_chg > 0) & (positive_benchmark > 0))
            .then(pct_chg - positive_benchmark)
            .when((pct_chg < 0) & (negative_benchmark < 0))
            .then(pct_chg - negative_benchmark)
            .otherwise(0.0)
        )

        return stock_df.with_columns(deviation.alias('daily_deviation'))
class CatSentimentMomentumVolumeSpikeOperator(StockWiseOperator):
    """Categorical sentiment factor: momentum combined with a volume spike.

    Three boolean conditions are evaluated per row, each is cast to a string,
    and the three strings are concatenated into one category label.
    """

    def __init__(self, return_period: int = 3, return_threshold: float = 0.05,
                 volume_ratio_threshold: float = 1.5, current_pct_chg_min: float = -0.01,
                 current_pct_chg_max: float = 0.03):
        """Configure the operator.

        Args:
            return_period: Lookback (in rows) for the close-to-close return.
            return_threshold: The lookback return must exceed this value.
            volume_ratio_threshold: 'volume_ratio' must exceed this value.
            current_pct_chg_min: Exclusive lower bound on the row's 'pct_chg'.
            current_pct_chg_max: Exclusive upper bound on the row's 'pct_chg'.
        """
        factor_name = f"cat_senti_mom_vol_spike_{return_period}"
        settings = {
            'return_period': return_period,
            'return_threshold': return_threshold,
            'volume_ratio_threshold': volume_ratio_threshold,
            'current_pct_chg_min': current_pct_chg_min,
            'current_pct_chg_max': current_pct_chg_max,
        }
        super().__init__(OperatorConfig(
            name=factor_name,
            description=f"{return_period}日情绪动量成交量激增分类",
            required_columns=['close', 'pct_chg', 'volume_ratio'],
            output_columns=[factor_name],
            parameters=settings
        ))
        self.return_period = return_period
        self.return_threshold = return_threshold
        self.volume_ratio_threshold = volume_ratio_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the category label column to ``stock_df``.

        Args:
            stock_df: Frame with 'close', 'pct_chg' and 'volume_ratio' columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra string column
            ``cat_senti_mom_vol_spike_{return_period}``.
        """
        today_move = pl.col('pct_chg')

        # The three conditions, in the order they appear in the label:
        # momentum, volume spike, current-row move inside the open interval.
        flags = [
            pl.col('close').pct_change(self.return_period) > self.return_threshold,
            pl.col('volume_ratio') > self.volume_ratio_threshold,
            (today_move > self.current_pct_chg_min) & (today_move < self.current_pct_chg_max),
        ]

        # Concatenate the stringified flags left to right.
        # NOTE(review): each piece is presumably 'true'/'false' - confirm how
        # downstream consumers decode the label.
        label = flags[0].cast(str)
        for flag in flags[1:]:
            label = label + flag.cast(str)

        return stock_df.with_columns(
            label.alias(f'cat_senti_mom_vol_spike_{self.return_period}')
        )
class CatSentimentPreBreakoutOperator(StockWiseOperator):
    """Categorical sentiment factor: consolidation before a breakout.

    Four boolean conditions (volatility contraction, volume atrophy, recent
    price stability, and a moderate up-move on raised volume) are cast to
    strings and concatenated into one category label.
    """

    def __init__(self, atr_short_n: int = 10, atr_long_m: int = 40,
                 vol_atrophy_n: int = 10, vol_atrophy_m: int = 40,
                 price_stab_n: int = 5, price_stab_threshold: float = 0.05,
                 current_pct_chg_min: float = 0.005, current_pct_chg_max: float = 0.07,
                 volume_ratio_threshold: float = 1.2):
        """Configure the operator.

        Args:
            atr_short_n: Short rolling window for the mean high-low range.
            atr_long_m: Long rolling window for the mean high-low range.
            vol_atrophy_n: Short rolling window for mean volume.
            vol_atrophy_m: Long rolling window for mean volume.
            price_stab_n: Rolling window for the high/low envelope.
            price_stab_threshold: Upper bound on envelope width relative to close.
            current_pct_chg_min: Exclusive lower bound on the row's 'pct_chg'.
            current_pct_chg_max: Exclusive upper bound on the row's 'pct_chg'.
            volume_ratio_threshold: 'volume_ratio' must exceed this value.
        """
        config = OperatorConfig(
            name="cat_senti_pre_breakout",
            description="情绪突破前盘整分类",
            required_columns=['high', 'low', 'close', 'vol', 'pct_chg', 'volume_ratio'],
            output_columns=['cat_senti_pre_breakout'],
            parameters={'atr_short_n': atr_short_n, 'atr_long_m': atr_long_m,
                        'vol_atrophy_n': vol_atrophy_n, 'vol_atrophy_m': vol_atrophy_m,
                        'price_stab_n': price_stab_n, 'price_stab_threshold': price_stab_threshold,
                        'current_pct_chg_min': current_pct_chg_min, 'current_pct_chg_max': current_pct_chg_max,
                        'volume_ratio_threshold': volume_ratio_threshold}
        )
        super().__init__(config)
        self.atr_short_n = atr_short_n
        self.atr_long_m = atr_long_m
        self.vol_atrophy_n = vol_atrophy_n
        self.vol_atrophy_m = vol_atrophy_m
        self.price_stab_n = price_stab_n
        self.price_stab_threshold = price_stab_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max
        self.volume_ratio_threshold = volume_ratio_threshold

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the 'cat_senti_pre_breakout' label column to ``stock_df``.

        Args:
            stock_df: Frame with 'high', 'low', 'close', 'vol', 'pct_chg' and
                'volume_ratio' columns. Presumably one stock's rows in
                chronological order - TODO confirm against the framework.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the string column 'cat_senti_pre_breakout'.
        """
        # Polars names the rolling window parameter `window_size`; the
        # previous `window=` keyword is not accepted.

        # 1. Volatility contraction, with the high-low range standing in
        #    for ATR: the short-window mean is below 70% of the long-window mean.
        price_range = pl.col('high') - pl.col('low')
        atr_short = price_range.rolling_mean(window_size=self.atr_short_n)
        atr_long = price_range.rolling_mean(window_size=self.atr_long_m)
        cond_vol_contraction = atr_short < (0.7 * atr_long)

        # 2. Volume atrophy: short-window mean volume below 70% of the
        #    long-window mean.
        vol_short = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_n)
        vol_long = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_m)
        cond_vol_atrophy = vol_short < (0.7 * vol_long)

        # 3. Recent price stability: width of the rolling high/low envelope
        #    relative to the close stays under the threshold.
        rolling_max_h = pl.col('high').rolling_max(window_size=self.price_stab_n)
        rolling_min_l = pl.col('low').rolling_min(window_size=self.price_stab_n)
        price_stability = (rolling_max_h - rolling_min_l) / pl.col('close')
        cond_price_stability = price_stability < self.price_stab_threshold

        # 4. Current-row signal: a moderate rise accompanied by raised volume.
        cond_price_signal = (
            (pl.col('pct_chg') > self.current_pct_chg_min)
            & (pl.col('pct_chg') < self.current_pct_chg_max)
        )
        cond_vol_signal = pl.col('volume_ratio') > self.volume_ratio_threshold
        cond_current_day_signal = cond_price_signal & cond_vol_signal

        # Combine: concatenate the four stringified flags in order.
        result = (
            cond_vol_contraction.cast(str) + cond_vol_atrophy.cast(str)
            + cond_price_stability.cast(str) + cond_current_day_signal.cast(str)
        )

        return stock_df.with_columns(result.alias('cat_senti_pre_breakout'))
class StrongInflowSignalOperator(StockWiseOperator):
    """Strong main-capital inflow signal operator.

    Emits 1 when all three hold: the rolling mean of
    'flow_lg_elg_intensity' exceeds a threshold, the intensity has been
    positive on every row of a trailing window, and 'flow_lg_elg_accel' has
    been positive on every row of a trailing window. Emits 0 otherwise.
    """

    def __init__(self, intensity_avg_n: int = 3, intensity_threshold: float = 0.01,
                 consecutive_buy_n: int = 2, accel_positive_m: int = 1):
        """Configure the operator.

        Args:
            intensity_avg_n: Rolling window for the mean flow intensity.
            intensity_threshold: The mean intensity must exceed this value.
            consecutive_buy_n: Number of trailing rows that must all show
                positive intensity.
            accel_positive_m: Number of trailing rows that must all show
                positive acceleration.
        """
        config = OperatorConfig(
            name="senti_strong_inflow",
            description="强主力资金流入信号",
            required_columns=['flow_lg_elg_intensity', 'flow_lg_elg_accel'],
            output_columns=['senti_strong_inflow'],
            parameters={'intensity_avg_n': intensity_avg_n, 'intensity_threshold': intensity_threshold,
                        'consecutive_buy_n': consecutive_buy_n, 'accel_positive_m': accel_positive_m}
        )
        super().__init__(config)
        self.intensity_avg_n = intensity_avg_n
        self.intensity_threshold = intensity_threshold
        self.consecutive_buy_n = consecutive_buy_n
        self.accel_positive_m = accel_positive_m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the 'senti_strong_inflow' 0/1 column to ``stock_df``.

        Args:
            stock_df: Frame that should contain 'flow_lg_elg_intensity' and
                'flow_lg_elg_accel'. Presumably one stock's rows in
                chronological order - TODO confirm against the framework.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with 'senti_strong_inflow' added. If either input
            column is missing, the column is a constant 0.
        """
        required_cols = ('flow_lg_elg_intensity', 'flow_lg_elg_accel')
        if any(col not in stock_df.columns for col in required_cols):
            # Best-effort fallback: no flow data means no signal. The dtype
            # is pinned to Int64 to match the column produced below.
            return stock_df.with_columns(
                pl.lit(0, dtype=pl.Int64).alias('senti_strong_inflow')
            )

        # Polars names the rolling window parameter `window_size`; the
        # previous `window=` keyword is not accepted.

        # 1. Mean flow intensity over the last N rows exceeds the threshold.
        avg_intensity = pl.col('flow_lg_elg_intensity').rolling_mean(
            window_size=self.intensity_avg_n
        )
        cond_avg_intensity = avg_intensity > self.intensity_threshold

        # 2. Net buying on every one of the last N rows: the rolling sum of
        #    the 0/1 flags equals the window length (simplified streak check).
        is_net_buy = (pl.col('flow_lg_elg_intensity') > 0).cast(pl.Int64)
        cond_consecutive_buy = (
            is_net_buy.rolling_sum(window_size=self.consecutive_buy_n)
            == self.consecutive_buy_n
        )

        # 3. Positive flow acceleration on every one of the last M rows.
        is_accel_positive = (pl.col('flow_lg_elg_accel') > 0).cast(pl.Int64)
        cond_accel_positive = (
            is_accel_positive.rolling_sum(window_size=self.accel_positive_m)
            == self.accel_positive_m
        )

        # All three conditions must hold.
        strong_inflow = cond_avg_intensity & cond_consecutive_buy & cond_accel_positive

        return stock_df.with_columns(
            strong_inflow.cast(pl.Int64).alias('senti_strong_inflow')
        )
# Default collection of sentiment operators, each built with its default
# parameters. Order matters: DailyDeviationOperator lists the benchmark
# columns added by DailyMomentumBenchmarkOperator among its required columns.
SENTIMENT_OPERATORS = [
    operator_cls()
    for operator_cls in (
        SentimentPanicGreedIndexOperator,
        SentimentMarketBreadthProxyOperator,
        SentimentReversalIndicatorOperator,
        DailyMomentumBenchmarkOperator,
        DailyDeviationOperator,
        CatSentimentMomentumVolumeSpikeOperator,
        CatSentimentPreBreakoutOperator,
        StrongInflowSignalOperator,
    )
]
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of sentiment operators over ``df``.

    Each operator is called with the output of the previous one, so later
    operators see the columns added by earlier ones.

    Args:
        df: Input Polars DataFrame.
        operators: Operators (callables) to apply in order. When None, the
            module-level SENTIMENT_OPERATORS list is used. An empty list is
            honoured as "apply nothing".

    Returns:
        The DataFrame returned by the last operator, or ``df`` itself when
        there are no operators to apply.
    """
    # Compare against None explicitly so an empty list is not replaced by
    # the default pipeline.
    pipeline = SENTIMENT_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current