Files
NewStock/main/factor/polars_sentiment_factors.py

366 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
情绪因子 - 使用Polars实现
包含市场情绪、恐慌贪婪指数、反转因子等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class SentimentPanicGreedIndexOperator(StockWiseOperator):
    """Panic/greed index operator.

    The factor is the true range scaled by its rolling mean (an ATR proxy),
    signed by the direction of ``pct_chg``, plus twice the opening gap, and
    finally smoothed with a rolling mean.

    NOTE(review): the ``shift``/rolling expressions assume ``stock_df`` holds
    the rows of a single stock in chronological order -- confirm against the
    ``StockWiseOperator`` contract.
    """

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        """Configure the operator.

        Args:
            window_atr: Window size (in rows) of the rolling mean of the true range.
            window_smooth: Window size (in rows) of the final smoothing mean.
        """
        config = OperatorConfig(
            name=f"senti_panic_greed_{window_atr}_{window_smooth}",
            description=f"{window_atr}日ATR{window_smooth}日平滑恐慌贪婪指数",
            required_columns=['open', 'high', 'low', 'close', 'pct_chg', 'vol'],
            output_columns=[f'senti_panic_greed_{window_atr}_{window_smooth}'],
            parameters={'window_atr': window_atr, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_atr = window_atr
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the panic/greed index column to ``stock_df``.

        Args:
            stock_df: Frame containing ``open``, ``high``, ``low``, ``close``
                and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with one extra column named
            ``senti_panic_greed_{window_atr}_{window_smooth}``.
        """
        # Previous row's close.
        prev_close = pl.col('close').shift(1)

        # True range: the largest of the intraday range and the two distances
        # from the previous close.
        true_range = pl.max_horizontal(
            pl.col('high') - pl.col('low'),
            (pl.col('high') - prev_close).abs(),
            (pl.col('low') - prev_close).abs()
        )

        # Rolling mean of the true range. The window size is passed
        # positionally: Polars names this parameter ``window_size``, so the
        # former ``window=`` keyword was rejected.
        atr = true_range.rolling_mean(self.window_atr)

        # Opening gap relative to the previous close; rows where it cannot be
        # computed (null) contribute 0.
        gap = (pl.col('open') / prev_close - 1).fill_null(0)

        # Raw sentiment: range relative to ATR (epsilon guards against a zero
        # denominator), signed by the return direction, plus a double-weighted gap.
        raw_senti = (true_range / (atr + 1e-8)) * pl.col('pct_chg').sign() + gap * 2

        # Smooth the raw series.
        sentiment = raw_senti.rolling_mean(self.window_smooth)

        return stock_df.with_columns(
            sentiment.alias(f'senti_panic_greed_{self.window_atr}_{self.window_smooth}')
        )
class SentimentMarketBreadthProxyOperator(StockWiseOperator):
    """Market-breadth sentiment proxy operator.

    The factor is ``pct_chg`` weighted by the ratio of ``vol`` to its rolling
    mean, then smoothed with a short rolling mean.

    NOTE(review): the rolling expressions assume ``stock_df`` holds the rows
    of a single stock in chronological order -- confirm against the
    ``StockWiseOperator`` contract.
    """

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        """Configure the operator.

        Args:
            window_vol: Window size (in rows) of the rolling mean of ``vol``.
            window_smooth: Window size (in rows) of the final smoothing mean.
        """
        config = OperatorConfig(
            name=f"senti_breadth_proxy_{window_vol}_{window_smooth}",
            description=f"{window_vol}日成交量{window_smooth}日平滑市场宽度情绪代理",
            required_columns=['pct_chg', 'vol'],
            output_columns=[f'senti_breadth_proxy_{window_vol}_{window_smooth}'],
            parameters={'window_vol': window_vol, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_vol = window_vol
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the breadth-proxy column to ``stock_df``.

        Args:
            stock_df: Frame containing ``pct_chg`` and ``vol`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with one extra column named
            ``senti_breadth_proxy_{window_vol}_{window_smooth}``.
        """
        # Rolling mean of volume. The window size is passed positionally:
        # Polars names this parameter ``window_size``, so the former
        # ``window=`` keyword was rejected.
        rolling_avg_vol = pl.col('vol').rolling_mean(self.window_vol)

        # Return weighted by relative volume; the epsilon guards against a
        # zero average volume.
        raw_breadth = pl.col('pct_chg') * (pl.col('vol') / (rolling_avg_vol + 1e-8))

        # Smooth the raw series.
        breadth_proxy = raw_breadth.rolling_mean(self.window_smooth)

        return stock_df.with_columns(
            breadth_proxy.alias(f'senti_breadth_proxy_{self.window_vol}_{self.window_smooth}')
        )
class SentimentReversalIndicatorOperator(StockWiseOperator):
    """Short-term sentiment reversal factor operator.

    The factor is the negated ``window_ret``-row percentage change of
    ``close`` multiplied by the ``window_vol``-row rolling standard deviation
    of ``pct_chg``.

    NOTE(review): the ``pct_change``/rolling expressions assume ``stock_df``
    holds the rows of a single stock in chronological order -- confirm against
    the ``StockWiseOperator`` contract.
    """

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        """Configure the operator.

        Args:
            window_ret: Number of rows over which the close-to-close return is taken.
            window_vol: Window size (in rows) of the rolling standard deviation.
        """
        config = OperatorConfig(
            name=f"senti_reversal_{window_ret}_{window_vol}",
            description=f"{window_ret}日收益{window_vol}日波动短期情绪反转因子",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'senti_reversal_{window_ret}_{window_vol}'],
            parameters={'window_ret': window_ret, 'window_vol': window_vol}
        )
        super().__init__(config)
        self.window_ret = window_ret
        self.window_vol = window_vol

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the reversal factor column to ``stock_df``.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with one extra column named
            ``senti_reversal_{window_ret}_{window_vol}``.
        """
        # Return over the last ``window_ret`` rows.
        return_m = pl.col('close').pct_change(self.window_ret)

        # Rolling volatility of the per-row return. The window size is passed
        # positionally: Polars names this parameter ``window_size``, so the
        # former ``window=`` keyword was rejected.
        volatility_m = pl.col('pct_chg').rolling_std(self.window_vol)

        # The minus sign turns recent gains into a negative (reversal) score.
        reversal_factor = -return_m * volatility_m

        return stock_df.with_columns(
            reversal_factor.alias(f'senti_reversal_{self.window_ret}_{self.window_vol}')
        )
class DailyMomentumBenchmarkOperator(StockWiseOperator):
    """Daily momentum benchmark operator (placeholder implementation)."""

    def __init__(self):
        """Register the operator's name, inputs and the two output columns."""
        super().__init__(
            OperatorConfig(
                name="daily_momentum_benchmark",
                description="日级别动量基准",
                required_columns=['pct_chg'],
                output_columns=['daily_positive_benchmark', 'daily_negative_benchmark'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append both benchmark columns, filled with the literal 0.

        The real benchmark needs a cross-sectional (whole-market) computation,
        which the original author notes should be done with a DateWiseOperator.
        This stock-wise version only emits 0 as a placeholder.
        """
        placeholder_names = ('daily_positive_benchmark', 'daily_negative_benchmark')
        placeholders = [pl.lit(0).alias(column_name) for column_name in placeholder_names]
        return stock_df.with_columns(placeholders)
class DailyDeviationOperator(StockWiseOperator):
    """Daily deviation operator.

    Measures how far ``pct_chg`` is from the same-signed daily benchmark.
    It expects the ``daily_positive_benchmark`` and ``daily_negative_benchmark``
    columns to already be present in the input frame.
    """

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        config = OperatorConfig(
            name="daily_deviation",
            description="日级别偏离度",
            required_columns=['pct_chg', 'daily_positive_benchmark', 'daily_negative_benchmark'],
            output_columns=['daily_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``daily_deviation`` column to ``stock_df``.

        Rules, evaluated in order (first match wins):
            1. ``pct_chg > 0`` and positive benchmark ``> 0``:
               ``pct_chg - daily_positive_benchmark``
            2. ``pct_chg < 0`` and negative benchmark ``< 0``:
               ``pct_chg - daily_negative_benchmark``
            3. otherwise: ``0``

        Args:
            stock_df: Frame containing ``pct_chg`` and both benchmark columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra ``daily_deviation`` column.
        """
        pct_chg = pl.col('pct_chg')
        positive_benchmark = pl.col('daily_positive_benchmark')
        negative_benchmark = pl.col('daily_negative_benchmark')

        # ``pl.select`` does not take numpy.select-style conditions/choices;
        # a when/then/otherwise chain is the Polars form of that selection
        # and yields an expression that can be aliased.
        deviation = (
            pl.when((pct_chg > 0) & (positive_benchmark > 0))
            .then(pct_chg - positive_benchmark)
            .when((pct_chg < 0) & (negative_benchmark < 0))
            .then(pct_chg - negative_benchmark)
            .otherwise(0)
        )

        return stock_df.with_columns(deviation.alias('daily_deviation'))
class CatSentimentMomentumVolumeSpikeOperator(StockWiseOperator):
    """Categorical label combining momentum, volume-ratio and same-day move checks."""

    def __init__(self, return_period: int = 3, return_threshold: float = 0.05,
                 volume_ratio_threshold: float = 1.5, current_pct_chg_min: float = -0.01,
                 current_pct_chg_max: float = 0.03):
        """Store the thresholds and register the operator configuration.

        Args:
            return_period: Number of rows over which the close return is taken.
            return_threshold: The return must exceed this for the momentum flag.
            volume_ratio_threshold: ``volume_ratio`` must exceed this for the volume flag.
            current_pct_chg_min: Exclusive lower bound on the current ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound on the current ``pct_chg``.
        """
        settings = {'return_period': return_period, 'return_threshold': return_threshold,
                    'volume_ratio_threshold': volume_ratio_threshold,
                    'current_pct_chg_min': current_pct_chg_min,
                    'current_pct_chg_max': current_pct_chg_max}
        super().__init__(
            OperatorConfig(
                name=f"cat_senti_mom_vol_spike_{return_period}",
                description=f"{return_period}日情绪动量成交量激增分类",
                required_columns=['close', 'pct_chg', 'volume_ratio'],
                output_columns=[f'cat_senti_mom_vol_spike_{return_period}'],
                parameters=settings,
            )
        )
        self.return_period = return_period
        self.return_threshold = return_threshold
        self.volume_ratio_threshold = volume_ratio_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the label column ``cat_senti_mom_vol_spike_{return_period}``.

        The label is the string concatenation of three boolean flags, each
        cast to a string, in the order: momentum, volume, current price move.
        """
        output_name = f'cat_senti_mom_vol_spike_{self.return_period}'
        pct_chg = pl.col('pct_chg')

        # Flag 1: return over ``return_period`` rows is above the threshold.
        momentum_flag = pl.col('close').pct_change(self.return_period) > self.return_threshold

        # Flag 2: volume ratio is above the threshold.
        volume_flag = pl.col('volume_ratio') > self.volume_ratio_threshold

        # Flag 3: current move lies strictly between the two bounds.
        above_floor = pct_chg > self.current_pct_chg_min
        below_cap = pct_chg < self.current_pct_chg_max
        price_flag = above_floor & below_cap

        # Join the three stringified flags into one label.
        label = momentum_flag.cast(str) + volume_flag.cast(str) + price_flag.cast(str)
        return stock_df.with_columns(label.alias(output_name))
class CatSentimentPreBreakoutOperator(StockWiseOperator):
    """Categorical label for a pre-breakout consolidation pattern.

    Four boolean conditions (range contraction, volume atrophy, recent price
    stability, and a same-day price/volume signal) are each cast to a string
    and concatenated into a single label.

    NOTE(review): the rolling expressions assume ``stock_df`` holds the rows
    of a single stock in chronological order -- confirm against the
    ``StockWiseOperator`` contract.
    """

    def __init__(self, atr_short_n: int = 10, atr_long_m: int = 40,
                 vol_atrophy_n: int = 10, vol_atrophy_m: int = 40,
                 price_stab_n: int = 5, price_stab_threshold: float = 0.05,
                 current_pct_chg_min: float = 0.005, current_pct_chg_max: float = 0.07,
                 volume_ratio_threshold: float = 1.2):
        """Configure the operator.

        Args:
            atr_short_n: Short window (rows) for the mean high-low range.
            atr_long_m: Long window (rows) for the mean high-low range.
            vol_atrophy_n: Short window (rows) for the mean volume.
            vol_atrophy_m: Long window (rows) for the mean volume.
            price_stab_n: Window (rows) for the rolling high/low envelope.
            price_stab_threshold: Upper bound on envelope width relative to close.
            current_pct_chg_min: Exclusive lower bound on the current ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound on the current ``pct_chg``.
            volume_ratio_threshold: ``volume_ratio`` must exceed this value.
        """
        config = OperatorConfig(
            name="cat_senti_pre_breakout",
            description="情绪突破前盘整分类",
            required_columns=['high', 'low', 'close', 'vol', 'pct_chg', 'volume_ratio'],
            output_columns=['cat_senti_pre_breakout'],
            parameters={'atr_short_n': atr_short_n, 'atr_long_m': atr_long_m,
                        'vol_atrophy_n': vol_atrophy_n, 'vol_atrophy_m': vol_atrophy_m,
                        'price_stab_n': price_stab_n, 'price_stab_threshold': price_stab_threshold,
                        'current_pct_chg_min': current_pct_chg_min, 'current_pct_chg_max': current_pct_chg_max,
                        'volume_ratio_threshold': volume_ratio_threshold}
        )
        super().__init__(config)
        self.atr_short_n = atr_short_n
        self.atr_long_m = atr_long_m
        self.vol_atrophy_n = vol_atrophy_n
        self.vol_atrophy_m = vol_atrophy_m
        self.price_stab_n = price_stab_n
        self.price_stab_threshold = price_stab_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max
        self.volume_ratio_threshold = volume_ratio_threshold

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``cat_senti_pre_breakout`` label column to ``stock_df``.

        Args:
            stock_df: Frame containing ``high``, ``low``, ``close``, ``vol``,
                ``pct_chg`` and ``volume_ratio`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra ``cat_senti_pre_breakout`` column.
        """
        # All rolling window sizes below are passed positionally: Polars names
        # this parameter ``window_size``, so the former ``window=`` keyword
        # was rejected.

        # 1. Volatility contraction, using the high-low range as an ATR proxy:
        #    the short-window mean range is below 70% of the long-window mean.
        price_range = pl.col('high') - pl.col('low')
        atr_short = price_range.rolling_mean(self.atr_short_n)
        atr_long = price_range.rolling_mean(self.atr_long_m)
        cond_vol_contraction = atr_short < (0.7 * atr_long)

        # 2. Volume atrophy: short-window mean volume below 70% of the long one.
        vol_short = pl.col('vol').rolling_mean(self.vol_atrophy_n)
        vol_long = pl.col('vol').rolling_mean(self.vol_atrophy_m)
        cond_vol_atrophy = vol_short < (0.7 * vol_long)

        # 3. Recent price stability: width of the rolling high/low envelope,
        #    relative to the close, is under the threshold.
        rolling_max_h = pl.col('high').rolling_max(self.price_stab_n)
        rolling_min_l = pl.col('low').rolling_min(self.price_stab_n)
        price_stability = (rolling_max_h - rolling_min_l) / pl.col('close')
        cond_price_stability = price_stability < self.price_stab_threshold

        # 4. Same-day signal: move strictly inside the configured band, together
        #    with a volume ratio above the threshold.
        cond_price_signal = (
            (pl.col('pct_chg') > self.current_pct_chg_min)
            & (pl.col('pct_chg') < self.current_pct_chg_max)
        )
        cond_vol_signal = pl.col('volume_ratio') > self.volume_ratio_threshold
        cond_current_day_signal = cond_price_signal & cond_vol_signal

        # Concatenate the four stringified flags into one label.
        result = (cond_vol_contraction.cast(str) + cond_vol_atrophy.cast(str) +
                  cond_price_stability.cast(str) + cond_current_day_signal.cast(str))

        return stock_df.with_columns(result.alias('cat_senti_pre_breakout'))
class StrongInflowSignalOperator(StockWiseOperator):
    """Strong main-capital inflow signal operator.

    Emits 1 when three conditions hold together: mean flow intensity above a
    threshold, every row in the recent window being a net buy, and every row
    in the recent window having positive flow acceleration. Otherwise emits 0.

    NOTE(review): the rolling expressions assume ``stock_df`` holds the rows
    of a single stock in chronological order -- confirm against the
    ``StockWiseOperator`` contract.
    """

    def __init__(self, intensity_avg_n: int = 3, intensity_threshold: float = 0.01,
                 consecutive_buy_n: int = 2, accel_positive_m: int = 1):
        """Configure the operator.

        Args:
            intensity_avg_n: Window (rows) for the mean of ``flow_lg_elg_intensity``.
            intensity_threshold: The mean intensity must exceed this value.
            consecutive_buy_n: Number of consecutive rows that must be net buys.
            accel_positive_m: Number of consecutive rows that must have
                positive ``flow_lg_elg_accel``.
        """
        config = OperatorConfig(
            name="senti_strong_inflow",
            description="强主力资金流入信号",
            required_columns=['flow_lg_elg_intensity', 'flow_lg_elg_accel'],
            output_columns=['senti_strong_inflow'],
            parameters={'intensity_avg_n': intensity_avg_n, 'intensity_threshold': intensity_threshold,
                        'consecutive_buy_n': consecutive_buy_n, 'accel_positive_m': accel_positive_m}
        )
        super().__init__(config)
        self.intensity_avg_n = intensity_avg_n
        self.intensity_threshold = intensity_threshold
        self.consecutive_buy_n = consecutive_buy_n
        self.accel_positive_m = accel_positive_m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``senti_strong_inflow`` column to ``stock_df``.

        Args:
            stock_df: Frame expected to contain ``flow_lg_elg_intensity`` and
                ``flow_lg_elg_accel``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra ``senti_strong_inflow`` column. When
            either input column is missing, the column is the literal 0.
        """
        # Degrade gracefully when the money-flow inputs are missing.
        required_cols = ['flow_lg_elg_intensity', 'flow_lg_elg_accel']
        if not all(col in stock_df.columns for col in required_cols):
            return stock_df.with_columns(pl.lit(0).alias('senti_strong_inflow'))

        # All rolling window sizes below are passed positionally: Polars names
        # this parameter ``window_size``, so the former ``window=`` keyword
        # was rejected.

        # 1. Mean flow intensity over the last N rows is above the threshold.
        avg_intensity = pl.col('flow_lg_elg_intensity').rolling_mean(self.intensity_avg_n)
        cond_avg_intensity = avg_intensity > self.intensity_threshold

        # 2. Every row in the last N rows is a net buy: the rolling count of
        #    net-buy rows equals the window length (simplified streak check).
        is_net_buy = (pl.col('flow_lg_elg_intensity') > 0).cast(int)
        cond_consecutive_buy = (
            is_net_buy.rolling_sum(self.consecutive_buy_n) == self.consecutive_buy_n
        )

        # 3. Flow acceleration is positive on every one of the last M rows.
        is_accel_positive = (pl.col('flow_lg_elg_accel') > 0).cast(int)
        cond_accel_positive = (
            is_accel_positive.rolling_sum(self.accel_positive_m) == self.accel_positive_m
        )

        # All three conditions must hold at once.
        strong_inflow = cond_avg_intensity & cond_consecutive_buy & cond_accel_positive

        return stock_df.with_columns(strong_inflow.cast(int).alias('senti_strong_inflow'))
# Default collection of sentiment operators, instantiated with their default
# parameters. Order is significant: DailyDeviationOperator lists the columns
# produced by DailyMomentumBenchmarkOperator among its required columns.
SENTIMENT_OPERATORS = [
    operator_cls()
    for operator_cls in (
        SentimentPanicGreedIndexOperator,
        SentimentMarketBreadthProxyOperator,
        SentimentReversalIndicatorOperator,
        DailyMomentumBenchmarkOperator,
        DailyDeviationOperator,
        CatSentimentMomentumVolumeSpikeOperator,
        CatSentimentPreBreakoutOperator,
        StrongInflowSignalOperator,
    )
]
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of sentiment operators over a frame.

    Each operator is called with the output of the previous one, so later
    operators see the columns added by earlier ones.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When ``None``, the
            module-level ``SENTIMENT_OPERATORS`` list is used.

    Returns:
        The frame returned by the last operator, or ``df`` itself when the
        operator sequence is empty.
    """
    pipeline = SENTIMENT_OPERATORS if operators is None else operators

    enriched = df
    for step in pipeline:
        enriched = step(enriched)
    return enriched