Files
NewStock/main/factor/polars_volatility_factors.py

420 lines
15 KiB
Python
Raw Normal View History

2025-10-13 21:42:35 +08:00
"""
波动率因子 - 使用Polars实现
包含上行波动率下行波动率波动率比率等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class UpsideVolatilityOperator(StockWiseOperator):
    """Per-stock rolling upside volatility.

    Within each rolling window only the strictly positive values of
    ``pct_chg`` are considered; the result is the square root of their
    population variance (mean of squares minus squared mean).
    """

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length, in rows.
        """
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``upside_volatility_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the upside-volatility column added. The value is
            null whenever fewer than two positive returns fall in the window.
        """
        is_up = pl.col('pct_chg') > 0

        # Keep positive returns, zero out everything else so the rolling sums
        # only accumulate the positive part.
        pos_returns = pl.when(is_up).then(pl.col('pct_chg')).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Polars rolling functions take ``window_size`` (not ``window``).
        # The boolean mask is cast to an integer so it can be summed.
        rolling_pos_count = is_up.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_pos_sum = pos_returns.rolling_sum(window_size=self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(window_size=self.window)

        # Population variance of the positive returns: E[x^2] - E[x]^2.
        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Not enough positive samples -> undefined (null).
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floating-point cancellation can push the variance slightly below
        # zero; clamp before taking the square root.
        pos_var = pos_var.clip(lower_bound=0)
        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
class DownsideVolatilityOperator(StockWiseOperator):
    """Per-stock rolling downside volatility.

    Within each rolling window only the strictly negative values of
    ``pct_chg`` are considered; the result is the square root of their
    population variance (mean of squares minus squared mean).
    """

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length, in rows.
        """
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``downside_volatility_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the downside-volatility column added. The value
            is null whenever fewer than two negative returns fall in the window.
        """
        is_down = pl.col('pct_chg') < 0

        # Keep negative returns, zero out everything else so the rolling sums
        # only accumulate the negative part.
        neg_returns = pl.when(is_down).then(pl.col('pct_chg')).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Polars rolling functions take ``window_size`` (not ``window``).
        # The boolean mask is cast to an integer so it can be summed.
        rolling_neg_count = is_down.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_neg_sum = neg_returns.rolling_sum(window_size=self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(window_size=self.window)

        # Population variance of the negative returns: E[x^2] - E[x]^2.
        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Not enough negative samples -> undefined (null).
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floating-point cancellation can push the variance slightly below
        # zero; clamp before taking the square root.
        neg_var = neg_var.clip(lower_bound=0)
        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
class VolatilityRatioOperator(StockWiseOperator):
    """Per-stock ratio of upside to downside rolling volatility.

    Both legs are the rolling standard deviation of ``pct_chg`` with the
    opposite-signed returns replaced by zero. Note this is a different
    definition from the count-based variance used by the upside/downside
    volatility operators in this module.
    """

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length, in rows.
        """
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volatility_ratio_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the ratio column added. Any non-finite or
            missing ratio (inf, NaN, null) is reported as 0.
        """
        # Zero-filled positive-only and negative-only return series.
        pos_returns = pl.when(pl.col('pct_chg') > 0).then(pl.col('pct_chg')).otherwise(0)
        neg_returns = pl.when(pl.col('pct_chg') < 0).then(pl.col('pct_chg')).otherwise(0)

        # Polars rolling functions take ``window_size`` (not ``window``).
        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)

        raw_ratio = upside_vol / downside_vol

        # x/0 gives +/-inf and 0/0 gives NaN; together with nulls these all
        # collapse to 0. ``is_finite`` is null for null input, which also
        # routes to the ``otherwise`` branch.
        vol_ratio = pl.when(raw_ratio.is_finite()).then(raw_ratio).otherwise(0.0)

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
class ReturnSkewnessOperator(StockWiseOperator):
    """Per-stock rolling skewness of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length, in rows.
        """
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_skewness_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the rolling-skewness column added.
        """
        # Polars rolling functions take ``window_size`` (not ``window``).
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)
        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
class ReturnKurtosisOperator(StockWiseOperator):
    """Per-stock rolling kurtosis of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length, in rows.
        """
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_kurtosis_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the rolling-kurtosis column added.
        """
        # Polars names this method ``rolling_kurtosis`` and takes
        # ``window_size``; there is no ``rolling_kurt(window=...)``.
        # NOTE(review): ``rolling_kurtosis`` only exists in recent Polars
        # releases and is marked unstable -- confirm the pinned version.
        kurtosis = pl.col('pct_chg').rolling_kurtosis(window_size=self.window)
        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
class VolatilityAmplificationOperator(StockWiseOperator):
    """Rolling volatility scaled by how far ``close`` sits below ``weight_avg``.

    The factor is ``rolling_std(pct_chg, n) * max(0, weight_avg - close) / close``,
    so it is zero whenever ``close`` is at or above ``weight_avg``.
    """

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Rolling window length, in rows, for the volatility term.
        """
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_amp_loss_{n}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``pct_chg``,
                ``weight_avg`` and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the amplified-volatility column added.
        """
        # Polars rolling functions take ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Relative shortfall of close versus weight_avg, floored at zero.
        # NOTE(review): weight_avg presumably is a holders' average cost --
        # confirm with the data source.
        loss_degree = pl.max_horizontal(0, pl.col('weight_avg') - pl.col('close')) / pl.col('close')

        vol_amp = vol_n * loss_degree
        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Rolling count of high-volume drop days that occur while in profit.

    A row counts as an event when all three hold:
      * ``close > weight_avg * (1 + profit_thresh)``
      * ``pct_chg < drop_thresh``
      * ``vol > rolling_mean(vol, n) + vol_multiple * rolling_std(vol, n)``

    The output is the number of events over the last ``m`` rows.
    """

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Configure the operator.

        Args:
            n: Window length for the volume mean/std baseline.
            m: Window length over which events are counted.
            profit_thresh: Required relative excess of ``close`` over
                ``weight_avg``.
            drop_thresh: ``pct_chg`` must be below this value.
            vol_multiple: Number of rolling standard deviations above the
                rolling mean that volume must exceed.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_drop_profit_cnt_{m}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``close``,
                ``pct_chg``, ``vol`` and ``weight_avg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the event-count column added.
        """
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Polars rolling functions take ``window_size`` (not ``window``).
        rolling_mean_vol = pl.col('vol').rolling_mean(window_size=self.n)
        rolling_std_vol = pl.col('vol').rolling_std(window_size=self.n).fill_null(0)

        is_high_vol = pl.col('vol') > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Booleans are cast to integers so they can be summed over m rows.
        event_cnt = event.cast(pl.Int64).rolling_sum(window_size=self.m)
        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Interaction of rolling volatility with large-order net-flow intensity.

    The factor is ``rolling_std(pct_chg, n)`` multiplied by the ``n``-row mean
    of ``|net large + extra-large order value| / total traded value``.
    """

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Rolling window length, in rows, used for both terms.
        """
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``lg_flow_vol_interact_{n}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing the required
                return, order-flow volume, ``vol`` and ``close`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the interaction column added.
        """
        # Added to the denominator so a zero traded value does not divide by zero.
        epsilon = 1e-8

        # Polars rolling functions take ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net (buy - sell) large and extra-large order volume, valued at close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        # Total traded value, also approximated with the close price.
        total_val = pl.col('vol') * pl.col('close')

        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n
        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """``n``-row rate of change of ``close`` divided by rolling volatility."""

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Look-back, in rows, for both the rate of change and the
                rolling standard deviation.
        """
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_adj_roc_{n}`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``close`` and
                ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the volatility-adjusted return column added.
        """
        # n-row percentage change of the close price.
        roc_n = pl.col('close').pct_change(self.n)

        # Polars rolling functions take ``window_size`` (not ``window``).
        # Missing volatility is treated as 0 before the division below.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)

        # Small constant keeps the denominator away from zero.
        vol_adj_roc = roc_n / (vol_n + 1e-10)
        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling standard deviation of close-to-close returns."""

    def __init__(self):
        """Register the fixed ``std_return_5`` configuration."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_5`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the ``std_return_5`` column added.
        """
        # One-row percentage change of close.
        returns = pl.col('close').pct_change()
        # Polars rolling functions take ``window_size`` (not ``window``).
        std_5 = returns.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('std_return_5'))
class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling standard deviation of close-to-close returns."""

    def __init__(self):
        """Register the fixed ``std_return_90`` configuration."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the ``std_return_90`` column added.
        """
        # One-row percentage change of close.
        returns = pl.col('close').pct_change()
        # Polars rolling functions take ``window_size`` (not ``window``).
        std_90 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90.alias('std_return_90'))
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling standard deviation of returns, lagged by 10 rows.

    Returns are computed from ``close`` shifted by 10 rows, so each output
    value describes the window that ended 10 rows earlier.
    """

    def __init__(self):
        """Register the fixed ``std_return_90_2`` configuration."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90_2`` column to ``stock_df``.

        Args:
            stock_df: Frame for a single stock containing ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the ``std_return_90_2`` column added.
        """
        # One-row percentage change of the close price lagged by 10 rows.
        returns = pl.col('close').shift(10).pct_change()
        # Polars rolling functions take ``window_size`` (not ``window``).
        std_90_2 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
# Default collection of volatility operators: one instance of each operator
# class, built with its default constructor arguments, in application order.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
def apply_volatility_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply a sequence of volatility operators to a frame.

    Each operator is called with the current frame and its return value is
    fed to the next operator.

    Args:
        df: Input Polars DataFrame.
        operators: Operators (callables taking and returning a frame) to
            apply, in order. When ``None``, the module-level
            ``VOLATILITY_OPERATORS`` list is used. An empty list returns
            ``df`` unchanged.

    Returns:
        The frame produced by the last operator.
    """
    # Only an explicit None selects the defaults; an empty list is honoured.
    active_operators = VOLATILITY_OPERATORS if operators is None else operators

    result_df = df
    for operator in active_operators:
        result_df = operator(result_df)
    return result_df