Files
NewStock/main/factor/polars_volatility_factors.py

420 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
波动率因子 - 使用Polars实现
包含上行波动率、下行波动率、波动率比率等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class UpsideVolatilityOperator(StockWiseOperator):
    """Upside volatility operator.

    Emits ``upside_volatility_{window}``: the rolling population standard
    deviation of the strictly positive ``pct_chg`` values in each window.
    """

    def __init__(self, window: int = 20):
        """Register the operator configuration.

        Args:
            window: Number of rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the upside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column. Presumably the
                rows of a single stock in time order -- confirm against the
                operator framework.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``upside_volatility_{window}`` added. The value
            is null whenever a window holds fewer than two positive returns.
        """
        returns = pl.col('pct_chg')
        is_positive = returns > 0

        # Zero out non-positive returns so the rolling sums only accumulate gains.
        pos_returns = pl.when(is_positive).then(returns).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Polars names the rolling window argument ``window_size`` (``window``
        # is rejected). The boolean mask is cast explicitly so the rolling sum
        # runs on a numeric dtype.
        rolling_pos_count = is_positive.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_pos_sum = pos_returns.rolling_sum(window_size=self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(window_size=self.window)

        # Var[x] = E[x^2] - E[x]^2, averaged over the positive observations only.
        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Fewer than two positive samples: no meaningful dispersion, emit null.
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance.
        pos_var = pos_var.clip(lower_bound=0)
        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
class DownsideVolatilityOperator(StockWiseOperator):
    """Downside volatility operator.

    Emits ``downside_volatility_{window}``: the rolling population standard
    deviation of the strictly negative ``pct_chg`` values in each window.
    """

    def __init__(self, window: int = 20):
        """Register the operator configuration.

        Args:
            window: Number of rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the downside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column. Presumably the
                rows of a single stock in time order -- confirm against the
                operator framework.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``downside_volatility_{window}`` added. The
            value is null whenever a window holds fewer than two negative
            returns.
        """
        returns = pl.col('pct_chg')
        is_negative = returns < 0

        # Zero out non-negative returns so the rolling sums only accumulate losses.
        neg_returns = pl.when(is_negative).then(returns).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Polars names the rolling window argument ``window_size`` (``window``
        # is rejected). The boolean mask is cast explicitly so the rolling sum
        # runs on a numeric dtype.
        rolling_neg_count = is_negative.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_neg_sum = neg_returns.rolling_sum(window_size=self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(window_size=self.window)

        # Var[x] = E[x^2] - E[x]^2, averaged over the negative observations only.
        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Fewer than two negative samples: no meaningful dispersion, emit null.
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance.
        neg_var = neg_var.clip(lower_bound=0)
        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
class VolatilityRatioOperator(StockWiseOperator):
    """Volatility ratio operator.

    Emits ``volatility_ratio_{window}``: rolling standard deviation of the
    positive part of ``pct_chg`` divided by the rolling standard deviation of
    its negative part.
    """

    def __init__(self, window: int = 20):
        """Register the operator configuration.

        Args:
            window: Number of rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the upside/downside volatility ratio to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``volatility_ratio_{window}`` added. Any ratio
            that is null, NaN or infinite is reported as ``0``.
        """
        returns = pl.col('pct_chg')

        # Positive / negative parts of the return series; the other side is zeroed.
        pos_returns = pl.when(returns > 0).then(returns).otherwise(0)
        neg_returns = pl.when(returns < 0).then(returns).otherwise(0)

        # Polars names the rolling window argument ``window_size``.
        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)

        vol_ratio = upside_vol / downside_vol

        # x/0 yields +/-inf and 0/0 yields NaN; ``fill_null`` alone would let
        # NaN through. ``is_finite`` is false for inf/NaN and null for null,
        # and ``when`` routes both cases to the ``otherwise`` branch.
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(pl.lit(0.0))

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
class ReturnSkewnessOperator(StockWiseOperator):
    """Return skewness operator.

    Emits ``return_skewness_{window}``: the rolling skewness of ``pct_chg``.
    """

    def __init__(self, window: int = 5):
        """Register the operator configuration.

        Args:
            window: Number of rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling skewness of ``pct_chg`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``return_skewness_{window}`` added.
        """
        # Polars names the rolling window argument ``window_size``.
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)
        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
class ReturnKurtosisOperator(StockWiseOperator):
    """Return kurtosis operator.

    Emits ``return_kurtosis_{window}``: the rolling kurtosis of ``pct_chg``.
    """

    def __init__(self, window: int = 5):
        """Register the operator configuration.

        Args:
            window: Number of rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling kurtosis of ``pct_chg`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``return_kurtosis_{window}`` added.
        """
        returns = pl.col('pct_chg')
        # NOTE(review): the Polars method is ``rolling_kurtosis`` (there is no
        # ``rolling_kurt``) and its window argument is ``window_size``. Older
        # releases lack the native kernel, so fall back to ``rolling_map`` with
        # the Series-level ``kurtosis`` -- confirm against the pinned version.
        if hasattr(pl.Expr, 'rolling_kurtosis'):
            kurtosis = returns.rolling_kurtosis(window_size=self.window)
        else:
            kurtosis = returns.rolling_map(
                lambda window_values: window_values.kurtosis(),
                window_size=self.window,
            )
        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
class VolatilityAmplificationOperator(StockWiseOperator):
    """Loss-state volatility amplification operator.

    Emits ``vol_amp_loss_{n}``: the rolling standard deviation of ``pct_chg``
    scaled by how far ``close`` sits below ``weight_avg``.
    """

    def __init__(self, n: int = 20):
        """Register the operator configuration.

        Args:
            n: Number of rows in the rolling volatility window.
        """
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the loss-state volatility amplification to ``stock_df``.

        Args:
            stock_df: Frame containing ``pct_chg``, ``weight_avg`` and
                ``close`` columns. ``weight_avg`` is presumably the average
                holding cost -- confirm against the data source.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``vol_amp_loss_{n}`` added.
        """
        # Polars names the rolling window argument ``window_size``.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Relative shortfall of close versus weight_avg, floored at zero so the
        # factor is only non-zero when close is below weight_avg.
        shortfall = pl.max_horizontal(pl.lit(0), pl.col('weight_avg') - pl.col('close'))
        loss_degree = shortfall / pl.col('close')

        vol_amp = vol_n * loss_degree
        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """High-volume drop while profitable operator.

    Emits ``vol_drop_profit_cnt_{m}``: the number of rows in the last ``m``
    on which the close was above ``weight_avg`` by more than
    ``profit_thresh``, ``pct_chg`` fell below ``drop_thresh`` and ``vol``
    exceeded its rolling mean by ``vol_multiple`` rolling standard deviations.
    """

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Register the operator configuration.

        Args:
            n: Window used for the rolling mean/std of ``vol``.
            m: Window over which matching events are counted.
            profit_thresh: Fraction by which ``close`` must exceed
                ``weight_avg`` to count as profitable.
            drop_thresh: ``pct_chg`` must be strictly below this value.
            vol_multiple: Number of rolling standard deviations above the
                rolling mean that ``vol`` must exceed.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling event count to ``stock_df``.

        Args:
            stock_df: Frame containing ``close``, ``pct_chg``, ``vol`` and
                ``weight_avg`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``vol_drop_profit_cnt_{m}`` added.
        """
        volume = pl.col('vol')

        # Close sits more than profit_thresh above weight_avg.
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        # The row's return is below the drop threshold.
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Polars names the rolling window argument ``window_size``.
        rolling_mean_vol = volume.rolling_mean(window_size=self.n)
        rolling_std_vol = volume.rolling_std(window_size=self.n).fill_null(0)

        # Volume spike: above the rolling mean by vol_multiple standard deviations.
        is_high_vol = volume > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Count events over the trailing m rows.
        event_cnt = event.cast(pl.Int64).rolling_sum(window_size=self.m)
        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Large-order flow x volatility interaction operator.

    Emits ``lg_flow_vol_interact_{n}``: rolling std of ``pct_chg`` multiplied
    by the rolling mean of the absolute large-order net flow ratio.
    """

    def __init__(self, n: int = 20):
        """Register the operator configuration.

        Args:
            n: Number of rows in both rolling windows.
        """
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the flow/volatility interaction term to ``stock_df``.

        Args:
            stock_df: Frame containing ``pct_chg``, ``buy_lg_vol``,
                ``buy_elg_vol``, ``sell_lg_vol``, ``sell_elg_vol``, ``vol``
                and ``close`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``lg_flow_vol_interact_{n}`` added.
        """
        # Keeps the ratio's denominator away from zero.
        epsilon = 1e-8
        close = pl.col('close')

        # Polars names the rolling window argument ``window_size``.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net large + extra-large order volume, valued at the close.
        net_lg_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_lg_flow_val = net_lg_volume * close

        # Total traded value, valued at the close.
        total_val = pl.col('vol') * close

        # Magnitude of the net large-order flow relative to total traded value.
        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n
        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """Volatility-adjusted rate-of-change operator.

    Emits ``vol_adj_roc_{n}``: the ``n``-row percentage change of ``close``
    divided by the ``n``-row rolling standard deviation of ``pct_chg``.
    """

    def __init__(self, n: int = 20):
        """Register the operator configuration.

        Args:
            n: Look-back used for both the return and the volatility.
        """
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-adjusted return to ``stock_df``.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``vol_adj_roc_{n}`` added.
        """
        # n-row percentage change of the close.
        roc_n = pl.col('close').pct_change(self.n)

        # Polars names the rolling window argument ``window_size``.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)

        # The small constant guards against division by zero.
        vol_adj_roc = roc_n / (vol_n + 1e-10)
        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
class StandardDeviation5Operator(StockWiseOperator):
    """5-row return standard deviation operator.

    Emits ``std_return_5``: the rolling standard deviation, over 5 rows, of
    the one-row percentage change of ``close``.
    """

    def __init__(self):
        """Register the operator configuration."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_5`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``std_return_5`` added.
        """
        # One-row returns derived from the close.
        returns = pl.col('close').pct_change()
        # Polars names the rolling window argument ``window_size``.
        std_5 = returns.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('std_return_5'))
class StandardDeviation90Operator(StockWiseOperator):
    """90-row return standard deviation operator.

    Emits ``std_return_90``: the rolling standard deviation, over 90 rows, of
    the one-row percentage change of ``close``.
    """

    def __init__(self):
        """Register the operator configuration."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``std_return_90`` added.
        """
        # One-row returns derived from the close.
        returns = pl.col('close').pct_change()
        # Polars names the rolling window argument ``window_size``.
        std_90 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90.alias('std_return_90'))
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row return standard deviation operator on a 10-row lagged close.

    Emits ``std_return_90_2``: the rolling standard deviation, over 90 rows,
    of the one-row percentage change of ``close`` shifted by 10 rows.
    """

    def __init__(self):
        """Register the operator configuration."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90_2`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``std_return_90_2`` added.
        """
        # One-row returns computed on the close lagged by 10 rows.
        returns = pl.col('close').shift(10).pct_change()
        # Polars names the rolling window argument ``window_size``.
        std_90_2 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
# Default volatility factor set: one instance of every operator in this
# module, each built with its default parameters, in application order.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
def apply_volatility_factors(df: "pl.DataFrame", operators: Optional[List] = None) -> "pl.DataFrame":
    """Apply a sequence of volatility operators to ``df``.

    Each operator is called with the result of the previous one, so the
    operators are applied in list order.

    Args:
        df: Input Polars DataFrame.
        operators: Operators (callables taking and returning a frame) to
            apply. When ``None``, the module-level ``VOLATILITY_OPERATORS``
            list is used. An empty list returns ``df`` unchanged.

    Returns:
        The frame produced by the last operator.
    """
    if operators is None:
        operators = VOLATILITY_OPERATORS

    result_df = df
    for operator in operators:
        result_df = operator(result_df)
    return result_df