"""
Volatility factors implemented with Polars.

Provides operators for upside volatility, downside volatility, the
volatility ratio and related factor calculations.
"""
|
|||
|
|
|
|||
|
|
import polars as pl
|
|||
|
|
import numpy as np
|
|||
|
|
from typing import Dict, List, Optional, Any
|
|||
|
|
from operator_framework import StockWiseOperator, OperatorConfig
|
|||
|
|
|
|||
|
|
|
|||
|
|
class UpsideVolatilityOperator(StockWiseOperator):
    """Upside volatility operator.

    Over a trailing window of ``window`` rows, computes the population
    standard deviation (``sqrt(E[x^2] - E[x]^2)``) of the strictly positive
    ``pct_chg`` values and stores it in ``upside_volatility_{window}``.
    """

    def __init__(self, window: int = 20):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the upside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock, sorted by date - confirm
                against the ``StockWiseOperator`` contract.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``upside_volatility_{window}`` added. The value
            is null when fewer than two positive returns fall in the window.
        """
        is_positive = pl.col('pct_chg') > 0

        # Keep positive returns, zero out everything else so that rolling
        # sums only accumulate the positive observations.
        pos_returns = pl.when(is_positive).then(pl.col('pct_chg')).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # The window length is passed positionally: the Polars keyword is
        # ``window_size``, not ``window``. The Boolean mask is cast to an
        # integer so the rolling sum runs on a numeric dtype.
        rolling_pos_count = is_positive.cast(pl.Int32).rolling_sum(self.window)
        rolling_pos_sum = pos_returns.rolling_sum(self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(self.window)

        # Population variance of the positive returns: E[x^2] - E[x]^2.
        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Too few positive samples -> null (this also masks the division by
        # a zero count above).
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance;
        # clamp it so the square root stays defined.
        pos_var = pos_var.clip(lower_bound=0)

        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class DownsideVolatilityOperator(StockWiseOperator):
    """Downside volatility operator.

    Over a trailing window of ``window`` rows, computes the population
    standard deviation (``sqrt(E[x^2] - E[x]^2)``) of the strictly negative
    ``pct_chg`` values and stores it in ``downside_volatility_{window}``.
    """

    def __init__(self, window: int = 20):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the downside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock, sorted by date - confirm
                against the ``StockWiseOperator`` contract.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``downside_volatility_{window}`` added. The
            value is null when fewer than two negative returns fall in the
            window.
        """
        is_negative = pl.col('pct_chg') < 0

        # Keep negative returns, zero out everything else so that rolling
        # sums only accumulate the negative observations.
        neg_returns = pl.when(is_negative).then(pl.col('pct_chg')).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # The window length is passed positionally: the Polars keyword is
        # ``window_size``, not ``window``. The Boolean mask is cast to an
        # integer so the rolling sum runs on a numeric dtype.
        rolling_neg_count = is_negative.cast(pl.Int32).rolling_sum(self.window)
        rolling_neg_sum = neg_returns.rolling_sum(self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(self.window)

        # Population variance of the negative returns: E[x^2] - E[x]^2.
        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Too few negative samples -> null (this also masks the division by
        # a zero count above).
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance;
        # clamp it so the square root stays defined.
        neg_var = neg_var.clip(lower_bound=0)

        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VolatilityRatioOperator(StockWiseOperator):
    """Volatility ratio operator.

    Divides the rolling standard deviation of the positive-return series by
    that of the negative-return series (each series has the other sign
    replaced by zero) and stores the result in ``volatility_ratio_{window}``.
    """

    def __init__(self, window: int = 20):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-ratio column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``volatility_ratio_{window}`` added. Any ratio
            that is null, NaN or infinite is reported as 0.
        """
        pct = pl.col('pct_chg')
        pos_returns = pl.when(pct > 0).then(pct).otherwise(0)
        neg_returns = pl.when(pct < 0).then(pct).otherwise(0)

        # Window length is positional: the Polars keyword is ``window_size``.
        upside_vol = pos_returns.rolling_std(self.window)
        downside_vol = neg_returns.rolling_std(self.window)

        raw_ratio = upside_vol / downside_vol

        # x/0 gives +/-inf and 0/0 gives NaN; ``fill_null`` alone does not
        # touch NaN, so every non-finite value is first turned into null and
        # then filled with 0 together with the warm-up nulls.
        vol_ratio = (
            pl.when(raw_ratio.is_finite())
            .then(raw_ratio)
            .otherwise(None)
            .fill_null(0.0)
        )

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ReturnSkewnessOperator(StockWiseOperator):
    """Return skewness operator.

    Stores the rolling skewness of ``pct_chg`` over ``window`` rows in
    ``return_skewness_{window}``.
    """

    def __init__(self, window: int = 5):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling-skewness column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_skewness_{window}`` added.
        """
        # Window length is positional: the Polars keyword is ``window_size``,
        # so the former ``window=`` keyword raised a TypeError.
        skewness = pl.col('pct_chg').rolling_skew(self.window)

        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ReturnKurtosisOperator(StockWiseOperator):
    """Return kurtosis operator.

    Stores the rolling kurtosis of ``pct_chg`` over ``window`` rows in
    ``return_kurtosis_{window}``.
    """

    def __init__(self, window: int = 5):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling-kurtosis column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_kurtosis_{window}`` added. Both code
            paths rely on the library defaults of the kurtosis routine
            (NOTE(review): Fisher/excess, biased estimator per the Polars
            documentation - confirm this matches the intended factor).
        """
        returns = pl.col('pct_chg')

        # Polars has no ``Expr.rolling_kurt``. Newer releases expose
        # ``rolling_kurtosis``; older ones need a per-window fallback through
        # ``rolling_map`` + ``Series.kurtosis`` (slower, same statistic).
        if hasattr(pl.Expr, 'rolling_kurtosis'):
            kurtosis = returns.rolling_kurtosis(self.window)
        else:
            kurtosis = returns.rolling_map(
                lambda window_values: window_values.kurtosis(),
                self.window,
            )

        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VolatilityAmplificationOperator(StockWiseOperator):
    """Loss-state volatility amplification operator.

    Multiplies the ``n``-row rolling standard deviation of ``pct_chg`` by the
    loss degree ``max(0, weight_avg - close) / close`` and stores the product
    in ``vol_amp_loss_{n}``.
    """

    def __init__(self, n: int = 20):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the loss-state volatility amplification column.

        Args:
            stock_df: Frame containing ``pct_chg``, ``weight_avg`` and
                ``close`` columns.
                NOTE(review): ``weight_avg`` looks like an average holding
                cost - confirm with the data source.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_amp_loss_{n}`` added.
        """
        # n-row volatility. Window length is positional: the Polars keyword
        # is ``window_size``, not ``window``.
        vol_n = pl.col('pct_chg').rolling_std(self.n)

        # Loss degree: how far ``close`` sits below ``weight_avg``, relative
        # to ``close``; zero when ``close`` is at or above ``weight_avg``.
        shortfall = pl.max_horizontal(pl.lit(0), pl.col('weight_avg') - pl.col('close'))
        loss_degree = shortfall / pl.col('close')

        vol_amp = vol_n * loss_degree

        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Count of high-volume drops while the position is in profit.

    An event is a row where all three conditions hold:

    * ``close > weight_avg * (1 + profit_thresh)``
    * ``pct_chg < drop_thresh``
    * ``vol > rolling_mean(vol, n) + vol_multiple * rolling_std(vol, n)``

    The output ``vol_drop_profit_cnt_{m}`` is the number of events in the
    trailing ``m`` rows. Note that the output name depends only on ``m``, so
    two instances that differ in the other parameters write the same column.
    """

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Store the thresholds and build the operator config.

        Args:
            n: Window length for the volume mean / standard deviation.
            m: Window length over which events are counted.
            profit_thresh: Relative margin of ``close`` above ``weight_avg``
                required to count as profitable.
            drop_thresh: ``pct_chg`` must be below this value to count as a
                drop.
            vol_multiple: Number of rolling standard deviations above the
                rolling mean that ``vol`` must exceed.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling event-count column to ``stock_df``.

        Args:
            stock_df: Frame containing ``close``, ``pct_chg``, ``vol`` and
                ``weight_avg`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_drop_profit_cnt_{m}`` added.
        """
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Window lengths are positional: the Polars keyword is
        # ``window_size``, not ``window``.
        volume = pl.col('vol')
        rolling_mean_vol = volume.rolling_mean(self.n)
        rolling_std_vol = volume.rolling_std(self.n).fill_null(0)

        is_high_vol = volume > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Cast the Boolean flag to an integer so it can be summed.
        event_cnt = event.cast(pl.Int64).rolling_sum(self.m)

        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Large-order money-flow x volatility interaction operator.

    Multiplies the ``n``-row rolling standard deviation of ``pct_chg`` by the
    ``n``-row rolling mean of ``|net large-order value| / total value`` and
    stores the product in ``lg_flow_vol_interact_{n}``.
    """

    def __init__(self, n: int = 20):
        """Build the operator config for the given rolling window length."""
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the flow/volatility interaction column to ``stock_df``.

        Args:
            stock_df: Frame containing ``pct_chg``, ``buy_lg_vol``,
                ``buy_elg_vol``, ``sell_lg_vol``, ``sell_elg_vol``, ``vol``
                and ``close`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``lg_flow_vol_interact_{n}`` added.
        """
        # Small constant added to the denominator to avoid division by zero.
        epsilon = 1e-8

        # n-row volatility. Window length is positional: the Polars keyword
        # is ``window_size``, not ``window``.
        vol_n = pl.col('pct_chg').rolling_std(self.n)

        # Net large + extra-large order volume (buys minus sells), valued at
        # the close price.
        net_lg_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_lg_flow_val = net_lg_volume * pl.col('close')

        # Total traded value at the close price.
        total_val = pl.col('vol') * pl.col('close')

        # Absolute share of total value accounted for by the net large flow.
        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)

        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(self.n)

        interaction = vol_n * abs_ratio_n

        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """Volatility-adjusted rate-of-change operator.

    Divides the ``n``-row percentage change of ``close`` by the ``n``-row
    rolling standard deviation of ``pct_chg`` and stores the result in
    ``vol_adj_roc_{n}``.
    """

    def __init__(self, n: int = 20):
        """Build the operator config for the given window length."""
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-adjusted return column to ``stock_df``.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_adj_roc_{n}`` added.
        """
        # n-row return of the close price.
        roc_n = pl.col('close').pct_change(self.n)

        # n-row volatility; nulls become 0. Window length is positional: the
        # Polars keyword is ``window_size``, not ``window``.
        vol_n = pl.col('pct_chg').rolling_std(self.n).fill_null(0)

        # The small constant keeps the denominator non-zero.
        vol_adj_roc = roc_n / (vol_n + 1e-10)

        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class StandardDeviation5Operator(StockWiseOperator):
    """5-row standard deviation of close-to-close returns (``std_return_5``)."""

    def __init__(self):
        """Build the fixed operator config."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_5`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_5`` added.
        """
        # One-row percentage change of the close price.
        returns = pl.col('close').pct_change()

        # Window length is positional: the Polars keyword is ``window_size``,
        # so the former ``window=5`` raised a TypeError.
        std_5 = returns.rolling_std(5)

        return stock_df.with_columns(std_5.alias('std_return_5'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class StandardDeviation90Operator(StockWiseOperator):
    """90-row standard deviation of close-to-close returns (``std_return_90``)."""

    def __init__(self):
        """Build the fixed operator config."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90`` added.
        """
        # One-row percentage change of the close price.
        returns = pl.col('close').pct_change()

        # Window length is positional: the Polars keyword is ``window_size``,
        # so the former ``window=90`` raised a TypeError.
        std_90 = returns.rolling_std(90)

        return stock_df.with_columns(std_90.alias('std_return_90'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row standard deviation of returns computed on ``close`` shifted by 10 rows.

    The result is stored in ``std_return_90_2``.
    """

    def __init__(self):
        """Build the fixed operator config."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90_2`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90_2`` added.
        """
        # Shift the close price by 10 rows first, then take the one-row
        # percentage change of the shifted series.
        returns = pl.col('close').shift(10).pct_change()

        # Window length is positional: the Polars keyword is ``window_size``,
        # so the former ``window=90`` raised a TypeError.
        std_90_2 = returns.rolling_std(90)

        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Default collection of volatility factor operators: one instance of each
# operator class, built with its default arguments, in application order.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def apply_volatility_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply volatility factor operators to a DataFrame, one after another.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. Each one is called with the
            current frame and must return the next frame. When ``None``, the
            module-level ``VOLATILITY_OPERATORS`` list is used.

    Returns:
        The DataFrame returned by the last operator, or ``df`` itself when
        the operator list is empty.
    """
    # Only ``None`` selects the defaults: an explicitly empty list is
    # honoured and returns the input unchanged.
    if operators is None:
        operators = VOLATILITY_OPERATORS

    result_df = df
    for operator in operators:
        # Each operator receives the output of the previous one.
        result_df = operator(result_df)

    return result_df
|