# Listing metadata: 420 lines, 15 KiB, Python
"""
|
||
波动率因子 - 使用Polars实现
|
||
包含上行波动率、下行波动率、波动率比率等相关因子计算
|
||
"""
|
||
|
||
import polars as pl
|
||
import numpy as np
|
||
from typing import Dict, List, Optional, Any
|
||
from operator_framework import StockWiseOperator, OperatorConfig
|
||
|
||
|
||
class UpsideVolatilityOperator(StockWiseOperator):
    """Rolling upside volatility of ``pct_chg``.

    Within each trailing window only the strictly positive values of
    ``pct_chg`` are considered, and their population standard deviation
    ``sqrt(E[x^2] - E[x]^2)`` is emitted. Rows whose window holds fewer than
    two positive values get a null result.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``upside_volatility_{window}`` column.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock's rows in chronological
                order; confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the upside-volatility column added.
        """
        pct = pl.col('pct_chg')
        is_up = pct > 0

        # Non-positive returns contribute 0 to both sums; the separate count
        # below is what restricts the mean to positive observations only.
        pos_returns = pl.when(is_up).then(pct).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Polars names the rolling window argument ``window_size`` (there is
        # no ``window`` keyword), so it is passed positionally. The boolean
        # mask is cast to an integer before being summed into a count.
        rolling_pos_count = is_up.cast(pl.Int64).rolling_sum(self.window)
        rolling_pos_sum = pos_returns.rolling_sum(self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(self.window)

        # Population variance of the positive returns: E[x^2] - E[x]^2.
        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Fewer than two positive samples: no meaningful dispersion. This
        # also masks the division by a zero count above.
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance;
        # clamp it so the square root stays defined.
        pos_var = pos_var.clip(lower_bound=0)

        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
|
||
|
||
|
||
class DownsideVolatilityOperator(StockWiseOperator):
    """Rolling downside volatility of ``pct_chg``.

    Within each trailing window only the strictly negative values of
    ``pct_chg`` are considered, and their population standard deviation
    ``sqrt(E[x^2] - E[x]^2)`` is emitted. Rows whose window holds fewer than
    two negative values get a null result.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``downside_volatility_{window}`` column.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock's rows in chronological
                order; confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the downside-volatility column added.
        """
        pct = pl.col('pct_chg')
        is_down = pct < 0

        # Non-negative returns contribute 0 to both sums; the separate count
        # below is what restricts the mean to negative observations only.
        neg_returns = pl.when(is_down).then(pct).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Polars names the rolling window argument ``window_size`` (there is
        # no ``window`` keyword), so it is passed positionally. The boolean
        # mask is cast to an integer before being summed into a count.
        rolling_neg_count = is_down.cast(pl.Int64).rolling_sum(self.window)
        rolling_neg_sum = neg_returns.rolling_sum(self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(self.window)

        # Population variance of the negative returns: E[x^2] - E[x]^2.
        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Fewer than two negative samples: no meaningful dispersion. This
        # also masks the division by a zero count above.
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance;
        # clamp it so the square root stays defined.
        neg_var = neg_var.clip(lower_bound=0)

        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
|
||
|
||
|
||
class VolatilityRatioOperator(StockWiseOperator):
    """Ratio of rolling upside to rolling downside volatility.

    Both volatilities are rolling standard deviations of ``pct_chg`` after
    the opposite-signed values have been replaced by 0. Any result that is
    null, NaN or infinite is reported as 0.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volatility_ratio_{window}`` column.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock's rows in chronological
                order; confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the volatility-ratio column added.
        """
        pct = pl.col('pct_chg')

        # Zero out the opposite side so each series only carries one sign.
        pos_returns = pl.when(pct > 0).then(pct).otherwise(0)
        neg_returns = pl.when(pct < 0).then(pct).otherwise(0)

        # Polars names the rolling window argument ``window_size`` (there is
        # no ``window`` keyword), so it is passed positionally.
        upside_vol = pos_returns.rolling_std(self.window)
        downside_vol = neg_returns.rolling_std(self.window)

        vol_ratio = upside_vol / downside_vol

        # x/0 yields +/-inf and 0/0 yields NaN. ``fill_null`` alone would let
        # NaN through (NaN is not null in Polars), so keep only finite values
        # and map everything else -- null, NaN, +/-inf -- to 0.
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(0.0)

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
|
||
|
||
|
||
class ReturnSkewnessOperator(StockWiseOperator):
    """Rolling skewness of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_skewness_{window}`` column.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock's rows in chronological
                order; confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the rolling-skewness column added.
        """
        # ``rolling_skew`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        skewness = pl.col('pct_chg').rolling_skew(self.window)

        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
|
||
|
||
|
||
class ReturnKurtosisOperator(StockWiseOperator):
    """Rolling kurtosis of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    @staticmethod
    def _window_kurtosis(values: pl.Series):
        """Return the kurtosis of one window using ``Series.kurtosis`` defaults."""
        return values.kurtosis()

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_kurtosis_{window}`` column.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock's rows in chronological
                order; confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the rolling-kurtosis column added.
        """
        # Polars expressions have no ``rolling_kurt`` method, so each window
        # is handed to ``Series.kurtosis`` through ``rolling_map``.
        # NOTE(review): ``rolling_map`` calls back into Python per window and
        # is slower than native rolling kernels; acceptable for small windows.
        kurtosis = pl.col('pct_chg').rolling_map(self._window_kurtosis, self.window)

        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
|
||
|
||
|
||
class VolatilityAmplificationOperator(StockWiseOperator):
    """Rolling volatility scaled by how far ``close`` sits below ``weight_avg``.

    The factor is ``std(pct_chg, n) * max(0, weight_avg - close) / close``,
    so it is 0 whenever ``close`` is at or above ``weight_avg``.
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Number of trailing rows used for the rolling volatility.
        """
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_amp_loss_{n}`` column.

        Args:
            stock_df: Frame containing ``pct_chg``, ``weight_avg`` and
                ``close`` columns.
                NOTE(review): ``weight_avg`` is presumably the average
                holding cost; confirm with the data source.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the amplification column added.
        """
        # ``rolling_std`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        vol_n = pl.col('pct_chg').rolling_std(self.n)

        # Relative shortfall of the close versus ``weight_avg``, floored at 0.
        loss_degree = pl.max_horizontal(0, pl.col('weight_avg') - pl.col('close')) / pl.col('close')

        vol_amp = vol_n * loss_degree

        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
|
||
|
||
|
||
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Count of recent rows that combine a profit state, a drop and high volume.

    A row is an event when all three hold:

    * ``close > weight_avg * (1 + profit_thresh)``
    * ``pct_chg < drop_thresh``
    * ``vol > mean(vol, n) + vol_multiple * std(vol, n)``

    The output is the number of events over the trailing ``m`` rows.
    """

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Build the operator configuration.

        Args:
            n: Window used for the rolling mean / std of ``vol``.
            m: Window over which events are counted.
            profit_thresh: Required relative excess of ``close`` over
                ``weight_avg``.
            drop_thresh: ``pct_chg`` must be below this value.
                NOTE(review): the default -0.03 assumes ``pct_chg`` is a
                fraction, not a percentage; confirm the column's units.
            vol_multiple: Number of standard deviations above the mean that
                ``vol`` must exceed.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple},
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_drop_profit_cnt_{m}`` column.

        Args:
            stock_df: Frame containing ``close``, ``pct_chg``, ``vol`` and
                ``weight_avg`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the event-count column added.
        """
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Polars names the rolling window argument ``window_size`` (there is
        # no ``window`` keyword), so it is passed positionally throughout.
        volume = pl.col('vol')
        rolling_mean_vol = volume.rolling_mean(self.n)
        rolling_std_vol = volume.rolling_std(self.n).fill_null(0)

        is_high_vol = volume > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Same dtype as the original ``cast(int)`` (Polars maps ``int`` to Int64).
        event_cnt = event.cast(pl.Int64).rolling_sum(self.m)

        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
|
||
|
||
|
||
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Interaction of rolling volatility with the large-order net-flow share.

    The factor is ``std(pct_chg, n) * mean(|net large flow| / turnover, n)``,
    where net large flow is
    ``(buy_lg_vol + buy_elg_vol - sell_lg_vol - sell_elg_vol) * close`` and
    turnover is ``vol * close``.
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Number of trailing rows used for both rolling statistics.
        """
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``lg_flow_vol_interact_{n}`` column.

        Args:
            stock_df: Frame containing the columns listed in
                ``required_columns``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the interaction column added.
        """
        # Keeps the ratio defined when turnover is exactly 0.
        epsilon = 1e-8

        # Polars names the rolling window argument ``window_size`` (there is
        # no ``window`` keyword), so it is passed positionally.
        vol_n = pl.col('pct_chg').rolling_std(self.n)

        # Net large + extra-large order volume, valued at the close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
             - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        total_val = pl.col('vol') * pl.col('close')

        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(self.n)

        interaction = vol_n * abs_ratio_n

        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
|
||
|
||
|
||
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """``n``-row rate of change of ``close`` divided by rolling volatility.

    The factor is ``close.pct_change(n) / (std(pct_chg, n) + 1e-10)``.
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Look-back used for both the rate of change and the rolling
                standard deviation.
        """
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_adj_roc_{n}`` column.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the volatility-adjusted rate of change added.
        """
        roc_n = pl.col('close').pct_change(self.n)

        # ``rolling_std`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        vol_n = pl.col('pct_chg').rolling_std(self.n).fill_null(0)

        # The small constant avoids division by zero on flat windows.
        vol_adj_roc = roc_n / (vol_n + 1e-10)

        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
|
||
|
||
|
||
class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling standard deviation of one-row ``close`` returns."""

    def __init__(self):
        """Build the fixed configuration (output column ``std_return_5``)."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_5`` column.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the ``std_return_5`` column added.
        """
        returns = pl.col('close').pct_change()

        # ``rolling_std`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        std_5 = returns.rolling_std(5)

        return stock_df.with_columns(std_5.alias('std_return_5'))
|
||
|
||
|
||
class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling standard deviation of one-row ``close`` returns."""

    def __init__(self):
        """Build the fixed configuration (output column ``std_return_90``)."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90`` column.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the ``std_return_90`` column added.
        """
        returns = pl.col('close').pct_change()

        # ``rolling_std`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        std_90 = returns.rolling_std(90)

        return stock_df.with_columns(std_90.alias('std_return_90'))
|
||
|
||
|
||
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling standard deviation of returns of ``close`` lagged 10 rows."""

    def __init__(self):
        """Build the fixed configuration (output column ``std_return_90_2``)."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90_2`` column.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the ``std_return_90_2`` column added.
        """
        # One-row returns of the close series shifted down by 10 rows.
        returns = pl.col('close').shift(10).pct_change()

        # ``rolling_std`` takes ``window_size``; there is no ``window``
        # keyword, so the size is passed positionally.
        std_90_2 = returns.rolling_std(90)

        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
|
||
|
||
|
||
# Default volatility factor set: one instance of each operator class below,
# constructed with its default arguments, in the order they are applied.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
|
||
|
||
|
||
def apply_volatility_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply a sequence of operators to ``df`` and return the final result.

    Args:
        df: Input Polars DataFrame.
        operators: Callables applied in order, each receiving the result of
            the previous one. ``None`` selects the module-level
            ``VOLATILITY_OPERATORS``; an empty list returns ``df`` unchanged.

    Returns:
        The value returned by the last operator, or ``df`` itself when no
        operator runs.
    """
    # Compare against None explicitly so that an empty list is honoured
    # instead of silently falling back to the defaults.
    if operators is None:
        operators = VOLATILITY_OPERATORS

    result_df = df
    for operator in operators:
        result_df = operator(result_df)

    return result_df
|