factor优化,改为polars
This commit is contained in:
419
main/factor/polars_volatility_factors.py
Normal file
419
main/factor/polars_volatility_factors.py
Normal file
@@ -0,0 +1,419 @@
|
||||
"""
Volatility factors implemented with Polars.

Covers upside volatility, downside volatility, the volatility ratio and
related factor calculations.
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
class UpsideVolatilityOperator(StockWiseOperator):
    """Rolling upside volatility of ``pct_chg``.

    For every row the trailing ``window`` rows are inspected, only the
    strictly positive returns are kept, and their population standard
    deviation is written to ``upside_volatility_{window}``. Windows that
    contain fewer than two positive returns produce null.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the upside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock, sorted by date — confirm
                against the caller.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``upside_volatility_{window}`` added.
        """
        pct = pl.col('pct_chg')
        is_positive = pct > 0

        # Non-positive (and null) returns contribute 0 to the sums, so the
        # sums below only accumulate the positive returns.
        pos_returns = pl.when(is_positive).then(pct).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Polars rolling functions take `window_size` (not pandas' `window`).
        # The boolean mask is cast to an integer so it can be summed.
        rolling_pos_count = is_positive.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_pos_sum = pos_returns.rolling_sum(window_size=self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(window_size=self.window)

        # Population variance over the positive samples: E[x^2] - E[x]^2.
        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Fewer than two positive samples -> undefined (null). This also
        # masks the division by a zero count above.
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance.
        pos_var = pos_var.clip(lower_bound=0)

        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
|
||||
|
||||
|
||||
class DownsideVolatilityOperator(StockWiseOperator):
    """Rolling downside volatility of ``pct_chg``.

    For every row the trailing ``window`` rows are inspected, only the
    strictly negative returns are kept, and their population standard
    deviation is written to ``downside_volatility_{window}``. Windows that
    contain fewer than two negative returns produce null.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the downside-volatility column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
                NOTE(review): presumably one stock, sorted by date — confirm
                against the caller.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``downside_volatility_{window}`` added.
        """
        pct = pl.col('pct_chg')
        is_negative = pct < 0

        # Non-negative (and null) returns contribute 0 to the sums, so the
        # sums below only accumulate the negative returns.
        neg_returns = pl.when(is_negative).then(pct).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Polars rolling functions take `window_size` (not pandas' `window`).
        # The boolean mask is cast to an integer so it can be summed.
        rolling_neg_count = is_negative.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_neg_sum = neg_returns.rolling_sum(window_size=self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(window_size=self.window)

        # Population variance over the negative samples: E[x^2] - E[x]^2.
        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Fewer than two negative samples -> undefined (null). This also
        # masks the division by a zero count above.
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floating-point cancellation can leave a tiny negative variance.
        neg_var = neg_var.clip(lower_bound=0)

        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
|
||||
|
||||
|
||||
class VolatilityRatioOperator(StockWiseOperator):
    """Ratio of rolling upside to rolling downside volatility.

    Both legs are rolling standard deviations over ``window`` rows of a
    zero-filled series (positive returns only / negative returns only).
    Any result that is null, NaN or infinite is reported as 0.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-ratio column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``volatility_ratio_{window}`` added.
        """
        pct = pl.col('pct_chg')

        # Zero-filled one-sided return series.
        pos_returns = pl.when(pct > 0).then(pct).otherwise(0)
        neg_returns = pl.when(pct < 0).then(pct).otherwise(0)

        # Polars rolling functions take `window_size` (not pandas' `window`).
        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)

        vol_ratio = upside_vol / downside_vol

        # x/0 gives +/-inf and 0/0 gives NaN. `is_finite()` is false for both
        # and null for null, so every non-finite or missing ratio falls to
        # the `otherwise` branch. (`fill_null` alone would leave NaN behind.)
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(0.0)

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
|
||||
|
||||
|
||||
class ReturnSkewnessOperator(StockWiseOperator):
    """Rolling skewness of ``pct_chg`` over ``window`` rows."""

    def __init__(self, window: int = 5):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling-skewness column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_skewness_{window}`` added.
        """
        # Polars' rolling_skew takes `window_size` (not pandas' `window`).
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)

        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
|
||||
|
||||
|
||||
class ReturnKurtosisOperator(StockWiseOperator):
    """Rolling kurtosis of ``pct_chg`` over ``window`` rows."""

    def __init__(self, window: int = 5):
        """Build the operator configuration.

        Args:
            window: Number of trailing rows in each rolling window.
        """
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling-kurtosis column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_kurtosis_{window}`` added.
        """
        # Polars expressions have no `rolling_kurt` (that is the pandas
        # spelling), so each window is handed to `Series.kurtosis()` through
        # `rolling_map`, using that method's default arguments.
        # NOTE(review): `rolling_map` runs a Python callback per window; if
        # the pinned Polars version ships a native rolling kurtosis, prefer it.
        kurtosis = pl.col('pct_chg').rolling_map(
            lambda window_values: window_values.kurtosis(),
            window_size=self.window,
        )

        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
|
||||
|
||||
|
||||
class VolatilityAmplificationOperator(StockWiseOperator):
    """Volatility scaled by how far ``close`` sits below ``weight_avg``.

    Output ``vol_amp_loss_{n}`` is the ``n``-row rolling standard deviation
    of ``pct_chg`` multiplied by ``max(0, weight_avg - close) / close``.
    NOTE(review): ``weight_avg`` is presumably the holders' average cost —
    confirm against the data source.
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Number of trailing rows used for the rolling volatility.
        """
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the loss-state volatility amplification column.

        Args:
            stock_df: Frame containing ``pct_chg``, ``weight_avg`` and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_amp_loss_{n}`` added.
        """
        close = pl.col('close')

        # Polars rolling functions take `window_size` (not pandas' `window`).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Relative shortfall of close versus weight_avg, floored at zero.
        # `pl.lit(0)` makes it explicit that 0 is a literal, not a column.
        shortfall = pl.max_horizontal(pl.lit(0), pl.col('weight_avg') - close)
        loss_degree = shortfall / close

        vol_amp = vol_n * loss_degree

        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
|
||||
|
||||
|
||||
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Count of high-volume drops while price is above ``weight_avg``.

    An event is a row where all three hold:
      * ``close > weight_avg * (1 + profit_thresh)``
      * ``pct_chg < drop_thresh``
      * ``vol > rolling_mean(vol, n) + vol_multiple * rolling_std(vol, n)``
    The output ``vol_drop_profit_cnt_{m}`` is the number of events in the
    trailing ``m`` rows.
    """

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Build the operator configuration.

        Args:
            n: Window for the rolling mean/std of ``vol``.
            m: Window over which events are counted.
            profit_thresh: Required relative excess of ``close`` over ``weight_avg``.
            drop_thresh: ``pct_chg`` must be strictly below this value.
            vol_multiple: Number of rolling standard deviations above the
                rolling mean that ``vol`` must exceed.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple},
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling event-count column to ``stock_df``.

        Args:
            stock_df: Frame containing ``close``, ``pct_chg``, ``vol`` and
                ``weight_avg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_drop_profit_cnt_{m}`` added.
        """
        volume = pl.col('vol')

        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)

        # NOTE(review): the default drop_thresh of -0.03 presumes pct_chg is a
        # fraction rather than a percentage — confirm against the data source.
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Polars rolling functions take `window_size` (not pandas' `window`).
        rolling_mean_vol = volume.rolling_mean(window_size=self.n)
        rolling_std_vol = volume.rolling_std(window_size=self.n).fill_null(0)

        is_high_vol = volume > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Cast the boolean flag to an integer so it can be summed.
        event_cnt = event.cast(pl.Int64).rolling_sum(window_size=self.m)

        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
|
||||
|
||||
|
||||
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Interaction of return volatility with large-order net flow intensity.

    Output ``lg_flow_vol_interact_{n}`` is the ``n``-row rolling standard
    deviation of ``pct_chg`` multiplied by the ``n``-row rolling mean of
    ``|net large-order value| / total traded value``.
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Number of trailing rows used by both rolling statistics.
        """
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the flow/volatility interaction column to ``stock_df``.

        Args:
            stock_df: Frame containing the columns listed in
                ``required_columns``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``lg_flow_vol_interact_{n}`` added.
        """
        epsilon = 1e-8  # keeps the ratio finite when total traded value is 0
        close = pl.col('close')

        # Polars rolling functions take `window_size` (not pandas' `window`).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net large + extra-large order volume (buys minus sells), valued at close.
        net_lg_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_lg_flow_val = net_lg_volume * close

        # Total traded value approximated as volume * close.
        total_val = pl.col('vol') * close

        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n

        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
|
||||
|
||||
|
||||
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """``n``-row rate of change of ``close`` divided by ``n``-row volatility.

    Output ``vol_adj_roc_{n}`` is ``close.pct_change(n)`` divided by the
    rolling standard deviation of ``pct_chg`` (plus a small constant).
    """

    def __init__(self, n: int = 20):
        """Build the operator configuration.

        Args:
            n: Look-back used for both the rate of change and the volatility.
        """
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-adjusted rate-of-change column.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_adj_roc_{n}`` added.
        """
        roc_n = pl.col('close').pct_change(self.n)

        # Polars rolling functions take `window_size` (not pandas' `window`).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)

        # The small constant avoids division by zero when volatility is 0.
        vol_adj_roc = roc_n / (vol_n + 1e-10)

        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
|
||||
|
||||
|
||||
class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling standard deviation of close-to-close returns."""

    def __init__(self):
        """Build the fixed operator configuration (no parameters)."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_5`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_5`` added.
        """
        # One-row percentage change of close.
        returns = pl.col('close').pct_change()

        # Polars rolling functions take `window_size` (not pandas' `window`).
        std_5 = returns.rolling_std(window_size=5)

        return stock_df.with_columns(std_5.alias('std_return_5'))
|
||||
|
||||
|
||||
class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling standard deviation of close-to-close returns."""

    def __init__(self):
        """Build the fixed operator configuration (no parameters)."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90`` added.
        """
        # One-row percentage change of close.
        returns = pl.col('close').pct_change()

        # Polars rolling functions take `window_size` (not pandas' `window`).
        std_90 = returns.rolling_std(window_size=90)

        return stock_df.with_columns(std_90.alias('std_return_90'))
|
||||
|
||||
|
||||
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling standard deviation of returns, lagged by 10 rows.

    Returns are computed on ``close`` shifted by 10 rows, so every window
    ends 10 rows before the current row.
    """

    def __init__(self):
        """Build the fixed operator configuration (no parameters)."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90_2`` to ``stock_df``.

        Args:
            stock_df: Frame containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90_2`` added.
        """
        # One-row percentage change of the 10-row-lagged close.
        returns = pl.col('close').shift(10).pct_change()

        # Polars rolling functions take `window_size` (not pandas' `window`).
        std_90_2 = returns.rolling_std(window_size=90)

        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
|
||||
|
||||
|
||||
# Default collection of volatility-factor operators, each instantiated with
# its default parameters, in the order they are applied.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
|
||||
|
||||
|
||||
def apply_volatility_factors(df: "pl.DataFrame", operators: "Optional[List]" = None) -> "pl.DataFrame":
    """
    Apply a sequence of volatility-factor operators to a frame.

    Each operator is called with the result of the previous one, so the
    operators run in list order.

    Args:
        df: Input Polars DataFrame.
        operators: Operators (callables taking and returning a frame) to
            apply. When ``None``, the module-level ``VOLATILITY_OPERATORS``
            list is used. An empty list returns ``df`` unchanged.

    Returns:
        The frame produced by the last operator.
    """
    # `is None` (not truthiness) so an explicitly empty list is respected.
    if operators is None:
        operators = VOLATILITY_OPERATORS

    result_df = df
    for operator in operators:
        result_df = operator(result_df)

    return result_df
|
||||
Reference in New Issue
Block a user