"""Volatility factors implemented with Polars.

Provides operators for upside volatility, downside volatility, the
volatility ratio, rolling skewness/kurtosis and several related factors,
plus a helper that applies a list of operators to a DataFrame.
"""

import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any

from operator_framework import StockWiseOperator, OperatorConfig


def _one_sided_volatility(is_selected: pl.Expr, window: int) -> pl.Expr:
    """Build the rolling population std-dev of the selected ``pct_chg`` values.

    Args:
        is_selected: Boolean expression marking the rows whose return takes
            part in the statistic (e.g. ``pl.col('pct_chg') > 0``).
        window: Rolling window length in rows.

    Returns:
        An expression that is null while the window is incomplete or when it
        holds fewer than two selected rows, and otherwise
        ``sqrt(E[x^2] - E[x]^2)`` over the selected rows of the window.
    """
    # Non-selected rows contribute 0 to both rolling sums, so dividing by the
    # number of selected rows yields moments over the selected rows only.
    selected = pl.when(is_selected).then(pl.col('pct_chg')).otherwise(0)

    # Cast the Boolean mask to an integer before summing so the rolling
    # kernel receives a numeric column.
    count = is_selected.cast(pl.Int64).rolling_sum(window_size=window)
    total = selected.rolling_sum(window_size=window)
    total_sq = selected.pow(2).rolling_sum(window_size=window)

    mean = total / count
    variance = total_sq / count - mean.pow(2)

    # Fewer than two selected observations -> undefined (null). This also
    # masks the division by a zero count above.
    variance = pl.when(count >= 2).then(variance).otherwise(None)

    # Floating-point cancellation can push the variance slightly below zero.
    return variance.clip(lower_bound=0).sqrt()


class UpsideVolatilityOperator(StockWiseOperator):
    """Rolling volatility of the strictly positive returns in ``pct_chg``."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``upside_volatility_{window}`` to ``stock_df``.

        The value is null until the window is full and whenever the window
        contains fewer than two positive returns.
        """
        upside_vol = _one_sided_volatility(pl.col('pct_chg') > 0, self.window)
        return stock_df.with_columns(
            upside_vol.alias(f'upside_volatility_{self.window}')
        )


class DownsideVolatilityOperator(StockWiseOperator):
    """Rolling volatility of the strictly negative returns in ``pct_chg``."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``downside_volatility_{window}`` to ``stock_df``.

        The value is null until the window is full and whenever the window
        contains fewer than two negative returns.
        """
        downside_vol = _one_sided_volatility(pl.col('pct_chg') < 0, self.window)
        return stock_df.with_columns(
            downside_vol.alias(f'downside_volatility_{self.window}')
        )


class VolatilityRatioOperator(StockWiseOperator):
    """Ratio of upside to downside rolling standard deviation."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``volatility_ratio_{window}`` to ``stock_df``.

        NOTE: unlike the upside/downside operators in this module, the two
        legs here are plain rolling std-devs of the zero-filled positive and
        negative return series.
        """
        returns = pl.col('pct_chg')
        pos_returns = pl.when(returns > 0).then(returns).otherwise(0)
        neg_returns = pl.when(returns < 0).then(returns).otherwise(0)

        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)
        vol_ratio = upside_vol / downside_vol

        # Map null (warm-up), +/-inf (x/0) and NaN (0/0) to 0. `is_finite()`
        # is null for null input, which `when` treats as false.
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(0.0)

        return stock_df.with_columns(
            vol_ratio.alias(f'volatility_ratio_{self.window}')
        )


class ReturnSkewnessOperator(StockWiseOperator):
    """Rolling skewness of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``return_skewness_{window}`` to ``stock_df``."""
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)
        return stock_df.with_columns(
            skewness.alias(f'return_skewness_{self.window}')
        )


class ReturnKurtosisOperator(StockWiseOperator):
    """Rolling kurtosis of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``return_kurtosis_{window}`` to ``stock_df``.

        Polars has no ``Expr.rolling_kurt``; the statistic is computed by
        applying ``Series.kurtosis()`` to each window via ``rolling_map``.
        """
        kurtosis = pl.col('pct_chg').rolling_map(
            lambda window_values: window_values.kurtosis(),
            window_size=self.window,
        )
        return stock_df.with_columns(
            kurtosis.alias(f'return_kurtosis_{self.window}')
        )


class VolatilityAmplificationOperator(StockWiseOperator):
    """Rolling volatility scaled by how far ``close`` sits below ``weight_avg``."""

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Rolling window length in rows for the volatility term.
        """
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``vol_amp_loss_{n}`` to ``stock_df``.

        Computed as ``rolling_std(pct_chg, n) * max(0, weight_avg - close) / close``.
        """
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Loss degree is zero whenever close is at or above weight_avg.
        shortfall = pl.max_horizontal(
            pl.lit(0), pl.col('weight_avg') - pl.col('close')
        )
        loss_degree = shortfall / pl.col('close')

        vol_amp = vol_n * loss_degree
        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))


class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Count of high-volume drop events that occur while in a profit state."""

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Configure the operator.

        Args:
            n: Window length for the rolling volume mean/std baseline.
            m: Window length over which events are counted.
            profit_thresh: ``close`` must exceed
                ``weight_avg * (1 + profit_thresh)`` to count as profitable.
            drop_thresh: ``pct_chg`` must be below this value to count as a drop.
            vol_multiple: Number of rolling std-devs above the rolling mean
                that ``vol`` must exceed to count as high volume.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``vol_drop_profit_cnt_{m}`` to ``stock_df``."""
        is_profitable = (
            pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        )
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Volume baseline over the last n rows; a null std is treated as 0.
        rolling_mean_vol = pl.col('vol').rolling_mean(window_size=self.n)
        rolling_std_vol = (
            pl.col('vol').rolling_std(window_size=self.n).fill_null(0)
        )
        is_high_vol = pl.col('vol') > (
            rolling_mean_vol + self.vol_multiple * rolling_std_vol
        )

        event = is_profitable & is_dropping & is_high_vol

        # Number of events within the last m rows.
        event_cnt = event.cast(pl.Int64).rolling_sum(window_size=self.m)

        return stock_df.with_columns(
            event_cnt.alias(f'vol_drop_profit_cnt_{self.m}')
        )


class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Interaction of rolling volatility with the large-order net flow ratio."""

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Rolling window length in rows.
        """
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol',
                              'sell_lg_vol', 'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``lg_flow_vol_interact_{n}`` to ``stock_df``.

        Computed as ``rolling_std(pct_chg, n)`` times the n-row rolling mean
        of ``|net large-order value| / (total traded value + epsilon)``.
        """
        epsilon = 1e-8  # keeps the ratio finite when traded value is zero

        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net (large + extra-large) buy volume minus sell volume, valued at close.
        net_lg_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_lg_flow_val = net_lg_volume * pl.col('close')

        total_val = pl.col('vol') * pl.col('close')
        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n
        return stock_df.with_columns(
            interaction.alias(f'lg_flow_vol_interact_{self.n}')
        )


class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """n-row rate of change of ``close`` divided by rolling volatility."""

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Look-back in rows for both the rate of change and the volatility.
        """
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``vol_adj_roc_{n}`` to ``stock_df``."""
        roc_n = pl.col('close').pct_change(self.n)

        # A null volatility is treated as 0; the small constant in the
        # denominator below then avoids a division by zero.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)

        vol_adj_roc = roc_n / (vol_n + 1e-10)
        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))


class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling std-dev of one-row ``close`` returns."""

    def __init__(self):
        """Configure the operator (no parameters)."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_5`` to ``stock_df``."""
        returns = pl.col('close').pct_change()
        std_5 = returns.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('std_return_5'))


class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling std-dev of one-row ``close`` returns."""

    def __init__(self):
        """Configure the operator (no parameters)."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90`` to ``stock_df``."""
        returns = pl.col('close').pct_change()
        std_90 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90.alias('std_return_90'))


class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling std-dev of ``close`` returns, lagged by 10 rows."""

    def __init__(self):
        """Configure the operator (no parameters)."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``std_return_90_2`` to ``stock_df``."""
        # Returns are taken on the close series shifted down by 10 rows.
        returns = pl.col('close').shift(10).pct_change()
        std_90_2 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))


# Default set of volatility operators, each built with its default parameters.
VOLATILITY_OPERATORS = [
    UpsideVolatilityOperator(),
    DownsideVolatilityOperator(),
    VolatilityRatioOperator(),
    ReturnSkewnessOperator(),
    ReturnKurtosisOperator(),
    VolatilityAmplificationOperator(),
    HighVolDropWhenProfitableOperator(),
    LargeFlowVolatilityInteractionOperator(),
    VolatilityAdjustedROCPOperator(),
    StandardDeviation5Operator(),
    StandardDeviation90Operator(),
    StandardDeviation90ShiftedOperator(),
]


def apply_volatility_factors(
    df: pl.DataFrame,
    operators: Optional[List[StockWiseOperator]] = None,
) -> pl.DataFrame:
    """Apply volatility operators to ``df`` one after another.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When ``None`` the
            module-level ``VOLATILITY_OPERATORS`` list is used.

    Returns:
        The DataFrame produced by calling each operator on the result of the
        previous one.
    """
    if operators is None:
        operators = VOLATILITY_OPERATORS

    result_df = df
    for operator in operators:
        # Each operator is called directly on the running result.
        result_df = operator(result_df)
    return result_df