Files
NewStock/main/factor/polars_volume_factors.py

481 lines
16 KiB
Python
Raw Normal View History

2025-10-13 21:42:35 +08:00
"""
Volume factors implemented with Polars.
Covers volume change rate, breakout signals, turnover rate, and related factor calculations.
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class VolumeChangeRateOperator(StockWiseOperator):
    """Short-term volume change rate: (2-day mean volume / 10-day mean volume) - 1."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_change_rate",
            description="短期成交量变化率",
            required_columns=['vol'],
            output_columns=['volume_change_rate'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volume_change_rate`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``vol`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``volume_change_rate`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        vol_mean_2 = pl.col('vol').rolling_mean(window_size=2)
        vol_mean_10 = pl.col('vol').rolling_mean(window_size=10)
        # Relative excess of the short-term mean over the longer-term mean.
        change_rate = (vol_mean_2 / vol_mean_10) - 1
        return stock_df.with_columns(change_rate.alias('volume_change_rate'))
class VolumeBreakoutOperator(StockWiseOperator):
    """Volume breakout flag: today's volume exceeds the max of the previous 5 rows."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_breakout",
            description="成交量突破信号",
            required_columns=['vol'],
            output_columns=['cat_volume_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_breakout`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``vol`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``cat_volume_breakout`` added.
        """
        # A rolling max that includes the current row can never be strictly
        # below the current value, so the flag would always be False.
        # Shift by one row so the comparison is against the *prior* 5-row max.
        # (Polars names the window argument ``window_size``, not ``window``.)
        prior_max_vol_5 = pl.col('vol').rolling_max(window_size=5).shift(1)
        breakout = pl.col('vol') > prior_max_vol_5
        return stock_df.with_columns(breakout.alias('cat_volume_breakout'))
class TurnoverDeviationOperator(StockWiseOperator):
    """Turnover-rate deviation: z-score of turnover against its 3-row rolling mean/std."""

    def __init__(self):
        config = OperatorConfig(
            name="turnover_deviation",
            description="换手率均线偏离度",
            required_columns=['turnover_rate'],
            output_columns=['turnover_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_deviation`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``turnover_deviation`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        mean_turnover = pl.col('turnover_rate').rolling_mean(window_size=3)
        std_turnover = pl.col('turnover_rate').rolling_std(window_size=3)
        # NOTE(review): there is no guard for a zero rolling std, so a flat
        # 3-row window yields a non-finite value here.
        deviation = (pl.col('turnover_rate') - mean_turnover) / std_turnover
        return stock_df.with_columns(deviation.alias('turnover_deviation'))
class TurnoverSpikeOperator(StockWiseOperator):
    """Turnover spike flag: turnover exceeds prior 3-row mean + 2 * prior 3-row std."""

    def __init__(self):
        config = OperatorConfig(
            name="turnover_spike",
            description="换手率激增信号",
            required_columns=['turnover_rate'],
            output_columns=['cat_turnover_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_turnover_spike`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``cat_turnover_spike`` added.
        """
        # Within a 3-sample window that contains the current value, the
        # largest possible z-score is (n - 1) / sqrt(n) = 2 / sqrt(3) ~= 1.15,
        # so "value > mean + 2 * std" could never be True. Build the baseline
        # from the previous 3 rows instead (shift by one) so a spike can be
        # detected. (Polars names the window argument ``window_size``.)
        prior_mean = pl.col('turnover_rate').rolling_mean(window_size=3).shift(1)
        prior_std = pl.col('turnover_rate').rolling_std(window_size=3).shift(1)
        spike = pl.col('turnover_rate') > (prior_mean + 2 * prior_std)
        return stock_df.with_columns(spike.alias('cat_turnover_spike'))
class VolumeRatioAverageOperator(StockWiseOperator):
    """3-row rolling mean of the ``volume_ratio`` column."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_average",
            description="量比均值",
            required_columns=['volume_ratio'],
            output_columns=['avg_volume_ratio'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``avg_volume_ratio`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``volume_ratio`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``avg_volume_ratio`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        avg_ratio = pl.col('volume_ratio').rolling_mean(window_size=3)
        return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio'))
class VolumeRatioBreakoutOperator(StockWiseOperator):
    """Volume-ratio breakout flag: today's ratio exceeds the max of the previous 5 rows."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_breakout",
            description="量比突破信号",
            required_columns=['volume_ratio'],
            output_columns=['cat_volume_ratio_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_ratio_breakout`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``volume_ratio`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``cat_volume_ratio_breakout`` added.
        """
        # A rolling max that includes the current row is always >= the
        # current value, which made the strict comparison permanently False.
        # Shift by one row to compare against the *prior* 5-row max.
        # (Polars names the window argument ``window_size``, not ``window``.)
        prior_max_ratio_5 = pl.col('volume_ratio').rolling_max(window_size=5).shift(1)
        breakout = pl.col('volume_ratio') > prior_max_ratio_5
        return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout'))
class VolumeSpikeOperator(StockWiseOperator):
    """20-row rolling mean of volume, stored under the column name ``vol_spike``.

    Despite the name, the output is the baseline mean rather than a spike
    flag; VolumeSpikeCategoryOperator in this file compares ``vol`` against
    twice this column.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_spike",
            description="成交量激增",
            required_columns=['vol'],
            output_columns=['vol_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_spike`` column (20-row mean volume) to ``stock_df``.

        Args:
            stock_df: Frame containing a ``vol`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``vol_spike`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        vol_mean_20 = pl.col('vol').rolling_mean(window_size=20)
        return stock_df.with_columns(vol_mean_20.alias('vol_spike'))
class VolumeStd5Operator(StockWiseOperator):
    """5-row rolling standard deviation of the row-over-row percent change in volume.

    Note that the statistic is taken over ``vol.pct_change()``, not over the
    raw volume, even though the config description reads as a plain volume
    standard deviation.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_std_5",
            description="5日成交量标准差",
            required_columns=['vol'],
            output_columns=['vol_std_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_std_5`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``vol`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``vol_std_5`` added.
        """
        # Row-over-row relative change in volume.
        vol_pct_change = pl.col('vol').pct_change()
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        std_5 = vol_pct_change.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('vol_std_5'))
class TurnoverRateMeanOperator(StockWiseOperator):
    """n-row rolling mean of the ``turnover_rate`` column."""

    def __init__(self, n: int):
        """Configure the operator.

        Args:
            n: Rolling window length in rows; also embedded in the operator
                name and in the output column ``turnover_rate_mean_{n}``.
        """
        config = OperatorConfig(
            name=f"turnover_rate_mean_{n}",
            description=f"{n}日换手率均值",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_mean_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_rate_mean_{n}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``turnover_rate_mean_{n}`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        mean_rate = pl.col('turnover_rate').rolling_mean(window_size=self.n)
        return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}'))
class VolumeSpikeCategoryOperator(StockWiseOperator):
    """Boolean volume-spike flag: ``vol`` strictly greater than twice ``vol_spike``."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="volume_spike_category",
                description="成交量激增分类",
                required_columns=['vol', 'vol_spike'],
                output_columns=['cat_vol_spike'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_vol_spike`` column to ``stock_df``.

        Args:
            stock_df: Frame containing ``vol`` and ``vol_spike`` columns.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``cat_vol_spike`` added.
        """
        # The spike threshold is double the value held in ``vol_spike``.
        threshold = 2 * pl.col('vol_spike')
        is_spike = pl.col('vol') > threshold
        return stock_df.with_columns(is_spike.alias('cat_vol_spike'))
class TurnoverVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of the ``turnover_rate`` column."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column ``turnover_std_{window}``.
        """
        config = OperatorConfig(
            name=f"turnover_volatility_{window}",
            description=f"{window}日换手率波动率",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_std_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_std_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``turnover_std_{window}`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        turnover_std = pl.col('turnover_rate').rolling_std(window_size=self.window)
        return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}'))
class VolumeCovarianceOperator(StockWiseOperator):
    """Rolling covariance between the ``high`` and ``vol`` columns."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column ``cov_{window}``.
        """
        config = OperatorConfig(
            name=f"volume_covariance_{window}",
            description=f"{window}日成交量协方差",
            required_columns=['high', 'vol'],
            output_columns=[f'cov_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``cov_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing ``high`` and ``vol`` columns.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``cov_{window}`` added.
        """
        # Rolling covariance is the top-level ``pl.rolling_cov`` function in
        # Polars (expressions have no ``rolling_cov`` method), and its window
        # argument is ``window_size``. Aliasing the expression directly also
        # avoids looking up a ``cov_{window}`` column on an intermediate frame
        # whose only column would have been named ``high``.
        cov_expr = pl.rolling_cov(
            pl.col('high'),
            pl.col('vol'),
            window_size=self.window,
        )
        return stock_df.with_columns(cov_expr.alias(f'cov_{self.window}'))
class VolumeCovarianceDeltaOperator(StockWiseOperator):
    """Difference of a rolling-covariance column over ``period`` rows."""

    def __init__(self, period: int = 5, cov_column: str = 'cov_5'):
        """Configure the operator.

        Args:
            period: Row lag passed to ``diff``; also embedded in the operator
                name and in the output column ``delta_cov_{period}``.
            cov_column: Name of the input covariance column. Defaults to
                ``'cov_5'`` (the previously hard-coded input) so existing
                callers are unaffected; pass e.g. ``'cov_10'`` to consume a
                covariance computed over a different window.
        """
        config = OperatorConfig(
            name=f"volume_covariance_delta_{period}",
            description=f"{period}日成交量协方差变化",
            required_columns=[cov_column],
            output_columns=[f'delta_cov_{period}'],
            parameters={'period': period, 'cov_column': cov_column}
        )
        super().__init__(config)
        self.period = period
        self.cov_column = cov_column

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``delta_cov_{period}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing the configured covariance column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``delta_cov_{period}`` added.
        """
        # Current value minus the value ``period`` rows earlier.
        delta = pl.col(self.cov_column).diff(self.period)
        return stock_df.with_columns(delta.alias(f'delta_cov_{self.period}'))
class TurnoverRateAccelerationOperator(StockWiseOperator):
    """Turnover "acceleration": short-window mean turnover minus long-window mean turnover."""

    def __init__(self, short_window: int = 5, long_window: int = 20):
        """Configure the operator.

        Args:
            short_window: Length in rows of the short rolling mean.
            long_window: Length in rows of the long rolling mean.

        Both values are embedded in the operator name and in the output
        column ``turnover_rate_acceleration_{short_window}_{long_window}``.
        """
        config = OperatorConfig(
            name=f"turnover_acceleration_{short_window}_{long_window}",
            description=f"{short_window}日对{long_window}日换手率加速度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the acceleration column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with
            ``turnover_rate_acceleration_{short_window}_{long_window}`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        short_avg = pl.col('turnover_rate').rolling_mean(window_size=self.short_window)
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.long_window)
        acceleration = short_avg - long_avg
        return stock_df.with_columns(
            acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}')
        )
class VolumeSustainabilityOperator(StockWiseOperator):
    """Share of the last ``short_window`` rows whose volume exceeded the ``long_window`` mean."""

    def __init__(self, short_window: int = 10, long_window: int = 30):
        """Configure the operator.

        Args:
            short_window: Length in rows of the window over which the
                above-average share is taken.
            long_window: Length in rows of the rolling mean used as the
                comparison baseline.

        Both values are embedded in the operator name and in the output
        column ``vol_sustain_{short_window}_{long_window}``.
        """
        config = OperatorConfig(
            name=f"volume_sustain_{short_window}_{long_window}",
            description=f"{short_window}日成交量大于{long_window}日均值占比",
            required_columns=['vol'],
            output_columns=[f'vol_sustain_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_sustain_{short_window}_{long_window}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``vol`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with the sustainability ratio column added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        long_avg = pl.col('vol').rolling_mean(window_size=self.long_window)
        above_avg = pl.col('vol') > long_avg
        # Cast the boolean flag to an integer so its rolling mean is the
        # fraction of rows in the short window that were above the baseline.
        sustain_ratio = above_avg.cast(int).rolling_mean(window_size=self.short_window)
        return stock_df.with_columns(
            sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}')
        )
class TurnoverRelativeStrengthOperator(StockWiseOperator):
    """Turnover relative strength: current turnover divided by its rolling mean."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column
                ``turnover_relative_strength_{window}``.
        """
        config = OperatorConfig(
            name=f"turnover_relative_strength_{window}",
            description=f"{window}日换手率相对强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_relative_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_relative_strength_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with the relative-strength column added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.window)
        relative_strength = pl.col('turnover_rate') / long_avg
        return stock_df.with_columns(
            relative_strength.alias(f'turnover_relative_strength_{self.window}')
        )
class AmountOutlierOperator(StockWiseOperator):
    """Outlier score for traded amount: rolling z-score of (amount - rolling mean amount)."""

    def __init__(self, window: int = 10):
        """Configure the operator.

        Args:
            window: Rolling window length in rows, used for every rolling
                statistic below; also embedded in the operator name and in
                the output column ``amount_outlier_{window}``.
        """
        config = OperatorConfig(
            name=f"amount_outlier_{window}",
            description=f"{window}日成交额异常值",
            required_columns=['amount'],
            output_columns=[f'amount_outlier_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``amount_outlier_{window}`` column to ``stock_df``.

        Args:
            stock_df: Frame containing an ``amount`` column.
                NOTE(review): presumably one stock's rows in chronological
                order -- confirm against StockWiseOperator.
            **kwargs: Accepted but not used here.

        Returns:
            ``stock_df`` with ``amount_outlier_{window}`` added.
        """
        # Polars names the rolling-window argument ``window_size``;
        # passing ``window=`` raises TypeError.
        avg_amount = pl.col('amount').rolling_mean(window_size=self.window)
        # Deviation of the amount from its own rolling mean.
        amount_diff = pl.col('amount') - avg_amount
        # Simplified per-stock z-score of that deviation; real usage may need
        # cross-sectional standardization instead.
        mean_diff = amount_diff.rolling_mean(window_size=self.window)
        std_diff = amount_diff.rolling_std(window_size=self.window)
        # The small epsilon keeps the division finite when the rolling std is zero.
        outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8)
        return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}'))
# Default collection of volume-factor operators, instantiated once at import time.
# Order matters when the list is applied sequentially: VolumeSpikeCategoryOperator
# requires the 'vol_spike' column, so VolumeSpikeOperator must come before it.
# VolumeCovarianceOperator and VolumeCovarianceDeltaOperator are not part of
# this default list.
VOLUME_OPERATORS = [
    VolumeChangeRateOperator(),
    VolumeBreakoutOperator(),
    TurnoverDeviationOperator(),
    TurnoverSpikeOperator(),
    VolumeRatioAverageOperator(),
    VolumeRatioBreakoutOperator(),
    VolumeSpikeOperator(),  # produces 'vol_spike'
    VolumeStd5Operator(),
    TurnoverRateMeanOperator(20),
    VolumeSpikeCategoryOperator(),  # consumes 'vol_spike'
    TurnoverVolatilityOperator(),
    TurnoverRateAccelerationOperator(),
    VolumeSustainabilityOperator(),
    TurnoverRelativeStrengthOperator(),
    AmountOutlierOperator(),
]
def apply_volume_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply a sequence of volume-factor operators to ``df``.

    Each operator is called with the frame returned by the previous one, so
    operators later in the sequence can read columns added by earlier ones.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. Each must be callable with a
            DataFrame and return a DataFrame. If ``None``, the module-level
            ``VOLUME_OPERATORS`` list is used; an empty list returns ``df``
            unchanged.

    Returns:
        The DataFrame produced by the last operator.
    """
    # ``None`` means "use the defaults"; an explicitly empty list is honoured.
    if operators is None:
        operators = VOLUME_OPERATORS
    result_df = df
    for operator in operators:
        result_df = operator(result_df)
    return result_df