481 lines
16 KiB
Python
481 lines
16 KiB
Python
"""
|
||
成交量因子 - 使用Polars实现
|
||
包含成交量变化率、突破信号、换手率等相关因子计算
|
||
"""
|
||
|
||
import polars as pl
|
||
import numpy as np
|
||
from typing import Dict, List, Optional, Any
|
||
from operator_framework import StockWiseOperator, OperatorConfig
|
||
|
||
|
||
class VolumeChangeRateOperator(StockWiseOperator):
    """Volume change rate operator.

    Adds ``volume_change_rate``: the 2-row rolling mean of ``vol`` divided by
    its 10-row rolling mean, minus 1.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_change_rate",
            description="短期成交量变化率",
            required_columns=['vol'],
            output_columns=['volume_change_rate'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the short-term volume change rate.

        Args:
            stock_df: Frame containing a ``vol`` column. Presumably the rows of
                a single stock in chronological order -- TODO confirm against
                the operator framework.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``volume_change_rate`` column.
        """
        # Polars names the rolling-window argument ``window_size``; the
        # pandas-style ``window=`` keyword raises TypeError.
        # 2-row ("2-day") mean volume.
        vol_mean_2 = pl.col('vol').rolling_mean(window_size=2)

        # 10-row ("10-day") mean volume.
        vol_mean_10 = pl.col('vol').rolling_mean(window_size=10)

        # Relative change of the short mean versus the long mean.
        change_rate = (vol_mean_2 / vol_mean_10) - 1

        return stock_df.with_columns(change_rate.alias('volume_change_rate'))
|
||
|
||
|
||
class VolumeBreakoutOperator(StockWiseOperator):
    """Volume breakout operator.

    Adds the boolean ``cat_volume_breakout``: whether the current ``vol``
    exceeds the maximum ``vol`` of the previous 5 rows.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_breakout",
            description="成交量突破信号",
            required_columns=['vol'],
            output_columns=['cat_volume_breakout'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the volume breakout signal.

        Args:
            stock_df: Frame containing a ``vol`` column. Presumably the rows of
                a single stock in chronological order -- TODO confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``cat_volume_breakout`` column.
        """
        # Max volume over the PREVIOUS 5 rows. A rolling max that includes the
        # current row is always >= the current value, so comparing against it
        # could never signal a breakout; ``shift(1)`` excludes the current row.
        # (Polars' keyword is ``window_size``, not ``window``.)
        prev_max_vol_5 = pl.col('vol').rolling_max(window_size=5).shift(1)

        # Breakout when today's volume exceeds that prior maximum.
        breakout = pl.col('vol') > prev_max_vol_5

        return stock_df.with_columns(breakout.alias('cat_volume_breakout'))
|
||
|
||
|
||
class TurnoverDeviationOperator(StockWiseOperator):
    """Turnover rate deviation operator.

    Adds ``turnover_deviation``: ``turnover_rate`` minus its 3-row rolling
    mean, divided by its 3-row rolling standard deviation.
    """

    def __init__(self):
        config = OperatorConfig(
            name="turnover_deviation",
            description="换手率均线偏离度",
            required_columns=['turnover_rate'],
            output_columns=['turnover_deviation'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the deviation of turnover rate from its moving average.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``turnover_deviation`` column.
        """
        turnover = pl.col('turnover_rate')

        # Polars' rolling functions take ``window_size``; ``window=`` raises
        # TypeError.
        # 3-row mean.
        mean_turnover = turnover.rolling_mean(window_size=3)

        # 3-row standard deviation.
        std_turnover = turnover.rolling_std(window_size=3)

        # NOTE(review): no guard against a zero standard deviation (constant
        # window); confirm whether an epsilon is wanted here.
        deviation = (turnover - mean_turnover) / std_turnover

        return stock_df.with_columns(deviation.alias('turnover_deviation'))
|
||
|
||
|
||
class TurnoverSpikeOperator(StockWiseOperator):
    """Turnover rate spike operator.

    Adds the boolean ``cat_turnover_spike``: whether ``turnover_rate`` exceeds
    its 3-row rolling mean plus two 3-row rolling standard deviations.
    """

    def __init__(self):
        config = OperatorConfig(
            name="turnover_spike",
            description="换手率激增信号",
            required_columns=['turnover_rate'],
            output_columns=['cat_turnover_spike'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the turnover rate spike signal.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``cat_turnover_spike`` column.
        """
        turnover = pl.col('turnover_rate')

        # Polars' rolling functions take ``window_size``; ``window=`` raises
        # TypeError.
        # 3-row mean.
        mean_turnover = turnover.rolling_mean(window_size=3)

        # 3-row standard deviation.
        std_turnover = turnover.rolling_std(window_size=3)

        # Spike when the value is above mean + 2 * std.
        spike = turnover > (mean_turnover + 2 * std_turnover)

        return stock_df.with_columns(spike.alias('cat_turnover_spike'))
|
||
|
||
|
||
class VolumeRatioAverageOperator(StockWiseOperator):
    """Volume ratio average operator.

    Adds ``avg_volume_ratio``: the 3-row rolling mean of ``volume_ratio``.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_average",
            description="量比均值",
            required_columns=['volume_ratio'],
            output_columns=['avg_volume_ratio'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the rolling mean of the volume ratio.

        Args:
            stock_df: Frame containing a ``volume_ratio`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``avg_volume_ratio`` column.
        """
        # 3-row mean. Polars' keyword is ``window_size``; ``window=`` raises
        # TypeError.
        avg_ratio = pl.col('volume_ratio').rolling_mean(window_size=3)

        return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio'))
|
||
|
||
|
||
class VolumeRatioBreakoutOperator(StockWiseOperator):
    """Volume ratio breakout operator.

    Adds the boolean ``cat_volume_ratio_breakout``: whether the current
    ``volume_ratio`` exceeds the maximum ``volume_ratio`` of the previous
    5 rows.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_breakout",
            description="量比突破信号",
            required_columns=['volume_ratio'],
            output_columns=['cat_volume_ratio_breakout'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the volume ratio breakout signal.

        Args:
            stock_df: Frame containing a ``volume_ratio`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``cat_volume_ratio_breakout`` column.
        """
        # Max volume ratio over the PREVIOUS 5 rows. Including the current row
        # in the window would make the comparison below always False, so the
        # rolling max is shifted by one row. (Polars' keyword is
        # ``window_size``, not ``window``.)
        prev_max_ratio_5 = (
            pl.col('volume_ratio').rolling_max(window_size=5).shift(1)
        )

        # Breakout when the current ratio exceeds that prior maximum.
        breakout = pl.col('volume_ratio') > prev_max_ratio_5

        return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout'))
|
||
|
||
|
||
class VolumeSpikeOperator(StockWiseOperator):
    """Volume spike baseline operator.

    Adds ``vol_spike``. Despite the column name, the value written is the
    20-row rolling mean of ``vol``; ``VolumeSpikeCategoryOperator`` compares
    ``vol`` against twice this column.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_spike",
            description="成交量激增",
            required_columns=['vol'],
            output_columns=['vol_spike'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the 20-row mean volume used as the spike baseline.

        Args:
            stock_df: Frame containing a ``vol`` column. Presumably the rows of
                a single stock in chronological order -- TODO confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``vol_spike`` column.
        """
        # 20-row mean volume. Polars' keyword is ``window_size``; ``window=``
        # raises TypeError.
        vol_mean_20 = pl.col('vol').rolling_mean(window_size=20)

        return stock_df.with_columns(vol_mean_20.alias('vol_spike'))
|
||
|
||
|
||
class VolumeStd5Operator(StockWiseOperator):
    """5-row volume standard deviation operator.

    Adds ``vol_std_5``. Note that the standard deviation is taken over the
    row-to-row percentage change of ``vol``, not over ``vol`` itself.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_std_5",
            description="5日成交量标准差",
            required_columns=['vol'],
            output_columns=['vol_std_5'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the 5-row rolling std of volume percentage change.

        Args:
            stock_df: Frame containing a ``vol`` column. Presumably the rows of
                a single stock in chronological order -- TODO confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``vol_std_5`` column.
        """
        # Row-to-row percentage change of volume.
        vol_pct_change = pl.col('vol').pct_change()

        # 5-row standard deviation of that change. Polars' keyword is
        # ``window_size``; ``window=`` raises TypeError.
        std_5 = vol_pct_change.rolling_std(window_size=5)

        return stock_df.with_columns(std_5.alias('vol_std_5'))
|
||
|
||
|
||
class TurnoverRateMeanOperator(StockWiseOperator):
    """Turnover rate mean operator.

    Adds ``turnover_rate_mean_{n}``: the ``n``-row rolling mean of
    ``turnover_rate``.
    """

    def __init__(self, n: int):
        """Create the operator.

        Args:
            n: Rolling window length in rows; also embedded in the operator
                name and in the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_rate_mean_{n}",
            description=f"{n}日换手率均值",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_mean_{n}'],
            parameters={'n': n},
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the n-row rolling mean of the turnover rate.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``turnover_rate_mean_{n}`` column.
        """
        # n-row mean. Polars' keyword is ``window_size``; ``window=`` raises
        # TypeError.
        mean_rate = pl.col('turnover_rate').rolling_mean(window_size=self.n)

        return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}'))
|
||
|
||
|
||
class VolumeSpikeCategoryOperator(StockWiseOperator):
    """Volume spike classification operator.

    Adds the boolean ``cat_vol_spike``: whether ``vol`` is greater than twice
    the ``vol_spike`` column, which must already be present in the input.
    """

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="volume_spike_category",
                description="成交量激增分类",
                required_columns=['vol', 'vol_spike'],
                output_columns=['cat_vol_spike'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Flag rows whose volume exceeds twice the ``vol_spike`` baseline.

        Args:
            stock_df: Frame containing ``vol`` and ``vol_spike`` columns.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``cat_vol_spike`` column.
        """
        # Threshold is two times the baseline column.
        threshold = 2 * pl.col('vol_spike')
        is_spike = (pl.col('vol') > threshold).alias('cat_vol_spike')
        return stock_df.with_columns(is_spike)
|
||
|
||
|
||
class TurnoverVolatilityOperator(StockWiseOperator):
    """Turnover rate volatility operator.

    Adds ``turnover_std_{window}``: the rolling standard deviation of
    ``turnover_rate`` over ``window`` rows.
    """

    def __init__(self, window: int = 20):
        """Create the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_volatility_{window}",
            description=f"{window}日换手率波动率",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_std_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the rolling standard deviation of the turnover rate.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``turnover_std_{window}`` column.
        """
        # Rolling standard deviation. Polars' keyword is ``window_size``;
        # ``window=`` raises TypeError.
        turnover_std = pl.col('turnover_rate').rolling_std(window_size=self.window)

        return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}'))
|
||
|
||
|
||
class VolumeCovarianceOperator(StockWiseOperator):
    """Volume covariance operator.

    Adds ``cov_{window}``: the rolling covariance between ``high`` and ``vol``
    over ``window`` rows.
    """

    def __init__(self, window: int = 5):
        """Create the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column name.
        """
        config = OperatorConfig(
            name=f"volume_covariance_{window}",
            description=f"{window}日成交量协方差",
            required_columns=['high', 'vol'],
            output_columns=[f'cov_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the rolling covariance of ``high`` and ``vol``.

        Args:
            stock_df: Frame containing ``high`` and ``vol`` columns. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``cov_{window}`` column.
        """
        out_col = f'cov_{self.window}'

        # Rolling covariance is a top-level Polars function (``pl.rolling_cov``),
        # not an ``Expr`` method. The previous code also selected into a frame
        # whose only column would not be named ``cov_{window}``, so the
        # subsequent lookup by that name could not succeed. Building the
        # expression and aliasing it directly avoids both problems.
        rolling_cov = pl.rolling_cov(
            pl.col('high'),
            pl.col('vol'),
            window_size=self.window,
        )

        return stock_df.with_columns(rolling_cov.alias(out_col))
|
||
|
||
|
||
class VolumeCovarianceDeltaOperator(StockWiseOperator):
    """Volume covariance delta operator.

    Adds ``delta_cov_{period}``: the ``period``-row difference of the
    existing ``cov_5`` column. The input column name is fixed at ``cov_5``
    regardless of ``period``.
    """

    def __init__(self, period: int = 5):
        """Create the operator.

        Args:
            period: Number of rows to difference over; also embedded in the
                operator name and in the output column name.
        """
        super().__init__(
            OperatorConfig(
                name=f"volume_covariance_delta_{period}",
                description=f"{period}日成交量协方差变化",
                required_columns=['cov_5'],
                output_columns=[f'delta_cov_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the change in volume covariance.

        Args:
            stock_df: Frame containing a ``cov_5`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``delta_cov_{period}`` column.
        """
        target = f'delta_cov_{self.period}'
        # Difference between each row and the row ``period`` positions earlier.
        cov_change = pl.col('cov_5').diff(self.period).alias(target)
        return stock_df.with_columns(cov_change)
|
||
|
||
|
||
class TurnoverRateAccelerationOperator(StockWiseOperator):
    """Turnover rate acceleration operator.

    Adds ``turnover_rate_acceleration_{short_window}_{long_window}``: the
    short-window rolling mean of ``turnover_rate`` minus its long-window
    rolling mean.
    """

    def __init__(self, short_window: int = 5, long_window: int = 20):
        """Create the operator.

        Args:
            short_window: Length in rows of the short rolling mean.
            long_window: Length in rows of the long rolling mean.
        """
        config = OperatorConfig(
            name=f"turnover_acceleration_{short_window}_{long_window}",
            description=f"{short_window}日对{long_window}日换手率加速度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window},
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the turnover rate acceleration.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the acceleration column added.
        """
        turnover = pl.col('turnover_rate')

        # Polars' rolling functions take ``window_size``; ``window=`` raises
        # TypeError.
        # Short-window mean.
        short_avg = turnover.rolling_mean(window_size=self.short_window)

        # Long-window mean.
        long_avg = turnover.rolling_mean(window_size=self.long_window)

        # "Acceleration" is defined here as short mean minus long mean.
        acceleration = short_avg - long_avg

        return stock_df.with_columns(
            acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}')
        )
|
||
|
||
|
||
class VolumeSustainabilityOperator(StockWiseOperator):
    """Volume sustainability operator.

    Adds ``vol_sustain_{short_window}_{long_window}``: over the last
    ``short_window`` rows, the fraction of rows whose ``vol`` was above the
    ``long_window``-row rolling mean of ``vol`` evaluated at that row.
    """

    def __init__(self, short_window: int = 10, long_window: int = 30):
        """Create the operator.

        Args:
            short_window: Length in rows of the window the fraction is
                averaged over.
            long_window: Length in rows of the rolling mean used as the
                comparison baseline.
        """
        config = OperatorConfig(
            name=f"volume_sustain_{short_window}_{long_window}",
            description=f"{short_window}日成交量大于{long_window}日均值占比",
            required_columns=['vol'],
            output_columns=[f'vol_sustain_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window},
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the volume sustainability ratio.

        Args:
            stock_df: Frame containing a ``vol`` column. Presumably the rows of
                a single stock in chronological order -- TODO confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the sustainability column added.
        """
        # Long-window mean volume. Polars' keyword is ``window_size``;
        # ``window=`` raises TypeError.
        long_avg = pl.col('vol').rolling_mean(window_size=self.long_window)

        # Whether each row's volume is above the long-window mean.
        above_avg = pl.col('vol') > long_avg

        # Cast the flag to an integer so its short-window mean is the
        # fraction of rows that were above the baseline.
        sustain_ratio = above_avg.cast(int).rolling_mean(window_size=self.short_window)

        return stock_df.with_columns(
            sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}')
        )
|
||
|
||
|
||
class TurnoverRelativeStrengthOperator(StockWiseOperator):
    """Turnover rate relative strength operator.

    Adds ``turnover_relative_strength_{window}``: ``turnover_rate`` divided by
    its ``window``-row rolling mean.
    """

    def __init__(self, window: int = 20):
        """Create the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and in the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_relative_strength_{window}",
            description=f"{window}日换手率相对强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_relative_strength_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the turnover rate relative strength.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Presumably
                the rows of a single stock in chronological order -- TODO
                confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the relative strength column added.
        """
        # Rolling mean baseline. Polars' keyword is ``window_size``;
        # ``window=`` raises TypeError.
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.window)

        # Current value relative to the baseline.
        relative_strength = pl.col('turnover_rate') / long_avg

        return stock_df.with_columns(
            relative_strength.alias(f'turnover_relative_strength_{self.window}')
        )
|
||
|
||
|
||
class AmountOutlierOperator(StockWiseOperator):
    """Turnover amount outlier operator.

    Adds ``amount_outlier_{window}``: a rolling z-score of the difference
    between ``amount`` and its ``window``-row rolling mean.
    """

    def __init__(self, window: int = 10):
        """Create the operator.

        Args:
            window: Rolling window length in rows, used for every rolling
                statistic below; also embedded in the operator name and in
                the output column name.
        """
        config = OperatorConfig(
            name=f"amount_outlier_{window}",
            description=f"{window}日成交额异常值",
            required_columns=['amount'],
            output_columns=[f'amount_outlier_{window}'],
            parameters={'window': window},
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the amount outlier score.

        Args:
            stock_df: Frame containing an ``amount`` column. Presumably the
                rows of a single stock in chronological order -- TODO confirm.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with an added ``amount_outlier_{window}`` column.
        """
        # Polars' rolling functions take ``window_size``; ``window=`` raises
        # TypeError.
        # Rolling mean of the amount.
        avg_amount = pl.col('amount').rolling_mean(window_size=self.window)

        # Difference between the amount and its rolling mean.
        amount_diff = pl.col('amount') - avg_amount

        # Z-score of that difference (simplified, time-series only; real use
        # may call for cross-sectional standardisation instead).
        mean_diff = amount_diff.rolling_mean(window_size=self.window)
        std_diff = amount_diff.rolling_std(window_size=self.window)

        # The small epsilon keeps the denominator non-zero.
        outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8)

        return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}'))
|
||
|
||
|
||
# Default collection of volume factor operators, instantiated once at import
# time and used by apply_volume_factors() when no explicit list is given.
# Order matters for at least one pair: VolumeSpikeCategoryOperator requires the
# 'vol_spike' column that VolumeSpikeOperator outputs, so it is listed after it.
# VolumeCovarianceOperator and VolumeCovarianceDeltaOperator are not included.
VOLUME_OPERATORS = [
    VolumeChangeRateOperator(),
    VolumeBreakoutOperator(),
    TurnoverDeviationOperator(),
    TurnoverSpikeOperator(),
    VolumeRatioAverageOperator(),
    VolumeRatioBreakoutOperator(),
    VolumeSpikeOperator(),
    VolumeStd5Operator(),
    TurnoverRateMeanOperator(20),
    VolumeSpikeCategoryOperator(),
    TurnoverVolatilityOperator(),
    TurnoverRateAccelerationOperator(),
    VolumeSustainabilityOperator(),
    TurnoverRelativeStrengthOperator(),
    AmountOutlierOperator(),
]
|
||
|
||
|
||
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply volume factor operators to a DataFrame in sequence.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When ``None``, the
            module-level ``VOLUME_OPERATORS`` list is used.

    Returns:
        The DataFrame obtained by passing ``df`` through every operator in
        turn, each one receiving the previous operator's output.
    """
    pipeline = VOLUME_OPERATORS if operators is None else operators

    current = df
    for op in pipeline:
        # Operators are invoked as callables on the running result.
        current = op(current)
    return current
|