Files
NewStock/main/factor/polars_volume_factors.py

481 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
成交量因子 - 使用Polars实现
包含成交量变化率、突破信号、换手率等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class VolumeChangeRateOperator(StockWiseOperator):
    """Short-term volume change rate: 2-row mean of ``vol`` over its 10-row mean, minus 1."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_change_rate",
            description="短期成交量变化率",
            required_columns=['vol'],
            output_columns=['volume_change_rate'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volume_change_rate`` column.

        Args:
            stock_df: Frame with a ``vol`` column. Presumably one stock's rows
                in chronological order -- confirm against StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``volume_change_rate`` added.
        """
        # Polars rolling functions take ``window_size``; there is no
        # ``window`` keyword, so passing it raises TypeError.
        vol_mean_2 = pl.col('vol').rolling_mean(window_size=2)
        vol_mean_10 = pl.col('vol').rolling_mean(window_size=10)
        # Relative gap between the short and long averages.
        change_rate = (vol_mean_2 / vol_mean_10) - 1
        return stock_df.with_columns(change_rate.alias('volume_change_rate'))
class VolumeBreakoutOperator(StockWiseOperator):
    """Volume breakout flag: current ``vol`` exceeds the max of the previous 5 rows."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_breakout",
            description="成交量突破信号",
            required_columns=['vol'],
            output_columns=['cat_volume_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_breakout`` column.

        Args:
            stock_df: Frame with a ``vol`` column. Presumably one stock's rows
                in chronological order -- confirm against StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cat_volume_breakout`` added.
        """
        # The reference maximum must exclude the current row: a value can
        # never be strictly greater than the max of a window containing it,
        # so an unshifted rolling max would make this flag permanently False.
        # ``window_size`` is the Polars keyword (``window`` is not accepted).
        prior_max_5 = pl.col('vol').shift(1).rolling_max(window_size=5)
        breakout = pl.col('vol') > prior_max_5
        return stock_df.with_columns(breakout.alias('cat_volume_breakout'))
class TurnoverDeviationOperator(StockWiseOperator):
    """Turnover deviation: z-score of ``turnover_rate`` against its 3-row rolling mean/std."""

    def __init__(self):
        config = OperatorConfig(
            name="turnover_deviation",
            description="换手率均线偏离度",
            required_columns=['turnover_rate'],
            output_columns=['turnover_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_deviation`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``turnover_deviation`` added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        mean_turnover = pl.col('turnover_rate').rolling_mean(window_size=3)
        std_turnover = pl.col('turnover_rate').rolling_std(window_size=3)
        # NOTE(review): a window of identical values gives std == 0 and a
        # non-finite result here; no guard is applied.
        deviation = (pl.col('turnover_rate') - mean_turnover) / std_turnover
        return stock_df.with_columns(deviation.alias('turnover_deviation'))
class TurnoverSpikeOperator(StockWiseOperator):
    """Turnover spike flag: ``turnover_rate`` above prior 3-row mean + 2 std."""

    def __init__(self):
        config = OperatorConfig(
            name="turnover_spike",
            description="换手率激增信号",
            required_columns=['turnover_rate'],
            output_columns=['cat_turnover_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_turnover_spike`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cat_turnover_spike`` added.
        """
        # The baseline is taken from the three rows *before* the current one.
        # If the current row were inside the 3-sample window, it could sit at
        # most 2/sqrt(3) (~1.15) sample standard deviations above the window
        # mean, so a 2-std threshold could never be crossed.
        prior_turnover = pl.col('turnover_rate').shift(1)
        # ``window_size`` is the Polars keyword (``window`` is not accepted).
        mean_turnover = prior_turnover.rolling_mean(window_size=3)
        std_turnover = prior_turnover.rolling_std(window_size=3)
        spike = pl.col('turnover_rate') > (mean_turnover + 2 * std_turnover)
        return stock_df.with_columns(spike.alias('cat_turnover_spike'))
class VolumeRatioAverageOperator(StockWiseOperator):
    """3-row rolling mean of ``volume_ratio``."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_average",
            description="量比均值",
            required_columns=['volume_ratio'],
            output_columns=['avg_volume_ratio'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``avg_volume_ratio`` column.

        Args:
            stock_df: Frame with a ``volume_ratio`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``avg_volume_ratio`` added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        avg_ratio = pl.col('volume_ratio').rolling_mean(window_size=3)
        return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio'))
class VolumeRatioBreakoutOperator(StockWiseOperator):
    """Volume-ratio breakout flag: current ``volume_ratio`` exceeds the max of the previous 5 rows."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_ratio_breakout",
            description="量比突破信号",
            required_columns=['volume_ratio'],
            output_columns=['cat_volume_ratio_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_ratio_breakout`` column.

        Args:
            stock_df: Frame with a ``volume_ratio`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cat_volume_ratio_breakout`` added.
        """
        # Compare against the previous five rows only: a value can never be
        # strictly greater than the max of a window that includes it, so an
        # unshifted rolling max would leave this flag permanently False.
        # ``window_size`` is the Polars keyword (``window`` is not accepted).
        prior_max_5 = pl.col('volume_ratio').shift(1).rolling_max(window_size=5)
        breakout = pl.col('volume_ratio') > prior_max_5
        return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout'))
class VolumeSpikeOperator(StockWiseOperator):
    """20-row rolling mean of ``vol``, stored as ``vol_spike``.

    Despite the column name, the output is the baseline average, not a flag.
    """

    def __init__(self):
        config = OperatorConfig(
            name="volume_spike",
            description="成交量激增",
            required_columns=['vol'],
            output_columns=['vol_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_spike`` column (20-row mean volume).

        Args:
            stock_df: Frame with a ``vol`` column. Presumably one stock's rows
                in chronological order -- confirm against StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``vol_spike`` added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        vol_mean_20 = pl.col('vol').rolling_mean(window_size=20)
        return stock_df.with_columns(vol_mean_20.alias('vol_spike'))
class VolumeStd5Operator(StockWiseOperator):
    """5-row rolling standard deviation of the row-over-row percent change of ``vol``."""

    def __init__(self):
        config = OperatorConfig(
            name="volume_std_5",
            description="5日成交量标准差",
            required_columns=['vol'],
            output_columns=['vol_std_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_std_5`` column.

        Args:
            stock_df: Frame with a ``vol`` column. Presumably one stock's rows
                in chronological order -- confirm against StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``vol_std_5`` added.
        """
        # The std is taken over volume *changes*, not raw volume levels.
        vol_pct_change = pl.col('vol').pct_change()
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        std_5 = vol_pct_change.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('vol_std_5'))
class TurnoverRateMeanOperator(StockWiseOperator):
    """n-row rolling mean of ``turnover_rate``."""

    def __init__(self, n: int):
        """Configure the operator.

        Args:
            n: Rolling window length in rows; also embedded in the operator
                name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_rate_mean_{n}",
            description=f"{n}日换手率均值",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_mean_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_rate_mean_{n}`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``turnover_rate_mean_{n}`` added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        mean_rate = pl.col('turnover_rate').rolling_mean(window_size=self.n)
        return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}'))
class VolumeSpikeCategoryOperator(StockWiseOperator):
    """Volume spike flag: ``vol`` greater than twice the ``vol_spike`` column."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="volume_spike_category",
                description="成交量激增分类",
                required_columns=['vol', 'vol_spike'],
                output_columns=['cat_vol_spike'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_vol_spike`` column.

        Args:
            stock_df: Frame that already contains ``vol`` and ``vol_spike``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cat_vol_spike`` added.
        """
        # Threshold is double the value held in ``vol_spike``.
        threshold = 2 * pl.col('vol_spike')
        is_spike = pl.col('vol') > threshold
        return stock_df.with_columns(is_spike.alias('cat_vol_spike'))
class TurnoverVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of ``turnover_rate`` over ``window`` rows."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_volatility_{window}",
            description=f"{window}日换手率波动率",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_std_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_std_{window}`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``turnover_std_{window}`` added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        turnover_std = pl.col('turnover_rate').rolling_std(window_size=self.window)
        return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}'))
class VolumeCovarianceOperator(StockWiseOperator):
    """Rolling covariance between ``high`` and ``vol`` over ``window`` rows."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"volume_covariance_{window}",
            description=f"{window}日成交量协方差",
            required_columns=['high', 'vol'],
            output_columns=[f'cov_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``cov_{window}`` column.

        Args:
            stock_df: Frame with ``high`` and ``vol`` columns. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cov_{window}`` added.
        """
        output_name = f'cov_{self.window}'
        # Rolling covariance is the top-level ``pl.rolling_cov`` function, not
        # an ``Expr`` method. Aliasing the expression directly also avoids
        # looking up a ``cov_*`` column in an intermediate frame whose only
        # column would be named after the first input.
        rolling_cov = pl.rolling_cov('high', 'vol', window_size=self.window)
        return stock_df.with_columns(rolling_cov.alias(output_name))
class VolumeCovarianceDeltaOperator(StockWiseOperator):
    """Difference of the ``cov_5`` column against its value ``period`` rows earlier."""

    def __init__(self, period: int = 5):
        """Configure the operator.

        Args:
            period: Lag, in rows, used for the difference; also embedded in
                the operator name and the output column name.
        """
        super().__init__(
            OperatorConfig(
                name=f"volume_covariance_delta_{period}",
                description=f"{period}日成交量协方差变化",
                required_columns=['cov_5'],
                output_columns=[f'delta_cov_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``delta_cov_{period}`` column.

        Args:
            stock_df: Frame that already contains a ``cov_5`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``delta_cov_{period}`` added.
        """
        # The input column is fixed to ``cov_5`` regardless of ``period``.
        output_name = f'delta_cov_{self.period}'
        lagged_diff = pl.col('cov_5').diff(self.period)
        return stock_df.with_columns(lagged_diff.alias(output_name))
class TurnoverRateAccelerationOperator(StockWiseOperator):
    """Turnover acceleration: short rolling mean of ``turnover_rate`` minus long rolling mean."""

    def __init__(self, short_window: int = 5, long_window: int = 20):
        """Configure the operator.

        Args:
            short_window: Length in rows of the short rolling mean.
            long_window: Length in rows of the long rolling mean.
        """
        config = OperatorConfig(
            name=f"turnover_acceleration_{short_window}_{long_window}",
            description=f"{short_window}日对{long_window}日换手率加速度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_rate_acceleration_{short}_{long}`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the acceleration column added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        short_avg = pl.col('turnover_rate').rolling_mean(window_size=self.short_window)
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.long_window)
        acceleration = short_avg - long_avg
        return stock_df.with_columns(
            acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}')
        )
class VolumeSustainabilityOperator(StockWiseOperator):
    """Share of the last ``short_window`` rows whose ``vol`` exceeded its ``long_window`` rolling mean."""

    def __init__(self, short_window: int = 10, long_window: int = 30):
        """Configure the operator.

        Args:
            short_window: Number of rows over which the above-average share
                is computed.
            long_window: Length in rows of the rolling mean used as the
                baseline.
        """
        config = OperatorConfig(
            name=f"volume_sustain_{short_window}_{long_window}",
            description=f"{short_window}日成交量大于{long_window}日均值占比",
            required_columns=['vol'],
            output_columns=[f'vol_sustain_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_sustain_{short}_{long}`` column.

        Args:
            stock_df: Frame with a ``vol`` column. Presumably one stock's rows
                in chronological order -- confirm against StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the sustainability ratio column added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        long_avg = pl.col('vol').rolling_mean(window_size=self.long_window)
        above_avg = pl.col('vol') > long_avg
        # Cast the flag to an integer so its rolling mean is the fraction of
        # rows in the short window that were above the long-run average.
        sustain_ratio = above_avg.cast(int).rolling_mean(window_size=self.short_window)
        return stock_df.with_columns(
            sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}')
        )
class TurnoverRelativeStrengthOperator(StockWiseOperator):
    """Turnover relative strength: ``turnover_rate`` divided by its ``window``-row rolling mean."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_relative_strength_{window}",
            description=f"{window}日换手率相对强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_relative_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_relative_strength_{window}`` column.

        Args:
            stock_df: Frame with a ``turnover_rate`` column. Presumably one
                stock's rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the relative-strength column added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.window)
        relative_strength = pl.col('turnover_rate') / long_avg
        return stock_df.with_columns(
            relative_strength.alias(f'turnover_relative_strength_{self.window}')
        )
class AmountOutlierOperator(StockWiseOperator):
    """Outlier score for ``amount``: rolling z-score of its gap from the rolling mean."""

    def __init__(self, window: int = 10):
        """Configure the operator.

        Args:
            window: Rolling window length in rows, used for both the baseline
                mean and the z-score statistics; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"amount_outlier_{window}",
            description=f"{window}日成交额异常值",
            required_columns=['amount'],
            output_columns=[f'amount_outlier_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``amount_outlier_{window}`` column.

        Args:
            stock_df: Frame with an ``amount`` column. Presumably one stock's
                rows in chronological order -- confirm against
                StockWiseOperator.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the outlier score column added.
        """
        # Polars rolling functions take ``window_size``; ``window`` is not a
        # valid keyword and raises TypeError.
        avg_amount = pl.col('amount').rolling_mean(window_size=self.window)
        amount_diff = pl.col('amount') - avg_amount
        # Simplified time-series z-score of the gap; a cross-sectional
        # standardisation may be needed in real use.
        mean_diff = amount_diff.rolling_mean(window_size=self.window)
        std_diff = amount_diff.rolling_std(window_size=self.window)
        # The small epsilon keeps the division finite when the std is zero.
        outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8)
        return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}'))
# Default set of volume-factor operators, used by apply_volume_factors when no
# explicit list is given and applied in list order. Order matters:
# VolumeSpikeOperator must precede VolumeSpikeCategoryOperator, which requires
# the 'vol_spike' column it produces. VolumeCovarianceOperator and
# VolumeCovarianceDeltaOperator are not part of this default set.
VOLUME_OPERATORS = [
    VolumeChangeRateOperator(),
    VolumeBreakoutOperator(),
    TurnoverDeviationOperator(),
    TurnoverSpikeOperator(),
    VolumeRatioAverageOperator(),
    VolumeRatioBreakoutOperator(),
    VolumeSpikeOperator(),
    VolumeStd5Operator(),
    TurnoverRateMeanOperator(20),
    VolumeSpikeCategoryOperator(),
    TurnoverVolatilityOperator(),
    TurnoverRateAccelerationOperator(),
    VolumeSustainabilityOperator(),
    TurnoverRelativeStrengthOperator(),
    AmountOutlierOperator(),
]
def apply_volume_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply a sequence of volume-factor operators to ``df``.

    Each operator is called with the frame returned by the previous one, so
    an operator may rely on columns added earlier in the sequence.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When ``None``, the
            module-level ``VOLUME_OPERATORS`` list is used. An empty list
            is honoured and returns ``df`` unchanged.

    Returns:
        The DataFrame produced by the last operator.
    """
    # Test against None rather than truthiness so that an explicit empty
    # list means "apply nothing" instead of falling back to the defaults.
    if operators is None:
        operators = VOLUME_OPERATORS
    result_df = df
    for operator in operators:
        result_df = operator(result_df)
    return result_df