""" 成交量因子 - 使用Polars实现 包含成交量变化率、突破信号、换手率等相关因子计算 """ import polars as pl import numpy as np from typing import Dict, List, Optional, Any from operator_framework import StockWiseOperator, OperatorConfig class VolumeChangeRateOperator(StockWiseOperator): """成交量变化率算子""" def __init__(self): config = OperatorConfig( name="volume_change_rate", description="短期成交量变化率", required_columns=['vol'], output_columns=['volume_change_rate'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量变化率""" # 计算2日均量 vol_mean_2 = pl.col('vol').rolling_mean(window=2) # 计算10日均量 vol_mean_10 = pl.col('vol').rolling_mean(window=10) # 计算变化率 change_rate = (vol_mean_2 / vol_mean_10) - 1 return stock_df.with_columns(change_rate.alias('volume_change_rate')) class VolumeBreakoutOperator(StockWiseOperator): """成交量突破算子""" def __init__(self): config = OperatorConfig( name="volume_breakout", description="成交量突破信号", required_columns=['vol'], output_columns=['cat_volume_breakout'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量突破信号""" # 计算5日最大成交量 max_vol_5 = pl.col('vol').rolling_max(window=5) # 判断是否突破 breakout = pl.col('vol') > max_vol_5 return stock_df.with_columns(breakout.alias('cat_volume_breakout')) class TurnoverDeviationOperator(StockWiseOperator): """换手率偏离度算子""" def __init__(self): config = OperatorConfig( name="turnover_deviation", description="换手率均线偏离度", required_columns=['turnover_rate'], output_columns=['turnover_deviation'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算换手率均线偏离度""" # 计算3日均值 mean_turnover = pl.col('turnover_rate').rolling_mean(window=3) # 计算3日标准差 std_turnover = pl.col('turnover_rate').rolling_std(window=3) # 计算偏离度 deviation = (pl.col('turnover_rate') - mean_turnover) / std_turnover return stock_df.with_columns(deviation.alias('turnover_deviation')) class TurnoverSpikeOperator(StockWiseOperator): """换手率激增算子""" def __init__(self): config = OperatorConfig( name="turnover_spike", description="换手率激增信号", required_columns=['turnover_rate'], output_columns=['cat_turnover_spike'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算换手率激增信号""" # 计算3日均值 mean_turnover = pl.col('turnover_rate').rolling_mean(window=3) # 计算3日标准差 std_turnover = pl.col('turnover_rate').rolling_std(window=3) # 判断是否激增 (超过均值+2倍标准差) spike = pl.col('turnover_rate') > (mean_turnover + 2 * std_turnover) return stock_df.with_columns(spike.alias('cat_turnover_spike')) class VolumeRatioAverageOperator(StockWiseOperator): """量比均值算子""" def __init__(self): config = OperatorConfig( name="volume_ratio_average", description="量比均值", required_columns=['volume_ratio'], output_columns=['avg_volume_ratio'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算量比均值""" # 计算3日均值 avg_ratio = pl.col('volume_ratio').rolling_mean(window=3) return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio')) class VolumeRatioBreakoutOperator(StockWiseOperator): """量比突破算子""" def __init__(self): config = OperatorConfig( name="volume_ratio_breakout", description="量比突破信号", required_columns=['volume_ratio'], output_columns=['cat_volume_ratio_breakout'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算量比突破信号""" # 计算5日最大量比 max_ratio_5 = pl.col('volume_ratio').rolling_max(window=5) # 判断是否突破 breakout = pl.col('volume_ratio') > max_ratio_5 return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout')) class VolumeSpikeOperator(StockWiseOperator): """成交量激增算子""" def __init__(self): config = OperatorConfig( name="volume_spike", description="成交量激增", required_columns=['vol'], output_columns=['vol_spike'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量激增""" # 计算20日均量 vol_mean_20 = pl.col('vol').rolling_mean(window=20) return stock_df.with_columns(vol_mean_20.alias('vol_spike')) class VolumeStd5Operator(StockWiseOperator): """5日成交量标准差算子""" def __init__(self): config = OperatorConfig( name="volume_std_5", description="5日成交量标准差", required_columns=['vol'], output_columns=['vol_std_5'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算5日成交量标准差""" # 计算成交量变化率 vol_pct_change = pl.col('vol').pct_change() # 计算5日标准差 std_5 = vol_pct_change.rolling_std(window=5) return stock_df.with_columns(std_5.alias('vol_std_5')) class TurnoverRateMeanOperator(StockWiseOperator): """换手率均值算子""" def __init__(self, n: int): config = OperatorConfig( name=f"turnover_rate_mean_{n}", description=f"{n}日换手率均值", required_columns=['turnover_rate'], output_columns=[f'turnover_rate_mean_{n}'], parameters={'n': n} ) super().__init__(config) self.n = n def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算n日换手率均值""" # 计算n日均值 mean_rate = pl.col('turnover_rate').rolling_mean(window=self.n) return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}')) class VolumeSpikeCategoryOperator(StockWiseOperator): """成交量激增分类算子""" def __init__(self): config = OperatorConfig( name="volume_spike_category", description="成交量激增分类", required_columns=['vol', 'vol_spike'], output_columns=['cat_vol_spike'], parameters={} ) super().__init__(config) def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量激增分类""" # 判断是否激增 (超过2倍均值) spike = pl.col('vol') > (2 * pl.col('vol_spike')) return stock_df.with_columns(spike.alias('cat_vol_spike')) class TurnoverVolatilityOperator(StockWiseOperator): """换手率波动率算子""" def __init__(self, window: int = 20): config = OperatorConfig( name=f"turnover_volatility_{window}", description=f"{window}日换手率波动率", required_columns=['turnover_rate'], output_columns=[f'turnover_std_{window}'], parameters={'window': window} ) super().__init__(config) self.window = window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算换手率波动率""" # 计算滚动标准差 turnover_std = pl.col('turnover_rate').rolling_std(window=self.window) return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}')) class VolumeCovarianceOperator(StockWiseOperator): """成交量协方差算子""" def __init__(self, window: int = 5): config = OperatorConfig( name=f"volume_covariance_{window}", description=f"{window}日成交量协方差", required_columns=['high', 'vol'], output_columns=[f'cov_{window}'], parameters={'window': window} ) super().__init__(config) self.window = window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量协方差""" # 计算滚动协方差 def calculate_cov(group_df): return group_df.select( pl.col('high').rolling_cov(pl.col('vol'), window=self.window) ) cov_result = calculate_cov(stock_df) return stock_df.with_columns(cov_result[f'cov_{self.window}'].alias(f'cov_{self.window}')) class VolumeCovarianceDeltaOperator(StockWiseOperator): """成交量协方差变化算子""" def __init__(self, period: int = 5): config = OperatorConfig( name=f"volume_covariance_delta_{period}", description=f"{period}日成交量协方差变化", required_columns=['cov_5'], output_columns=[f'delta_cov_{period}'], parameters={'period': period} ) super().__init__(config) self.period = period def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量协方差变化""" # 计算差分 delta = pl.col('cov_5').diff(self.period) return stock_df.with_columns(delta.alias(f'delta_cov_{self.period}')) class TurnoverRateAccelerationOperator(StockWiseOperator): """换手率加速度算子""" def __init__(self, short_window: int = 5, long_window: int = 20): config = OperatorConfig( name=f"turnover_acceleration_{short_window}_{long_window}", description=f"{short_window}日对{long_window}日换手率加速度", required_columns=['turnover_rate'], output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'], parameters={'short_window': short_window, 'long_window': long_window} ) super().__init__(config) self.short_window = short_window self.long_window = long_window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算换手率加速度""" # 计算短期均值 short_avg = pl.col('turnover_rate').rolling_mean(window=self.short_window) # 计算长期均值 long_avg = pl.col('turnover_rate').rolling_mean(window=self.long_window) # 计算加速度 acceleration = short_avg - long_avg return stock_df.with_columns( acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}') ) class VolumeSustainabilityOperator(StockWiseOperator): """成交量持续性算子""" def __init__(self, short_window: int = 10, long_window: int = 30): config = OperatorConfig( name=f"volume_sustain_{short_window}_{long_window}", description=f"{short_window}日成交量大于{long_window}日均值占比", required_columns=['vol'], output_columns=[f'vol_sustain_{short_window}_{long_window}'], parameters={'short_window': short_window, 'long_window': long_window} ) super().__init__(config) self.short_window = short_window self.long_window = long_window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交量持续性""" # 计算长期均值 long_avg = pl.col('vol').rolling_mean(window=self.long_window) # 判断是否大于长期均值 above_avg = pl.col('vol') > long_avg # 计算短期占比 sustain_ratio = above_avg.cast(int).rolling_mean(window=self.short_window) return stock_df.with_columns( sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}') ) class TurnoverRelativeStrengthOperator(StockWiseOperator): """换手率相对强度算子""" def __init__(self, window: int = 20): config = OperatorConfig( name=f"turnover_relative_strength_{window}", description=f"{window}日换手率相对强度", required_columns=['turnover_rate'], output_columns=[f'turnover_relative_strength_{window}'], parameters={'window': window} ) super().__init__(config) self.window = window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算换手率相对强度""" # 计算长期均值 long_avg = pl.col('turnover_rate').rolling_mean(window=self.window) # 计算相对强度 relative_strength = pl.col('turnover_rate') / long_avg return stock_df.with_columns( relative_strength.alias(f'turnover_relative_strength_{self.window}') ) class AmountOutlierOperator(StockWiseOperator): """成交额异常值算子""" def __init__(self, window: int = 10): config = OperatorConfig( name=f"amount_outlier_{window}", description=f"{window}日成交额异常值", required_columns=['amount'], output_columns=[f'amount_outlier_{window}'], parameters={'window': window} ) super().__init__(config) self.window = window def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: """计算成交额异常值""" # 计算均值 avg_amount = pl.col('amount').rolling_mean(window=self.window) # 计算差值 amount_diff = pl.col('amount') - avg_amount # 计算Z-score (简化版,实际使用时可能需要横截面标准化) mean_diff = amount_diff.rolling_mean(window=self.window) std_diff = amount_diff.rolling_std(window=self.window) # 计算异常值分数 outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8) return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}')) # 成交量因子集合 VOLUME_OPERATORS = [ VolumeChangeRateOperator(), VolumeBreakoutOperator(), TurnoverDeviationOperator(), TurnoverSpikeOperator(), VolumeRatioAverageOperator(), VolumeRatioBreakoutOperator(), VolumeSpikeOperator(), VolumeStd5Operator(), TurnoverRateMeanOperator(20), VolumeSpikeCategoryOperator(), TurnoverVolatilityOperator(), TurnoverRateAccelerationOperator(), VolumeSustainabilityOperator(), TurnoverRelativeStrengthOperator(), AmountOutlierOperator(), ] def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame: """ 应用所有成交量因子 Args: df: 输入的Polars DataFrame operators: 要应用的算子列表,如果为None则使用默认列表 Returns: 添加了成交量因子的DataFrame """ if operators is None: operators = VOLUME_OPERATORS result_df = df for operator in operators: result_df = operator(result_df) return result_df