Files
NewStock/main/factor/polars_momentum_factors.py

429 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
动量因子 - 使用Polars实现
包含动量、趋势、均线等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
from scipy.stats import linregress
class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Close price minus the "deduction price".

    The deduction price is taken as the ``close`` value ``n - 1`` rows
    earlier in the per-stock frame.
    """

    def __init__(self, n: int = 10):
        """Register operator metadata for lookback ``n``.

        Args:
            n: Lookback length; the deduction price is the close shifted by
                ``n - 1`` rows.
        """
        factor_name = f"price_minus_deduction_price_{n}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{n}日价格减抵扣价",
                required_columns=['close'],
                output_columns=[factor_name],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_minus_deduction_price_{n}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``close`` column.
                Presumably sorted by trade date ascending -- confirm with
                the caller, since ``shift`` works on row order.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the new factor column added.
        """
        lag = self.n - 1
        close = pl.col('close')
        out_name = f'price_minus_deduction_price_{self.n}'
        # Current close minus the close from ``lag`` rows back.
        return stock_df.with_columns((close - close.shift(lag)).alias(out_name))
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """Ratio of (close - deduction price) to the n-row simple moving average.

    The deduction price is the ``close`` value ``n - 1`` rows earlier.
    """

    def __init__(self, n: int = 10):
        """Register operator metadata for window length ``n``.

        Args:
            n: Window used both for the SMA and for the deduction-price lag
                (``n - 1`` rows).
        """
        config = OperatorConfig(
            name=f"price_deduction_price_diff_ratio_to_sma_{n}",
            description=f"{n}日价格抵扣价差值相对SMA比率",
            required_columns=['close'],
            output_columns=[f'price_deduction_price_diff_ratio_to_sma_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_deduction_price_diff_ratio_to_sma_{n}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the ratio column added.
        """
        close = pl.col('close')
        # polars rolling functions take ``window_size``; ``window`` is the
        # pandas spelling and raises TypeError here.
        sma = close.rolling_mean(window_size=self.n)
        deduction_price = close.shift(self.n - 1)
        diff = close - deduction_price
        # Small epsilon keeps the division finite when the SMA is exactly 0.
        ratio = diff / (sma + 1e-8)
        return stock_df.with_columns(
            ratio.alias(f'price_deduction_price_diff_ratio_to_sma_{self.n}')
        )
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Categorical factor comparing close and deduction price against the SMA.

    Categories:
        1: close > SMA  and deduction price > SMA
        2: close < SMA  and deduction price < SMA
        3: close > SMA  and deduction price <= SMA
        4: close <= SMA and deduction price > SMA
        0: none of the above
    """

    def __init__(self, n: int = 10):
        """Register operator metadata for window length ``n``.

        Args:
            n: Window used both for the SMA and for the deduction-price lag
                (``n - 1`` rows).
        """
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cat_price_vs_sma_vs_deduction_price_{n}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the integer category column added.
        """
        close = pl.col('close')
        # polars rolling functions take ``window_size`` (``window`` is pandas).
        sma = close.rolling_mean(window_size=self.n)
        deduction_price = close.shift(self.n - 1)

        # ``pl.select`` is not an analogue of ``np.select``; a first-match
        # multi-branch choice in polars is a when/then/otherwise chain.
        # Branches are evaluated in order, mirroring the original
        # conditions/choices lists, with 0 as the fallback category.
        classification = (
            pl.when((close > sma) & (deduction_price > sma)).then(pl.lit(1))
            .when((close < sma) & (deduction_price < sma)).then(pl.lit(2))
            .when((close > sma) & (deduction_price <= sma)).then(pl.lit(3))
            .when((close <= sma) & (deduction_price > sma)).then(pl.lit(4))
            .otherwise(pl.lit(0))
        )
        return stock_df.with_columns(
            classification.alias(f'cat_price_vs_sma_vs_deduction_price_{self.n}')
        )
class VolatilitySlopeOperator(StockWiseOperator):
    """Linear-regression slope of rolling volatility.

    Volatility is the rolling standard deviation of ``pct_chg`` over
    ``long_window`` rows; the factor is the least-squares slope of that
    volatility over the trailing ``short_window`` rows.
    """

    def __init__(self, long_window: int = 20, short_window: int = 5):
        """Register operator metadata.

        Args:
            long_window: Window for the rolling standard deviation.
            short_window: Window over which the slope is fitted.
        """
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window}
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    @staticmethod
    def _window_slope(series) -> float:
        """Return the slope of ``series`` regressed on 0..len(series)-1.

        A window shorter than two points has no defined slope; 0.0 is
        returned in that case, as a float so the output type is uniform.
        """
        if len(series) < 2:
            return 0.0
        x = np.arange(len(series))
        # NOTE(review): windows overlapping the volatility warm-up contain
        # missing values and are passed to linregress unchanged -- confirm
        # the resulting slope values are acceptable downstream.
        return float(linregress(x, series)[0])

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``volatility_slope_{long_window}_{short_window}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the slope column added.
        """
        # polars rolling functions take ``window_size`` (``window`` is pandas).
        long_vol = pl.col('pct_chg').rolling_std(window_size=self.long_window)
        # ``rolling_apply`` was renamed ``rolling_map`` in newer polars; use
        # whichever the installed version provides.
        rolling_udf = getattr(long_vol, 'rolling_map', None) or long_vol.rolling_apply
        volatility_slope = rolling_udf(
            self._window_slope,
            window_size=self.short_window
        )
        return stock_df.with_columns(
            volatility_slope.alias(f'volatility_slope_{self.long_window}_{self.short_window}')
        )
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Linear-regression slope of ``turnover_rate`` over a rolling window."""

    def __init__(self, window: int = 5):
        """Register operator metadata.

        Args:
            window: Number of rows over which the slope is fitted.
        """
        config = OperatorConfig(
            name=f"turnover_trend_strength_{window}",
            description=f"{window}日换手率趋势强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_trend_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    @staticmethod
    def _window_slope(series) -> float:
        """Return the slope of ``series`` regressed on 0..len(series)-1.

        A window shorter than two points has no defined slope; 0.0 is
        returned in that case, as a float so the output type is uniform.
        """
        if len(series) < 2:
            return 0.0
        x = np.arange(len(series))
        return float(linregress(x, series)[0])

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``turnover_trend_strength_{window}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the slope column added.
        """
        turnover = pl.col('turnover_rate')
        # ``rolling_apply`` was renamed ``rolling_map`` in newer polars; use
        # whichever the installed version provides.
        rolling_udf = getattr(turnover, 'rolling_map', None) or turnover.rolling_apply
        trend_strength = rolling_udf(
            self._window_slope,
            window_size=self.window
        )
        return stock_df.with_columns(
            trend_strength.alias(f'turnover_trend_strength_{self.window}')
        )
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Turnover surge: current ``turnover_rate`` over its rolling mean."""

    def __init__(self, window: int = 10):
        """Register operator metadata.

        Args:
            window: Number of rows in the rolling mean used as the baseline.
        """
        config = OperatorConfig(
            name=f"ff_turnover_surge_{window}",
            description=f"{window}日自由流通股换手率激增",
            required_columns=['turnover_rate'],
            output_columns=[f'ff_turnover_surge_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``ff_turnover_surge_{window}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the surge-ratio column added.
        """
        turnover = pl.col('turnover_rate')
        # polars rolling functions take ``window_size`` (``window`` is pandas).
        avg_turnover = turnover.rolling_mean(window_size=self.window)
        # Small epsilon keeps the division finite when the mean is exactly 0.
        surge_ratio = turnover / (avg_turnover + 1e-8)
        return stock_df.with_columns(surge_ratio.alias(f'ff_turnover_surge_{self.window}'))
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Price/volume trend coherence.

    The factor multiplies two rolling fractions, both averaged over
    ``price_window`` rows:
      * the share of rows whose close rose versus the previous row, and
      * the share of rows whose volume exceeded its ``volume_window`` mean.
    """

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        """Register operator metadata.

        Args:
            price_window: Window over which both fractions are averaged.
            volume_window: Window of the volume moving average used as the
                "above average" threshold.
        """
        config = OperatorConfig(
            name=f"price_volume_coherence_{price_window}_{volume_window}",
            description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
            required_columns=['close', 'vol'],
            output_columns=[f'price_volume_coherence_{price_window}_{volume_window}'],
            parameters={'price_window': price_window, 'volume_window': volume_window}
        )
        super().__init__(config)
        self.price_window = price_window
        self.volume_window = volume_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_volume_coherence_{price_window}_{volume_window}``.

        Args:
            stock_df: Frame for one stock containing ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the coherence column added.
        """
        close = pl.col('close')
        vol = pl.col('vol')

        # Fraction of up-days. This is built as a native expression: the
        # previous ``Expr.apply`` call ran the helper per element, where a
        # scalar has no ``diff``/``rolling_mean``.
        price_up = (
            (close.diff() > 0)
            .cast(pl.Int64)
            .rolling_mean(window_size=self.price_window)
        )

        # Fraction of days with volume above its own moving average.
        # polars rolling functions take ``window_size`` (``window`` is pandas).
        vol_avg = vol.rolling_mean(window_size=self.volume_window)
        vol_above_ratio = (
            (vol > vol_avg)
            .cast(pl.Int64)
            .rolling_mean(window_size=self.price_window)
        )

        coherence = price_up * vol_above_ratio
        return stock_df.with_columns(
            coherence.alias(f'price_volume_coherence_{self.price_window}_{self.volume_window}')
        )
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Placeholder ratio of free-float turnover to total turnover.

    NOTE(review): numerator and denominator both read ``turnover_rate``, so
    the result is ``x / (x + 1e-8)``. A real free-float vs. total ratio
    would need a second turnover column; confirm the intended inputs.
    """

    def __init__(self):
        """Register operator metadata; the operator takes no parameters."""
        super().__init__(
            OperatorConfig(
                name="ff_to_total_turnover_ratio",
                description="自由流通股对总换手率比率",
                required_columns=['turnover_rate'],
                output_columns=['ff_to_total_turnover_ratio'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``ff_to_total_turnover_ratio`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the ratio column added.
        """
        turnover = pl.col('turnover_rate')
        # Simplified form: the same column stands in for both turnover
        # measures; the epsilon guards against division by zero.
        denominator = turnover + 1e-8
        return stock_df.with_columns(
            (turnover / denominator).alias('ff_to_total_turnover_ratio')
        )
class VarianceOperator(StockWiseOperator):
    """Rolling variance of ``pct_chg``."""

    def __init__(self, window: int):
        """Register operator metadata.

        Args:
            window: Number of rows in the rolling variance window.
        """
        config = OperatorConfig(
            name=f"variance_{window}",
            description=f"{window}日方差",
            required_columns=['pct_chg'],
            output_columns=[f'variance_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``variance_{window}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing a ``pct_chg`` column.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the rolling-variance column added.
        """
        # polars rolling functions take ``window_size`` (``window`` is pandas).
        variance = pl.col('pct_chg').rolling_var(window_size=self.window)
        return stock_df.with_columns(variance.alias(f'variance_{self.window}'))
class LimitUpDownOperator(StockWiseOperator):
    """Limit-up / limit-down flags and their 10-row rolling counts.

    Output columns:
        cat_up_limit: True where ``close`` equals ``up_limit``.
        cat_down_limit: True where ``close`` equals ``down_limit``.
        up_limit_count_10d: rolling 10-row sum of ``cat_up_limit``.
        down_limit_count_10d: rolling 10-row sum of ``cat_down_limit``.
    """

    def __init__(self):
        """Register operator metadata; the operator takes no parameters."""
        config = OperatorConfig(
            name="limit_up_down",
            description="涨跌停因子",
            required_columns=['close', 'up_limit', 'down_limit'],
            output_columns=['cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the four limit-up/limit-down columns to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing ``close``, ``up_limit``
                and ``down_limit``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the flag and count columns added.
        """
        close = pl.col('close')
        # NOTE(review): exact equality on prices; assumes ``close`` and the
        # limit columns share the same rounding -- confirm with the data source.
        up_limit = close == pl.col('up_limit')
        down_limit = close == pl.col('down_limit')

        # polars rolling functions take ``window_size`` (``window`` is pandas).
        up_count_10d = up_limit.cast(pl.Int64).rolling_sum(window_size=10)
        down_count_10d = down_limit.cast(pl.Int64).rolling_sum(window_size=10)

        return stock_df.with_columns([
            up_limit.alias('cat_up_limit'),
            down_limit.alias('cat_down_limit'),
            up_count_10d.alias('up_limit_count_10d'),
            down_count_10d.alias('down_limit_count_10d')
        ])
class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Number of consecutive limit-up rows ending at each row.

    Reads the ``cat_up_limit`` flag column and emits, per row, the length of
    the unbroken run of limit-up rows that ends there (0 when the row itself
    is not limit-up).
    """

    def __init__(self):
        """Register operator metadata; the operator takes no parameters."""
        config = OperatorConfig(
            name="consecutive_up_limit",
            description="连续涨停天数",
            required_columns=['cat_up_limit'],
            output_columns=['consecutive_up_limit'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``consecutive_up_limit`` to ``stock_df``.

        Previously the column was just the 0/1 flag cast to int, so a stock
        on its third straight limit-up row still reported 1. The run length
        is now computed explicitly. Missing flags are treated as "not
        limit-up" and therefore break a streak.

        Args:
            stock_df: Frame for one stock containing ``cat_up_limit``.
                Presumably sorted by trade date ascending -- confirm with
                the caller, since the streak follows row order.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the integer streak column added.
        """
        flags = stock_df['cat_up_limit'].cast(pl.Int64).fill_null(0).to_numpy()
        is_limit = (flags != 0).astype(np.int64)

        # Running total of limit-up rows so far.
        cumulative = np.cumsum(is_limit)
        # Running total as of the most recent non-limit row. ``cumulative``
        # never decreases, so a running maximum over the values sampled at
        # non-limit rows carries that baseline forward.
        baseline = np.maximum.accumulate(np.where(is_limit == 0, cumulative, 0))
        streak = cumulative - baseline

        return stock_df.with_columns(pl.Series('consecutive_up_limit', streak))
class MomentumFactorOperator(StockWiseOperator):
    """Linear blend of volume change rate and turnover deviation.

    ``momentum = volume_change_rate + alpha * turnover_deviation``
    """

    def __init__(self, alpha: float = 0.5):
        """Register operator metadata.

        Args:
            alpha: Weight applied to ``turnover_deviation``.
        """
        super().__init__(
            OperatorConfig(
                name=f"momentum_factor_{alpha}",
                description=f"动量因子(alpha={alpha})",
                required_columns=['volume_change_rate', 'turnover_deviation'],
                output_columns=[f'momentum_factor_{alpha}'],
                parameters={'alpha': alpha},
            )
        )
        self.alpha = alpha

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``momentum_factor_{alpha}`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing ``volume_change_rate``
                and ``turnover_deviation``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the momentum column added.
        """
        weighted_deviation = self.alpha * pl.col('turnover_deviation')
        blended = pl.col('volume_change_rate') + weighted_deviation
        out_name = f'momentum_factor_{self.alpha}'
        return stock_df.with_columns(blended.alias(out_name))
class ResonanceFactorOperator(StockWiseOperator):
    """Resonance factor: product of ``volume_ratio`` and ``pct_chg``."""

    def __init__(self):
        """Register operator metadata; the operator takes no parameters."""
        super().__init__(
            OperatorConfig(
                name="resonance_factor",
                description="共振因子",
                required_columns=['volume_ratio', 'pct_chg'],
                output_columns=['resonance_factor'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``resonance_factor`` to ``stock_df``.

        Args:
            stock_df: Frame for one stock containing ``volume_ratio`` and
                ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``stock_df`` with the resonance column added.
        """
        volume_ratio = pl.col('volume_ratio')
        pct_change = pl.col('pct_chg')
        return stock_df.with_columns(
            (volume_ratio * pct_change).alias('resonance_factor')
        )
# Default collection of momentum operators, instantiated once at import time
# with the arguments shown (defaults where none are given).
#
# Order matters: LimitUpDownOperator emits `cat_up_limit`, which
# ConsecutiveUpLimitOperator lists as a required column, so it must come first.
#
# MomentumFactorOperator requires `volume_change_rate` and
# `turnover_deviation`, and ResonanceFactorOperator requires `volume_ratio`;
# no operator in this list produces those columns, so the input frame has to
# supply them.
MOMENTUM_OPERATORS = [
    PriceMinusDeductionPriceOperator(),
    PriceDeductionPriceDiffRatioToSMAOperator(),
    CatPriceVsSmaVsDeductionPriceOperator(),
    VolatilitySlopeOperator(),
    TurnoverRateTrendStrengthOperator(5),
    FreeFloatTurnoverSurgeOperator(10),
    PriceVolumeTrendCoherenceOperator(),
    FreeFloatToTotalTurnoverRatioOperator(),
    VarianceOperator(20),
    LimitUpDownOperator(),
    ConsecutiveUpLimitOperator(),
    MomentumFactorOperator(),
    ResonanceFactorOperator(),
]
def apply_momentum_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply a sequence of momentum operators to ``df``.

    Each operator is called with the frame returned by the previous one, so
    later operators can read columns that earlier ones added.

    Args:
        df: Input polars DataFrame.
        operators: Operators (callables taking and returning a frame) to
            apply in order. When ``None``, the module-level
            ``MOMENTUM_OPERATORS`` list is used. An empty sequence is
            honoured and returns ``df`` unchanged.

    Returns:
        The frame produced after all operators have been applied.
    """
    if operators is None:
        operators = MOMENTUM_OPERATORS
    result_df = df
    # ``op`` rather than ``operator`` to avoid shadowing the stdlib module name.
    for op in operators:
        result_df = op(result_df)
    return result_df