Files
NewStock/main/factor/polars_technical_factors.py

489 lines
15 KiB
Python
Raw Normal View History

2025-10-13 21:42:35 +08:00
"""
技术指标因子 - 使用Polars实现
包含ATROBVRSIEMA等技术指标相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class ATROperator(StockWiseOperator):
    """Operator that appends a TA-Lib ATR column named ``atr_<period>``.

    Reads the ``high``, ``low`` and ``close`` columns of the frame it is
    given and leaves all existing columns in place.
    """

    def __init__(self, period: int = 14):
        """Register the operator configuration.

        Args:
            period: Forwarded to TA-Lib as ``timeperiod``; also used as the
                suffix of the operator name and of the output column.
        """
        super().__init__(
            OperatorConfig(
                name=f"atr_{period}",
                description=f"{period}日ATR",
                required_columns=['high', 'low', 'close'],
                output_columns=[f'atr_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute ATR for one frame and attach it as a new column.

        Args:
            stock_df: Frame holding ``high``, ``low`` and ``close``.
                Presumably the rows of a single stock in chronological
                order -- confirm against ``StockWiseOperator``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``atr_<period>``.
        """
        # TA-Lib works on NumPy arrays, so pull each price column out first.
        highs = stock_df['high'].to_numpy()
        lows = stock_df['low'].to_numpy()
        closes = stock_df['close'].to_numpy()
        atr = talib.ATR(highs, lows, closes, timeperiod=self.period)
        out_name = f'atr_{self.period}'
        return stock_df.with_columns(pl.Series(name=out_name, values=atr))
class OBVOperator(StockWiseOperator):
    """Operator that appends a TA-Lib On-Balance Volume column named ``obv``."""

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="obv",
                description="OBV能量潮",
                required_columns=['close', 'vol'],
                output_columns=['obv'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute OBV for one frame and attach it as the ``obv`` column.

        Args:
            stock_df: Frame holding ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``obv``.
        """
        # TA-Lib works on NumPy arrays, so convert both inputs up front.
        closes = stock_df['close'].to_numpy()
        volumes = stock_df['vol'].to_numpy()
        obv = talib.OBV(closes, volumes)
        return stock_df.with_columns(pl.Series(name='obv', values=obv))
class OBVMAOperator(StockWiseOperator):
    """Operator that appends a simple moving average of the ``obv`` column.

    The result is stored as ``maobv_<period>``. The ``obv`` column must
    already exist in the input frame.
    """

    def __init__(self, period: int = 6):
        """Register the operator configuration.

        Args:
            period: Forwarded to TA-Lib ``SMA`` as ``timeperiod``; also the
                suffix of the operator name and of the output column.
        """
        super().__init__(
            OperatorConfig(
                name=f"obv_ma_{period}",
                description=f"{period}日OBV均线",
                required_columns=['obv'],
                output_columns=[f'maobv_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the OBV moving average and attach it as a new column.

        Args:
            stock_df: Frame holding an ``obv`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``maobv_<period>``.
        """
        obv = stock_df['obv'].to_numpy()
        smoothed = talib.SMA(obv, timeperiod=self.period)
        out_name = f'maobv_{self.period}'
        return stock_df.with_columns(pl.Series(name=out_name, values=smoothed))
class RSIOperator(StockWiseOperator):
    """Operator that appends a TA-Lib RSI column named ``rsi_<period>``."""

    def __init__(self, period: int = 3):
        """Register the operator configuration.

        Args:
            period: Forwarded to TA-Lib ``RSI`` as ``timeperiod``; also the
                suffix of the operator name and of the output column.
        """
        super().__init__(
            OperatorConfig(
                name=f"rsi_{period}",
                description=f"{period}日RSI",
                required_columns=['close'],
                output_columns=[f'rsi_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute RSI on ``close`` and attach it as a new column.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``rsi_<period>``.
        """
        closes = stock_df['close'].to_numpy()
        rsi = talib.RSI(closes, timeperiod=self.period)
        out_name = f'rsi_{self.period}'
        return stock_df.with_columns(pl.Series(name=out_name, values=rsi))
class EMAOperator(StockWiseOperator):
    """Operator that appends a TA-Lib EMA of ``close`` as ``_ema_<period>``.

    The leading underscore in the column name is part of the name that the
    activity-factor operators in this module read.
    """

    def __init__(self, period: int):
        """Register the operator configuration.

        Args:
            period: Forwarded to TA-Lib ``EMA`` as ``timeperiod``; also the
                suffix of the operator name and of the output column.
        """
        super().__init__(
            OperatorConfig(
                name=f"ema_{period}",
                description=f"{period}日EMA",
                required_columns=['close'],
                output_columns=[f'_ema_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the EMA of ``close`` and attach it as a new column.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``_ema_<period>``.
        """
        closes = stock_df['close'].to_numpy()
        ema = talib.EMA(closes, timeperiod=self.period)
        out_name = f'_ema_{self.period}'
        return stock_df.with_columns(pl.Series(name=out_name, values=ema))
class ReturnOperator(StockWiseOperator):
    """Operator that appends the ``period``-row percentage change of ``close``.

    The result is stored as ``return_<period>``.
    """

    def __init__(self, period: int):
        """Register the operator configuration.

        Args:
            period: Number of rows to look back when computing the change;
                also the suffix of the operator name and output column.
        """
        super().__init__(
            OperatorConfig(
                name=f"return_{period}",
                description=f"{period}日收益率",
                required_columns=['close'],
                output_columns=[f'return_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the percentage change of ``close`` over ``period`` rows.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``return_<period>``.
        """
        out_name = f'return_{self.period}'
        change = pl.col('close').pct_change(self.period).alias(out_name)
        return stock_df.with_columns(change)
class ActivityFactorOperator(StockWiseOperator):
    """Generic activity-factor operator driven by an ``_ema_<period>`` column.

    The factor is ``arctan(pct_change * 57.3 / scale)``, where ``pct_change``
    is the one-row percentage change of the EMA column. Note that the output
    column is ``act_factor<period>`` (no underscore before the period),
    whereas the operator name is ``act_factor_<period>``.
    """

    def __init__(self, period: int, scale: float):
        """Register the operator configuration.

        Args:
            period: Selects the input column ``_ema_<period>`` and the
                output column ``act_factor<period>``.
            scale: Divisor applied to the scaled EMA change before the
                arctangent is taken.
        """
        super().__init__(
            OperatorConfig(
                name=f"act_factor_{period}",
                description=f"{period}日活跃度因子",
                required_columns=[f'_ema_{period}'],
                output_columns=[f'act_factor{period}'],
                parameters={'period': period, 'scale': scale},
            )
        )
        self.period = period
        self.scale = scale

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the activity factor derived from ``_ema_<period>``.

        Args:
            stock_df: Frame holding the ``_ema_<period>`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``act_factor<period>``.
        """
        ema = pl.col(f'_ema_{self.period}')
        # One-row percentage change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        # NOTE(review): 57.3 is presumably ~180/pi (degrees per radian) -- confirm.
        scaled = pct_step * 57.3 / self.scale
        return stock_df.with_columns(
            scaled.arctan().alias(f'act_factor{self.period}')
        )
class ActivityFactor5Operator(StockWiseOperator):
    """Activity factor built from ``_ema_5`` with a fixed divisor of 50.

    Output column: ``act_factor1``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="act_factor_5",
                description="5日活跃度因子",
                required_columns=['_ema_5'],
                output_columns=['act_factor1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach ``act_factor1 = arctan(pct_change(_ema_5) * 57.3 / 50)``.

        Args:
            stock_df: Frame holding the ``_ema_5`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``act_factor1``.
        """
        ema = pl.col('_ema_5')
        # One-row percentage change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        scaled = pct_step * 57.3 / 50
        return stock_df.with_columns(scaled.arctan().alias('act_factor1'))
class ActivityFactor13Operator(StockWiseOperator):
    """Activity factor built from ``_ema_13`` with a fixed divisor of 40.

    Output column: ``act_factor2``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="act_factor_13",
                description="13日活跃度因子",
                required_columns=['_ema_13'],
                output_columns=['act_factor2'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach ``act_factor2 = arctan(pct_change(_ema_13) * 57.3 / 40)``.

        Args:
            stock_df: Frame holding the ``_ema_13`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``act_factor2``.
        """
        ema = pl.col('_ema_13')
        # One-row percentage change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        scaled = pct_step * 57.3 / 40
        return stock_df.with_columns(scaled.arctan().alias('act_factor2'))
class ActivityFactor20Operator(StockWiseOperator):
    """Activity factor built from ``_ema_20`` with a fixed divisor of 21.

    Output column: ``act_factor3``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="act_factor_20",
                description="20日活跃度因子",
                required_columns=['_ema_20'],
                output_columns=['act_factor3'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach ``act_factor3 = arctan(pct_change(_ema_20) * 57.3 / 21)``.

        Args:
            stock_df: Frame holding the ``_ema_20`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``act_factor3``.
        """
        ema = pl.col('_ema_20')
        # One-row percentage change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        scaled = pct_step * 57.3 / 21
        return stock_df.with_columns(scaled.arctan().alias('act_factor3'))
class ActivityFactor60Operator(StockWiseOperator):
    """Activity factor built from ``_ema_60`` with a fixed divisor of 10.

    Output column: ``act_factor4``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="act_factor_60",
                description="60日活跃度因子",
                required_columns=['_ema_60'],
                output_columns=['act_factor4'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach ``act_factor4 = arctan(pct_change(_ema_60) * 57.3 / 10)``.

        Args:
            stock_df: Frame holding the ``_ema_60`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``act_factor4``.
        """
        ema = pl.col('_ema_60')
        # One-row percentage change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        scaled = pct_step * 57.3 / 10
        return stock_df.with_columns(scaled.arctan().alias('act_factor4'))
class ActivityFactor5and6Operator(StockWiseOperator):
    """Operator that combines the four activity factors into two new ones.

    * ``act_factor5`` = ``act_factor1 + act_factor2 + act_factor3 + act_factor4``
    * ``act_factor6`` = ``(act_factor1 - act_factor2) /
      (sqrt(act_factor1**2 + act_factor2**2) + 1e-8)``
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="act_factor_5_6",
            description="活跃度因子5和6",
            # apply_stock reads all four activity factors (act_factor5 sums
            # them), so all four are declared as required inputs.
            required_columns=[
                'act_factor1',
                'act_factor2',
                'act_factor3',
                'act_factor4',
            ],
            output_columns=['act_factor5', 'act_factor6'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach ``act_factor5`` and ``act_factor6``.

        Args:
            stock_df: Frame holding ``act_factor1`` .. ``act_factor4``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra columns ``act_factor5`` and
            ``act_factor6``.
        """
        f1 = pl.col('act_factor1')
        f2 = pl.col('act_factor2')

        # Factor 5: plain sum of the four activity factors.
        factor5 = f1 + f2 + pl.col('act_factor3') + pl.col('act_factor4')

        # Factor 6: difference of the first two factors divided by their
        # Euclidean norm.
        numerator = f1 - f2
        denominator = (f1.pow(2) + f2.pow(2)).sqrt()
        factor6 = numerator / (denominator + 1e-8)  # epsilon avoids division by zero

        return stock_df.with_columns([
            factor5.alias('act_factor5'),
            factor6.alias('act_factor6')
        ])
class Alpha003Operator(StockWiseOperator):
    """Operator that appends ``alpha_003 = (close - open) / (high - low)``.

    Rows where ``high != low`` does not evaluate to true receive ``0``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        super().__init__(
            OperatorConfig(
                name="alpha_003",
                description="Alpha003因子",
                required_columns=['open', 'close', 'high', 'low'],
                output_columns=['alpha_003'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the ``alpha_003`` column.

        Args:
            stock_df: Frame holding ``open``, ``close``, ``high``, ``low``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``alpha_003``.
        """
        high = pl.col('high')
        low = pl.col('low')
        ratio = (pl.col('close') - pl.col('open')) / (high - low)
        # Only divide where the bar has a non-zero range; otherwise emit 0.
        guarded = pl.when(high.ne(low)).then(ratio).otherwise(0)
        return stock_df.with_columns(guarded.alias('alpha_003'))
class Alpha007Operator(StockWiseOperator):
    """Operator that appends the 5-row rolling correlation of ``close`` and ``vol``.

    The result is stored as ``alpha_007``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="alpha_007",
            description="Alpha007因子",
            required_columns=['close', 'vol'],
            output_columns=['alpha_007'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the ``alpha_007`` column.

        Args:
            stock_df: Frame holding ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``alpha_007``.
        """
        # Polars exposes rolling correlation as the top-level function
        # ``pl.rolling_corr(a, b, window_size=...)``; expressions have no
        # ``rolling_corr`` method and no ``window`` keyword.
        corr_5 = pl.rolling_corr(pl.col('close'), pl.col('vol'), window_size=5)
        return stock_df.with_columns(corr_5.alias('alpha_007'))
class Alpha013Operator(StockWiseOperator):
    """Operator that appends ``alpha_013 = rolling_sum(close, 5) - rolling_sum(close, 20)``."""

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="alpha_013",
            description="Alpha013因子",
            required_columns=['close'],
            output_columns=['alpha_013'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the ``alpha_013`` column.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``alpha_013``.
        """
        close = pl.col('close')
        # Polars names the rolling window parameter ``window_size``;
        # passing ``window=`` raises a TypeError.
        sum_5 = close.rolling_sum(window_size=5)
        sum_20 = close.rolling_sum(window_size=20)
        alpha_013 = sum_5 - sum_20
        return stock_df.with_columns(alpha_013.alias('alpha_013'))
class Alpha022Operator(StockWiseOperator):
    """Operator that appends the ``alpha_22_improved`` column.

    The factor is ``-1 * diff(rolling_cov(high, vol, 5), 5) *
    rolling_std(close, 20)``. No ranking is applied to the standard
    deviation term; the raw value is used.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="alpha_022",
            description="Alpha022改进因子",
            # 'low' is declared here but is not read by apply_stock.
            required_columns=['high', 'low', 'close', 'vol'],
            output_columns=['alpha_22_improved'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the ``alpha_22_improved`` column.

        Args:
            stock_df: Frame holding ``high``, ``vol`` and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``alpha_22_improved``.
        """
        # Polars exposes rolling covariance as the top-level function
        # ``pl.rolling_cov(a, b, window_size=...)``; expressions have no
        # ``rolling_cov`` method.
        cov_5 = pl.rolling_cov(pl.col('high'), pl.col('vol'), window_size=5)
        # Change in the covariance over 5 rows.
        delta_cov = cov_5.diff(5)
        # The rolling window parameter is ``window_size``, not ``window``.
        std_close = pl.col('close').rolling_std(window_size=20)
        # Simplified: the standard deviation is used directly in place of a rank.
        rank_std = std_close
        alpha_22 = -1 * delta_cov * rank_std
        return stock_df.with_columns(alpha_22.alias('alpha_22_improved'))
class BBIRatioOperator(StockWiseOperator):
    """Operator that appends ``bbi_ratio_factor = BBI / close``.

    BBI here is the mean of the 3-, 6-, 12- and 24-row rolling means of
    ``close``.
    """

    def __init__(self):
        """Register the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="bbi_ratio",
            description="BBI比率因子",
            required_columns=['close'],
            output_columns=['bbi_ratio_factor'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Attach the ``bbi_ratio_factor`` column.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the extra column ``bbi_ratio_factor``.
        """
        close = pl.col('close')
        # Polars names the rolling window parameter ``window_size``;
        # passing ``window=`` raises a TypeError.
        sma3 = close.rolling_mean(window_size=3)
        sma6 = close.rolling_mean(window_size=6)
        sma12 = close.rolling_mean(window_size=12)
        sma24 = close.rolling_mean(window_size=24)
        # BBI: average of the four moving averages.
        bbi = (sma3 + sma6 + sma12 + sma24) / 4
        bbi_ratio = bbi / close
        return stock_df.with_columns(bbi_ratio.alias('bbi_ratio_factor'))
# Default collection of technical-indicator operators.
# apply_technical_factors runs these in list order, and several entries read
# columns written by earlier ones, so the relative order matters.
TECHNICAL_OPERATORS = [
    ATROperator(14),
    ATROperator(6),
    # OBVMAOperator reads the 'obv' column written by OBVOperator.
    OBVOperator(),
    OBVMAOperator(6),
    RSIOperator(3),
    # The EMA operators write '_ema_5', '_ema_13', '_ema_20' and '_ema_60',
    # which the ActivityFactor*Operator entries below read.
    EMAOperator(5),
    EMAOperator(13),
    EMAOperator(20),
    EMAOperator(60),
    ReturnOperator(5),
    ReturnOperator(20),
    # These write 'act_factor1' .. 'act_factor4' respectively.
    ActivityFactor5Operator(),
    ActivityFactor13Operator(),
    ActivityFactor20Operator(),
    ActivityFactor60Operator(),
    # Reads 'act_factor1' .. 'act_factor4', so it must follow the four above.
    ActivityFactor5and6Operator(),
    Alpha003Operator(),
    Alpha007Operator(),
    Alpha013Operator(),
    Alpha022Operator(),
    BBIRatioOperator(),
]
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of operators over a frame, feeding each the previous result.

    Args:
        df: Input Polars DataFrame.
        operators: Callables to apply in order; each receives the current
            frame and returns the next one. When ``None``, the module-level
            ``TECHNICAL_OPERATORS`` list is used. An empty list applies
            nothing.

    Returns:
        The frame returned by the last operator, or ``df`` itself when
        there are no operators to apply.
    """
    pipeline = TECHNICAL_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current