Files
NewStock/main/factor/polars_technical_factors.py

489 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
技术指标因子 - 使用Polars实现
包含ATR、OBV、RSI、EMA等技术指标相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class ATROperator(StockWiseOperator):
    """Average True Range (ATR) operator computed with TA-Lib.

    Reads the ``high``, ``low`` and ``close`` columns and appends a column
    named ``atr_{period}``.
    """

    def __init__(self, period: int = 14):
        """Build the operator configuration.

        Args:
            period: Look-back length forwarded to ``talib.ATR`` as
                ``timeperiod``; it is also embedded in the output column name.
        """
        config = OperatorConfig(
            name=f"atr_{period}",
            description=f"{period}日ATR",
            required_columns=['high', 'low', 'close'],
            output_columns=[f'atr_{period}'],
            parameters={'period': period}
        )
        super().__init__(config)
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ATR column to ``stock_df``.

        Args:
            stock_df: Frame holding ``high``, ``low`` and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``atr_{period}`` column.
        """
        # TA-Lib rejects anything that is not a float64 array, so cast
        # explicitly instead of relying on the dtype the data was loaded with.
        high, low, close = (
            stock_df[column].cast(pl.Float64).to_numpy()
            for column in ('high', 'low', 'close')
        )
        atr_values = talib.ATR(high, low, close, timeperiod=self.period)
        return stock_df.with_columns(pl.Series(f'atr_{self.period}', atr_values))
class OBVOperator(StockWiseOperator):
    """On-Balance Volume (OBV) operator computed with TA-Lib.

    Reads ``close`` and ``vol`` and appends a column named ``obv``.
    """

    def __init__(self):
        """Build the operator configuration (no tunable parameters)."""
        config = OperatorConfig(
            name="obv",
            description="OBV能量潮",
            required_columns=['close', 'vol'],
            output_columns=['obv'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the OBV column to ``stock_df``.

        Args:
            stock_df: Frame holding ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``obv`` column.
        """
        # TA-Lib only accepts float64 arrays; volume in particular may be
        # stored as an integer column, which would make talib.OBV raise.
        close = stock_df['close'].cast(pl.Float64).to_numpy()
        volume = stock_df['vol'].cast(pl.Float64).to_numpy()
        obv_values = talib.OBV(close, volume)
        return stock_df.with_columns(pl.Series('obv', obv_values))
class OBVMAOperator(StockWiseOperator):
    """Simple moving average of the ``obv`` column, computed with TA-Lib.

    Reads ``obv`` and appends a column named ``maobv_{period}``.
    """

    def __init__(self, period: int = 6):
        """Build the operator configuration.

        Args:
            period: Window length forwarded to ``talib.SMA`` as
                ``timeperiod``; it is also embedded in the output column name.
        """
        config = OperatorConfig(
            name=f"obv_ma_{period}",
            description=f"{period}日OBV均线",
            required_columns=['obv'],
            output_columns=[f'maobv_{period}'],
            parameters={'period': period}
        )
        super().__init__(config)
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the OBV moving-average column to ``stock_df``.

        Args:
            stock_df: Frame holding an ``obv`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``maobv_{period}`` column.
        """
        # TA-Lib only accepts float64 arrays; cast so an ``obv`` column that
        # was produced or stored with another numeric dtype still works.
        obv = stock_df['obv'].cast(pl.Float64).to_numpy()
        ma_values = talib.SMA(obv, timeperiod=self.period)
        return stock_df.with_columns(pl.Series(f'maobv_{self.period}', ma_values))
class RSIOperator(StockWiseOperator):
    """Relative Strength Index (RSI) operator computed with TA-Lib.

    Reads ``close`` and appends a column named ``rsi_{period}``.
    """

    def __init__(self, period: int = 3):
        """Build the operator configuration.

        Args:
            period: Look-back length forwarded to ``talib.RSI`` as
                ``timeperiod``; it is also embedded in the output column name.
        """
        config = OperatorConfig(
            name=f"rsi_{period}",
            description=f"{period}日RSI",
            required_columns=['close'],
            output_columns=[f'rsi_{period}'],
            parameters={'period': period}
        )
        super().__init__(config)
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the RSI column to ``stock_df``.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``rsi_{period}`` column.
        """
        # TA-Lib only accepts float64 arrays, so cast before handing over.
        close = stock_df['close'].cast(pl.Float64).to_numpy()
        rsi_values = talib.RSI(close, timeperiod=self.period)
        return stock_df.with_columns(pl.Series(f'rsi_{self.period}', rsi_values))
class EMAOperator(StockWiseOperator):
    """Exponential moving average (EMA) of ``close``, computed with TA-Lib.

    Appends a column named ``_ema_{period}``.
    """

    def __init__(self, period: int):
        """Build the operator configuration.

        Args:
            period: Look-back length forwarded to ``talib.EMA`` as
                ``timeperiod``; it is also embedded in the output column name.
        """
        config = OperatorConfig(
            name=f"ema_{period}",
            description=f"{period}日EMA",
            required_columns=['close'],
            output_columns=[f'_ema_{period}'],
            parameters={'period': period}
        )
        super().__init__(config)
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the EMA column to ``stock_df``.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``_ema_{period}`` column.
        """
        # TA-Lib only accepts float64 arrays, so cast before handing over.
        close = stock_df['close'].cast(pl.Float64).to_numpy()
        ema_values = talib.EMA(close, timeperiod=self.period)
        return stock_df.with_columns(pl.Series(f'_ema_{self.period}', ema_values))
class ReturnOperator(StockWiseOperator):
    """Percentage change of ``close`` over a fixed number of rows.

    Appends a column named ``return_{period}``.
    """

    def __init__(self, period: int):
        """Store ``period`` and register the operator configuration.

        Args:
            period: Row offset handed to ``Expr.pct_change``; it is also
                embedded in the operator name and the output column name.
        """
        super().__init__(
            OperatorConfig(
                name=f"return_{period}",
                description=f"{period}日收益率",
                required_columns=['close'],
                output_columns=[f'return_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``return_{period}`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        output_name = f'return_{self.period}'
        return stock_df.with_columns(
            pl.col('close').pct_change(self.period).alias(output_name)
        )
class ActivityFactorOperator(StockWiseOperator):
    """Generic activity factor derived from an EMA column.

    Reads ``_ema_{period}`` and appends ``act_factor{period}``, defined as
    ``arctan(pct_change_of_ema_in_percent * 57.3 / scale)``.
    """

    def __init__(self, period: int, scale: float):
        """Store the parameters and register the operator configuration.

        Args:
            period: Selects the ``_ema_{period}`` input column and names the
                output column.
            scale: Divisor applied to the scaled EMA change before ``arctan``.
        """
        super().__init__(
            OperatorConfig(
                name=f"act_factor_{period}",
                description=f"{period}日活跃度因子",
                required_columns=[f'_ema_{period}'],
                output_columns=[f'act_factor{period}'],
                parameters={'period': period, 'scale': scale},
            )
        )
        self.period = period
        self.scale = scale

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``act_factor{period}`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        ema = pl.col(f'_ema_{self.period}')
        # Row-over-row change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        # NOTE(review): 57.3 looks like the degrees-per-radian factor - confirm.
        squashed = (pct_step * 57.3 / self.scale).arctan()
        return stock_df.with_columns(squashed.alias(f'act_factor{self.period}'))
class ActivityFactor5Operator(StockWiseOperator):
    """Activity factor built from ``_ema_5``; writes ``act_factor1``.

    The value is ``arctan(pct_change_of_ema_in_percent * 57.3 / 50)``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_5",
                description="5日活跃度因子",
                required_columns=['_ema_5'],
                output_columns=['act_factor1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``act_factor1`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        ema = pl.col('_ema_5')
        # Row-over-row change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        return stock_df.with_columns(
            (pct_step * 57.3 / 50).arctan().alias('act_factor1')
        )
class ActivityFactor13Operator(StockWiseOperator):
    """Activity factor built from ``_ema_13``; writes ``act_factor2``.

    The value is ``arctan(pct_change_of_ema_in_percent * 57.3 / 40)``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_13",
                description="13日活跃度因子",
                required_columns=['_ema_13'],
                output_columns=['act_factor2'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``act_factor2`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        ema = pl.col('_ema_13')
        # Row-over-row change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        return stock_df.with_columns(
            (pct_step * 57.3 / 40).arctan().alias('act_factor2')
        )
class ActivityFactor20Operator(StockWiseOperator):
    """Activity factor built from ``_ema_20``; writes ``act_factor3``.

    The value is ``arctan(pct_change_of_ema_in_percent * 57.3 / 21)``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_20",
                description="20日活跃度因子",
                required_columns=['_ema_20'],
                output_columns=['act_factor3'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``act_factor3`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        ema = pl.col('_ema_20')
        # Row-over-row change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        return stock_df.with_columns(
            (pct_step * 57.3 / 21).arctan().alias('act_factor3')
        )
class ActivityFactor60Operator(StockWiseOperator):
    """Activity factor built from ``_ema_60``; writes ``act_factor4``.

    The value is ``arctan(pct_change_of_ema_in_percent * 57.3 / 10)``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_60",
                description="60日活跃度因子",
                required_columns=['_ema_60'],
                output_columns=['act_factor4'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``act_factor4`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        ema = pl.col('_ema_60')
        # Row-over-row change of the EMA, expressed in percent.
        pct_step = (ema / ema.shift(1) - 1) * 100
        return stock_df.with_columns(
            (pct_step * 57.3 / 10).arctan().alias('act_factor4')
        )
class ActivityFactor5and6Operator(StockWiseOperator):
    """Composite activity factors ``act_factor5`` and ``act_factor6``.

    * ``act_factor5`` is the sum of ``act_factor1`` .. ``act_factor4``.
    * ``act_factor6`` is ``(act_factor1 - act_factor2)`` divided by
      ``sqrt(act_factor1**2 + act_factor2**2) + 1e-8``.
    """

    def __init__(self):
        """Register the operator configuration.

        All four input factors are declared as required because
        ``apply_stock`` reads every one of them when building ``act_factor5``.
        """
        config = OperatorConfig(
            name="act_factor_5_6",
            description="活跃度因子5和6",
            required_columns=['act_factor1', 'act_factor2', 'act_factor3', 'act_factor4'],
            output_columns=['act_factor5', 'act_factor6'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``act_factor5`` and ``act_factor6`` to ``stock_df``.

        Args:
            stock_df: Frame holding ``act_factor1`` .. ``act_factor4``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the two composite factor columns added.
        """
        f1 = pl.col('act_factor1')
        f2 = pl.col('act_factor2')
        # Factor 5: plain sum of the four single-horizon activity factors.
        factor5 = f1 + f2 + pl.col('act_factor3') + pl.col('act_factor4')
        # Factor 6: difference of the first two factors, normalised by their
        # Euclidean norm; the small epsilon guards against division by zero.
        norm = (f1.pow(2) + f2.pow(2)).sqrt()
        factor6 = (f1 - f2) / (norm + 1e-8)
        return stock_df.with_columns([
            factor5.alias('act_factor5'),
            factor6.alias('act_factor6')
        ])
class Alpha003Operator(StockWiseOperator):
    """Alpha003 factor: ``(close - open) / (high - low)``.

    Rows where ``high != low`` does not hold get ``0`` instead.
    Writes ``alpha_003``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        super().__init__(
            OperatorConfig(
                name="alpha_003",
                description="Alpha003因子",
                required_columns=['open', 'close', 'high', 'low'],
                output_columns=['alpha_003'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the ``alpha_003`` column added.

        ``kwargs`` is accepted for interface compatibility and ignored.
        """
        high, low = pl.col('high'), pl.col('low')
        candle_body = pl.col('close') - pl.col('open')
        candle_range = high - low
        # Only divide where the bar has a non-degenerate range.
        ratio = pl.when(high != low).then(candle_body / candle_range).otherwise(0)
        return stock_df.with_columns(ratio.alias('alpha_003'))
class Alpha007Operator(StockWiseOperator):
    """Alpha007 factor: 5-row rolling correlation of ``close`` and ``vol``.

    Writes ``alpha_007``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        config = OperatorConfig(
            name="alpha_007",
            description="Alpha007因子",
            required_columns=['close', 'vol'],
            output_columns=['alpha_007'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_007`` column to ``stock_df``.

        Args:
            stock_df: Frame holding ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``alpha_007`` column.
        """
        # Polars exposes rolling correlation as the module-level function
        # ``pl.rolling_corr(a, b, window_size=...)``; there is no
        # ``Expr.rolling_corr`` method and no ``window`` keyword.
        corr_5 = pl.rolling_corr(pl.col('close'), pl.col('vol'), window_size=5)
        return stock_df.with_columns(corr_5.alias('alpha_007'))
class Alpha013Operator(StockWiseOperator):
    """Alpha013 factor: 5-row rolling sum of ``close`` minus its 20-row sum.

    Writes ``alpha_013``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        config = OperatorConfig(
            name="alpha_013",
            description="Alpha013因子",
            required_columns=['close'],
            output_columns=['alpha_013'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_013`` column to ``stock_df``.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``alpha_013`` column.
        """
        close = pl.col('close')
        # Polars names the rolling window parameter ``window_size``;
        # passing ``window=`` raises a TypeError.
        sum_5 = close.rolling_sum(window_size=5)
        sum_20 = close.rolling_sum(window_size=20)
        alpha_013 = sum_5 - sum_20
        return stock_df.with_columns(alpha_013.alias('alpha_013'))
class Alpha022Operator(StockWiseOperator):
    """Simplified Alpha022 variant; writes ``alpha_22_improved``.

    The value is ``-1 * diff5(rolling_cov(high, vol, 5)) * rolling_std(close, 20)``.
    No ranking is applied to the standard deviation: the raw rolling standard
    deviation is used in its place.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        config = OperatorConfig(
            name="alpha_022",
            description="Alpha022改进因子",
            required_columns=['high', 'low', 'close', 'vol'],
            output_columns=['alpha_22_improved'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_22_improved`` column to ``stock_df``.

        Args:
            stock_df: Frame holding ``high``, ``vol`` and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``alpha_22_improved`` column.
        """
        # Rolling covariance is a module-level Polars function taking
        # ``window_size``; ``Expr.rolling_cov(..., window=...)`` does not exist.
        cov_5 = pl.rolling_cov(pl.col('high'), pl.col('vol'), window_size=5)
        # Change of that covariance over 5 rows.
        delta_cov = cov_5.diff(5)
        # 20-row rolling standard deviation of close (``window_size``, not ``window``).
        std_close = pl.col('close').rolling_std(window_size=20)
        # Simplified version: the raw std stands in for its rank.
        alpha_22 = -1 * delta_cov * std_close
        return stock_df.with_columns(alpha_22.alias('alpha_22_improved'))
class BBIRatioOperator(StockWiseOperator):
    """BBI ratio factor; writes ``bbi_ratio_factor``.

    BBI is the mean of the 3-, 6-, 12- and 24-row simple moving averages of
    ``close``; the factor is ``BBI / close``.
    """

    def __init__(self):
        """Register the fixed operator configuration."""
        config = OperatorConfig(
            name="bbi_ratio",
            description="BBI比率因子",
            required_columns=['close'],
            output_columns=['bbi_ratio_factor'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``bbi_ratio_factor`` column to ``stock_df``.

        Args:
            stock_df: Frame holding a ``close`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an extra ``bbi_ratio_factor`` column.
        """
        close = pl.col('close')
        # Polars names the rolling window parameter ``window_size``;
        # passing ``window=`` raises a TypeError.
        sma3 = close.rolling_mean(window_size=3)
        sma6 = close.rolling_mean(window_size=6)
        sma12 = close.rolling_mean(window_size=12)
        sma24 = close.rolling_mean(window_size=24)
        bbi = (sma3 + sma6 + sma12 + sma24) / 4
        bbi_ratio = bbi / close
        return stock_df.with_columns(bbi_ratio.alias('bbi_ratio_factor'))
# Default collection of technical-indicator operators.
# Order matters: operators that read derived columns (``obv``, ``_ema_*``,
# ``act_factor1`` .. ``act_factor4``) come after the operators producing them.
TECHNICAL_OPERATORS = [
    *(ATROperator(window) for window in (14, 6)),
    OBVOperator(),
    OBVMAOperator(6),
    RSIOperator(3),
    *(EMAOperator(span) for span in (5, 13, 20, 60)),
    *(ReturnOperator(horizon) for horizon in (5, 20)),
    ActivityFactor5Operator(),
    ActivityFactor13Operator(),
    ActivityFactor20Operator(),
    ActivityFactor60Operator(),
    ActivityFactor5and6Operator(),
    Alpha003Operator(),
    Alpha007Operator(),
    Alpha013Operator(),
    Alpha022Operator(),
    BBIRatioOperator(),
]
def apply_technical_factors(
    df: pl.DataFrame,
    operators: Optional[List[StockWiseOperator]] = None,
) -> pl.DataFrame:
    """Apply a sequence of technical-factor operators to ``df``.

    Each operator is called with the result of the previous one, so the
    operators run strictly in list order.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply. When ``None`` the module-level
            ``TECHNICAL_OPERATORS`` list is used. An empty list is honoured
            as "apply nothing" and returns ``df`` unchanged.

    Returns:
        The DataFrame produced by the last operator.
    """
    # Compare against None explicitly so an empty list is not silently
    # replaced by the default pipeline.
    if operators is None:
        operators = TECHNICAL_OPERATORS
    result_df = df
    for operator in operators:
        result_df = operator(result_df)
    return result_df