"""
Complex composite factors implemented with Polars.

Contains composite factors and more advanced factor computations.
"""
|
|||
|
|
|
|||
|
|
import polars as pl
|
|||
|
|
import numpy as np
|
|||
|
|
from typing import Dict, List, Optional, Any
|
|||
|
|
from operator_framework import StockWiseOperator, DateWiseOperator, OperatorConfig
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 时间序列因子
|
|||
|
|
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
    """Rolling correlation between large-order net flow and price momentum.

    The output column is named ``lg_flow_mom_corr_{n}_{m}``.

    Args:
        n: Window of the rolling sum of the large-order net amount; also the
            look-back of the price momentum (``pct_change(n)``).
        m: Window of the rolling correlation.
    """

    def __init__(self, n: int = 20, m: int = 60):
        config = OperatorConfig(
            name=f"lg_flow_mom_corr_{n}_{m}",
            description=f"{n}日大单资金流与{m}日价格动量相关性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'vol'],
            output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
            parameters={'n': n, 'm': m}
        )
        super().__init__(config)
        self.n = n
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the flow/momentum correlation column to ``stock_df``.

        Args:
            stock_df: Frame holding the required columns; presumably the rows
                of a single stock ordered by date (verify against the base
                class).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``lg_flow_mom_corr_{n}_{m}`` added.
        """
        # Large-order net amount: (large + extra-large buys - sells) * close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        # Rolling net large-order flow. The window is passed positionally
        # because the Polars parameter is `window_size`, not `window`.
        rolling_net_lg_flow = net_lg_flow_val.rolling_sum(self.n)

        # Price momentum over n rows.
        # NOTE(review): the description string mentions an m-day momentum but
        # the look-back used here is n — confirm which one is intended.
        price_mom = pl.col('close').pct_change(self.n)

        # Rolling correlation is a top-level Polars function, not an Expr
        # method.
        correlation = pl.rolling_corr(
            rolling_net_lg_flow, price_mom, window_size=self.m
        )

        return stock_df.with_columns(
            correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
        )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class LargeBuyConsolidationOperator(StockWiseOperator):
    """Rolling mean of the large-order net flow ratio.

    The output column is named ``lg_buy_consolidation_{n}``.

    Args:
        n: Rolling window of the mean.
        vol_quantile: Stored and reported in the operator parameters, but not
            used by ``apply_stock`` as written.
    """

    def __init__(self, n: int = 20, vol_quantile: float = 0.2):
        config = OperatorConfig(
            name=f"lg_buy_consolidation_{n}",
            description=f"{n}日大单买入盘整期",
            required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol'],
            output_columns=[f'lg_buy_consolidation_{n}'],
            parameters={'n': n, 'vol_quantile': vol_quantile}
        )
        super().__init__(config)
        self.n = n
        self.vol_quantile = vol_quantile

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling mean of the large-order net flow ratio.

        Args:
            stock_df: Frame holding the required columns; presumably the rows
                of a single stock ordered by date (verify against the base
                class).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``lg_buy_consolidation_{n}`` added.
        """
        epsilon = 1e-8  # keeps the denominator away from zero

        # NOTE(review): the original also built a rolling std of `close` that
        # was never used (and used an invalid `window=` keyword). It has been
        # dropped; no price-volatility filter is applied to this factor.

        # Large-order net flow as a fraction of total volume.
        net_lg_flow_ratio = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) /
            (pl.col('vol') + epsilon)
        )

        # Window passed positionally: the Polars parameter is `window_size`.
        rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(self.n)

        return stock_df.with_columns(
            rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
        )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class IntradayLargeFlowCorrelationOperator(StockWiseOperator):
    """Placeholder for the intraday-trend / large-order-flow correlation.

    The factor is not implemented: the output column
    ``intraday_lg_flow_corr_{n}`` is filled with nulls.
    """

    def __init__(self, n: int = 20):
        super().__init__(OperatorConfig(
            name=f"intraday_lg_flow_corr_{n}",
            description=f"{n}日日内趋势与大单流相关性",
            required_columns=['high', 'low', 'close', 'buy_lg_vol', 'buy_elg_vol',
                              'sell_lg_vol', 'sell_elg_vol'],
            output_columns=[f'intraday_lg_flow_corr_{n}'],
            parameters={'n': n}
        ))
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with an all-null float placeholder column.

        The source notes that a real implementation would need more detailed
        intraday data; until then the column only reserves the name.
        """
        output_name = f'intraday_lg_flow_corr_{self.n}'
        null_column = pl.lit(None).cast(float).alias(output_name)
        return stock_df.with_columns(null_column)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ProfitPressureOperator(StockWiseOperator):
    """Profit pressure index.

    Computed as ``winner_rate * 0.5 * (margin_85 + margin_95)``, where each
    margin is ``close / cost_XXpct - 1``.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="profit_pressure",
            description="获利压力指数",
            required_columns=['close', 'cost_85pct', 'cost_95pct', 'winner_rate'],
            output_columns=['profit_pressure'],
            parameters={}
        ))

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``profit_pressure`` column to ``stock_df``."""
        eps = 1e-8  # guards the cost denominators against zero
        close = pl.col('close')

        # Profit margin of the close relative to each cost level.
        margin_85 = (close / (pl.col('cost_85pct') + eps)) - 1
        margin_95 = (close / (pl.col('cost_95pct') + eps)) - 1

        # Winner rate times the average of the two margins.
        pressure_expr = pl.col('winner_rate') * 0.5 * (margin_85 + margin_95)

        return stock_df.with_columns(pressure_expr.alias('profit_pressure'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class UnderwaterResistanceOperator(StockWiseOperator):
    """Resistance from holders who are at a loss.

    Computed as ``(1 - winner_rate)`` times the non-negative relative distance
    from ``close`` up to ``cost_15pct``.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="underwater_resistance",
            description="套牢盘阻力",
            required_columns=['close', 'winner_rate', 'cost_15pct'],
            output_columns=['underwater_resistance'],
            parameters={}
        ))

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``underwater_resistance`` column to ``stock_df``."""
        eps = 1e-8  # guards the close denominator against zero
        close = pl.col('close')

        # Share of holders that are not winners.
        losing_share = 1.0 - pl.col('winner_rate')

        # Gap up to the 15% cost level, floored at zero, relative to close.
        gap_above_close = pl.max_horizontal(0, pl.col('cost_15pct') - close)
        relative_gap = gap_above_close / (close + eps)

        resistance_expr = losing_share * relative_gap
        return stock_df.with_columns(resistance_expr.alias('underwater_resistance'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ProfitDecayOperator(StockWiseOperator):
    """Ratio of the n-row return to the n-row change in ``winner_rate``.

    The output column is named ``profit_decay_{n}``.
    """

    def __init__(self, n: int = 20):
        super().__init__(OperatorConfig(
            name=f"profit_decay_{n}",
            description=f"{n}日盈利预期衰减",
            required_columns=['close', 'winner_rate'],
            output_columns=[f'profit_decay_{n}'],
            parameters={'n': n}
        ))
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``profit_decay_{n}`` column to ``stock_df``."""
        lookback = self.n

        # Return of the close over the look-back.
        period_return = pl.col('close').pct_change(lookback)

        # Change of the winner rate over the same look-back.
        winner_delta = pl.col('winner_rate').diff(lookback)

        # A small constant is added to the denominator before dividing.
        decay_expr = period_return / (winner_delta + 1e-8)

        return stock_df.with_columns(decay_expr.alias(f'profit_decay_{self.n}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class PullbackStrongOperator(StockWiseOperator):
    """Pullback depth from the rolling high, scaled by the recent gain.

    The output column is named ``pullback_strong_{n}_{m}``.

    Args:
        n: Window of the rolling maximum of ``high``.
        m: Look-back (in rows) of the recent gain of ``close``.
        gain_thresh: Stored and reported in the operator parameters, but not
            used by ``apply_stock`` as written.
    """

    def __init__(self, n: int = 20, m: int = 20, gain_thresh: float = 0.2):
        config = OperatorConfig(
            name=f"pullback_strong_{n}_{m}",
            description=f"{n}日{m}期强势股回调深度",
            required_columns=['high', 'close'],
            output_columns=[f'pullback_strong_{n}_{m}'],
            parameters={'n': n, 'm': m, 'gain_thresh': gain_thresh}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.gain_thresh = gain_thresh

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the pullback factor column to ``stock_df``.

        Args:
            stock_df: Frame holding ``high`` and ``close``; presumably the
                rows of a single stock ordered by date (verify against the
                base class).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``pullback_strong_{n}_{m}`` added.
        """
        # Rolling n-row maximum of `high`. The window is passed positionally
        # because the Polars parameter is `window_size`, not `window`.
        high_n = pl.col('high').rolling_max(self.n)

        # Pullback depth: distance of close below the rolling high.
        pullback_depth = (high_n - pl.col('close')) / high_n

        # Recent gain of close over m rows.
        recent_gain = (pl.col('close') / pl.col('close').shift(self.m)) - 1

        # Pullback factor; a small constant is added to the denominator.
        pullback_factor = pullback_depth / (recent_gain + 1e-8)

        return stock_df.with_columns(
            pullback_factor.alias(f'pullback_strong_{self.n}_{self.m}')
        )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HurstExponentFlowOperator(StockWiseOperator):
    """Placeholder for the Hurst exponent of a money-flow column.

    The factor is not implemented: the output column
    ``hurst_{flow_col}_{n}`` is filled with nulls.
    """

    def __init__(self, n: int = 60, flow_col: str = 'net_mf_vol'):
        super().__init__(OperatorConfig(
            name=f"hurst_{flow_col}_{n}",
            description=f"{n}日{flow_col}Hurst指数",
            required_columns=[flow_col],
            output_columns=[f'hurst_{flow_col}_{n}'],
            parameters={'n': n, 'flow_col': flow_col}
        ))
        self.n = n
        self.flow_col = flow_col

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with an all-null float placeholder column.

        The source notes that a real implementation would need a dedicated
        Hurst-exponent routine; until then the column only reserves the name.
        """
        output_name = f'hurst_{self.flow_col}_{self.n}'
        null_column = pl.lit(None).cast(float).alias(output_name)
        return stock_df.with_columns(null_column)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VolWeightedHistoricalPositionOperator(StockWiseOperator):
    """Historical price position weighted by relative volume strength.

    The output column is named ``vol_wgt_hist_pos_{n}``.

    Args:
        n: Window of the rolling mean volume used as the volume baseline.
    """

    def __init__(self, n: int = 20):
        config = OperatorConfig(
            name=f"vol_wgt_hist_pos_{n}",
            description=f"{n}日成交量加权历史位置",
            required_columns=['close', 'his_high', 'his_low', 'vol'],
            output_columns=[f'vol_wgt_hist_pos_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volume-weighted historical position column.

        Args:
            stock_df: Frame holding the required columns; presumably the rows
                of a single stock ordered by date (verify against the base
                class).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_wgt_hist_pos_{n}`` added.
        """
        # Position of close inside the [his_low, his_high] range, clipped to
        # [0, 1].
        # NOTE(review): no guard for his_high == his_low; the division is then
        # by zero — confirm whether that can occur in the input data.
        hist_pos = (pl.col('close') - pl.col('his_low')) / (pl.col('his_high') - pl.col('his_low'))
        hist_pos = hist_pos.clip(0, 1)

        # Volume relative to its rolling mean. The window is passed
        # positionally because the Polars parameter is `window_size`.
        rolling_mean_vol = pl.col('vol').rolling_mean(self.n)
        vol_rel_strength = pl.col('vol') / rolling_mean_vol

        # Weighted position.
        weighted_pos = hist_pos * vol_rel_strength

        return stock_df.with_columns(weighted_pos.alias(f'vol_wgt_hist_pos_{self.n}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 横截面因子
|
|||
|
|
class CrossSectionalRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of an arbitrary column.

    The output column is named ``cs_rank_{column}``: the dense rank divided by
    the largest dense rank in the frame.
    """

    def __init__(self, column: str, ascending: bool = True):
        super().__init__(OperatorConfig(
            name=f"cs_rank_{column}",
            description=f"{column}横截面排名",
            required_columns=[column],
            output_columns=[f'cs_rank_{column}'],
            parameters={'column': column, 'ascending': ascending}
        ))
        self.column = column
        self.ascending = ascending

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_{column}`` to ``date_df``."""
        # Dense rank; the direction is the inverse of the `ascending` flag.
        rank_descending = not self.ascending
        dense_rank = pl.col(self.column).rank(method='dense', descending=rank_descending)

        # Scale by the maximum rank to obtain a percentile-style value.
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias(f'cs_rank_{self.column}'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalNetLargeFlowRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of the large-order net amount.

    The ranked value is ``(large + extra-large buys - sells) * close``;
    ranking is dense and descending.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_net_lg_flow_val",
            description="横截面大单净额排名",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'close'],
            output_columns=['cs_rank_net_lg_flow_val'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_net_lg_flow_val`` to ``date_df``."""
        # Net large-order volume, converted to an amount with the close price.
        net_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_amount = net_volume * pl.col('close')

        # Dense descending rank scaled by the maximum rank.
        dense_rank = net_amount.rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_net_lg_flow_val'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalFlowDivergenceRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of large-vs-small order flow divergence.

    Divergence is the large-order net flow ratio minus the small-order net
    flow ratio, both relative to total volume; ranking is dense and
    descending.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_flow_divergence",
            description="横截面流向背离度排名",
            required_columns=['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
                              'sell_lg_vol', 'sell_elg_vol', 'vol'],
            output_columns=['cs_rank_flow_divergence'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_flow_divergence`` to ``date_df``."""
        eps = 1e-8  # guards the volume denominators against zero

        # Large-order net flow as a fraction of volume.
        large_net = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        large_ratio = large_net / (pl.col('vol') + eps)

        # Small-order net flow as a fraction of volume.
        small_net = pl.col('buy_sm_vol') - pl.col('sell_sm_vol')
        small_ratio = small_net / (pl.col('vol') + eps)

        # Divergence between the two ratios.
        gap = large_ratio - small_ratio

        # Dense descending rank scaled by the maximum rank.
        dense_rank = gap.rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_flow_divergence'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalRelativeProfitMarginRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of the relative profit margin.

    The margin is ``(close - weight_avg) / close``; ranking is dense and
    descending.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_rel_profit_margin",
            description="横截面相对盈利幅度排名",
            required_columns=['close', 'weight_avg'],
            output_columns=['cs_rank_rel_profit_margin'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_rel_profit_margin`` to ``date_df``."""
        close = pl.col('close')

        # Margin of the close over the weighted average cost, relative to close.
        margin = (close - pl.col('weight_avg')) / close

        # Dense descending rank scaled by the maximum rank.
        dense_rank = margin.rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_rel_profit_margin'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalCostBreadthRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of the cost-distribution breadth.

    Breadth is ``(cost_85pct - cost_15pct) / weight_avg``; ranking is dense
    and descending.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_cost_breadth",
            description="横截面成本分布宽度排名",
            required_columns=['cost_85pct', 'cost_15pct', 'weight_avg'],
            output_columns=['cs_rank_cost_breadth'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_cost_breadth`` to ``date_df``."""
        eps = 1e-8  # guards the weighted-average denominator against zero

        # Spread between the 85% and 15% cost levels, relative to the average.
        spread = pl.col('cost_85pct') - pl.col('cost_15pct')
        breadth = spread / (pl.col('weight_avg') + eps)

        # Dense descending rank scaled by the maximum rank.
        dense_rank = breadth.rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_cost_breadth'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalWinnerRateRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of ``winner_rate`` (dense, descending)."""

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_winner_rate",
            description="横截面获利盘比例排名",
            required_columns=['winner_rate'],
            output_columns=['cs_rank_winner_rate'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_winner_rate`` to ``date_df``."""
        # Dense descending rank scaled by the maximum rank.
        dense_rank = pl.col('winner_rate').rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_winner_rate'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CrossSectionalVolumeRatioRankOperator(DateWiseOperator):
    """Cross-sectional percentile rank of ``volume_ratio`` (dense, descending)."""

    def __init__(self):
        super().__init__(OperatorConfig(
            name="cs_rank_volume_ratio",
            description="横截面量比排名",
            required_columns=['volume_ratio'],
            output_columns=['cs_rank_volume_ratio'],
            parameters={}
        ))

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cs_rank_volume_ratio`` to ``date_df``."""
        # Dense descending rank scaled by the maximum rank.
        dense_rank = pl.col('volume_ratio').rank(method='dense', descending=True)
        percentile = dense_rank / dense_rank.max()

        return date_df.with_columns(percentile.alias('cs_rank_volume_ratio'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 复杂组合因子
|
|||
|
|
class ComplexFactorDEAPOperator(StockWiseOperator):
    """Composite factor ``complex_factor_deap_1`` built from other factors.

    The formula is ``A / B - C``, with every division guarded against
    near-zero denominators. On any exception the error is printed and the
    output column is filled with nulls instead.
    """

    def __init__(self):
        super().__init__(OperatorConfig(
            name="complex_factor_deap_1",
            description="DEAP复杂组合因子",
            required_columns=['pullback_strong_20_20', 'log_close', 'industry_return_5',
                              'vol_adj_roc_20', 'vol_drop_profit_cnt_5', 'nonlinear_mv_volume',
                              'alpha_007', 'lg_buy_consolidation_20', 'net_mf_vol', 'std_return_5',
                              'arbr', 'industry_act_factor5', 'industry_act_factor1', 'low_cost_dev',
                              'mv_weighted_turnover', 'act_factor4', 'vol', 'lg_elg_buy_prop',
                              'intraday_lg_flow_corr_20'],
            output_columns=['complex_factor_deap_1'],
            parameters={}
        ))

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``complex_factor_deap_1`` to ``stock_df``.

        Returns:
            ``stock_df`` with the composite factor, or with an all-null float
            column of the same name if the computation raises.
        """
        try:
            col = pl.col

            def guarded_ratio(num, den, default_val=0):
                # Divide only where |den| exceeds 1e-8; otherwise use the default.
                return pl.when(den.abs() > 1e-8).then(num / den).otherwise(default_val)

            pullback = col('pullback_strong_20_20')
            consolidation = col('lg_buy_consolidation_20')
            buy_prop = col('lg_elg_buy_prop')

            # Component D.
            comp_d = (
                pullback * guarded_ratio(col('log_close'), col('industry_return_5'))
                - guarded_ratio(
                    col('vol_adj_roc_20') + col('vol_drop_profit_cnt_5'),
                    col('nonlinear_mv_volume') - col('alpha_007'),
                )
            )

            # Component A.
            comp_a = comp_d * consolidation + consolidation + pullback

            # Component F.
            comp_f = (
                (col('net_mf_vol') + col('std_return_5'))
                * (col('arbr') - col('industry_act_factor5'))
            )

            # Component H.
            comp_h = (
                (col('industry_act_factor1') + col('low_cost_dev'))
                + col('mv_weighted_turnover') * col('act_factor4')
            )

            # Component B.
            comp_b = guarded_ratio(comp_f + col('vol') + comp_h, buy_prop)

            # Component C: the intraday correlation (nulls taken as 0),
            # divided twice by the large-buy proportion.
            intraday_corr = col('intraday_lg_flow_corr_20').fill_null(0)
            comp_c = guarded_ratio(guarded_ratio(intraday_corr, buy_prop), buy_prop)

            # Final factor.
            factor_expr = guarded_ratio(comp_a, comp_b) - comp_c

            return stock_df.with_columns(factor_expr.alias('complex_factor_deap_1'))

        except Exception as e:
            # Best-effort fallback: report the error and emit a null column.
            print(f"Error calculating complex_factor_deap_1: {e}")
            return stock_df.with_columns(pl.lit(None).cast(float).alias('complex_factor_deap_1'))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 因子集合
|
|||
|
|
# Default operator pipeline, applied in list order. The composite operator is
# last because it lists columns produced by earlier entries (for example
# pullback_strong_20_20 and lg_buy_consolidation_20) as required columns.
COMPLEX_OPERATORS = [
    # Time-series operators
    LargeFlowMomentumCorrelationOperator(),
    LargeBuyConsolidationOperator(),
    IntradayLargeFlowCorrelationOperator(),
    ProfitPressureOperator(),
    UnderwaterResistanceOperator(),
    ProfitDecayOperator(),
    PullbackStrongOperator(),
    HurstExponentFlowOperator(),
    VolWeightedHistoricalPositionOperator(),

    # Cross-sectional operators
    CrossSectionalRankOperator('close'),
    CrossSectionalNetLargeFlowRankOperator(),
    CrossSectionalFlowDivergenceRankOperator(),
    CrossSectionalRelativeProfitMarginRankOperator(),
    CrossSectionalCostBreadthRankOperator(),
    CrossSectionalWinnerRateRankOperator(),
    CrossSectionalVolumeRatioRankOperator(),

    # Composite operator
    ComplexFactorDEAPOperator(),
]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Run a sequence of factor operators over a DataFrame.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. Each is called with the
            current frame and must return the next one. ``None`` selects the
            module default ``COMPLEX_OPERATORS``.

    Returns:
        The frame returned by the last operator (``df`` itself if the
        operator list is empty).
    """
    pipeline = COMPLEX_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 主应用函数
|
|||
|
|
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: List[str] = None) -> pl.DataFrame:
    """
    Apply the factor families selected by ``factor_categories``.

    Families always run in a fixed order (money_flow, chip, volatility,
    volume, technical, sentiment, momentum, complex), whatever the order of
    ``factor_categories``. Each family's module is imported only when that
    family is requested, so a module that is missing or broken does not
    affect calls that do not ask for it.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Names of the families to apply. ``None`` applies
            all of them.

    Returns:
        The DataFrame after all requested families have been applied.

    Raises:
        ImportError: If the module of a requested family cannot be imported.
    """
    # Local import keeps this block self-contained.
    from importlib import import_module

    if factor_categories is None:
        factor_categories = ['money_flow', 'chip', 'volatility', 'volume',
                             'technical', 'sentiment', 'momentum', 'complex']

    # (category, module, entry point), listed in application order.
    external_steps = (
        ('money_flow', 'polars_money_flow_factors', 'apply_money_flow_factors'),
        ('chip', 'polars_chip_factors', 'apply_chip_distribution_factors'),
        ('volatility', 'polars_volatility_factors', 'apply_volatility_factors'),
        ('volume', 'polars_volume_factors', 'apply_volume_factors'),
        ('technical', 'polars_technical_factors', 'apply_technical_factors'),
        ('sentiment', 'polars_sentiment_factors', 'apply_sentiment_factors'),
        ('momentum', 'polars_momentum_factors', 'apply_momentum_factors'),
    )

    result_df = df
    for category, module_name, func_name in external_steps:
        if category not in factor_categories:
            continue
        # Import lazily so unrequested families are never loaded.
        apply_fn = getattr(import_module(module_name), func_name)
        result_df = apply_fn(result_df)

    # The composite factors live in this module and run last.
    if 'complex' in factor_categories:
        result_df = apply_complex_factors(result_df)

    return result_df
|