factor优化,改为polars

This commit is contained in:
2025-10-13 21:42:35 +08:00
parent f87434b553
commit 44315b2c76
12 changed files with 6928 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
"""
资金流因子 - 使用Polars实现
包含主力资金流、散户资金流等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class MoneyFlowIntensityOperator(StockWiseOperator):
"""主力资金流强度算子"""
def __init__(self):
config = OperatorConfig(
name="money_flow_intensity",
description="主力资金流强度",
required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
output_columns=['flow_lg_elg_intensity'],
parameters={}
)
super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算主力资金流强度"""
epsilon = 1e-8
# 计算大单+超大单净买入量
lg_elg_net_buy_vol = (
pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
)
# 计算资金流强度
flow_intensity = lg_elg_net_buy_vol / (pl.col('vol') + epsilon)
return stock_df.with_columns(flow_intensity.alias('flow_lg_elg_intensity'))
class FlowDivergenceRatioOperator(StockWiseOperator):
"""散户与主力背离度算子"""
def __init__(self):
config = OperatorConfig(
name="flow_divergence_ratio",
description="散户与主力背离度比率",
required_columns=['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
'sell_lg_vol', 'sell_elg_vol'],
output_columns=['flow_divergence_ratio'],
parameters={}
)
super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算散户与主力背离度比率"""
epsilon = 1e-8
# 计算小单净买入量
sm_net_buy_vol = pl.col('buy_sm_vol') - pl.col('sell_sm_vol')
# 计算大单+超大单净买入量
lg_elg_net_buy_vol = (
pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
)
# 计算背离度比率处理分母为0的情况
divergence_ratio = sm_net_buy_vol / (
lg_elg_net_buy_vol + pl.when(lg_elg_net_buy_vol == 0).then(epsilon).otherwise(0) + epsilon
)
return stock_df.with_columns(divergence_ratio.alias('flow_divergence_ratio'))
class FlowStructureChangeOperator(StockWiseOperator):
"""资金流结构变动算子"""
def __init__(self):
config = OperatorConfig(
name="flow_structure_change",
description="资金流结构变动",
required_columns=['buy_sm_vol', 'buy_lg_vol', 'buy_elg_vol'],
output_columns=['flow_struct_buy_change'],
parameters={}
)
super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算资金流结构变动"""
epsilon = 1e-8
# 计算总买入量
total_buy_vol = pl.col('buy_sm_vol') + pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
# 计算大单+超大单买入占比
lg_elg_buy_prop = (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')) / (total_buy_vol + epsilon)
# 计算1日变化
struct_change = lg_elg_buy_prop.diff()
return stock_df.with_columns(struct_change.alias('flow_struct_buy_change'))
class FlowAccelerationOperator(StockWiseOperator):
"""资金流加速度算子"""
def __init__(self):
config = OperatorConfig(
name="flow_acceleration",
description="资金流加速度",
required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol'],
output_columns=['flow_lg_elg_accel'],
parameters={}
)
super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算资金流加速度"""
# 计算大单+超大单净买入量
lg_elg_net_buy_vol = (
pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
)
# 计算一阶变化
first_diff = lg_elg_net_buy_vol.diff()
# 计算二阶变化(加速度)
acceleration = first_diff.diff()
return stock_df.with_columns(acceleration.alias('flow_lg_elg_accel'))
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
"""大单资金流与价格动量相关性算子"""
def __init__(self, n: int = 20, m: int = 60):
config = OperatorConfig(
name=f"lg_flow_mom_corr_{n}_{m}",
description=f"{n}日大单资金流与{m}日价格动量相关性",
required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
'close', 'vol'],
output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
parameters={'n': n, 'm': m}
)
super().__init__(config)
self.n = n
self.m = m
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算大单资金流与价格动量相关性"""
# 计算大单净额
net_lg_flow_val = (
(pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
)
# 计算滚动净大单流
rolling_net_lg_flow = net_lg_flow_val.rolling_sum(window=self.n)
# 计算价格动量
price_mom = pl.col('close').pct_change(self.n)
# 计算相关性
# Polars的rolling_corr需要两个表达式
correlation = rolling_net_lg_flow.rolling_corr(price_mom, window=self.m)
return stock_df.with_columns(
correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
)
class LargeBuyConsolidationOperator(StockWiseOperator):
"""大单买入盘整期算子"""
def __init__(self, n: int = 20, vol_quantile: float = 0.2):
config = OperatorConfig(
name=f"lg_buy_consolidation_{n}",
description=f"{n}日大单买入盘整期",
required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
'sell_elg_vol', 'vol'],
output_columns=[f'lg_buy_consolidation_{n}'],
parameters={'n': n, 'vol_quantile': vol_quantile}
)
super().__init__(config)
self.n = n
self.vol_quantile = vol_quantile
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算大单买入盘整期"""
epsilon = 1e-8
# 计算收盘价滚动标准差
rolling_std = pl.col('close').rolling_std(window=self.n)
# 计算大单净流比率
net_lg_flow_ratio = (
(pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) /
(pl.col('vol') + epsilon)
)
# 计算滚动均值
rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(window=self.n)
# 计算低波动阈值
# 这里需要按日期分组计算分位数,比较复杂,简化处理
# 在实际使用时可能需要DateWiseOperator来处理横截面分位数
return stock_df.with_columns(
rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
)
# 资金流因子集合
MONEY_FLOW_OPERATORS = [
MoneyFlowIntensityOperator(),
FlowDivergenceRatioOperator(),
FlowStructureChangeOperator(),
FlowAccelerationOperator(),
LargeFlowMomentumCorrelationOperator(),
LargeBuyConsolidationOperator(),
]
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
"""
应用所有资金流因子
Args:
df: 输入的Polars DataFrame
operators: 要应用的算子列表如果为None则使用默认列表
Returns:
添加了资金流因子的DataFrame
"""
if operators is None:
operators = MONEY_FLOW_OPERATORS
result_df = df
for operator in operators:
result_df = operator(result_df)
return result_df