348 lines
11 KiB
Python
348 lines
11 KiB
Python
"""
|
||
资金流因子模块
|
||
包含基于股票截面的资金流因子实现
|
||
"""
|
||
|
||
import numpy as np
|
||
import polars as pl
|
||
from main.factor.operator_framework import StockWiseFactor
|
||
|
||
|
||
class LGFlowFactor(StockWiseFactor):
|
||
"""大单净买量因子"""
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name="lg_flow",
|
||
parameters={},
|
||
required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
|
||
)
|
||
|
||
def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
|
||
# 计算大单净买量
|
||
buy_lg = group_df["buy_lg_vol"]
|
||
buy_elg = group_df["buy_elg_vol"]
|
||
sell_lg = group_df["sell_lg_vol"]
|
||
sell_elg = group_df["sell_elg_vol"]
|
||
|
||
lg_net_flow = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
return lg_net_flow.alias(self.factor_id)
|
||
|
||
|
||
class FlowIntensityFactor(StockWiseFactor):
|
||
"""资金流强度因子"""
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name="flow_intensity",
|
||
parameters={},
|
||
required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]
|
||
)
|
||
|
||
def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
|
||
# 计算资金流强度
|
||
buy_lg = group_df["buy_lg_vol"]
|
||
buy_elg = group_df["buy_elg_vol"]
|
||
sell_lg = group_df["sell_lg_vol"]
|
||
sell_elg = group_df["sell_elg_vol"]
|
||
vol = group_df["vol"]
|
||
|
||
lg_net_flow = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
flow_intensity = lg_net_flow / (vol + 1e-8) # 避免除零
|
||
|
||
return flow_intensity.alias(self.factor_id)
|
||
|
||
|
||
class FlowDivergenceFactor(StockWiseFactor):
|
||
"""资金流背离因子"""
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name="flow_divergence",
|
||
parameters={},
|
||
required_factor_ids=["buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
|
||
)
|
||
|
||
def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
|
||
# 计算资金流背离度
|
||
buy_sm = group_df["buy_sm_vol"]
|
||
sell_sm = group_df["sell_sm_vol"]
|
||
buy_lg = group_df["buy_lg_vol"]
|
||
buy_elg = group_df["buy_elg_vol"]
|
||
sell_lg = group_df["sell_lg_vol"]
|
||
sell_elg = group_df["sell_elg_vol"]
|
||
|
||
sm_net_flow = buy_sm - sell_sm
|
||
lg_net_flow = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
|
||
flow_divergence = sm_net_flow - lg_net_flow
|
||
return flow_divergence.alias(self.factor_id)
|
||
|
||
|
||
class FlowStructureFactor(StockWiseFactor):
|
||
"""资金流结构因子"""
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name="flow_structure",
|
||
parameters={},
|
||
required_factor_ids=["buy_sm_vol", "buy_lg_vol", "buy_elg_vol", "vol"]
|
||
)
|
||
|
||
def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
|
||
# 计算资金流结构
|
||
buy_sm = group_df["buy_sm_vol"]
|
||
buy_lg = group_df["buy_lg_vol"]
|
||
buy_elg = group_df["buy_elg_vol"]
|
||
vol = group_df["vol"]
|
||
|
||
total_buy = buy_sm + buy_lg + buy_elg
|
||
lg_elg_buy_prop = (buy_lg + buy_elg) / (total_buy + 1e-8) # 避免除零
|
||
|
||
flow_structure = lg_elg_buy_prop.diff().alias(self.factor_id)
|
||
return flow_structure
|
||
|
||
|
||
class FlowAccelerationFactor(StockWiseFactor):
|
||
"""资金流加速度因子"""
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name="flow_acceleration",
|
||
parameters={},
|
||
required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
|
||
)
|
||
|
||
def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
|
||
# 计算资金流加速度
|
||
buy_lg = group_df["buy_lg_vol"]
|
||
buy_elg = group_df["buy_elg_vol"]
|
||
sell_lg = group_df["sell_lg_vol"]
|
||
sell_elg = group_df["sell_elg_vol"]
|
||
|
||
lg_net_flow = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
lg_net_flow_change = lg_net_flow.diff()
|
||
flow_acceleration = lg_net_flow_change.diff()
|
||
|
||
return flow_acceleration.alias(self.factor_id)
|
||
|
||
|
||
class CostSqueeze(StockWiseFactor):
|
||
factor_id = "factor_cost_squeeze"
|
||
required_factor_ids = ["close", "cost_15pct", "cost_50pct", "cost_85pct", "vol"]
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
close = g["close"]
|
||
cost15 = g["cost_15pct"]
|
||
cost50 = g["cost_50pct"]
|
||
cost85 = g["cost_85pct"]
|
||
vol = g["vol"]
|
||
|
||
cost_range = cost85 - cost15
|
||
median_cost = cost50
|
||
price_pos = (close - median_cost) / (cost_range + 1e-6)
|
||
|
||
vol_5d = vol.rolling_mean(window_size=5, min_periods=1)
|
||
vol_ratio = vol / (vol_5d + 1e-6)
|
||
|
||
# 核心逻辑:成本区间窄 + 价格居中 + 量能萎缩 → 高挤压度
|
||
squeeze_score = (
|
||
(1.0 / (cost_range / (close + 1e-6) + 1e-6))
|
||
* (1.0 - price_pos.abs())
|
||
* (1.0 / (vol_ratio + 1e-6))
|
||
)
|
||
|
||
# 稳态化:对数变换
|
||
factor = (squeeze_score + 1.0).log()
|
||
return factor.alias(self.factor_id)
|
||
|
||
|
||
class HighCostSelling(StockWiseFactor):
|
||
factor_id = "factor_high_cost_selling"
|
||
required_factor_ids = ["close", "cost_85pct", "buy_sm_vol", "sell_lg_vol", "vol"]
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
close = g["close"]
|
||
cost85 = g["cost_85pct"]
|
||
buy_sm_vol = g["buy_sm_vol"]
|
||
sell_lg_vol = g["sell_lg_vol"]
|
||
vol = g["vol"]
|
||
|
||
is_above_85 = (close > cost85).cast(pl.Float64)
|
||
small_buy_ratio = buy_sm_vol / (vol + 1e-6)
|
||
large_sell_ratio = sell_lg_vol / (vol + 1e-6)
|
||
|
||
hcsp = is_above_85 * small_buy_ratio * large_sell_ratio
|
||
|
||
# 稳态化:取对数(避免极端值)
|
||
factor = (hcsp + 1e-6).log()
|
||
return factor.alias(self.factor_id)
|
||
|
||
|
||
class LowCostAccumulation(StockWiseFactor):
|
||
factor_id = "factor_low_cost_accumulation"
|
||
required_factor_ids = ["close", "his_low", "cost_15pct", "buy_lg_vol", "buy_elg_vol", "vol"]
|
||
|
||
def __init__(self):
|
||
super().__init__(
|
||
name=self.factor_id,
|
||
parameters={}, # 无参数,可扩展
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
close = g["close"]
|
||
his_low = g["his_low"]
|
||
cost15 = g["cost_15pct"]
|
||
buy_lg_vol = g["buy_lg_vol"]
|
||
buy_elg_vol = g["buy_elg_vol"]
|
||
vol = g["vol"]
|
||
|
||
is_below_15 = (close < cost15).cast(pl.Float64)
|
||
|
||
# 近5日最低价(含当日)
|
||
rolling_min_5 = his_low.rolling_min(window_size=5, min_periods=1)
|
||
# 注意:his_low 通常是历史最低,但这里我们用 close 的滚动最小更合理
|
||
# 修正:应使用 close 的滚动最小判断是否新低
|
||
close_rolling_min_5 = close.rolling_min(window_size=5, min_periods=1)
|
||
not_new_low = (close >= close_rolling_min_5).cast(pl.Float64)
|
||
|
||
big_buy_vol = buy_lg_vol + buy_elg_vol
|
||
big_buy_ratio = big_buy_vol / (vol + 1e-6)
|
||
|
||
lc_am = is_below_15 * not_new_low * big_buy_ratio
|
||
|
||
# 稳态化
|
||
factor = (lc_am + 1e-6).log()
|
||
return factor.alias(self.factor_id)
|
||
|
||
|
||
|
||
class InstNetAccum(StockWiseFactor):
|
||
factor_id = "inst_net_accum"
|
||
required_factor_ids = ["close", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "circ_mv"]
|
||
|
||
def __init__(self):
|
||
super(InstNetAccum, self).__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
close = g["close"]
|
||
buy_lg = g["buy_lg_vol"]
|
||
buy_elg = g["buy_elg_vol"]
|
||
sell_lg = g["sell_lg_vol"]
|
||
sell_elg = g["sell_elg_vol"]
|
||
circ_mv = g["circ_mv"]
|
||
|
||
big_net_vol = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
circ_shares = circ_mv / (close + 1e-6)
|
||
ina = big_net_vol / (circ_shares + 1e-6)
|
||
|
||
# 3日收益率(抑制大涨)
|
||
ret3 = close / close.shift(3) - 1
|
||
ret3 = ret3.fill_null(strategy="forward").fill_null(0.0)
|
||
ina = pl.when(ret3.abs() < 0.05).then(ina).otherwise(0.0)
|
||
|
||
return ina.log1p().alias(self.factor_id)
|
||
|
||
class ChipLockin(StockWiseFactor):
|
||
factor_id = "chip_lockin"
|
||
required_factor_ids = ["cost_5pct", "cost_95pct", "winner_rate"]
|
||
|
||
def __init__(self):
|
||
super(ChipLockin, self).__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
cost5 = g["cost_5pct"]
|
||
cost95 = g["cost_95pct"]
|
||
winner_rate = g["winner_rate"]
|
||
|
||
cost_width = cost95 - cost5
|
||
width_5d = cost_width.rolling_mean(window_size=5, min_periods=1)
|
||
width_10d = cost_width.rolling_mean(window_size=10, min_periods=1)
|
||
|
||
# 避免除零
|
||
width_contraction = (width_10d - width_5d) / (width_10d + 1e-6)
|
||
winner_std_5 = winner_rate.rolling_std(window_size=5, min_periods=1).fill_null(1e-6)
|
||
|
||
clm = width_contraction * (1.0 / (winner_std_5 + 1e-6))
|
||
return clm.log1p().alias(self.factor_id)
|
||
|
||
|
||
class RetailOutInstIn(StockWiseFactor):
|
||
factor_id = "retail_out_inst_in"
|
||
required_factor_ids = ["close", "buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
|
||
|
||
def __init__(self):
|
||
super(RetailOutInstIn, self).__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
close = g["close"]
|
||
buy_sm = g["buy_sm_vol"]
|
||
sell_sm = g["sell_sm_vol"]
|
||
buy_lg = g["buy_lg_vol"]
|
||
buy_elg = g["buy_elg_vol"]
|
||
sell_lg = g["sell_lg_vol"]
|
||
sell_elg = g["sell_elg_vol"]
|
||
|
||
small_net_out = sell_sm - buy_sm
|
||
big_net_in = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
|
||
# 价格抗跌:近5日未破位(用 close 自身滚动最小)
|
||
close_min_5 = close.rolling_min(window_size=5, min_periods=1)
|
||
|
||
roii = small_net_out * big_net_in
|
||
return roii.log1p().alias(self.factor_id)
|
||
|
||
|
||
class AccumAccel(StockWiseFactor):
|
||
factor_id = "accum_accel"
|
||
required_factor_ids = ["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]
|
||
|
||
def __init__(self):
|
||
super(AccumAccel, self).__init__(
|
||
name=self.factor_id,
|
||
parameters={},
|
||
required_factor_ids=self.required_factor_ids
|
||
)
|
||
|
||
def calc_factor(self, g: pl.DataFrame) -> pl.Series:
|
||
buy_lg = g["buy_lg_vol"]
|
||
buy_elg = g["buy_elg_vol"]
|
||
sell_lg = g["sell_lg_vol"]
|
||
sell_elg = g["sell_elg_vol"]
|
||
vol = g["vol"]
|
||
|
||
big_net_vol = (buy_lg + buy_elg) - (sell_lg + sell_elg)
|
||
big_net_ratio = big_net_vol / (vol + 1e-6)
|
||
|
||
net_5d = big_net_ratio.rolling_mean(window_size=5, min_periods=1)
|
||
net_5d_lag = net_5d.shift(5).fill_null(strategy="forward").fill_null(0.0)
|
||
acceleration = net_5d - net_5d_lag
|
||
|
||
return acceleration.log1p().alias(self.factor_id) |