"""Factors dedicated to the momentum-chasing strategy.

Contains three families of per-stock factors: price-pattern / trend
breakout, chip-distribution penetration, and attack-style money flow.
"""

from typing import Any, Dict, List

import numpy as np
import polars as pl

from main.factor.all_factors import calculate_all_factors
from main.factor.momentum_factors import ReturnFactor, VolatilityFactor
from main.factor.money_flow_factors import (
    AccumAccel,
    ChipLockin,
    CostSqueeze,
    FlowIntensityFactor,
    HighCostSelling,
    InstNetAccum,
    LGFlowFactor,
)
from main.factor.operator_framework import StockWiseFactor
from main.factor.special_factors import VolumeRatioFactor
from main.factor.technical_factors import CrossSectionalRankFactor, SMAFactor


def _series_where(mask: pl.Series, if_true: Any, if_false: Any) -> pl.Series:
    """Return a Series taking ``if_true`` where ``mask`` holds, else ``if_false``.

    ``pl.when(...).then(...).otherwise(...)`` builds a lazy ``pl.Expr`` even
    when it is fed Series; evaluating it through ``pl.select`` turns it back
    into a concrete Series so ``calc_factor`` honours its ``-> pl.Series``
    annotation. A null entry in ``mask`` selects ``if_false``, exactly as
    ``pl.when`` does.
    """
    return pl.select(pl.when(mask).then(if_true).otherwise(if_false)).to_series()


# ==========================================
# Family 1: price-pattern and trend breakout factors
# ==========================================

class LimitUpGene(StockWiseFactor):
    """Limit-up "gene" factor.

    Scores how close the close is to the limit-up price, weighted by how full
    the candle body is. A bar that is sealed at the limit with no intraday
    range (a "one-word" board) receives full body strength.
    """

    factor_id = "factor_limit_up_gene"
    required_factor_ids = ["close", "open", "high", "low", "up_limit"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.

        Returns:
            Series named ``factor_id``: proximity-to-limit times body strength.
        """
        close = g["close"]
        open_price = g["open"]
        high = g["high"]
        low = g["low"]
        up_limit = g["up_limit"]

        # 1. Proximity to the limit-up price (about 1.0 when sealed).
        #    The epsilon keeps the denominator away from zero.
        limit_proximity = close / (up_limit + 1e-6)

        # 2. Body fullness: (close - open) / (high - low). Values near 1 mean
        #    the bar has almost no shadows.
        range_len = high - low
        body_len = close - open_price
        body_strength = body_len / (range_len + 1e-6)

        # 3. Extreme-case correction: on a sealed bar with no intraday range,
        #    body and range are both 0, so the ratio above collapses to 0 and
        #    the strongest pattern would score lowest. Give it full strength.
        sealed_flat_board = (limit_proximity > 0.99) & (range_len < 1e-6)
        body_strength = _series_where(sealed_flat_board, 1.0, body_strength)

        score = limit_proximity * body_strength
        return score.alias(self.factor_id)


class TrendBreakout(StockWiseFactor):
    """All-time-high breakout factor.

    Combines the ratio of close to the historical high with the 5-row
    momentum of the close.
    """

    factor_id = "factor_trend_breakout"
    required_factor_ids = ["close", "his_high", "his_low"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with ``close`` and ``his_high``; assumed to be a single
                stock sorted by time (not checked here).

        Returns:
            Series named ``factor_id``:
            ``log1p(close / his_high * (1 + 5-row return))``.
        """
        close = g["close"]
        his_high = g["his_high"]

        # Ratio to the historical high (near 1 means at or about to break it).
        dist_to_high = close / (his_high + 1e-6)

        # 5-row momentum.
        # NOTE(review): a forward fill cannot fill the leading nulls produced
        # by shift(5), so the first five rows stay null; only interior nulls
        # are filled. Confirm whether the warm-up rows should get a value.
        mom_5 = close / close.shift(5).fill_null(strategy="forward") - 1.0

        # Momentum is scaled by how close the price is to the high.
        breakout_score = dist_to_high * (1 + mom_5)

        return breakout_score.log1p().alias(self.factor_id)


# ==========================================
# Family 2: chip-structure penetration factors
# ==========================================

class ChipPenetration(StockWiseFactor):
    """Chip penetration ("blue sky") factor.

    Measures how far the close sits above the 95% chip-cost line, weighted by
    a volume confirmation term.
    """

    factor_id = "factor_chip_penetration"
    required_factor_ids = ["close", "cost_50pct", "cost_95pct", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with ``close``, ``cost_95pct`` and ``vol``.

        Returns:
            Series named ``factor_id``: penetration times the volume weight.
        """
        close = g["close"]
        cost95 = g["cost_95pct"]
        vol = g["vol"]

        # 1. Penetration: relative position of close versus the 95% cost
        #    line; positive means the line has been broken.
        penetration = (close - cost95) / (cost95 + 1e-6)

        # 2. Volume confirmation: today's volume over its 5-row mean (the
        #    window includes the current row).
        vol_5d = vol.rolling_mean(window_size=5, min_periods=1)
        vol_ratio = vol / (vol_5d + 1e-6)

        # Above-average volume is weighted by log1p(ratio); otherwise a flat
        # 0.5 weight is applied.
        volume_weight = _series_where(vol_ratio > 1.0, vol_ratio.log1p(), 0.5)

        return (penetration * volume_weight).alias(self.factor_id)


class WinnerExpansion(StockWiseFactor):
    """Winner-ratio expansion speed factor.

    Approximates the share of profitable holders from the 5%/95% cost lines
    and returns its row-over-row change.
    """

    factor_id = "factor_winner_expansion"
    required_factor_ids = ["close", "cost_5pct", "cost_95pct"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with ``close``, ``cost_5pct`` and ``cost_95pct``.

        Returns:
            Series named ``factor_id``: first difference of the clipped
            winner proxy, with nulls replaced by 0.0.
        """
        close = g["close"]
        cost5 = g["cost_5pct"]
        cost95 = g["cost_95pct"]

        # Winner proxy in [0, 1], assuming chips are spread uniformly between
        # the 5% and 95% cost lines.
        chip_range = cost95 - cost5
        winner_proxy = ((close - cost5) / (chip_range + 1e-6)).clip(0.0, 1.0)

        # A sudden jump marks the price leaving the cost zone.
        expansion_rate = winner_proxy.diff()

        return expansion_rate.fill_null(0.0).alias(self.factor_id)


# ==========================================
# Family 3: attack-style money-flow factors
# ==========================================

class AttackFlow(StockWiseFactor):
    """Main-force attack flow factor.

    Rewards large-order net buying that happens on an up bar. Net inflow on a
    falling bar, and net outflow in any direction, score zero.
    """

    factor_id = "factor_attack_flow"
    required_factor_ids = ["close", "open", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "circ_mv"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.

        Returns:
            Series named ``factor_id``: ``log1p`` of net-flow rate times the
            open-to-close return where both are positive, else ``log1p(0)``.
        """
        close = g["close"]
        open_price = g["open"]
        buy_elg = g["buy_elg_vol"]
        buy_lg = g["buy_lg_vol"]
        sell_elg = g["sell_elg_vol"]
        sell_lg = g["sell_lg_vol"]
        circ_mv = g["circ_mv"]

        # 1. Net large + extra-large order volume.
        main_net = (buy_elg + buy_lg) - (sell_elg + sell_lg)

        # 2. Normalise by circulating shares (a turnover-style view).
        #    Assumes circ_mv is expressed in units of 10,000 - TODO confirm.
        circ_shares = (circ_mv * 10000) / (close + 1e-6)
        net_rate = main_net / (circ_shares + 1e-6)

        # 3. Price strength: open-to-close return of the bar.
        price_strength = (close - open_price) / (open_price + 1e-6)

        # Both legs must be positive. Testing only the sign of the product
        # would also reward net outflow on a down bar, because the product of
        # two negatives is positive.
        is_attack = (net_rate > 0) & (price_strength > 0)
        attack_score = _series_where(is_attack, net_rate * price_strength, 0.0)

        return attack_score.log1p().alias(self.factor_id)


class DivergenceAlert(StockWiseFactor):
    """Price / money-flow divergence factor (a negative signal).

    Flags bars where the price rises while large orders are net sellers, to
    help filter out bull traps.
    """

    factor_id = "factor_divergence_alert"
    required_factor_ids = ["close", "open", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.

        Returns:
            Series named ``factor_id``: ``flow_ratio * 10`` (negative) on
            divergent bars, 0.0 elsewhere.
        """
        close = g["close"]
        open_price = g["open"]
        vol = g["vol"]

        # 1. Open-to-close return, with the same zero guard AttackFlow uses.
        pct_change = (close - open_price) / (open_price + 1e-6)

        # 2. Net large-order volume as a share of total volume.
        main_net = (g["buy_elg_vol"] + g["buy_lg_vol"]) - (g["sell_elg_vol"] + g["sell_lg_vol"])
        flow_ratio = main_net / (vol + 1e-6)

        # 3. Bull trap: price up more than 2% while net flow is below -10%.
        trap_signal = (pct_change > 0.02) & (flow_ratio < -0.1)

        # The stronger the divergence, the more negative the value.
        factor = _series_where(trap_signal, flow_ratio * 10, 0.0)

        return factor.alias(self.factor_id)


class PriceGammaFactor(StockWiseFactor):
    """Price acceleration ("gamma") factor.

    Highlights accelerating gains, e.g. +1%, +3%, +8% on consecutive rows.
    """

    factor_id = "factor_price_gamma"
    required_factor_ids = ["close"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with a ``close`` column.

        Returns:
            Series named ``factor_id``: ``ret_t - ret_{t-2}`` where the
            current return is positive, otherwise -1.0.
        """
        close = g["close"]

        # 1. Row-over-row return.
        ret = close.pct_change()

        # 2. Acceleration, approximated by a two-row difference of returns.
        accel = ret - ret.shift(2)

        # Only positive-return rows keep their acceleration; all other rows
        # (including a null return) get the floor value.
        score = _series_where(ret > 0, accel, -1.0)

        return score.alias(self.factor_id)


class TrendEfficiencyFactor(StockWiseFactor):
    """Trend efficiency ratio (displacement / path length).

    Values near 1.0 mean the price moved in an almost straight line over the
    window.
    """

    factor_id = "factor_trend_efficiency"
    required_factor_ids = ["close"]

    def __init__(self, window=10):
        super().__init__(
            name=self.factor_id,
            parameters={"window": window},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with a ``close`` column.

        Returns:
            Series named ``factor_id``: ``|close_t - close_{t-window}|``
            divided by the rolling sum of absolute one-row changes.
        """
        close = g["close"]
        window = self.parameters["window"]

        # 1. Net displacement over the window.
        change = (close - close.shift(window)).abs()

        # 2. Path length: sum of absolute one-row moves over the window.
        path = (close - close.shift(1)).abs().rolling_sum(window)

        # 3. Efficiency = displacement / path.
        efficiency = change / (path + 1e-6)

        return efficiency.alias(self.factor_id)


class MoneyUrgencyFactor(StockWiseFactor):
    """Money urgency factor: volume ratio times the large-order buy share."""

    factor_id = "factor_money_urgency"
    required_factor_ids = ["vol", "buy_lg_vol", "buy_elg_vol", "circ_mv"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's rows.

        Args:
            g: Frame with ``vol``, ``buy_lg_vol`` and ``buy_elg_vol``.

        Returns:
            Series named ``factor_id``: volume ratio times large-buy share.
        """
        vol = g["vol"]
        big_buy = g["buy_lg_vol"] + g["buy_elg_vol"]

        # 1. Volume ratio: today's volume over the mean of the previous five
        #    rows. The shift keeps the current row out of the denominator.
        vol_ma5 = vol.rolling_mean(5).shift(1)
        vol_ratio = vol / (vol_ma5 + 1e-6)

        # 2. Share of total volume bought through large / extra-large orders.
        attack_ratio = big_buy / (vol + 1e-6)

        # 3. Multiplying the two amplifies rows where high volume and heavy
        #    large-order buying coincide.
        urgency = vol_ratio * attack_ratio

        return urgency.alias(self.factor_id)
def run_chasing_strategy_pipeline(df: pl.DataFrame) -> "tuple[pl.DataFrame, Any]":
    """Run the factor pipeline for the momentum-chasing strategy.

    Builds the per-stock factor configuration (chasing-specific factors,
    legacy money-flow / chip factors, basic technical indicators and the
    gamma / efficiency / urgency trio) plus the cross-sectional rank
    configuration, then delegates to ``calculate_all_factors``.

    Args:
        df: Input frame forwarded unchanged to ``calculate_all_factors``.

    Returns:
        The two values returned by ``calculate_all_factors``: the result
        frame and a second value (presumably the generated factor ids -
        confirm against ``calculate_all_factors``).
    """
    stock_configs = [
        # --- A. Chasing-specific core factors ---
        {"class": LimitUpGene, "params": {}},       # limit-up gene (pattern)
        {"class": TrendBreakout, "params": {}},     # trend breakout (pattern)
        {"class": ChipPenetration, "params": {}},   # chip penetration (chips)
        {"class": WinnerExpansion, "params": {}},   # winner expansion (chips)
        {"class": AttackFlow, "params": {}},        # attack flow (money)
        {"class": DivergenceAlert, "params": {}},   # divergence alert (risk)

        # --- B. Legacy factors kept as supporting evidence ---
        # Money-flow intensity
        {"class": FlowIntensityFactor, "params": {}},
        {"class": LGFlowFactor, "params": {}},
        {"class": InstNetAccum, "params": {}},      # institutional net accumulation
        {"class": AccumAccel, "params": {}},        # accumulation acceleration

        # Chip structure
        {"class": CostSqueeze, "params": {}},       # cost squeeze
        {"class": ChipLockin, "params": {}},        # chip lock-in
        {"class": HighCostSelling, "params": {}},   # high-cost selling pressure

        # --- C. Basic technical indicators (used as filters) ---
        {"class": SMAFactor, "params": {"window": 5}},
        {"class": SMAFactor, "params": {"window": 20}},
        {"class": VolatilityFactor, "params": {"period": 10}},
        {"class": ReturnFactor, "params": {"period": 5}},
        {"class": ReturnFactor, "params": {"period": 20}},
        {"class": VolumeRatioFactor, "params": {}},

        # --- D. Explosiveness / purity / urgency ---
        {"class": PriceGammaFactor, "params": {}},
        {"class": TrendEfficiencyFactor, "params": {"window": 10}},
        {"class": MoneyUrgencyFactor, "params": {}},
        # TODO: add a position-correction factor (LowPositionStart) once it
        # exists, to avoid chasing names that have already run up.
    ]

    # Cross-sectional (per-date) ranks of the three core factors.
    # NOTE(review): TrendEfficiencyFactor aliases its output as
    # "factor_trend_efficiency"; the "_10" suffix used below presumably comes
    # from the framework appending parameters to the name - confirm.
    date_configs = [
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_price_gamma", "name": "rank_gamma"}},
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_trend_efficiency_10", "name": "rank_eff"}},
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_money_urgency", "name": "rank_urgency"}},
    ]

    result_df, calc_feature = calculate_all_factors(df, stock_configs, date_configs)

    # NOTE: stock selection on top of these ranks (a funnel filter or a
    # product of ranks rather than a weighted sum) is intentionally not done
    # here; callers receive the raw factor and rank columns.
    return result_df, calc_feature