Files
NewStock/main/factor/dragon_factor.py
2026-01-04 22:43:13 +08:00

528 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
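Factor module dedicated to the momentum-chasing (追涨) strategy.
Contains: price-pattern/trend breakout, chip-distribution penetration,
aggressive money-flow, price-acceleration, trend-efficiency and money-urgency
factors, plus the pipeline function that assembles them.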
"""
import numpy as np
import polars as pl
from main.factor.all_factors import calculate_all_factors
from main.factor.momentum_factors import ReturnFactor, VolatilityFactor
from main.factor.money_flow_factors import AccumAccel, ChipLockin, CostSqueeze, FlowIntensityFactor, HighCostSelling, InstNetAccum, LGFlowFactor
from main.factor.operator_framework import StockWiseFactor
from main.factor.special_factors import VolumeRatioFactor
from main.factor.technical_factors import CrossSectionalRankFactor, SMAFactor
# ==========================================
# 第一类:形态与趋势突破因子 (Price & Trend)
# ==========================================
class LimitUpGene(StockWiseFactor):
    """Limit-up "gene" factor: how decisively a bar closed at its limit-up price.

    The score is ``limit_proximity * body_strength`` where

    * ``limit_proximity = close / up_limit`` (about 1.0 when the close sits on
      the limit-up price), and
    * ``body_strength = (close - open) / (high - low)`` (near 1.0 for a
      bullish candle with almost no wicks).

    A bar that trades only at the limit-up price has a zero body and a zero
    range, so the raw body strength collapses to 0 even though it is the
    strongest possible pattern. Such bars are given a body strength of 1.0.
    """

    factor_id = "factor_limit_up_gene"
    required_factor_ids = ["close", "open", "high", "low", "up_limit"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return the limit-up score for every row of ``g``.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.
                Presumably one stock's rows in time order — confirm against
                the ``StockWiseFactor`` framework.

        Returns:
            A Series named ``factor_id`` with one score per row of ``g``.
        """
        eps = 1e-6  # guards the divisions below against a zero denominator
        sealed_threshold = 0.99  # close within 1% of the limit-up price

        close = pl.col("close")
        day_range = pl.col("high") - pl.col("low")

        # 1. Proximity of the close to the limit-up price.
        limit_proximity = close / (pl.col("up_limit") + eps)

        # 2. Candle-body fullness relative to the day's range.
        raw_body_strength = (close - pl.col("open")) / (day_range + eps)

        # 3. Extreme-value correction: a flat bar sealed at the limit has no
        #    body and no range, so the ratio above is ~0; score it as a full
        #    body instead.
        sealed_flat_bar = (limit_proximity > sealed_threshold) & (day_range < eps)
        body_strength = (
            pl.when(sealed_flat_bar).then(1.0).otherwise(raw_body_strength)
        )

        score = limit_proximity * body_strength
        # Evaluate against ``g`` so a concrete Series (not a lazy expression)
        # is returned, as the annotation promises.
        return g.select(score.alias(self.factor_id)).to_series()
class TrendBreakout(StockWiseFactor):
    """All-time-high breakout factor.

    Combines how close the price is to its historical high (``close /
    his_high``) with the 5-row momentum of the close, then applies ``log1p``.
    The rationale given by the author: the nearer the price is to its
    historical high, the fewer trapped holders remain overhead.

    NOTE(review): ``his_low`` is listed as a required input but is not read
    by ``calc_factor``.
    """

    factor_id = "factor_trend_breakout"
    required_factor_ids = ["close", "his_high", "his_low"]

    def __init__(self):
        super().__init__(name=self.factor_id, parameters={}, required_factor_ids=self.required_factor_ids)

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return ``log1p((close / his_high) * (1 + 5-row return))`` per row.

        Args:
            g: Frame with ``close`` and ``his_high`` columns. Assumed to be a
                single stock sorted by time (per the original author's note)
                — confirm against the caller.

        Returns:
            A Series named ``factor_id``.
        """
        price = g["close"]

        # Ratio to the historical high; a value near 1 means the price is at
        # or about to break the high. The epsilon avoids a zero denominator.
        high_ratio = price / (g["his_high"] + 1e-6)

        # Close five rows earlier; nulls are forward-filled where a previous
        # value exists (the leading rows have none and stay null).
        price_five_back = price.shift(5).fill_null(strategy="forward")
        five_row_return = price / price_five_back - 1.0

        # Momentum is scaled by the proximity to the high.
        combined = high_ratio * (1 + five_row_return)
        return combined.log1p().alias(self.factor_id)
# ==========================================
# 第二类:筹码穿透与真空因子 (Chip Structure)
# ==========================================
class ChipPenetration(StockWiseFactor):
    """Chip-penetration ("blue sky") factor.

    Measures how far the close sits above the 95% chip-cost line, weighted by
    a volume confirmation term. The author's rationale: once the close clears
    the 95% cost line, nearly all holders are in profit and overhead selling
    pressure is minimal.

    NOTE(review): ``cost_50pct`` is listed as a required input but does not
    take part in the calculation.
    """

    factor_id = "factor_chip_penetration"
    required_factor_ids = ["close", "cost_50pct", "cost_95pct", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return the volume-weighted penetration score for every row of ``g``.

        Args:
            g: Frame with ``close``, ``cost_95pct`` and ``vol`` columns.
                Presumably one stock's rows in time order (the rolling mean
                depends on row order) — confirm against the framework.

        Returns:
            A Series named ``factor_id``.
        """
        eps = 1e-6  # guards the divisions below against a zero denominator
        cost95 = pl.col("cost_95pct")
        vol = pl.col("vol")

        # 1. Penetration: position of the close relative to the 95% cost line.
        #    > 0 means the line is broken; larger means a stronger break.
        penetration = (pl.col("close") - cost95) / (cost95 + eps)

        # 2. Volume confirmation: today's volume against its 5-row mean
        #    (the mean includes the current row).
        vol_5d = vol.rolling_mean(window_size=5, min_periods=1)
        vol_ratio = vol / (vol_5d + eps)

        # Above-average volume is rewarded with a log-damped weight;
        # otherwise a flat 0.5 weight is applied.
        volume_weight = (
            pl.when(vol_ratio > 1.0).then(vol_ratio.log1p()).otherwise(0.5)
        )

        valid_breakout = penetration * volume_weight
        # ``pl.when`` builds a lazy expression; evaluate it against ``g`` so a
        # concrete Series is returned, as the annotation promises.
        return g.select(valid_breakout.alias(self.factor_id)).to_series()
class WinnerExpansion(StockWiseFactor):
    """Winner (in-profit holder) expansion-rate factor.

    Approximates the share of holders in profit from the chip-cost
    distribution and returns its row-over-row change. The author's rationale:
    a rapidly spreading "money-making effect" is the core driver for chasing.
    """

    factor_id = "factor_winner_expansion"
    required_factor_ids = ["close", "cost_5pct", "cost_95pct"]

    def __init__(self):
        super().__init__(name=self.factor_id, parameters={}, required_factor_ids=self.required_factor_ids)

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return the first difference of the estimated in-profit share.

        Args:
            g: Frame with ``close``, ``cost_5pct`` and ``cost_95pct`` columns.
                Presumably one stock's rows in time order — confirm against
                the framework.

        Returns:
            A Series named ``factor_id``; rows with no difference available
            (such as the first row) are 0.0.
        """
        cost_floor = g["cost_5pct"]
        cost_span = g["cost_95pct"] - cost_floor

        # Rough in-profit share, modelling chips as uniformly spread between
        # the 5% and 95% cost lines; clamped into [0, 1].
        profit_share = ((g["close"] - cost_floor) / (cost_span + 1e-6)).clip(0.0, 1.0)

        # A sudden jump marks price pulling sharply away from the cost zone.
        return profit_share.diff().fill_null(0.0).alias(self.factor_id)
# ==========================================
# 第三类:攻击型资金流因子 (Attack Flow)
# ==========================================
class AttackFlow(StockWiseFactor):
    """Main-force "attack" flow factor.

    Unlike a plain net-inflow measure, this only rewards large-order net
    buying that happens while the price is rising. Net buying into a falling
    price is treated as bottom-fishing, and net selling (whatever the price
    does) as distribution; both score 0.
    """

    factor_id = "factor_attack_flow"
    required_factor_ids = ["close", "open", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "circ_mv"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return ``log1p(net_rate * price_strength)`` for attack rows, else 0.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.

        Returns:
            A Series named ``factor_id``. It is non-zero only where the
            large-order net rate and the open-to-close change are both
            positive.
        """
        eps = 1e-6  # guards the divisions below against a zero denominator
        close = pl.col("close")
        open_price = pl.col("open")

        # 1. Net large + extra-large order volume.
        main_net = (pl.col("buy_elg_vol") + pl.col("buy_lg_vol")) - (
            pl.col("sell_elg_vol") + pl.col("sell_lg_vol")
        )

        # 2. Normalise by circulating shares (a turnover-style view).
        #    Assumes circ_mv is in units of 10,000 CNY (original author's
        #    note) and that the *_vol columns share the unit of the derived
        #    share count — TODO confirm against the data source.
        circ_shares = (pl.col("circ_mv") * 10000) / (close + eps)
        net_rate = main_net / (circ_shares + eps)

        # 3. Price strength: open-to-close change of the bar.
        price_strength = (close - open_price) / (open_price + eps)

        # Both legs must be positive. Testing only the product would also
        # accept net outflow on a falling bar (negative * negative > 0),
        # which is the opposite of an attack.
        is_attack = (net_rate > 0) & (price_strength > 0)
        attack_score = (
            pl.when(is_attack).then(net_rate * price_strength).otherwise(0.0)
        )

        # ``pl.when`` builds a lazy expression; evaluate it against ``g`` so a
        # concrete Series is returned, as the annotation promises.
        return g.select(attack_score.log1p().alias(self.factor_id)).to_series()
class DivergenceAlert(StockWiseFactor):
    """Price / money-flow divergence factor (a negative, risk-control signal).

    Flags bars where the price rises while large orders are net sellers — a
    bull trap the chasing strategy should avoid. Flagged rows receive a
    negative score; all other rows receive 0.
    """

    factor_id = "factor_divergence_alert"
    required_factor_ids = ["close", "open", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return ``flow_ratio * 10`` on trap rows and 0.0 elsewhere.

        A trap row is one whose open-to-close change exceeds 2% while the
        large-order net flow is below -10% of the bar's volume.

        Args:
            g: Frame holding the columns listed in ``required_factor_ids``.

        Returns:
            A Series named ``factor_id``; the more severe the divergence, the
            more negative the value.
        """
        eps = 1e-6  # guards the divisions below against a zero denominator
        open_price = pl.col("open")

        # 1. Open-to-close price change. The epsilon keeps a 0/0 bar from
        #    producing NaN, which polars orders above every number and which
        #    would therefore pass the "> 2%" test below.
        pct_change = (pl.col("close") - open_price) / (open_price + eps)

        # 2. Large-order net flow as a share of total volume.
        main_net = (pl.col("buy_elg_vol") + pl.col("buy_lg_vol")) - (
            pl.col("sell_elg_vol") + pl.col("sell_lg_vol")
        )
        flow_ratio = main_net / (pl.col("vol") + eps)

        # 3. Divergence: price up strongly while the main force sells.
        trap_signal = (pct_change > 0.02) & (flow_ratio < -0.1)
        factor = pl.when(trap_signal).then(flow_ratio * 10).otherwise(0.0)

        # ``pl.when`` builds a lazy expression; evaluate it against ``g`` so a
        # concrete Series is returned, as the annotation promises.
        return g.select(factor.alias(self.factor_id)).to_series()
class PriceGammaFactor(StockWiseFactor):
    """Price-acceleration ("gamma") factor.

    Identifies stocks whose gains are accelerating, e.g. +1%, then +3%, then
    +8% on consecutive bars.
    """

    factor_id = "factor_price_gamma"
    required_factor_ids = ["close"]

    def __init__(self):
        super().__init__(name=self.factor_id, parameters={}, required_factor_ids=self.required_factor_ids)

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return the 2-row change in return on up bars, and -1.0 otherwise.

        Args:
            g: Frame with a ``close`` column. Presumably one stock's rows in
                time order — confirm against the framework.

        Returns:
            A Series named ``factor_id``.
        """
        # 1. Row-over-row return of the close.
        ret = pl.col("close").pct_change()

        # 2. Acceleration, approximated by a simple difference:
        #    return now minus return two rows earlier.
        accel = ret - ret.shift(2)

        # Only up bars carry an acceleration value; every other row gets the
        # fixed low score -1.0.
        score = pl.when(ret > 0).then(accel).otherwise(-1.0)

        # ``pl.when`` builds a lazy expression; evaluate it against ``g`` so a
        # concrete Series is returned, as the annotation promises.
        return g.select(score.alias(self.factor_id)).to_series()
class TrendEfficiencyFactor(StockWiseFactor):
    """Trend-efficiency factor (ER, efficiency ratio): displacement / path.

    The closer the value is to 1.0, the more the price path over the window
    resembles a straight line (a one-sided move).
    """

    factor_id = "factor_trend_efficiency"
    required_factor_ids = ["close"]

    def __init__(self, window=10):
        super().__init__(
            name=self.factor_id,
            parameters={"window": window},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return the efficiency ratio of ``close`` over the configured window.

        Args:
            g: Frame with a ``close`` column. Presumably one stock's rows in
                time order — confirm against the framework.

        Returns:
            A Series named ``factor_id``.
        """
        prices = g["close"]
        lookback = self.parameters["window"]

        # Net displacement: |price now - price `lookback` rows ago|.
        displacement = (prices - prices.shift(lookback)).abs()

        # Path length: sum of the absolute row-to-row moves over the window.
        step_sizes = (prices - prices.shift(1)).abs()
        path_length = step_sizes.rolling_sum(lookback)

        # Efficiency = displacement / path; epsilon avoids a zero denominator.
        ratio = displacement / (path_length + 1e-6)
        return ratio.alias(self.factor_id)
class MoneyUrgencyFactor(StockWiseFactor):
    """Money-urgency factor: volume ratio times large-order buy share.

    NOTE(review): ``circ_mv`` is listed as a required input but is not read
    by ``calc_factor``.
    """

    factor_id = "factor_money_urgency"
    required_factor_ids = ["vol", "buy_lg_vol", "buy_elg_vol", "circ_mv"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Return ``volume_ratio * large_buy_share`` for every row of ``g``.

        Args:
            g: Frame with ``vol``, ``buy_lg_vol`` and ``buy_elg_vol`` columns.
                Presumably one stock's rows in time order — confirm against
                the framework.

        Returns:
            A Series named ``factor_id``.
        """
        volume = g["vol"]
        large_buys = g["buy_lg_vol"] + g["buy_elg_vol"]

        # Volume ratio: current volume over the mean of the five preceding
        # rows. The shift keeps the current row out of its own baseline
        # (avoids look-ahead).
        baseline_volume = volume.rolling_mean(5).shift(1)
        relative_volume = volume / (baseline_volume + 1e-6)

        # Share of the bar's volume bought through large and extra-large
        # orders.
        large_buy_share = large_buys / (volume + 1e-6)

        # Multiplying amplifies the resonance: heavy volume together with
        # heavy large-order buying is the chasing signal.
        resonance = relative_volume * large_buy_share
        return resonance.alias(self.factor_id)
from typing import List, Dict, Any
import polars as pl
# 假设你之前的因子定义都在这些模块中
# from main.factor.chasing_factors import (
# LimitUpGene, TrendBreakout, # 形态类
# ChipPenetration, WinnerExpansion, # 筹码类
# AttackFlow, DivergenceAlert # 资金类
# )
# from main.factor.fund_flow import LGFlowFactor, CostSqueeze, ... # 原有因子
# from main.factor.common import CrossSectionalRankFactor, SMAFactor ...
# def run_chasing_strategy_pipeline(df: pl.DataFrame) -> pl.DataFrame:
# """
# 执行【追涨/打板策略】的因子计算流程
# 该函数会组合:
# 1. 基础动量与波动率因子 (Base)
# 2. 资金流与筹码原有因子 (Legacy)
# 3. 新增的追涨专用因子 (New Chasing Factors)
# 4. 截面Rank因子 (用于选股排序)
# Returns:
# df_result: 包含所有因子列的 DataFrame
# """
# # =======================================================
# # 1. 配置股票截面因子 (Stock Wise)
# # =======================================================
# stock_configs = [
# # --- A. 新增:追涨核心因子 (Priority High) ---
# {"class": LimitUpGene, "params": {}}, # 涨停基因 (形态)
# {"class": TrendBreakout, "params": {}}, # 趋势突破 (形态)
# {"class": ChipPenetration, "params": {}}, # 筹码穿透 (筹码)
# {"class": WinnerExpansion, "params": {}}, # 获利盘扩张 (筹码)
# {"class": AttackFlow, "params": {}}, # 攻击资金流 (资金)
# {"class": DivergenceAlert, "params": {}}, # 顶背离警示 (风控)
# # --- B. 保留:原有高价值因子 (用于辅助验证) ---
# # 资金流强度
# {"class": FlowIntensityFactor, "params": {}},
# {"class": LGFlowFactor, "params": {}},
# {"class": InstNetAccum, "params": {}}, # 机构净累积
# {"class": AccumAccel, "params": {}}, # 累积加速
# # 筹码结构
# {"class": CostSqueeze, "params": {}}, # 成本挤压 (用于低位启动判断)
# {"class": ChipLockin, "params": {}}, # 筹码锁定
# {"class": HighCostSelling, "params": {}}, # 高位抛压 (用于风控)
# # --- C. 基础:技术指标 (用于过滤) ---
# {"class": SMAFactor, "params": {"window": 5}}, # 5日线防守
# {"class": SMAFactor, "params": {"window": 20}}, # 趋势判断
# {"class": VolatilityFactor, "params": {"period": 10}}, # 波动率(剔除织布机)
# {"class": ReturnFactor, "params": {"period": 5}}, # 5日涨幅
# {"class": ReturnFactor, "params": {"period": 20}}, # 月涨幅
# {"class": VolumeRatioFactor, "params": {}}, # 量比
# ]
# # =======================================================
# # 2. 配置日期截面因子 (Date Wise / Cross Sectional)
# # =======================================================
# # 追涨策略的核心在于:买入全市场最强的票。
# # 因此,我们需要对核心因子进行截面排序 (Rank)。
# date_configs = [
# # --- 基础排序 ---
# {"class": CrossSectionalRankFactor, "params": {"column": "circ_mv", "name": "rank_mv"}}, # 市值排序(剔除微盘)
# {"class": CrossSectionalRankFactor, "params": {"column": "return_5", "name": "rank_ret5"}}, # 短期强度排序
# # --- 策略核心排序 (重要!) ---
# # 1. 攻击力排序:全市场谁的主力攻击性最强?
# {
# "class": CrossSectionalRankFactor,
# "params": {"column": "factor_attack_flow", "name": "rank_attack_flow"}
# },
# # 2. 突破度排序:全市场谁的上方真空度最高?
# {
# "class": CrossSectionalRankFactor,
# "params": {"column": "factor_chip_penetration", "name": "rank_chip_penetration"}
# },
# # 3. 涨停基因排序:全市场谁的板最硬?
# {
# "class": CrossSectionalRankFactor,
# "params": {"column": "factor_limit_up_gene", "name": "rank_limit_gene"}
# },
# # 4. 资金流强度排序
# {
# "class": CrossSectionalRankFactor,
# "params": {"column": "flow_intensity", "name": "rank_flow_intensity"}
# }
# ]
# # =======================================================
# # 3. 调用统一计算接口
# # =======================================================
# print(f"开始计算追涨策略因子... 包含 {len(stock_configs)} 个股票因子配置")
# # 调用你提供的 calculate_all_factors
# # 注意:这里会覆盖函数内部的默认 list只计算我们指定的
# result_df, factor_ids = calculate_all_factors(
# df=df,
# stock_factor_configs=stock_configs,
# date_factor_configs=date_configs
# )
# print(f"计算完成。生成因子列: {factor_ids}")
# return result_df, factor_ids
def run_chasing_strategy_pipeline(df: pl.DataFrame) -> tuple[pl.DataFrame, Any]:
    """Compute the momentum-chasing factor set for ``df``.

    Builds the per-stock factor configuration and the cross-sectional rank
    configuration, then delegates the computation to
    ``calculate_all_factors``.

    Args:
        df: Input frame, passed unchanged to ``calculate_all_factors``. It
            must provide the columns required by the configured factors.

    Returns:
        The two values produced by ``calculate_all_factors``, unchanged: the
        result frame and a second value (presumably the generated factor
        column names — verify against ``calculate_all_factors``).
    """
    stock_configs = [
        # --- A. Chasing-specific core factors ---
        {"class": LimitUpGene, "params": {}},  # limit-up gene (pattern)
        {"class": TrendBreakout, "params": {}},  # trend breakout (pattern)
        {"class": ChipPenetration, "params": {}},  # chip penetration (chips)
        {"class": WinnerExpansion, "params": {}},  # winner expansion (chips)
        {"class": AttackFlow, "params": {}},  # attack money flow (flow)
        {"class": DivergenceAlert, "params": {}},  # divergence alert (risk)
        # --- B. Retained legacy factors (auxiliary confirmation) ---
        # Money-flow strength
        {"class": FlowIntensityFactor, "params": {}},
        {"class": LGFlowFactor, "params": {}},
        {"class": InstNetAccum, "params": {}},  # institutional net accumulation
        {"class": AccumAccel, "params": {}},  # accumulation acceleration
        # Chip structure
        {"class": CostSqueeze, "params": {}},  # cost squeeze (low-level start)
        {"class": ChipLockin, "params": {}},  # chip lock-in
        {"class": HighCostSelling, "params": {}},  # high-cost selling (risk)
        # --- C. Basic technical indicators (used for filtering) ---
        {"class": SMAFactor, "params": {"window": 5}},  # 5-day line defence
        {"class": SMAFactor, "params": {"window": 20}},  # trend judgement
        {"class": VolatilityFactor, "params": {"period": 10}},  # volatility
        {"class": ReturnFactor, "params": {"period": 5}},  # 5-day return
        {"class": ReturnFactor, "params": {"period": 20}},  # monthly return
        {"class": VolumeRatioFactor, "params": {}},  # volume ratio
        # 1. Explosiveness (gamma)
        {"class": PriceGammaFactor, "params": {}},
        # 2. Purity (efficiency)
        {"class": TrendEfficiencyFactor, "params": {"window": 10}},
        # 3. Urgency
        {"class": MoneyUrgencyFactor, "params": {}},
        # TODO: 4. position correction (avoid chasing at the top), e.g. a
        # LowPositionStart factor that drops stocks which already doubled.
    ]

    # Cross-sectional (per-date) configuration: rank the three core factors.
    # NOTE(review): the efficiency column is referenced with a "_10" suffix
    # although TrendEfficiencyFactor aliases its output to
    # "factor_trend_efficiency"; presumably the framework appends the window
    # parameter to the column name — confirm.
    date_configs = [
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_price_gamma", "name": "rank_gamma"}},
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_trend_efficiency_10", "name": "rank_eff"}},
        {"class": CrossSectionalRankFactor, "params": {"column": "factor_money_urgency", "name": "rank_urgency"}},
    ]

    result_df, calc_feature = calculate_all_factors(df, stock_configs, date_configs)

    # Intended downstream use (not implemented here): rather than a weighted
    # sum, combine the ranks as a funnel / product, e.g. gamma rank in the
    # market's top 10%, efficiency rank > 0.8 and urgency rank > 0.8,
    # multiplied into a single "aggressive_score".
    return result_df, calc_feature