test(debug): 添加因子回测一致性问题的调试测试套件

- 分析GTJA_alpha032等因子在不同LOOKBACK_DAYS下的差异来源
- 验证cs_rank嵌套和截面股票数量对结果的影响
- 测试ts_rank NaN处理和除法除零修复
This commit is contained in:
2026-03-22 02:43:23 +08:00
parent ccd42082c2
commit 31b25074c3
10 changed files with 1532 additions and 87 deletions

View File

@@ -5,7 +5,7 @@
"""
from datetime import datetime
from typing import List, Optional
from typing import Dict, List, Optional
import polars as pl
@@ -182,14 +182,14 @@ SELECTED_FACTORS = [
"GTJA_alpha110",
"GTJA_alpha111",
"GTJA_alpha112",
"GTJA_alpha113",
# "GTJA_alpha113",
"GTJA_alpha114",
"GTJA_alpha115",
"GTJA_alpha117",
"GTJA_alpha118",
"GTJA_alpha119",
"GTJA_alpha120",
"GTJA_alpha121",
# "GTJA_alpha121",
"GTJA_alpha122",
"GTJA_alpha123",
"GTJA_alpha124",
@@ -205,13 +205,13 @@ SELECTED_FACTORS = [
"GTJA_alpha134",
"GTJA_alpha135",
"GTJA_alpha136",
"GTJA_alpha138",
# "GTJA_alpha138",
"GTJA_alpha139",
"GTJA_alpha140",
# "GTJA_alpha140",
"GTJA_alpha141",
"GTJA_alpha142",
"GTJA_alpha145",
"GTJA_alpha146",
# "GTJA_alpha146",
"GTJA_alpha148",
"GTJA_alpha150",
"GTJA_alpha151",
@@ -253,50 +253,50 @@ SELECTED_FACTORS = [
]
# 因子定义字典(完整因子库):用于存放尚未注册到 metadata 的因子
FACTOR_DEFINITIONS = {}
FACTOR_DEFINITIONS = {"cs_rank_circ_mv": "cs_rank(circ_mv)"}
# 需要排除的因子列表(这些因子不会被计算和使用)
# 用于临时屏蔽效果不好的因子,无需从 SELECTED_FACTORS 中删除
EXCLUDED_FACTORS: List[str] = [
# "GTJA_alpha005",
# "GTJA_alpha028",
# "GTJA_alpha023",
# "GTJA_alpha002",
# "GTJA_alpha010",
# "GTJA_alpha011",
# "GTJA_alpha044",
# "GTJA_alpha036",
# "GTJA_alpha027",
# "GTJA_alpha109",
# "GTJA_alpha104",
# "GTJA_alpha103",
# "GTJA_alpha085",
# "GTJA_alpha111",
# "GTJA_alpha092",
# "GTJA_alpha067",
# "GTJA_alpha060",
# "GTJA_alpha062",
# "GTJA_alpha063",
# "GTJA_alpha079",
# "GTJA_alpha073",
# "GTJA_alpha087",
# "GTJA_alpha117",
# "GTJA_alpha113",
# "GTJA_alpha138",
# "GTJA_alpha121",
# "GTJA_alpha124",
# "GTJA_alpha133",
# "GTJA_alpha131",
# "GTJA_alpha118",
# "GTJA_alpha164",
# "GTJA_alpha162",
# "GTJA_alpha157",
# "GTJA_alpha171",
# "GTJA_alpha177",
# "GTJA_alpha180",
# "GTJA_alpha188",
# "GTJA_alpha191",
]
# EXCLUDED_FACTORS: List[str] = [
# # "GTJA_alpha005",
# # "GTJA_alpha028",
# # "GTJA_alpha023",
# # "GTJA_alpha002",
# # "GTJA_alpha010",
# # "GTJA_alpha011",
# # "GTJA_alpha044",
# # "GTJA_alpha036",
# # "GTJA_alpha027",
# # "GTJA_alpha109",
# # "GTJA_alpha104",
# # "GTJA_alpha103",
# # "GTJA_alpha085",
# # "GTJA_alpha111",
# # "GTJA_alpha092",
# # "GTJA_alpha067",
# # "GTJA_alpha060",
# # "GTJA_alpha062",
# # "GTJA_alpha063",
# # "GTJA_alpha079",
# # "GTJA_alpha073",
# # "GTJA_alpha087",
# # "GTJA_alpha117",
# # "GTJA_alpha113",
# # "GTJA_alpha138",
# # "GTJA_alpha121",
# # "GTJA_alpha124",
# # "GTJA_alpha133",
# # "GTJA_alpha131",
# # "GTJA_alpha118",
# # "GTJA_alpha164",
# # "GTJA_alpha162",
# # "GTJA_alpha157",
# # "GTJA_alpha171",
# # "GTJA_alpha177",
# # "GTJA_alpha180",
# # "GTJA_alpha188",
# # "GTJA_alpha191",
# ]
def get_label_factor(label_name: str) -> dict:
@@ -471,17 +471,18 @@ def stock_pool_filter(df: pl.DataFrame) -> pl.Series:
& ~df["ts_code"].str.starts_with("4") # 排除北交所
)
# 在已筛选的股票中选取市值最小的500只
# 在已筛选的股票中,选取流通市值(circ_mv)最小的1000只(不足1000只时取全部)
valid_df = df.filter(code_filter)
n = min(500, len(valid_df))
small_cap_codes = valid_df.sort("total_mv").head(n)["ts_code"]
n = min(1000, len(valid_df))
small_cap_codes = valid_df.sort("circ_mv").head(n)["ts_code"]
# 返回布尔 Series:标记每一行的 ts_code 是否在被选中的股票中
return df["ts_code"].is_in(small_cap_codes)
# 定义筛选所需的基础列
STOCK_FILTER_REQUIRED_COLUMNS = ["total_mv"]
STOCK_FILTER_REQUIRED_COLUMNS = ["circ_mv"]
# =============================================================================
# 输出配置
@@ -490,7 +491,7 @@ OUTPUT_DIR = "output"
SAVE_PREDICTIONS = True
# 模型保存配置
SAVE_MODEL = True # 是否保存模型
SAVE_MODEL = False # 是否保存模型
MODEL_SAVE_DIR = "models" # 模型保存目录
# Top N 配置:每日推荐股票数量