feat(training): 支持 Label 预处理器

- DataPipeline 新增 label_processor_configs 参数
- 分离特征与 label 的预处理流程
- regression.py 添加 label 缩尾处理配置
- 将学习率由 0.02 下调至 0.01,并更新排除因子列表
This commit is contained in:
2026-03-29 02:37:53 +08:00
parent c3d1b157e9
commit 9e0114c745
2 changed files with 140 additions and 86 deletions

View File

@@ -51,61 +51,55 @@ TRAINING_TYPE = "regression"
# List of excluded factors (how the list is consumed is not visible in this view).
EXCLUDED_FACTORS = [
# NOTE(review): this view is a diff with its +/- markers stripped. The 55
# double-quoted entries below appear to be the pre-change side (hunk header
# "-51,61") and the 49 single-quoted entries after them the post-change side
# ("+51,55"). Several names occur in both groups; confirm which side is wanted
# before treating this as one literal list.
"GTJA_alpha062",
"GTJA_alpha060",
"GTJA_alpha058",
"GTJA_alpha056",
"GTJA_alpha053",
"GTJA_alpha040",
"GTJA_alpha043",
"GTJA_alpha027",
"CP",
"max_ret_20",
"debt_to_equity",
"close_vwap_deviation",
"EP",
"BP",
"EP_rank",
"GTJA_alpha044",
"GTJA_alpha036",
"GTJA_alpha010",
"GTJA_alpha005",
"GTJA_alpha001",
"GTJA_alpha002",
"GTJA_alpha007",
"GTJA_alpha016",
"GTJA_alpha073",
"GTJA_alpha133",
"GTJA_alpha131",
"GTJA_alpha117",
"GTJA_alpha124",
"GTJA_alpha120",
"GTJA_alpha119",
"GTJA_alpha103",
"GTJA_alpha099",
"GTJA_alpha105",
"GTJA_alpha104",
"GTJA_alpha090",
"GTJA_alpha085",
"GTJA_alpha083",
"GTJA_alpha084",
"GTJA_alpha087",
"GTJA_alpha092",
"GTJA_alpha074",
"GTJA_alpha089",
"GTJA_alpha173",
"GTJA_alpha157",
"GTJA_alpha139",
"GTJA_alpha162",
"GTJA_alpha163",
"GTJA_alpha177",
"price_to_avg_cost",
"cost_skewness",
"GTJA_alpha191",
"GTJA_alpha180",
"history_position",
"bottom_profit",
"smart_money_accumulation",
# NOTE(review): single-quoted entries from here on appear to be the
# post-change side of the diff — confirm.
'GTJA_alpha036',
'GTJA_alpha032',
'GTJA_alpha010',
'GTJA_alpha005',
'CP',
'BP',
'debt_to_equity',
'current_ratio',
'GTJA_alpha002',
'GTJA_alpha027',
'GTJA_alpha064',
'GTJA_alpha062',
'GTJA_alpha043',
'GTJA_alpha044',
'GTJA_alpha120',
'GTJA_alpha117',
'GTJA_alpha103',
'GTJA_alpha104',
'GTJA_alpha105',
'GTJA_alpha073',
'GTJA_alpha077',
'GTJA_alpha085',
'GTJA_alpha090',
'GTJA_alpha087',
'GTJA_alpha083',
'GTJA_alpha092',
'GTJA_alpha133',
'GTJA_alpha131',
'GTJA_alpha126',
'GTJA_alpha124',
'GTJA_alpha162',
'GTJA_alpha164',
'GTJA_alpha157',
'GTJA_alpha177',
'price_to_avg_cost',
'cost_skewness',
'GTJA_alpha191',
'GTJA_alpha180',
'history_position',
'bottom_profit',
'mean_median_dev',
'smart_money_accumulation',
'GTJA_alpha013',
'GTJA_alpha099',
'GTJA_alpha107',
'GTJA_alpha119',
'GTJA_alpha141',
'GTJA_alpha130',
'GTJA_alpha173',
]
# 模型参数配置
@@ -118,7 +112,7 @@ MODEL_PARAMS = {
# Fragment of the MODEL_PARAMS dict; its opening line is outside this view.
# [changed] Capped at 31 (2^5 - 1). Per the original note, the aim is to keep
# tree growth asymmetric and curb overfitting.
"num_leaves": 31,
# [large increase] The original note says this was raised from 256 to 1000 and
# cites a training set of about 970k rows, arguing that a large minimum leaf
# size resists market noise.
# NOTE(review): the value here is 512, not 1000 — confirm which is intended.
"min_data_in_leaf": 512,
# ==================== Learning parameters ====================
# [changed] Original note: raised slightly to help the model escape an early
# local optimum (avoid early stopping after a dozen or so rounds).
"learning_rate": 0.02,
# NOTE(review): this entry carried the same "raised slightly" note, but 0.01 is
# lower than the 0.02 entry above — the rationale looks stale; update it.
# The two entries appear to be the old/new sides of a diff; if both were
# literally present, Python keeps the later duplicate key (0.01).
"learning_rate": 0.01,
"n_estimators": 2000,
# ==================== Random sampling and dimensionality reduction ====================
"subsample": 0.85,
@@ -182,6 +176,11 @@ def main():
(StandardScaler, {}),
# (CrossSectionalStandardScaler, {}),
],
# Preprocessors for the label, each written as a (class, kwargs) pair.
label_processor_configs=[
# Winsorize the label (original note: removes extreme returns).
# NOTE(review): presumably lower/upper are quantile bounds (5% / 95%) —
# confirm against the Winsorizer implementation.
(Winsorizer, {"lower": 0.05, "upper": 0.95}),
# (StandardScaler, {}),
],
filters=[STFilter(data_router=engine.router)],
stock_pool_filter_func=stock_pool_filter,
stock_pool_required_columns=STOCK_FILTER_REQUIRED_COLUMNS,