- 新增 check_data_quality 函数用于检测全空/全零/全NaN数据质量问题 - 重构 register_factors 函数,消除 FEATURE_COLS 和 PROCESSORS 冗余定义 - 修复实验脚本中特征列表不一致的问题,确保处理器覆盖所有特征 - 优化 LambdaRank 模型参数配置
78 lines
1.8 KiB
Python
78 lines
1.8 KiB
Python
"""训练模块 - ProStock 量化投资框架

提供模型训练、数据处理和预测的完整流程。
"""

# NOTE(review): the import order below is kept exactly as in the original;
# confirm that nothing depends on import-time side effects before reordering.

# Base abstract classes
from src.training.components.base import BaseModel, BaseProcessor

# Registries
from src.training.registry import (
    ModelRegistry,
    ProcessorRegistry,
    register_model,
    register_processor,
)

# Data splitter
from src.training.components.splitters import DateSplitter

# Stock-pool selector configuration (migrated to StockPoolManager; the
# selectors file is kept as a placeholder).
# from src.training.components.selectors import ...

# Data processors
from src.training.components.processors import (
    CrossSectionalStandardScaler,
    NullFiller,
    StandardScaler,
    Winsorizer,
)

# Model
from src.training.components.models import LightGBMModel

# Data filters
from src.training.components.filters import BaseFilter, STFilter

# Training core
from src.training.core import StockPoolManager, Trainer

# Utility functions
from src.training.utils import check_data_quality

# Configuration
from src.training.config import TrainingConfig

# Public API of this module: the names exported by ``from ... import *``.
__all__ = [
    # Base abstract classes
    "BaseModel",
    "BaseProcessor",
    # Registries
    "ModelRegistry",
    "ProcessorRegistry",
    "register_model",
    "register_processor",
    # Data splitter
    "DateSplitter",
    # Stock-pool selector configuration (migrated; entries kept as notes)
    # "StockFilterConfig",  # removed; use StockPoolManager + filter_func instead
    # "MarketCapSelectorConfig",  # removed; use StockPoolManager + required_factors instead
    # Data processors
    "NullFiller",
    "StandardScaler",
    "CrossSectionalStandardScaler",
    "Winsorizer",
    # Data filters
    "BaseFilter",
    "STFilter",
    # Model
    "LightGBMModel",
    # Training core
    "StockPoolManager",
    "Trainer",
    # Utility functions
    "check_data_quality",
    # Configuration
    "TrainingConfig",
]