Files
ProStock/src/pipeline/__init__.py
liaozhaorun 593ec99466 refactor: 存储层迁移DuckDB + 模块重构
- 存储层重构: HDF5 → DuckDB(UPSERT模式、线程安全存储)
- Sync类迁移: DataSync从sync.py迁移到api_daily.py(职责分离)
- 模型模块重构: src/models → src/pipeline(更清晰的命名)
- 新增因子模块: factors/momentum (MA、收益率排名)、factors/financial
- 新增API接口: api_namechange、api_bak_basic
- 新增训练入口: training模块(main.py、pipeline配置)
- 工具函数统一: get_today_date等移至utils.py
- 文档更新: AGENTS.md添加架构变更历史
2026-02-23 16:23:53 +08:00

88 lines
2.0 KiB
Python

"""ProStock ML Pipeline 组件库
提供组件化、低耦合、插件式的机器学习流水线组件。
包括处理器、模型、划分策略等可复用组件。
示例:
>>> from src.pipeline import (
... PluginRegistry, ProcessingPipeline,
... PipelineStage, BaseProcessor
... )
>>> # 获取注册的处理器
>>> scaler_class = PluginRegistry.get_processor("standard_scaler")
>>> scaler = scaler_class()
>>> # 创建处理流水线
>>> pipeline = ProcessingPipeline([
... PluginRegistry.get_processor("dropna")(),
... PluginRegistry.get_processor("winsorizer")(lower=0.01, upper=0.99),
... PluginRegistry.get_processor("standard_scaler")(),
... ])
"""
# Core abstract base classes and data-split strategies
from src.pipeline.core import (
PipelineStage,
TaskType,
BaseProcessor,
BaseModel,
BaseSplitter,
BaseMetric,
TimeSeriesSplit,
WalkForwardSplit,
ExpandingWindowSplit,
)
# Plugin registry
from src.pipeline.registry import PluginRegistry
# Processing pipeline
from src.pipeline.pipeline import ProcessingPipeline
# Built-in processors. The original note says this import also registers
# them; presumably registration is an import-time side effect of the
# processors module — confirm there before reordering or removing.
from src.pipeline.processors.processors import (
DropNAProcessor,
FillNAProcessor,
Winsorizer,
StandardScaler,
MinMaxScaler,
RankTransformer,
Neutralizer,
)
# Built-in models. As with the processors, the original note says importing
# them registers them — presumably an import-time side effect; confirm in
# the models module.
from src.pipeline.models.models import (
LightGBMModel,
CatBoostModel,
)
# Public API of the package: the names exported by `from src.pipeline import *`.
# Every entry corresponds to a name imported above in this module.
__all__ = [
# Core abstractions
"PipelineStage",
"TaskType",
"BaseProcessor",
"BaseModel",
"BaseSplitter",
"BaseMetric",
# Split strategies
"TimeSeriesSplit",
"WalkForwardSplit",
"ExpandingWindowSplit",
# Plugin registry
"PluginRegistry",
# Processing pipeline
"ProcessingPipeline",
# Processors
"DropNAProcessor",
"FillNAProcessor",
"Winsorizer",
"StandardScaler",
"MinMaxScaler",
"RankTransformer",
"Neutralizer",
# Models
"LightGBMModel",
"CatBoostModel",
]