"""ProStock ML Pipeline component library.

Provides modular, loosely coupled, plugin-style machine-learning pipeline
components, including reusable pieces such as processors, models, and
split strategies.

Example:
    >>> from src.pipeline import (
    ...     PluginRegistry, ProcessingPipeline,
    ...     PipelineStage, BaseProcessor
    ... )

    >>> # Fetch a registered processor
    >>> scaler_class = PluginRegistry.get_processor("standard_scaler")
    >>> scaler = scaler_class()

    >>> # Build a processing pipeline
    >>> pipeline = ProcessingPipeline([
    ...     PluginRegistry.get_processor("dropna")(),
    ...     PluginRegistry.get_processor("winsorizer")(lower=0.01, upper=0.99),
    ...     PluginRegistry.get_processor("standard_scaler")(),
    ... ])
"""

# Core abstract classes and split strategies.
from src.pipeline.core import (
    PipelineStage,
    TaskType,
    BaseProcessor,
    BaseModel,
    BaseSplitter,
    BaseMetric,
    TimeSeriesSplit,
    WalkForwardSplit,
    ExpandingWindowSplit,
)

# Plugin registry.
from src.pipeline.registry import PluginRegistry

# Processing pipeline.
from src.pipeline.pipeline import ProcessingPipeline

# Built-in processors. The original note says this import also registers
# them; the registration itself is not visible in this file and presumably
# happens inside the imported module -- TODO confirm.
from src.pipeline.processors.processors import (
    DropNAProcessor,
    FillNAProcessor,
    Winsorizer,
    StandardScaler,
    MinMaxScaler,
    RankTransformer,
    Neutralizer,
)

# Built-in models. As with the processors, the original note says this
# import registers them; presumably done by the imported module -- TODO
# confirm.
from src.pipeline.models.models import (
    LightGBMModel,
    CatBoostModel,
)

# Public API of the package: every name re-exported above, grouped by role.
__all__ = [
    # Core abstractions
    "PipelineStage",
    "TaskType",
    "BaseProcessor",
    "BaseModel",
    "BaseSplitter",
    "BaseMetric",
    # Split strategies
    "TimeSeriesSplit",
    "WalkForwardSplit",
    "ExpandingWindowSplit",
    # Plugin registry
    "PluginRegistry",
    # Processing pipeline
    "ProcessingPipeline",
    # Processors
    "DropNAProcessor",
    "FillNAProcessor",
    "Winsorizer",
    "StandardScaler",
    "MinMaxScaler",
    "RankTransformer",
    "Neutralizer",
    # Models
    "LightGBMModel",
    "CatBoostModel",
]