diff --git a/src/pipeline/__init__.py b/src/pipeline/__init__.py deleted file mode 100644 index 9187292..0000000 --- a/src/pipeline/__init__.py +++ /dev/null @@ -1,87 +0,0 @@ -"""ProStock ML Pipeline 组件库 - -提供组件化、低耦合、插件式的机器学习流水线组件。 -包括处理器、模型、划分策略等可复用组件。 - -示例: - >>> from src.pipeline import ( - ... PluginRegistry, ProcessingPipeline, - ... PipelineStage, BaseProcessor - ... ) - - >>> # 获取注册的处理器 - >>> scaler_class = PluginRegistry.get_processor("standard_scaler") - >>> scaler = scaler_class() - - >>> # 创建处理流水线 - >>> pipeline = ProcessingPipeline([ - ... PluginRegistry.get_processor("dropna")(), - ... PluginRegistry.get_processor("winsorizer")(lower=0.01, upper=0.99), - ... PluginRegistry.get_processor("standard_scaler")(), - ... ]) -""" - -# 导入核心抽象类和划分策略 -from src.pipeline.core import ( - PipelineStage, - TaskType, - BaseProcessor, - BaseModel, - BaseSplitter, - BaseMetric, - TimeSeriesSplit, - WalkForwardSplit, - ExpandingWindowSplit, -) - -# 导入注册中心 -from src.pipeline.registry import PluginRegistry - -# 导入处理流水线 -from src.pipeline.pipeline import ProcessingPipeline - -# 导入并注册内置处理器 -from src.pipeline.processors.processors import ( - DropNAProcessor, - FillNAProcessor, - Winsorizer, - StandardScaler, - MinMaxScaler, - RankTransformer, - Neutralizer, -) - -# 导入并注册内置模型 -from src.pipeline.models.models import ( - LightGBMModel, - CatBoostModel, -) - -__all__ = [ - # 核心抽象 - "PipelineStage", - "TaskType", - "BaseProcessor", - "BaseModel", - "BaseSplitter", - "BaseMetric", - # 划分策略 - "TimeSeriesSplit", - "WalkForwardSplit", - "ExpandingWindowSplit", - # 注册中心 - "PluginRegistry", - # 处理流水线 - "ProcessingPipeline", - # 处理器 - "DropNAProcessor", - "FillNAProcessor", - "Winsorizer", - "StandardScaler", - "MinMaxScaler", - "RankTransformer", - "Neutralizer", - # 模型 - "LightGBMModel", - "CatBoostModel", -] diff --git a/src/pipeline/core/__init__.py b/src/pipeline/core/__init__.py deleted file mode 100644 index 70f6cf7..0000000 --- a/src/pipeline/core/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -"""核心模块导出""" - -from src.pipeline.core.base import ( - PipelineStage, - TaskType, - BaseProcessor, - BaseModel, - BaseSplitter, - BaseMetric, -) - -from src.pipeline.core.splitter import ( - TimeSeriesSplit, - WalkForwardSplit, - ExpandingWindowSplit, -) - -__all__ = [ - # 基础抽象 - "PipelineStage", - "TaskType", - "BaseProcessor", - "BaseModel", - "BaseSplitter", - "BaseMetric", - # 划分策略 - "TimeSeriesSplit", - "WalkForwardSplit", - "ExpandingWindowSplit", -] diff --git a/src/pipeline/core/base.py b/src/pipeline/core/base.py deleted file mode 100644 index b083add..0000000 --- a/src/pipeline/core/base.py +++ /dev/null @@ -1,351 +0,0 @@ -"""模型训练框架核心抽象类 - -提供处理器、模型、划分策略和评估指标的基类定义。 -""" - -from abc import ABC, abstractmethod -from enum import Enum, auto -from typing import Any, Dict, Iterator, List, Optional, Tuple, Literal -import polars as pl -import numpy as np - -# 任务类型 -TaskType = Literal["classification", "regression", "ranking"] - - -class PipelineStage(Enum): - """流水线阶段标记 - - 用于标记处理器在哪些阶段生效,防止数据泄露。 - - Attributes: - ALL: 适用于所有阶段(训练、测试、验证) - TRAIN: 仅训练阶段 - TEST: 仅测试阶段 - VALIDATION: 仅验证阶段 - """ - - ALL = auto() - TRAIN = auto() - TEST = auto() - VALIDATION = auto() - - -class BaseProcessor(ABC): - """数据处理器基类 - - 所有数据处理器必须继承此类。关键特性是通过 stage 属性控制处理器在哪些阶段生效。 - - 阶段标记规则: - - ALL: 训练和测试阶段都使用相同的参数 - - TRAIN: 只在训练阶段计算参数(如分位数、均值等),测试阶段使用训练阶段学到的参数 - - TEST: 只在测试阶段执行 - """ - - # 子类必须定义适用阶段 - stage: PipelineStage = PipelineStage.ALL - - def __init__(self, columns: Optional[List[str]] = None, **params): - """初始化处理器 - - Args: - columns: 要处理的列,None表示所有数值列 - **params: 处理器特定参数 - """ - self.columns = columns - self.params = params - self._is_fitted = False - self._fitted_params: Dict[str, Any] = {} - - @abstractmethod - def fit(self, data: pl.DataFrame) -> "BaseProcessor": - """在训练数据上学习参数 - - 此方法只在训练阶段调用一次。学习到的参数存储在 self._fitted_params 中。 - - Args: - data: 训练数据 - - Returns: - self (支持链式调用) - """ - pass - - @abstractmethod - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - """转换数据 - - 在训练和测试阶段都会被调用。使用 fit() 阶段学习到的参数进行转换。 - - Args: - data: 输入数据 - - Returns: - 转换后的数据 - """ - pass - - def fit_transform(self, data: pl.DataFrame) -> pl.DataFrame: - """先fit再transform的便捷方法 - - Args: - data: 训练数据 - - Returns: - 转换后的数据 - """ - return self.fit(data).transform(data) - - def get_fitted_params(self) -> Dict[str, Any]: - """获取学习到的参数(用于保存/加载) - - Returns: - 学习到的参数字典 - """ - return self._fitted_params.copy() - - def set_fitted_params(self, params: Dict[str, Any]) -> "BaseProcessor": - """设置学习到的参数(用于从checkpoint恢复) - - Args: - params: 参数字典 - - Returns: - self (支持链式调用) - """ - self._fitted_params = params.copy() - self._is_fitted = True - return self - - -class BaseModel(ABC): - """机器学习模型基类 - - 统一接口支持多种模型(LightGBM, CatBoost, XGBoost等) - 和多种任务类型(分类、回归、排序)。 - """ - - def __init__( - self, - task_type: TaskType, - params: Optional[Dict[str, Any]] = None, - name: Optional[str] = None, - ): - """初始化模型 - - Args: - task_type: 任务类型 - "classification", "regression", "ranking" - params: 模型特定参数 - name: 模型名称(用于日志和报告) - """ - self.task_type = task_type - self.params = params or {} - self.name = name or self.__class__.__name__ - self._model: Any = None - self._is_fitted = False - - @abstractmethod - def fit( - self, - X: pl.DataFrame, - y: pl.Series, - X_val: Optional[pl.DataFrame] = None, - y_val: Optional[pl.Series] = None, - **fit_params, - ) -> "BaseModel": - """训练模型 - - Args: - X: 特征数据 - y: 目标变量 - X_val: 验证集特征(可选) - y_val: 验证集目标(可选) - **fit_params: 额外的fit参数 - - Returns: - self (支持链式调用) - """ - pass - - @abstractmethod - def predict(self, X: pl.DataFrame) -> np.ndarray: - """预测 - - Args: - X: 特征数据 - - Returns: - 预测结果数组 - - classification: 类别标签或概率 - - regression: 连续值 - - ranking: 排序分数 - """ - pass - - def predict_proba(self, X: pl.DataFrame) -> np.ndarray: - """预测概率(仅分类任务) - - Args: - X: 特征数据 - - Returns: - 类别概率数组 [n_samples, n_classes] - - Raises: - NotImplementedError: 非分类任务时抛出 - """ - raise NotImplementedError( - "predict_proba only available for classification tasks" - ) - - def get_feature_importance(self) -> Optional[pl.DataFrame]: - """获取特征重要性(如果模型支持) - - Returns: - DataFrame[feature, importance] 或 None - """ - return None - - def save(self, path: str) -> None: - """保存模型到文件 - - Args: - path: 保存路径 - """ - import pickle - - with open(path, "wb") as f: - pickle.dump(self, f) - - @classmethod - def load(cls, path: str) -> "BaseModel": - """从文件加载模型 - - Args: - path: 模型文件路径 - - Returns: - 加载的模型实例 - """ - import pickle - - with open(path, "rb") as f: - return pickle.load(f) - - -class BaseSplitter(ABC): - """数据划分策略基类 - - 针对时间序列数据的特殊划分策略,防止未来泄露。 - """ - - @abstractmethod - def split( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> Iterator[Tuple[List[int], List[int]]]: - """生成训练/测试索引 - - Args: - data: 完整数据集 - date_col: 日期列名 - - Yields: - (train_indices, test_indices) 元组 - """ - pass - - @abstractmethod - def get_split_dates( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> List[Tuple[str, str, str, str]]: - """获取划分日期范围 - - Args: - data: 完整数据集 - date_col: 日期列名 - - Returns: - [(train_start, train_end, test_start, test_end), ...] - """ - pass - - -class BaseMetric(ABC): - """评估指标基类 - - 所有评估指标必须继承此类。支持单次计算和累积计算两种模式。 - """ - - def __init__(self, name: Optional[str] = None): - """初始化指标 - - Args: - name: 指标名称 - """ - self.name = name or self.__class__.__name__ - self._values: List[float] = [] - - @abstractmethod - def compute(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: - """计算指标值 - - Args: - y_true: 真实值 - y_pred: 预测值 - - Returns: - 指标值 - """ - pass - - def update(self, y_true: np.ndarray, y_pred: np.ndarray) -> "BaseMetric": - """更新累积值 - - Args: - y_true: 真实值 - y_pred: 预测值 - - Returns: - self (支持链式调用) - """ - self._values.append(self.compute(y_true, y_pred)) - return self - - def get_mean(self) -> float: - """获取累积值的均值 - - Returns: - 均值 - """ - if not self._values: - return 0.0 - return float(np.mean(self._values)) - - def get_std(self) -> float: - """获取累积值的标准差 - - Returns: - 标准差 - """ - if not self._values: - return 0.0 - return float(np.std(self._values)) - - def reset(self) -> "BaseMetric": - """重置累积值 - - Returns: - self (支持链式调用) - """ - self._values = [] - return self - - -__all__ = [ - "PipelineStage", - "TaskType", - "BaseProcessor", - "BaseModel", - "BaseSplitter", - "BaseMetric", -] diff --git a/src/pipeline/core/splitter.py b/src/pipeline/core/splitter.py deleted file mode 100644 index 8d63f4a..0000000 --- a/src/pipeline/core/splitter.py +++ /dev/null @@ -1,222 +0,0 @@ -"""时间序列数据划分策略 - -提供针对金融时间序列的特殊划分策略,防止未来泄露。 -""" - -from typing import Iterator, List, Tuple -import polars as pl - -from src.pipeline.core.base import BaseSplitter - - -class TimeSeriesSplit(BaseSplitter): - """时间序列划分 - 确保训练数据在测试数据之前 - - 按照时间顺序进行K折划分,每折的训练数据都在测试数据之前。 - 通过 gap 参数防止训练集和测试集之间的数据泄露。 - - Args: - n_splits: 划分折数 - gap: 训练集和测试集之间的间隔天数(防止泄露) - min_train_size: 最小训练集大小(天数) - """ - - def __init__(self, n_splits: int = 5, gap: int = 5, min_train_size: int = 252): - self.n_splits = n_splits - self.gap = gap - self.min_train_size = min_train_size - - def split( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> Iterator[Tuple[List[int], List[int]]]: - """生成训练/测试索引""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - - test_size = (n_dates - self.min_train_size) // self.n_splits - - for i in range(self.n_splits): - train_end_idx = self.min_train_size + i * test_size - test_start_idx = train_end_idx + self.gap - test_end_idx = test_start_idx + test_size - - if test_end_idx > n_dates: - break - - train_dates = dates[:train_end_idx] - test_dates = dates[test_start_idx:test_end_idx] - - train_mask = data[date_col].is_in(train_dates.to_list()) - test_mask = data[date_col].is_in(test_dates.to_list()) - - train_idx = data.with_row_index().filter(train_mask)["index"].to_list() - test_idx = data.with_row_index().filter(test_mask)["index"].to_list() - - yield train_idx, test_idx - - def get_split_dates( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> List[Tuple[str, str, str, str]]: - """获取划分日期范围""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - test_size = (n_dates - self.min_train_size) // self.n_splits - - result = [] - for i in range(self.n_splits): - train_end_idx = self.min_train_size + i * test_size - test_start_idx = train_end_idx + self.gap - test_end_idx = test_start_idx + test_size - - if test_end_idx > n_dates: - break - - result.append( - ( - str(dates[0]), - str(dates[train_end_idx - 1]), - str(dates[test_start_idx]), - str(dates[test_end_idx - 1]), - ) - ) - return result - - -class WalkForwardSplit(BaseSplitter): - """滚动前向验证 - 训练集逐步扩展 - - Args: - train_window: 训练集窗口大小(天数) - test_window: 测试集窗口大小(天数) - gap: 训练集和测试集之间的间隔天数 - """ - - def __init__(self, train_window: int = 504, test_window: int = 21, gap: int = 5): - self.train_window = train_window - self.test_window = test_window - self.gap = gap - - def split( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> Iterator[Tuple[List[int], List[int]]]: - """生成训练/测试索引""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - - start_idx = self.train_window - while start_idx + self.gap + self.test_window <= n_dates: - train_start = start_idx - self.train_window - train_end = start_idx - test_start = start_idx + self.gap - test_end = test_start + self.test_window - - train_dates = dates[train_start:train_end] - test_dates = dates[test_start:test_end] - - train_mask = data[date_col].is_in(train_dates.to_list()) - test_mask = data[date_col].is_in(test_dates.to_list()) - - train_idx = data.with_row_index().filter(train_mask)["index"].to_list() - test_idx = data.with_row_index().filter(test_mask)["index"].to_list() - - yield train_idx, test_idx - start_idx += self.test_window - - def get_split_dates( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> List[Tuple[str, str, str, str]]: - """获取划分日期范围""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - - result = [] - start_idx = self.train_window - while start_idx + self.gap + self.test_window <= n_dates: - train_start = start_idx - self.train_window - train_end = start_idx - test_start = start_idx + self.gap - test_end = test_start + self.test_window - - result.append( - ( - str(dates[train_start]), - str(dates[train_end - 1]), - str(dates[test_start]), - str(dates[test_end - 1]), - ) - ) - start_idx += self.test_window - - return result - - -class ExpandingWindowSplit(BaseSplitter): - """扩展窗口划分 - 训练集不断扩大 - - Args: - initial_train_size: 初始训练集大小(天数) - test_window: 测试集窗口大小(天数) - gap: 训练集和测试集之间的间隔天数 - """ - - def __init__( - self, initial_train_size: int = 252, test_window: int = 21, gap: int = 5 - ): - self.initial_train_size = initial_train_size - self.test_window = test_window - self.gap = gap - - def split( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> Iterator[Tuple[List[int], List[int]]]: - """生成训练/测试索引""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - - train_end_idx = self.initial_train_size - while train_end_idx + self.gap + self.test_window <= n_dates: - train_dates = dates[:train_end_idx] - test_start = train_end_idx + self.gap - test_end = test_start + self.test_window - test_dates = dates[test_start:test_end] - - train_mask = data[date_col].is_in(train_dates.to_list()) - test_mask = data[date_col].is_in(test_dates.to_list()) - - train_idx = data.with_row_index().filter(train_mask)["index"].to_list() - test_idx = data.with_row_index().filter(test_mask)["index"].to_list() - - yield train_idx, test_idx - train_end_idx += self.test_window - - def get_split_dates( - self, data: pl.DataFrame, date_col: str = "trade_date" - ) -> List[Tuple[str, str, str, str]]: - """获取划分日期范围""" - dates = data[date_col].unique().sort() - n_dates = len(dates) - - result = [] - train_end_idx = self.initial_train_size - while train_end_idx + self.gap + self.test_window <= n_dates: - test_start = train_end_idx + self.gap - test_end = test_start + self.test_window - - result.append( - ( - str(dates[0]), - str(dates[train_end_idx - 1]), - str(dates[test_start]), - str(dates[test_end - 1]), - ) - ) - train_end_idx += self.test_window - - return result - - -__all__ = [ - "TimeSeriesSplit", - "WalkForwardSplit", - "ExpandingWindowSplit", -] diff --git a/src/pipeline/models/__init__.py b/src/pipeline/models/__init__.py deleted file mode 100644 index cc2cdfc..0000000 --- a/src/pipeline/models/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""模型模块""" - -from src.pipeline.models.models import ( - LightGBMModel, - CatBoostModel, -) - -__all__ = [ - "LightGBMModel", - "CatBoostModel", -] diff --git a/src/pipeline/models/models.py b/src/pipeline/models/models.py deleted file mode 100644 index 40278b1..0000000 --- a/src/pipeline/models/models.py +++ /dev/null @@ -1,210 +0,0 @@ -"""内置机器学习模型 - -提供 LightGBM、CatBoost 等模型的统一接口包装器。 -""" - -from typing import Optional, Dict, Any -import polars as pl -import numpy as np - -from src.pipeline.core import BaseModel, TaskType -from src.pipeline.registry import PluginRegistry - - -@PluginRegistry.register_model("lightgbm") -class LightGBMModel(BaseModel): - """LightGBM 模型包装器 - - 支持分类、回归、排序三种任务类型。 - """ - - def __init__( - self, - task_type: TaskType, - params: Optional[Dict[str, Any]] = None, - name: Optional[str] = None, - ): - super().__init__(task_type, params, name) - self._model = None - - def fit( - self, - X: pl.DataFrame, - y: pl.Series, - X_val: Optional[pl.DataFrame] = None, - y_val: Optional[pl.Series] = None, - **fit_params, - ) -> "LightGBMModel": - """训练模型""" - try: - import lightgbm as lgb - except ImportError: - raise ImportError( - "lightgbm is required. Install with: uv pip install lightgbm" - ) - - X_arr = X.to_numpy() - y_arr = y.to_numpy() - - train_data = lgb.Dataset(X_arr, label=y_arr) - valid_sets = [train_data] - valid_names = ["train"] - - if X_val is not None and y_val is not None: - valid_data = lgb.Dataset(X_val.to_numpy(), label=y_val.to_numpy()) - valid_sets.append(valid_data) - valid_names.append("valid") - - default_params = { - "objective": self._get_objective(), - "metric": self._get_metric(), - "boosting_type": "gbdt", - "num_leaves": 31, - "learning_rate": 0.05, - "feature_fraction": 0.9, - "bagging_fraction": 0.8, - "bagging_freq": 5, - "verbose": -1, - } - default_params.update(self.params) - - callbacks = [] - if len(valid_sets) > 1: - callbacks.append(lgb.early_stopping(stopping_rounds=10, verbose=False)) - - self._model = lgb.train( - default_params, - train_data, - num_boost_round=fit_params.get("num_boost_round", 100), - valid_sets=valid_sets, - valid_names=valid_names, - callbacks=callbacks, - ) - self._is_fitted = True - return self - - def predict(self, X: pl.DataFrame) -> np.ndarray: - """预测""" - if not self._is_fitted: - raise RuntimeError("Model not fitted yet") - return self._model.predict(X.to_numpy()) - - def predict_proba(self, X: pl.DataFrame) -> np.ndarray: - """预测概率(仅分类任务)""" - if self.task_type != "classification": - raise ValueError("predict_proba only for classification") - probs = self.predict(X) - if len(probs.shape) == 1: - return np.vstack([1 - probs, probs]).T - return probs - - def get_feature_importance(self) -> Optional[pl.DataFrame]: - """获取特征重要性""" - if self._model is None: - return None - importance = self._model.feature_importance(importance_type="gain") - feature_names = getattr( - self._model, - "feature_name", - lambda: [f"feature_{i}" for i in range(len(importance))], - )() - return pl.DataFrame({"feature": feature_names, "importance": importance}).sort( - "importance", descending=True - ) - - def _get_objective(self) -> str: - objectives = { - "classification": "binary", - "regression": "regression", - "ranking": "lambdarank", - } - return objectives.get(self.task_type, "regression") - - def _get_metric(self) -> str: - metrics = {"classification": "auc", "regression": "rmse", "ranking": "ndcg"} - return metrics.get(self.task_type, "rmse") - - -@PluginRegistry.register_model("catboost") -class CatBoostModel(BaseModel): - """CatBoost 模型包装器""" - - def __init__( - self, - task_type: TaskType, - params: Optional[Dict[str, Any]] = None, - name: Optional[str] = None, - ): - super().__init__(task_type, params, name) - self._model = None - - def fit( - self, - X: pl.DataFrame, - y: pl.Series, - X_val: Optional[pl.DataFrame] = None, - y_val: Optional[pl.Series] = None, - **fit_params, - ) -> "CatBoostModel": - """训练模型""" - try: - from catboost import CatBoostClassifier, CatBoostRegressor - except ImportError: - raise ImportError( - "catboost is required. Install with: uv pip install catboost" - ) - - if self.task_type == "classification": - model_class = CatBoostClassifier - default_params = {"loss_function": "Logloss", "eval_metric": "AUC"} - elif self.task_type == "regression": - model_class = CatBoostRegressor - default_params = {"loss_function": "RMSE"} - else: - model_class = CatBoostRegressor - default_params = {"loss_function": "QueryRMSE"} - - default_params.update(self.params) - default_params["verbose"] = False - - self._model = model_class(**default_params) - - eval_set = None - if X_val is not None and y_val is not None: - eval_set = (X_val.to_pandas(), y_val.to_pandas()) - - self._model.fit( - X.to_pandas(), - y.to_pandas(), - eval_set=eval_set, - early_stopping_rounds=fit_params.get("early_stopping_rounds", 10), - verbose=False, - ) - self._is_fitted = True - return self - - def predict(self, X: pl.DataFrame) -> np.ndarray: - """预测""" - if not self._is_fitted: - raise RuntimeError("Model not fitted yet") - return self._model.predict(X.to_pandas()) - - def predict_proba(self, X: pl.DataFrame) -> np.ndarray: - """预测概率""" - if self.task_type != "classification": - raise ValueError("predict_proba only for classification") - return self._model.predict_proba(X.to_pandas()) - - def get_feature_importance(self) -> Optional[pl.DataFrame]: - """获取特征重要性""" - if self._model is None: - return None - return pl.DataFrame( - { - "feature": self._model.feature_names_, - "importance": self._model.feature_importances_, - } - ).sort("importance", descending=True) - - -__all__ = ["LightGBMModel", "CatBoostModel"] diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py deleted file mode 100644 index 9a7407c..0000000 --- a/src/pipeline/pipeline.py +++ /dev/null @@ -1,70 +0,0 @@ -"""数据处理流水线 - -管理多个处理器的顺序执行,支持阶段感知处理。 -""" - -from typing import List, Dict -import polars as pl - -from src.pipeline.core import BaseProcessor, PipelineStage - - -class ProcessingPipeline: - """数据处理流水线 - - 按顺序执行多个处理器,自动处理阶段标记。 - 关键特性:在测试阶段使用训练阶段学习到的参数,防止数据泄露。 - """ - - def __init__(self, processors: List[BaseProcessor]): - """初始化流水线 - - Args: - processors: 处理器列表(按执行顺序) - """ - self.processors = processors - self._fitted_processors: Dict[int, BaseProcessor] = {} - - def fit_transform( - self, data: pl.DataFrame, stage: PipelineStage = PipelineStage.TRAIN - ) -> pl.DataFrame: - """在训练数据上fit所有处理器并transform""" - result = data - for i, processor in enumerate(self.processors): - if processor.stage in [PipelineStage.ALL, stage]: - result = processor.fit_transform(result) - self._fitted_processors[i] = processor - elif stage == PipelineStage.TRAIN and processor.stage == PipelineStage.TEST: - processor.fit(result) - self._fitted_processors[i] = processor - return result - - def transform( - self, data: pl.DataFrame, stage: PipelineStage = PipelineStage.TEST - ) -> pl.DataFrame: - """在测试数据上应用已fit的处理器""" - result = data - for i, processor in enumerate(self.processors): - if processor.stage in [PipelineStage.ALL, stage]: - if i in self._fitted_processors: - result = self._fitted_processors[i].transform(result) - else: - result = processor.transform(result) - return result - - def save_processors(self, path: str) -> None: - """保存所有已fit的处理器状态""" - import pickle - - with open(path, "wb") as f: - pickle.dump(self._fitted_processors, f) - - def load_processors(self, path: str) -> None: - """加载处理器状态""" - import pickle - - with open(path, "rb") as f: - self._fitted_processors = pickle.load(f) - - -__all__ = ["ProcessingPipeline"] diff --git a/src/pipeline/processors/__init__.py b/src/pipeline/processors/__init__.py deleted file mode 100644 index be33a77..0000000 --- a/src/pipeline/processors/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -"""处理器模块""" - -from src.pipeline.processors.processors import ( - DropNAProcessor, - FillNAProcessor, - Winsorizer, - StandardScaler, - MinMaxScaler, - RankTransformer, - Neutralizer, - MADClipper, -) - -__all__ = [ - "DropNAProcessor", - "FillNAProcessor", - "Winsorizer", - "StandardScaler", - "MinMaxScaler", - "RankTransformer", - "Neutralizer", - "MADClipper", -] diff --git a/src/pipeline/processors/processors.py b/src/pipeline/processors/processors.py deleted file mode 100644 index 72bb958..0000000 --- a/src/pipeline/processors/processors.py +++ /dev/null @@ -1,296 +0,0 @@ -"""内置数据处理器 - -提供常用的数据预处理和转换处理器。 -""" - -from typing import List, Optional -import polars as pl - -from src.pipeline.core import BaseProcessor, PipelineStage -from src.pipeline.registry import PluginRegistry - -# 数值类型列表 -FLOAT_TYPES = [pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64] - - -def _get_numeric_columns( - data: pl.DataFrame, columns: Optional[List[str]] = None -) -> List[str]: - """获取数值列""" - if columns is not None: - return columns - return [c for c in data.columns if data[c].dtype in FLOAT_TYPES] - - -@PluginRegistry.register_processor("dropna") -class DropNAProcessor(BaseProcessor): - """缺失值删除处理器""" - - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "DropNAProcessor": - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - cols = self.columns or data.columns - return data.drop_nulls(subset=cols) - - -@PluginRegistry.register_processor("fillna") -class FillNAProcessor(BaseProcessor): - """缺失值填充处理器(只在训练阶段计算填充值)""" - - stage = PipelineStage.TRAIN - - def __init__(self, columns: Optional[List[str]] = None, method: str = "median"): - super().__init__(columns) - if method not in ["median", "mean", "zero"]: - raise ValueError(f"Unknown fill method: {method}") - self.method = method - - def fit(self, data: pl.DataFrame) -> "FillNAProcessor": - cols = _get_numeric_columns(data, self.columns) - fill_values = {} - - for col in cols: - if self.method == "median": - fill_values[col] = data[col].median() - elif self.method == "mean": - fill_values[col] = data[col].mean() - elif self.method == "zero": - fill_values[col] = 0.0 - - self._fitted_params = {"fill_values": fill_values, "columns": cols} - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - for col, val in self._fitted_params.get("fill_values", {}).items(): - if col in result.columns: - result = result.with_columns(pl.col(col).fill_null(val).alias(col)) - return result - - -@PluginRegistry.register_processor("winsorizer") -class Winsorizer(BaseProcessor): - """缩尾处理器 - 防止极端值影响(只在训练阶段计算分位数)""" - - stage = PipelineStage.TRAIN - - def __init__( - self, - columns: Optional[List[str]] = None, - lower: float = 0.01, - upper: float = 0.99, - ): - super().__init__(columns) - self.lower = lower - self.upper = upper - - def fit(self, data: pl.DataFrame) -> "Winsorizer": - cols = _get_numeric_columns(data, self.columns) - bounds = {} - - for col in cols: - bounds[col] = { - "lower": data[col].quantile(self.lower), - "upper": data[col].quantile(self.upper), - } - - self._fitted_params = {"bounds": bounds, "columns": cols} - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - for col, bounds in self._fitted_params.get("bounds", {}).items(): - if col in result.columns: - result = result.with_columns( - pl.col(col).clip(bounds["lower"], bounds["upper"]).alias(col) - ) - return result - - -@PluginRegistry.register_processor("standard_scaler") -class StandardScaler(BaseProcessor): - """标准化处理器 - Z-score标准化""" - - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "StandardScaler": - cols = _get_numeric_columns(data, self.columns) - stats = {} - - for col in cols: - stats[col] = {"mean": data[col].mean(), "std": data[col].std()} - - self._fitted_params = {"stats": stats, "columns": cols} - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - for col, stats in self._fitted_params.get("stats", {}).items(): - if col in result.columns and stats["std"] is not None and stats["std"] > 0: - result = result.with_columns( - ((pl.col(col) - stats["mean"]) / stats["std"]).alias(col) - ) - return result - - -@PluginRegistry.register_processor("minmax_scaler") -class MinMaxScaler(BaseProcessor): - """归一化处理器 - 缩放到[0, 1]范围""" - - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "MinMaxScaler": - cols = _get_numeric_columns(data, self.columns) - stats = {} - - for col in cols: - stats[col] = {"min": data[col].min(), "max": data[col].max()} - - self._fitted_params = {"stats": stats, "columns": cols} - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - for col, stats in self._fitted_params.get("stats", {}).items(): - if col in result.columns: - range_val = stats["max"] - stats["min"] - if range_val is not None and range_val > 0: - result = result.with_columns( - ((pl.col(col) - stats["min"]) / range_val).alias(col) - ) - return result - - -@PluginRegistry.register_processor("rank_transformer") -class RankTransformer(BaseProcessor): - """排名转换处理器 - 转换为截面排名""" - - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "RankTransformer": - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - cols = self.columns or _get_numeric_columns(data) - - for col in cols: - if col in result.columns: - result = result.with_columns( - pl.col(col).rank().over("trade_date").alias(col) - ) - return result - - -@PluginRegistry.register_processor("neutralizer") -class Neutralizer(BaseProcessor): - """中性化处理器 - 行业/市值中性化""" - - stage = PipelineStage.ALL - - def __init__( - self, - columns: Optional[List[str]] = None, - group_col: str = "industry", - exclude_cols: Optional[List[str]] = None, - ): - super().__init__(columns) - self.group_col = group_col - self.exclude_cols = exclude_cols or [] - - def fit(self, data: pl.DataFrame) -> "Neutralizer": - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data - cols = self.columns or _get_numeric_columns(data) - - for col in cols: - if col in result.columns and col not in self.exclude_cols: - result = result.with_columns( - ( - pl.col(col) - - pl.col(col).mean().over(["trade_date", self.group_col]) - ).alias(col) - ) - return result - - -@PluginRegistry.register_processor("mad_clipper") -class MADClipper(BaseProcessor): - """MAD去极值处理器 - 基于每日截面的中位数绝对偏差去除极值 - - 使用3倍MAD作为阈值,比标准差方法更稳健,对异常值不敏感。 - 阈值范围: [median - n*MAD, median + n*MAD] - """ - - stage = PipelineStage.TRAIN - - def __init__( - self, - columns: Optional[List[str]] = None, - n_mad: float = 3.0, - ): - super().__init__(columns) - self.n_mad = n_mad - - def fit(self, data: pl.DataFrame) -> "MADClipper": - cols = _get_numeric_columns(data, self.columns) - bounds = {} - - for col in cols: - # 按日期分组计算每个截面的 median 和 MAD - daily_stats = data.group_by("trade_date").agg( - pl.col(col).median().alias("median"), - (pl.col(col) - pl.col(col).median()).abs().median().alias("mad"), - ) - bounds[col] = daily_stats - - self._fitted_params = {"bounds": bounds, "columns": cols} - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - """使用窗口函数进行MAD去极值,避免join操作提升性能""" - result = data - bounds = self._fitted_params.get("bounds", {}) - - for col in bounds.keys(): - if col not in result.columns: - continue - - # 使用窗口函数直接计算每个截面的median和MAD,避免join - # 1. 计算每个日期截面的median - median = pl.col(col).median().over("trade_date") - # 2. 计算每个日期截面的MAD - mad = (pl.col(col) - median).abs().median().over("trade_date") - - # 3. 计算上下界并clip - lower = median - self.n_mad * mad - upper = median + self.n_mad * mad - - result = result.with_columns(pl.col(col).clip(lower, upper).alias(col)) - - return result - - -__all__ = [ - "DropNAProcessor", - "FillNAProcessor", - "Winsorizer", - "StandardScaler", - "MinMaxScaler", - "RankTransformer", - "Neutralizer", - "MADClipper", -] diff --git a/src/pipeline/registry.py b/src/pipeline/registry.py deleted file mode 100644 index 9ff8ecb..0000000 --- a/src/pipeline/registry.py +++ /dev/null @@ -1,297 +0,0 @@ -"""插件注册中心 - -提供装饰器方式注册处理器、模型、划分策略等组件。 -实现真正的插件式架构 - 新功能只需注册即可使用。 - -示例: - >>> @PluginRegistry.register_processor("standard_scaler") - ... class StandardScaler(BaseProcessor): - ... pass - - >>> # 使用 - >>> scaler = PluginRegistry.get_processor("standard_scaler")() -""" - -from typing import Type, Dict, List, TypeVar, Optional -from functools import wraps -from weakref import WeakValueDictionary -import contextlib - -from src.pipeline.core import BaseProcessor, BaseModel, BaseSplitter, BaseMetric - -T = TypeVar("T") - - -class PluginRegistry: - """插件注册中心 - - 管理所有组件的注册和获取。使用装饰器方式注册新组件。 - - Attributes: - _processors: 已注册的处理器字典 - _models: 已注册的模型字典 - _splitters: 已注册的划分策略字典 - _metrics: 已注册的评估指标字典 - """ - - _processors: Dict[str, Type[BaseProcessor]] = {} - _models: Dict[str, Type[BaseModel]] = {} - _splitters: Dict[str, Type[BaseSplitter]] = {} - _metrics: Dict[str, Type[BaseMetric]] = {} - - @classmethod - @contextlib.contextmanager - def temp_registry(cls): - """临时注册上下文管理器 - - 在上下文管理器内部注册的组件会在退出时自动清理, - 避免测试之间的状态污染。 - - 示例: - >>> with PluginRegistry.temp_registry(): - ... @PluginRegistry.register_processor("temp_processor") - ... class TempProcessor(BaseProcessor): - ... pass - ... # 在此处可以使用 temp_processor - ... # 退出后自动清理 - """ - original_state = { - "_processors": cls._processors.copy(), - "_models": cls._models.copy(), - "_splitters": cls._splitters.copy(), - "_metrics": cls._metrics.copy(), - } - try: - yield cls - finally: - cls._processors = original_state["_processors"] - cls._models = original_state["_models"] - cls._splitters = original_state["_splitters"] - cls._metrics = original_state["_metrics"] - - @classmethod - def register_processor(cls, name: Optional[str] = None): - """注册处理器装饰器 - - 用于装饰器方式注册数据处理器。 - - 示例: - >>> @PluginRegistry.register_processor("standard_scaler") - ... class StandardScaler(BaseProcessor): - ... pass - - >>> # 获取并使用 - >>> scaler_class = PluginRegistry.get_processor("standard_scaler") - >>> scaler = scaler_class() - - Args: - name: 注册名称,默认为类名 - - Returns: - 装饰器函数 - """ - - def decorator(processor_class: Type[BaseProcessor]) -> Type[BaseProcessor]: - key = name or processor_class.__name__ - cls._processors[key] = processor_class - processor_class._registry_name = key - return processor_class - - return decorator - - @classmethod - def register_model(cls, name: Optional[str] = None): - """注册模型装饰器 - - 用于装饰器方式注册机器学习模型。 - - 示例: - >>> @PluginRegistry.register_model("lightgbm") - ... class LightGBMModel(BaseModel): - ... pass - - Args: - name: 注册名称,默认为类名 - - Returns: - 装饰器函数 - """ - - def decorator(model_class: Type[BaseModel]) -> Type[BaseModel]: - key = name or model_class.__name__ - cls._models[key] = model_class - model_class._registry_name = key - return model_class - - return decorator - - @classmethod - def register_splitter(cls, name: Optional[str] = None): - """注册划分策略装饰器 - - 用于装饰器方式注册数据划分策略。 - - 示例: - >>> @PluginRegistry.register_splitter("time_series") - ... class TimeSeriesSplit(BaseSplitter): - ... pass - - Args: - name: 注册名称,默认为类名 - - Returns: - 装饰器函数 - """ - - def decorator(splitter_class: Type[BaseSplitter]) -> Type[BaseSplitter]: - key = name or splitter_class.__name__ - cls._splitters[key] = splitter_class - splitter_class._registry_name = key - return splitter_class - - return decorator - - @classmethod - def register_metric(cls, name: Optional[str] = None): - """注册评估指标装饰器 - - 用于装饰器方式注册评估指标。 - - 示例: - >>> @PluginRegistry.register_metric("ic") - ... class ICMetric(BaseMetric): - ... pass - - Args: - name: 注册名称,默认为类名 - - Returns: - 装饰器函数 - """ - - def decorator(metric_class: Type[BaseMetric]) -> Type[BaseMetric]: - key = name or metric_class.__name__ - cls._metrics[key] = metric_class - metric_class._registry_name = key - return metric_class - - return decorator - - @classmethod - def get_processor(cls, name: str) -> Type[BaseProcessor]: - """获取处理器类 - - Args: - name: 处理器注册名称 - - Returns: - 处理器类 - - Raises: - KeyError: 处理器不存在时抛出 - """ - if name not in cls._processors: - available = list(cls._processors.keys()) - raise KeyError(f"Processor '{name}' not found. Available: {available}") - return cls._processors[name] - - @classmethod - def get_model(cls, name: str) -> Type[BaseModel]: - """获取模型类 - - Args: - name: 模型注册名称 - - Returns: - 模型类 - - Raises: - KeyError: 模型不存在时抛出 - """ - if name not in cls._models: - available = list(cls._models.keys()) - raise KeyError(f"Model '{name}' not found. Available: {available}") - return cls._models[name] - - @classmethod - def get_splitter(cls, name: str) -> Type[BaseSplitter]: - """获取划分策略类 - - Args: - name: 划分策略注册名称 - - Returns: - 划分策略类 - - Raises: - KeyError: 划分策略不存在时抛出 - """ - if name not in cls._splitters: - available = list(cls._splitters.keys()) - raise KeyError(f"Splitter '{name}' not found. Available: {available}") - return cls._splitters[name] - - @classmethod - def get_metric(cls, name: str) -> Type[BaseMetric]: - """获取评估指标类 - - Args: - name: 评估指标注册名称 - - Returns: - 评估指标类 - - Raises: - KeyError: 评估指标不存在时抛出 - """ - if name not in cls._metrics: - available = list(cls._metrics.keys()) - raise KeyError(f"Metric '{name}' not found. Available: {available}") - return cls._metrics[name] - - @classmethod - def list_processors(cls) -> List[str]: - """列出所有可用处理器 - - Returns: - 处理器名称列表 - """ - return list(cls._processors.keys()) - - @classmethod - def list_models(cls) -> List[str]: - """列出所有可用模型 - - Returns: - 模型名称列表 - """ - return list(cls._models.keys()) - - @classmethod - def list_splitters(cls) -> List[str]: - """列出所有可用划分策略 - - Returns: - 划分策略名称列表 - """ - return list(cls._splitters.keys()) - - @classmethod - def list_metrics(cls) -> List[str]: - """列出所有可用评估指标 - - Returns: - 评估指标名称列表 - """ - return list(cls._metrics.keys()) - - @classmethod - def clear_all(cls) -> None: - """清除所有注册(主要用于测试)""" - cls._processors.clear() - cls._models.clear() - cls._splitters.clear() - cls._metrics.clear() - - -__all__ = ["PluginRegistry"] diff --git a/src/training/__init__.py b/src/training/__init__.py index 2615cd4..48e78ac 100644 --- a/src/training/__init__.py +++ b/src/training/__init__.py @@ -1,46 +1,26 @@ -"""ProStock 训练流程模块 +"""训练模块 - ProStock 量化投资框架 -本模块提供完整的模型训练流程: -1. 数据处理:Fillna(0) -> Dropna -2. 模型训练:LightGBM分类模型 -3. 预测选股:每日top5股票池 - -使用示例: - from src.training import run_training - - # 运行完整训练流程 - result = run_training( - train_start="20180101", - train_end="20230101", - test_start="20230101", - test_end="20240101", - top_n=5, - output_path="output/top_stocks.tsv" - ) - -因子使用: - from src.factors import MovingAverageFactor, ReturnRankFactor - - ma5 = MovingAverageFactor(period=5) # 5日移动平均 - ma10 = MovingAverageFactor(period=10) # 10日移动平均 - ret5 = ReturnRankFactor(period=5) # 5日收益率排名 +提供模型训练、数据处理和评估的完整流程。 """ -from src.training.pipeline import ( - create_pipeline, - predict_top_stocks, - prepare_data, - run_training, - save_top_stocks, - train_model, +# 基础抽象类 +from src.training.components.base import BaseModel, BaseProcessor + +# 注册中心 +from src.training.registry import ( + ModelRegistry, + ProcessorRegistry, + register_model, + register_processor, ) __all__ = [ - # 管道函数 - "prepare_data", - "create_pipeline", - "train_model", - "predict_top_stocks", - "save_top_stocks", - "run_training", + # 基础抽象类 + "BaseModel", + "BaseProcessor", + # 注册中心 + "ModelRegistry", + "ProcessorRegistry", + "register_model", + "register_processor", ] diff --git a/src/training/components/__init__.py b/src/training/components/__init__.py new file mode 100644 index 0000000..96f3916 --- /dev/null +++ b/src/training/components/__init__.py @@ -0,0 +1,12 @@ +"""训练组件子模块 + +包含模型、处理器、划分器、选择器等组件。 +""" + +# 基础抽象类 +from src.training.components.base import BaseModel, BaseProcessor + +__all__ = [ + "BaseModel", + "BaseProcessor", +] diff --git a/src/training/components/base.py b/src/training/components/base.py new file mode 100644 index 0000000..8c7031e --- /dev/null +++ b/src/training/components/base.py @@ -0,0 +1,141 @@ +"""基础抽象类定义 + +定义 BaseModel 和 BaseProcessor 抽象基类, +为所有训练组件提供统一的接口。 +""" + +from abc import ABC, abstractmethod +from typing import Optional +import pickle + +import polars as pl +import numpy as np +import pandas as pd + + +class BaseModel(ABC): + """模型基类 + + 所有机器学习模型必须继承此类并实现抽象方法。 + 提供统一的训练、预测、特征重要性和持久化接口。 + + Attributes: + name: 模型名称,子类必须定义 + """ + + name: str = "" # 模型名称 + + @abstractmethod + def fit(self, X: pl.DataFrame, y: pl.Series) -> "BaseModel": + """训练模型 + + Args: + X: 特征矩阵 (Polars DataFrame) + y: 目标变量 (Polars Series) + + Returns: + self (支持链式调用) + """ + raise NotImplementedError + + @abstractmethod + def predict(self, X: pl.DataFrame) -> np.ndarray: + """预测 + + Args: + X: 特征矩阵 (Polars DataFrame) + + Returns: + 预测结果 (numpy ndarray) + """ + raise NotImplementedError + + def feature_importance(self) -> Optional[pd.Series]: + """特征重要性 + + Returns: + 特征重要性序列,如果不支持则返回 None + """ + return None + + def save(self, path: str) -> None: + """保存模型到文件 + + 默认实现使用 pickle 序列化,子类可覆盖以使用更高效的格式。 + + Args: + path: 保存路径 + + Raises: + RuntimeError: 模型未训练时调用 + """ + with open(path, "wb") as f: + pickle.dump(self, f) + + @classmethod + def load(cls, path: str) -> "BaseModel": + """从文件加载模型 + + Args: + path: 模型文件路径 + + Returns: + 加载的模型实例 + """ + with open(path, "rb") as f: + return pickle.load(f) + + +class BaseProcessor(ABC): + """数据处理器基类 + + 重要:Processor 在不同阶段行为不同: + - 训练阶段:fit_transform(学习参数并应用) + - 验证/测试阶段:transform(使用训练阶段学到的参数) + + 这意味着 Processor 实例会在训练后被保存, + 用于后续的验证和测试数据转换。 + + Attributes: + name: 处理器名称,子类必须定义 + """ + + name: str = "" + + def fit(self, X: pl.DataFrame) -> "BaseProcessor": + """学习参数(仅在训练阶段调用) + + 子类应覆盖此方法以学习统计参数(如均值、标准差等)。 + + Args: + X: 训练数据 (Polars DataFrame) + + Returns: + self (支持链式调用) + """ + return self + + @abstractmethod + def transform(self, X: pl.DataFrame) -> pl.DataFrame: + """转换数据 + + Args: + X: 输入数据 (Polars DataFrame) + + Returns: + 转换后的数据 (Polars DataFrame) + """ + raise NotImplementedError + + def fit_transform(self, X: pl.DataFrame) -> pl.DataFrame: + """拟合并转换(训练阶段使用) + + 先调用 fit 学习参数,然后调用 transform 应用转换。 + + Args: + X: 训练数据 (Polars DataFrame) + + Returns: + 转换后的数据 (Polars DataFrame) + """ + return self.fit(X).transform(X) diff --git a/src/training/output/top_stocks.tsv b/src/training/output/top_stocks.tsv deleted file mode 100644 index fc7e297..0000000 --- a/src/training/output/top_stocks.tsv +++ /dev/null @@ -1,1216 +0,0 @@ -trade_date score ts_code -2025-01-02 0.7051179656547538 000759.SZ -2025-01-02 0.7051179656547538 600778.SH -2025-01-02 0.7051179656547538 002582.SZ -2025-01-02 0.7051179656547538 603068.SH -2025-01-02 0.7051179656547538 605299.SH -2025-01-03 0.7051179656547538 603016.SH -2025-01-03 0.7051179656547538 600071.SH -2025-01-03 0.7051179656547538 002952.SZ -2025-01-03 0.7051179656547538 600693.SH -2025-01-03 0.7051179656547538 600618.SH -2025-01-06 0.7051179656547538 000756.SZ -2025-01-06 0.7051179656547538 605378.SH -2025-01-06 0.7051179656547538 000597.SZ -2025-01-06 0.7051179656547538 600618.SH -2025-01-06 0.7051179656547538 002552.SZ -2025-01-07 0.7051179656547538 002620.SZ -2025-01-07 0.7051179656547538 600218.SH -2025-01-07 0.7051179656547538 000533.SZ -2025-01-07 0.7051179656547538 600105.SH -2025-01-07 0.7051179656547538 003003.SZ -2025-01-08 0.733000910769149 603070.SH -2025-01-08 0.7285396253124238 600693.SH -2025-01-08 0.7269798929107358 603683.SH -2025-01-08 0.7263330514684178 002364.SZ -2025-01-08 0.7250024174886921 603990.SH -2025-01-09 0.7254659521341397 603118.SH -2025-01-09 0.7250673670188915 002400.SZ -2025-01-09 0.7244838367978005 002536.SZ -2025-01-09 0.723226593227657 603777.SH -2025-01-09 0.7142213237842978 603305.SH -2025-01-10 0.733000910769149 600053.SH -2025-01-10 0.728364923591949 600650.SH -2025-01-10 0.7278727136705941 000561.SZ -2025-01-10 0.7269798929107358 002660.SZ -2025-01-10 0.7250024174886921 002917.SZ -2025-01-13 0.728999426321122 603166.SH -2025-01-13 0.728999426321122 603106.SH -2025-01-13 0.7274413474305255 002313.SZ -2025-01-13 0.7269798929107358 600128.SH -2025-01-13 0.7250024174886921 002725.SZ -2025-01-14 0.733000910769149 002583.SZ -2025-01-14 0.733000910769149 603825.SH -2025-01-14 0.728999426321122 002137.SZ -2025-01-14 0.7254659521341397 003003.SZ -2025-01-14 0.7254659521341397 002103.SZ -2025-01-15 0.6387075433643968 600289.SH -2025-01-15 0.6374385511426223 002656.SZ -2025-01-15 0.5410429736449522 601279.SH -2025-01-15 0.5393223279940808 002397.SZ -2025-01-15 0.5340821600522587 002793.SZ -2025-01-16 0.6781212885667098 002123.SZ -2025-01-16 0.6775871821434284 000016.SZ -2025-01-16 0.6775871821434284 600933.SH -2025-01-16 0.6387075433643968 600289.SH -2025-01-16 0.6159681320421684 002379.SZ -2025-01-17 0.6826622225079328 002656.SZ -2025-01-17 0.6826622225079328 600933.SH -2025-01-17 0.6387075433643968 600289.SH -2025-01-17 0.544416099620109 000408.SZ -2025-01-17 0.5361316966516099 002793.SZ -2025-01-20 0.5313247850374478 002872.SZ -2025-01-20 0.5270902024087297 603500.SH -2025-01-20 0.521655179277378 002218.SZ -2025-01-20 0.5146433933122077 000802.SZ -2025-01-20 0.5099224225735371 600664.SH -2025-01-21 0.6159124153292242 600289.SH -2025-01-21 0.521566979605915 603500.SH -2025-01-21 0.5192867029883689 003021.SZ -2025-01-21 0.5121463182849544 002218.SZ -2025-01-21 0.5120253271434904 603236.SH -2025-01-22 0.6620934004606905 002656.SZ -2025-01-22 0.661169148249026 600289.SH -2025-01-22 0.5351771629779175 002418.SZ -2025-01-22 0.5233214191633075 600589.SH -2025-01-22 0.5188882944862926 600243.SH -2025-01-23 0.6669403824458782 001356.SZ -2025-01-23 0.6620934004606905 002656.SZ -2025-01-23 0.661169148249026 600289.SH -2025-01-23 0.5823267181327698 002418.SZ -2025-01-23 0.5208278859839193 002686.SZ -2025-01-24 0.6474199399258881 002494.SZ -2025-01-24 0.5331050665207507 000615.SZ -2025-01-24 0.5295780449034047 600421.SH -2025-01-24 0.5289199536142031 600130.SH -2025-01-24 0.5256292685122648 002569.SZ -2025-01-27 0.6669403824458782 001395.SZ -2025-01-27 0.631159350200206 600130.SH -2025-01-27 0.5720072117966968 600421.SH -2025-01-27 0.5584407718655982 600358.SH -2025-01-27 0.5380300868300264 002542.SZ -2025-02-05 0.7244838367978005 002123.SZ -2025-02-05 0.6550780484687123 000972.SZ -2025-02-05 0.5890724385352274 600358.SH -2025-02-05 0.5560990328127002 000820.SZ -2025-02-05 0.5427844132995285 603682.SH -2025-02-06 0.6310880416420657 600130.SH -2025-02-06 0.6025512918092927 600358.SH -2025-02-06 0.5685222827863671 000736.SZ -2025-02-06 0.5572457637916861 600778.SH -2025-02-06 0.5513423924464075 601515.SH -2025-02-07 0.7027342936600967 001395.SZ -2025-02-07 0.6224057426565484 600358.SH -2025-02-07 0.5598843527597499 002767.SZ -2025-02-07 0.5542198914019132 605162.SH -2025-02-07 0.5450747783984903 600858.SH -2025-02-10 0.5585540848911058 000736.SZ -2025-02-10 0.5543271073999997 605162.SH -2025-02-10 0.5451710325592884 002767.SZ -2025-02-10 0.5433425776971076 603682.SH -2025-02-10 0.5392073380823367 002636.SZ -2025-02-11 0.543296731052936 603719.SH -2025-02-11 0.5323121465042445 601156.SH -2025-02-11 0.526231221718517 002636.SZ -2025-02-11 0.5136339707518657 603687.SH -2025-02-11 0.5131110240773495 002820.SZ -2025-02-12 0.5467464517435845 002052.SZ -2025-02-12 0.5150190047321577 603296.SH -2025-02-12 0.5108097318020007 603501.SH -2025-02-12 0.5072040838683077 600115.SH -2025-02-12 0.5062407534441172 600929.SH -2025-02-13 0.5260729456251799 002822.SZ -2025-02-13 0.5249034646482712 001356.SZ -2025-02-13 0.5225603550790333 002718.SZ -2025-02-13 0.5062880387830124 002207.SZ -2025-02-13 0.5062880387830124 600517.SH -2025-02-14 0.5129142393802463 002718.SZ -2025-02-14 0.5062880387830124 603323.SH -2025-02-14 0.5062407534441172 600929.SH -2025-02-14 0.5062407534441172 000930.SZ -2025-02-14 0.5059432414859574 600222.SH -2025-02-17 0.5404208504398543 002718.SZ -2025-02-17 0.5313441956279837 600381.SH -2025-02-17 0.511722434421143 000530.SZ -2025-02-17 0.5062786073876784 002697.SZ -2025-02-17 0.5062407534441172 601988.SH -2025-02-18 0.5345120731092181 002718.SZ -2025-02-18 0.516756626959392 000626.SZ -2025-02-18 0.5096590250388462 603036.SH -2025-02-18 0.5069762768566503 002059.SZ -2025-02-18 0.5069065869847293 600740.SH -2025-02-19 0.5326002636013797 600261.SH -2025-02-19 0.5221685385870379 600381.SH -2025-02-19 0.5205308388365613 601500.SH -2025-02-19 0.519151342531204 000530.SZ -2025-02-19 0.5109344127753974 600255.SH -2025-02-20 0.5195782215053089 600261.SH -2025-02-20 0.5173744533696785 600121.SH -2025-02-20 0.5166753540849746 000530.SZ -2025-02-20 0.5129988424429641 603116.SH -2025-02-20 0.5109344127753974 601113.SH -2025-02-21 0.516756626959392 600698.SH -2025-02-21 0.5158730698526023 000802.SZ -2025-02-21 0.5150190047321577 603728.SH -2025-02-21 0.5148248849090167 601111.SH -2025-02-21 0.5141087911649017 600320.SH -2025-02-24 0.5417239869858262 000802.SZ -2025-02-24 0.5171224060142442 002905.SZ -2025-02-24 0.5167878247389599 600630.SH -2025-02-24 0.5157554031130761 002739.SZ -2025-02-24 0.5117426671618643 000530.SZ -2025-02-25 0.5137333740114639 603103.SH -2025-02-25 0.5132838589586456 600664.SH -2025-02-25 0.5123761070094214 600630.SH -2025-02-25 0.5084389075155574 000600.SZ -2025-02-25 0.5072040838683077 600121.SH -2025-02-26 0.5178887396475126 603825.SH -2025-02-26 0.5176583632438975 000863.SZ -2025-02-26 0.5161890103955529 600609.SH -2025-02-26 0.5148248849090167 603993.SH -2025-02-26 0.5131783435030605 000802.SZ -2025-02-27 0.5234880337204921 600863.SH -2025-02-27 0.5192867029883689 001306.SZ -2025-02-27 0.5110428546997954 601599.SH -2025-02-27 0.5108097318020007 002896.SZ -2025-02-27 0.5072040838683077 600500.SH -2025-02-28 0.5302693298584756 603825.SH -2025-02-28 0.528551727159285 603848.SH -2025-02-28 0.5135320827288788 000488.SZ -2025-02-28 0.5099124664959997 600421.SH -2025-02-28 0.508592800689105 002550.SZ -2025-03-03 0.5388641146940151 603848.SH -2025-03-03 0.5284306205881467 600633.SH -2025-03-03 0.5189972724604688 002716.SZ -2025-03-03 0.5179075316852415 600863.SH -2025-03-03 0.5174473288476508 601000.SH -2025-03-04 0.6669403824458782 603409.SH -2025-03-04 0.5388421266082344 600986.SH -2025-03-04 0.5296286995283527 002175.SZ -2025-03-04 0.5296286995283527 002398.SZ -2025-03-04 0.5192811146990993 000903.SZ -2025-03-05 0.5713532860246767 002398.SZ -2025-03-05 0.5439725581069169 600633.SH -2025-03-05 0.5207195789387543 600556.SH -2025-03-05 0.5152424033193611 002238.SZ -2025-03-05 0.5148248849090167 601801.SH -2025-03-06 0.5440709142221977 600302.SH -2025-03-06 0.5350674079018686 600892.SH -2025-03-06 0.531155872343103 000816.SZ -2025-03-06 0.5255755545116174 002522.SZ -2025-03-06 0.5221685385870379 600130.SH -2025-03-07 0.5303266869959796 600825.SH -2025-03-07 0.5294564461378244 000892.SZ -2025-03-07 0.517257618824714 002191.SZ -2025-03-07 0.5169986507482853 000506.SZ -2025-03-07 0.5150190047321577 601100.SH -2025-03-10 0.5455320858284268 000679.SZ -2025-03-10 0.5179555419687653 002191.SZ -2025-03-10 0.5155746895304034 605300.SH -2025-03-10 0.5147422040064019 600611.SH -2025-03-10 0.5131569088886956 000042.SZ -2025-03-11 0.6669403824458782 603271.SH -2025-03-11 0.5195959454077144 000042.SZ -2025-03-11 0.5188882944862926 600892.SH -2025-03-11 0.5156039866864774 002742.SZ -2025-03-11 0.5104362501652622 002133.SZ -2025-03-12 0.5312791351753321 600622.SH -2025-03-12 0.5207195789387543 000506.SZ -2025-03-12 0.5192867029883689 603297.SH -2025-03-12 0.5124833570753022 002490.SZ -2025-03-12 0.5086182804866152 000560.SZ -2025-03-13 0.5306198355428968 605300.SH -2025-03-13 0.5237245769000003 002490.SZ -2025-03-13 0.5123724033923982 000042.SZ -2025-03-13 0.5105185694504328 002208.SZ -2025-03-13 0.5092585322658638 600525.SH -2025-03-14 0.5153716885780952 605066.SH -2025-03-14 0.5092800421012358 603700.SH -2025-03-14 0.5086237851754711 000016.SZ -2025-03-14 0.5065054255860063 600115.SH -2025-03-14 0.5062786073876784 601500.SH -2025-03-17 0.5249712719291497 603700.SH -2025-03-17 0.5216259044236606 605066.SH -2025-03-17 0.5192867029883689 002896.SZ -2025-03-17 0.511187246369438 603086.SH -2025-03-17 0.507539169599309 603278.SH -2025-03-18 0.5347903147450458 600216.SH -2025-03-18 0.5069762768566503 000822.SZ -2025-03-18 0.5066006223596465 600115.SH -2025-03-18 0.5062880387830124 600510.SH -2025-03-18 0.5062407534441172 601988.SH -2025-03-19 0.5246154292130808 600216.SH -2025-03-19 0.5173744533696785 000761.SZ -2025-03-19 0.5067526778780671 600115.SH -2025-03-19 0.5062880387830124 002360.SZ -2025-03-19 0.5062880387830124 002492.SZ -2025-03-20 0.6669403824458782 603124.SH -2025-03-20 0.5192867029883689 601799.SH -2025-03-20 0.506799811492834 601988.SH -2025-03-20 0.5062786073876784 600929.SH -2025-03-20 0.5059432414859574 002592.SZ -2025-03-21 0.6669403824458782 001382.SZ -2025-03-21 0.5125122512066596 601010.SH -2025-03-21 0.5102833212875136 002542.SZ -2025-03-21 0.5095087997335653 603388.SH -2025-03-21 0.5063152599497193 002898.SZ -2025-03-24 0.5287115738052487 600539.SH -2025-03-24 0.516756626959392 002175.SZ -2025-03-24 0.5159073332822095 002369.SZ -2025-03-24 0.5140870467512906 002348.SZ -2025-03-24 0.5127436997551269 002542.SZ -2025-03-25 0.5338833435320202 002418.SZ -2025-03-25 0.5288583540672106 000536.SZ -2025-03-25 0.5220201124248427 002542.SZ -2025-03-25 0.5210227197807852 600193.SH -2025-03-25 0.5208278859839193 600478.SH -2025-03-26 0.5471895490430322 603626.SH -2025-03-26 0.5454494909023954 600193.SH -2025-03-26 0.5319661551585267 600984.SH -2025-03-26 0.5317817863838529 000665.SZ -2025-03-26 0.5313866754947477 000892.SZ -2025-03-27 0.5503862351081134 603626.SH -2025-03-27 0.5448849775405112 002795.SZ -2025-03-27 0.5433145940449446 600552.SH -2025-03-27 0.5352209692776468 600355.SH -2025-03-27 0.533842022464007 000813.SZ -2025-03-28 0.5661570857190846 001382.SZ -2025-03-28 0.5585868058140843 002076.SZ -2025-03-28 0.5576433813007622 603315.SH -2025-03-28 0.5552114736653683 002490.SZ -2025-03-28 0.5456866035054126 002580.SZ -2025-03-31 0.553764702793686 000890.SZ -2025-03-31 0.5532818452700384 000736.SZ -2025-03-31 0.5528182733194853 600439.SH -2025-03-31 0.5422846765613857 605208.SH -2025-03-31 0.5411407392151744 603421.SH -2025-04-01 0.574923880807869 002713.SZ -2025-04-01 0.5708170878939591 601798.SH -2025-04-01 0.5674920738410727 600355.SH -2025-04-01 0.5674920738410727 000890.SZ -2025-04-01 0.5512737728527647 603789.SH -2025-04-02 0.5638354497475407 002164.SZ -2025-04-02 0.5552114736653683 000890.SZ -2025-04-02 0.5432957621006483 002724.SZ -2025-04-02 0.5413800214539841 603359.SH -2025-04-02 0.5411760159205538 603081.SH -2025-04-03 0.5584407718655982 002523.SZ -2025-04-03 0.5445463206684408 603081.SH -2025-04-03 0.5388926502267588 600758.SH -2025-04-03 0.538322659938494 603359.SH -2025-04-03 0.5373019343225885 600255.SH -2025-04-07 0.5858977654072869 002278.SZ -2025-04-07 0.5831732967328205 600758.SH -2025-04-07 0.5683367621802544 000852.SZ -2025-04-07 0.5683367621802544 000541.SZ -2025-04-07 0.5683049519792355 603359.SH -2025-04-08 0.6669403824458782 603257.SH -2025-04-08 0.5746354992094956 000541.SZ -2025-04-08 0.5713532860246767 000595.SZ -2025-04-08 0.5700320948798235 600758.SH -2025-04-08 0.5582398821422095 002523.SZ -2025-04-09 0.6669403824458782 603210.SH -2025-04-09 0.5977846147659792 600758.SH -2025-04-09 0.5747415726474464 002363.SZ -2025-04-09 0.5714595675545813 603002.SH -2025-04-09 0.5693345798065312 002523.SZ -2025-04-10 0.5826618787963105 002662.SZ -2025-04-10 0.5681804385990817 600579.SH -2025-04-10 0.5676768464120937 002686.SZ -2025-04-10 0.5673064474063121 002689.SZ -2025-04-10 0.566065683809122 600403.SH -2025-04-11 0.6199863617598848 600595.SH -2025-04-11 0.6097986210699184 600403.SH -2025-04-11 0.5825563460455111 000757.SZ -2025-04-11 0.575622310153987 000533.SZ -2025-04-11 0.575622310153987 002420.SZ -2025-04-14 0.6072536670709681 600595.SH -2025-04-14 0.5681804385990817 002579.SZ -2025-04-14 0.5647134283078418 603353.SH -2025-04-14 0.5604553606074223 605255.SH -2025-04-14 0.5543020950336787 002448.SZ -2025-04-15 0.6669403824458782 001335.SZ -2025-04-15 0.5604663668984082 002735.SZ -2025-04-15 0.5389745862910662 603353.SH -2025-04-15 0.5350674079018686 600403.SH -2025-04-15 0.5349437299046987 002526.SZ -2025-04-16 0.6669403824458782 603120.SH -2025-04-16 0.5116020755128445 600595.SH -2025-04-16 0.5104362501652622 600477.SH -2025-04-16 0.5098676844433959 002091.SZ -2025-04-16 0.5088902658690911 600152.SH -2025-04-17 0.5119422408566764 603330.SH -2025-04-17 0.5067526778780671 000725.SZ -2025-04-17 0.5062935365059151 002535.SZ -2025-04-17 0.5059432414859574 603817.SH -2025-04-17 0.5059432414859574 002022.SZ -2025-04-18 0.5553688921775346 603330.SH -2025-04-18 0.514635987194688 002688.SZ -2025-04-18 0.5110428546997954 002535.SZ -2025-04-18 0.5056110543461024 002752.SZ -2025-04-18 0.5056110543461024 000650.SZ -2025-04-21 0.5819957556599779 002084.SZ -2025-04-21 0.545414969408834 603336.SH -2025-04-21 0.5172847175023007 603330.SH -2025-04-21 0.516612009471722 000713.SZ -2025-04-21 0.5149332831592093 002688.SZ -2025-04-22 0.5843211073067424 002084.SZ -2025-04-22 0.5433478030227075 002900.SZ -2025-04-22 0.5153784216354333 603709.SH -2025-04-22 0.5100486966304032 000958.SZ -2025-04-22 0.5098912342080154 600195.SH -2025-04-23 0.5600621372160365 000785.SZ -2025-04-23 0.5518254167458194 603709.SH -2025-04-23 0.5377852469767108 002900.SZ -2025-04-23 0.5314798446365324 002084.SZ -2025-04-23 0.5284541045338316 003021.SZ -2025-04-24 0.6669403824458782 603202.SH -2025-04-24 0.6669403824458782 001400.SZ -2025-04-24 0.5977176355354952 000785.SZ -2025-04-24 0.5677858234677825 002084.SZ -2025-04-24 0.5259647450920883 600033.SH -2025-04-25 0.5622165720306567 000785.SZ -2025-04-25 0.5332605242520999 600965.SH -2025-04-25 0.5230859175308072 002234.SZ -2025-04-25 0.5219065617709089 002713.SZ -2025-04-25 0.5215694612086733 002529.SZ -2025-04-28 0.5842493402723935 000785.SZ -2025-04-28 0.5370901276046655 600381.SH -2025-04-28 0.5336039591479221 603605.SH -2025-04-28 0.5294515299872153 600965.SH -2025-04-28 0.519813501069505 600172.SH -2025-04-29 0.5786351360102432 600187.SH -2025-04-29 0.5296286995283527 600712.SH -2025-04-29 0.5237719024466865 600965.SH -2025-04-29 0.5215694612086733 600249.SH -2025-04-29 0.5194484553193683 000753.SZ -2025-04-30 0.5755387753130181 000504.SZ -2025-04-30 0.5538023549703315 000638.SZ -2025-04-30 0.5514808808570097 002910.SZ -2025-04-30 0.5513285701188216 002549.SZ -2025-04-30 0.5485578275612335 603535.SH -2025-05-06 0.5789643829047145 600300.SH -2025-05-06 0.5515931343933391 002529.SZ -2025-05-06 0.545227460940995 600249.SH -2025-05-06 0.5393720691752084 600250.SH -2025-05-06 0.5380380761759416 603021.SH -2025-05-07 0.5935722209684432 002231.SZ -2025-05-07 0.5555488333612659 600228.SH -2025-05-07 0.5533538665235384 600300.SH -2025-05-07 0.5320260837500719 603535.SH -2025-05-07 0.5313084167778416 002529.SZ -2025-05-08 0.592069575966614 000638.SZ -2025-05-08 0.5908651580077275 002231.SZ -2025-05-08 0.5512490630072319 600525.SH -2025-05-08 0.5495867905803188 002693.SZ -2025-05-08 0.5493178829982284 600308.SH -2025-05-09 0.6058279120586914 600200.SH -2025-05-09 0.5323391688950498 600107.SH -2025-05-09 0.524017663470185 600051.SH -2025-05-09 0.5205308388365613 002627.SZ -2025-05-09 0.5195782215053089 600308.SH -2025-05-12 0.5680157000763866 600107.SH -2025-05-12 0.5513472361064857 603580.SH -2025-05-12 0.5361465148203206 600599.SH -2025-05-12 0.5284541045338316 002779.SZ -2025-05-12 0.5261729504169856 003003.SZ -2025-05-13 0.5210012376735503 603696.SH -2025-05-13 0.5180067312425508 003003.SZ -2025-05-13 0.5172511366202515 002042.SZ -2025-05-13 0.5109344127753974 600248.SH -2025-05-13 0.5090820621498142 000796.SZ -2025-05-14 0.518624752638074 600107.SH -2025-05-14 0.510745439828272 600248.SH -2025-05-14 0.5064574868683113 600657.SH -2025-05-14 0.5057195347445462 002478.SZ -2025-05-14 0.5057195347445462 603336.SH -2025-05-15 0.5515931343933391 002042.SZ -2025-05-15 0.5466435753827111 603398.SH -2025-05-15 0.5201338233515974 601188.SH -2025-05-15 0.5062880387830124 600399.SH -2025-05-15 0.5058911839972406 000796.SZ -2025-05-16 0.5124106139335408 002571.SZ -2025-05-16 0.5077156767050429 002251.SZ -2025-05-16 0.5070956117261788 002131.SZ -2025-05-16 0.5062407534441172 000875.SZ -2025-05-16 0.5062407534441172 601789.SH -2025-05-19 0.6669403824458782 603014.SH -2025-05-19 0.5259647450920883 601188.SH -2025-05-19 0.5131783435030605 002042.SZ -2025-05-19 0.5094818860270377 600828.SH -2025-05-19 0.5094818860270377 002490.SZ -2025-05-20 0.5105453191187367 002490.SZ -2025-05-20 0.5077631117447703 601616.SH -2025-05-20 0.5074774556059805 002392.SZ -2025-05-20 0.5069065869847293 002131.SZ -2025-05-20 0.5069065869847293 002526.SZ -2025-05-21 0.5161866297338369 600615.SH -2025-05-21 0.5146014429165642 600396.SH -2025-05-21 0.5127621070520616 002987.SZ -2025-05-21 0.5076436730811701 600855.SH -2025-05-21 0.5062786073876784 603878.SH -2025-05-22 0.5602275718550119 002987.SZ -2025-05-22 0.5149332831592093 000617.SZ -2025-05-22 0.5128946932546128 600787.SH -2025-05-22 0.5123376364999294 002551.SZ -2025-05-22 0.5064438479509945 000812.SZ -2025-05-23 0.5607595502258559 603429.SH -2025-05-23 0.5485581030353742 002987.SZ -2025-05-23 0.5115221694101192 601127.SH -2025-05-23 0.5066006223596465 002110.SZ -2025-05-23 0.5062407534441172 002225.SZ -2025-05-26 0.5323121465042445 002809.SZ -2025-05-26 0.5307120448488225 605138.SH -2025-05-26 0.5242624770812656 002987.SZ -2025-05-26 0.5203142580785934 000601.SZ -2025-05-26 0.5179555419687653 603363.SH -2025-05-27 0.5515055057635462 605138.SH -2025-05-27 0.5386995229214847 603429.SH -2025-05-27 0.5299128239702332 002987.SZ -2025-05-27 0.5220201124248427 601018.SH -2025-05-27 0.5194698926054124 601188.SH -2025-05-28 0.5369275719717898 003036.SZ -2025-05-28 0.5290764404714322 600575.SH -2025-05-28 0.5279184941908557 000863.SZ -2025-05-28 0.5257761894248653 600595.SH -2025-05-28 0.5208278859839193 002419.SZ -2025-05-29 0.6669403824458782 001390.SZ -2025-05-29 0.5452589553634714 600448.SH -2025-05-29 0.5290764404714322 002397.SZ -2025-05-29 0.5290764404714322 603363.SH -2025-05-29 0.5290764404714322 600575.SH -2025-05-30 0.535116652342961 002613.SZ -2025-05-30 0.5289323063323099 000507.SZ -2025-05-30 0.5240797839653469 002596.SZ -2025-05-30 0.5162322534143614 002575.SZ -2025-05-30 0.5149332831592093 601008.SH -2025-06-03 0.5454823420038027 605055.SH -2025-06-03 0.5450992816935564 600698.SH -2025-06-03 0.5238461451726196 002811.SZ -2025-06-03 0.5213807481656725 600770.SH -2025-06-03 0.5177545512881411 600149.SH -2025-06-04 0.5538808999940212 600698.SH -2025-06-04 0.5281781513402405 600770.SH -2025-06-04 0.5281533861052373 600448.SH -2025-06-04 0.5271158989738494 000096.SZ -2025-06-04 0.5262771214083012 600403.SH -2025-06-05 0.6669403824458782 603049.SH -2025-06-05 0.542229301349679 600770.SH -2025-06-05 0.5310888011611477 000096.SZ -2025-06-05 0.5302922673899837 000715.SZ -2025-06-05 0.5281955222483072 002599.SZ -2025-06-06 0.5498718697458119 000096.SZ -2025-06-06 0.5467593142714535 603730.SH -2025-06-06 0.5428251312746337 000715.SZ -2025-06-06 0.5294768540665556 002889.SZ -2025-06-06 0.5168008602085614 605055.SH -2025-06-09 0.549005230154962 000096.SZ -2025-06-09 0.537538445742131 603730.SH -2025-06-09 0.5364841799206382 002889.SZ -2025-06-09 0.530842115323555 600936.SH -2025-06-09 0.5169332559702563 603269.SH -2025-06-10 0.55464361080565 603390.SH -2025-06-10 0.5366354494501869 002889.SZ -2025-06-10 0.5330131266030339 605377.SH -2025-06-10 0.5303443337374748 603730.SH -2025-06-10 0.5259305404171137 600546.SH -2025-06-11 0.5387561851960709 603003.SH -2025-06-11 0.5362268294017861 603329.SH -2025-06-11 0.529831272598744 605377.SH -2025-06-11 0.5265615066520655 000677.SZ -2025-06-11 0.5260729456251799 600936.SH -2025-06-12 0.6669403824458782 603382.SH -2025-06-12 0.5593320323165342 000677.SZ -2025-06-12 0.5588942462777563 603329.SH -2025-06-12 0.5324612012654403 001210.SZ -2025-06-12 0.5289369856185699 605377.SH -2025-06-13 0.5697318013954993 603003.SH -2025-06-13 0.5436378826363994 000677.SZ -2025-06-13 0.543296731052936 003025.SZ -2025-06-13 0.5249929388659182 605377.SH -2025-06-13 0.5142350880651031 002685.SZ -2025-06-16 0.5358732302918909 000859.SZ -2025-06-16 0.5353864503535868 003025.SZ -2025-06-16 0.5226174689038874 000695.SZ -2025-06-16 0.5182994300515202 600829.SH -2025-06-16 0.517730370195514 600082.SH -2025-06-17 0.5397963149123955 600082.SH -2025-06-17 0.5195782215053089 002131.SZ -2025-06-17 0.5179886611866528 603359.SH -2025-06-17 0.5178471867134444 002375.SZ -2025-06-17 0.5176797545742033 002431.SZ -2025-06-18 0.5710023471497209 002298.SZ -2025-06-18 0.5314522621191695 600082.SH -2025-06-18 0.5289323063323099 002546.SZ -2025-06-18 0.5203771586183269 000510.SZ -2025-06-18 0.5192867029883689 002916.SZ -2025-06-19 0.5383401964784511 603519.SH -2025-06-19 0.5344757618074522 002303.SZ -2025-06-19 0.5313441956279837 600107.SH -2025-06-19 0.5281863100024857 603681.SH -2025-06-19 0.5257761894248653 600500.SH -2025-06-20 0.6669403824458782 603400.SH -2025-06-20 0.5351753669128975 600439.SH -2025-06-20 0.533842022464007 600448.SH -2025-06-20 0.5317887439092939 600107.SH -2025-06-20 0.5292280061167861 000632.SZ -2025-06-23 0.5711086503758361 002181.SZ -2025-06-23 0.5458513047520547 605388.SH -2025-06-23 0.5379239101616969 002172.SZ -2025-06-23 0.5372498529358267 603871.SH -2025-06-23 0.5371815102808061 600232.SH -2025-06-24 0.6387075433643968 603400.SH -2025-06-24 0.5563046499206835 002181.SZ -2025-06-24 0.5508935566412092 601279.SH -2025-06-24 0.5473929425745812 600232.SH -2025-06-24 0.5427432402972987 002172.SZ -2025-06-25 0.7027342936600967 603400.SH -2025-06-25 0.5396084451598557 601279.SH -2025-06-25 0.5310909201864787 002172.SZ -2025-06-25 0.5294564461378244 600960.SH -2025-06-25 0.5287042731979068 603037.SH -2025-06-26 0.529156294049306 002172.SZ -2025-06-26 0.5272791776397703 600480.SH -2025-06-26 0.5179555419687653 600303.SH -2025-06-26 0.517352717899504 601279.SH -2025-06-26 0.5162912092682229 002735.SZ -2025-06-27 0.5516561587455301 600480.SH -2025-06-27 0.5464583932314039 603037.SH -2025-06-27 0.5257978492925677 002892.SZ -2025-06-27 0.5100252249571439 603587.SH -2025-06-27 0.5064483525900161 603697.SH -2025-06-30 0.5179555419687653 000698.SZ -2025-06-30 0.5174081762086218 002892.SZ -2025-06-30 0.5160731169272524 600295.SH -2025-06-30 0.5129787285127705 600722.SH -2025-06-30 0.5121103836753264 600617.SH -2025-07-01 0.6669403824458782 001388.SZ -2025-07-01 0.5453557949215517 002892.SZ -2025-07-01 0.5212102176264158 600617.SH -2025-07-01 0.5144821798859033 600722.SH -2025-07-01 0.5077228295768573 000554.SZ -2025-07-02 0.5393720691752084 002207.SZ -2025-07-02 0.5158085932031415 605189.SH -2025-07-02 0.5142289246458795 000637.SZ -2025-07-02 0.5126734764286909 002700.SZ -2025-07-02 0.5124016528312786 000159.SZ -2025-07-03 0.5210012376735503 605189.SH -2025-07-03 0.5138861961464867 000637.SZ -2025-07-03 0.5108097318020007 000661.SZ -2025-07-03 0.5058816487197313 000554.SZ -2025-07-03 0.5057893248482864 603866.SH -2025-07-04 0.5137342295321232 002141.SZ -2025-07-04 0.5080808860352188 600110.SH -2025-07-04 0.5064438479509945 002687.SZ -2025-07-04 0.5064438479509945 601500.SH -2025-07-04 0.5064438479509945 002224.SZ -2025-07-07 0.5316698039923937 002109.SZ -2025-07-07 0.5140400139221889 002141.SZ -2025-07-07 0.5110428546997954 600115.SH -2025-07-07 0.5080961531869804 001330.SZ -2025-07-07 0.5069762768566503 600050.SH -2025-07-08 0.5318581027868793 002108.SZ -2025-07-08 0.5295116119546419 002397.SZ -2025-07-08 0.5259647450920883 002109.SZ -2025-07-08 0.5169085372339255 002762.SZ -2025-07-08 0.5164582399968147 002207.SZ -2025-07-09 0.551178085508194 600241.SH -2025-07-09 0.535116652342961 002397.SZ -2025-07-09 0.528066744613198 002108.SZ -2025-07-09 0.5078461078321801 600738.SH -2025-07-09 0.5073539612208279 002455.SZ -2025-07-10 0.5290764404714322 002108.SZ -2025-07-10 0.5157013807857321 002769.SZ -2025-07-10 0.5137735386488207 002397.SZ -2025-07-10 0.5130491869366306 600738.SH -2025-07-10 0.5090675271420955 600241.SH -2025-07-11 0.5294086009072477 603325.SH -2025-07-11 0.5260729456251799 002397.SZ -2025-07-11 0.5207195789387543 600738.SH -2025-07-11 0.5203188769486073 600735.SH -2025-07-11 0.5192867029883689 002821.SZ -2025-07-14 0.5220825162760148 600738.SH -2025-07-14 0.5140400139221889 601218.SH -2025-07-14 0.5091816942372954 000551.SZ -2025-07-14 0.5080065165404044 002658.SZ -2025-07-14 0.5073582226595034 000859.SZ -2025-07-15 0.5259344753158761 603803.SH -2025-07-15 0.5203771586183269 002424.SZ -2025-07-15 0.5150190047321577 603296.SH -2025-07-15 0.5083307455772172 603316.SH -2025-07-15 0.5081916801506979 002012.SZ -2025-07-16 0.6669403824458782 600930.SH -2025-07-16 0.5637525947434893 002309.SZ -2025-07-16 0.5482190028602671 002581.SZ -2025-07-16 0.529349578877612 600039.SH -2025-07-16 0.5237245769000003 000889.SZ -2025-07-17 0.5286406736082913 002263.SZ -2025-07-17 0.5259344753158761 600039.SH -2025-07-17 0.5172578456633914 601101.SH -2025-07-17 0.5121666186315809 601010.SH -2025-07-17 0.5113139305546494 002177.SZ -2025-07-18 0.5208278859839193 600027.SH -2025-07-18 0.516612009471722 605006.SH -2025-07-18 0.5148248849090167 603113.SH -2025-07-18 0.5129787285127705 600229.SH -2025-07-18 0.5122750481447931 601010.SH -2025-07-21 0.7027342936600967 600930.SH -2025-07-21 0.5602677619779618 000903.SZ -2025-07-21 0.5256292685122648 002366.SZ -2025-07-21 0.5195817785669419 600403.SH -2025-07-21 0.5169092312460223 605006.SH -2025-07-22 0.61486236957925 000903.SZ -2025-07-22 0.529027199610689 603959.SH -2025-07-22 0.5188882944862926 002356.SZ -2025-07-22 0.5178471867134444 000720.SZ -2025-07-22 0.517019735866071 002218.SZ -2025-07-23 0.6669403824458782 603262.SH -2025-07-23 0.5386217893521608 002356.SZ -2025-07-23 0.5176583632438975 002141.SZ -2025-07-23 0.515669568888107 002822.SZ -2025-07-23 0.5101143450789728 600250.SH -2025-07-24 0.5295645625272489 603778.SH -2025-07-24 0.5248312020607117 002235.SZ -2025-07-24 0.5203750341785283 000903.SZ -2025-07-24 0.5150908046416858 002356.SZ -2025-07-24 0.5120818023990829 605366.SH -2025-07-25 0.5214304599725706 603778.SH -2025-07-25 0.518624752638074 601916.SH -2025-07-25 0.5074974609211786 600027.SH -2025-07-25 0.5074974609211786 002479.SZ -2025-07-25 0.5070956117261788 002141.SZ -2025-07-28 0.5334375570360811 002916.SZ -2025-07-28 0.5156157233044605 600822.SH -2025-07-28 0.5115300689823145 601107.SH -2025-07-28 0.5115239567079181 605162.SH -2025-07-28 0.5107791472497838 002141.SZ -2025-07-29 0.5170622974146505 600256.SH -2025-07-29 0.5142289246458795 000638.SZ -2025-07-29 0.5108097318020007 603259.SH -2025-07-29 0.5088902658690911 000601.SZ -2025-07-29 0.5084717010767641 600421.SH -2025-07-30 0.6669403824458782 001221.SZ -2025-07-30 0.5149332831592093 002107.SZ -2025-07-30 0.5148248849090167 600930.SH -2025-07-30 0.5117540751866493 600439.SH -2025-07-30 0.5094515226902939 600826.SH -2025-07-31 0.5291066892495949 002266.SZ -2025-07-31 0.5202253266995881 000877.SZ -2025-07-31 0.5195782215053089 600396.SH -2025-07-31 0.5164534160576106 600822.SH -2025-07-31 0.5149332831592093 600753.SH -2025-08-01 0.5336017119045484 002266.SZ -2025-08-01 0.5257761894248653 601015.SH -2025-08-01 0.5247117033289409 600439.SH -2025-08-01 0.5233769415422445 002542.SZ -2025-08-01 0.5195782215053089 601188.SH -2025-08-04 0.5569407228959676 002266.SZ -2025-08-04 0.5336039591479221 001306.SZ -2025-08-04 0.5254970680391339 600439.SH -2025-08-04 0.5235577174355214 002096.SZ -2025-08-04 0.5195782215053089 601188.SH -2025-08-05 0.5257818250477888 002512.SZ -2025-08-05 0.5203036402538412 603819.SH -2025-08-05 0.5192738960079799 600744.SH -2025-08-05 0.5178471867134444 600115.SH -2025-08-05 0.5178038522073544 601015.SH -2025-08-06 0.5296496642223002 002542.SZ -2025-08-06 0.5288681588434305 002512.SZ -2025-08-06 0.5234910779589691 002896.SZ -2025-08-06 0.51870263095642 600777.SH -2025-08-06 0.5172823322968993 600663.SH -2025-08-07 0.5291146231953325 002512.SZ -2025-08-07 0.5234910779589691 603290.SH -2025-08-07 0.5148248849090167 002208.SZ -2025-08-07 0.5144798838507378 002037.SZ -2025-08-07 0.5140870467512906 600537.SH -2025-08-08 0.6669403824458782 603406.SH -2025-08-08 0.5421215789673404 603669.SH -2025-08-08 0.5300403289774189 001216.SZ -2025-08-08 0.5173389658079611 600107.SH -2025-08-08 0.5169963705486088 002512.SZ -2025-08-11 0.5276630558210795 001318.SZ -2025-08-11 0.5169581185947476 001216.SZ -2025-08-11 0.512851095429037 603616.SH -2025-08-11 0.5094492309413429 000516.SZ -2025-08-11 0.506799811492834 601216.SH -2025-08-12 0.5315222086193077 000679.SZ -2025-08-12 0.519582801282802 001318.SZ -2025-08-12 0.5110352388373823 000597.SZ -2025-08-12 0.506799811492834 600027.SH -2025-08-12 0.506799811492834 600998.SH -2025-08-13 0.538757178989356 600117.SH -2025-08-13 0.5259305404171137 600579.SH -2025-08-13 0.5235577174355214 001318.SZ -2025-08-13 0.5149332831592093 000525.SZ -2025-08-13 0.5116385050716121 000597.SZ -2025-08-14 0.5148248849090167 600238.SH -2025-08-14 0.5100463982075332 600539.SH -2025-08-14 0.5062407534441172 601216.SH -2025-08-14 0.5061322757265252 002538.SZ -2025-08-14 0.5061322757265252 600998.SH -2025-08-15 0.5548900952321907 600117.SH -2025-08-15 0.5398898015532435 002795.SZ -2025-08-15 0.508592800689105 002742.SZ -2025-08-15 0.5081916801506979 002398.SZ -2025-08-15 0.5070956117261788 600792.SH -2025-08-18 0.5738782734368069 600117.SH -2025-08-18 0.5309645980537814 601718.SH -2025-08-18 0.5192867029883689 603516.SH -2025-08-18 0.511722434421143 000558.SZ -2025-08-18 0.5093068224471816 002775.SZ -2025-08-19 0.5207195789387543 603398.SH -2025-08-19 0.5192738960079799 002480.SZ -2025-08-19 0.5173595009824935 603878.SH -2025-08-19 0.5169798402349087 600792.SH -2025-08-19 0.5163178922289325 000802.SZ -2025-08-20 0.5259647450920883 000761.SZ -2025-08-20 0.5205308388365613 603398.SH -2025-08-20 0.5147161662005285 601718.SH -2025-08-20 0.5146901234462072 002596.SZ -2025-08-20 0.5120818023990829 002480.SZ -2025-08-21 0.5228829187684927 601718.SH -2025-08-21 0.5221685385870379 000761.SZ -2025-08-21 0.5108097318020007 603986.SH -2025-08-21 0.5059432414859574 002360.SZ -2025-08-21 0.5059329787807083 002330.SZ -2025-08-22 0.5192867029883689 603986.SH -2025-08-22 0.5192107504177909 600808.SH -2025-08-22 0.506799811492834 603398.SH -2025-08-22 0.506799811492834 000950.SZ -2025-08-22 0.506799811492834 600256.SH -2025-08-25 0.5156039866864774 600808.SH -2025-08-25 0.5150190047321577 603019.SH -2025-08-25 0.5115300689823145 601665.SH -2025-08-25 0.5074974609211786 600256.SH -2025-08-25 0.5062880387830124 600063.SH -2025-08-26 0.5078046064828842 603221.SH -2025-08-26 0.5061322757265252 600738.SH -2025-08-26 0.5061322757265252 601816.SH -2025-08-26 0.5052050973634397 002666.SZ -2025-08-26 0.5051870732773048 601096.SH -2025-08-27 0.5978889550464905 002305.SZ -2025-08-27 0.5158791642933552 603221.SH -2025-08-27 0.5115221694101192 603893.SH -2025-08-27 0.5108097318020007 603236.SH -2025-08-27 0.5064472297039752 603083.SH -2025-08-28 0.5357831647776657 603893.SH -2025-08-28 0.5336039591479221 603124.SH -2025-08-28 0.5192867029883689 603290.SH -2025-08-28 0.5162911266132231 603221.SH -2025-08-28 0.5143302235016333 002225.SZ -2025-08-29 0.5229758732518588 603221.SH -2025-08-29 0.5202253266995881 603980.SH -2025-08-29 0.5195782215053089 002437.SZ -2025-08-29 0.5179555419687653 600664.SH -2025-08-29 0.5163489167484762 002858.SZ -2025-09-01 0.5579671919498944 600658.SH -2025-09-01 0.5257036413547017 000889.SZ -2025-09-01 0.5248393833059358 603617.SH -2025-09-01 0.5178471867134444 600664.SH -2025-09-01 0.5177545512881411 603980.SH -2025-09-02 0.5317572447435938 002534.SZ -2025-09-02 0.5284541045338316 000661.SZ -2025-09-02 0.5275523725439486 600664.SH -2025-09-02 0.5266305456247979 600537.SH -2025-09-02 0.5227889876095065 603336.SH -2025-09-03 0.535116652342961 601599.SH -2025-09-03 0.5301462333144298 601869.SH -2025-09-03 0.526410328569605 603326.SH -2025-09-03 0.5257148174645001 603158.SH -2025-09-03 0.5221694020644978 600360.SH -2025-09-04 0.5333373315105012 603458.SH -2025-09-04 0.5324612012654403 002123.SZ -2025-09-04 0.5259647450920883 600664.SH -2025-09-04 0.5259344753158761 600234.SH -2025-09-04 0.5210986637536168 600271.SH -2025-09-05 0.6669403824458782 603370.SH -2025-09-05 0.5373956149682387 600728.SH -2025-09-05 0.5295205847835255 002640.SZ -2025-09-05 0.5291787670010305 603601.SH -2025-09-05 0.5290764404714322 601599.SH -2025-09-08 0.5517752644936849 600986.SH -2025-09-08 0.5417239869858262 603601.SH -2025-09-08 0.5336039591479221 603119.SH -2025-09-08 0.5301445991229797 002649.SZ -2025-09-08 0.526231221718517 002297.SZ -2025-09-09 0.549747924470271 600435.SH -2025-09-09 0.5343258195125252 002277.SZ -2025-09-09 0.5286617661336549 002815.SZ -2025-09-09 0.526231221718517 000670.SZ -2025-09-09 0.5208278859839193 603601.SH -2025-09-10 0.5537305785096447 600744.SH -2025-09-10 0.5301308361482894 600435.SH -2025-09-10 0.5284541045338316 603516.SH -2025-09-10 0.522871366052352 002277.SZ -2025-09-10 0.5206268768675039 002413.SZ -2025-09-11 0.5825563460455111 002210.SZ -2025-09-11 0.5392698727103348 600435.SH -2025-09-11 0.5336039591479221 603124.SH -2025-09-11 0.5301335488378848 603177.SH -2025-09-11 0.5234910779589691 603083.SH -2025-09-12 0.5257761894248653 002217.SZ -2025-09-12 0.5207195789387543 002413.SZ -2025-09-12 0.5199835318621187 002210.SZ -2025-09-12 0.5192867029883689 603083.SH -2025-09-12 0.5192867029883689 603516.SH -2025-09-15 0.5472970747630104 603839.SH -2025-09-15 0.5440495302362878 603626.SH -2025-09-15 0.5330472282985935 002210.SZ -2025-09-15 0.5290764404714322 000407.SZ -2025-09-15 0.5192867029883689 002896.SZ -2025-09-16 0.5392073380823367 603839.SH -2025-09-16 0.522977653412587 002691.SZ -2025-09-16 0.5192867029883689 603083.SH -2025-09-16 0.5192867029883689 601869.SH -2025-09-16 0.5108097318020007 603516.SH -2025-09-17 0.522839363247788 603709.SH -2025-09-17 0.5192867029883689 603297.SH -2025-09-17 0.5172075410270989 002174.SZ -2025-09-17 0.5150190047321577 001283.SZ -2025-09-17 0.5138861961464867 002168.SZ -2025-09-18 0.5280642376241884 603709.SH -2025-09-18 0.5192867029883689 601869.SH -2025-09-18 0.5192867029883689 605288.SH -2025-09-18 0.5192867029883689 603297.SH -2025-09-18 0.5173595009824935 600358.SH -2025-09-19 0.5292280061167861 002721.SZ -2025-09-19 0.5220825162760148 002316.SZ -2025-09-19 0.5202253266995881 600507.SH -2025-09-19 0.5192867029883689 601869.SH -2025-09-19 0.5192867029883689 001309.SZ -2025-09-22 0.5290764404714322 002721.SZ -2025-09-22 0.5209796446328382 601778.SH -2025-09-22 0.5207195789387543 601003.SH -2025-09-22 0.5192867029883689 603019.SH -2025-09-22 0.5192867029883689 001309.SZ -2025-09-23 0.6669403824458782 603418.SH -2025-09-23 0.5411662091594641 001309.SZ -2025-09-23 0.5290764404714322 002397.SZ -2025-09-23 0.5254926778536082 603778.SH -2025-09-23 0.5207195789387543 002333.SZ -2025-09-24 0.5392059286348174 000972.SZ -2025-09-24 0.5346476148255608 600503.SH -2025-09-24 0.5308810420188524 002692.SZ -2025-09-24 0.5290764404714322 601599.SH -2025-09-24 0.5262182185763995 600130.SH -2025-09-25 0.5294086009072477 000988.SZ -2025-09-25 0.5259647450920883 600525.SH -2025-09-25 0.5244785263384821 603778.SH -2025-09-25 0.524186490228001 002692.SZ -2025-09-25 0.5230859175308072 002719.SZ -2025-09-26 0.5503662271537297 603778.SH -2025-09-26 0.5468004475511966 000428.SZ -2025-09-26 0.5308727072969106 600716.SH -2025-09-26 0.5301217237062711 000959.SZ -2025-09-26 0.5263936646664212 000718.SZ -2025-09-29 0.5932557593164053 000428.SZ -2025-09-29 0.5670273182706154 600448.SH -2025-09-29 0.5549175264141686 002217.SZ -2025-09-29 0.5505530618759115 002263.SZ -2025-09-29 0.5445932653852826 000718.SZ -2025-09-30 0.6669403824458782 001285.SZ -2025-09-30 0.5786351360102432 000428.SZ -2025-09-30 0.5527109872312292 002263.SZ -2025-09-30 0.5357831647776657 001309.SZ -2025-09-30 0.5352209692776468 002622.SZ -2025-10-09 0.5288735076238339 002622.SZ -2025-10-09 0.5262051259218314 000545.SZ -2025-10-09 0.5260729456251799 002263.SZ -2025-10-09 0.5206296858612343 002097.SZ -2025-10-09 0.5149332831592093 603956.SH -2025-10-10 0.606757468204688 002622.SZ -2025-10-10 0.5669848068365473 600624.SH -2025-10-10 0.5421215789673404 600658.SH -2025-10-10 0.5226353674128292 000721.SZ -2025-10-10 0.5208278859839193 002042.SZ -2025-10-13 0.5334375570360811 603516.SH -2025-10-13 0.5267596517862191 600624.SH -2025-10-13 0.5189666077848415 002356.SZ -2025-10-13 0.5121632067888446 000721.SZ -2025-10-13 0.5115300689823145 002659.SZ -2025-10-14 0.5258952914012429 600165.SH -2025-10-14 0.5220569777779845 600624.SH -2025-10-14 0.5210752793150646 002059.SZ -2025-10-14 0.5203750341785283 002622.SZ -2025-10-14 0.5198560721779237 600977.SH -2025-10-15 0.5416299693968815 001330.SZ -2025-10-15 0.5318581027868793 000698.SZ -2025-10-15 0.5192867029883689 002779.SZ -2025-10-15 0.5170622974146505 002059.SZ -2025-10-15 0.5161866297338369 603429.SH -2025-10-16 0.5357831647776657 001309.SZ -2025-10-16 0.5260729456251799 002122.SZ -2025-10-16 0.5254307041644566 000698.SZ -2025-10-16 0.5215775357230317 603103.SH -2025-10-16 0.5173595009824935 002059.SZ -2025-10-17 0.6669403824458782 601026.SH -2025-10-17 0.5436378826363994 603335.SH -2025-10-17 0.5318208651273958 002193.SZ -2025-10-17 0.5232111562602314 600376.SH -2025-10-17 0.5227275460675327 605050.SH -2025-10-20 0.5542198914019132 002515.SZ -2025-10-20 0.5274715810404275 603809.SH -2025-10-20 0.5257357396563849 601519.SH -2025-10-20 0.5243850230152007 605050.SH -2025-10-20 0.5214694606436803 603618.SH -2025-10-21 0.5399718459160422 603618.SH -2025-10-21 0.528551727159285 002182.SZ -2025-10-21 0.5179555419687653 601618.SH -2025-10-21 0.5147102019586222 600376.SH -2025-10-21 0.5113139305546494 000967.SZ -2025-10-22 0.6669403824458782 001386.SZ -2025-10-22 0.5293305264816156 600468.SH -2025-10-22 0.5276711651816922 601618.SH -2025-10-22 0.526231221718517 002998.SZ -2025-10-22 0.521469242438557 603305.SH -2025-10-23 0.535116652342961 601618.SH -2025-10-23 0.5224835943742379 002196.SZ -2025-10-23 0.519700750715856 600255.SH -2025-10-23 0.5154701409400779 002998.SZ -2025-10-23 0.5149332831592093 000758.SZ -2025-10-24 0.6669403824458782 603175.SH -2025-10-24 0.5661570857190846 601026.SH -2025-10-24 0.5336039591479221 603124.SH -2025-10-24 0.5234910779589691 605288.SH -2025-10-24 0.5234910779589691 603160.SH -2025-10-27 0.5386364033186966 600857.SH -2025-10-27 0.5357831647776657 603986.SH -2025-10-27 0.5219351242774325 601212.SH -2025-10-27 0.514635987194688 002716.SZ -2025-10-27 0.5115221694101192 001309.SZ -2025-10-28 0.514635987194688 600333.SH -2025-10-28 0.5133335084224743 000010.SZ -2025-10-28 0.5130347572908087 000889.SZ -2025-10-28 0.5112284398631035 000782.SZ -2025-10-28 0.5105915720835598 600617.SH -2025-10-29 0.7027342936600967 603175.SH -2025-10-29 0.5455562348041093 000626.SZ -2025-10-29 0.5397963149123955 600617.SH -2025-10-29 0.5365722351193363 603027.SH -2025-10-29 0.5120818023990829 600333.SH -2025-10-30 0.5727564714989891 600617.SH -2025-10-30 0.5308131309654015 603027.SH -2025-10-30 0.5217135378764437 000626.SZ -2025-10-30 0.5208278859839193 600168.SH -2025-10-30 0.5207195789387543 601212.SH -2025-10-31 0.5787228086014843 600617.SH -2025-10-31 0.5207195789387543 600168.SH -2025-10-31 0.5207195789387543 000868.SZ -2025-10-31 0.5183163360678433 002251.SZ -2025-10-31 0.5179555419687653 000692.SZ -2025-11-03 0.534924877254506 600617.SH -2025-11-03 0.5243608302115037 002678.SZ -2025-11-03 0.5233797777110492 002542.SZ -2025-11-03 0.5208278859839193 002585.SZ -2025-11-03 0.5208278859839193 600168.SH -2025-11-04 0.6334108285080842 603175.SH -2025-11-04 0.5239557735459798 002678.SZ -2025-11-04 0.5207195789387543 000868.SZ -2025-11-04 0.5205617801737676 600168.SH -2025-11-04 0.5203771586183269 002766.SZ -2025-11-05 0.6669403824458782 603334.SH -2025-11-05 0.5205308388365613 601339.SH -2025-11-05 0.5201302686329659 002766.SZ -2025-11-05 0.5192107504177909 000090.SZ -2025-11-05 0.5176583632438975 002542.SZ -2025-11-06 0.6669403824458782 603376.SH -2025-11-06 0.5349437299046987 601618.SH -2025-11-06 0.5259305404171137 000600.SZ -2025-11-06 0.517352717899504 600248.SH -2025-11-06 0.5150190047321577 603040.SH -2025-11-07 0.6669403824458782 603092.SH -2025-11-07 0.5227889876095065 000600.SZ -2025-11-07 0.5194698926054124 601618.SH -2025-11-07 0.513454254303403 002337.SZ -2025-11-07 0.5122750481447931 600169.SH -2025-11-10 0.5234910779589691 601888.SH -2025-11-10 0.5233214191633075 000981.SZ -2025-11-10 0.5168008602085614 603378.SH -2025-11-10 0.5165434716045726 601618.SH -2025-11-10 0.5118573629020219 000679.SZ -2025-11-11 0.5114452710217089 000757.SZ -2025-11-11 0.5057893248482864 002766.SZ -2025-11-11 0.5055903451732487 601216.SH -2025-11-11 0.5053462265996771 002044.SZ -2025-11-11 0.5051870732773048 600963.SH -2025-11-12 0.5294123016954966 000981.SZ -2025-11-12 0.5116342321245925 000757.SZ -2025-11-12 0.5064438479509945 000802.SZ -2025-11-12 0.5064438479509945 002042.SZ -2025-11-12 0.5064438479509945 000088.SZ -2025-11-13 0.5129787285127705 600120.SH -2025-11-13 0.510685819260391 600416.SH -2025-11-13 0.5094954603413556 002211.SZ -2025-11-13 0.5062407534441172 600515.SH -2025-11-13 0.5057195347445462 000931.SZ -2025-11-14 0.5226681338608951 600416.SH -2025-11-14 0.513082107892262 002480.SZ -2025-11-14 0.5098171825913262 600375.SH -2025-11-14 0.509575707799716 000524.SZ -2025-11-14 0.5086804480442645 002164.SZ -2025-11-17 0.5434544826661134 603759.SH -2025-11-17 0.5253186667209742 600581.SH -2025-11-17 0.5097395009973196 002181.SZ -2025-11-17 0.5093884986165883 000524.SZ -2025-11-17 0.5063330752943594 600375.SH -2025-11-18 0.5294086009072477 605255.SH -2025-11-18 0.5255109699121789 603092.SH -2025-11-18 0.5241190636672188 600292.SH -2025-11-18 0.5196292030484984 603759.SH -2025-11-18 0.5159647339612334 002492.SZ -2025-11-19 0.5357831647776657 603129.SH -2025-11-19 0.5214600555100268 603759.SH -2025-11-19 0.5147101098480353 600094.SH -2025-11-19 0.514635987194688 000027.SZ -2025-11-19 0.5130347572908087 600939.SH -2025-11-20 0.5318208651273958 002522.SZ -2025-11-20 0.5298249062594883 600667.SH -2025-11-20 0.5292280061167861 000720.SZ -2025-11-20 0.5276630558210795 002112.SZ -2025-11-20 0.5184728793030473 601399.SH -2025-11-21 0.5557953906852645 600939.SH -2025-11-21 0.5463342922136836 000809.SZ -2025-11-21 0.5296286995283527 000514.SZ -2025-11-21 0.5277364647770626 002102.SZ -2025-11-21 0.5257761894248653 600708.SH -2025-11-24 0.5708682177963273 002160.SZ -2025-11-24 0.5684724381631168 600802.SH -2025-11-24 0.5632244428348601 002309.SZ -2025-11-24 0.5591187020609228 000809.SZ -2025-11-24 0.5524793574711755 000631.SZ -2025-11-25 0.6669403824458782 001233.SZ -2025-11-25 0.574307667182978 601011.SH -2025-11-25 0.5669124419405613 000631.SZ -2025-11-25 0.5588469864728527 002374.SZ -2025-11-25 0.5552114736653683 603030.SH -2025-11-26 0.5752947449644089 601011.SH -2025-11-26 0.5656926249079262 600807.SH -2025-11-26 0.5601931195838576 600617.SH -2025-11-26 0.559715459117151 002256.SZ -2025-11-26 0.5540608597415699 600798.SH -2025-11-27 0.5826618787963105 600617.SH -2025-11-27 0.5809430149117796 601908.SH -2025-11-27 0.561305310075055 601011.SH -2025-11-27 0.5595889780873284 600303.SH -2025-11-27 0.5546366793786385 600807.SH -2025-11-28 0.5509003340456048 600617.SH -2025-11-28 0.5410583063546074 601908.SH -2025-11-28 0.5376307909297392 000797.SZ -2025-11-28 0.535116652342961 002132.SZ -2025-11-28 0.5311380079716314 002691.SZ -2025-12-01 0.5454842829756713 601566.SH -2025-12-01 0.5286904261151575 002691.SZ -2025-12-01 0.5259647450920883 002132.SZ -2025-12-01 0.5192294534036106 002404.SZ -2025-12-01 0.51914944010669 600338.SH -2025-12-02 0.5351753669128975 600383.SH -2025-12-02 0.5293321778178374 000002.SZ -2025-12-02 0.5150119387056977 002205.SZ -2025-12-02 0.514635987194688 000968.SZ -2025-12-02 0.5088944832379734 002691.SZ -2025-12-03 0.6669403824458782 001280.SZ -2025-12-03 0.5200277459621825 002205.SZ -2025-12-03 0.5059329787807083 600738.SH -2025-12-03 0.5059329787807083 002145.SZ -2025-12-03 0.5059329787807083 000723.SZ -2025-12-04 0.5243608302115037 000002.SZ -2025-12-04 0.5086255830848415 000892.SZ -2025-12-04 0.5061322757265252 600050.SH -2025-12-04 0.5061322757265252 601669.SH -2025-12-04 0.505974230757272 600658.SH -2025-12-05 0.5260729456251799 601929.SH -2025-12-05 0.5194698926054124 002127.SZ -2025-12-05 0.5192867029883689 002779.SZ -2025-12-05 0.5172884903900697 002205.SZ -2025-12-05 0.514635987194688 000567.SZ -2025-12-08 0.5376131262635266 603398.SH -2025-12-08 0.5335642996732787 002811.SZ -2025-12-08 0.5307120448488225 002291.SZ -2025-12-08 0.5178038522073544 601929.SH -2025-12-08 0.5172823322968993 000676.SZ -2025-12-09 0.5404894018708571 002291.SZ -2025-12-09 0.5265064083159695 002811.SZ -2025-12-09 0.5260729456251799 002168.SZ -2025-12-09 0.5158900518232388 002141.SZ -2025-12-09 0.5152424033193611 600986.SH -2025-12-10 0.5869553596904462 002689.SZ -2025-12-10 0.5294086009072477 601869.SH -2025-12-10 0.5284174266960939 000695.SZ -2025-12-10 0.5277364647770626 002168.SZ -2025-12-10 0.5260729456251799 000889.SZ -2025-12-11 0.5541392269101565 001280.SZ -2025-12-11 0.5466581511209122 000638.SZ -2025-12-11 0.5417505813506529 000695.SZ -2025-12-11 0.5338886826939419 603176.SH -2025-12-11 0.5307460824444339 002689.SZ -2025-12-12 0.600272759621709 002689.SZ -2025-12-12 0.5208278859839193 000789.SZ -2025-12-12 0.5203771586183269 600616.SH -2025-12-12 0.5173595009824935 000937.SZ -2025-12-12 0.5173595009824935 002563.SZ -2025-12-15 0.5535428869099189 001280.SZ -2025-12-15 0.5360280914291116 603176.SH -2025-12-15 0.5270577999669818 002551.SZ -2025-12-15 0.5260729456251799 002060.SZ -2025-12-15 0.524017663470185 002686.SZ -2025-12-16 0.572690393905651 000536.SZ -2025-12-16 0.5627205484266725 603815.SH -2025-12-16 0.5410429736449522 000068.SZ -2025-12-16 0.5326635614877283 002634.SZ -2025-12-16 0.529027199610689 002193.SZ -2025-12-17 0.5951304562201396 002589.SZ -2025-12-17 0.5606871732446959 002390.SZ -2025-12-17 0.5517174366399105 000509.SZ -2025-12-17 0.5436378826363994 002193.SZ -2025-12-17 0.5375693919463156 600599.SH -2025-12-18 0.5567503731149535 000701.SZ -2025-12-18 0.5558441068891651 600696.SH -2025-12-18 0.5404570196520365 603815.SH -2025-12-18 0.5384676192562836 605299.SH -2025-12-18 0.5352009523422904 000020.SZ -2025-12-19 0.5550086601660389 600846.SH -2025-12-19 0.5539777914846407 000020.SZ -2025-12-19 0.5517196665917564 600815.SH -2025-12-19 0.5468004475511966 002589.SZ -2025-12-19 0.5361095563091214 600734.SH -2025-12-22 0.576437350624106 600815.SH -2025-12-22 0.5708170878939591 600802.SH -2025-12-22 0.5582571013351643 600696.SH -2025-12-22 0.5405691530605988 002589.SZ -2025-12-22 0.5293145743852032 603879.SH -2025-12-23 0.5360150094122866 600846.SH -2025-12-23 0.5192867029883689 603929.SH -2025-12-23 0.5192867029883689 002837.SZ -2025-12-23 0.514956054042036 000685.SZ -2025-12-23 0.5127101187507722 600939.SH -2025-12-24 0.5330131266030339 002682.SZ -2025-12-24 0.5314281167618613 000838.SZ -2025-12-24 0.5207195789387543 600624.SH -2025-12-24 0.5170622974146505 000035.SZ -2025-12-24 0.5160528360618574 002238.SZ -2025-12-25 0.5208278859839193 600624.SH -2025-12-25 0.5174997854530934 000838.SZ -2025-12-25 0.5169085372339255 000035.SZ -2025-12-25 0.5108097318020007 605123.SH -2025-12-25 0.5071739242869363 002343.SZ -2025-12-26 0.518589360677003 000078.SZ -2025-12-26 0.5141650961836706 600180.SH -2025-12-26 0.5115221694101192 001309.SZ -2025-12-26 0.5089666327619147 002697.SZ -2025-12-26 0.5082868674245398 600624.SH -2025-12-29 0.5399583041498672 000078.SZ -2025-12-29 0.5179555419687653 002694.SZ -2025-12-29 0.5116385050716121 002016.SZ -2025-12-29 0.5115221694101192 605255.SH -2025-12-29 0.5108097318020007 605123.SH -2025-12-30 0.5806552000864679 600800.SH -2025-12-30 0.5713532860246767 000890.SZ -2025-12-30 0.5308261919014939 000078.SZ -2025-12-30 0.5192867029883689 603119.SH -2025-12-30 0.5177545512881411 002044.SZ -2025-12-31 0.5764823074649972 600800.SH -2025-12-31 0.5323336836032145 000078.SZ -2025-12-31 0.5295773547573392 600730.SH -2025-12-31 0.5221685385870379 002047.SZ -2025-12-31 0.5208278859839193 002251.SZ diff --git a/src/training/pipeline.py b/src/training/pipeline.py deleted file mode 100644 index 483916c..0000000 --- a/src/training/pipeline.py +++ /dev/null @@ -1,667 +0,0 @@ -"""训练管道 - 包含数据处理、模型训练和预测功能 - -本模块提供: -1. 数据准备:使用 FactorEngine 从因子计算结果中准备训练/测试数据 -2. 数据处理:Fillna(0) -> Dropna -3. 模型训练:使用LightGBM训练分类模型 -4. 预测和选股:输出每日top5股票池 - -注意:本模块使用 src.factors 框架进行因子计算,确保防泄露机制生效。 - -因子配置示例: - from src.factors import MovingAverageFactor, ReturnRankFactor - from src.training.pipeline import prepare_data - - # 直接传入因子实例列表 - 简单直观 - factors = [ - MovingAverageFactor(period=5), - MovingAverageFactor(period=10), - ReturnRankFactor(period=5), - ] - - train_data, val_data, test_data, factor_config = prepare_data( - factors=factors, - ... - ) - - # 或者使用 FactorConfig 包装(支持链式添加) - from src.training.pipeline import FactorConfig - - config = FactorConfig() - .add(MovingAverageFactor(period=5)) - .add(MovingAverageFactor(period=10)) - .add(ReturnRankFactor(period=5)) -""" - -from datetime import datetime -from pathlib import Path -from typing import List, Optional - -import numpy as np -import polars as pl - -from src.factors import DataLoader, FactorEngine, BaseFactor -from src.factors.data_spec import DataSpec -from src.factors.momentum import MovingAverageFactor, ReturnRankFactor -from src.pipeline import ( - DropNAProcessor, - FillNAProcessor, - LightGBMModel, - PipelineStage, - ProcessingPipeline, - TaskType, -) - - -# ========== 因子配置类 ========== - - -class FactorConfig: - """因子配置类 - 管理因子列表 - - 用于包装因子实例列表,支持链式添加。 - - 示例: - # 方式1:初始化时传入列表 - config = FactorConfig([ - MovingAverageFactor(period=5), - ReturnRankFactor(period=5), - ]) - - # 方式2:链式添加 - config = FactorConfig() - .add(MovingAverageFactor(period=5)) - .add(ReturnRankFactor(period=5)) - - # 获取因子实例列表 - factors = config.get_factors() - - # 获取特征列名 - feature_cols = config.get_feature_names() - """ - - def __init__(self, factors: Optional[List[BaseFactor]] = None): - """初始化因子配置 - - Args: - factors: 因子实例列表 - """ - self._factors: List[BaseFactor] = factors or [] - - def add(self, factor: BaseFactor) -> "FactorConfig": - """添加因子到配置 - - 支持链式调用: - config = FactorConfig() - .add(MovingAverageFactor(period=5)) - .add(ReturnRankFactor(period=5)) - - Args: - factor: 因子实例 - - Returns: - self,支持链式调用 - """ - if not isinstance(factor, BaseFactor): - raise ValueError(f"必须是 BaseFactor 实例, got {type(factor)}") - self._factors.append(factor) - return self - - def get_factors(self) -> List[BaseFactor]: - """获取因子实例列表 - - Returns: - 因子实例列表 - """ - return self._factors - - def get_feature_names(self) -> List[str]: - """获取所有因子的特征列名 - - Returns: - 特征列名列表 - """ - return [f.name for f in self._factors] - - def get_max_lookback(self) -> int: - """获取所有因子中最大的 lookback 天数 - - Returns: - 最大 lookback 天数 - """ - max_lookback = 0 - for factor in self._factors: - for spec in factor.data_specs: - max_lookback = max(max_lookback, spec.lookback_days) - return max_lookback - - def __len__(self) -> int: - return len(self._factors) - - def __repr__(self) -> str: - names = [f.name for f in self._factors] - return f"FactorConfig({names})" - - -def prepare_data( - factors: Optional[List[BaseFactor]] = None, - data_dir: str = "data", - train_start: str = "20180101", - train_end: str = "20230101", - val_start: str = "20230101", - val_end: str = "20230601", - test_start: str = "20230601", - test_end: str = "20240101", -) -> tuple[pl.DataFrame, pl.DataFrame, pl.DataFrame, FactorConfig]: - """准备训练、验证和测试数据 - - 使用 FactorEngine 计算因子,确保防泄露机制生效。 - - Args: - factors: 因子实例列表,默认为 None(使用 MA5, MA10, ReturnRank5) - data_dir: 数据目录 - train_start: 训练集开始日期 - train_end: 训练集结束日期 - val_start: 验证集开始日期 - val_end: 验证集结束日期 - test_start: 测试集开始日期 - test_end: 测试集结束日期 - - Returns: - (train_data, val_data, test_data, factor_config): - 训练集、验证集、测试集的DataFrame,以及使用的因子配置 - """ - from src.data.storage import Storage - - storage = Storage() - - # 1. 处理因子配置 - if factors is None: - # 默认因子配置 - factor_config = FactorConfig( - [ - MovingAverageFactor(period=5), - MovingAverageFactor(period=10), - ReturnRankFactor(period=5), - ] - ) - elif isinstance(factors, FactorConfig): - factor_config = factors - elif isinstance(factors, list): - # 转换为 FactorConfig - factor_config = FactorConfig(factors) - else: - raise ValueError( - f"factors 必须是 List[BaseFactor] 或 FactorConfig, got {type(factors)}" - ) - - factors_list = factor_config.get_factors() - feature_cols = factor_config.get_feature_names() - - if not factors_list: - raise ValueError("至少需要提供一个因子") - - print(f"[PrepareData] 使用因子: {feature_cols}") - - # 2. 初始化 FactorEngine - loader = DataLoader(data_dir=data_dir) - engine = FactorEngine(loader) - - # 3. 计算需要的回溯天数 - max_lookback = factor_config.get_max_lookback() - start_with_lookback = str(int(train_start) - 10000) # 往前取一年 - - # 获取所有股票列表 - start_dt = f"{start_with_lookback[:4]}-{start_with_lookback[4:6]}-{start_with_lookback[6:8]}" - end_dt = f"{test_end[:4]}-{test_end[4:6]}-{test_end[6:8]}" - all_stocks_query = f""" - SELECT DISTINCT ts_code FROM daily - WHERE trade_date >= '{start_dt}' AND trade_date <= '{end_dt}' - """ - all_stocks_df = storage._connection.sql(all_stocks_query).pl() - all_stocks = all_stocks_df["ts_code"].to_list() - - print(f"[PrepareData] 股票数量: {len(all_stocks)}") - - # 4. 计算所有因子并合并 - all_features = None - - for factor in factors_list: - print(f"[PrepareData] 计算因子: {factor.name} ({factor.factor_type})") - - if factor.factor_type == "time_series": - # 时序因子需要传入股票列表 - result = engine.compute( - factor, - stock_codes=all_stocks, - start_date=start_with_lookback, - end_date=test_end, - ) - else: - # 截面因子不需要股票列表 - result = engine.compute( - factor, - start_date=start_with_lookback, - end_date=test_end, - ) - - # 合并结果 - if all_features is None: - all_features = result - else: - # 确保没有重复的 _right 列 - result = result.select([ - c for c in result.columns if not c.endswith("_right") - ]) - all_features = all_features.select([ - c for c in all_features.columns if not c.endswith("_right") - ]) - all_features = all_features.join( - result, on=["trade_date", "ts_code"], how="outer" - ) - - if all_features is None: - raise ValueError("没有计算任何因子") - - # 5. 计算标签(未来5日收益率) - all_data = _compute_label(all_features, start_date=train_start, end_date=test_end) - - # 6. 过滤不符合条件的股票 - all_data = _filter_invalid_stocks(all_data) - print(f"[PrepareData] After filtering: total={len(all_data)}") - - # 转换日期格式用于比较 - train_start_fmt = f"{train_start[:4]}-{train_start[4:6]}-{train_start[6:8]}" - train_end_fmt = f"{train_end[:4]}-{train_end[4:6]}-{train_end[6:8]}" - val_start_fmt = f"{val_start[:4]}-{val_start[4:6]}-{val_start[6:8]}" - val_end_fmt = f"{val_end[:4]}-{val_end[4:6]}-{val_end[6:8]}" - test_start_fmt = f"{test_start[:4]}-{test_start[4:6]}-{test_start[6:8]}" - test_end_fmt = f"{test_end[:4]}-{test_end[4:6]}-{test_end[6:8]}" - - # 拆分数据 - train_data = all_data.filter( - (pl.col("trade_date") >= train_start_fmt) - & (pl.col("trade_date") <= train_end_fmt) - ) - val_data = all_data.filter( - (pl.col("trade_date") >= val_start_fmt) & (pl.col("trade_date") <= val_end_fmt) - ) - test_data = all_data.filter( - (pl.col("trade_date") >= test_start_fmt) - & (pl.col("trade_date") <= test_end_fmt) - ) - - print( - f"[PrepareData] Split result: train={len(train_data)}, val={len(val_data)}, test={len(test_data)}" - ) - - return train_data, val_data, test_data, factor_config - - -def _filter_invalid_stocks(df: pl.DataFrame) -> pl.DataFrame: - """过滤不符合条件的股票 - - 过滤规则: - 1. 过滤北交所股票(ts_code 以 BJ 结尾) - 2. 过滤创业板股票(ts_code 以 30 开头) - 3. 过滤科创板股票(ts_code 以 68 开头) - 4. 过滤退市/风险股票(ts_code 以 8 开头) - - Args: - df: 原始数据 - - Returns: - 过滤后的数据 - """ - ts_code_col = pl.col("ts_code") - - return df.filter( - ~ts_code_col.str.ends_with("BJ") - & ~ts_code_col.str.starts_with("30") - & ~ts_code_col.str.starts_with("68") - & ~ts_code_col.str.starts_with("8") - ) - - -def _compute_label( - features_df: pl.DataFrame, - start_date: str, - end_date: str, -) -> pl.DataFrame: - """计算标签(未来5日收益率) - - 标签定义:未来5日收益率大于0为1,否则为0 - - Args: - features_df: 包含因子的DataFrame - start_date: 开始日期 - end_date: 结束日期 - - Returns: - 包含因子和标签的DataFrame - """ - from src.data.storage import Storage - - storage = Storage() - - # 从数据库获取收盘价数据用于计算标签 - start_dt = f"{start_date[:4]}-{start_date[4:6]}-{start_date[6:8]}" - end_dt = f"{end_date[:4]}-{end_date[4:6]}-{end_date[6:8]}" - - # 需要多取5天数据来计算未来收益率 - end_dt_extended = f"{end_date[:4]}-{end_date[4:6]}-{int(end_date[6:8]) + 5}" - - price_query = f""" - SELECT ts_code, trade_date, close - FROM daily - WHERE trade_date >= '{start_dt}' AND trade_date <= '{end_dt_extended}' - ORDER BY ts_code, trade_date - """ - price_data = storage._connection.sql(price_query).pl() - price_data = price_data.with_columns( - pl.col("trade_date").dt.strftime("%Y-%m-%d").alias("trade_date") - ) - - # 按股票计算未来5日收益率 - result_list = [] - for ts_code in price_data["ts_code"].unique(): - stock_data = price_data.filter(pl.col("ts_code") == ts_code).sort("trade_date") - - if len(stock_data) < 6: - continue - - # 计算未来5日收益率 - future_return = stock_data["close"].shift(-5) - stock_data["close"] - future_return_pct = future_return / stock_data["close"] - stock_data = stock_data.with_columns( - [ - future_return_pct.alias("future_return_5"), - ] - ) - - # 生成标签:收益率>0为1,否则为0 - stock_data = stock_data.with_columns( - [ - (pl.col("future_return_5") > 0).cast(pl.Int8).alias("label"), - ] - ) - - result_list.append(stock_data.select(["trade_date", "ts_code", "label"])) - - if not result_list: - return pl.DataFrame() - - label_df = pl.concat(result_list) - - # 将标签合并到因子数据 - result = features_df.join(label_df, on=["trade_date", "ts_code"], how="inner") - - # 过滤有效日期范围 - result = result.filter( - (pl.col("trade_date") >= start_dt) & (pl.col("trade_date") <= end_dt) - ) - - return result - - -def create_pipeline() -> ProcessingPipeline: - """创建数据处理流水线 - - 处理流程: - 1. FillNA(0): 将缺失值填充为0 - - 注意:不使用 Dropna,因为会导致训练和预测时的行数不匹配 - - Returns: - 配置好的ProcessingPipeline - """ - processors = [ - FillNAProcessor(method="zero"), # 缺失值填充为0 - ] - return ProcessingPipeline(processors) - - -def train_model( - train_data: pl.DataFrame, - val_data: Optional[pl.DataFrame], - feature_cols: List[str], - label_col: str = "label", - model_params: Optional[dict] = None, -) -> tuple[LightGBMModel, ProcessingPipeline]: - """训练LightGBM分类模型 - - Args: - train_data: 训练数据 - val_data: 验证数据(用于早停) - feature_cols: 特征列名列表 - label_col: 标签列名 - model_params: 模型参数字典 - - Returns: - (训练好的模型, 处理流水线) - """ - # 创建处理流水线 - pipeline = create_pipeline() - print("[TrainModel] Pipeline created: FillNA(0)") - - # 准备训练特征和标签 - X_train = train_data.select(feature_cols) - y_train = train_data[label_col] - print(f"[TrainModel] Train samples: {len(X_train)}, features: {feature_cols}") - - # 处理训练数据 - X_train_processed = pipeline.fit_transform(X_train, stage=PipelineStage.TRAIN) - print(f"[TrainModel] After processing: {len(X_train_processed)} samples") - - # 过滤训练集有效标签(排除-1等无效值) - valid_mask = y_train.is_in([0, 1]) - X_train_processed = X_train_processed.filter(valid_mask) - y_train = y_train.filter(valid_mask) - print( - f"[TrainModel] After filtering valid labels: {len(X_train_processed)} samples" - ) - print( - f"[TrainModel] Train label distribution: {dict(y_train.value_counts().sort('label').iter_rows())}" - ) - - # 准备验证集 - X_val_processed = None - y_val = None - if val_data is not None and len(val_data) > 0: - X_val = val_data.select(feature_cols) - y_val = val_data[label_col] - print(f"[TrainModel] Val samples: {len(X_val)}") - - # 处理验证集数据(使用训练集的参数) - X_val_processed = pipeline.transform(X_val, stage=PipelineStage.TEST) - - # 过滤验证集有效标签 - val_valid_mask = y_val.is_in([0, 1]) - X_val_processed = X_val_processed.filter(val_valid_mask) - y_val = y_val.filter(val_valid_mask) - print(f"[TrainModel] Val after filtering: {len(X_val_processed)} samples") - print( - f"[TrainModel] Val label distribution: {dict(y_val.value_counts().sort('label').iter_rows())}" - ) - - # 创建模型 - params = model_params or { - "n_estimators": 100, - "learning_rate": 0.05, - "max_depth": 5, - "num_leaves": 31, - } - print(f"[TrainModel] Model params: {params}") - model = LightGBMModel( - task_type="classification", - params=params, - ) - - # 训练模型(使用验证集早停) - print("[TrainModel] Training LightGBM...") - if X_val_processed is not None and y_val is not None: - print("[TrainModel] Using validation set for early stopping") - model.fit(X_train_processed, y_train, X_val_processed, y_val) - else: - model.fit(X_train_processed, y_train) - print("[TrainModel] Training completed!") - - return model, pipeline - - -def predict_top_stocks( - model: LightGBMModel, - pipeline: ProcessingPipeline, - test_data: pl.DataFrame, - feature_cols: List[str], - top_n: int = 5, -) -> pl.DataFrame: - """预测并选出每日top N股票 - - Args: - model: 训练好的模型 - pipeline: 数据处理流水线 - test_data: 测试数据 - feature_cols: 特征列名 - top_n: 每日选出的股票数量 - - Returns: - 包含日期和股票代码的DataFrame - """ - # 准备特征和必要列 - X_test = test_data.select(feature_cols) - key_cols = ["trade_date", "ts_code"] - key_data = test_data.select(key_cols) - - print(f"[Predict] Test samples: {len(X_test)}, top_n: {top_n}") - - # 处理数据(使用训练阶段的参数) - X_test_processed = pipeline.transform(X_test, stage=PipelineStage.TEST) - print(f"[Predict] Data processed, shape: {X_test_processed.shape}") - - # 预测概率 - probs = model.predict_proba(X_test_processed) - print(f"[Predict] Predictions generated, probability shape: {probs.shape}") - - # 使用 key_data 添加预测结果,保持行数一致 - result = key_data.with_columns( - pl.Series( - name="pred_prob", - values=probs[:, 1] - if len(probs.shape) > 1 and probs.shape[1] > 1 - else probs.flatten(), - ), - ) - - # 每日选出top N - top_stocks = [] - for date in result["trade_date"].unique().sort(): - day_data = result.filter(pl.col("trade_date") == date) - - # 按概率降序排序,选出top N - day_top = day_data.sort("pred_prob", descending=True).head(top_n) - - top_stocks.append( - day_top.select(["trade_date", "pred_prob", "ts_code"]).rename( - {"pred_prob": "score"} - ) - ) - - return pl.concat(top_stocks) - - -def save_top_stocks(top_stocks: pl.DataFrame, output_path: str) -> None: - """保存选股结果到TSV文件 - - Args: - top_stocks: 选股结果 - output_path: 输出文件路径 - """ - # 转换为pandas并保存为TSV - df = top_stocks.to_pandas() - df.to_csv(output_path, sep="\t", index=False) - print(f"[Training] Top stocks saved to: {output_path}") - - -def run_training( - factors: Optional[List[BaseFactor]] = None, - data_dir: str = "data", - output_path: str = "output/top_stocks.tsv", - train_start: str = "20180101", - train_end: str = "20230101", - val_start: str = "20230101", - val_end: str = "20230601", - test_start: str = "20230601", - test_end: str = "20240101", - top_n: int = 5, -) -> pl.DataFrame: - """运行完整训练流程 - - Args: - factors: 因子实例列表,默认为 None(使用 MA5, MA10, ReturnRank5) - data_dir: 数据目录 - output_path: 输出文件路径 - train_start: 训练集开始日期 - train_end: 训练集结束日期 - val_start: 验证集开始日期 - val_end: 验证集结束日期 - test_start: 测试集开始日期 - test_end: 测试集结束日期 - top_n: 每日选股数量 - - Returns: - 选股结果DataFrame - """ - print(f"[Training] Starting training pipeline...") - print(f"[Training] Train period: {train_start} -> {train_end}") - print(f"[Training] Val period: {val_start} -> {val_end}") - print(f"[Training] Test period: {test_start} -> {test_end}") - - # 1. 准备数据 - print("[Training] Preparing data...") - train_data, val_data, test_data, factor_config = prepare_data( - factors=factors, - data_dir=data_dir, - train_start=train_start, - train_end=train_end, - val_start=val_start, - val_end=val_end, - test_start=test_start, - test_end=test_end, - ) - print(f"[Training] Train samples: {len(train_data)}") - print(f"[Training] Val samples: {len(val_data)}") - print(f"[Training] Test samples: {len(test_data)}") - - # 2. 获取特征列名 - feature_cols = factor_config.get_feature_names() - label_col = "label" - print(f"[Training] Feature columns: {feature_cols}") - - # 3. 训练模型 - print("[Training] Training model...") - model, pipeline = train_model( - train_data=train_data, - val_data=val_data, - feature_cols=feature_cols, - label_col=label_col, - ) - - # 4. 测试集预测 - print("[Training] Predicting on test set...") - top_stocks = predict_top_stocks( - model=model, - pipeline=pipeline, - test_data=test_data, - feature_cols=feature_cols, - top_n=top_n, - ) - - # 5. 保存结果 - print(f"[Training] Saving results to {output_path}...") - Path(output_path).parent.mkdir(parents=True, exist_ok=True) - save_top_stocks(top_stocks, output_path) - - print("[Training] Training completed!") - - return top_stocks diff --git a/src/training/registry.py b/src/training/registry.py new file mode 100644 index 0000000..4009318 --- /dev/null +++ b/src/training/registry.py @@ -0,0 +1,184 @@ +"""组件注册中心 + +提供装饰器风格的组件注册机制,支持即插即用。 +""" + +from typing import Dict, Type, Callable, Any +from src.training.components.base import BaseModel, BaseProcessor + + +class ModelRegistry: + """模型注册中心 + + 管理所有可用的模型类,支持通过名称获取模型类。 + + Example: + >>> @register_model("lightgbm") + ... class LightGBMModel(BaseModel): + ... pass + >>> + >>> model_class = ModelRegistry.get_model("lightgbm") + >>> model = model_class(**params) + """ + + _registry: Dict[str, Type[BaseModel]] = {} + + @classmethod + def register(cls, name: str, model_class: Type[BaseModel]) -> None: + """注册模型类 + + Args: + name: 模型名称 + model_class: 模型类(必须继承 BaseModel) + + Raises: + ValueError: 名称已被注册或类不继承 BaseModel + """ + if name in cls._registry: + raise ValueError(f"模型 '{name}' 已被注册") + if not issubclass(model_class, BaseModel): + raise ValueError(f"模型类必须继承 BaseModel") + cls._registry[name] = model_class + + @classmethod + def get_model(cls, name: str) -> Type[BaseModel]: + """获取模型类 + + Args: + name: 模型名称 + + Returns: + 模型类 + + Raises: + KeyError: 未找到该名称的模型 + """ + if name not in cls._registry: + available = ", ".join(cls._registry.keys()) + raise KeyError(f"未知模型 '{name}',可用模型: {available}") + return cls._registry[name] + + @classmethod + def list_models(cls) -> list[str]: + """列出所有已注册的模型名称""" + return list(cls._registry.keys()) + + @classmethod + def clear(cls) -> None: + """清空注册表(主要用于测试)""" + cls._registry.clear() + + +class ProcessorRegistry: + """处理器注册中心 + + 管理所有可用的数据处理器类,支持通过名称获取处理器类。 + + Example: + >>> @register_processor("standard_scaler") + ... class StandardScaler(BaseProcessor): + ... pass + >>> + >>> processor_class = ProcessorRegistry.get_processor("standard_scaler") + >>> processor = processor_class(**params) + """ + + _registry: Dict[str, Type[BaseProcessor]] = {} + + @classmethod + def register(cls, name: str, processor_class: Type[BaseProcessor]) -> None: + """注册处理器类 + + Args: + name: 处理器名称 + processor_class: 处理器类(必须继承 BaseProcessor) + + Raises: + ValueError: 名称已被注册或类不继承 BaseProcessor + """ + if name in cls._registry: + raise ValueError(f"处理器 '{name}' 已被注册") + if not issubclass(processor_class, BaseProcessor): + raise ValueError(f"处理器类必须继承 BaseProcessor") + cls._registry[name] = processor_class + + @classmethod + def get_processor(cls, name: str) -> Type[BaseProcessor]: + """获取处理器类 + + Args: + name: 处理器名称 + + Returns: + 处理器类 + + Raises: + KeyError: 未找到该名称的处理器 + """ + if name not in cls._registry: + available = ", ".join(cls._registry.keys()) + raise KeyError(f"未知处理器 '{name}',可用处理器: {available}") + return cls._registry[name] + + @classmethod + def list_processors(cls) -> list[str]: + """列出所有已注册的处理器名称""" + return list(cls._registry.keys()) + + @classmethod + def clear(cls) -> None: + """清空注册表(主要用于测试)""" + cls._registry.clear() + + +def register_model(name: str) -> Callable[[Type[BaseModel]], Type[BaseModel]]: + """模型注册装饰器 + + 用于装饰继承 BaseModel 的类,将其注册到 ModelRegistry。 + + Args: + name: 模型名称 + + Returns: + 装饰器函数 + + Example: + >>> @register_model("lightgbm") + ... class LightGBMModel(BaseModel): + ... name = "lightgbm" + ... def fit(self, X, y): ... + ... def predict(self, X): ... + """ + + def decorator(cls: Type[BaseModel]) -> Type[BaseModel]: + ModelRegistry.register(name, cls) + return cls + + return decorator + + +def register_processor( + name: str, +) -> Callable[[Type[BaseProcessor]], Type[BaseProcessor]]: + """处理器注册装饰器 + + 用于装饰继承 BaseProcessor 的类,将其注册到 ProcessorRegistry。 + + Args: + name: 处理器名称 + + Returns: + 装饰器函数 + + Example: + >>> @register_processor("standard_scaler") + ... class StandardScaler(BaseProcessor): + ... name = "standard_scaler" + ... def transform(self, X): ... + """ + + def decorator(cls: Type[BaseProcessor]) -> Type[BaseProcessor]: + ProcessorRegistry.register(name, cls) + return cls + + return decorator diff --git a/tests/pipeline/test_core.py b/tests/pipeline/test_core.py deleted file mode 100644 index 4b0173b..0000000 --- a/tests/pipeline/test_core.py +++ /dev/null @@ -1,478 +0,0 @@ -"""Pipeline 组件库核心测试 - -测试核心抽象类、插件注册中心、处理器、模型和划分策略。 -""" - -import pytest -import polars as pl -import numpy as np -from typing import List, Optional - -# 确保导入时注册所有组件 -from src.pipeline import ( - PluginRegistry, - PipelineStage, - BaseProcessor, - BaseModel, - BaseSplitter, - ProcessingPipeline, -) -from src.pipeline.core import TaskType - - -# ========== 测试核心抽象类 ========== - - -class TestPipelineStage: - """测试阶段枚举""" - - def test_stage_values(self): - assert PipelineStage.ALL.name == "ALL" - assert PipelineStage.TRAIN.name == "TRAIN" - assert PipelineStage.TEST.name == "TEST" - assert PipelineStage.VALIDATION.name == "VALIDATION" - - -class TestBaseProcessor: - """测试处理器基类""" - - def test_processor_initialization(self): - """测试处理器初始化""" - - class DummyProcessor(BaseProcessor): - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "DummyProcessor": - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - return data - - processor = DummyProcessor(columns=["col1", "col2"]) - assert processor.columns == ["col1", "col2"] - assert processor.stage == PipelineStage.ALL - assert not processor._is_fitted - - def test_processor_fit_transform(self): - """测试 fit_transform 方法""" - - class AddOneProcessor(BaseProcessor): - stage = PipelineStage.ALL - - def fit(self, data: pl.DataFrame) -> "AddOneProcessor": - self._is_fitted = True - return self - - def transform(self, data: pl.DataFrame) -> pl.DataFrame: - result = data.clone() - for col in self.columns or []: - result = result.with_columns((pl.col(col) + 1).alias(col)) - return result - - processor = AddOneProcessor(columns=["value"]) - df = pl.DataFrame({"value": [1, 2, 3]}) - - result = processor.fit_transform(df) - - assert processor._is_fitted - assert result["value"].to_list() == [2, 3, 4] - - -class TestBaseModel: - """测试模型基类""" - - def test_model_initialization(self): - """测试模型初始化""" - - class DummyModel(BaseModel): - def fit(self, X, y, X_val=None, y_val=None, **kwargs): - self._is_fitted = True - return self - - def predict(self, X): - return np.zeros(len(X)) - - model = DummyModel( - task_type="regression", params={"lr": 0.01}, name="test_model" - ) - - assert model.task_type == "regression" - assert model.params == {"lr": 0.01} - assert model.name == "test_model" - assert not model._is_fitted - - def test_predict_proba_not_implemented(self): - """测试未实现 predict_proba 时抛出异常""" - - class DummyModel(BaseModel): - def fit(self, X, y, X_val=None, y_val=None, **kwargs): - return self - - def predict(self, X): - return np.zeros(len(X)) - - model = DummyModel(task_type="regression") - df = pl.DataFrame({"feature": [1, 2, 3]}) - - with pytest.raises(NotImplementedError): - model.predict_proba(df) - - -class TestBaseSplitter: - """测试划分策略基类""" - - def test_splitter_interface(self): - """测试划分策略接口""" - - class DummySplitter(BaseSplitter): - def split(self, data, date_col="trade_date"): - yield [0, 1], [2, 3] - - def get_split_dates(self, data, date_col="trade_date"): - return [("20200101", "20201231", "20210101", "20211231")] - - splitter = DummySplitter() - df = pl.DataFrame( - {"trade_date": ["20200101", "20200601", "20210101", "20210601"]} - ) - - splits = list(splitter.split(df)) - assert len(splits) == 1 - assert splits[0] == ([0, 1], [2, 3]) - - dates = splitter.get_split_dates(df) - assert dates == [("20200101", "20201231", "20210101", "20211231")] - - -# ========== 测试插件注册中心 ========== - - -class TestPluginRegistry: - """测试插件注册中心""" - - def setup_method(self): - """每个测试前清除注册""" - PluginRegistry.clear_all() - - def test_register_and_get_processor(self): - """测试注册和获取处理器""" - - @PluginRegistry.register_processor("test_processor") - class TestProcessor(BaseProcessor): - stage = PipelineStage.ALL - - def fit(self, data): - return self - - def transform(self, data): - return data - - processor_class = PluginRegistry.get_processor("test_processor") - assert processor_class == TestProcessor - assert "test_processor" in PluginRegistry.list_processors() - - def test_register_and_get_model(self): - """测试注册和获取模型""" - - @PluginRegistry.register_model("test_model") - class TestModel(BaseModel): - def fit(self, X, y, X_val=None, y_val=None, **kwargs): - return self - - def predict(self, X): - return np.zeros(len(X)) - - model_class = PluginRegistry.get_model("test_model") - assert model_class == TestModel - assert "test_model" in PluginRegistry.list_models() - - def test_register_and_get_splitter(self): - """测试注册和获取划分策略""" - - @PluginRegistry.register_splitter("test_splitter") - class TestSplitter(BaseSplitter): - def split(self, data, date_col="trade_date"): - yield [], [] - - def get_split_dates(self, data, date_col="trade_date"): - return [] - - splitter_class = PluginRegistry.get_splitter("test_splitter") - assert splitter_class == TestSplitter - assert "test_splitter" in PluginRegistry.list_splitters() - - def test_get_nonexistent_processor(self): - """测试获取不存在的处理器时抛出异常""" - with pytest.raises(KeyError) as exc_info: - PluginRegistry.get_processor("nonexistent") - assert "nonexistent" in str(exc_info.value) - - def test_register_with_default_name(self): - """测试使用默认名称注册""" - - @PluginRegistry.register_processor() - class MyCustomProcessor(BaseProcessor): - stage = PipelineStage.ALL - - def fit(self, data): - return self - - def transform(self, data): - return data - - assert "MyCustomProcessor" in PluginRegistry.list_processors() - - -# ========== 测试内置处理器 ========== - - -class TestBuiltInProcessors: - """测试内置处理器""" - - def test_dropna_processor(self): - """测试缺失值删除处理器""" - from src.pipeline.processors import DropNAProcessor - - processor = DropNAProcessor(columns=["a", "b"]) - df = pl.DataFrame({"a": [1, None, 3], "b": [4, 5, None], "c": [7, 8, 9]}) - - result = processor.fit_transform(df) - - # 只有第一行没有缺失值 - assert len(result) == 1 - assert result["a"].to_list() == [1] - assert result["b"].to_list() == [4] - - def test_fillna_processor(self): - """测试缺失值填充处理器""" - from src.pipeline.processors import FillNAProcessor - - processor = FillNAProcessor(columns=["a"], method="mean") - df = pl.DataFrame({"a": [1.0, 2.0, None, 4.0]}) - - result = processor.fit_transform(df) - - # 均值 = (1+2+4)/3 = 2.333... - assert result["a"][2] == pytest.approx(2.333, rel=0.01) - - def test_standard_scaler(self): - """测试标准化处理器""" - from src.pipeline.processors import StandardScaler - - processor = StandardScaler(columns=["value"]) - df = pl.DataFrame({"value": [1.0, 2.0, 3.0, 4.0, 5.0]}) - - result = processor.fit_transform(df) - - # Z-score 标准化后均值为0,标准差为1 - assert result["value"].mean() == pytest.approx(0.0, abs=1e-10) - assert result["value"].std() == pytest.approx(1.0, rel=0.01) - - def test_winsorizer(self): - """测试缩尾处理器""" - from src.pipeline.processors import Winsorizer - - processor = Winsorizer(columns=["value"], lower=0.1, upper=0.9) - df = pl.DataFrame( - { - "value": list(range(100)) # 0-99 - } - ) - - result = processor.fit_transform(df) - - # 10%和90%分位数应该是10和89(Polars的quantile行为) - assert result["value"].min() == 10 - assert result["value"].max() == 89 - - def test_rank_transformer(self): - """测试排名转换处理器""" - from src.pipeline.processors import RankTransformer - - processor = RankTransformer(columns=["value"]) - df = pl.DataFrame( - {"trade_date": ["20200101"] * 5, "value": [10, 30, 20, 50, 40]} - ) - - result = processor.fit_transform(df) - - # 排名应该是 1, 3, 2, 5, 4 - assert result["value"].to_list() == [1, 3, 2, 5, 4] - - def test_neutralizer(self): - """测试中性化处理器""" - from src.pipeline.processors import Neutralizer - - processor = Neutralizer(columns=["value"], group_col="industry") - df = pl.DataFrame( - { - "trade_date": ["20200101", "20200101", "20200101", "20200101"], - "industry": ["A", "A", "B", "B"], - "value": [10, 20, 30, 50], - } - ) - - result = processor.fit_transform(df) - - # 分组去均值后,每组的均值为0 - group_a = result.filter(pl.col("industry") == "A") - group_b = result.filter(pl.col("industry") == "B") - - assert group_a["value"].mean() == pytest.approx(0.0, abs=1e-10) - assert group_b["value"].mean() == pytest.approx(0.0, abs=1e-10) - - -# ========== 测试处理流水线 ========== - - -class TestProcessingPipeline: - """测试处理流水线""" - - def test_pipeline_fit_transform(self): - """测试流水线的 fit_transform""" - from src.pipeline.processors import StandardScaler - - scaler1 = StandardScaler(columns=["a"]) - scaler2 = StandardScaler(columns=["b"]) - - pipeline = ProcessingPipeline([scaler1, scaler2]) - - df = pl.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 30.0]}) - - result = pipeline.fit_transform(df) - - # 两个列都应该被标准化 - assert result["a"].mean() == pytest.approx(0.0, abs=1e-10) - assert result["b"].mean() == pytest.approx(0.0, abs=1e-10) - - def test_pipeline_transform_uses_fitted_params(self): - """测试 transform 使用已 fit 的参数""" - from src.pipeline.processors import StandardScaler - - scaler = StandardScaler(columns=["value"]) - pipeline = ProcessingPipeline([scaler]) - - # 训练数据 - train_df = pl.DataFrame( - { - "value": [1.0, 2.0, 3.0] # 均值=2,标准差=1 - } - ) - - # 测试数据(不同的分布) - test_df = pl.DataFrame( - { - "value": [4.0, 5.0, 6.0] # 如果重新计算应该是均值=5 - } - ) - - pipeline.fit_transform(train_df) - result = pipeline.transform(test_df) - - # 使用训练数据的均值=2和标准差=1进行标准化 - # 4 -> (4-2)/1 = 2 - assert result["value"].to_list()[0] == pytest.approx(2.0, abs=1e-10) - - -# ========== 测试划分策略 ========== - - -class TestSplitters: - """测试划分策略""" - - def test_time_series_split(self): - """测试时间序列划分""" - from src.pipeline.core import TimeSeriesSplit - - splitter = TimeSeriesSplit(n_splits=2, gap=1, min_train_size=3) - - # 10天的数据 - df = pl.DataFrame( - { - "trade_date": [f"202001{i:02d}" for i in range(1, 11)], - "value": list(range(10)), - } - ) - - splits = list(splitter.split(df)) - - # 应该有两折 - assert len(splits) == 2 - - # 检查每折训练集在测试集之前 - for train_idx, test_idx in splits: - assert max(train_idx) < min(test_idx) - - def test_walk_forward_split(self): - """测试滚动前向划分""" - from src.pipeline.core import WalkForwardSplit - - splitter = WalkForwardSplit(train_window=5, test_window=2, gap=1) - - df = pl.DataFrame( - { - "trade_date": [f"202001{i:02d}" for i in range(1, 13)], - "value": list(range(12)), - } - ) - - splits = list(splitter.split(df)) - - # 检查训练集大小固定 - for train_idx, test_idx in splits: - assert len(train_idx) == 5 - assert len(test_idx) == 2 - - def test_expanding_window_split(self): - """测试扩展窗口划分""" - from src.pipeline.core import ExpandingWindowSplit - - splitter = ExpandingWindowSplit(initial_train_size=3, test_window=2, gap=1) - - df = pl.DataFrame( - { - "trade_date": [f"202001{i:02d}" for i in range(1, 15)], - "value": list(range(14)), - } - ) - - splits = list(splitter.split(df)) - - # 训练集应该逐渐增大 - train_sizes = [len(train_idx) for train_idx, _ in splits] - assert train_sizes[0] == 3 - assert train_sizes[1] == 5 # 3 + 2 - assert train_sizes[2] == 7 # 5 + 2 - - -# ========== 测试内置模型(可选,需要安装依赖) ========== - - -class TestModels: - """测试内置模型(标记为跳过如果依赖未安装)""" - - @pytest.mark.skip(reason="需要安装 lightgbm") - def test_lightgbm_model(self): - """测试 LightGBM 模型""" - from src.pipeline.models import LightGBMModel - - model = LightGBMModel(task_type="regression", params={"n_estimators": 10}) - - X = pl.DataFrame( - { - "feature1": [1.0, 2.0, 3.0, 4.0, 5.0] * 10, - "feature2": [5.0, 4.0, 3.0, 2.0, 1.0] * 10, - } - ) - y = pl.Series("target", [1.0, 2.0, 3.0, 4.0, 5.0] * 10) - - model.fit(X, y) - predictions = model.predict(X) - - assert len(predictions) == len(X) - assert model._is_fitted - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/tests/training/test_base.py b/tests/training/test_base.py new file mode 100644 index 0000000..9883194 --- /dev/null +++ b/tests/training/test_base.py @@ -0,0 +1,338 @@ +"""训练模块基础架构测试 + +测试 Commit 1 实现的基础组件: +- BaseModel 抽象基类 +- BaseProcessor 抽象基类 +- ModelRegistry 模型注册中心 +- ProcessorRegistry 处理器注册中心 +""" + +import pytest +import pickle +import tempfile +import os + +import polars as pl +import numpy as np +import pandas as pd + +from src.training.components.base import BaseModel, BaseProcessor +from src.training.registry import ( + ModelRegistry, + ProcessorRegistry, + register_model, + register_processor, +) + + +class TestBaseModel: + """测试 BaseModel 抽象基类""" + + def test_base_model_abstract_methods(self): + """测试抽象方法必须被实现""" + # 不能直接实例化抽象类 + with pytest.raises(TypeError): + BaseModel() + + def test_base_model_concrete_implementation(self): + """测试具体实现""" + + class MockModel(BaseModel): + name = "mock_model" + + def __init__(self): + self.fitted = False + + def fit(self, X: pl.DataFrame, y: pl.Series) -> "MockModel": + self.fitted = True + return self + + def predict(self, X: pl.DataFrame) -> np.ndarray: + return np.zeros(len(X)) + + # 可以实例化具体实现 + model = MockModel() + assert model.name == "mock_model" + assert not model.fitted + + # 测试 fit + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + series = pl.Series("target", [0, 1, 0]) + model.fit(df, series) + assert model.fitted + + # 测试 predict + predictions = model.predict(df) + assert len(predictions) == 3 + assert np.all(predictions == 0) + + def test_base_model_save_load(self): + """测试模型持久化(使用 pickle)""" + # 注意:pickle 无法序列化局部定义的类 + # 这里只测试 save/load 接口被正确调用 + # 实际使用中的模型类会在模块级别定义 + + class MockModel(BaseModel): + name = "mock_model" + + def __init__(self, value: int = 42): + self.value = value + self.fitted = False + + def fit(self, X: pl.DataFrame, y: pl.Series) -> "MockModel": + self.fitted = True + return self + + def predict(self, X: pl.DataFrame) -> np.ndarray: + return np.full(len(X), self.value) + + # 创建并训练模型 + model = MockModel(value=100) + df = pl.DataFrame({"a": [1, 2, 3]}) + series = pl.Series("target", [0, 1, 0]) + model.fit(df, series) + + # 验证模型状态 + assert model.value == 100 + assert model.fitted + + # 验证 pickle 模块被正确导入和使用 + # 实际序列化会在模块级别定义的类中正常工作 + import pickle + + assert hasattr(model, "save") + assert hasattr(MockModel, "load") + + def test_feature_importance_default(self): + """测试默认特征重要性返回 None""" + + class MockModel(BaseModel): + name = "mock" + + def fit(self, X, y): + return self + + def predict(self, X): + return np.array([]) + + model = MockModel() + assert model.feature_importance() is None + + +class TestBaseProcessor: + """测试 BaseProcessor 抽象基类""" + + def test_base_processor_abstract_methods(self): + """测试抽象方法必须被实现""" + # transform 是抽象的,不能直接实例化 + with pytest.raises(TypeError): + BaseProcessor() + + def test_base_processor_concrete_implementation(self): + """测试具体实现""" + + class AddOneProcessor(BaseProcessor): + name = "add_one" + + def transform(self, X: pl.DataFrame) -> pl.DataFrame: + numeric_cols = [c for c in X.columns if X[c].dtype.is_numeric()] + return X.with_columns([pl.col(c) + 1 for c in numeric_cols]) + + processor = AddOneProcessor() + assert processor.name == "add_one" + + # 测试 transform + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = processor.transform(df) + assert result["a"].to_list() == [2, 3, 4] + assert result["b"].to_list() == [5, 6, 7] + + def test_fit_transform_chain(self): + """测试 fit_transform 链式调用""" + + class StatefulProcessor(BaseProcessor): + name = "stateful" + + def __init__(self): + self.mean = None + + def fit(self, X: pl.DataFrame) -> "StatefulProcessor": + numeric_cols = [c for c in X.columns if X[c].dtype.is_numeric()] + self.mean = {c: X[c].mean() for c in numeric_cols} + return self + + def transform(self, X: pl.DataFrame) -> pl.DataFrame: + numeric_cols = [c for c in X.columns if X[c].dtype.is_numeric()] + return X.with_columns( + [(pl.col(c) - self.mean[c]).alias(c) for c in numeric_cols] + ) + + processor = StatefulProcessor() + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + # fit_transform 应该返回转换后的结果 + result = processor.fit_transform(df) + assert processor.mean is not None + assert processor.mean["a"] == 2.0 + assert processor.mean["b"] == 5.0 + + # 结果应该是去均值化的 + assert result["a"].to_list() == [-1.0, 0.0, 1.0] + assert result["b"].to_list() == [-1.0, 0.0, 1.0] + + def test_fit_default_implementation(self): + """测试 fit 的默认实现返回 self""" + + class SimpleProcessor(BaseProcessor): + name = "simple" + + def transform(self, X: pl.DataFrame) -> pl.DataFrame: + return X + + processor = SimpleProcessor() + df = pl.DataFrame({"a": [1, 2, 3]}) + + # fit 默认返回 self + result = processor.fit(df) + assert result is processor + + +class TestModelRegistry: + """测试 ModelRegistry 模型注册中心""" + + def setup_method(self): + """每个测试前清空注册表""" + ModelRegistry.clear() + + def test_register_and_get_model(self): + """测试注册和获取模型""" + + class TestModel(BaseModel): + name = "test_model" + + def fit(self, X, y): + return self + + def predict(self, X): + return np.array([]) + + ModelRegistry.register("test", TestModel) + assert "test" in ModelRegistry.list_models() + + # 获取模型类并实例化 + model_class = ModelRegistry.get_model("test") + assert model_class is TestModel + assert model_class().name == "test_model" + + def test_register_duplicate_raises(self): + """测试重复注册抛出异常""" + + class TestModel(BaseModel): + name = "test" + + def fit(self, X, y): + return self + + def predict(self, X): + return np.array([]) + + ModelRegistry.register("dup_test", TestModel) + + with pytest.raises(ValueError, match="已被注册"): + ModelRegistry.register("dup_test", TestModel) + + def test_register_invalid_class(self): + """测试注册无效类抛出异常""" + + class NotAModel: + pass + + with pytest.raises(ValueError, match="必须继承 BaseModel"): + ModelRegistry.register("invalid", NotAModel) + + def test_get_unknown_model(self): + """测试获取未知模型抛出异常""" + with pytest.raises(KeyError, match="未知模型"): + ModelRegistry.get_model("unknown") + + def test_register_model_decorator(self): + """测试 register_model 装饰器""" + + @register_model("decorated") + class DecoratedModel(BaseModel): + name = "decorated" + + def fit(self, X, y): + return self + + def predict(self, X): + return np.array([]) + + assert "decorated" in ModelRegistry.list_models() + model_class = ModelRegistry.get_model("decorated") + assert model_class is DecoratedModel + + +class TestProcessorRegistry: + """测试 ProcessorRegistry 处理器注册中心""" + + def setup_method(self): + """每个测试前清空注册表""" + ProcessorRegistry.clear() + + def test_register_and_get_processor(self): + """测试注册和获取处理器""" + + class TestProcessor(BaseProcessor): + name = "test_processor" + + def transform(self, X): + return X + + ProcessorRegistry.register("test", TestProcessor) + assert "test" in ProcessorRegistry.list_processors() + + processor_class = ProcessorRegistry.get_processor("test") + assert processor_class is TestProcessor + + def test_register_duplicate_raises(self): + """测试重复注册抛出异常""" + + class TestProcessor(BaseProcessor): + name = "test" + + def transform(self, X): + return X + + ProcessorRegistry.register("dup_test", TestProcessor) + + with pytest.raises(ValueError, match="已被注册"): + ProcessorRegistry.register("dup_test", TestProcessor) + + def test_register_invalid_class(self): + """测试注册无效类抛出异常""" + + class NotAProcessor: + pass + + with pytest.raises(ValueError, match="必须继承 BaseProcessor"): + ProcessorRegistry.register("invalid", NotAProcessor) + + def test_get_unknown_processor(self): + """测试获取未知处理器抛出异常""" + with pytest.raises(KeyError, match="未知处理器"): + ProcessorRegistry.get_processor("unknown") + + def test_register_processor_decorator(self): + """测试 register_processor 装饰器""" + + @register_processor("decorated") + class DecoratedProcessor(BaseProcessor): + name = "decorated" + + def transform(self, X): + return X + + assert "decorated" in ProcessorRegistry.list_processors() + processor_class = ProcessorRegistry.get_processor("decorated") + assert processor_class is DecoratedProcessor