feat(data): 添加 DuckDB 只读模式支持

- Storage 类默认使用 read_only=True 模式,允许多进程并发读取
- ThreadSafeStorage 自动使用 read_only=False 模式,用于数据同步写入
- catalog.query_duckdb_to_polars 函数使用只读连接
This commit is contained in:
2026-03-11 21:33:08 +08:00
parent f3b3560d26
commit e8ac9d8662
4 changed files with 204 additions and 189 deletions

View File

@@ -439,6 +439,7 @@ def query_duckdb_to_polars(query: str, db_path: str) -> pl.LazyFrame:
"""执行 DuckDB 查询并返回 Polars LazyFrame。 """执行 DuckDB 查询并返回 Polars LazyFrame。
使用 duckdb.connect().sql(query).pl() 实现高速数据流转。 使用 duckdb.connect().sql(query).pl() 实现高速数据流转。
默认使用 read_only=True 模式,允许多进程并发读取。
Args: Args:
query: SQL 查询语句 query: SQL 查询语句
@@ -447,7 +448,7 @@ def query_duckdb_to_polars(query: str, db_path: str) -> pl.LazyFrame:
Returns: Returns:
Polars LazyFrame Polars LazyFrame
""" """
conn = duckdb.connect(db_path) conn = duckdb.connect(db_path, read_only=True)
try: try:
# DuckDB -> Polars 高速转换 # DuckDB -> Polars 高速转换
df = conn.sql(query).pl() df = conn.sql(query).pl()

View File

@@ -10,6 +10,10 @@ from datetime import datetime
from src.config.settings import get_settings from src.config.settings import get_settings
# Type alias for DuckDB connection
DuckDBConnection = duckdb.DuckDBPyConnection
# Default column type mapping for automatic schema inference # Default column type mapping for automatic schema inference
DEFAULT_TYPE_MAPPING = { DEFAULT_TYPE_MAPPING = {
"ts_code": "VARCHAR(16)", "ts_code": "VARCHAR(16)",
@@ -38,10 +42,14 @@ class Storage:
- 新增 load_polars() 方法支持 Polars 零拷贝导出 - 新增 load_polars() 方法支持 Polars 零拷贝导出
- 使用单例模式管理数据库连接 - 使用单例模式管理数据库连接
- 并发写入通过队列管理(见 ThreadSafeStorage - 并发写入通过队列管理(见 ThreadSafeStorage
注意:
- 默认使用 read_only=True 模式,允许多进程并发读取
- 只有在数据同步时才使用 read_only=False 模式
""" """
_instance = None _instance = None
_connection = None _connection: Optional[DuckDBConnection] = None
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
"""Singleton to ensure single connection.""" """Singleton to ensure single connection."""
@@ -49,8 +57,14 @@ class Storage:
cls._instance = super().__new__(cls) cls._instance = super().__new__(cls)
return cls._instance return cls._instance
def __init__(self, path: Optional[Path] = None): def __init__(self, path: Optional[Path] = None, read_only: bool = True):
"""Initialize storage.""" """Initialize storage.
Args:
path: 数据库文件路径,默认为配置中的路径
read_only: 是否为只读模式,默认为 True
只有在数据同步时才需要设置为 False
"""
if hasattr(self, "_initialized"): if hasattr(self, "_initialized"):
return return
@@ -58,6 +72,7 @@ class Storage:
self.base_path = path or cfg.data_path_resolved self.base_path = path or cfg.data_path_resolved
self.base_path.mkdir(parents=True, exist_ok=True) self.base_path.mkdir(parents=True, exist_ok=True)
self.db_path = self.base_path / "prostock.db" self.db_path = self.base_path / "prostock.db"
self._read_only = read_only
self._init_db() self._init_db()
self._initialized = True self._initialized = True
@@ -73,7 +88,7 @@ class Storage:
- api_bak_basic.py: BakBasicSync.TABLE_SCHEMA - api_bak_basic.py: BakBasicSync.TABLE_SCHEMA
- api_financial_sync.py: FinancialSync.TABLE_SCHEMAS - api_financial_sync.py: FinancialSync.TABLE_SCHEMAS
""" """
self._connection = duckdb.connect(str(self.db_path)) self._connection = duckdb.connect(str(self.db_path), read_only=self._read_only)
def save( def save(
self, self,
@@ -304,10 +319,15 @@ class ThreadSafeStorage:
DuckDB 写入时不支持并发,使用队列收集写入请求, DuckDB 写入时不支持并发,使用队列收集写入请求,
在 sync 结束时统一批量写入。 在 sync 结束时统一批量写入。
注意:
- 此类自动使用 read_only=False 模式,用于数据同步
- 不要在多进程中同时使用此类,只应在单进程中用于批量写入
""" """
def __init__(self): def __init__(self):
self.storage = Storage() # 使用 read_only=False 模式创建 Storage用于写入操作
self.storage = Storage(read_only=False)
self._pending_writes: List[tuple] = [] # [(name, data, use_upsert), ...] self._pending_writes: List[tuple] = [] # [(name, data, use_upsert), ...]
def queue_save(self, name: str, data: pd.DataFrame, use_upsert: bool = True): def queue_save(self, name: str, data: pd.DataFrame, use_upsert: bool = True):

View File

@@ -27,8 +27,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:55:40.429719Z", "end_time": "2026-03-11T13:28:00.454472Z",
"start_time": "2026-03-10T14:55:39.008639Z" "start_time": "2026-03-11T13:27:55.535146Z"
} }
}, },
"source": [ "source": [
@@ -69,8 +69,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:55:40.441603Z", "end_time": "2026-03-11T13:28:00.468024Z",
"start_time": "2026-03-10T14:55:40.434663Z" "start_time": "2026-03-11T13:28:00.460009Z"
} }
}, },
"source": [ "source": [
@@ -273,8 +273,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:55:40.451021Z", "end_time": "2026-03-11T13:28:00.479401Z",
"start_time": "2026-03-10T14:55:40.444975Z" "start_time": "2026-03-11T13:28:00.472206Z"
} }
}, },
"source": [ "source": [
@@ -363,8 +363,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:55:40.459051Z", "end_time": "2026-03-11T13:28:00.489508Z",
"start_time": "2026-03-10T14:55:40.454871Z" "start_time": "2026-03-11T13:28:00.483843Z"
} }
}, },
"source": [ "source": [
@@ -456,8 +456,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:04.604292Z", "end_time": "2026-03-11T13:28:24.494829Z",
"start_time": "2026-03-10T14:55:40.466222Z" "start_time": "2026-03-11T13:28:00.494691Z"
} }
}, },
"source": [ "source": [
@@ -634,23 +634,23 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"数据形状: (7044952, 70)\n", "数据形状: (7044952, 70)\n",
"数据列: ['ts_code', 'trade_date', 'amount', 'vol', 'turnover_rate', 'close', 'low', 'high', 'open', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_liab', 'total_cur_assets', 'total_hldr_eqy_exc_min_int', 'total_cur_liab', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5_rank']\n", "数据列: ['ts_code', 'trade_date', 'close', 'open', 'high', 'amount', 'low', 'vol', 'turnover_rate', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_liab', 'total_cur_liab', 'total_cur_assets', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5_rank']\n",
"\n", "\n",
"前5行预览:\n", "前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬─────────────────────┬───────────┬───┬──────────────────────┬───────────┬───────────┐\n", "┌───────────┬─────────────────────┬─────────┬───┬────────────┬────────────┬───────────┬───────────┐\n",
"│ ts_code ┆ trade_dat ┆ amount ┆ vol ┆ … ┆ value_pri ┆ active_ma ┆ ebit_rank ┆ future_re │\n", "│ ts_code ┆ trade_dateclose ┆ open ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_re │\n",
"│ --- ┆ e ┆ --- ┆ --- ┆ ┆ ce_diverg ┆ rket_cap ┆ --- ┆ turn_5_ra │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ turn_5_ra │\n",
"│ str ┆ --- ┆ f64 ┆ f64 ┆ ┆ ence ┆ --- ┆ f64 ┆ nk │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ nk │\n",
"│ ┆ str ┆ ┆ --- ┆ f64 ┆ ┆ --- │\n", "│ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 │\n",
"╞═══════════╪═════════════════════╪═══════════╪═══╪══════════════════════╪═══════════╪═══════════╡\n", "╞═══════════╪═════════════════════╪═════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n",
"│ 000001.SZ ┆ 20200102 ┆ 2.5712e6 ┆ 1.5302e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n", "│ 000001.SZ ┆ 20200102 1841.69 ┆ 1817.67 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n",
"│ 000001.SZ ┆ 20200103 ┆ 1.9145e6 ┆ 1.1162e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n", "│ 000001.SZ ┆ 20200103 ┆ 1875.53 ┆ 1849.33 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n",
"│ 000001.SZ ┆ 20200106 ┆ 1.4779e6 ┆ 862083.5 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n", "│ 000001.SZ ┆ 20200106 ┆ 1863.52 ┆ 1856.97 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n",
"│ 000001.SZ ┆ 20200107 ┆ 1.2470e6 ┆ 728607.56 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n", "│ 000001.SZ ┆ 20200107 ┆ 1872.26 ┆ 1870.07 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n",
"│ 000001.SZ ┆ 20200108 ┆ 1.4236e6 ┆ 847824.12 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n", "│ 000001.SZ ┆ 20200108 ┆ 1818.76 ┆ 1855.88 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n",
"└───────────┴─────────────────────┴───────────┴───┴──────────────────────┴───────────┴───────────┘\n", "└───────────┴─────────────────────┴─────────┴───┴────────────┴────────────┴───────────┴───────────┘\n",
"\n", "\n",
"[4] 转换为排序学习格式\n", "[4] 转换为排序学习格式\n",
"\n", "\n",
@@ -743,7 +743,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\551043002.py:108: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n", "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\551043002.py:108: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
"(Deprecated in version 0.20.5)\n", "(Deprecated in version 0.20.5)\n",
" daily_counts = df_ranked.group_by(date_col).agg(pl.count().alias(\"count\"))\n" " daily_counts = df_ranked.group_by(date_col).agg(pl.count().alias(\"count\"))\n"
] ]
@@ -762,8 +762,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:29.070549Z", "end_time": "2026-03-11T13:28:49.070709Z",
"start_time": "2026-03-10T14:56:04.619443Z" "start_time": "2026-03-11T13:28:24.501332Z"
} }
}, },
"source": [ "source": [
@@ -2250,7 +2250,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\174861970.py:63: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n", "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\174861970.py:63: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n",
"Please use `implode` to return to previous behavior.\n", "Please use `implode` to return to previous behavior.\n",
"\n", "\n",
"See https://github.com/pola-rs/polars/issues/22149 for more information.\n", "See https://github.com/pola-rs/polars/issues/22149 for more information.\n",
@@ -2282,8 +2282,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:29.152001Z", "end_time": "2026-03-11T13:28:49.140684Z",
"start_time": "2026-03-10T14:56:29.081042Z" "start_time": "2026-03-11T13:28:49.078377Z"
} }
}, },
"source": [ "source": [
@@ -2337,7 +2337,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\551043002.py:132: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n", "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\551043002.py:132: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
"(Deprecated in version 0.20.5)\n", "(Deprecated in version 0.20.5)\n",
" pl.count().alias(\"count\")\n" " pl.count().alias(\"count\")\n"
] ]
@@ -2356,8 +2356,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:29.729147Z", "end_time": "2026-03-11T13:28:49.680671Z",
"start_time": "2026-03-10T14:56:29.161969Z" "start_time": "2026-03-11T13:28:49.150412Z"
} }
}, },
"source": [ "source": [
@@ -2423,8 +2423,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:32.377628Z", "end_time": "2026-03-11T13:28:52.285645Z",
"start_time": "2026-03-10T14:56:29.735814Z" "start_time": "2026-03-11T13:28:49.685562Z"
} }
}, },
"source": [ "source": [
@@ -2510,8 +2510,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:34.997174Z", "end_time": "2026-03-11T13:28:54.767311Z",
"start_time": "2026-03-10T14:56:32.382086Z" "start_time": "2026-03-11T13:28:52.291103Z"
} }
}, },
"source": [ "source": [
@@ -2587,8 +2587,6 @@
"\n", "\n",
"重新训练模型以收集训练指标...\n", "重新训练模型以收集训练指标...\n",
"Training until validation scores don't improve for 50 rounds\n", "Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[50]\ttrain's ndcg@1: 0.676684\ttrain's ndcg@5: 0.440728\ttrain's ndcg@10: 0.361258\ttrain's ndcg@20: 0.296362\tval's ndcg@1: 0.272472\tval's ndcg@5: 0.215751\tval's ndcg@10: 0.198035\tval's ndcg@20: 0.191275\n",
"训练完成,指标已收集\n", "训练完成,指标已收集\n",
"\n", "\n",
"评估的 NDCG 指标: ['ndcg@1', 'ndcg@5', 'ndcg@10', 'ndcg@20']\n", "评估的 NDCG 指标: ['ndcg@1', 'ndcg@5', 'ndcg@10', 'ndcg@20']\n",
@@ -2612,8 +2610,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:35.585609Z", "end_time": "2026-03-11T13:28:55.115683Z",
"start_time": "2026-03-10T14:56:35.001847Z" "start_time": "2026-03-11T13:28:54.771304Z"
} }
}, },
"source": [ "source": [
@@ -2705,8 +2703,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:35.982083Z", "end_time": "2026-03-11T13:28:55.390466Z",
"start_time": "2026-03-10T14:56:35.604003Z" "start_time": "2026-03-11T13:28:55.124417Z"
} }
}, },
"source": [ "source": [
@@ -2800,8 +2798,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T14:56:36.204207Z", "end_time": "2026-03-11T13:28:55.620414Z",
"start_time": "2026-03-10T14:56:35.991175Z" "start_time": "2026-03-11T13:28:55.402837Z"
} }
}, },
"source": [ "source": [

View File

@@ -11,8 +11,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:47:49.943018Z", "end_time": "2026-03-11T13:27:44.613389Z",
"start_time": "2026-03-10T13:47:49.364849Z" "start_time": "2026-03-11T13:27:44.026988Z"
} }
}, },
"source": [ "source": [
@@ -49,8 +49,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:47:49.953378Z", "end_time": "2026-03-11T13:27:44.632791Z",
"start_time": "2026-03-10T13:47:49.948493Z" "start_time": "2026-03-11T13:27:44.624747Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -124,8 +124,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:47:49.966103Z", "end_time": "2026-03-11T13:27:44.644952Z",
"start_time": "2026-03-10T13:47:49.958743Z" "start_time": "2026-03-11T13:27:44.640383Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -244,8 +244,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:47:49.975105Z", "end_time": "2026-03-11T13:27:44.654539Z",
"start_time": "2026-03-10T13:47:49.971219Z" "start_time": "2026-03-11T13:27:44.651010Z"
} }
}, },
"source": [ "source": [
@@ -354,8 +354,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:12.225433Z", "end_time": "2026-03-11T13:28:08.730709Z",
"start_time": "2026-03-10T13:47:49.978375Z" "start_time": "2026-03-11T13:27:44.661048Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -540,23 +540,23 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"数据形状: (7255513, 70)\n", "数据形状: (7255513, 70)\n",
"数据列: ['ts_code', 'trade_date', 'close', 'vol', 'open', 'turnover_rate', 'high', 'low', 'amount', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_cur_assets', 'total_liab', 'total_cur_liab', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5']\n", "数据列: ['ts_code', 'trade_date', 'low', 'open', 'turnover_rate', 'close', 'amount', 'vol', 'high', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_cur_assets', 'total_liab', 'total_cur_liab', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5']\n",
"\n", "\n",
"前5行预览:\n", "前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬────────────┬─────────┬───────────┬───┬──────────────────────┬───────────┬───────────┐\n", "┌───────────┬────────────┬─────────┬─────────┬───┬────────────┬────────────┬───────────┬───────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ value_pri ┆ active_ma ┆ ebit_rank ┆ future_re │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_re │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ ce_diverg ┆ rket_cap ┆ --- ┆ turn_5 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ turn_5 │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ence ┆ --- ┆ f64 ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ │\n",
"╞═══════════╪════════════╪═════════╪═══════════╪═══╪══════════════════════╪═══════════╪═══════════╡\n", "╞═══════════╪════════════╪═════════╪═════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n",
"│ 000001.SZ ┆ 20200102 ┆ 1841.69 ┆ 1.5302e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n", "│ 000001.SZ ┆ 20200102 ┆ 1806.75 ┆ 1817.67 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n",
"│ 000001.SZ ┆ 20200103 ┆ 1875.53 ┆ 1.1162e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n", "│ 000001.SZ ┆ 20200103 ┆ 1847.15 ┆ 1849.33 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n",
"│ 000001.SZ ┆ 20200106 ┆ 1863.52 ┆ 862083.5 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n", "│ 000001.SZ ┆ 20200106 ┆ 1846.05 ┆ 1856.97 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n",
"│ 000001.SZ ┆ 20200107 ┆ 1872.26 ┆ 728607.56 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n", "│ 000001.SZ ┆ 20200107 ┆ 1850.42 ┆ 1870.07 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n",
"│ 000001.SZ ┆ 20200108 ┆ 1818.76 ┆ 847824.12 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n", "│ 000001.SZ ┆ 20200108 ┆ 1815.49 ┆ 1855.88 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n",
"└───────────┴────────────┴─────────┴───────────┴───┴──────────────────────┴───────────┴───────────┘\n", "└───────────┴────────────┴─────────┴─────────┴───┴────────────┴────────────┴───────────┴───────────┘\n",
"\n", "\n",
"[配置] 训练期: 20200101 - 20231231\n", "[配置] 训练期: 20200101 - 20231231\n",
"[配置] 验证期: 20240101 - 20241231\n", "[配置] 验证期: 20240101 - 20241231\n",
@@ -579,8 +579,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:15.759149Z", "end_time": "2026-03-11T13:28:12.487988Z",
"start_time": "2026-03-10T13:48:12.232317Z" "start_time": "2026-03-11T13:28:08.737960Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -623,7 +623,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_34448\\3491564681.py:71: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n", "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_18404\\3491564681.py:71: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n",
"Please use `implode` to return to previous behavior.\n", "Please use `implode` to return to previous behavior.\n",
"\n", "\n",
"See https://github.com/pola-rs/polars/issues/22149 for more information.\n", "See https://github.com/pola-rs/polars/issues/22149 for more information.\n",
@@ -647,8 +647,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:15.834150Z", "end_time": "2026-03-11T13:28:12.560547Z",
"start_time": "2026-03-10T13:48:15.767063Z" "start_time": "2026-03-11T13:28:12.492037Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -706,56 +706,53 @@
"\n", "\n",
" 训练集前5行预览:\n", " 训练集前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬────────────┬───────┬──────────┬───┬────────────────────────┬───────────┬────────────┐\n", "┌───────────┬────────────┬───────┬───────┬───┬─────────────┬─────────────┬───────────┬────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_ret │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ value_price ┆ active_mark ┆ ebit_rank ┆ future_retu │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ urn_5 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ _divergence ┆ et_cap ┆ --- ┆ rn_5 │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ --- ┆ f64 ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ │\n", "╞═══════════╪════════════╪═══════╪═══════╪═══╪═════════════╪═════════════╪═══════════╪═════════════╡\n",
"╞═══════════╪════════════╪═══════╪══════════╪═══╪════════════╪════════════╪═══════════╪════════════╡\n", "│ 000004.SZ ┆ 20200102 ┆ 90.1 ┆ 92.05 ┆ … ┆ null ┆ null ┆ null ┆ 0.000441 │\n",
"│ 000004.SZ ┆ 2020010290.75 ┆ 17853.2 ┆ … ┆ null ┆ null ┆ null ┆ 0.000441 │\n", "│ 000004.SZ ┆ 2020010389.53 ┆ 90.67 ┆ … ┆ null ┆ null ┆ null ┆ 0.005875 │\n",
"│ 000004.SZ ┆ 2020010390.83 ┆ 13380.09 ┆ … ┆ null ┆ null ┆ null ┆ 0.005875 │\n", "│ 000004.SZ ┆ 2020010687.58 ┆ 90.22 ┆ … ┆ null ┆ null ┆ null ┆ 0.05644 │\n",
"│ 000004.SZ ┆ 20200106 ┆ 88.3920316.99 ┆ … ┆ null ┆ null ┆ null ┆ 0.05644 │\n", "│ 000004.SZ ┆ 20200107 ┆ 88.0688.59 ┆ … ┆ null ┆ null ┆ null ┆ 0.049753 │\n",
"│ 000004.SZ ┆ 20200107 ┆ 89.81 ┆ 17433.2 ┆ … ┆ null ┆ null ┆ null ┆ 0.049753 │\n", "│ 000004.SZ ┆ 20200108 ┆ 88.51 ┆ 89.04 ┆ … ┆ null ┆ null ┆ null ┆ 0.019922 │\n",
"│ 000004.SZ ┆ 20200108 ┆ 88.51 ┆ 15503.78 ┆ … ┆ null ┆ null ┆ null ┆ 0.019922 │\n", "└───────────┴────────────┴───────┴───────┴───┴─────────────┴─────────────┴───────────┴─────────────┘\n",
"└───────────┴────────────┴───────┴──────────┴───┴────────────┴────────────┴───────────┴────────────┘\n",
"\n", "\n",
" 验证集前5行预览:\n", " 验证集前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬────────────┬───────┬───────────┬───┬────────────────────────┬───────────┬───────────┐\n", "┌───────────┬────────────┬───────┬───────┬───┬─────────────┬─────────────┬───────────┬─────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_re │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ value_price ┆ active_mark ┆ ebit_rank ┆ future_retu │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ turn_5 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ _divergence ┆ et_cap ┆ --- ┆ rn_5 │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ --- ┆ f64 ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ │\n", "╞═══════════╪════════════╪═══════╪═══════╪═══╪═════════════╪═════════════╪═══════════╪═════════════╡\n",
"╞═══════════╪════════════╪═══════╪═══════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n", "│ 000004.SZ ┆ 20240102 ┆ 65.23 ┆ 65.43 ┆ … ┆ null ┆ 770442.9948 ┆ null ┆ -0.014188 │\n",
"│ 000004.SZ ┆ 20240102 ┆ 65.59 ┆ 28867.0null770442.994 ┆ null-0.014188 │\n", "│ ┆ ┆ ┆ 33 ┆ │\n",
"│ ┆ ┆ ┆ 833 ┆ ┆ │\n", "│ 000004.SZ ┆ 20240103 ┆ 64.62 ┆ 65.55null751492.2017 ┆ null ┆ 0.002432 │\n",
"│ 000004.SZ ┆ 20240103 ┆ 66.24 ┆ 30331.0null751492.201 ┆ null ┆ 0.002432 │\n", "│ ┆ ┆ ┆ 8 ┆ ┆ │\n",
"│ ┆ ┆ ┆ 78 ┆ ┆ │\n", "│ 000004.SZ ┆ 20240104 ┆ 64.7 ┆ 65.8null866443.5445 ┆ null ┆ 0.016919 │\n",
"│ 000004.SZ ┆ 20240104 ┆ 68.6 ┆ 160185.52null ┆ 866443.544 ┆ null ┆ 0.016919 │\n", "│ ┆ ┆ ┆ ┆ 25 ┆ ┆ │\n",
"│ ┆ ┆ ┆ 525 ┆ ┆ │\n", "│ 000004.SZ ┆ 20240105 ┆ 65.19 ┆ 67.38null907980.5905 ┆ null ┆ -0.013477 │\n",
"│ 000004.SZ ┆ 20240105 ┆ 66.49 ┆ 129763.88null ┆ 907980.590 ┆ null-0.013477 │\n", "│ ┆ ┆ ┆ ┆ 95 ┆ │\n",
"│ ┆ ┆ ┆ 595 ┆ ┆ │\n", "│ 000004.SZ ┆ 20240108 ┆ 65.02 ┆ 66.04null931205.3950 ┆ null ┆ -0.024684 │\n",
"│ 000004.SZ ┆ 20240108 ┆ 66.41 ┆ 83138.52null931205.395 ┆ null-0.024684 │\n", "│ ┆ ┆ ┆ 63 ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ 063 ┆ ┆ │\n", "└───────────┴────────────┴───────┴───────┴───┴─────────────┴─────────────┴───────────┴─────────────┘\n",
"└───────────┴────────────┴───────┴───────────┴───┴────────────┴────────────┴───────────┴───────────┘\n",
"\n", "\n",
" 测试集前5行预览:\n", " 测试集前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬────────────┬───────┬───────────┬───┬────────────────────────┬───────────┬───────────┐\n", "┌───────────┬────────────┬───────┬───────┬───┬─────────────┬─────────────┬───────────┬─────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_re │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ value_price ┆ active_mark ┆ ebit_rank ┆ future_retu │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ turn_5 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ _divergence ┆ et_cap ┆ --- ┆ rn_5 │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ --- ┆ f64 ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ │\n", "╞═══════════╪════════════╪═══════╪═══════╪═══╪═════════════╪═════════════╪═══════════╪═════════════╡\n",
"╞═══════════╪════════════╪═══════╪═══════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n", "│ 000004.SZ ┆ 20250102 ┆ 54.17 ┆ 55.8 ┆ … ┆ null ┆ 2.3754e6 ┆ null ┆ -0.066193 │\n",
"│ 000004.SZ ┆ 20250102 ┆ 57.63 ┆ 119760.37 ┆ … ┆ null ┆ 2.3754e6 ┆ null ┆ -0.066193 │\n", "│ 000004.SZ ┆ 20250103 ┆ 51.86 ┆ 57.71 ┆ … ┆ null ┆ 2.1884e6 ┆ null ┆ 0.00893 │\n",
"│ 000004.SZ ┆ 2025010352.02 ┆ 123929.75 ┆ … ┆ null ┆ 2.1884e6 ┆ null ┆ 0.00893 │\n", "│ 000004.SZ ┆ 2025010649.17 ┆ 50.39 ┆ … ┆ null ┆ 2.1549e6 ┆ null ┆ -0.0142 │\n",
"│ 000004.SZ ┆ 20250106 ┆ 50.88 ┆ 84810.46 ┆ … ┆ null ┆ 2.1549e6 ┆ null ┆ -0.0142 │\n", "│ 000004.SZ ┆ 20250107 ┆ 51.41 ┆ 51.41 ┆ … ┆ null ┆ 2.2770e6 ┆ null ┆ 0.013031 │\n",
"│ 000004.SZ ┆ 20250107 ┆ 53.28 ┆ 86097.07 ┆ … ┆ null ┆ 2.2770e6 ┆ null ┆ 0.013031 │\n", "│ 000004.SZ ┆ 20250108 ┆ 52.38 ┆ 52.95 ┆ … ┆ null ┆ 2.3533e6 ┆ null ┆ 0.00442 │\n",
"│ 000004.SZ ┆ 20250108 ┆ 54.46 ┆ 99782.01 ┆ … ┆ null ┆ 2.3533e6 ┆ null ┆ 0.00442 │\n", "└───────────┴────────────┴───────┴───────┴───┴─────────────┴─────────────┴───────────┴─────────────┘\n"
"└───────────┴────────────┴───────┴───────────┴───┴────────────┴────────────┴───────────┴───────────┘\n"
] ]
} }
], ],
@@ -764,8 +761,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:16.310435Z", "end_time": "2026-03-11T13:28:13.045107Z",
"start_time": "2026-03-10T13:48:15.842816Z" "start_time": "2026-03-11T13:28:12.565811Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -821,19 +818,18 @@
"\n", "\n",
" 训练集处理后前5行预览:\n", " 训练集处理后前5行预览:\n",
"shape: (5, 70)\n", "shape: (5, 70)\n",
"┌───────────┬────────────┬───────┬──────────┬───┬────────────────────────┬───────────┬────────────┐\n", "┌───────────┬────────────┬───────┬───────┬───┬─────────────┬─────────────┬───────────┬────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_ret │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ value_price ┆ active_mark ┆ ebit_rank ┆ future_retu │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ urn_5 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ _divergence ┆ et_cap ┆ --- ┆ rn_5 │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ --- ┆ f64 ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ │\n", "╞═══════════╪════════════╪═══════╪═══════╪═══╪═════════════╪═════════════╪═══════════╪═════════════╡\n",
"╞═══════════╪════════════╪═══════╪══════════╪═══╪════════════╪════════════╪═══════════╪════════════╡\n", "│ 000004.SZ ┆ 20200102 ┆ 90.1 ┆ 92.05 ┆ … ┆ null ┆ null ┆ null ┆ 0.000441 │\n",
"│ 000004.SZ ┆ 2020010290.75 ┆ 17853.2 ┆ … ┆ null ┆ null ┆ null ┆ 0.000441 │\n", "│ 000004.SZ ┆ 2020010389.53 ┆ 90.67 ┆ … ┆ null ┆ null ┆ null ┆ 0.005875 │\n",
"│ 000004.SZ ┆ 2020010390.83 ┆ 13380.09 ┆ … ┆ null ┆ null ┆ null ┆ 0.005875 │\n", "│ 000004.SZ ┆ 2020010687.58 ┆ 90.22 ┆ … ┆ null ┆ null ┆ null ┆ 0.05644 │\n",
"│ 000004.SZ ┆ 20200106 ┆ 88.3920316.99 ┆ … ┆ null ┆ null ┆ null ┆ 0.05644 │\n", "│ 000004.SZ ┆ 20200107 ┆ 88.0688.59 ┆ … ┆ null ┆ null ┆ null ┆ 0.049753 │\n",
"│ 000004.SZ ┆ 20200107 ┆ 89.81 ┆ 17433.2 ┆ … ┆ null ┆ null ┆ null ┆ 0.049753 │\n", "│ 000004.SZ ┆ 20200108 ┆ 88.51 ┆ 89.04 ┆ … ┆ null ┆ null ┆ null ┆ 0.019922 │\n",
"│ 000004.SZ ┆ 20200108 ┆ 88.51 ┆ 15503.78 ┆ … ┆ null ┆ null ┆ null ┆ 0.019922 │\n", "└───────────┴────────────┴───────┴───────┴───┴─────────────┴─────────────┴───────────┴─────────────┘\n",
"└───────────┴────────────┴───────┴──────────┴───┴────────────┴────────────┴───────────┴────────────┘\n",
"\n", "\n",
" 训练集特征统计:\n", " 训练集特征统计:\n",
" 特征数: 49\n", " 特征数: 49\n",
@@ -852,8 +848,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:25.881677Z", "end_time": "2026-03-11T13:28:26.439057Z",
"start_time": "2026-03-10T13:48:16.314088Z" "start_time": "2026-03-11T13:28:13.048971Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -910,8 +906,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:25.928602Z", "end_time": "2026-03-11T13:28:26.494851Z",
"start_time": "2026-03-10T13:48:25.885966Z" "start_time": "2026-03-11T13:28:26.443216Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -957,8 +953,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:26.403496Z", "end_time": "2026-03-11T13:28:26.994246Z",
"start_time": "2026-03-10T13:48:25.937393Z" "start_time": "2026-03-11T13:28:26.501158Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -1011,8 +1007,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:30.743182Z", "end_time": "2026-03-11T13:28:34.229526Z",
"start_time": "2026-03-10T13:48:26.406849Z" "start_time": "2026-03-11T13:28:27.000586Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -1114,8 +1110,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:30.986141Z", "end_time": "2026-03-11T13:28:34.643007Z",
"start_time": "2026-03-10T13:48:30.746784Z" "start_time": "2026-03-11T13:28:34.233779Z"
} }
}, },
"source": [ "source": [
@@ -1198,8 +1194,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:31.021266Z", "end_time": "2026-03-11T13:28:34.669584Z",
"start_time": "2026-03-10T13:48:30.995860Z" "start_time": "2026-03-11T13:28:34.650632Z"
} }
}, },
"source": [ "source": [
@@ -1239,42 +1235,42 @@
"================================================================================\n", "================================================================================\n",
"\n", "\n",
"结果数据形状: (282000, 71)\n", "结果数据形状: (282000, 71)\n",
"结果列: ['ts_code', 'trade_date', 'close', 'vol', 'open', 'turnover_rate', 'high', 'low', 'amount', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_cur_assets', 'total_liab', 'total_cur_liab', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5', 'prediction']\n", "结果列: ['ts_code', 'trade_date', 'low', 'open', 'turnover_rate', 'close', 'amount', 'vol', 'high', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_cur_assets', 'total_liab', 'total_cur_liab', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5', 'prediction']\n",
"\n", "\n",
"结果前10行预览:\n", "结果前10行预览:\n",
"shape: (10, 71)\n", "shape: (10, 71)\n",
"┌───────────┬────────────┬───────┬──────────┬───┬────────────┬───────────┬───────────────────────┐\n", "┌───────────┬────────────┬───────┬──────────┬──────────────┬───────────┬─────────────┬────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ active_mar ┆ ebit_rank ┆ future_ret ┆ predictio │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ active_marke ┆ ebit_rank ┆ future_retu ┆ prediction │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ ket_cap ┆ --- ┆ urn_5 ┆ n │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ t_cap ┆ --- ┆ rn_5 --- │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 │\n",
"╞═══════════╪════════════╪═══════╪══════════╪═══╪════════════╪═══════════╪═══════════════════════╡\n", "╞═══════════╪════════════╪═══════╪══════════╪══════════════╪═══════════╪═════════════╪════════════╡\n",
"│ 000004.SZ ┆ 20250102 ┆ 57.63 ┆ 119760.37 ┆ … ┆ 2.027703 ┆ null ┆ -0.066193 ┆ 0.015616 │\n", "│ 000004.SZ ┆ 20250102 ┆ 54.17 ┆ 55.8 ┆ … ┆ 2.027703 ┆ null ┆ -0.066193 ┆ 0.015616 │\n",
"│ 000004.SZ ┆ 20250103 ┆ 52.02 ┆ 123929.75 ┆ … ┆ 1.80063 ┆ null ┆ 0.00893 ┆ 0.03083 │\n", "│ 000004.SZ ┆ 20250103 ┆ 51.86 ┆ 57.71 ┆ … ┆ 1.80063 ┆ null ┆ 0.00893 ┆ 0.03083 │\n",
"│ 000004.SZ ┆ 20250106 ┆ 50.88 ┆ 84810.46 ┆ … ┆ 1.759968 ┆ null ┆ -0.0142 ┆ 0.044376 │\n", "│ 000004.SZ ┆ 20250106 ┆ 49.17 ┆ 50.39 ┆ … ┆ 1.759968 ┆ null ┆ -0.0142 ┆ 0.044376 │\n",
"│ 000004.SZ ┆ 20250107 ┆ 53.28 ┆ 86097.07 ┆ … ┆ 1.908269 ┆ null ┆ 0.013031 ┆ 0.014036 │\n", "│ 000004.SZ ┆ 20250107 ┆ 51.41 ┆ 51.41 ┆ … ┆ 1.908269 ┆ null ┆ 0.013031 ┆ 0.014036 │\n",
"│ 000004.SZ ┆ 20250108 ┆ 54.46 ┆ 99782.01 ┆ … ┆ 2.000828 ┆ null ┆ 0.00442 ┆ 0.006524 │\n", "│ 000004.SZ ┆ 20250108 ┆ 52.38 ┆ 52.95 ┆ … ┆ 2.000828 ┆ null ┆ 0.00442 ┆ 0.006524 │\n",
"│ 000004.SZ ┆ 20250109 ┆ 53.89 ┆ 82884.3 ┆ … ┆ 2.005907 ┆ null ┆ 0.024865 ┆ -0.002277 │\n", "│ 000004.SZ ┆ 20250109 ┆ 53.69 ┆ 54.3 ┆ … ┆ 2.005907 ┆ null ┆ 0.024865 ┆ -0.002277 │\n",
"│ 000004.SZ ┆ 20250110 ┆ 50.8482023.45 ┆ … ┆ 1.795423 ┆ null ┆ 0.073486 ┆ 0.007954 │\n", "│ 000004.SZ ┆ 20250110 ┆ 50.8 53.89 ┆ … ┆ 1.795423 ┆ null ┆ 0.073486 ┆ 0.007954 │\n",
"│ 000004.SZ ┆ 20250113 ┆ 50.68 ┆ 69987.0 ┆ … ┆ 1.563935 ┆ null ┆ -0.04458 ┆ 0.005756 │\n", "│ 000004.SZ ┆ 20250113 ┆ 48.24 ┆ 50.35 ┆ … ┆ 1.563935 ┆ null ┆ -0.04458 ┆ 0.005756 │\n",
"│ 000004.SZ ┆ 20250114 ┆ 53.64 ┆ 78520.0 ┆ … ┆ 1.572468 ┆ null ┆ -0.156301 ┆ 0.000774 │\n", "│ 000004.SZ ┆ 20250114 ┆ 50.92 ┆ 50.92 ┆ … ┆ 1.572468 ┆ null ┆ -0.156301 ┆ 0.000774 │\n",
"│ 000004.SZ ┆ 20250115 ┆ 54.54 ┆ 117410.0 ┆ … ┆ 1.570886 ┆ null ┆ -0.203593 ┆ 0.000368 │\n", "│ 000004.SZ ┆ 20250115 ┆ 53.97 ┆ 55.15 ┆ … ┆ 1.570886 ┆ null ┆ -0.203593 ┆ 0.000368 │\n",
"└───────────┴────────────┴───────┴──────────┴───┴────────────┴───────────┴───────────────────────┘\n", "└───────────┴────────────┴───────┴──────────┴──────────────┴───────────┴─────────────┴────────────┘\n",
"\n", "\n",
"结果后5行预览:\n", "结果后5行预览:\n",
"shape: (5, 71)\n", "shape: (5, 71)\n",
"┌───────────┬────────────┬───────┬──────────┬───┬────────────┬───────────┬───────────────────────┐\n", "┌───────────┬────────────┬───────┬──────────┬──────────────┬───────────┬─────────────┬────────────┐\n",
"│ ts_code ┆ trade_date ┆ close ┆ vol ┆ … ┆ active_mar ┆ ebit_rank ┆ future_ret ┆ predictio │\n", "│ ts_code ┆ trade_date ┆ low ┆ open ┆ … ┆ active_marke ┆ ebit_rank ┆ future_retu ┆ prediction │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ ket_cap ┆ --- ┆ urn_5 ┆ n │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ t_cap ┆ --- ┆ rn_5 --- │\n",
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- f64 │\n",
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 ┆ f64 │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 │\n",
"╞═══════════╪════════════╪═══════╪══════════╪═══╪════════════╪═══════════╪═══════════════════════╡\n", "╞═══════════╪════════════╪═══════╪══════════╪══════════════╪═══════════╪═════════════╪════════════╡\n",
"│ 605588.SH ┆ 20260305 ┆ 55.63 ┆ 18131.0 ┆ … ┆ 0.108525 ┆ null ┆ null ┆ 0.006038 │\n", "│ 605588.SH ┆ 20260305 ┆ 54.89 ┆ 55.01 ┆ … ┆ 0.108525 ┆ null ┆ null ┆ 0.006038 │\n",
"│ 605588.SH ┆ 20260306 ┆ 55.33 ┆ 12374.0 ┆ … ┆ 0.082701 ┆ null ┆ null ┆ 0.005326 │\n", "│ 605588.SH ┆ 20260306 ┆ 54.63 ┆ 55.23 ┆ … ┆ 0.082701 ┆ null ┆ null ┆ 0.005326 │\n",
"│ 605589.SH ┆ 20260303 ┆ 36.84 ┆ 342823.12 ┆ … ┆ 4.992206 ┆ null ┆ null ┆ -0.000194 │\n", "│ 605589.SH ┆ 20260303 ┆ 36.77 ┆ 39.02 ┆ … ┆ 4.992206 ┆ null ┆ null ┆ -0.000194 │\n",
"│ 605598.SH ┆ 20260303 ┆ 72.7852954.2 ┆ … ┆ 3.20713 ┆ null ┆ null ┆ 0.010633 │\n", "│ 605598.SH ┆ 20260303 ┆ 72.7277.17 ┆ … ┆ 3.20713 ┆ null ┆ null ┆ 0.010633 │\n",
"│ 605599.SH ┆ 20260303 ┆ 28.34156726.59 ┆ … ┆ 4.008987 ┆ null ┆ null ┆ 0.005141 │\n", "│ 605599.SH ┆ 20260303 ┆ 28.23 ┆ 30.23 ┆ … ┆ 4.008987 ┆ null ┆ null ┆ 0.005141 │\n",
"└───────────┴────────────┴───────┴──────────┴───┴────────────┴───────────┴───────────────────────┘\n", "└───────────┴────────────┴───────┴──────────┴──────────────┴───────────┴─────────────┴────────────┘\n",
"\n", "\n",
"每日预测样本数统计:\n", "每日预测样本数统计:\n",
" 最小: 1000\n", " 最小: 1000\n",
@@ -1314,8 +1310,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:31.352041Z", "end_time": "2026-03-11T13:28:35.113238Z",
"start_time": "2026-03-10T13:48:31.029509Z" "start_time": "2026-03-11T13:28:34.675124Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -1415,8 +1411,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:31.360505Z", "end_time": "2026-03-11T13:28:35.122694Z",
"start_time": "2026-03-10T13:48:31.356175Z" "start_time": "2026-03-11T13:28:35.117401Z"
} }
}, },
"source": [ "source": [
@@ -1510,8 +1506,8 @@
"cell_type": "code", "cell_type": "code",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:31.368086Z", "end_time": "2026-03-11T13:28:35.137085Z",
"start_time": "2026-03-10T13:48:31.365653Z" "start_time": "2026-03-11T13:28:35.126226Z"
} }
}, },
"source": [ "source": [
@@ -1552,8 +1548,8 @@
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2026-03-10T13:48:31.491899Z", "end_time": "2026-03-11T13:28:35.308547Z",
"start_time": "2026-03-10T13:48:31.374401Z" "start_time": "2026-03-11T13:28:35.141844Z"
} }
}, },
"cell_type": "code", "cell_type": "code",