feat(data): 添加 DuckDB 只读模式支持
- Storage 类默认使用 read_only=True 模式,允许多进程并发读取 - ThreadSafeStorage 自动使用 read_only=False 模式,用于数据同步写入 - catalog.query_duckdb_to_polars 函数使用只读连接
This commit is contained in:
@@ -27,8 +27,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:55:40.429719Z",
|
||||
"start_time": "2026-03-10T14:55:39.008639Z"
|
||||
"end_time": "2026-03-11T13:28:00.454472Z",
|
||||
"start_time": "2026-03-11T13:27:55.535146Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -69,8 +69,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:55:40.441603Z",
|
||||
"start_time": "2026-03-10T14:55:40.434663Z"
|
||||
"end_time": "2026-03-11T13:28:00.468024Z",
|
||||
"start_time": "2026-03-11T13:28:00.460009Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -273,8 +273,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:55:40.451021Z",
|
||||
"start_time": "2026-03-10T14:55:40.444975Z"
|
||||
"end_time": "2026-03-11T13:28:00.479401Z",
|
||||
"start_time": "2026-03-11T13:28:00.472206Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -363,8 +363,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:55:40.459051Z",
|
||||
"start_time": "2026-03-10T14:55:40.454871Z"
|
||||
"end_time": "2026-03-11T13:28:00.489508Z",
|
||||
"start_time": "2026-03-11T13:28:00.483843Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -456,8 +456,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:04.604292Z",
|
||||
"start_time": "2026-03-10T14:55:40.466222Z"
|
||||
"end_time": "2026-03-11T13:28:24.494829Z",
|
||||
"start_time": "2026-03-11T13:28:00.494691Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -634,23 +634,23 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"数据形状: (7044952, 70)\n",
|
||||
"数据列: ['ts_code', 'trade_date', 'amount', 'vol', 'turnover_rate', 'close', 'low', 'high', 'open', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_liab', 'total_cur_assets', 'total_hldr_eqy_exc_min_int', 'total_cur_liab', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5_rank']\n",
|
||||
"数据列: ['ts_code', 'trade_date', 'close', 'open', 'high', 'amount', 'low', 'vol', 'turnover_rate', 'total_assets', 'total_mv', 'f_ann_date', 'n_income', 'revenue', 'total_liab', 'total_cur_liab', 'total_cur_assets', 'total_hldr_eqy_exc_min_int', 'n_cashflow_act', 'ebit', 'ma_5', 'ma_20', 'ma_ratio_5_20', 'bias_10', 'high_low_ratio', 'bbi_ratio', 'return_5', 'return_20', 'kaufman_ER_20', 'mom_acceleration_10_20', 'drawdown_from_high_60', 'up_days_ratio_20', 'volatility_5', 'volatility_20', 'volatility_ratio', 'std_return_20', 'sharpe_ratio_20', 'min_ret_20', 'volatility_squeeze_5_60', 'overnight_intraday_diff', 'upper_shadow_ratio', 'capital_retention_20', 'max_ret_20', 'volume_ratio_5_20', 'turnover_rate_mean_5', 'turnover_deviation', 'amihud_illiq_20', 'turnover_cv_20', 'pv_corr_20', 'close_vwap_deviation', 'roe', 'roa', 'profit_margin', 'debt_to_equity', 'current_ratio', 'net_profit_yoy', 'revenue_yoy', 'healthy_expansion_velocity', 'EP', 'BP', 'CP', 'market_cap_rank', 'turnover_rank', 'return_5_rank', 'EP_rank', 'pe_expansion_trend', 'value_price_divergence', 'active_market_cap', 'ebit_rank', 'future_return_5_rank']\n",
|
||||
"\n",
|
||||
"前5行预览:\n",
|
||||
"shape: (5, 70)\n",
|
||||
"┌───────────┬───────────┬──────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
|
||||
"│ ts_code ┆ trade_dat ┆ amount ┆ vol ┆ … ┆ value_pri ┆ active_ma ┆ ebit_rank ┆ future_re │\n",
|
||||
"│ --- ┆ e ┆ --- ┆ --- ┆ ┆ ce_diverg ┆ rket_cap ┆ --- ┆ turn_5_ra │\n",
|
||||
"│ str ┆ --- ┆ f64 ┆ f64 ┆ ┆ ence ┆ --- ┆ f64 ┆ nk │\n",
|
||||
"│ ┆ str ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ --- │\n",
|
||||
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ f64 │\n",
|
||||
"╞═══════════╪═══════════╪══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
|
||||
"│ 000001.SZ ┆ 20200102 ┆ 2.5712e6 ┆ 1.5302e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n",
|
||||
"│ 000001.SZ ┆ 20200103 ┆ 1.9145e6 ┆ 1.1162e6 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n",
|
||||
"│ 000001.SZ ┆ 20200106 ┆ 1.4779e6 ┆ 862083.5 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n",
|
||||
"│ 000001.SZ ┆ 20200107 ┆ 1.2470e6 ┆ 728607.56 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n",
|
||||
"│ 000001.SZ ┆ 20200108 ┆ 1.4236e6 ┆ 847824.12 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n",
|
||||
"└───────────┴───────────┴──────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘\n",
|
||||
"┌───────────┬────────────┬─────────┬─────────┬───┬────────────┬────────────┬───────────┬───────────┐\n",
|
||||
"│ ts_code ┆ trade_date ┆ close ┆ open ┆ … ┆ value_pric ┆ active_mar ┆ ebit_rank ┆ future_re │\n",
|
||||
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ e_divergen ┆ ket_cap ┆ --- ┆ turn_5_ra │\n",
|
||||
"│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ ce ┆ --- ┆ f64 ┆ nk │\n",
|
||||
"│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ --- │\n",
|
||||
"│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ f64 │\n",
|
||||
"╞═══════════╪════════════╪═════════╪═════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n",
|
||||
"│ 000001.SZ ┆ 20200102 ┆ 1841.69 ┆ 1817.67 ┆ … ┆ null ┆ null ┆ null ┆ -0.008857 │\n",
|
||||
"│ 000001.SZ ┆ 20200103 ┆ 1875.53 ┆ 1849.33 ┆ … ┆ null ┆ null ┆ null ┆ -0.01881 │\n",
|
||||
"│ 000001.SZ ┆ 20200106 ┆ 1863.52 ┆ 1856.97 ┆ … ┆ null ┆ null ┆ null ┆ -0.008171 │\n",
|
||||
"│ 000001.SZ ┆ 20200107 ┆ 1872.26 ┆ 1870.07 ┆ … ┆ null ┆ null ┆ null ┆ -0.014117 │\n",
|
||||
"│ 000001.SZ ┆ 20200108 ┆ 1818.76 ┆ 1855.88 ┆ … ┆ null ┆ null ┆ null ┆ -0.017252 │\n",
|
||||
"└───────────┴────────────┴─────────┴─────────┴───┴────────────┴────────────┴───────────┴───────────┘\n",
|
||||
"\n",
|
||||
"[4] 转换为排序学习格式\n",
|
||||
"\n",
|
||||
@@ -743,7 +743,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\551043002.py:108: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\551043002.py:108: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
|
||||
"(Deprecated in version 0.20.5)\n",
|
||||
" daily_counts = df_ranked.group_by(date_col).agg(pl.count().alias(\"count\"))\n"
|
||||
]
|
||||
@@ -762,8 +762,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:29.070549Z",
|
||||
"start_time": "2026-03-10T14:56:04.619443Z"
|
||||
"end_time": "2026-03-11T13:28:49.070709Z",
|
||||
"start_time": "2026-03-11T13:28:24.501332Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2250,7 +2250,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\174861970.py:63: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n",
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\174861970.py:63: DeprecationWarning: `is_in` with a collection of the same datatype is ambiguous and deprecated.\n",
|
||||
"Please use `implode` to return to previous behavior.\n",
|
||||
"\n",
|
||||
"See https://github.com/pola-rs/polars/issues/22149 for more information.\n",
|
||||
@@ -2282,8 +2282,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:29.152001Z",
|
||||
"start_time": "2026-03-10T14:56:29.081042Z"
|
||||
"end_time": "2026-03-11T13:28:49.140684Z",
|
||||
"start_time": "2026-03-11T13:28:49.078377Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2337,7 +2337,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_7864\\551043002.py:132: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_29336\\551043002.py:132: DeprecationWarning: `pl.count()` is deprecated. Please use `pl.len()` instead.\n",
|
||||
"(Deprecated in version 0.20.5)\n",
|
||||
" pl.count().alias(\"count\")\n"
|
||||
]
|
||||
@@ -2356,8 +2356,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:29.729147Z",
|
||||
"start_time": "2026-03-10T14:56:29.161969Z"
|
||||
"end_time": "2026-03-11T13:28:49.680671Z",
|
||||
"start_time": "2026-03-11T13:28:49.150412Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2423,8 +2423,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:32.377628Z",
|
||||
"start_time": "2026-03-10T14:56:29.735814Z"
|
||||
"end_time": "2026-03-11T13:28:52.285645Z",
|
||||
"start_time": "2026-03-11T13:28:49.685562Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2510,8 +2510,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:34.997174Z",
|
||||
"start_time": "2026-03-10T14:56:32.382086Z"
|
||||
"end_time": "2026-03-11T13:28:54.767311Z",
|
||||
"start_time": "2026-03-11T13:28:52.291103Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2587,8 +2587,6 @@
|
||||
"\n",
|
||||
"重新训练模型以收集训练指标...\n",
|
||||
"Training until validation scores don't improve for 50 rounds\n",
|
||||
"Early stopping, best iteration is:\n",
|
||||
"[50]\ttrain's ndcg@1: 0.676684\ttrain's ndcg@5: 0.440728\ttrain's ndcg@10: 0.361258\ttrain's ndcg@20: 0.296362\tval's ndcg@1: 0.272472\tval's ndcg@5: 0.215751\tval's ndcg@10: 0.198035\tval's ndcg@20: 0.191275\n",
|
||||
"训练完成,指标已收集\n",
|
||||
"\n",
|
||||
"评估的 NDCG 指标: ['ndcg@1', 'ndcg@5', 'ndcg@10', 'ndcg@20']\n",
|
||||
@@ -2612,8 +2610,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:35.585609Z",
|
||||
"start_time": "2026-03-10T14:56:35.001847Z"
|
||||
"end_time": "2026-03-11T13:28:55.115683Z",
|
||||
"start_time": "2026-03-11T13:28:54.771304Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2705,8 +2703,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:35.982083Z",
|
||||
"start_time": "2026-03-10T14:56:35.604003Z"
|
||||
"end_time": "2026-03-11T13:28:55.390466Z",
|
||||
"start_time": "2026-03-11T13:28:55.124417Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -2800,8 +2798,8 @@
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2026-03-10T14:56:36.204207Z",
|
||||
"start_time": "2026-03-10T14:56:35.991175Z"
|
||||
"end_time": "2026-03-11T13:28:55.620414Z",
|
||||
"start_time": "2026-03-11T13:28:55.402837Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
|
||||
Reference in New Issue
Block a user