(data leak)RollingRank-7.0,赚钱

This commit is contained in:
liaozhaorun
2025-04-09 22:57:01 +08:00
parent dc1e62c77c
commit 8aad47ce33
10 changed files with 3689 additions and 3701 deletions

View File

@@ -2,32 +2,30 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:43.537483Z",
"start_time": "2025-04-06T15:33:42.844004Z"
"end_time": "2025-04-08T13:37:08.050676Z",
"start_time": "2025-04-08T13:37:07.328483Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:45.387772Z",
"start_time": "2025-04-06T15:33:43.537483Z"
"end_time": "2025-04-08T13:37:10.251715Z",
"start_time": "2025-04-08T13:37:08.055681Z"
}
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
@@ -75,38 +73,19 @@
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
]
],
"outputs": [],
"execution_count": 2
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:54.089114Z",
"start_time": "2025-04-06T15:33:45.576286Z"
"end_time": "2025-04-08T13:37:37.727419Z",
"start_time": "2025-04-08T13:37:10.461897Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8502128 entries, 0 to 21571\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.6+ MB\n",
"None\n",
"20250403\n",
"20250407\n"
]
}
],
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
@@ -125,37 +104,39 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.041254Z",
"start_time": "2025-04-06T15:33:54.103322Z"
},
"scrolled": true
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8507519 entries, 0 to 5390\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.7+ MB\n",
"None\n",
"20250407\n",
"20250408\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-08T13:37:39.056144Z",
"start_time": "2025-04-08T13:37:37.770718Z"
}
},
"source": [
"\n",
"\n",
@@ -205,169 +186,192 @@
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.072796Z",
"start_time": "2025-04-06T15:33:57.061670Z"
}
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 000059.SZ 20250407 4.54 1.8414 3.4767 \n",
"1 600830.SH 20250407 8.33 2.5217 3.6802 \n",
"2 688061.SH 20250407 24.45 3.1011 3.1011 \n",
"3 600868.SH 20250407 2.79 3.8477 4.1435 \n",
"4 605168.SH 20250407 25.98 1.3857 2.8470 \n",
"... ... ... ... ... ... \n",
"5386 688259.SH 20250407 34.99 5.9799 11.4393 \n",
"5387 301316.SZ 20250407 19.20 7.2272 7.9512 \n",
"5388 601116.SH 20250407 10.37 2.3317 7.1579 \n",
"5389 605016.SH 20250407 17.20 1.4773 3.9134 \n",
"5390 600148.SH 20250407 16.07 2.0776 4.5745 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.84 103.2927 NaN 0.5851 0.1574 0.1928 0.3084 \n",
"1 0.69 71.1750 71.1750 1.7467 11.2902 11.2902 0.1801 \n",
"2 2.31 292.8121 NaN 1.1504 6.1795 4.9755 NaN \n",
"3 1.16 NaN NaN 2.3425 16.8832 16.0274 0.0000 \n",
"4 1.56 10.3735 14.0394 1.9988 1.0366 1.2218 4.5870 \n",
"... ... ... ... ... ... ... ... \n",
"5386 1.10 66.8795 64.8845 2.6173 5.9119 6.5930 NaN \n",
"5387 1.30 94.0750 110.9182 7.1350 5.7094 4.8530 0.4126 \n",
"5388 1.78 41.2451 36.3656 1.7811 1.4576 1.4350 1.9286 \n",
"5389 1.05 28.7938 22.2858 3.3051 6.4003 4.8254 1.3640 \n",
"5390 2.12 3441.4901 274.8323 4.8916 3.2666 3.3043 0.1867 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"0 0.3084 159944.2537 159944.2537 84712.3362 726146.9118 726146.9118 \n",
"1 0.1801 45432.2747 45432.2747 31131.0133 378450.8483 378450.8483 \n",
"2 NaN 11488.9391 4329.7770 4329.7770 280904.5610 105863.0477 \n",
"3 NaN 189814.8679 189814.8679 176264.8506 529583.4814 529583.4814 \n",
"4 4.5870 21081.6986 21081.6986 10260.7016 547702.5296 547702.5296 \n",
"... ... ... ... ... ... ... \n",
"5386 NaN 11170.0000 11170.0000 5839.1660 390838.3000 390838.3000 \n",
"5387 0.4126 40400.0000 24282.6503 22071.3403 775680.0000 466226.8858 \n",
"5388 1.9286 54767.8400 54767.8400 17840.9208 567942.5008 567942.5008 \n",
"5389 1.3640 32308.6400 32308.6400 12196.5716 555708.6080 555708.6080 \n",
"5390 0.1867 14151.6450 14151.6450 6427.3300 227416.9352 227416.9352 \n",
"\n",
" is_st \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False \n",
"... ... \n",
"5386 False \n",
"5387 False \n",
"5388 False \n",
"5389 False \n",
"5390 False \n",
"\n",
"[5391 rows x 19 columns]\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n"
]
}
],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
]
"execution_count": 4
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28cb78d032671b20",
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.104132Z",
"start_time": "2025-04-06T15:33:57.095010Z"
"end_time": "2025-04-08T13:37:39.072117Z",
"start_time": "2025-04-08T13:37:39.062189Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"16 000656.SZ 20250407 1.28 0.9982 1.1644 \n",
"62 002748.SZ 20250407 7.32 0.5503 1.1888 \n",
"114 002490.SZ 20250407 3.49 0.7559 1.3380 \n",
"128 300165.SZ 20250407 2.78 4.0431 4.7932 \n",
"278 600303.SH 20250407 3.22 1.1873 1.4918 \n",
"0 300504.SZ 20250408 12.65 2.5494 4.8465 \n",
"1 002223.SZ 20250408 34.24 0.9832 1.6194 \n",
"2 002036.SZ 20250408 9.13 7.4710 8.1827 \n",
"3 688207.SH 20250408 12.29 4.6144 4.6144 \n",
"4 002401.SZ 20250408 13.88 4.9037 9.6159 \n",
"... ... ... ... ... ... \n",
"5263 002217.SZ 20250407 2.07 0.1251 0.1569 \n",
"5267 002808.SZ 20250407 2.99 4.0901 4.7924 \n",
"5290 002602.SZ 20250407 6.44 0.2276 0.2634 \n",
"5315 002501.SZ 20250407 1.92 1.5653 2.0207 \n",
"5375 300376.SZ 20250407 2.96 1.4873 3.4865 \n",
"5387 600610.SH 20250408 7.56 18.8004 29.6937 \n",
"5388 002215.SZ 20250408 8.84 5.7658 6.7838 \n",
"5389 600694.SH 20250408 25.00 3.3101 5.4481 \n",
"5390 600121.SH 20250408 3.66 3.0305 6.3012 \n",
"5391 873167.BJ 20250408 21.56 7.8805 14.2434 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"16 0.44 NaN NaN NaN 0.1081 0.1637 0.0000 \n",
"62 0.61 96.0467 49.7297 1.3328 0.8402 0.8839 1.3661 \n",
"114 0.19 NaN NaN 5.6564 2.0529 2.0529 0.0000 \n",
"128 2.22 NaN NaN 0.9988 1.3542 1.4288 0.0000 \n",
"278 0.77 NaN NaN 1.4997 1.6142 1.6353 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"5263 0.23 NaN NaN NaN 3.3436 10.3100 0.0000 \n",
"5267 0.79 NaN NaN 2.5039 5.2047 4.8881 0.6689 \n",
"5290 0.20 91.5846 53.4453 1.8455 3.6128 2.5226 0.0000 \n",
"5315 0.58 NaN NaN 7.1559 14.2934 20.0240 0.0000 \n",
"5375 4.52 12.2436 36.2242 0.9837 1.4380 2.0320 1.6554 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.56 34.0479 220.6414 1.5349 1.3422 1.7126 1.5892 \n",
"1 1.07 14.3268 19.7636 2.8291 4.3058 4.6786 3.5030 \n",
"2 2.45 NaN NaN 3.6899 0.9822 0.9210 0.0000 \n",
"3 1.61 NaN NaN 1.5605 12.1348 26.4230 NaN \n",
"4 1.44 40.4258 40.4258 3.0931 2.8715 2.8715 1.2977 \n",
"... ... ... ... ... ... ... ... \n",
"5387 1.18 NaN NaN 122.1550 7.3648 7.3648 0.0000 \n",
"5388 2.49 37.7118 20.0533 2.2997 2.1570 1.7934 1.7092 \n",
"5389 3.51 15.4938 13.3524 0.9057 1.0676 1.1271 3.6364 \n",
"5390 1.13 15.7764 15.7764 2.3738 1.0605 1.0605 0.0000 \n",
"5391 0.79 33.5290 65.6770 3.2183 7.0572 9.9201 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"16 NaN 533971.5816 531174.3236 455354.2392 6.834836e+05 \n",
"62 1.3661 24000.0000 24000.0000 11108.5000 1.756800e+05 \n",
"114 NaN 79784.8400 54161.3625 30599.6625 2.784491e+05 \n",
"128 NaN 49551.1725 42053.2110 35472.8422 1.377523e+05 \n",
"278 NaN 68360.4211 67560.4211 53770.9211 2.201206e+05 \n",
"... ... ... ... ... ... \n",
"5263 NaN 747939.8928 568036.4278 453036.0995 1.548236e+06 \n",
"5267 0.6689 26880.0000 18638.3713 15907.0731 8.037120e+04 \n",
"5290 NaN 745255.6968 687870.8273 594244.1179 4.799447e+06 \n",
"5315 NaN 355000.0000 354999.9006 274999.9006 6.816000e+05 \n",
"5375 1.6554 232824.0476 232743.4901 99284.6609 6.891592e+05 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 1.5892 27102.4580 21826.2631 11481.0786 3.428461e+05 \n",
"1 3.5030 100247.6929 93867.3649 56990.4202 3.432481e+06 \n",
"2 NaN 105938.4915 105290.9483 96132.5171 9.672184e+05 \n",
"3 NaN 25897.3147 18867.6306 18867.6306 3.182780e+05 \n",
"4 1.2977 37166.8440 37136.3940 18937.9540 5.158758e+05 \n",
"... ... ... ... ... ... \n",
"5387 NaN 107127.4605 70872.6705 44872.6705 8.098836e+05 \n",
"5388 1.7092 100519.1310 79400.9515 67486.1454 8.885891e+05 \n",
"5389 3.6364 31305.2571 31305.2571 19020.4513 7.826314e+05 \n",
"5390 NaN 121841.2038 121841.2038 58597.2758 4.459388e+05 \n",
"5391 NaN 7086.1250 4178.1867 2311.6822 1.527769e+05 \n",
"\n",
" circ_mv is_st \n",
"16 6.799031e+05 True \n",
"62 1.756800e+05 True \n",
"114 1.890232e+05 True \n",
"128 1.169079e+05 True \n",
"278 2.175446e+05 True \n",
"0 2.761022e+05 False \n",
"1 3.214019e+06 False \n",
"2 9.613064e+05 False \n",
"3 2.318832e+05 False \n",
"4 5.154531e+05 False \n",
"... ... ... \n",
"5263 1.175835e+06 True \n",
"5267 5.572873e+04 True \n",
"5290 4.429888e+06 True \n",
"5315 6.815998e+05 True \n",
"5375 6.889207e+05 True \n",
"5387 5.357974e+05 False \n",
"5388 7.019044e+05 False \n",
"5389 7.826314e+05 False \n",
"5390 4.459388e+05 False \n",
"5391 9.008171e+04 False \n",
"\n",
"[5392 rows x 19 columns]\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T13:37:39.103515Z",
"start_time": "2025-04-08T13:37:39.093908Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"20 000488.SZ 20250408 1.74 2.5808 3.5449 \n",
"21 603608.SH 20250408 4.20 0.2313 0.3624 \n",
"88 603363.SH 20250408 3.35 1.2763 1.4156 \n",
"124 000989.SZ 20250408 7.60 2.5216 3.5863 \n",
"136 300965.SZ 20250408 36.20 1.9389 2.6640 \n",
"... ... ... ... ... ... \n",
"5261 603879.SH 20250408 4.13 4.3647 6.8212 \n",
"5273 002024.SZ 20250408 1.76 0.5005 1.3623 \n",
"5298 603828.SH 20250408 4.43 1.3711 2.7554 \n",
"5337 600234.SH 20250408 5.53 0.5518 1.0422 \n",
"5370 300536.SZ 20250408 7.99 2.2037 2.7214 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"20 0.69 NaN NaN 0.5590 0.2252 0.2252 0.0000 \n",
"21 0.35 NaN NaN 1.5767 1.3841 1.5604 0.0000 \n",
"88 2.09 NaN NaN NaN 0.4481 0.7781 0.0000 \n",
"124 1.71 30.0883 30.0883 1.7332 2.7432 2.7432 5.2053 \n",
"136 1.27 NaN NaN 1.7736 NaN NaN 0.0829 \n",
"... ... ... ... ... ... ... ... \n",
"5261 1.67 NaN NaN 5.6207 4.0072 4.0072 0.0000 \n",
"5273 1.06 26.7044 26.7044 1.3118 0.2871 0.2871 0.0000 \n",
"5298 0.38 NaN NaN 3.5130 1.0396 1.0348 0.0000 \n",
"5337 2.28 NaN NaN 3.2963 20.7089 9.4391 0.0000 \n",
"5370 0.86 NaN NaN 4.2696 32.8078 24.2873 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"20 NaN 294145.6200 167582.4530 122004.3211 5.118134e+05 \n",
"21 NaN 41971.5446 41971.5446 26785.1109 1.762805e+05 \n",
"88 NaN 260296.1826 146776.2912 132325.9245 8.719922e+05 \n",
"124 5.2053 85594.2012 69415.3353 48807.3173 6.505159e+05 \n",
"136 0.0829 6000.0000 2060.9250 1500.0000 2.172000e+05 \n",
"... ... ... ... ... ... \n",
"5261 NaN 35934.4440 35934.4440 22993.7696 1.484093e+05 \n",
"5273 NaN 926476.7618 925444.1318 340007.5385 1.630599e+06 \n",
"5298 NaN 59596.0158 59593.9625 29654.2988 2.640103e+05 \n",
"5337 NaN 26252.0973 26252.0973 13899.8888 1.451741e+05 \n",
"5370 NaN 29328.8133 29325.3240 23747.3240 2.343372e+05 \n",
"\n",
" circ_mv is_st \n",
"20 2.915935e+05 True \n",
"21 1.762805e+05 True \n",
"88 4.917006e+05 True \n",
"124 5.275565e+05 True \n",
"136 7.460549e+04 True \n",
"... ... ... \n",
"5261 1.484093e+05 True \n",
"5273 1.628782e+06 True \n",
"5298 2.640013e+05 True \n",
"5337 1.451741e+05 True \n",
"5370 2.343093e+05 True \n",
"\n",
"[106 rows x 19 columns]\n"
]
}
],
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
]
"execution_count": 6
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.927188Z",
"start_time": "2025-04-06T15:33:57.127166Z"
"end_time": "2025-04-08T13:37:39.921445Z",
"start_time": "2025-04-08T13:37:39.128232Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
@@ -377,30 +381,29 @@
]
}
],
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
]
"execution_count": 7
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:34:06.721517Z",
"start_time": "2025-04-06T15:33:57.951119Z"
"end_time": "2025-04-08T13:37:46.393814Z",
"start_time": "2025-04-08T13:37:39.941474Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8507519 entries, 0 to 5390\n",
"Index: 8512911 entries, 0 to 5391\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -408,16 +411,12 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 202.8+ MB\n",
"memory usage: 203.0+ MB\n",
"None\n"
]
}
],
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
]
"execution_count": 8
}
],
"metadata": {