This commit is contained in:
2025-06-10 15:22:25 +08:00
parent 15f327b8ae
commit 0c12e6c2b1
25 changed files with 8157 additions and 5583 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -5448,7 +5448,7 @@
"\n",
"\n",
"# 读取本地保存的股票列表 CSV 文件(假设文件名为 stocks_list.csv)\n",
"stocks_df = pd.read_csv('../../stocks_list.csv', encoding='utf-8-sig')\n",
"stocks_df = pd.read_csv('stocks_list.csv', encoding='utf-8-sig')\n",
"\n",
"# 用于存放所有股票的日线数据(每次获取的 DataFrame)\n",
"daily_data_list = []\n",
@@ -5488,7 +5488,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "85bdf760cb83fbd3",
"metadata": {
"ExecuteTime": {
@@ -5510,7 +5510,7 @@
"if daily_data_list:\n",
" all_daily_data = pd.concat(daily_data_list, ignore_index=True)\n",
" # 使用 HDF5 格式保存到本地文件，文件名 daily_data.h5，key 设为 'daily_data'\n",
" all_daily_data.to_hdf('../../data/daily_data.h5', key='daily_data', mode='w', format='table')\n",
" all_daily_data.to_hdf('/mnt/d/PyProject/NewStock/data/daily_data.h5', key='daily_data', mode='w', format='table')\n",
" print(\"所有日线数据已保存到 daily_data.h5\")\n",
"else:\n",
" print(\"未获取到任何日线数据。\")"

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "f448da220816bf98",
"metadata": {
"ExecuteTime": {
@@ -70,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "907f732d3c397bf",
"metadata": {
"ExecuteTime": {
@@ -84,32 +84,32 @@
"output_type": "stream",
"text": [
" ts_code trade_date close open high low \\\n",
"0 000905.SH 20250530 5671.0723 5704.7710 5704.7710 5665.5177 \n",
"1 000905.SH 20250529 5719.9101 5637.0633 5724.5185 5637.0633 \n",
"2 000905.SH 20250528 5637.2378 5651.8755 5660.4696 5628.4165 \n",
"3 000905.SH 20250527 5652.1454 5666.3027 5667.8710 5629.1343 \n",
"4 000905.SH 20250526 5669.4609 5653.2063 5693.6250 5644.5794 \n",
"0 000905.SH 20250606 5762.0778 5768.3989 5771.7558 5750.6592 \n",
"1 000905.SH 20250605 5769.9675 5741.0298 5778.0959 5719.5466 \n",
"2 000905.SH 20250604 5739.0058 5696.5692 5744.4543 5696.5692 \n",
"3 000905.SH 20250603 5694.8385 5653.6747 5710.4203 5653.2978 \n",
"4 000905.SH 20250530 5671.0723 5704.7710 5704.7710 5665.5177 \n",
"... ... ... ... ... ... ... \n",
"13546 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13547 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13548 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13549 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13550 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"13558 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13559 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13560 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13561 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13562 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"\n",
" pre_close change pct_chg vol amount \n",
"0 5719.9101 -48.8378 -0.8538 1.099007e+08 1.376706e+08 \n",
"1 5637.2378 82.6723 1.4665 1.146825e+08 1.480951e+08 \n",
"2 5652.1454 -14.9076 -0.2638 9.490888e+07 1.199598e+08 \n",
"3 5669.4609 -17.3155 -0.3054 9.514936e+07 1.252757e+08 \n",
"4 5653.0436 16.4173 0.2904 9.717099e+07 1.273436e+08 \n",
"0 5769.9675 -7.8897 -0.1367 1.082177e+08 1.480224e+08 \n",
"1 5739.0058 30.9617 0.5395 1.252236e+08 1.749701e+08 \n",
"2 5694.8385 44.1673 0.7756 1.161961e+08 1.503149e+08 \n",
"3 5671.0723 23.7662 0.4191 1.228539e+08 1.599968e+08 \n",
"4 5719.9101 -48.8378 -0.8538 1.099007e+08 1.376706e+08 \n",
"... ... ... ... ... ... \n",
"13546 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13547 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13548 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13549 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13550 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"13558 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13559 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13560 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13561 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13562 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"\n",
"[13551 rows x 11 columns]\n"
"[13563 rows x 11 columns]\n"
]
}
],

View File

@@ -39,15 +39,15 @@
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"26917 920445.BJ 20250519\n",
"26918 920489.BJ 20250519\n",
"26919 920682.BJ 20250519\n",
"26920 920799.BJ 20250519\n",
"26921 920819.BJ 20250519\n",
"26894 920445.BJ 20250526\n",
"26895 920489.BJ 20250526\n",
"26896 920682.BJ 20250526\n",
"26897 920799.BJ 20250526\n",
"26898 920819.BJ 20250526\n",
"\n",
"[7724334 rows x 2 columns]\n",
"20250523\n",
"start_date: 20250526\n"
"[7751233 rows x 2 columns]\n",
"20250530\n",
"start_date: 20250603\n"
]
}
],
@@ -96,15 +96,10 @@
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250606 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n"
"任务 20250603 完成\n"
]
}
],

View File

@@ -39,15 +39,15 @@
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"2190 859811.SI 20250519\n",
"2191 859821.SI 20250519\n",
"2192 859822.SI 20250519\n",
"2193 859852.SI 20250519\n",
"2194 859951.SI 20250519\n",
"2190 859811.SI 20250526\n",
"2191 859821.SI 20250526\n",
"2192 859822.SI 20250526\n",
"2193 859852.SI 20250526\n",
"2194 859951.SI 20250526\n",
"\n",
"[1071172 rows x 2 columns]\n",
"20250523\n",
"start_date: 20250526\n"
"[1073367 rows x 2 columns]\n",
"20250530\n",
"start_date: 20250603\n"
]
}
],
@@ -86,25 +86,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250609 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250527 完成\n",
"任务 20250528 完成\n",
"任务 20250526 完成\n"
"任务 20250603 完成\n"
]
}
],

View File

@@ -94,17 +94,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8674588 entries, 0 to 26945\n",
"Index: 8701511 entries, 0 to 26922\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 198.5+ MB\n",
"memory usage: 199.2+ MB\n",
"None\n",
"20250523\n",
"20250526\n"
"20250530\n",
"20250603\n"
]
}
],
@@ -150,12 +150,12 @@
"任务 20250715 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250709 完成\n",
"任务 20250710 完成\n",
"任务 20250709 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250704 完成\n",
"任务 20250703 完成\n",
"任务 20250704 完成\n",
"任务 20250702 完成\n",
"任务 20250701 完成\n",
"任务 20250630 完成\n",
@@ -174,15 +174,10 @@
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250603 完成\n",
"任务 20250606 完成\n",
"任务 20250604 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n"
"任务 20250603 完成\n"
]
}
],
@@ -253,58 +248,58 @@
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 603990.SH 20250530 14.96 3.7919 4.9168 \n",
"1 603666.SH 20250530 33.72 2.4954 4.7137 \n",
"2 001339.SZ 20250530 45.78 7.0710 7.0710 \n",
"3 002006.SZ 20250530 16.67 2.4368 3.4806 \n",
"4 603353.SH 20250530 15.21 1.3567 4.1316 \n",
"0 600845.SH 20250605 25.68 0.8243 2.5243 \n",
"1 000153.SZ 20250605 6.12 1.9588 2.7729 \n",
"2 002086.SZ 20250605 2.76 5.2365 6.0861 \n",
"3 300020.SZ 20250605 3.87 2.2399 2.5078 \n",
"4 605567.SH 20250605 9.90 2.5088 4.5825 \n",
"... ... ... ... ... ... \n",
"26918 002670.SZ 20250526 11.86 0.7662 2.3092 \n",
"26919 839946.BJ 20250526 9.67 4.8520 6.8863 \n",
"26920 688076.SH 20250526 49.59 5.9483 9.5054 \n",
"26921 300519.SZ 20250526 14.44 2.4601 3.8976 \n",
"26922 300468.SZ 20250526 18.15 6.8275 8.8410 \n",
"21540 000068.SZ 20250603 3.38 1.1289 2.0176 \n",
"21541 301135.SZ 20250603 25.84 4.8553 4.8553 \n",
"21542 603026.SH 20250603 33.42 0.4772 0.7542 \n",
"21543 002079.SZ 20250603 9.45 1.0524 1.3694 \n",
"21544 688335.SH 20250603 12.69 1.1169 2.2103 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm \\\n",
"0 0.65 NaN NaN 5.5665 9.8735 11.0137 \n",
"1 1.15 NaN NaN 3.2133 11.8990 10.3525 \n",
"2 1.22 91.7742 74.3709 5.3909 2.8419 2.7478 \n",
"3 0.81 58.9666 65.5384 3.6508 5.0124 5.4591 \n",
"4 1.10 90.1163 80.8019 1.5917 0.9380 0.9517 \n",
"... ... ... ... ... ... ... \n",
"26918 0.75 137.0866 106.8454 2.0610 15093.0115 14821.3328 \n",
"26919 0.55 NaN NaN 5.7695 2.5489 2.4978 \n",
"26920 3.15 27.5757 22.7263 3.7628 6.8632 6.0784 \n",
"26921 1.14 45.8504 44.3443 2.7022 8.6318 8.8737 \n",
"26922 1.08 142.9746 150.8960 5.8350 13.0086 13.6702 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.47 32.6889 34.9249 6.1529 5.4276 5.7895 3.2460 \n",
"1 0.95 17.6853 18.7324 1.4099 0.6639 0.6930 1.7509 \n",
"2 1.00 NaN NaN 3.8361 15.8946 15.5013 0.0000 \n",
"3 0.66 NaN NaN 0.9763 5.6130 21.2702 0.0000 \n",
"4 0.99 242.4925 78.2360 1.8181 0.7875 0.7674 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"21540 1.02 259.4835 175.8911 4.9250 3.3696 3.4641 0.0000 \n",
"21541 0.98 68.9144 62.8352 2.0805 2.0868 1.9475 1.1264 \n",
"21542 0.95 412.5304 NaN 1.7468 1.2212 1.1453 0.1197 \n",
"21543 0.93 103.8909 74.2709 2.4969 1.3579 1.4180 0.4011 \n",
"21544 0.87 NaN NaN 1.6474 10.0514 8.7963 NaN \n",
"\n",
" dv_ratio dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.0000 NaN 30628.2731 30628.2731 23620.5583 4.581990e+05 \n",
"1 0.0000 NaN 20649.0816 20649.0816 10931.3716 6.962870e+05 \n",
"2 0.2622 0.3498 25042.9670 7313.0995 7313.0995 1.146467e+06 \n",
"3 0.7749 0.7749 51979.3440 45516.0000 31865.7600 8.664957e+05 \n",
"4 0.6462 1.3036 17339.4000 17041.8000 5596.0000 2.637323e+05 \n",
"... ... ... ... ... ... ... \n",
"26918 0.0000 NaN 193508.4653 162335.0634 53860.6790 2.295010e+06 \n",
"26919 NaN NaN 13499.0443 9702.8595 6836.5574 1.305358e+05 \n",
"26920 NaN NaN 22487.0915 22487.0915 14071.9565 1.115135e+06 \n",
"26921 2.7701 2.7701 16000.0000 11410.0000 7201.9100 2.310400e+05 \n",
"26922 0.3306 0.3306 53064.9275 52979.4065 40913.5262 9.631284e+05 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 3.2460 288380.3858 213374.0521 69678.6847 7.405608e+06 \n",
"1 1.6340 46477.3722 45294.3722 31996.8047 2.844415e+05 \n",
"2 NaN 195894.6500 151702.1291 130526.0564 5.406692e+05 \n",
"3 NaN 79467.7974 76663.9584 68475.6577 3.075404e+05 \n",
"4 NaN 20000.0000 20000.0000 10949.3050 1.980000e+05 \n",
"... ... ... ... ... ... \n",
"21540 NaN 100667.1464 100667.1464 56326.7969 3.402550e+05 \n",
"21541 1.1264 10195.2000 5558.9000 5558.9000 2.634440e+05 \n",
"21542 0.1197 20268.0000 20268.0000 12822.4285 6.773566e+05 \n",
"21543 0.4011 81013.9316 80937.8478 62203.4223 7.655817e+05 \n",
"21544 NaN 14803.4592 14803.4592 7480.3745 1.878559e+05 \n",
"\n",
" circ_mv is_st \n",
"0 4.581990e+05 False \n",
"1 6.962870e+05 False \n",
"2 3.347937e+05 False \n",
"3 7.587517e+05 False \n",
"4 2.592058e+05 False \n",
"0 5.479446e+06 False \n",
"1 2.772016e+05 False \n",
"2 4.186979e+05 False \n",
"3 2.966895e+05 True \n",
"4 1.980000e+05 False \n",
"... ... ... \n",
"26918 1.925294e+06 False \n",
"26919 9.382665e+04 False \n",
"26920 1.115135e+06 False \n",
"26921 1.647604e+05 False \n",
"26922 9.615762e+05 False \n",
"21540 3.402550e+05 False \n",
"21541 1.436420e+05 False \n",
"21542 6.773566e+05 False \n",
"21543 7.648627e+05 False \n",
"21544 1.878559e+05 False \n",
"\n",
"[26923 rows x 19 columns]\n"
"[21545 rows x 19 columns]\n"
]
}
],
@@ -329,58 +324,58 @@
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"16 300536.SZ 20250530 8.67 2.8854 3.5632 \n",
"78 000668.SZ 20250530 7.94 4.1498 7.0226 \n",
"112 002231.SZ 20250530 3.28 8.9944 10.0552 \n",
"147 300313.SZ 20250530 6.28 6.0110 12.4720 \n",
"158 603838.SH 20250530 5.73 0.9777 2.6542 \n",
"3 300020.SZ 20250605 3.87 2.2399 2.5078 \n",
"5 000506.SZ 20250605 8.11 11.2852 16.8442 \n",
"43 600243.SH 20250605 3.09 2.3385 2.8128 \n",
"48 002528.SZ 20250605 2.63 1.7748 3.7890 \n",
"78 300044.SZ 20250605 3.43 3.7959 3.9616 \n",
"... ... ... ... ... ... \n",
"26733 603828.SH 20250526 4.98 0.9734 1.9562 \n",
"26751 600599.SH 20250526 7.46 2.5125 6.3118 \n",
"26785 000820.SZ 20250526 3.02 13.6997 14.0750 \n",
"26885 002005.SZ 20250526 1.77 0.3214 0.5145 \n",
"26905 603869.SH 20250526 6.15 0.3000 0.7946 \n",
"21429 600243.SH 20250603 3.06 3.3544 4.0348 \n",
"21434 002528.SZ 20250603 2.52 1.4622 3.1216 \n",
"21464 300044.SZ 20250603 3.45 4.3894 4.5810 \n",
"21494 300097.SZ 20250603 4.89 2.6755 3.1205 \n",
"21515 600200.SH 20250603 2.59 6.4745 7.8264 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"16 0.55 NaN NaN 4.9112 10.9775 12.1174 0.0 \n",
"78 1.07 NaN NaN 1.6212 8.7361 5.6924 0.0 \n",
"112 0.74 NaN NaN 4.3227 3.9056 5.3690 0.0 \n",
"147 0.92 NaN NaN NaN 14.2840 13.5826 0.0 \n",
"158 1.06 NaN NaN 1.9039 6.4291 5.8279 0.0 \n",
"3 0.66 NaN NaN 0.9763 5.6130 21.2702 0.0 \n",
"5 5.96 NaN NaN 14.2472 22.6112 19.7704 0.0 \n",
"43 0.52 NaN NaN 2.1216 5.7313 5.8761 0.0 \n",
"48 1.08 NaN NaN 17.3769 3.3364 4.0382 0.0 \n",
"78 1.05 NaN NaN 25.1987 18.2860 27.0836 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"26733 0.56 345.783 1670.8958 3.9261 1.2065 1.3013 0.0 \n",
"26751 0.68 NaN NaN 11.2319 3.8238 3.9211 0.0 \n",
"26785 2.40 NaN NaN 12.4588 15.8309 20.1399 0.0 \n",
"26885 0.48 NaN NaN 15.9120 4.2066 4.2221 0.0 \n",
"26905 1.00 149.594 167.2545 0.8344 4.6640 5.0668 0.0 \n",
"21429 0.68 NaN NaN 2.1010 5.6757 5.8190 0.0 \n",
"21434 0.77 NaN NaN 16.6502 3.1969 3.8693 0.0 \n",
"21464 1.26 NaN NaN 25.3456 18.3927 27.2415 0.0 \n",
"21494 1.55 NaN NaN 3.0435 3.6740 4.2734 0.0 \n",
"21515 0.79 26.1689 NaN 1.0523 1.1539 1.5214 0.0 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"16 NaN 29328.8133 29325.3240 23747.3240 254280.8113 \n",
"78 NaN 14684.1890 14684.1890 8677.2104 116592.4607 \n",
"112 NaN 34685.0017 29481.8767 26371.6067 113766.8056 \n",
"147 NaN 31297.7396 19735.2789 9511.5479 196549.8047 \n",
"158 NaN 32001.6000 32001.6000 11788.1468 183369.1680 \n",
"... ... ... ... ... ... \n",
"26733 NaN 59596.0158 59593.9625 29654.2988 296788.1587 \n",
"26751 NaN 16600.0000 16600.0000 6607.7948 123836.0000 \n",
"26785 NaN 64655.5179 29696.6877 28904.9696 195259.6641 \n",
"26885 NaN 175242.4858 175199.3158 109452.0915 310179.1999 \n",
"26905 NaN 50450.0508 50450.0508 19045.9689 310267.8124 \n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"3 NaN 79467.7974 76663.9584 68475.6577 307540.3759 296689.5190 \n",
"5 NaN 92901.7761 92867.0961 62218.8027 753433.4042 753152.1494 \n",
"43 NaN 43885.0000 43885.0000 36485.0000 135604.6500 135604.6500 \n",
"48 NaN 119867.5082 104974.0608 49171.2582 315251.5466 276081.7799 \n",
"78 NaN 76386.9228 76375.7508 73182.1277 262007.1452 261968.8252 \n",
"... ... ... ... ... ... ... \n",
"21429 NaN 43885.0000 43885.0000 36485.0000 134288.1000 134288.1000 \n",
"21434 NaN 119867.5082 104974.0608 49171.2582 302066.1207 264534.6332 \n",
"21464 NaN 76386.9228 76375.7508 73182.1277 263534.8837 263496.3403 \n",
"21494 NaN 28854.9669 27000.9948 23150.5534 141100.7881 132034.8646 \n",
"21515 NaN 71215.1832 71087.9480 58808.3718 184447.3245 184117.7853 \n",
"\n",
" circ_mv is_st \n",
"16 254250.5591 True \n",
"78 116592.4607 True \n",
"112 96700.5556 True \n",
"147 123937.5515 True \n",
"158 183369.1680 True \n",
"... ... ... \n",
"26733 296777.9333 True \n",
"26751 123836.0000 True \n",
"26785 89683.9969 True \n",
"26885 310102.7890 True \n",
"26905 310267.8124 True \n",
" is_st \n",
"3 True \n",
"5 True \n",
"43 True \n",
"48 True \n",
"78 True \n",
"... ... \n",
"21429 True \n",
"21434 True \n",
"21464 True \n",
"21494 True \n",
"21515 True \n",
"\n",
"[944 rows x 19 columns]\n"
"[753 rows x 19 columns]\n"
]
}
],
@@ -430,7 +425,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8701511 entries, 0 to 26922\n",
"Index: 8723056 entries, 0 to 21544\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -438,7 +433,7 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 207.5+ MB\n",
"memory usage: 208.0+ MB\n",
"None\n"
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -34,17 +34,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8507431 entries, 0 to 25615\n",
"Index: 8533029 entries, 0 to 25597\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.7+ MB\n",
"memory usage: 195.3+ MB\n",
"None\n",
"20250523\n",
"start_date: 20250526\n"
"20250530\n",
"start_date: 20250603\n"
]
}
],
@@ -84,10 +84,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250717 完成\n",
"任务 20250718 完成\n",
"任务 20250715 完成\n",
"任务 20250717 完成\n",
"任务 20250716 完成\n",
"任务 20250715 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250710 完成\n",
@@ -117,12 +117,7 @@
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n"
"任务 20250603 完成\n"
]
}
],

View File

@@ -34,23 +34,23 @@
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"2365 300067.SZ 20250509\n",
"2364 300066.SZ 20250509\n",
"2363 300065.SZ 20250509\n",
"2374 300076.SZ 20250509\n",
"7113 920819.BJ 20250509\n",
"4763 600259.SH 20250530\n",
"4764 600261.SH 20250530\n",
"4765 600262.SH 20250530\n",
"4754 600248.SH 20250530\n",
"7116 900957.BJ 20250530\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10457633 entries, 0 to 7113\n",
"Index: 10564598 entries, 0 to 106964\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 239.4+ MB\n",
"memory usage: 241.8+ MB\n",
"None\n",
"20250509\n",
"20250512\n"
"20250530\n",
"20250603\n"
]
}
],
@@ -91,55 +91,40 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250717 完成\n",
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250716 完成\n",
"任务 20250715 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250710 完成\n",
"任务 20250709 完成\n",
"任务 20250707 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250704 完成\n",
"任务 20250703 完成\n",
"任务 20250702 完成\n",
"任务 20250701 完成\n",
"任务 20250627 完成\n",
"任务 20250630 完成\n",
"任务 20250626 完成\n",
"任务 20250627 完成\n",
"任务 20250625 完成\n",
"任务 20250626 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250617 完成\n",
"任务 20250618 完成\n",
"任务 20250613 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250521 完成\n",
"任务 20250522 完成\n",
"任务 20250519 完成\n",
"任务 20250520 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n"
"任务 20250604 完成\n"
]
}
],
@@ -191,201 +176,58 @@
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250530 000001.SZ 12.61 10.31\n",
"1 20250530 000002.SZ 7.37 6.03\n",
"2 20250530 000004.SZ 10.38 9.40\n",
"3 20250530 000006.SZ 7.69 6.29\n",
"4 20250530 000007.SZ 8.61 7.05\n",
"0 20250606 000001.SZ 12.84 10.50\n",
"1 20250606 000002.SZ 7.30 5.98\n",
"2 20250606 000004.SZ 10.35 9.37\n",
"3 20250606 000006.SZ 7.48 6.12\n",
"4 20250606 000007.SZ 9.06 7.42\n",
"... ... ... ... ...\n",
"7136 20250530 920445.BJ 13.61 7.33\n",
"7137 20250530 920489.BJ 32.64 17.58\n",
"7138 20250530 920682.BJ 13.81 7.45\n",
"7139 20250530 920799.BJ 78.92 42.50\n",
"7140 20250530 920819.BJ 5.90 3.18\n",
"7144 20250606 920445.BJ 13.36 7.20\n",
"7145 20250606 920489.BJ 31.46 16.94\n",
"7146 20250606 920682.BJ 13.14 7.08\n",
"7147 20250606 920799.BJ 77.80 41.90\n",
"7148 20250606 920819.BJ 5.70 3.08\n",
"\n",
"[7141 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250529 000001.SZ 12.68 10.38\n",
"1 20250529 000002.SZ 7.35 6.01\n",
"2 20250529 000004.SZ 10.44 9.44\n",
"3 20250529 000006.SZ 7.78 6.36\n",
"4 20250529 000007.SZ 8.48 6.94\n",
"[7149 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250605 000001.SZ 13.02 10.66\n",
"1 20250605 000002.SZ 7.28 5.96\n",
"2 20250605 000004.SZ 10.63 9.61\n",
"3 20250605 000006.SZ 7.41 6.07\n",
"4 20250605 000007.SZ 9.19 7.52\n",
"... ... ... ... ...\n",
"7132 20250529 920445.BJ 13.28 7.16\n",
"7133 20250529 920489.BJ 31.73 17.09\n",
"7134 20250529 920682.BJ 13.55 7.31\n",
"7135 20250529 920799.BJ 73.17 39.41\n",
"7136 20250529 920819.BJ 5.86 3.16\n",
"7143 20250605 920445.BJ 13.49 7.27\n",
"7144 20250605 920489.BJ 31.00 16.70\n",
"7145 20250605 920682.BJ 13.22 7.12\n",
"7146 20250605 920799.BJ 76.24 41.06\n",
"7147 20250605 920819.BJ 5.70 3.08\n",
"\n",
"[7137 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250528 000001.SZ 12.64 10.34\n",
"1 20250528 000002.SZ 7.34 6.00\n",
"2 20250528 000004.SZ 10.52 9.52\n",
"3 20250528 000006.SZ 7.96 6.52\n",
"4 20250528 000007.SZ 8.51 6.97\n",
"[7148 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250603 000001.SZ 12.72 10.40\n",
"1 20250603 000002.SZ 7.30 5.98\n",
"2 20250603 000004.SZ 10.90 9.86\n",
"3 20250603 000006.SZ 7.62 6.24\n",
"4 20250603 000007.SZ 8.65 7.07\n",
"... ... ... ... ...\n",
"7130 20250528 920445.BJ 13.50 7.28\n",
"7131 20250528 920489.BJ 32.70 17.62\n",
"7132 20250528 920682.BJ 13.71 7.39\n",
"7133 20250528 920799.BJ 73.60 39.64\n",
"7134 20250528 920819.BJ 5.87 3.17\n",
"7137 20250603 920445.BJ 13.18 7.10\n",
"7138 20250603 920489.BJ 31.25 16.83\n",
"7139 20250603 920682.BJ 13.20 7.12\n",
"7140 20250603 920799.BJ 76.31 41.09\n",
"7141 20250603 920819.BJ 5.72 3.08\n",
"\n",
"[7135 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250527 000001.SZ 12.56 10.28\n",
"1 20250527 000002.SZ 7.29 5.97\n",
"2 20250527 000004.SZ 10.02 9.06\n",
"3 20250527 000006.SZ 7.58 6.20\n",
"4 20250527 000007.SZ 8.37 6.85\n",
"[7142 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250604 000001.SZ 12.99 10.63\n",
"1 20250604 000002.SZ 7.24 5.92\n",
"2 20250604 000004.SZ 10.77 9.75\n",
"3 20250604 000006.SZ 7.41 6.07\n",
"4 20250604 000007.SZ 8.88 7.26\n",
"... ... ... ... ...\n",
"7128 20250527 920445.BJ 13.28 7.16\n",
"7129 20250527 920489.BJ 33.96 18.30\n",
"7130 20250527 920682.BJ 13.59 7.33\n",
"7131 20250527 920799.BJ 73.38 39.52\n",
"7132 20250527 920819.BJ 5.55 2.99\n",
"7140 20250604 920445.BJ 13.29 7.17\n",
"7141 20250604 920489.BJ 31.18 16.80\n",
"7142 20250604 920682.BJ 13.26 7.14\n",
"7143 20250604 920799.BJ 76.93 41.43\n",
"7144 20250604 920819.BJ 5.73 3.09\n",
"\n",
"[7133 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250526 000001.SZ 12.61 10.31\n",
"1 20250526 000002.SZ 7.29 5.97\n",
"2 20250526 000004.SZ 9.54 8.64\n",
"3 20250526 000006.SZ 7.44 6.08\n",
"4 20250526 000007.SZ 8.66 7.08\n",
"... ... ... ... ...\n",
"7130 20250526 920445.BJ 12.88 6.94\n",
"7131 20250526 920489.BJ 31.96 17.22\n",
"7132 20250526 920682.BJ 12.77 6.89\n",
"7133 20250526 920799.BJ 72.35 38.97\n",
"7134 20250526 920819.BJ 5.48 2.96\n",
"\n",
"[7135 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250523 000001.SZ 12.71 10.40\n",
"1 20250523 000002.SZ 7.34 6.00\n",
"2 20250523 000004.SZ 9.87 8.93\n",
"3 20250523 000006.SZ 7.54 6.17\n",
"4 20250523 000007.SZ 8.80 7.20\n",
"... ... ... ... ...\n",
"7130 20250523 920445.BJ 13.01 7.01\n",
"7131 20250523 920489.BJ 30.58 16.48\n",
"7132 20250523 920682.BJ 12.83 6.91\n",
"7133 20250523 920799.BJ 74.10 39.90\n",
"7134 20250523 920819.BJ 5.56 3.00\n",
"\n",
"[7135 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250521 000001.SZ 12.53 10.25\n",
"1 20250521 000002.SZ 7.46 6.10\n",
"2 20250521 000004.SZ 9.47 8.57\n",
"3 20250521 000006.SZ 7.61 6.23\n",
"4 20250521 000007.SZ 8.28 6.78\n",
"... ... ... ... ...\n",
"7129 20250521 920445.BJ 14.02 7.56\n",
"7130 20250521 920489.BJ 32.89 17.71\n",
"7131 20250521 920682.BJ 13.83 7.45\n",
"7132 20250521 920799.BJ 77.87 41.93\n",
"7133 20250521 920819.BJ 5.95 3.21\n",
"\n",
"[7134 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250522 000001.SZ 12.63 10.33\n",
"1 20250522 000002.SZ 7.44 6.08\n",
"2 20250522 000004.SZ 9.94 9.00\n",
"3 20250522 000006.SZ 7.43 6.08\n",
"4 20250522 000007.SZ 8.43 6.89\n",
"... ... ... ... ...\n",
"7130 20250522 920445.BJ 13.68 7.38\n",
"7131 20250522 920489.BJ 32.95 17.75\n",
"7132 20250522 920682.BJ 13.41 7.23\n",
"7133 20250522 920799.BJ 77.42 41.70\n",
"7134 20250522 920819.BJ 5.81 3.13\n",
"\n",
"[7135 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250519 000001.SZ 12.52 10.24\n",
"1 20250519 000002.SZ 7.45 6.09\n",
"2 20250519 000004.SZ 8.68 7.86\n",
"3 20250519 000006.SZ 7.17 5.87\n",
"4 20250519 000007.SZ 8.05 6.59\n",
"... ... ... ... ...\n",
"7128 20250519 920445.BJ 13.96 7.52\n",
"7129 20250519 920489.BJ 30.29 16.31\n",
"7130 20250519 920682.BJ 13.35 7.19\n",
"7131 20250519 920799.BJ 77.87 41.93\n",
"7132 20250519 920819.BJ 5.91 3.19\n",
"\n",
"[7133 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250520 000001.SZ 12.51 10.23\n",
"1 20250520 000002.SZ 7.48 6.12\n",
"2 20250520 000004.SZ 9.02 8.16\n",
"3 20250520 000006.SZ 7.66 6.26\n",
"4 20250520 000007.SZ 8.18 6.70\n",
"... ... ... ... ...\n",
"7128 20250520 920445.BJ 13.97 7.53\n",
"7129 20250520 920489.BJ 31.75 17.11\n",
"7130 20250520 920682.BJ 13.23 7.13\n",
"7131 20250520 920799.BJ 77.83 41.91\n",
"7132 20250520 920819.BJ 5.86 3.16\n",
"\n",
"[7133 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250516 000001.SZ 12.53 10.25\n",
"1 20250516 000002.SZ 7.47 6.11\n",
"2 20250516 000004.SZ 9.14 8.27\n",
"3 20250516 000006.SZ 7.17 5.87\n",
"4 20250516 000007.SZ 8.03 6.57\n",
"... ... ... ... ...\n",
"7125 20250516 920445.BJ 14.80 7.98\n",
"7126 20250516 920489.BJ 30.31 16.33\n",
"7127 20250516 920682.BJ 13.71 7.39\n",
"7128 20250516 920799.BJ 78.03 42.03\n",
"7129 20250516 920819.BJ 5.74 3.10\n",
"\n",
"[7130 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250515 000001.SZ 12.57 10.29\n",
"1 20250515 000002.SZ 7.58 6.20\n",
"2 20250515 000004.SZ 8.90 8.06\n",
"3 20250515 000006.SZ 7.26 5.94\n",
"4 20250515 000007.SZ 8.01 6.55\n",
"... ... ... ... ...\n",
"7119 20250515 920445.BJ 14.80 7.98\n",
"7120 20250515 920489.BJ 31.12 16.76\n",
"7121 20250515 920682.BJ 16.96 9.14\n",
"7122 20250515 920799.BJ 82.13 44.23\n",
"7123 20250515 920819.BJ 5.59 3.01\n",
"\n",
"[7124 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250514 000001.SZ 12.42 10.16\n",
"1 20250514 000002.SZ 7.55 6.17\n",
"2 20250514 000004.SZ 8.96 8.10\n",
"3 20250514 000006.SZ 7.14 5.84\n",
"4 20250514 000007.SZ 8.02 6.56\n",
"... ... ... ... ...\n",
"7117 20250514 920445.BJ 14.04 7.56\n",
"7118 20250514 920489.BJ 31.42 16.92\n",
"7119 20250514 920682.BJ 17.23 9.29\n",
"7120 20250514 920799.BJ 78.22 42.12\n",
"7121 20250514 920819.BJ 5.59 3.01\n",
"\n",
"[7122 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250513 000001.SZ 12.28 10.04\n",
"1 20250513 000002.SZ 7.54 6.17\n",
"2 20250513 000004.SZ 8.53 7.71\n",
"3 20250513 000006.SZ 7.12 5.82\n",
"4 20250513 000007.SZ 7.82 6.40\n",
"... ... ... ... ...\n",
"7116 20250513 920445.BJ 13.36 7.20\n",
"7117 20250513 920489.BJ 31.07 16.73\n",
"7118 20250513 920682.BJ 16.73 9.01\n",
"7119 20250513 920799.BJ 80.47 43.33\n",
"7120 20250513 920819.BJ 5.60 3.02\n",
"\n",
"[7121 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250512 000001.SZ 12.27 10.04\n",
"1 20250512 000002.SZ 7.46 6.10\n",
"2 20250512 000004.SZ 8.12 7.34\n",
"3 20250512 000006.SZ 7.08 5.80\n",
"4 20250512 000007.SZ 7.81 6.39\n",
"... ... ... ... ...\n",
"7112 20250512 920445.BJ 13.19 7.11\n",
"7113 20250512 920489.BJ 30.55 16.45\n",
"7114 20250512 920682.BJ 16.34 8.80\n",
"7115 20250512 920799.BJ 78.13 42.07\n",
"7116 20250512 920819.BJ 5.57 3.01\n",
"\n",
"[7117 rows x 4 columns]]\n"
"[7145 rows x 4 columns]]\n"
]
}
],

View File

@@ -16,7 +16,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"e:\\PyProject\\NewStock\\main\\factor\n"
"/mnt/d/PyProject/NewStock\n"
]
}
],
@@ -62,8 +62,8 @@
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5123740 entries, 0 to 5123739\n",
"Data columns (total 31 columns):\n",
"RangeIndex: 8713571 entries, 0 to 8713570\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
@@ -74,57 +74,248 @@
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 pct_chg float64 \n",
" 8 turnover_rate float64 \n",
" 9 pe_ttm float64 \n",
" 10 circ_mv float64 \n",
" 11 volume_ratio float64 \n",
" 12 is_st bool \n",
" 13 up_limit float64 \n",
" 14 down_limit float64 \n",
" 15 buy_sm_vol float64 \n",
" 16 sell_sm_vol float64 \n",
" 17 buy_lg_vol float64 \n",
" 18 sell_lg_vol float64 \n",
" 19 buy_elg_vol float64 \n",
" 20 sell_elg_vol float64 \n",
" 21 net_mf_vol float64 \n",
" 22 his_low float64 \n",
" 23 his_high float64 \n",
" 24 cost_5pct float64 \n",
" 25 cost_15pct float64 \n",
" 26 cost_50pct float64 \n",
" 27 cost_85pct float64 \n",
" 28 cost_95pct float64 \n",
" 29 weight_avg float64 \n",
" 30 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(28), object(1)\n",
"memory usage: 1.2+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n",
" 8 amount float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.1+ GB\n",
"None\n"
]
}
],
"source": [
"from main.utils.utils import read_and_merge_h5_data\n",
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0acb6625",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n"
]
}
],
"source": [
"\n",
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "820a6b50",
"metadata": {},
"outputs": [],
"source": [
"fina_indicator_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/fina_indicator.h5', key='fina_indicator',\n",
" columns=['ts_code', 'ann_date', 'undist_profit_ps', 'ocfps', 'bps'],\n",
" df=None)\n",
"cashflow_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cashflow.h5', key='cashflow',\n",
" columns=['ts_code', 'ann_date', 'n_cashflow_act'],\n",
" df=None)\n",
"balancesheet_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/balancesheet.h5', key='balancesheet',\n",
" columns=['ts_code', 'ann_date', 'money_cap', 'total_liab'],\n",
" df=None)\n",
"top_list_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/top_list.h5', key='top_list',\n",
" columns=['ts_code', 'trade_date', 'reason'],\n",
" df=None)\n",
"\n",
"top_list_df = top_list_df.sort_values(by='trade_date', ascending=False).drop_duplicates(subset=['ts_code', 'trade_date'], keep='first').sort_values(by='trade_date')\n",
"\n",
"stk_holdertrade_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_holdertrade.h5', key='stk_holdertrade',\n",
" columns=['ts_code', 'ann_date', 'in_de', 'change_ratio'],\n",
" df=None)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "903469a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 成功从 Redis Hash 'concept_stocks_daily_lists_pickle' 读取 1794 条每日概念股票数据。\n"
]
}
],
"source": [
"import redis\n",
"import pickle\n",
"from datetime import date, datetime\n",
"\n",
"# --- 配置 Redis 连接 ---\n",
"REDIS_HOST = '140.143.91.66'\n",
"REDIS_PORT = 6389\n",
"REDIS_DB = 0\n",
"\n",
"# --- 定义 Redis 键名 ---\n",
"HASH_KEY = \"concept_stocks_daily_lists_pickle\" # 区分之前的 JSON 版本\n",
"MAX_DATE_KEY = \"concept_stocks_max_date_pickle\" # 区分之前的 JSON 版本\n",
"\n",
"concept_dict = {}\n",
"\n",
"# --- 连接 Redis ---\n",
"try:\n",
" r = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, password='Redis520102')\n",
"\n",
" all_data_from_redis = r.hgetall(HASH_KEY) # 返回的是字典,键是字节,值是字节\n",
" \n",
" if all_data_from_redis:\n",
" for date_bytes, stocks_bytes in all_data_from_redis.items(): # 将变量名改为 date_bytes 更清晰\n",
" try:\n",
" # *** 修正点:将日期字节解码为字符串 ***\n",
" date_str = date_bytes.decode('utf-8') \n",
" date_obj = datetime.strptime(date_str, '%Y%m%d').date()\n",
" \n",
" stocks_list = pickle.loads(stocks_bytes)\n",
" concept_dict[date_obj] = stocks_list\n",
" except (ValueError, pickle.UnpicklingError) as e:\n",
" print(f\"⚠️ 警告: 解析 Redis 数据时出错 (日期键: '{date_bytes.decode('utf-8', errors='ignore')}'),跳过此条数据: {e}\") # 打印警告时也解码一下\n",
" print(f\"✅ 成功从 Redis Hash '{HASH_KEY}' 读取 {len(concept_dict)} 条每日概念股票数据。\")\n",
" else:\n",
" print(f\" Redis Hash '{HASH_KEY}' 中没有找到任何数据。\")\n",
"\n",
"except redis.exceptions.ConnectionError as e:\n",
" print(f\"❌ 错误: 无法连接到 Redis 服务器,请检查 Redis 是否正在运行或连接配置: {e}\")\n",
"except Exception as e:\n",
" print(f\"❌ 从 Redis 读取数据时发生未知错误: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "afb8da3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4566757\n",
"开始生成概念相关因子...\n",
"开始计算概念内截面排序因子,基于: ['pct_chg', 'turnover_rate', 'volume_ratio']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Ranking Features in Concepts: 100%|██████████| 3/3 [00:00<00:00, 15.82it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"概念相关因子生成完毕。\n",
"4566757\n",
"开始计算股东增减持因子...\n",
"警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。\n",
"股东增减持因子计算完成。\n",
"Calculating cat_senti_mom_vol_spike...\n",
"Finished cat_senti_mom_vol_spike.\n",
"Calculating cat_senti_pre_breakout...\n",
"Calculating atr_10 as it's missing...\n",
"Calculating atr_40 as it's missing...\n",
"Finished cat_senti_pre_breakout.\n",
"计算因子 ts_turnover_rate_acceleration_5_20\n",
"计算因子 ts_vol_sustain_10_30\n",
"计算因子 cs_amount_outlier_10\n",
"计算因子 ts_ff_to_total_turnover_ratio\n",
"计算因子 ts_price_volume_trend_coherence_5_20\n",
"计算因子 ts_ff_turnover_rate_surge_10\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"开始计算因子: AR, BR (原地修改)...\n",
"因子 AR, BR 计算成功。\n",
"因子 AR, BR 计算流程结束。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"计算 BBI...\n",
"--- 计算日级别偏离度 (使用 pct_chg) ---\n",
"--- 计算日级别动量基准 (使用 pct_chg) ---\n",
"日级别动量基准计算完成 (使用 pct_chg)。\n",
"日级别偏离度计算完成 (使用 pct_chg)。\n",
"--- 计算日级别行业偏离度 (使用 pct_chg 和行业基准) ---\n",
"--- 计算日级别行业动量基准 (使用 pct_chg 和 cat_l2_code) ---\n",
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
" 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol',\n",
" 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol',\n",
" 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate',\n",
" 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol',\n",
" 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol',\n",
" 'lg_elg_buy_prop', 'flow_struct_buy_change',\n",
" 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel',\n",
" 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy',\n",
" 'cost_support_15pct_change', 'cat_winner_price_zone',\n",
" 'flow_chip_consistency', 'profit_taking_vs_absorb', '_is_positive',\n",
" '_is_negative', 'cat_is_positive', '_pos_returns', '_neg_returns',\n",
" '_pos_returns_sq', '_neg_returns_sq', 'upside_vol', 'downside_vol',\n",
" 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate',\n",
" 'pct_chg', 'amount', 'turnover_rate',\n",
" ...\n",
" 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike',\n",
" 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike',\n",
" 'vol_std_5', 'atr_14', 'atr_6', 'obv'],\n",
" dtype='object')\n",
" dtype='object', length=103)\n",
"Calculating senti_strong_inflow...\n",
"Finished senti_strong_inflow.\n",
"Calculating lg_flow_mom_corr_20_60...\n",
"Finished lg_flow_mom_corr_20_60.\n",
"Calculating lg_buy_consolidation_20...\n",
"Finished lg_buy_consolidation_20.\n",
"Calculating lg_flow_accel...\n",
"Finished lg_flow_accel.\n",
"Calculating profit_pressure...\n",
@@ -155,58 +346,73 @@
"Finished vol_wgt_hist_pos_20.\n",
"Calculating vol_adj_roc_20...\n",
"Finished vol_adj_roc_20.\n",
"Calculating intraday_lg_flow_corr_20 (Placeholder - complex implementation)...\n",
"Finished intraday_lg_flow_corr_20 (Placeholder).\n",
"Calculating cap_neutral_cost_metric (Placeholder - requires statsmodels)...\n",
"Finished cap_neutral_cost_metric (Placeholder).\n"
"Calculating cs_rank_net_lg_flow_val...\n",
"Finished cs_rank_net_lg_flow_val.\n",
"Calculating cs_rank_flow_divergence...\n",
"Finished cs_rank_flow_divergence.\n",
"Calculating cs_rank_ind_adj_lg_flow...\n",
"Error calculating cs_rank_ind_adj_lg_flow: Missing 'cat_l2_code' column. Assigning NaN.\n",
"Calculating cs_rank_elg_buy_ratio...\n",
"Finished cs_rank_elg_buy_ratio.\n",
"Calculating cs_rank_rel_profit_margin...\n",
"Finished cs_rank_rel_profit_margin.\n",
"Calculating cs_rank_cost_breadth...\n",
"Finished cs_rank_cost_breadth.\n",
"Calculating cs_rank_dist_to_upper_cost...\n",
"Finished cs_rank_dist_to_upper_cost.\n",
"Calculating cs_rank_winner_rate...\n",
"Finished cs_rank_winner_rate.\n",
"Calculating cs_rank_intraday_range...\n",
"Finished cs_rank_intraday_range.\n",
"Calculating cs_rank_close_pos_in_range...\n",
"Finished cs_rank_close_pos_in_range.\n",
"Calculating cs_rank_opening_gap...\n",
"Error calculating cs_rank_opening_gap: Missing 'pre_close' column. Assigning NaN.\n",
"Calculating cs_rank_pos_in_hist_range...\n",
"Finished cs_rank_pos_in_hist_range.\n",
"Calculating cs_rank_vol_x_profit_margin...\n",
"Finished cs_rank_vol_x_profit_margin.\n",
"Calculating cs_rank_lg_flow_price_concordance...\n",
"Finished cs_rank_lg_flow_price_concordance.\n",
"Calculating cs_rank_turnover_per_winner...\n",
"Finished cs_rank_turnover_per_winner.\n",
"Calculating cs_rank_ind_cap_neutral_pe (Placeholder - requires statsmodels)...\n",
"Finished cs_rank_ind_cap_neutral_pe (Placeholder).\n",
"Calculating cs_rank_volume_ratio...\n",
"Finished cs_rank_volume_ratio.\n",
"Calculating cs_rank_elg_buy_sell_sm_ratio...\n",
"Finished cs_rank_elg_buy_sell_sm_ratio.\n",
"Calculating cs_rank_cost_dist_vol_ratio...\n",
"Finished cs_rank_cost_dist_vol_ratio.\n",
"Calculating cs_rank_size...\n",
"Finished cs_rank_size.\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 4566757 entries, 0 to 4566756\n",
"Columns: 197 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(176), int64(6), int8(1), object(3)\n",
"memory usage: 6.4+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_hot_concept_stock', 'concept_rank_pct_chg', 'concept_rank_turnover_rate', 'concept_rank_volume_ratio', 'holder_net_change_sum_10d', 'holder_increase_days_10d', 'holder_decrease_days_10d', 'holder_any_increase_flag_10d', 'holder_any_decrease_flag_10d', 'holder_direction_score_10d', 'cat_senti_mom_vol_spike', 'cat_senti_pre_breakout', 'ts_turnover_rate_acceleration_5_20', 'ts_vol_sustain_10_30', 'cs_amount_outlier_10', 'ts_ff_to_total_turnover_ratio', 'ts_price_volume_trend_coherence_5_20', 'ts_ff_turnover_rate_surge_10', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 
'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'senti_strong_inflow', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
]
}
],
"source": [
"print('daily data')\n",
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"df = df[df['trade_date'] >= '2021-01-01']\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol',\n",
" 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())\n",
"\n",
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)\n",
"import numpy as np\n",
"from main.factor.factor import *\n",
"from main.factor.money_factor import * \n",
"from main.factor.concept_factor import * \n",
"\n",
"\n",
"def filter_data(df):\n",
" # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n",
" df = df[~df['is_st']]\n",
" df = df[~df['ts_code'].str.endswith('BJ')]\n",
" df = df[~df['ts_code'].str.startswith('30')]\n",
" df = df[~df['ts_code'].str.startswith('68')]\n",
" df = df[~df['ts_code'].str.startswith('8')]\n",
" df = df[df['trade_date'] >= '2022-01-01']\n",
" if 'in_date' in df.columns:\n",
" df = df.drop(columns=['in_date'])\n",
" df = df[~df[\"is_st\"]]\n",
" df = df[~df[\"ts_code\"].str.endswith(\"BJ\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"30\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"68\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"8\")]\n",
" df = df[df[\"trade_date\"] >= \"2019-01-01\"]\n",
" if \"in_date\" in df.columns:\n",
" df = df.drop(columns=[\"in_date\"])\n",
" df = df.reset_index(drop=True)\n",
" return df\n",
"\n",
@@ -214,11 +420,70 @@
"gc.collect()\n",
"\n",
"df = filter_data(df)\n",
"df = df.sort_values(by=[\"ts_code\", \"trade_date\"])\n",
"\n",
"# df = price_minus_deduction_price(df, n=120)\n",
"# df = price_deduction_price_diff_ratio_to_sma(df, n=120)\n",
"# df = cat_price_vs_sma_vs_deduction_price(df, n=120)\n",
"# df = cat_reason(df, top_list_df)\n",
"# df = cat_is_on_top_list(df, top_list_df)\n",
"print(len(df))\n",
"df = generate_concept_factors(df, concept_dict)\n",
"print(len(df))\n",
"\n",
"df = holder_trade_factors(df, stk_holdertrade_df)\n",
"\n",
"df = cat_senti_mom_vol_spike(\n",
" df,\n",
" return_period=3,\n",
" return_threshold=0.03, # 近3日涨幅超3%\n",
" volume_ratio_threshold=1.3,\n",
" current_pct_chg_min=0.0, # 当日必须收红\n",
" current_pct_chg_max=0.05,\n",
") # 当日涨幅不宜过大\n",
"\n",
"df = cat_senti_pre_breakout(\n",
" df,\n",
" atr_short_N=10,\n",
" atr_long_M=40,\n",
" vol_atrophy_N=10,\n",
" vol_atrophy_M=40,\n",
" price_stab_N=5,\n",
" price_stab_threshold=0.06,\n",
" current_pct_chg_min_signal=0.002,\n",
" current_pct_chg_max_signal=0.05,\n",
" volume_ratio_signal_threshold=1.1,\n",
")\n",
"\n",
"df = ts_turnover_rate_acceleration_5_20(df)\n",
"df = ts_vol_sustain_10_30(df)\n",
"# df = cs_turnover_rate_relative_strength_20(df)\n",
"df = cs_amount_outlier_10(df)\n",
"df = ts_ff_to_total_turnover_ratio(df)\n",
"df = ts_price_volume_trend_coherence_5_20(df)\n",
"# df = ts_turnover_rate_trend_strength_5(df)\n",
"df = ts_ff_turnover_rate_surge_10(df)\n",
"\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"undist_profit_ps\")\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"ocfps\")\n",
"calculate_arbr(df, N=26)\n",
"df[\"log_circ_mv\"] = np.log(df[\"circ_mv\"])\n",
"df = calculate_cashflow_to_ev_factor(df, cashflow_df, balancesheet_df)\n",
"df = caculate_book_to_price_ratio(df, fina_indicator_df)\n",
"df = turnover_rate_n(df, n=5)\n",
"df = variance_n(df, n=20)\n",
"df = bbi_ratio_factor(df)\n",
"df = daily_deviation(df)\n",
"df = daily_industry_deviation(df)\n",
"df, _ = get_rolling_factor(df)\n",
"df, _ = get_simple_factor(df)\n",
"from main.factor.factor import *\n",
"\n",
"df = calculate_strong_inflow_signal(df)\n",
"\n",
"df = df.rename(columns={\"l1_code\": \"cat_l1_code\"})\n",
"df = df.rename(columns={\"l2_code\": \"cat_l2_code\"})\n",
"\n",
"lg_flow_mom_corr(df, N=20, M=60)\n",
"lg_buy_consolidation(df, N=20)\n",
"lg_flow_accel(df)\n",
"profit_pressure(df)\n",
"underwater_resistance(df)\n",
@@ -234,12 +499,57 @@
"pullback_strong(df, N=20, M=20)\n",
"vol_wgt_hist_pos(df, N=20)\n",
"vol_adj_roc(df, N=20)\n",
"intraday_lg_flow_corr(df, N=20) # Placeholder\n",
"cap_neutral_cost_metric(df) # Placeholder\n",
"# hurst_exponent_flow(df, N=60) # Placeholder\n",
"# df['test'] = 1\n",
"# df['test2'] = 2\n",
"# df = df.merge(industry_df, on=['l2_code', 'trade_date'], how='left')\n",
"\n",
"cs_rank_net_lg_flow_val(df)\n",
"cs_rank_flow_divergence(df)\n",
"cs_rank_industry_adj_lg_flow(df) # Needs cat_l2_code\n",
"cs_rank_elg_buy_ratio(df)\n",
"cs_rank_rel_profit_margin(df)\n",
"cs_rank_cost_breadth(df)\n",
"cs_rank_dist_to_upper_cost(df)\n",
"cs_rank_winner_rate(df)\n",
"cs_rank_intraday_range(df)\n",
"cs_rank_close_pos_in_range(df)\n",
"cs_rank_opening_gap(df) # Needs pre_close\n",
"cs_rank_pos_in_hist_range(df) # Needs his_low, his_high\n",
"cs_rank_vol_x_profit_margin(df)\n",
"cs_rank_lg_flow_price_concordance(df)\n",
"cs_rank_turnover_per_winner(df)\n",
"cs_rank_ind_cap_neutral_pe(df) # Placeholder - needs external libraries\n",
"cs_rank_volume_ratio(df) # Needs volume_ratio\n",
"cs_rank_elg_buy_sell_sm_ratio(df)\n",
"cs_rank_cost_dist_vol_ratio(df) # Needs volume_ratio\n",
"cs_rank_size(df) # Needs circ_mv\n",
"\n",
"\n",
"# df = df.merge(index_data, on='trade_date', how='left')\n",
"\n",
"print(df.info())\n",
"print(df.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "48712034",
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "File ../../data/industry_data.h5 does not exist",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m l2_df = \u001b[43mread_and_merge_h5_data\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../data/industry_data.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mindustry_data\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43ml2_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43min_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mleft\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m df = merge_with_industry_data(df, l2_df)\n\u001b[32m 5\u001b[39m df = df.rename(columns={\u001b[33m'\u001b[39m\u001b[33ml2_code\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mcat_l2_code\u001b[39m\u001b[33m'\u001b[39m})\n",
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/utils/utils.py:14\u001b[39m, in \u001b[36mread_and_merge_h5_data\u001b[39m\u001b[34m(h5_filename, key, columns, df, join, on, prefix)\u001b[39m\n\u001b[32m 11\u001b[39m processed_columns.append(col)\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# 从 HDF5 文件读取数据,选择需要的列\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m data = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh5_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprocessed_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# 修改列名,如果列名以前有 _加上 _\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m data.columns:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:424\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 421\u001b[39m exists = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 423\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m exists:\n\u001b[32m--> \u001b[39m\u001b[32m424\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_buf\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not exist\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 426\u001b[39m store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)\n\u001b[32m 427\u001b[39m \u001b[38;5;66;03m# can't auto open/close if we are using an iterator\u001b[39;00m\n\u001b[32m 428\u001b[39m \u001b[38;5;66;03m# so delegate to the iterator\u001b[39;00m\n",
"\u001b[31mFileNotFoundError\u001b[39m: File ../../data/industry_data.h5 does not exist"
]
}
],
"source": [
"\n",
"l2_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',\n",
" columns=['ts_code', 'l2_code', 'in_date'],\n",
" df=None, on=['ts_code'], join='left')\n",
@@ -247,7 +557,7 @@
"df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
"# df = df.merge(index_data, on='trade_date', how='left')\n",
"\n",
"days = 2\n",
"days = 5\n",
"df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
@@ -265,7 +575,7 @@
"\n",
"def select_pre_zt_stocks_dynamic(stock_df):\n",
" def select_stocks(group):\n",
" return group.nlargest(1000, 'return_5') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
" return group.nsmallest(1000, 'total_mv') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
"\n",
" stock_df = stock_df.groupby('trade_date', group_keys=False).apply(select_stocks)\n",
" return stock_df\n",
@@ -281,7 +591,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "1c1dd3d6",
"metadata": {},
"outputs": [
@@ -316,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "2c60c1ea",
"metadata": {},
"outputs": [
@@ -541,7 +851,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "e088bd8a357e815a",
"metadata": {
"ExecuteTime": {
@@ -785,7 +1095,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "a0b3d7551ef0c81f",
"metadata": {
"ExecuteTime": {
@@ -1006,7 +1316,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "new_trader",
"display_name": "stock",
"language": "python",
"name": "python3"
},
@@ -1020,7 +1330,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.13.2"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff