parallel test

liaozhaorun
2025-05-26 21:34:36 +08:00
parent a4b05bb62f
commit bf86fd9415
28 changed files with 15054 additions and 7886 deletions


@@ -75,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "1b5a82fbf4e380de",
"metadata": {
"ExecuteTime": {
@@ -88,7 +88,7 @@
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"h5_filename = '../../../data/cyq_perf.h5'\n",
"\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",


@@ -39,19 +39,16 @@
"text": [
"数据已经成功存储到index_data.h5文件中\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_16940\\1832869062.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" final_df = pd.concat(all_data, ignore_index=True)\n"
]
}
],
"source": [
"# 定义四个指数\n",
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
"index_list = [\n",
" # '399300.SZ', \n",
" '000905.SH', \n",
" '000852.SH', \n",
" '399006.SZ'\n",
" ]\n",
"\n",
"# 获取并存储数据\n",
"all_data = []\n",
@@ -59,6 +56,7 @@
"for ts_code in index_list:\n",
" df = pro.index_daily(ts_code=ts_code) # 可根据需要设置日期\n",
" df['ts_code'] = ts_code # 添加ts_code列来区分数据\n",
" # print(df)\n",
" all_data.append(df)\n",
"\n",
"# 合并所有数据\n",
@@ -86,32 +84,32 @@
"output_type": "stream",
"text": [
" ts_code trade_date close open high low \\\n",
"0 000905.SH 20250509 5721.7225 5770.4410 5770.4410 5705.1654 \n",
"1 000905.SH 20250508 5773.8056 5731.7157 5783.7915 5724.9511 \n",
"2 000905.SH 20250507 5750.2911 5805.6560 5819.2422 5713.2734 \n",
"3 000905.SH 20250506 5740.3338 5668.8762 5740.3338 5666.4698 \n",
"4 000905.SH 20250430 5631.8249 5604.6537 5647.7821 5603.1718 \n",
"0 000905.SH 20250523 5653.0436 5697.1362 5738.0829 5653.0436 \n",
"1 000905.SH 20250522 5703.2797 5739.1909 5757.7946 5701.1614 \n",
"2 000905.SH 20250521 5757.9225 5741.6885 5763.0788 5733.8275 \n",
"3 000905.SH 20250520 5747.3670 5723.5055 5759.4582 5707.8101 \n",
"4 000905.SH 20250519 5720.7949 5719.4381 5729.0703 5669.7208 \n",
"... ... ... ... ... ... ... \n",
"13501 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13502 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13503 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13504 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13505 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"13531 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13532 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13533 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13534 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13535 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"\n",
" pre_close change pct_chg vol amount \n",
"0 5773.8056 -52.0831 -0.9021 1.239390e+08 1.781623e+08 \n",
"1 5750.2911 23.5145 0.4089 1.361403e+08 1.870326e+08 \n",
"2 5740.3338 9.9573 0.1735 1.710118e+08 2.275662e+08 \n",
"3 5631.8249 108.5089 1.9267 1.627736e+08 2.170600e+08 \n",
"4 5604.9057 26.9192 0.4803 1.383866e+08 1.816166e+08 \n",
"... ... ... ... ... ... \n",
"13501 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13502 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13503 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13504 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13505 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
" pre_close change pct_chg vol amount \n",
"0 5703.2797 -50.2361 -0.8808 1.143612e+08 1.481236e+08 \n",
"1 5757.9225 -54.6428 -0.9490 1.090577e+08 1.416209e+08 \n",
"2 5747.3670 10.5555 0.1837 1.158045e+08 1.551474e+08 \n",
"3 5720.7949 26.5721 0.4645 1.168966e+08 1.517512e+08 \n",
"4 5715.8491 4.9458 0.0865 1.153849e+08 1.410987e+08 \n",
"... ... ... ... ... ... \n",
"13531 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13532 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13533 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13534 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13535 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"\n",
"[13506 rows x 11 columns]\n"
"[13536 rows x 11 columns]\n"
]
}
],


@@ -183,7 +183,7 @@
"成功获取 000572.SZ 的数据\n",
"成功获取 000573.SZ 的数据\n",
"成功获取 000576.SZ 的数据\n",
"已调用300次API等待 40.75 秒以满足速率限制...\n",
"已调用300次API等待 40.21 秒以满足速率限制...\n",
"成功获取 000581.SZ 的数据\n",
"成功获取 000582.SZ 的数据\n",
"成功获取 000584.SZ 的数据\n",
@@ -334,7 +334,7 @@
"成功获取 000811.SZ 的数据\n",
"成功获取 000812.SZ 的数据\n",
"成功获取 000813.SZ 的数据\n",
"已调用300次API等待 8.60 秒以满足速率限制...\n",
"已调用300次API等待 40.67 秒以满足速率限制...\n",
"成功获取 000815.SZ 的数据\n",
"成功获取 000816.SZ 的数据\n",
"成功获取 000818.SZ 的数据\n",
@@ -485,7 +485,7 @@
"成功获取 001238.SZ 的数据\n",
"成功获取 001239.SZ 的数据\n",
"成功获取 001255.SZ 的数据\n",
"已调用300次API等待 16.22 秒以满足速率限制...\n",
"已调用300次API等待 40.55 秒以满足速率限制...\n",
"成功获取 001256.SZ 的数据\n",
"成功获取 001258.SZ 的数据\n",
"成功获取 001259.SZ 的数据\n",
@@ -636,7 +636,7 @@
"成功获取 002085.SZ 的数据\n",
"成功获取 002086.SZ 的数据\n",
"成功获取 002088.SZ 的数据\n",
"已调用300次API等待 16.73 秒以满足速率限制...\n",
"已调用300次API等待 40.85 秒以满足速率限制...\n",
"成功获取 002090.SZ 的数据\n",
"成功获取 002091.SZ 的数据\n",
"成功获取 002092.SZ 的数据\n",
@@ -787,7 +787,7 @@
"成功获取 002242.SZ 的数据\n",
"成功获取 002243.SZ 的数据\n",
"成功获取 002244.SZ 的数据\n",
"已调用300次API等待 21.90 秒以满足速率限制...\n",
"已调用300次API等待 39.17 秒以满足速率限制...\n",
"成功获取 002245.SZ 的数据\n",
"成功获取 002246.SZ 的数据\n",
"成功获取 002247.SZ 的数据\n",
@@ -938,7 +938,7 @@
"成功获取 002400.SZ 的数据\n",
"成功获取 002401.SZ 的数据\n",
"成功获取 002402.SZ 的数据\n",
"已调用300次API等待 38.45 秒以满足速率限制...\n",
"已调用300次API等待 39.14 秒以满足速率限制...\n",
"成功获取 002403.SZ 的数据\n",
"成功获取 002404.SZ 的数据\n",
"成功获取 002405.SZ 的数据\n",
@@ -1089,7 +1089,7 @@
"成功获取 002566.SZ 的数据\n",
"成功获取 002567.SZ 的数据\n",
"成功获取 002568.SZ 的数据\n",
"已调用300次API等待 39.17 秒以满足速率限制...\n",
"已调用300次API等待 39.79 秒以满足速率限制...\n",
"成功获取 002569.SZ 的数据\n",
"成功获取 002570.SZ 的数据\n",
"成功获取 002571.SZ 的数据\n",
@@ -1240,7 +1240,7 @@
"成功获取 002729.SZ 的数据\n",
"成功获取 002730.SZ 的数据\n",
"成功获取 002731.SZ 的数据\n",
"已调用300次API等待 38.52 秒以满足速率限制...\n",
"已调用300次API等待 40.41 秒以满足速率限制...\n",
"成功获取 002732.SZ 的数据\n",
"成功获取 002733.SZ 的数据\n",
"成功获取 002734.SZ 的数据\n",
@@ -1391,7 +1391,7 @@
"成功获取 002896.SZ 的数据\n",
"成功获取 002897.SZ 的数据\n",
"成功获取 002898.SZ 的数据\n",
"已调用300次API等待 38.60 秒以满足速率限制...\n",
"已调用300次API等待 40.53 秒以满足速率限制...\n",
"成功获取 002899.SZ 的数据\n",
"成功获取 002900.SZ 的数据\n",
"成功获取 002901.SZ 的数据\n",
@@ -1542,7 +1542,7 @@
"成功获取 300014.SZ 的数据\n",
"成功获取 300015.SZ 的数据\n",
"成功获取 300016.SZ 的数据\n",
"已调用300次API等待 37.75 秒以满足速率限制...\n",
"已调用300次API等待 40.55 秒以满足速率限制...\n",
"成功获取 300017.SZ 的数据\n",
"成功获取 300018.SZ 的数据\n",
"成功获取 300019.SZ 的数据\n",
@@ -1693,7 +1693,7 @@
"成功获取 300174.SZ 的数据\n",
"成功获取 300175.SZ 的数据\n",
"成功获取 300176.SZ 的数据\n",
"已调用300次API等待 40.54 秒以满足速率限制...\n",
"已调用300次API等待 38.94 秒以满足速率限制...\n",
"成功获取 300177.SZ 的数据\n",
"成功获取 300179.SZ 的数据\n",
"成功获取 300180.SZ 的数据\n",
@@ -1844,7 +1844,7 @@
"成功获取 300337.SZ 的数据\n",
"成功获取 300338.SZ 的数据\n",
"成功获取 300339.SZ 的数据\n",
"已调用300次API等待 40.50 秒以满足速率限制...\n",
"已调用300次API等待 37.41 秒以满足速率限制...\n",
"成功获取 300340.SZ 的数据\n",
"成功获取 300341.SZ 的数据\n",
"成功获取 300342.SZ 的数据\n",
@@ -1995,7 +1995,7 @@
"成功获取 300494.SZ 的数据\n",
"成功获取 300496.SZ 的数据\n",
"成功获取 300497.SZ 的数据\n",
"已调用300次API等待 38.45 秒以满足速率限制...\n",
"已调用300次API等待 39.25 秒以满足速率限制...\n",
"成功获取 300498.SZ 的数据\n",
"成功获取 300499.SZ 的数据\n",
"成功获取 300500.SZ 的数据\n",
@@ -2146,7 +2146,7 @@
"成功获取 300650.SZ 的数据\n",
"成功获取 300651.SZ 的数据\n",
"成功获取 300652.SZ 的数据\n",
"已调用300次API等待 40.83 秒以满足速率限制...\n",
"已调用300次API等待 39.97 秒以满足速率限制...\n",
"成功获取 300653.SZ 的数据\n",
"成功获取 300654.SZ 的数据\n",
"成功获取 300655.SZ 的数据\n",
@@ -2297,7 +2297,7 @@
"成功获取 300810.SZ 的数据\n",
"成功获取 300811.SZ 的数据\n",
"成功获取 300812.SZ 的数据\n",
"已调用300次API等待 40.11 秒以满足速率限制...\n",
"已调用300次API等待 37.73 秒以满足速率限制...\n",
"成功获取 300813.SZ 的数据\n",
"成功获取 300814.SZ 的数据\n",
"成功获取 300815.SZ 的数据\n",
@@ -2448,7 +2448,7 @@
"成功获取 300966.SZ 的数据\n",
"成功获取 300967.SZ 的数据\n",
"成功获取 300968.SZ 的数据\n",
"已调用300次API等待 40.87 秒以满足速率限制...\n",
"已调用300次API等待 41.01 秒以满足速率限制...\n",
"成功获取 300969.SZ 的数据\n",
"成功获取 300970.SZ 的数据\n",
"成功获取 300971.SZ 的数据\n",
@@ -2599,7 +2599,7 @@
"成功获取 301128.SZ 的数据\n",
"成功获取 301129.SZ 的数据\n",
"成功获取 301130.SZ 的数据\n",
"已调用300次API等待 40.16 秒以满足速率限制...\n",
"已调用300次API等待 40.96 秒以满足速率限制...\n",
"成功获取 301131.SZ 的数据\n",
"成功获取 301132.SZ 的数据\n",
"成功获取 301133.SZ 的数据\n",
@@ -2750,7 +2750,7 @@
"成功获取 301313.SZ 的数据\n",
"成功获取 301314.SZ 的数据\n",
"成功获取 301315.SZ 的数据\n",
"已调用300次API等待 40.63 秒以满足速率限制...\n",
"已调用300次API等待 40.38 秒以满足速率限制...\n",
"成功获取 301316.SZ 的数据\n",
"成功获取 301317.SZ 的数据\n",
"成功获取 301318.SZ 的数据\n",
@@ -2901,7 +2901,7 @@
"成功获取 301618.SZ 的数据\n",
"成功获取 301622.SZ 的数据\n",
"成功获取 301626.SZ 的数据\n",
"已调用300次API等待 39.95 秒以满足速率限制...\n",
"已调用300次API等待 39.97 秒以满足速率限制...\n",
"成功获取 301628.SZ 的数据\n",
"成功获取 301631.SZ 的数据\n",
"成功获取 301633.SZ 的数据\n",
@@ -3052,7 +3052,7 @@
"成功获取 600170.SH 的数据\n",
"成功获取 600171.SH 的数据\n",
"成功获取 600172.SH 的数据\n",
"已调用300次API等待 39.18 秒以满足速率限制...\n",
"已调用300次API等待 39.68 秒以满足速率限制...\n",
"成功获取 600173.SH 的数据\n",
"成功获取 600176.SH 的数据\n",
"成功获取 600177.SH 的数据\n",
@@ -3203,7 +3203,7 @@
"成功获取 600366.SH 的数据\n",
"成功获取 600367.SH 的数据\n",
"成功获取 600368.SH 的数据\n",
"已调用300次API等待 40.48 秒以满足速率限制...\n",
"已调用300次API等待 39.52 秒以满足速率限制...\n",
"成功获取 600369.SH 的数据\n",
"成功获取 600370.SH 的数据\n",
"成功获取 600371.SH 的数据\n",
@@ -3354,7 +3354,7 @@
"成功获取 600572.SH 的数据\n",
"成功获取 600573.SH 的数据\n",
"成功获取 600575.SH 的数据\n",
"已调用300次API等待 39.24 秒以满足速率限制...\n",
"已调用300次API等待 40.15 秒以满足速率限制...\n",
"成功获取 600576.SH 的数据\n",
"成功获取 600577.SH 的数据\n",
"成功获取 600578.SH 的数据\n",
@@ -3505,7 +3505,7 @@
"成功获取 600748.SH 的数据\n",
"成功获取 600749.SH 的数据\n",
"成功获取 600750.SH 的数据\n",
"已调用300次API等待 39.49 秒以满足速率限制...\n",
"已调用300次API等待 40.58 秒以满足速率限制...\n",
"成功获取 600751.SH 的数据\n",
"成功获取 600753.SH 的数据\n",
"成功获取 600754.SH 的数据\n",
@@ -3656,7 +3656,7 @@
"成功获取 600956.SH 的数据\n",
"成功获取 600958.SH 的数据\n",
"成功获取 600959.SH 的数据\n",
"已调用300次API等待 40.83 秒以满足速率限制...\n",
"已调用300次API等待 40.13 秒以满足速率限制...\n",
"成功获取 600960.SH 的数据\n",
"成功获取 600961.SH 的数据\n",
"成功获取 600962.SH 的数据\n",
@@ -3807,7 +3807,7 @@
"成功获取 601519.SH 的数据\n",
"成功获取 601528.SH 的数据\n",
"成功获取 601555.SH 的数据\n",
"已调用300次API等待 40.73 秒以满足速率限制...\n",
"已调用300次API等待 38.43 秒以满足速率限制...\n",
"成功获取 601566.SH 的数据\n",
"成功获取 601567.SH 的数据\n",
"成功获取 601568.SH 的数据\n",
@@ -3958,7 +3958,7 @@
"成功获取 603041.SH 的数据\n",
"成功获取 603042.SH 的数据\n",
"成功获取 603043.SH 的数据\n",
"已调用300次API等待 40.08 秒以满足速率限制...\n",
"已调用300次API等待 40.56 秒以满足速率限制...\n",
"成功获取 603045.SH 的数据\n",
"成功获取 603048.SH 的数据\n",
"成功获取 603050.SH 的数据\n",
@@ -4109,7 +4109,7 @@
"成功获取 603228.SH 的数据\n",
"成功获取 603229.SH 的数据\n",
"成功获取 603230.SH 的数据\n",
"已调用300次API等待 40.23 秒以满足速率限制...\n",
"已调用300次API等待 40.37 秒以满足速率限制...\n",
"成功获取 603231.SH 的数据\n",
"成功获取 603232.SH 的数据\n",
"成功获取 603233.SH 的数据\n",
@@ -4260,7 +4260,7 @@
"成功获取 603530.SH 的数据\n",
"成功获取 603533.SH 的数据\n",
"成功获取 603535.SH 的数据\n",
"已调用300次API等待 40.89 秒以满足速率限制...\n",
"已调用300次API等待 39.94 秒以满足速率限制...\n",
"成功获取 603536.SH 的数据\n",
"成功获取 603538.SH 的数据\n",
"成功获取 603551.SH 的数据\n",
@@ -4411,7 +4411,7 @@
"成功获取 603819.SH 的数据\n",
"成功获取 603822.SH 的数据\n",
"成功获取 603823.SH 的数据\n",
"已调用300次API等待 38.75 秒以满足速率限制...\n",
"已调用300次API等待 39.12 秒以满足速率限制...\n",
"成功获取 603825.SH 的数据\n",
"成功获取 603826.SH 的数据\n",
"成功获取 603828.SH 的数据\n",
@@ -4562,7 +4562,7 @@
"成功获取 605167.SH 的数据\n",
"成功获取 605168.SH 的数据\n",
"成功获取 605169.SH 的数据\n",
"已调用300次API等待 40.37 秒以满足速率限制...\n",
"已调用300次API等待 40.12 秒以满足速率限制...\n",
"成功获取 605177.SH 的数据\n",
"成功获取 605178.SH 的数据\n",
"成功获取 605179.SH 的数据\n",
@@ -4713,7 +4713,7 @@
"成功获取 688097.SH 的数据\n",
"成功获取 688098.SH 的数据\n",
"成功获取 688099.SH 的数据\n",
"已调用300次API等待 39.89 秒以满足速率限制...\n",
"已调用300次API等待 40.20 秒以满足速率限制...\n",
"成功获取 688100.SH 的数据\n",
"成功获取 688101.SH 的数据\n",
"成功获取 688102.SH 的数据\n",
@@ -4864,7 +4864,7 @@
"成功获取 688271.SH 的数据\n",
"成功获取 688272.SH 的数据\n",
"成功获取 688273.SH 的数据\n",
"已调用300次API等待 39.50 秒以满足速率限制...\n",
"已调用300次API等待 41.07 秒以满足速率限制...\n",
"成功获取 688275.SH 的数据\n",
"成功获取 688276.SH 的数据\n",
"成功获取 688277.SH 的数据\n",
@@ -5015,7 +5015,7 @@
"成功获取 688486.SH 的数据\n",
"成功获取 688488.SH 的数据\n",
"成功获取 688489.SH 的数据\n",
"已调用300次API等待 39.30 秒以满足速率限制...\n",
"已调用300次API等待 40.39 秒以满足速率限制...\n",
"成功获取 688496.SH 的数据\n",
"成功获取 688498.SH 的数据\n",
"成功获取 688499.SH 的数据\n",
@@ -5166,7 +5166,7 @@
"成功获取 688689.SH 的数据\n",
"成功获取 688690.SH 的数据\n",
"成功获取 688691.SH 的数据\n",
"已调用300次API等待 40.48 秒以满足速率限制...\n",
"已调用300次API等待 40.83 秒以满足速率限制...\n",
"成功获取 688692.SH 的数据\n",
"成功获取 688693.SH 的数据\n",
"成功获取 688695.SH 的数据\n",
@@ -5317,7 +5317,7 @@
"成功获取 835184.BJ 的数据\n",
"成功获取 835185.BJ 的数据\n",
"成功获取 835207.BJ 的数据\n",
"已调用300次API等待 40.17 秒以满足速率限制...\n",
"已调用300次API等待 41.07 秒以满足速率限制...\n",
"成功获取 835237.BJ 的数据\n",
"成功获取 835305.BJ 的数据\n",
"成功获取 835368.BJ 的数据\n",
@@ -5468,7 +5468,7 @@
"成功获取 000005.SZ 的数据\n",
"成功获取 000013.SZ 的数据\n",
"成功获取 000015.SZ 的数据\n",
"已调用300次API等待 40.97 秒以满足速率限制...\n",
"已调用300次API等待 41.06 秒以满足速率限制...\n",
"成功获取 000018.SZ 的数据\n",
"成功获取 000023.SZ 的数据\n",
"成功获取 000024.SZ 的数据\n",
@@ -5619,7 +5619,7 @@
"成功获取 300309.SZ 的数据\n",
"成功获取 300312.SZ 的数据\n",
"成功获取 300325.SZ 的数据\n",
"已调用300次API等待 39.81 秒以满足速率限制...\n",
"已调用300次API等待 41.04 秒以满足速率限制...\n",
"成功获取 300330.SZ 的数据\n",
"成功获取 300336.SZ 的数据\n",
"成功获取 300356.SZ 的数据\n",
@@ -5761,7 +5761,7 @@
"2 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"3 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"4 000001.SZ S深发展A 20061009 20070619 未股改加S\n",
"名称变化记录总数: 32259\n"
"名称变化记录总数: 32284\n"
]
}
],
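
The repeated "已调用300次API等待 … 秒以满足速率限制" lines in this notebook's output come from a rate-limit guard around the Tushare calls; the fetch loop itself is not shown in this diff. A minimal sketch of such a guard, assuming a quota of 300 calls per minute and using hypothetical names call_count / window_start:

import time

MAX_CALLS_PER_MINUTE = 300      # assumed Tushare quota
call_count = 0
window_start = time.time()

def throttle():
    """After 300 calls, sleep out the remainder of the 60-second window."""
    global call_count, window_start
    call_count += 1
    if call_count >= MAX_CALLS_PER_MINUTE:
        elapsed = time.time() - window_start
        if elapsed < 60:
            wait = 60 - elapsed
            print(f"已调用300次API,等待 {wait:.2f} 秒以满足速率限制...")
            time.sleep(wait)
        call_count = 0
        window_start = time.time()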

main/data/top_list.ipynb (new file, 2538 lines added)

File diff suppressed because it is too large


@@ -32,22 +32,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"5384 920445.BJ 20250508\n",
"5385 920489.BJ 20250508\n",
"5386 920682.BJ 20250508\n",
"5387 920799.BJ 20250508\n",
"5388 920819.BJ 20250508\n",
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"26947 920445.BJ 20250512\n",
"26948 920489.BJ 20250512\n",
"26949 920682.BJ 20250512\n",
"26950 920799.BJ 20250512\n",
"26951 920819.BJ 20250512\n",
"\n",
"[7665071 rows x 2 columns]\n",
"20250508\n",
"start_date: 20250509\n"
"[7697412 rows x 2 columns]\n",
"20250516\n",
"start_date: 20250519\n"
]
}
],
@@ -86,36 +86,30 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250616 完成\n",
"任务 20250611 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250530 完成\n",
"任务 20250527 完成\n",
"任务 20250523 完成\n",
"任务 20250528 完成\n",
"任务 20250526 完成\n",
"任务 20250521 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250519 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n"
"任务 20250519 完成\n"
]
}
],


@@ -32,22 +32,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
".. ... ...\n",
"434 859811.SI 20250508\n",
"435 859821.SI 20250508\n",
"436 859822.SI 20250508\n",
"437 859852.SI 20250508\n",
"438 859951.SI 20250508\n",
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"2190 859811.SI 20250512\n",
"2191 859821.SI 20250512\n",
"2192 859822.SI 20250512\n",
"2193 859852.SI 20250512\n",
"2194 859951.SI 20250512\n",
"\n",
"[1066343 rows x 2 columns]\n",
"20250508\n",
"start_date: 20250509\n"
"[1068977 rows x 2 columns]\n",
"20250516\n",
"start_date: 20250519\n"
]
}
],
@@ -90,16 +90,16 @@
"任务 20250620 完成\n",
"任务 20250617 完成\n",
"任务 20250618 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250616 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250606 完成\n",
"任务 20250603 完成\n",
"任务 20250604 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
@@ -109,13 +109,7 @@
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250515 完成\n",
"任务 20250516 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n"
"任务 20250519 完成\n"
]
}
],


@@ -94,17 +94,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8615301 entries, 0 to 5388\n",
"Index: 8647642 entries, 0 to 26951\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 197.2+ MB\n",
"memory usage: 197.9+ MB\n",
"None\n",
"20250508\n",
"20250509\n"
"20250516\n",
"20250519\n"
]
}
],
@@ -144,14 +144,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250716 完成\n",
"任务 20250718 完成\n",
"任务 20250715 完成\n",
"任务 20250711 完成\n",
"任务 20250716 完成\n",
"任务 20250714 完成\n",
"任务 20250710 完成\n",
"任务 20250711 完成\n",
"任务 20250709 完成\n",
"任务 20250710 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250704 完成\n",
@@ -160,12 +160,12 @@
"任务 20250701 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250626 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
@@ -178,22 +178,16 @@
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250530 完成\n",
"任务 20250527 完成\n",
"任务 20250528 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n"
"任务 20250519 完成\n"
]
}
],
@@ -263,59 +257,59 @@
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 300575.SZ 20250509 6.05 1.9284 2.1880 \n",
"1 300247.SZ 20250509 3.77 2.1735 2.5437 \n",
"2 603038.SH 20250509 15.80 17.5702 32.3972 \n",
"3 002030.SZ 20250509 5.82 0.8252 1.2070 \n",
"4 600157.SH 20250509 1.36 0.8369 1.0222 \n",
"... ... ... ... ... ... \n",
"5384 600841.SH 20250509 5.57 1.0271 3.2670 \n",
"5385 300968.SZ 20250509 14.76 1.2857 2.7636 \n",
"5386 300634.SZ 20250509 25.79 5.2551 9.4581 \n",
"5387 300295.SZ 20250509 15.73 3.0347 3.2458 \n",
"5388 688370.SH 20250509 19.15 1.2008 1.2008 \n",
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 000839.SZ 20250523 2.67 0.8124 1.2782 \n",
"1 300274.SZ 20250523 60.60 3.2852 3.7071 \n",
"2 301356.SZ 20250523 17.59 5.0050 5.0698 \n",
"3 600152.SH 20250523 5.73 1.3359 2.0988 \n",
"4 300049.SZ 20250523 29.91 1.6066 1.7292 \n",
"... ... ... ... ... ... \n",
"26941 002458.SZ 20250519 8.36 2.1950 2.5416 \n",
"26942 600882.SH 20250519 27.18 2.2244 4.6853 \n",
"26943 001283.SZ 20250519 54.51 3.0453 3.0453 \n",
"26944 000718.SZ 20250519 2.20 1.4790 2.2404 \n",
"26945 002141.SZ 20250519 3.09 4.9267 7.1872 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.71 239.8914 NaN 1.3451 1.1608 1.1259 1.9835 \n",
"1 0.96 64.6952 53.1680 2.7649 4.4008 3.9673 0.0000 \n",
"2 4.47 183.7603 154.4297 3.1047 4.0259 3.7692 0.2434 \n",
"3 0.62 NaN NaN 1.0296 9.5754 9.9145 0.2577 \n",
"4 0.55 19.3625 26.3896 0.6394 1.0656 1.1327 0.4044 \n",
"... ... ... ... ... ... ... ... \n",
"5384 0.77 NaN NaN 2.3362 1.1952 1.2860 0.0000 \n",
"5385 0.71 115.0812 181.8721 3.2254 4.9990 5.1146 0.3388 \n",
"5386 1.01 50.5639 52.9222 4.1166 7.0433 6.7806 0.8063 \n",
"5387 0.65 NaN NaN 2.6398 24.2982 28.1758 0.0000 \n",
"5388 1.25 29.1668 36.1111 0.9812 4.4106 4.4983 NaN \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.62 NaN NaN 7.4695 3.0824 3.1095 0.0000 \n",
"1 1.82 11.3840 9.8414 3.0807 1.6137 1.4907 1.1292 \n",
"2 1.43 NaN 18055.4366 1.2789 4.2618 3.3028 0.0000 \n",
"3 1.11 NaN NaN 1.7367 1.9844 2.0758 0.0000 \n",
"4 1.05 70.3242 80.3071 4.4707 5.9056 5.8725 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"26941 1.47 18.3588 24.2570 2.1403 2.9497 3.0116 2.3923 \n",
"26942 0.89 122.4919 89.9537 3.0986 2.8733 2.7144 0.0000 \n",
"26943 0.92 48.1520 36.6481 2.1043 0.8602 0.8229 0.8691 \n",
"26944 1.76 40.4178 55.0402 0.7058 3.1476 3.2425 3.6364 \n",
"26945 1.51 NaN NaN 3.8214 7.2461 4.4422 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 1.9835 4.647564e+04 3.427082e+04 3.020469e+04 2.811776e+05 \n",
"1 NaN 8.040403e+04 8.032753e+04 6.863630e+04 3.031232e+05 \n",
"2 0.2434 2.686771e+04 2.686771e+04 1.457134e+04 4.245098e+05 \n",
"3 0.2577 1.403446e+05 1.403446e+05 9.595371e+04 8.168056e+05 \n",
"4 0.4044 2.221776e+06 2.221776e+06 1.819047e+06 3.021616e+06 \n",
"... ... ... ... ... ... \n",
"5384 NaN 1.387822e+05 1.043024e+05 3.279094e+04 7.730167e+05 \n",
"5385 0.3388 4.133800e+04 4.133800e+04 1.923185e+04 6.101489e+05 \n",
"5386 0.8063 4.512109e+04 4.346809e+04 2.415175e+04 1.163673e+06 \n",
"5387 NaN 1.896137e+04 1.675486e+04 1.566518e+04 2.982624e+05 \n",
"5388 NaN 1.371079e+04 4.374912e+03 4.374912e+03 2.625616e+05 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 NaN 391982.6352 391982.6352 249133.8007 1.046594e+06 \n",
"1 1.1292 207321.1424 158970.9449 140880.3307 1.256366e+07 \n",
"2 NaN 21600.0000 5481.0000 5410.9920 3.799440e+05 \n",
"3 NaN 52907.9375 52907.9375 33676.4965 3.031625e+05 \n",
"4 NaN 26635.6100 23351.5217 21696.0562 7.966711e+05 \n",
"... ... ... ... ... ... \n",
"26941 2.3577 110641.2915 74886.8285 64675.1303 9.249612e+05 \n",
"26942 NaN 51205.3647 51205.3647 24310.0793 1.391762e+06 \n",
"26943 0.8691 8061.0011 5785.5721 5785.5721 4.394052e+05 \n",
"26944 3.6364 303463.6384 228209.3122 150654.2061 6.676200e+05 \n",
"26945 NaN 103293.5798 103159.2875 70714.2228 3.191772e+05 \n",
"\n",
" circ_mv is_st \n",
"0 2.073385e+05 False \n",
"1 3.028348e+05 False \n",
"2 4.245098e+05 False \n",
"3 8.168056e+05 False \n",
"4 3.021616e+06 False \n",
"... ... ... \n",
"5384 5.809646e+05 False \n",
"5385 6.101489e+05 False \n",
"5386 1.121042e+06 False \n",
"5387 2.635540e+05 False \n",
"5388 8.377956e+04 False \n",
" circ_mv is_st \n",
"0 1.046594e+06 False \n",
"1 9.633639e+06 False \n",
"2 9.641079e+04 False \n",
"3 3.031625e+05 False \n",
"4 6.984440e+05 False \n",
"... ... ... \n",
"26941 6.260539e+05 False \n",
"26942 1.391762e+06 False \n",
"26943 3.153715e+05 False \n",
"26944 5.020605e+05 False \n",
"26945 3.187622e+05 True \n",
"\n",
"[5389 rows x 19 columns]\n"
"[26946 rows x 19 columns]\n"
]
}
],
@@ -339,59 +333,46 @@
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"54 002496.SZ 20250509 1.43 3.1262 3.2341 \n",
"148 603828.SH 20250509 5.04 3.5674 7.1692 \n",
"166 600599.SH 20250509 7.70 10.8623 27.2882 \n",
"193 000820.SZ 20250509 2.16 5.5698 5.7239 \n",
"203 300506.SZ 20250509 3.28 0.6710 0.9449 \n",
"... ... ... ... ... ... \n",
"5204 002602.SZ 20250509 8.00 1.3867 1.7044 \n",
"5253 300147.SZ 20250509 7.37 7.2159 9.3379 \n",
"5264 002501.SZ 20250509 2.08 2.4301 3.1371 \n",
"5317 600421.SH 20250509 5.27 2.7391 5.8971 \n",
"5345 600289.SH 20250509 5.78 1.3847 2.0115 \n",
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"23 002898.SZ 20250523 10.20 22.8874 36.4442 \n",
"35 000889.SZ 20250523 2.76 1.6609 2.2443 \n",
"53 300379.SZ 20250523 6.12 9.3935 9.5800 \n",
"58 300268.SZ 20250523 10.27 1.8178 2.5956 \n",
"155 000615.SZ 20250523 3.15 1.1640 1.7189 \n",
"... ... ... ... ... ... \n",
"26880 300147.SZ 20250519 8.80 6.8409 8.8527 \n",
"26891 002501.SZ 20250519 2.17 4.4260 5.7136 \n",
"26910 600421.SH 20250519 6.39 3.4329 7.3909 \n",
"26938 600289.SH 20250519 5.90 1.1380 1.6532 \n",
"26945 002141.SZ 20250519 3.09 4.9267 7.1872 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"54 0.73 NaN NaN 1.6044 7.6992 7.2633 0.0 \n",
"148 1.65 349.9490 1691.0271 3.9734 1.2211 1.3170 0.0 \n",
"166 4.51 NaN NaN 11.5933 3.9468 4.0472 0.0 \n",
"193 1.00 NaN NaN 9.5443 11.2714 14.3393 0.0 \n",
"203 0.87 NaN NaN 28.5909 19.5183 19.3088 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5204 0.78 49.1432 31.1887 2.2169 2.6358 2.2496 0.0 \n",
"5253 1.74 NaN NaN 5.0393 2.6221 2.8487 0.0 \n",
"5264 0.87 NaN NaN 22.5816 22.1370 26.0255 0.0 \n",
"5317 0.74 NaN NaN 143.1934 8.7976 8.9449 0.0 \n",
"5345 0.55 NaN NaN 2.9752 11.3890 11.6628 0.0 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio dv_ttm \\\n",
"23 10.43 NaN NaN 3.6011 6.8112 7.2338 0.1961 0.1961 \n",
"35 0.52 NaN NaN 27.2957 1.7661 1.7554 0.0000 NaN \n",
"53 0.89 NaN NaN 1.0993 4.5062 4.1828 0.0000 NaN \n",
"58 0.99 NaN NaN NaN 0.5235 0.5833 0.0000 NaN \n",
"155 0.99 NaN NaN NaN 2.1957 2.2727 0.0000 NaN \n",
"... ... .. ... ... ... ... ... ... \n",
"26880 1.55 NaN NaN 6.0171 3.1309 3.4015 0.0000 NaN \n",
"26891 1.83 NaN NaN 23.5587 23.0948 27.1516 0.0000 NaN \n",
"26910 0.92 NaN NaN 173.6254 10.6672 10.8459 0.0000 NaN \n",
"26938 0.46 NaN NaN 3.0370 11.6255 11.9049 0.0000 NaN \n",
"26945 1.51 NaN NaN 3.8214 7.2461 4.4422 0.0000 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"54 NaN 150758.9677 118138.6559 114196.4999 2.155853e+05 \n",
"148 NaN 59596.0158 59593.9625 29654.2988 3.003639e+05 \n",
"166 NaN 16600.0000 16600.0000 6607.7948 1.278200e+05 \n",
"193 NaN 64362.0201 29403.1899 28611.4718 1.390220e+05 \n",
"203 NaN 69559.6569 57572.5450 40880.9749 2.281557e+05 \n",
"... ... ... ... ... ... \n",
"5204 NaN 745255.6968 687870.8273 559649.7754 5.962046e+06 \n",
"5253 NaN 66127.9045 65745.9042 50804.9121 4.873627e+05 \n",
"5264 NaN 355000.0000 354999.9006 274999.9006 7.384000e+05 \n",
"5317 NaN 19560.0000 19560.0000 9085.2748 1.030812e+05 \n",
"5345 NaN 63105.2069 56592.2684 38956.2787 3.647481e+05 \n",
" total_share float_share free_share total_mv circ_mv is_st \n",
"23 17600.0000 10126.2561 6359.4096 179520.0000 103287.8122 True \n",
"35 93629.1116 86984.9676 64375.7658 258416.3480 240078.5106 True \n",
"53 55792.2828 52663.7564 51638.5483 341448.7707 322302.1892 True \n",
"58 17420.0000 13370.7500 9364.1581 178903.4000 137317.6025 True \n",
"155 76297.9719 76250.0287 51632.2709 240338.6115 240187.5904 True \n",
"... ... ... ... ... ... ... \n",
"26880 66127.9045 65745.9042 50804.9121 581925.5596 578563.9570 True \n",
"26891 355000.0000 354999.9006 274999.9006 770350.0000 770349.7843 True \n",
"26910 19560.0000 19560.0000 9085.2748 124988.4000 124988.4000 True \n",
"26938 63105.2069 56592.2684 38956.2787 372320.7207 333894.3836 True \n",
"26945 103293.5798 103159.2875 70714.2228 319177.1616 318762.1984 True \n",
"\n",
" circ_mv is_st \n",
"54 1.689383e+05 True \n",
"148 3.003536e+05 True \n",
"166 1.278200e+05 True \n",
"193 6.351089e+04 True \n",
"203 1.888379e+05 True \n",
"... ... ... \n",
"5204 5.502967e+06 True \n",
"5253 4.845473e+05 True \n",
"5264 7.383998e+05 True \n",
"5317 1.030812e+05 True \n",
"5345 3.271033e+05 True \n",
"\n",
"[197 rows x 19 columns]\n"
"[944 rows x 19 columns]\n"
]
}
],
@@ -441,7 +422,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8620690 entries, 0 to 5388\n",
"Index: 8674588 entries, 0 to 26945\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -449,7 +430,7 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 205.5+ MB\n",
"memory usage: 206.8+ MB\n",
"None\n"
]
}

File diff suppressed because it is too large


@@ -34,17 +34,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8451068 entries, 0 to 5123\n",
"Index: 8481815 entries, 0 to 25622\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 193.4+ MB\n",
"memory usage: 194.1+ MB\n",
"None\n",
"20250508\n",
"start_date: 20250509\n"
"20250516\n",
"start_date: 20250519\n"
]
}
],
@@ -84,8 +84,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250717 完成\n",
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250715 完成\n",
"任务 20250716 完成\n",
"任务 20250714 完成\n",
@@ -100,18 +100,18 @@
"任务 20250701 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250625 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250618 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
@@ -127,13 +127,7 @@
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n"
"任务 20250519 完成\n"
]
}
],

Binary file not shown.


@@ -2,6 +2,9 @@ import numpy as np
import pandas as pd
import talib
from pandarallel import pandarallel
pandarallel.initialize()
def get_rolling_factor(df):
old_columns = df.columns.tolist()[:]
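
The only functional change in this hunk is importing and initializing pandarallel, matching the commit title. A minimal, self-contained sketch of the call pattern this enables — demo_rolling and the toy frame are stand-ins, since the actual call site of get_rolling_factor is not part of this hunk:

import pandas as pd
from pandarallel import pandarallel

pandarallel.initialize()  # one worker per CPU core by default

# Hypothetical call site: run a per-stock rolling computation on every
# ts_code group in parallel instead of a serial .apply().
def demo_rolling(group: pd.DataFrame) -> pd.DataFrame:
    group = group.sort_values('trade_date')
    group['ma5'] = group['close'].rolling(5).mean()
    return group

df = pd.DataFrame({
    'ts_code': ['000001.SZ'] * 6 + ['000002.SZ'] * 6,
    'trade_date': list(range(6)) * 2,
    'close': range(12),
})
out = df.groupby('ts_code').parallel_apply(demo_rolling)
print(out.head())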
@@ -2747,7 +2750,7 @@ def sentiment_panic_greed_index(df: pd.DataFrame, window_atr: int = 14, window_s
print(f"Error: DataFrame 缺少必需的 OHLCV 列。将为 {factor_name} 填充 NaN。")
df[factor_name] = np.nan
return
try:
df['_prev_close'] = df['close'].shift(1)
@@ -2848,4 +2851,186 @@ def sentiment_reversal_indicator(df: pd.DataFrame, window_ret: int = 5, window_v
if cols_to_drop:
df.drop(columns=cols_to_drop, inplace=True)
print(f"Finished {factor_name}.")
return df
return df
def price_minus_deduction_price(df, n=10):
"""
因子 1 (定量): 计算当前收盘价与N周期前收盘价均线抵扣价的差值。
该因子衡量当前价格相对于即将移出均线计算窗口的价格的差异。
正值表示当前价格高于抵扣价,下一周期均线倾向于上涨(如果价格不变)。
参数:
df (pd.DataFrame): 包含股票日线数据的DataFrame。必须包含 'ts_code', 'close' 列。
n (int): 均线计算的周期数。抵扣价是 n-1 周期前的数据点。
返回:
pd.DataFrame: 增加了 'price_minus_deduction_price_n' 列的DataFrame。
"""
if 'close' not in df.columns:
print("错误: DataFrame中没有'close'列,无法计算抵扣价相关因子。")
return df
if n <= 1:
print("错误: 均线周期 n 必须大于 1。")
df[f'price_minus_deduction_price_{n}'] = np.nan
return df
grouped = df.groupby('ts_code', group_keys=False)
# 抵扣价是当前窗口移除的最早的价格,即当前价格的 n-1 周期前的价格
# 例如计算 SMA(10) for P_t, 窗口是 P_{t-9}, ..., P_t. 移除的是 P_{t-9}.
# P_{t-9} 是 P_t 的 shift(9). So shift(n-1).
deduction_price = grouped['close'].shift(n - 1)
# 计算差值
df[f'price_minus_deduction_price_{n}'] = df['close'] - deduction_price
print(f"因子 price_minus_deduction_price_{n} 计算完成。")
return df
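
A small worked example of the deduction-price factor defined above, on a synthetic single-stock series: with n=10 the deduction price on day t is the close of day t-9, i.e. the observation about to drop out of the SMA window.

import numpy as np
import pandas as pd

demo = pd.DataFrame({
    'ts_code': ['000001.SZ'] * 12,
    'close': np.arange(10.0, 22.0),   # 10, 11, ..., 21: a steady +1 trend
})
demo = price_minus_deduction_price(demo, n=10)
# The first 9 rows have no deduction price yet (NaN); afterwards the factor is
# close_t - close_{t-9} = 9.0 for this synthetic trend.
print(demo['price_minus_deduction_price_10'].tolist())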
def price_deduction_price_diff_ratio_to_sma(df, n=10):
"""
因子 2 (定量): 计算当前收盘价与抵扣价的差值占N周期均线的比例。
该因子衡量当前价格高于抵扣价的程度相对于均线水平的大小。
参数:
df (pd.DataFrame): 包含股票日线数据的DataFrame。必须包含 'ts_code', 'close' 列。
n (int): 均线计算的周期数。抵扣价是 n-1 周期前的数据点。
返回:
pd.DataFrame: 增加了 'price_deduction_price_diff_ratio_to_sma_n' 列的DataFrame。
"""
if 'close' not in df.columns:
print("错误: DataFrame中没有'close'列,无法计算抵扣价相关因子。")
return df
if n <= 1:
print("错误: 均线周期 n 必须大于 1。")
df[f'price_deduction_price_diff_ratio_to_sma_{n}'] = np.nan
return df
grouped = df.groupby('ts_code', group_keys=False)
# 计算N周期SMA
# 使用 transform 可以保持与原始 df 的索引对齐
sma = grouped['close'].transform(lambda x: x.rolling(window=n).mean())
# 抵扣价
deduction_price = grouped['close'].shift(n - 1)
# 计算比例,处理均线为零的情况
diff = df['close'] - deduction_price
# 使用 np.divide 并指定 where 条件和 fill_value 来避免除以零警告和 NaN 结果
# 如果 sma 为 0,则结果设为 NaN
df[f'price_deduction_price_diff_ratio_to_sma_{n}'] = np.divide(
diff,
sma,
out=np.full_like(diff, np.nan), # 输出数组形状与 diff 相同,以 NaN 填充
where=(sma != 0) # 仅在 sma 不为 0 时执行除法
)
# np.divide with where handles Inf/-Inf and 0/0 (as NaN), but explicitly replace might be slightly safer depending on numpy version
# df[f'price_deduction_price_diff_ratio_to_sma_{n}'].replace([np.inf, -np.inf], np.nan, inplace=True) # This is often redundant with np.divide(..., where=...)
print(f"因子 price_deduction_price_diff_ratio_to_sma_{n} 计算完成。")
return df
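
The np.divide(..., out=..., where=...) pattern used above keeps rows with a zero SMA at NaN instead of raising a divide-by-zero warning or producing Inf; a tiny isolated illustration:

import numpy as np

diff = np.array([1.0, 2.0, 3.0])
sma = np.array([2.0, 0.0, 4.0])

# Where sma == 0 the NaN pre-filled by `out` is kept untouched.
ratio = np.divide(diff, sma, out=np.full_like(diff, np.nan), where=(sma != 0))
print(ratio)   # [0.5  nan 0.75]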
def cat_price_vs_sma_vs_deduction_price(df, n=10):
"""
因子 3 (分类): 基于当前收盘价、N周期均线和抵扣价的位置关系构建分类因子。
捕捉当前价格和抵扣价相对于均线的位置,指示可能的趋势状态或变化。
分类定义:
0: 数据不足 (SMA 或抵扣价为 NaN) 或 均线为 NaN
1: 当前价 > SMA 且 抵扣价 > SMA (两者都在均线之上)
2: 当前价 < SMA 且 抵扣价 < SMA (两者都在均线之下)
3: 当前价 > SMA 且 抵扣价 <= SMA (当前价上穿或位于均线上方,抵扣价在均线下方或正好在均线)
4: 当前价 <= SMA 且 抵扣价 > SMA (当前价下穿或位于均线下方,抵扣价在均线上方)
参数:
df (pd.DataFrame): 包含股票日线数据的DataFrame。必须包含 'ts_code', 'close' 列。
n (int): 均线计算的周期数。抵扣价是 n-1 周期前的数据点。
返回:
pd.DataFrame: 增加了 'cat_price_vs_sma_vs_deduction_price_n' 列的DataFrame。
"""
if 'close' not in df.columns:
print("错误: DataFrame中没有'close'列,无法计算抵扣价相关因子。")
return df
if n <= 1:
print("错误: 均线周期 n 必须大于 1。")
df[f'cat_price_vs_sma_vs_deduction_price_{n}'] = np.nan
return df
grouped = df.groupby('ts_code', group_keys=False)
# 计算N周期SMA
sma = grouped['close'].transform(lambda x: x.rolling(window=n).mean())
# 抵扣价
deduction_price = grouped['close'].shift(n - 1)
# 定义条件和分类值
conditions = [
(df['close'] > sma) & (deduction_price > sma),
(df['close'] < sma) & (deduction_price < sma),
(df['close'] > sma) & (deduction_price <= sma), # 包含等于的情况
(df['close'] <= sma) & (deduction_price > sma) # 包含等于的情况
# 注意:sma 或 deduction_price 为 NaN 的行,其条件结果为 False,会落入 default=0
]
choices = [1, 2, 3, 4]
# 使用 np.select 进行分类
# 默认值为 0,用于处理条件不满足或输入为 NaN 的情况
df[f'cat_price_vs_sma_vs_deduction_price_{n}'] = np.select(conditions, choices, default=0)
print(f"因子 cat_price_vs_sma_vs_deduction_price_{n} 计算完成。")
return df
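
A quick illustration of how the np.select classification above behaves, including the default=0 branch that absorbs NaN inputs (values are synthetic, not taken from any data set):

import numpy as np
import pandas as pd

close = pd.Series([11.0, 9.0, 11.0, 9.0, 10.0])
sma = pd.Series([10.0, 10.0, 10.0, 10.0, np.nan])
deduction = pd.Series([12.0, 8.0, 9.0, 11.0, 10.0])

conditions = [
    (close > sma) & (deduction > sma),    # 1: both above the SMA
    (close < sma) & (deduction < sma),    # 2: both below the SMA
    (close > sma) & (deduction <= sma),   # 3: price above, deduction at/below
    (close <= sma) & (deduction > sma),   # 4: price at/below, deduction above
]
print(np.select(conditions, [1, 2, 3, 4], default=0))  # [1 2 3 4 0]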
def cat_is_on_top_list(df: pd.DataFrame, top_list: pd.DataFrame) -> pd.DataFrame:
if 'cat_reason' not in df.columns:
print('计算因子cat_is_on_top_list失败缺少cat_reason列')
return df
df['cat_is_on_top_list'] = df['cat_reason']
df['cat_is_on_top_list'] = df['cat_is_on_top_list'] * (df['pct_chg'] > 0).astype(int)
return df
def cat_reason(df: pd.DataFrame, top_list: pd.DataFrame) -> pd.DataFrame:
"""
高效地将龙虎榜的 reason 列转化为单一数值类型的因子列,并仅返回必要的列。
Args:
df (pd.DataFrame): 包含所有股票数据的 DataFrame,需包含 'ts_code' 和 'trade_date' 列。
top_list (pd.DataFrame): 每日龙虎榜数据的 DataFrame,需包含 'ts_code', 'trade_date' 和 'reason' 列。
Returns:
pd.DataFrame: 包含 'ts_code', 'trade_date' 和 'cat_reason' 列。
"""
# 提取所有唯一的 reason 并进行数值编码
unique_reasons = top_list['reason'].unique()
reason_mapping = {reason: i + 1 for i, reason in enumerate(unique_reasons)}
# 在 top_list 中创建数值型的 reason 列
top_list['cat_reason'] = top_list['reason'].map(reason_mapping).astype('Int64')
# 转换 trade_date 类型以进行合并
df['trade_date'] = pd.to_datetime(df['trade_date'])
top_list['trade_date'] = pd.to_datetime(top_list['trade_date'])
# 仅保留 top_list 中需要的列进行合并
top_list_slim = top_list[['ts_code', 'trade_date', 'cat_reason']]
# 合并 DataFrame
merged_df = df.merge(top_list_slim, on=['ts_code', 'trade_date'], how='left')
# 填充 NaN 为 0 并转换为 int 类型
merged_df['cat_reason'] = merged_df['cat_reason'].fillna(0).astype('int')
return merged_df
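
A minimal usage sketch for cat_reason with a hypothetical two-row top-list frame; stocks that never appear on the list end up with code 0:

import pandas as pd

stocks = pd.DataFrame({
    'ts_code': ['000001.SZ', '000002.SZ', '000001.SZ'],
    'trade_date': ['20250519', '20250519', '20250520'],
})
top = pd.DataFrame({
    'ts_code': ['000001.SZ', '000002.SZ'],
    'trade_date': ['20250519', '20250519'],
    'reason': ['日涨幅偏离值达7%', '连续三个交易日内涨幅偏离值累计达20%'],
})

merged = cat_reason(stocks, top)
print(merged['cat_reason'].tolist())  # [1, 2, 0] — each unique reason gets its own integer code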

main/factor/index_factor.py (new file, 193 lines added)

@@ -0,0 +1,193 @@
import pandas as pd
import numpy as np
from scipy.stats import spearmanr # 用于因子3的原始思路,但实际简化了
epsilon = 1e-10
def _safe_divide(numerator, denominator, default_val=0.0):
"""安全除法"""
with np.errstate(divide='ignore', invalid='ignore'):
result = numerator / denominator
result[~np.isfinite(result)] = default_val
return result
# --- 修改后的因子计算函数 ---
def calculate_size_style_strength_factor(df: pd.DataFrame, N: int = 5, factor_name_suffix: str = '') -> pd.DataFrame:
"""
计算大小盘风格相对强度因子。
返回: 以 trade_date 为索引,因子值为列的 DataFrame。
"""
factor_name = f'size_style_strength_{N}{factor_name_suffix}'
print(f"Calculating {factor_name}...")
required_indices = ['399300.SZ', '000905.SH', '000852.SH']
if not all(idx in df['ts_code'].unique() for idx in required_indices):
print(f"Error: DataFrame 中缺少部分必需的指数代码 ({required_indices})。返回空因子 Series。")
return pd.DataFrame(index=df['trade_date'].unique(), columns=[factor_name]).rename_axis('trade_date')
# 1. 计算各指数N日收益率
df_copy = df.copy() # 操作副本避免修改原始传入df
df_copy['_ret_N'] = df_copy.groupby('ts_code')['close'].pct_change(periods=N)
# 2. Pivot 以方便截面计算
pivot_ret_N = df_copy.pivot_table(index='trade_date', columns='ts_code', values='_ret_N')
# 确保列存在并获取
large_ret = pivot_ret_N.get('399300.SZ', pd.Series(np.nan, index=pivot_ret_N.index))
mid_ret = pivot_ret_N.get('000905.SH', pd.Series(np.nan, index=pivot_ret_N.index))
small_ret = pivot_ret_N.get('000852.SH', pd.Series(np.nan, index=pivot_ret_N.index))
# 3. 计算因子 (结果是每日一个标量值)
large_small_diff = large_ret - small_ret
avg_large_small_ret = (large_ret + small_ret) / 2
# 计算中盘偏离因子,处理 NaN:如果中盘收益为 NaN,则偏离因子不起调整作用(乘以 1)
mid_deviation_raw = mid_ret - avg_large_small_ret
mid_deviation_factor = 1 + np.sign(mid_ret.fillna(0)) * np.abs(mid_deviation_raw.fillna(0))
daily_factor_values = large_small_diff * mid_deviation_factor
daily_factor_values.name = factor_name # 给 Series 命名
print(f"Finished {factor_name}.")
return daily_factor_values.to_frame() # 转换为 DataFrame 返回
def calculate_volatility_structure_factor(df: pd.DataFrame, N: int = 10, factor_name_suffix: str = '') -> pd.DataFrame:
"""
计算市场波动结构因子。
返回: 以 trade_date 为索引,因子值为列的 DataFrame。
"""
factor_name = f'vol_structure_idx_{N}{factor_name_suffix}'
print(f"Calculating {factor_name}...")
required_indices = ['399300.SZ', '000905.SH', '000852.SH']
if not all(idx in df['ts_code'].unique() for idx in required_indices):
print(f"Error: DataFrame 中缺少部分必需的指数代码 ({required_indices})。返回空因子 Series。")
return pd.DataFrame(index=df['trade_date'].unique(), columns=[factor_name]).rename_axis('trade_date')
if 'pct_chg' not in df.columns:
print(f"Error: DataFrame 缺少 'pct_chg' 列。将为 {factor_name} 填充 NaN。")
return pd.DataFrame(index=df['trade_date'].unique(), columns=[factor_name]).rename_axis('trade_date')
df_copy = df.copy()
# 1. 计算各指数N日波动率
df_copy['_vol_N'] = df_copy.groupby('ts_code')['pct_chg'].rolling(N, min_periods=max(1, N//2)).std().reset_index(level=0, drop=True)
# 2. Pivot
pivot_vol_N = df_copy.pivot_table(index='trade_date', columns='ts_code', values='_vol_N')
large_vol = pivot_vol_N.get('399300.SZ', pd.Series(np.nan, index=pivot_vol_N.index))
mid_vol = pivot_vol_N.get('000905.SH', pd.Series(np.nan, index=pivot_vol_N.index))
small_vol = pivot_vol_N.get('000852.SH', pd.Series(np.nan, index=pivot_vol_N.index))
# 3. 计算因子
daily_factor_values = _safe_divide((small_vol - mid_vol), large_vol)
daily_factor_values.name = factor_name
print(f"Finished {factor_name}.")
return daily_factor_values.to_frame()
def calculate_market_divergence_factor(df: pd.DataFrame, factor_name_suffix: str = '') -> pd.DataFrame:
"""
计算市场分化度因子 (基于每日三个指数收益率符号的一致性)。
返回: 以 trade_date 为索引,因子值为列的 DataFrame。
"""
factor_name = f'market_divergence_score{factor_name_suffix}'
print(f"Calculating {factor_name}...")
required_indices = ['399300.SZ', '000905.SH', '000852.SH']
if not all(idx in df['ts_code'].unique() for idx in required_indices):
print(f"Error: DataFrame 中缺少部分必需的指数代码 ({required_indices})。返回空因子 Series。")
return pd.DataFrame(index=df['trade_date'].unique(), columns=[factor_name]).rename_axis('trade_date')
if 'pct_chg' not in df.columns:
print(f"Error: DataFrame 缺少 'pct_chg' 列。将为 {factor_name} 填充 NaN。")
return pd.DataFrame(index=df['trade_date'].unique(), columns=[factor_name]).rename_axis('trade_date')
pivot_pct_chg = df.pivot_table(index='trade_date', columns='ts_code', values='pct_chg')
# 确保列存在
idx_large_col = '399300.SZ'
idx_mid_col = '000905.SH'
idx_small_col = '000852.SH'
# 使用 reindex 确保所有期望的列都存在,缺失的填充 NaN
pivot_pct_chg = pivot_pct_chg.reindex(columns=[idx_large_col, idx_mid_col, idx_small_col])
def daily_divergence_score_calc(row):
# 当天只有这三个指数的收益率 Series
valid_returns = row.dropna() # 获取非 NaN 的收益率
if len(valid_returns) < 2: # 如果有效数据少于2个,无法判断分化
return np.nan
signs = np.sign(valid_returns)
unique_sign_count = len(signs.unique())
if unique_sign_count == 1: # 所有符号相同(或都为0,sign后也是0)
return 0.0 # 分化度最低 (高度一致)
elif unique_sign_count == 2 and 0 in signs.unique(): # 一个方向,一个0
return 0.25 # 较低分化
elif unique_sign_count == 2: # 两个方向 (例如两正一负,或两负一正)
return 0.75 # 较高分化
elif unique_sign_count == 3: # 三个不同方向 (+, -, 0)
return 1.0 # 分化度最高
return np.nan # 其他未覆盖的情况 (理论上不应发生)
daily_factor_values = pivot_pct_chg[[idx_large_col, idx_mid_col, idx_small_col]].apply(daily_divergence_score_calc, axis=1)
daily_factor_values.name = factor_name
print(f"Finished {factor_name}.")
return daily_factor_values.to_frame()
# --- 整合所有因子计算到一个主函数 ---
def generate_daily_index_relation_factors(df_input: pd.DataFrame) -> pd.DataFrame:
"""
计算所有基于大中小盘指数关系的每日截面因子。
Args:
df_input (pd.DataFrame): 长格式的指数行情数据,包含 'ts_code', 'trade_date', 'close', 'pct_chg'
Returns:
pd.DataFrame: 以 'trade_date' 为索引,各因子为列的 DataFrame。
"""
# 确保输入 df 不被修改
df = df_input.sort_values(['ts_code', 'trade_date']).reset_index(drop=True)
# 计算各个因子 (每个函数返回一个单列或多列的 DataFrame索引为 trade_date)
factor1_df = calculate_size_style_strength_factor(df, N=5)
factor2_df = calculate_volatility_structure_factor(df, N=10)
factor3_df = calculate_market_divergence_factor(df)
# 还可以继续添加其他每日截面因子...
# 合并所有因子 DataFrame
# 使用 functools.reduce 和 pd.merge 来优雅地合并多个 DataFrame
from functools import reduce
daily_factors_list = [factor1_df, factor2_df, factor3_df]
# 过滤掉可能因错误产生的完全为空或只有NaN的DataFrame
daily_factors_list = [f_df for f_df in daily_factors_list if not f_df.empty and not f_df.iloc[:,0].isna().all()]
if not daily_factors_list:
print("警告: 未能成功计算任何因子。返回空 DataFrame。")
# 返回一个以日期为索引的空 DataFrame,或者基于输入 df 的日期
return pd.DataFrame(index=df['trade_date'].unique()).rename_axis('trade_date')
# 使用 outer join 以保留所有日期,并确保索引是 trade_date
final_factors_df = reduce(lambda left, right: pd.merge(left, right, on='trade_date', how='outer'),
daily_factors_list)
final_factors_df = final_factors_df.sort_index() # 按日期排序
return final_factors_df
# --- 使用示例 ---
# 假设 all_indices_df 是你包含 '399300.SZ', '000905.SH', '000852.SH' 三个指数的长格式行情数据
# 确保它有 'ts_code', 'trade_date', 'open', 'high', 'low', 'close', 'vol', 'pct_chg' 列
# all_indices_df['trade_date'] = pd.to_datetime(all_indices_df['trade_date'])
# all_indices_df = all_indices_df.sort_values(['ts_code', 'trade_date'])
# daily_market_factors = generate_daily_index_relation_factors(all_indices_df)
# print("\n每日市场风格/情绪因子:")
# print(daily_market_factors.tail())
# 后续,你可以将 daily_market_factors 与你的个股数据 pdf 按 'trade_date' 合并
# pdf_with_market_factors = pd.merge(pdf, daily_market_factors, on='trade_date', how='left')
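
A runnable sketch of generate_daily_index_relation_factors on synthetic random-walk data for the three index codes the module expects; the numbers are illustrative only:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
dates = pd.date_range('2025-01-02', periods=30, freq='B').strftime('%Y%m%d')

frames = []
for code in ['399300.SZ', '000905.SH', '000852.SH']:
    pct = rng.normal(0, 1, len(dates))               # daily % change
    close = 1000 * np.cumprod(1 + pct / 100)
    frames.append(pd.DataFrame({'ts_code': code, 'trade_date': dates,
                                'close': close, 'pct_chg': pct}))

all_indices_df = pd.concat(frames, ignore_index=True)
daily_market_factors = generate_daily_index_relation_factors(all_indices_df)
print(daily_market_factors.tail())
# columns: size_style_strength_5, vol_structure_idx_10, market_divergence_score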

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

main/train/Rank2.ipynb (new file, 2204 lines added)

File diff suppressed because it is too large

main/train/Regression2.ipynb (new file, 2151 lines added)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

Binary file not shown.

Binary file not shown.