This commit is contained in:
liaozhaorun
2025-02-15 23:33:34 +08:00
parent 18d0876d86
commit 49fa31ac63
13 changed files with 2028 additions and 2580 deletions

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
@@ -9,17 +10,17 @@
"start_time": "2025-02-11T15:18:36.020822Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
@@ -27,26 +28,6 @@
"start_time": "2025-02-11T15:20:00.110127Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
@@ -73,18 +54,53 @@
]
}
],
"execution_count": 5
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T15:21:27.831699Z",
"start_time": "2025-02-11T15:21:26.665039Z"
}
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250219 完成\n",
"任务 20250220 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -115,64 +131,62 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
]
}
],
"execution_count": 10
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:29.294283Z",
"start_time": "2025-02-11T15:21:29.247112Z"
}
},
"cell_type": "code",
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"id": "96a81aa5890ea3c3",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"ename": "ValueError",
"evalue": "No objects to concatenate",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[11], line 3\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28mprint\u001B[39m(all_daily_data)\n\u001B[0;32m 2\u001B[0m \u001B[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001B[39;00m\n\u001B[1;32m----> 3\u001B[0m all_daily_data_df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconcat\u001B[49m\u001B[43m(\u001B[49m\u001B[43mall_daily_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m)\u001B[49m\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:372\u001B[0m, in \u001B[0;36mconcat\u001B[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001B[0m\n\u001B[0;32m 369\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m copy \u001B[38;5;129;01mand\u001B[39;00m using_copy_on_write():\n\u001B[0;32m 370\u001B[0m copy \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m--> 372\u001B[0m op \u001B[38;5;241m=\u001B[39m \u001B[43m_Concatenator\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 373\u001B[0m \u001B[43m \u001B[49m\u001B[43mobjs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 374\u001B[0m \u001B[43m \u001B[49m\u001B[43maxis\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maxis\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 375\u001B[0m \u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mignore_index\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 376\u001B[0m \u001B[43m \u001B[49m\u001B[43mjoin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mjoin\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 377\u001B[0m \u001B[43m \u001B[49m\u001B[43mkeys\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkeys\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 378\u001B[0m \u001B[43m \u001B[49m\u001B[43mlevels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlevels\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 379\u001B[0m \u001B[43m \u001B[49m\u001B[43mnames\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnames\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 380\u001B[0m \u001B[43m \u001B[49m\u001B[43mverify_integrity\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverify_integrity\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 381\u001B[0m \u001B[43m \u001B[49m\u001B[43mcopy\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcopy\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 382\u001B[0m \u001B[43m \u001B[49m\u001B[43msort\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msort\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 383\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 385\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result()\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:429\u001B[0m, in \u001B[0;36m_Concatenator.__init__\u001B[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001B[0m\n\u001B[0;32m 426\u001B[0m objs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(objs)\n\u001B[0;32m 428\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(objs) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m--> 429\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo objects to concatenate\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 431\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m keys \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 432\u001B[0m objs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(com\u001B[38;5;241m.\u001B[39mnot_none(\u001B[38;5;241m*\u001B[39mobjs))\n",
"\u001B[1;31mValueError\u001B[0m: No objects to concatenate"
"[ trade_date ts_code up_limit down_limit\n",
"0 20250213 000001.SZ 12.56 10.28\n",
"1 20250213 000002.SZ 8.76 7.16\n",
"2 20250213 000004.SZ 15.40 12.60\n",
"3 20250213 000006.SZ 7.92 6.48\n",
"4 20250213 000007.SZ 7.39 6.05\n",
"... ... ... ... ...\n",
"7014 20250213 920108.BJ 27.22 14.66\n",
"7015 20250213 920111.BJ 35.98 19.38\n",
"7016 20250213 920116.BJ 80.44 43.32\n",
"7017 20250213 920118.BJ 34.46 18.56\n",
"7018 20250213 920128.BJ 39.84 21.46\n",
"\n",
"[7019 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250212 000001.SZ 12.56 10.28\n",
"1 20250212 000002.SZ 7.96 6.52\n",
"2 20250212 000004.SZ 15.07 12.33\n",
"3 20250212 000006.SZ 7.74 6.34\n",
"4 20250212 000007.SZ 7.40 6.06\n",
"... ... ... ... ...\n",
"7014 20250212 920108.BJ 27.41 14.77\n",
"7015 20250212 920111.BJ 34.51 18.59\n",
"7016 20250212 920116.BJ 79.66 42.90\n",
"7017 20250212 920118.BJ 34.81 18.75\n",
"7018 20250212 920128.BJ 38.98 21.00\n",
"\n",
"[7019 rows x 4 columns]]\n"
]
}
],
"execution_count": 11
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
@@ -180,6 +194,15 @@
"start_time": "2025-02-11T15:20:37.375220Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"\n",
@@ -187,23 +210,7 @@
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"ename": "ValueError",
"evalue": "All objects passed were None",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[7], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m all_daily_data_df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconcat\u001B[49m\u001B[43m(\u001B[49m\u001B[43mall_daily_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m)\u001B[49m\n\u001B[0;32m 4\u001B[0m \u001B[38;5;66;03m# 将数据保存为 HDF5 文件table 格式)\u001B[39;00m\n\u001B[0;32m 5\u001B[0m all_daily_data_df\u001B[38;5;241m.\u001B[39mto_hdf(h5_filename, key\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mstk_limit\u001B[39m\u001B[38;5;124m'\u001B[39m, mode\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124ma\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;28mformat\u001B[39m\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtable\u001B[39m\u001B[38;5;124m'\u001B[39m, append\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, data_columns\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:372\u001B[0m, in \u001B[0;36mconcat\u001B[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001B[0m\n\u001B[0;32m 369\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m copy \u001B[38;5;129;01mand\u001B[39;00m using_copy_on_write():\n\u001B[0;32m 370\u001B[0m copy \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m--> 372\u001B[0m op \u001B[38;5;241m=\u001B[39m \u001B[43m_Concatenator\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 373\u001B[0m \u001B[43m \u001B[49m\u001B[43mobjs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 374\u001B[0m \u001B[43m \u001B[49m\u001B[43maxis\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maxis\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 375\u001B[0m \u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mignore_index\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 376\u001B[0m \u001B[43m \u001B[49m\u001B[43mjoin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mjoin\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 377\u001B[0m \u001B[43m \u001B[49m\u001B[43mkeys\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkeys\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 378\u001B[0m \u001B[43m \u001B[49m\u001B[43mlevels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlevels\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 379\u001B[0m \u001B[43m \u001B[49m\u001B[43mnames\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnames\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 380\u001B[0m \u001B[43m \u001B[49m\u001B[43mverify_integrity\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverify_integrity\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 381\u001B[0m \u001B[43m \u001B[49m\u001B[43mcopy\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcopy\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 382\u001B[0m \u001B[43m \u001B[49m\u001B[43msort\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msort\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 383\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 385\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result()\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:452\u001B[0m, in \u001B[0;36m_Concatenator.__init__\u001B[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001B[0m\n\u001B[0;32m 449\u001B[0m keys \u001B[38;5;241m=\u001B[39m Index(clean_keys, name\u001B[38;5;241m=\u001B[39mname, dtype\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mgetattr\u001B[39m(keys, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdtype\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m))\n\u001B[0;32m 451\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(objs) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m--> 452\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll objects passed were None\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 454\u001B[0m \u001B[38;5;66;03m# figure out what our result ndim is going to be\u001B[39;00m\n\u001B[0;32m 455\u001B[0m ndims \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m()\n",
"\u001B[1;31mValueError\u001B[0m: All objects passed were None"
]
}
],
"execution_count": 7
]
},
{
"cell_type": "code",