Classify2

commit b783a6f968 (parent 721e72c599)
Author: liaozhaorun
Date: 2025-05-06 23:42:40 +08:00

19 changed files with 9390 additions and 2774 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
@@ -9,6 +10,7 @@
"start_time": "2025-04-09T14:57:26.124592Z"
}
},
"outputs": [],
"source": [
"from operator import index\n",
"\n",
@@ -18,12 +20,11 @@
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f448da220816bf98",
"metadata": {
"ExecuteTime": {
@@ -31,6 +32,23 @@
"start_time": "2025-04-09T14:57:27.392846Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据已经成功存储到index_data.h5文件中\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_28220\\1832869062.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" final_df = pd.concat(all_data, ignore_index=True)\n"
]
}
],
"source": [
"# 定义四个指数\n",
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
@@ -50,28 +68,11 @@
"final_df.to_hdf('../../data/index_data.h5', key='index_data', mode='w')\n",
"\n",
"print(\"数据已经成功存储到index_data.h5文件中\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据已经成功存储到index_data.h5文件中\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_15500\\3209233630.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" final_df = pd.concat(all_data, ignore_index=True)\n"
]
}
],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "907f732d3c397bf",
"metadata": {
"ExecuteTime": {
@@ -79,54 +80,53 @@
"start_time": "2025-04-09T14:57:37.695917Z"
}
},
"source": [
"h5_filename = '../../data/index_data.h5'\n",
"key = '/index_data'\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key]\n",
" print(df)\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close open high low \\\n",
"0 000905.SH 20250409 5439.7716 5249.6841 5465.1449 5135.9655 \n",
"1 000905.SH 20250408 5326.9140 5279.7566 5371.1834 5249.2318 \n",
"2 000905.SH 20250407 5287.0333 5523.9636 5587.8502 5212.6773 \n",
"3 000905.SH 20250403 5845.5045 5842.6167 5906.7057 5817.9662 \n",
"4 000905.SH 20250402 5899.0865 5884.8925 5936.6467 5884.1126 \n",
"0 000905.SH 20250506 5740.3338 5668.8762 5740.3338 5666.4698 \n",
"1 000905.SH 20250430 5631.8249 5604.6537 5647.7821 5603.1718 \n",
"2 000905.SH 20250429 5604.9057 5583.7186 5622.0220 5571.2363 \n",
"3 000905.SH 20250428 5598.2951 5624.4166 5628.0778 5587.7857 \n",
"4 000905.SH 20250425 5627.1804 5613.1407 5661.5869 5596.5266 \n",
"... ... ... ... ... ... ... \n",
"13444 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13445 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13446 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13447 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13448 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"13492 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13493 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13494 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13495 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13496 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"\n",
" pre_close change pct_chg vol amount \n",
"0 5326.9140 112.8576 2.1186 2.451180e+08 2.882574e+08 \n",
"1 5287.0333 39.8807 0.7543 2.238407e+08 2.618753e+08 \n",
"2 5845.5045 -558.4712 -9.5539 2.365227e+08 2.673974e+08 \n",
"3 5899.0865 -53.5820 -0.9083 1.349386e+08 1.736621e+08 \n",
"4 5892.8502 6.2363 0.1058 1.121600e+08 1.406421e+08 \n",
"0 5631.8249 108.5089 1.9267 1.627736e+08 2.170600e+08 \n",
"1 5604.9057 26.9192 0.4803 1.383866e+08 1.816166e+08 \n",
"2 5598.2951 6.6106 0.1181 1.267429e+08 1.580330e+08 \n",
"3 5627.1804 -28.8853 -0.5133 1.362181e+08 1.676163e+08 \n",
"4 5605.8796 21.3008 0.3800 1.400008e+08 1.719338e+08 \n",
"... ... ... ... ... ... \n",
"13444 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13445 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13446 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13447 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13448 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"13492 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13493 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13494 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13495 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13496 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"\n",
"[13449 rows x 11 columns]\n"
"[13497 rows x 11 columns]\n"
]
}
],
"execution_count": 3
"source": [
"h5_filename = '../../data/index_data.h5'\n",
"key = '/index_data'\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key]\n",
" print(df)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
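
Note: the notebook above rebuilds data/index_data.h5 from scratch. It pulls daily bars for the four index codes in index_list, concatenates them, writes the store with mode='w', then reads it back through pd.HDFStore; in this run the row count grows from 13,449 to 13,497 and the kernel metadata switches to the new_trader environment. The download loop itself sits in a suppressed hunk, so the following is only a minimal sketch of that pattern: pro.index_daily is the standard tushare endpoint for index bars, but the exact arguments the notebook passes are an assumption. Skipping empty frames before pd.concat also sidesteps the FutureWarning visible in the cell's stderr.

# Minimal sketch of the (suppressed) download loop, not the notebook's exact code.
# Assumes ts.set_token(...) has already been called, as in the first cell.
import pandas as pd
import tushare as ts

pro = ts.pro_api()

index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']
all_data = []
for ts_code in index_list:
    df = pro.index_daily(ts_code=ts_code)  # full daily history for one index (assumed call)
    if not df.empty:                        # dropping empty frames avoids the concat FutureWarning
        all_data.append(df)

final_df = pd.concat(all_data, ignore_index=True)
final_df.to_hdf('../../data/index_data.h5', key='index_data', mode='w')  # mode='w' overwrites the store
print("数据已经成功存储到index_data.h5文件中")  # "data stored to index_data.h5 successfully"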

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "94412ea8-aad7-47fb-8597-d80adef21a8b",
"metadata": {
"ExecuteTime": {
@@ -9,70 +10,24 @@
"start_time": "2025-03-01T09:19:23.930364Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9067006f-6352-4fe6-9295-22208f40f235",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-01T09:56:42.369757Z",
"start_time": "2025-03-01T09:19:24.709524Z"
}
},
"scrolled": true
},
"source": [
"from tqdm import tqdm\n",
"import pandas as pd\n",
"import time\n",
"\n",
"# 读取本地保存的股票列表 CSV 文件(假设文件名为 stocks_data.csv\n",
"stocks_df = pd.read_csv('../../stocks_list.csv', encoding='utf-8-sig')\n",
"\n",
"# 用于存放所有股票的日线数据(每次获取的 DataFrame\n",
"name_change_data_list = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"# 循环遍历每个股票代码并获取数据\n",
"for idx, row in stocks_df.iterrows():\n",
" ts_code = row['ts_code'] # 假设股票代码列名为 ts_code\n",
" try:\n",
" # 调用 tushare 接口获取该股票自 2017 年以来的日线数据\n",
" name_change_data = pro.namechange(ts_code=ts_code, fields='ts_code,name,start_date,end_date,change_reason')\n",
" # 如果返回数据不为空,则添加一列标识股票代码\n",
" if not name_change_data.empty:\n",
" name_change_data_list.append(name_change_data)\n",
" print(f\"成功获取 {ts_code} 的数据\")\n",
" except Exception as e:\n",
" print(f\"获取 {ts_code} 数据时出错: {e}\")\n",
"\n",
" # 计数一次 API 调用\n",
" api_call_count += 1\n",
"\n",
" # 每调用300次检查时间是否少于1分钟如果少于则等待剩余时间\n",
" if api_call_count % 150 == 0:\n",
" elapsed = time.time() - batch_start_time\n",
" if elapsed < 60:\n",
" sleep_time = 60 - elapsed\n",
" print(f\"已调用300次API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
" time.sleep(sleep_time)\n",
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n",
"name_change_df = pd.concat(name_change_data_list, ignore_index=True)\n",
"# 输出部分结果\n",
"print(name_change_df.head())\n",
"print(f\"名称变化记录总数: {len(name_change_df)}\")\n"
],
"outputs": [
{
"name": "stdout",
@@ -228,7 +183,7 @@
"成功获取 000572.SZ 的数据\n",
"成功获取 000573.SZ 的数据\n",
"成功获取 000576.SZ 的数据\n",
"已调用300次API等待 41.14 秒以满足速率限制...\n",
"已调用300次API等待 38.79 秒以满足速率限制...\n",
"成功获取 000581.SZ 的数据\n",
"成功获取 000582.SZ 的数据\n",
"成功获取 000584.SZ 的数据\n",
@@ -379,7 +334,7 @@
"成功获取 000811.SZ 的数据\n",
"成功获取 000812.SZ 的数据\n",
"成功获取 000813.SZ 的数据\n",
"已调用300次API等待 40.78 秒以满足速率限制...\n",
"已调用300次API等待 38.14 秒以满足速率限制...\n",
"成功获取 000815.SZ 的数据\n",
"成功获取 000816.SZ 的数据\n",
"成功获取 000818.SZ 的数据\n",
@@ -530,7 +485,7 @@
"成功获取 001238.SZ 的数据\n",
"成功获取 001239.SZ 的数据\n",
"成功获取 001255.SZ 的数据\n",
"已调用300次API等待 40.77 秒以满足速率限制...\n",
"已调用300次API等待 38.70 秒以满足速率限制...\n",
"成功获取 001256.SZ 的数据\n",
"成功获取 001258.SZ 的数据\n",
"成功获取 001259.SZ 的数据\n",
@@ -681,7 +636,7 @@
"成功获取 002085.SZ 的数据\n",
"成功获取 002086.SZ 的数据\n",
"成功获取 002088.SZ 的数据\n",
"已调用300次API等待 40.70 秒以满足速率限制...\n",
"已调用300次API等待 38.23 秒以满足速率限制...\n",
"成功获取 002090.SZ 的数据\n",
"成功获取 002091.SZ 的数据\n",
"成功获取 002092.SZ 的数据\n",
@@ -832,7 +787,7 @@
"成功获取 002242.SZ 的数据\n",
"成功获取 002243.SZ 的数据\n",
"成功获取 002244.SZ 的数据\n",
"已调用300次API等待 40.20 秒以满足速率限制...\n",
"已调用300次API等待 38.48 秒以满足速率限制...\n",
"成功获取 002245.SZ 的数据\n",
"成功获取 002246.SZ 的数据\n",
"成功获取 002247.SZ 的数据\n",
@@ -983,7 +938,7 @@
"成功获取 002400.SZ 的数据\n",
"成功获取 002401.SZ 的数据\n",
"成功获取 002402.SZ 的数据\n",
"已调用300次API等待 40.84 秒以满足速率限制...\n",
"已调用300次API等待 38.28 秒以满足速率限制...\n",
"成功获取 002403.SZ 的数据\n",
"成功获取 002404.SZ 的数据\n",
"成功获取 002405.SZ 的数据\n",
@@ -1134,7 +1089,7 @@
"成功获取 002566.SZ 的数据\n",
"成功获取 002567.SZ 的数据\n",
"成功获取 002568.SZ 的数据\n",
"已调用300次API等待 41.66 秒以满足速率限制...\n",
"已调用300次API等待 38.10 秒以满足速率限制...\n",
"成功获取 002569.SZ 的数据\n",
"成功获取 002570.SZ 的数据\n",
"成功获取 002571.SZ 的数据\n",
@@ -1285,7 +1240,7 @@
"成功获取 002729.SZ 的数据\n",
"成功获取 002730.SZ 的数据\n",
"成功获取 002731.SZ 的数据\n",
"已调用300次API等待 40.74 秒以满足速率限制...\n",
"已调用300次API等待 39.07 秒以满足速率限制...\n",
"成功获取 002732.SZ 的数据\n",
"成功获取 002733.SZ 的数据\n",
"成功获取 002734.SZ 的数据\n",
@@ -1436,7 +1391,7 @@
"成功获取 002896.SZ 的数据\n",
"成功获取 002897.SZ 的数据\n",
"成功获取 002898.SZ 的数据\n",
"已调用300次API等待 41.14 秒以满足速率限制...\n",
"已调用300次API等待 38.58 秒以满足速率限制...\n",
"成功获取 002899.SZ 的数据\n",
"成功获取 002900.SZ 的数据\n",
"成功获取 002901.SZ 的数据\n",
@@ -1587,7 +1542,7 @@
"成功获取 300014.SZ 的数据\n",
"成功获取 300015.SZ 的数据\n",
"成功获取 300016.SZ 的数据\n",
"已调用300次API等待 40.57 秒以满足速率限制...\n",
"已调用300次API等待 39.18 秒以满足速率限制...\n",
"成功获取 300017.SZ 的数据\n",
"成功获取 300018.SZ 的数据\n",
"成功获取 300019.SZ 的数据\n",
@@ -1738,7 +1693,7 @@
"成功获取 300174.SZ 的数据\n",
"成功获取 300175.SZ 的数据\n",
"成功获取 300176.SZ 的数据\n",
"已调用300次API等待 41.05 秒以满足速率限制...\n",
"已调用300次API等待 38.05 秒以满足速率限制...\n",
"成功获取 300177.SZ 的数据\n",
"成功获取 300179.SZ 的数据\n",
"成功获取 300180.SZ 的数据\n",
@@ -1889,7 +1844,7 @@
"成功获取 300337.SZ 的数据\n",
"成功获取 300338.SZ 的数据\n",
"成功获取 300339.SZ 的数据\n",
"已调用300次API等待 40.69 秒以满足速率限制...\n",
"已调用300次API等待 38.83 秒以满足速率限制...\n",
"成功获取 300340.SZ 的数据\n",
"成功获取 300341.SZ 的数据\n",
"成功获取 300342.SZ 的数据\n",
@@ -2040,7 +1995,7 @@
"成功获取 300494.SZ 的数据\n",
"成功获取 300496.SZ 的数据\n",
"成功获取 300497.SZ 的数据\n",
"已调用300次API等待 40.51 秒以满足速率限制...\n",
"已调用300次API等待 38.36 秒以满足速率限制...\n",
"成功获取 300498.SZ 的数据\n",
"成功获取 300499.SZ 的数据\n",
"成功获取 300500.SZ 的数据\n",
@@ -2191,7 +2146,7 @@
"成功获取 300650.SZ 的数据\n",
"成功获取 300651.SZ 的数据\n",
"成功获取 300652.SZ 的数据\n",
"已调用300次API等待 39.15 秒以满足速率限制...\n",
"已调用300次API等待 39.00 秒以满足速率限制...\n",
"成功获取 300653.SZ 的数据\n",
"成功获取 300654.SZ 的数据\n",
"成功获取 300655.SZ 的数据\n",
@@ -2342,7 +2297,7 @@
"成功获取 300810.SZ 的数据\n",
"成功获取 300811.SZ 的数据\n",
"成功获取 300812.SZ 的数据\n",
"已调用300次API等待 38.87 秒以满足速率限制...\n",
"已调用300次API等待 39.10 秒以满足速率限制...\n",
"成功获取 300813.SZ 的数据\n",
"成功获取 300814.SZ 的数据\n",
"成功获取 300815.SZ 的数据\n",
@@ -2493,7 +2448,7 @@
"成功获取 300966.SZ 的数据\n",
"成功获取 300967.SZ 的数据\n",
"成功获取 300968.SZ 的数据\n",
"已调用300次API等待 40.54 秒以满足速率限制...\n",
"已调用300次API等待 38.14 秒以满足速率限制...\n",
"成功获取 300969.SZ 的数据\n",
"成功获取 300970.SZ 的数据\n",
"成功获取 300971.SZ 的数据\n",
@@ -2644,7 +2599,7 @@
"成功获取 301128.SZ 的数据\n",
"成功获取 301129.SZ 的数据\n",
"成功获取 301130.SZ 的数据\n",
"已调用300次API等待 41.03 秒以满足速率限制...\n",
"已调用300次API等待 38.08 秒以满足速率限制...\n",
"成功获取 301131.SZ 的数据\n",
"成功获取 301132.SZ 的数据\n",
"成功获取 301133.SZ 的数据\n",
@@ -2795,7 +2750,7 @@
"成功获取 301313.SZ 的数据\n",
"成功获取 301314.SZ 的数据\n",
"成功获取 301315.SZ 的数据\n",
"已调用300次API等待 40.99 秒以满足速率限制...\n",
"已调用300次API等待 38.67 秒以满足速率限制...\n",
"成功获取 301316.SZ 的数据\n",
"成功获取 301317.SZ 的数据\n",
"成功获取 301318.SZ 的数据\n",
@@ -2946,7 +2901,7 @@
"成功获取 301618.SZ 的数据\n",
"成功获取 301622.SZ 的数据\n",
"成功获取 301626.SZ 的数据\n",
"已调用300次API等待 41.17 秒以满足速率限制...\n",
"已调用300次API等待 39.59 秒以满足速率限制...\n",
"成功获取 301628.SZ 的数据\n",
"成功获取 301631.SZ 的数据\n",
"成功获取 301633.SZ 的数据\n",
@@ -3097,7 +3052,7 @@
"成功获取 600170.SH 的数据\n",
"成功获取 600171.SH 的数据\n",
"成功获取 600172.SH 的数据\n",
"已调用300次API等待 40.74 秒以满足速率限制...\n",
"已调用300次API等待 38.63 秒以满足速率限制...\n",
"成功获取 600173.SH 的数据\n",
"成功获取 600176.SH 的数据\n",
"成功获取 600177.SH 的数据\n",
@@ -3248,7 +3203,7 @@
"成功获取 600366.SH 的数据\n",
"成功获取 600367.SH 的数据\n",
"成功获取 600368.SH 的数据\n",
"已调用300次API等待 41.16 秒以满足速率限制...\n",
"已调用300次API等待 38.00 秒以满足速率限制...\n",
"成功获取 600369.SH 的数据\n",
"成功获取 600370.SH 的数据\n",
"成功获取 600371.SH 的数据\n",
@@ -3399,7 +3354,7 @@
"成功获取 600572.SH 的数据\n",
"成功获取 600573.SH 的数据\n",
"成功获取 600575.SH 的数据\n",
"已调用300次API等待 40.45 秒以满足速率限制...\n",
"已调用300次API等待 36.61 秒以满足速率限制...\n",
"成功获取 600576.SH 的数据\n",
"成功获取 600577.SH 的数据\n",
"成功获取 600578.SH 的数据\n",
@@ -3550,7 +3505,7 @@
"成功获取 600748.SH 的数据\n",
"成功获取 600749.SH 的数据\n",
"成功获取 600750.SH 的数据\n",
"已调用300次API等待 41.00 秒以满足速率限制...\n",
"已调用300次API等待 38.88 秒以满足速率限制...\n",
"成功获取 600751.SH 的数据\n",
"成功获取 600753.SH 的数据\n",
"成功获取 600754.SH 的数据\n",
@@ -3701,7 +3656,7 @@
"成功获取 600956.SH 的数据\n",
"成功获取 600958.SH 的数据\n",
"成功获取 600959.SH 的数据\n",
"已调用300次API等待 41.08 秒以满足速率限制...\n",
"已调用300次API等待 38.49 秒以满足速率限制...\n",
"成功获取 600960.SH 的数据\n",
"成功获取 600961.SH 的数据\n",
"成功获取 600962.SH 的数据\n",
@@ -3852,7 +3807,7 @@
"成功获取 601519.SH 的数据\n",
"成功获取 601528.SH 的数据\n",
"成功获取 601555.SH 的数据\n",
"已调用300次API等待 41.02 秒以满足速率限制...\n",
"已调用300次API等待 38.62 秒以满足速率限制...\n",
"成功获取 601566.SH 的数据\n",
"成功获取 601567.SH 的数据\n",
"成功获取 601568.SH 的数据\n",
@@ -4003,7 +3958,7 @@
"成功获取 603041.SH 的数据\n",
"成功获取 603042.SH 的数据\n",
"成功获取 603043.SH 的数据\n",
"已调用300次API等待 40.67 秒以满足速率限制...\n",
"已调用300次API等待 38.79 秒以满足速率限制...\n",
"成功获取 603045.SH 的数据\n",
"成功获取 603048.SH 的数据\n",
"成功获取 603050.SH 的数据\n",
@@ -4154,7 +4109,7 @@
"成功获取 603228.SH 的数据\n",
"成功获取 603229.SH 的数据\n",
"成功获取 603230.SH 的数据\n",
"已调用300次API等待 41.24 秒以满足速率限制...\n",
"已调用300次API等待 39.75 秒以满足速率限制...\n",
"成功获取 603231.SH 的数据\n",
"成功获取 603232.SH 的数据\n",
"成功获取 603233.SH 的数据\n",
@@ -4305,7 +4260,7 @@
"成功获取 603530.SH 的数据\n",
"成功获取 603533.SH 的数据\n",
"成功获取 603535.SH 的数据\n",
"已调用300次API等待 40.73 秒以满足速率限制...\n",
"已调用300次API等待 38.97 秒以满足速率限制...\n",
"成功获取 603536.SH 的数据\n",
"成功获取 603538.SH 的数据\n",
"成功获取 603551.SH 的数据\n",
@@ -4456,7 +4411,7 @@
"成功获取 603819.SH 的数据\n",
"成功获取 603822.SH 的数据\n",
"成功获取 603823.SH 的数据\n",
"已调用300次API等待 41.30 秒以满足速率限制...\n",
"已调用300次API等待 39.13 秒以满足速率限制...\n",
"成功获取 603825.SH 的数据\n",
"成功获取 603826.SH 的数据\n",
"成功获取 603828.SH 的数据\n",
@@ -4607,7 +4562,7 @@
"成功获取 605167.SH 的数据\n",
"成功获取 605168.SH 的数据\n",
"成功获取 605169.SH 的数据\n",
"已调用300次API等待 40.75 秒以满足速率限制...\n",
"已调用300次API等待 39.25 秒以满足速率限制...\n",
"成功获取 605177.SH 的数据\n",
"成功获取 605178.SH 的数据\n",
"成功获取 605179.SH 的数据\n",
@@ -4758,7 +4713,7 @@
"成功获取 688097.SH 的数据\n",
"成功获取 688098.SH 的数据\n",
"成功获取 688099.SH 的数据\n",
"已调用300次API等待 41.17 秒以满足速率限制...\n",
"已调用300次API等待 38.88 秒以满足速率限制...\n",
"成功获取 688100.SH 的数据\n",
"成功获取 688101.SH 的数据\n",
"成功获取 688102.SH 的数据\n",
@@ -4909,7 +4864,7 @@
"成功获取 688271.SH 的数据\n",
"成功获取 688272.SH 的数据\n",
"成功获取 688273.SH 的数据\n",
"已调用300次API等待 41.28 秒以满足速率限制...\n",
"已调用300次API等待 35.24 秒以满足速率限制...\n",
"成功获取 688275.SH 的数据\n",
"成功获取 688276.SH 的数据\n",
"成功获取 688277.SH 的数据\n",
@@ -5060,7 +5015,7 @@
"成功获取 688486.SH 的数据\n",
"成功获取 688488.SH 的数据\n",
"成功获取 688489.SH 的数据\n",
"已调用300次API等待 41.23 秒以满足速率限制...\n",
"已调用300次API等待 37.62 秒以满足速率限制...\n",
"成功获取 688496.SH 的数据\n",
"成功获取 688498.SH 的数据\n",
"成功获取 688499.SH 的数据\n",
@@ -5211,7 +5166,7 @@
"成功获取 688689.SH 的数据\n",
"成功获取 688690.SH 的数据\n",
"成功获取 688691.SH 的数据\n",
"已调用300次API等待 40.17 秒以满足速率限制...\n",
"已调用300次API等待 39.35 秒以满足速率限制...\n",
"成功获取 688692.SH 的数据\n",
"成功获取 688693.SH 的数据\n",
"成功获取 688695.SH 的数据\n",
@@ -5362,7 +5317,7 @@
"成功获取 835184.BJ 的数据\n",
"成功获取 835185.BJ 的数据\n",
"成功获取 835207.BJ 的数据\n",
"已调用300次API等待 41.36 秒以满足速率限制...\n",
"已调用300次API等待 39.39 秒以满足速率限制...\n",
"成功获取 835237.BJ 的数据\n",
"成功获取 835305.BJ 的数据\n",
"成功获取 835368.BJ 的数据\n",
@@ -5513,7 +5468,7 @@
"成功获取 000005.SZ 的数据\n",
"成功获取 000013.SZ 的数据\n",
"成功获取 000015.SZ 的数据\n",
"已调用300次API等待 40.98 秒以满足速率限制...\n",
"已调用300次API等待 38.64 秒以满足速率限制...\n",
"成功获取 000018.SZ 的数据\n",
"成功获取 000023.SZ 的数据\n",
"成功获取 000024.SZ 的数据\n",
@@ -5664,7 +5619,7 @@
"成功获取 300309.SZ 的数据\n",
"成功获取 300312.SZ 的数据\n",
"成功获取 300325.SZ 的数据\n",
"已调用300次API等待 40.90 秒以满足速率限制...\n",
"已调用300次API等待 39.83 秒以满足速率限制...\n",
"成功获取 300330.SZ 的数据\n",
"成功获取 300336.SZ 的数据\n",
"成功获取 300356.SZ 的数据\n",
@@ -5806,14 +5761,60 @@
"2 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"3 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"4 000001.SZ S深发展A 20061009 20070619 未股改加S\n",
"名称变化记录总数: 31934\n"
"名称变化记录总数: 32258\n"
]
}
],
"execution_count": 2
"source": [
"from tqdm import tqdm\n",
"import pandas as pd\n",
"import time\n",
"\n",
"# 读取本地保存的股票列表 CSV 文件(假设文件名为 stocks_data.csv\n",
"stocks_df = pd.read_csv('../../stocks_list.csv', encoding='utf-8-sig')\n",
"\n",
"# 用于存放所有股票的日线数据(每次获取的 DataFrame\n",
"name_change_data_list = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"# 循环遍历每个股票代码并获取数据\n",
"for idx, row in stocks_df.iterrows():\n",
" ts_code = row['ts_code'] # 假设股票代码列名为 ts_code\n",
" try:\n",
" # 调用 tushare 接口获取该股票自 2017 年以来的日线数据\n",
" name_change_data = pro.namechange(ts_code=ts_code, fields='ts_code,name,start_date,end_date,change_reason')\n",
" # 如果返回数据不为空,则添加一列标识股票代码\n",
" if not name_change_data.empty:\n",
" name_change_data_list.append(name_change_data)\n",
" print(f\"成功获取 {ts_code} 的数据\")\n",
" except Exception as e:\n",
" print(f\"获取 {ts_code} 数据时出错: {e}\")\n",
"\n",
" # 计数一次 API 调用\n",
" api_call_count += 1\n",
"\n",
" # 每调用300次检查时间是否少于1分钟如果少于则等待剩余时间\n",
" if api_call_count % 150 == 0:\n",
" elapsed = time.time() - batch_start_time\n",
" if elapsed < 60:\n",
" sleep_time = 60 - elapsed\n",
" print(f\"已调用300次API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
" time.sleep(sleep_time)\n",
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n",
"name_change_df = pd.concat(name_change_data_list, ignore_index=True)\n",
"# 输出部分结果\n",
"print(name_change_df.head())\n",
"print(f\"名称变化记录总数: {len(name_change_df)}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4d5524b8-2a90-44bb-b5ef-e59cfa232ff0",
"metadata": {
"ExecuteTime": {
@@ -5821,14 +5822,6 @@
"start_time": "2025-03-01T09:56:42.431891Z"
}
},
"source": [
"# 合并所有获取到的日线数据\n",
"if True:\n",
" name_change_df.to_hdf('../../data/name_change.h5', key='name_change', mode='w', format='table')\n",
" print(\"所有日线数据已保存到 daily_data.h5\")\n",
"else:\n",
" print(\"未获取到任何日线数据。\")"
],
"outputs": [
{
"name": "stdout",
@@ -5838,10 +5831,18 @@
]
}
],
"execution_count": 3
"source": [
"# 合并所有获取到的日线数据\n",
"if True:\n",
" name_change_df.to_hdf('../../data/name_change.h5', key='name_change', mode='w', format='table')\n",
" print(\"所有日线数据已保存到 daily_data.h5\")\n",
"else:\n",
" print(\"未获取到任何日线数据。\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1e920791-e8de-4a51-a39b-283f54132b44",
"metadata": {
"ExecuteTime": {
@@ -5849,9 +5850,6 @@
"start_time": "2025-03-01T09:56:42.545392Z"
}
},
"source": [
"print(name_change_df.head())"
],
"outputs": [
{
"name": "stdout",
@@ -5866,10 +5864,13 @@
]
}
],
"execution_count": 4
"source": [
"print(name_change_df.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f5651f7-0910-4df5-9c3f-79d6ce033d53",
"metadata": {
"ExecuteTime": {
@@ -5877,14 +5878,13 @@
"start_time": "2025-03-01T09:56:42.569013Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
@@ -5898,7 +5898,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.11.11"
}
},
"nbformat": 4,

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
@@ -9,16 +10,16 @@
"start_time": "2025-04-09T14:57:33.903794Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
@@ -26,6 +27,30 @@
"start_time": "2025-04-09T14:57:34.666469Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"43070 920108.BJ 20250421\n",
"43071 920111.BJ 20250421\n",
"43072 920116.BJ 20250421\n",
"43073 920118.BJ 20250421\n",
"43074 920128.BJ 20250421\n",
"\n",
"[7648931 rows x 2 columns]\n",
"20250430\n",
"start_date: 20250506\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -39,40 +64,16 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250620')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"5387 920108.BJ 20250408\n",
"5388 920111.BJ 20250408\n",
"5389 920116.BJ 20250408\n",
"5390 920118.BJ 20250408\n",
"5391 920128.BJ 20250408\n",
"\n",
"[7562721 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
@@ -80,6 +81,47 @@
"start_time": "2025-04-09T14:57:42.232250Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250613 完成\n",
"任务 20250616 完成\n",
"任务 20250611 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250529 完成\n",
"任务 20250530 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250519 完成\n",
"任务 20250520 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n",
"任务 20250506 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -109,27 +151,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
@@ -137,16 +163,6 @@
"start_time": "2025-04-09T14:57:45.698824Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -156,12 +172,21 @@
]
}
],
"execution_count": 4
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
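
Note: this notebook turns the per-stock daily download into an incremental update. It reads the latest trade_date already stored in the HDF5 file, asks pro.trade_cal for trading days after it (the calendar end date moves from 20250420 to 20250620), and only fetches the missing dates before appending them back. A minimal sketch of that start-date computation follows; h5_filename and key are placeholders because the real values sit in a suppressed hunk, and pro.trade_cal with cal_date/is_open is the real tushare calendar endpoint.

import pandas as pd
import tushare as ts

pro = ts.pro_api()                         # assumes the token was set earlier
h5_filename = '../../data/daily_data.h5'   # placeholder path
key = 'daily_data'                         # placeholder key

with pd.HDFStore(h5_filename, mode='r') as store:
    stored = store.select(key, columns=['ts_code', 'trade_date'])  # table-format store assumed
max_date = stored['trade_date'].max()      # latest date already on disk, e.g. '20250430'

trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250620')
open_days = trade_cal[trade_cal['is_open'] == 1]['cal_date']       # keep trading days only
missing_dates = sorted(d for d in open_days if d > max_date)       # 'YYYYMMDD' strings sort correctly
start_date = missing_dates[0] if missing_dates else None           # None means nothing to update
print(f'start_date: {start_date}')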

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
@@ -9,16 +10,16 @@
"start_time": "2025-04-09T14:57:34.837095Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
@@ -26,6 +27,30 @@
"start_time": "2025-04-09T14:57:35.854308Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"3507 859811.SI 20250421\n",
"3508 859821.SI 20250421\n",
"3509 859822.SI 20250421\n",
"3510 859852.SI 20250421\n",
"3511 859951.SI 20250421\n",
"\n",
"[1065026 rows x 2 columns]\n",
"20250430\n",
"start_date: 20250506\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -39,40 +64,16 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250620')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
".. ... ...\n",
"434 859811.SI 20250408\n",
"435 859821.SI 20250408\n",
"436 859822.SI 20250408\n",
"437 859852.SI 20250408\n",
"438 859951.SI 20250408\n",
"\n",
"[1058002 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
@@ -80,6 +81,47 @@
"start_time": "2025-04-09T14:57:38.104541Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250611 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250603 完成\n",
"任务 20250604 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n",
"任务 20250506 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -109,27 +151,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
@@ -137,16 +163,6 @@
"start_time": "2025-04-09T14:57:40.773783Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -156,12 +172,21 @@
]
}
],
"execution_count": 4
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
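
Note: same incremental pattern as the previous file, applied to the Shenwan industry index codes (8xxxxx.SI, about 1.07M rows after the update). Both notebooks fan the per-date requests out through a thread pool; below is an illustrative, self-contained version of that fan-out in which fetch_one is a hypothetical stand-in for the suppressed tushare call.

from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

def fetch_one(trade_date: str) -> pd.DataFrame:
    """Hypothetical stand-in for the notebook's per-date tushare request."""
    return pd.DataFrame({'ts_code': ['801010.SI'], 'trade_date': [trade_date]})

trade_dates = ['20250506', '20250507', '20250508']   # illustrative; really comes from trade_cal
all_daily_data = []
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(fetch_one, d): d for d in trade_dates}
    for fut in as_completed(futures):
        d = futures[fut]
        try:
            all_daily_data.append(fut.result())
            print(f"任务 {d} 完成")              # "task {d} done", as in the notebook's output
        except Exception as e:
            print(f"获取 {d} 数据时出错: {e}")    # "error fetching data for {d}"

all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)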

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
@@ -9,16 +10,16 @@
"start_time": "2025-04-09T14:57:36.159612Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
@@ -26,6 +27,7 @@
"start_time": "2025-04-09T14:57:36.918051Z"
}
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
@@ -70,15 +72,15 @@
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" # st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" st_data = group[group['name'].str.contains('ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
],
"outputs": [],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
@@ -86,6 +88,26 @@
"start_time": "2025-04-09T14:57:39.339423Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8599138 entries, 0 to 8599137\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 196.8+ MB\n",
"None\n",
"20250430\n",
"20250506\n"
]
}
],
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
@@ -99,44 +121,85 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250720')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.817010Z",
"start_time": "2025-04-09T14:58:09.326485Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8512911 entries, 0 to 5391\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.8+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250715 完成\n",
"任务 20250716 完成\n",
"任务 20250711 完成\n",
"任务 20250714 完成\n",
"任务 20250709 完成\n",
"任务 20250710 完成\n",
"任务 20250707 完成\n",
"任务 20250708 完成\n",
"任务 20250704 完成\n",
"任务 20250703 完成\n",
"任务 20250702 完成\n",
"任务 20250701 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n",
"任务 20250506 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.817010Z",
"start_time": "2025-04-09T14:58:09.326485Z"
}
},
"source": [
"\n",
"\n",
@@ -186,27 +249,11 @@
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 4
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
@@ -214,75 +261,75 @@
"start_time": "2025-04-09T14:58:16.855084Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 300285.SZ 20250409 16.61 2.1086 2.2506 \n",
"1 300458.SZ 20250409 44.48 9.9286 11.7046 \n",
"2 605090.SH 20250409 23.81 0.6834 1.1888 \n",
"3 688686.SH 20250409 69.52 1.6005 5.7492 \n",
"4 002057.SZ 20250409 7.18 4.7461 7.1088 \n",
"... ... ... ... ... ... \n",
"5390 301511.SZ 20250409 12.23 3.4040 4.6900 \n",
"5391 688355.SH 20250409 15.84 1.4154 4.4898 \n",
"5392 600019.SH 20250409 6.83 0.4729 1.2898 \n",
"5393 603507.SH 20250409 22.00 30.8936 42.4775 \n",
"5394 600886.SH 20250409 14.58 0.7795 2.4989 \n",
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 002390.SZ 20250506 3.48 0.7696 1.3833 \n",
"1 300708.SZ 20250506 11.64 2.8994 3.2217 \n",
"2 301171.SZ 20250506 27.73 9.9120 10.7228 \n",
"3 301662.SZ 20250506 52.50 17.0926 17.0926 \n",
"4 001309.SZ 20250506 129.63 5.7123 6.3388 \n",
"... ... ... ... ... ... \n",
"5381 000551.SZ 20250506 12.39 2.0213 3.1432 \n",
"5382 600792.SH 20250506 3.17 0.8036 2.3531 \n",
"5383 300176.SZ 20250506 6.62 1.7530 2.5325 \n",
"5384 000016.SZ 20250506 5.57 13.9545 20.7669 \n",
"5385 300339.SZ 20250506 56.53 11.3184 11.9579 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.11 29.0985 27.1266 2.5144 4.2913 4.1010 0.6020 \n",
"1 1.54 168.9309 168.9309 9.3966 12.3119 12.3119 0.3364 \n",
"2 1.00 11.8377 9.0427 1.7135 0.5819 0.6421 3.2226 \n",
"3 1.18 43.8690 61.1222 2.9105 9.0031 9.2377 NaN \n",
"4 1.35 19.8304 29.3370 1.7625 1.9656 2.0487 3.2191 \n",
"... ... ... ... ... ... ... ... \n",
"5390 1.36 58.1209 NaN 1.9116 1.1803 1.1129 0.3212 \n",
"5391 1.31 133.9017 29.7427 1.8103 3.6805 3.1067 NaN \n",
"5392 1.28 12.5281 15.7915 0.7518 0.4344 0.4503 4.4796 \n",
"5393 2.89 22.7537 22.7537 1.6401 1.0276 1.0276 1.3553 \n",
"5394 1.04 17.4059 16.1402 1.8424 2.0579 1.9930 3.1604 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.02 66.7242 80.7223 1.0020 1.1214 1.1483 2.5321 \n",
"1 1.14 40.4767 37.8935 2.9328 2.8689 2.7390 1.3334 \n",
"2 0.95 56.4451 55.0565 3.6159 5.1380 4.3691 0.4867 \n",
"3 0.79 20.2143 23.5423 2.7909 2.0091 2.2310 NaN \n",
"4 1.02 59.8205 243.9150 8.6523 4.3939 4.0221 0.0702 \n",
"... ... ... ... ... ... ... ... \n",
"5381 1.20 19.9692 18.7030 1.8602 1.1939 1.1927 0.5650 \n",
"5382 0.89 NaN NaN 1.1995 0.5271 0.5777 2.1767 \n",
"5383 1.12 92.1443 96.5538 2.7208 1.4839 1.4627 0.0000 \n",
"5384 3.66 NaN NaN 5.6643 1.2067 1.1979 0.0000 \n",
"5385 2.40 279.4392 270.1037 12.8967 13.2445 13.0061 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.6020 9.970483e+04 8.039498e+04 75323.2612 1.656097e+06 \n",
"1 0.3364 6.332851e+04 5.179696e+04 43937.3622 2.816852e+06 \n",
"2 3.2226 6.492580e+04 6.426965e+04 36946.4646 1.545883e+06 \n",
"3 NaN 1.222355e+04 1.222355e+04 3402.7889 8.497809e+05 \n",
"4 3.2191 7.584828e+04 7.501396e+04 50081.8345 5.445906e+05 \n",
"... ... ... ... ... ... \n",
"5390 0.3212 6.303220e+04 3.736720e+04 27120.6014 7.708838e+05 \n",
"5391 NaN 1.239561e+04 1.239561e+04 3907.6756 1.963464e+05 \n",
"5392 4.4796 2.190864e+06 2.178208e+06 798651.6922 1.496360e+07 \n",
"5393 1.3553 1.843013e+04 1.843013e+04 13404.1045 4.054629e+05 \n",
"5394 3.1604 8.004494e+05 7.454180e+05 232532.2636 1.167055e+07 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 2.5321 194385.1868 185230.5076 103045.2550 6.764605e+05 \n",
"1 1.3003 68015.2346 52260.4413 47031.2918 7.916973e+05 \n",
"2 0.4867 47188.5905 30877.5025 28542.8345 1.308540e+06 \n",
"3 NaN 8000.0000 1577.6325 1577.6325 4.200000e+05 \n",
"4 NaN 16177.0306 8763.6153 7897.4398 2.097028e+06 \n",
"... ... ... ... ... ... \n",
"5381 0.5650 40394.4205 40263.2044 25893.0990 5.004869e+05 \n",
"5382 2.1767 110992.3600 105986.8113 36194.3684 3.518458e+05 \n",
"5383 NaN 38728.0800 38728.0800 26808.2764 2.563799e+05 \n",
"5384 NaN 240794.5408 159659.3800 107284.6868 1.341226e+06 \n",
"5385 NaN 79641.0841 77768.6667 73609.4256 4.502110e+06 \n",
"\n",
" circ_mv is_st \n",
"0 1.335361e+06 False \n",
"1 2.303929e+06 False \n",
"2 1.530260e+06 False \n",
"3 8.497809e+05 False \n",
"4 5.386002e+05 False \n",
"0 6.446022e+05 False \n",
"1 6.083115e+05 False \n",
"2 8.562331e+05 False \n",
"3 8.282571e+04 False \n",
"4 1.136027e+06 False \n",
"... ... ... \n",
"5390 4.570009e+05 False \n",
"5391 1.963464e+05 False \n",
"5392 1.487716e+07 False \n",
"5393 4.054629e+05 False \n",
"5394 1.086819e+07 False \n",
"5381 4.988611e+05 False \n",
"5382 3.359782e+05 False \n",
"5383 2.563799e+05 False \n",
"5384 8.893027e+05 False \n",
"5385 4.396263e+06 False \n",
"\n",
"[5395 rows x 19 columns]\n"
"[5386 rows x 19 columns]\n"
]
}
],
"execution_count": 5
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
@@ -290,74 +337,74 @@
"start_time": "2025-04-09T14:58:16.871184Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"85 002822.SZ 20250409 3.11 1.8467 1.9219 \n",
"123 603959.SH 20250409 3.27 1.7568 2.2420 \n",
"181 688282.SH 20250409 42.59 2.5546 3.0570 \n",
"259 600777.SH 20250409 2.66 1.9331 2.4597 \n",
"283 002052.SZ 20250409 6.15 1.5326 2.5481 \n",
"23 000820.SZ 20250506 2.04 11.8279 12.1552 \n",
"33 300506.SZ 20250506 3.27 0.6104 0.8597 \n",
"82 839680.BJ 20250506 7.25 34.6648 39.7153 \n",
"105 300159.SZ 20250506 1.83 3.6351 4.0740 \n",
"114 300301.SZ 20250506 1.82 1.3707 1.4819 \n",
"... ... ... ... ... ... \n",
"5286 002602.SZ 20250409 5.93 3.0376 3.5162 \n",
"5345 002501.SZ 20250409 1.89 4.3252 5.5834 \n",
"5364 600387.SH 20250409 2.34 0.0904 0.1163 \n",
"5366 002656.SZ 20250409 1.95 2.7047 3.0210 \n",
"5378 300013.SZ 20250409 3.57 2.8370 3.1107 \n",
"5259 600243.SH 20250506 2.43 6.7484 8.1172 \n",
"5264 002528.SZ 20250506 2.35 2.0592 4.3961 \n",
"5294 300044.SZ 20250506 3.31 12.8866 13.4490 \n",
"5324 300097.SZ 20250506 4.36 2.5814 3.0107 \n",
"5345 600200.SH 20250506 3.04 0.2013 0.2433 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"85 2.59 NaN NaN 1.2023 0.5923 0.7314 0.0 \n",
"123 2.22 NaN NaN 4.3282 0.7749 1.1811 0.0 \n",
"181 1.07 NaN NaN 2.9277 172.3150 21.9335 NaN \n",
"259 0.96 6.9694 7.6204 0.8381 2.0443 2.0567 0.0 \n",
"283 0.74 NaN NaN NaN 19.5551 17.1988 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5286 3.30 84.3318 49.2129 1.6993 3.3267 2.3228 0.0 \n",
"5345 1.75 NaN NaN 7.0441 14.0701 19.7111 0.0 \n",
"5364 1.33 NaN NaN 0.3818 0.5148 0.8454 0.0 \n",
"5366 1.75 NaN NaN 3.8456 4.7986 5.9354 0.0 \n",
"5378 0.90 NaN NaN 8.2438 4.8281 4.2666 0.0 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"23 3.99 NaN NaN 9.0141 10.6452 13.5427 0.0 \n",
"33 0.77 NaN NaN 28.5038 19.4588 19.2499 0.0 \n",
"82 1.96 NaN NaN 7.4242 9.3299 11.0451 NaN \n",
"105 1.34 NaN NaN NaN 4.1337 4.1261 0.0 \n",
"114 1.22 NaN NaN 120.9449 2.9900 3.1074 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5259 0.73 NaN NaN 1.6685 4.5071 4.6210 0.0 \n",
"5264 1.52 NaN NaN 15.5269 2.9812 3.6083 0.0 \n",
"5294 2.91 NaN NaN 24.3171 17.6463 26.1361 0.0 \n",
"5324 0.99 NaN NaN 2.7137 3.2758 3.8102 0.0 \n",
"5345 0.05 30.7156 NaN 1.2351 1.3543 1.7858 0.0 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"85 NaN 73467.1821 56245.3696 54046.3738 2.284829e+05 \n",
"123 NaN 49029.8992 49029.8992 38419.3842 1.603278e+05 \n",
"181 NaN 8800.0000 3652.0000 3051.8414 3.747920e+05 \n",
"259 NaN 680049.5825 636615.2391 500325.8436 1.808932e+06 \n",
"283 NaN 74595.9694 74595.5944 44867.2806 4.587652e+05 \n",
"... ... ... ... ... ... \n",
"5286 NaN 745255.6968 687870.8273 594244.1179 4.419366e+06 \n",
"5345 NaN 355000.0000 354999.9006 274999.9006 6.709500e+05 \n",
"5364 NaN 46814.4464 40404.8492 31411.4405 1.095458e+05 \n",
"5366 NaN 71251.9844 60945.7555 54564.8212 1.389414e+05 \n",
"5378 NaN 55835.8894 44606.0865 40680.8215 1.993341e+05 \n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"23 NaN 64362.0201 29403.1899 28611.4718 131298.5210 59982.5074 \n",
"33 NaN 69559.6569 57572.5450 40880.9749 227460.0781 188262.2222 \n",
"82 NaN 6699.9900 4689.3344 4093.0077 48574.9275 33997.6744 \n",
"105 NaN 150196.5923 147183.9203 131325.6306 274859.7639 269346.5741 \n",
"114 NaN 82986.8769 78987.6719 73061.8561 151036.1160 143757.5629 \n",
"... ... ... ... ... ... ... \n",
"5259 NaN 43885.0000 43885.0000 36485.0000 106640.5500 106640.5500 \n",
"5264 NaN 119867.5082 104974.0608 49171.2582 281688.6443 246689.0429 \n",
"5294 NaN 76386.9228 76375.7508 73182.1277 252840.7145 252803.7351 \n",
"5324 NaN 28854.9669 27000.9948 23150.5534 125807.6557 117724.3373 \n",
"5345 NaN 71215.1832 71087.9480 58808.3718 216494.1569 216107.3619 \n",
"\n",
" circ_mv is_st \n",
"85 1.749231e+05 True \n",
"123 1.603278e+05 True \n",
"181 1.555387e+05 True \n",
"259 1.693397e+06 True \n",
"283 4.587629e+05 True \n",
"... ... ... \n",
"5286 4.079074e+06 True \n",
"5345 6.709498e+05 True \n",
"5364 9.454735e+04 True \n",
"5366 1.188442e+05 True \n",
"5378 1.592437e+05 True \n",
" is_st \n",
"23 True \n",
"33 True \n",
"82 True \n",
"105 True \n",
"114 True \n",
"... ... \n",
"5259 True \n",
"5264 True \n",
"5294 True \n",
"5324 True \n",
"5345 True \n",
"\n",
"[106 rows x 19 columns]\n"
"[196 rows x 19 columns]\n"
]
}
],
"execution_count": 6
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
@@ -365,12 +412,6 @@
"start_time": "2025-04-09T14:58:16.903459Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
@@ -380,10 +421,16 @@
]
}
],
"execution_count": 7
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
@@ -391,18 +438,13 @@
"start_time": "2025-04-09T14:58:17.816332Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8518306 entries, 0 to 5394\n",
"Index: 8604524 entries, 0 to 5385\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -410,17 +452,21 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 203.1+ MB\n",
"memory usage: 205.1+ MB\n",
"None\n"
]
}
],
"execution_count": 8
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
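
Note: besides extending the calendar window to 20250720 and refreshing the outputs, the substantive change in this notebook is the ST filter. Instead of matching change_reason against 'ST'/'*ST', it now keeps any name-change record whose name contains 'ST'; a stock is then flagged is_st on a trade date if that date falls inside one of those name spans. The check below is a minimal sketch of that rule in the notebook's own terms (name_change_dict maps ts_code to a frame with datetime start_date/end_date); treating an open end_date as "still ST today" is an assumption.

from datetime import datetime
import pandas as pd

def is_st(name_change_dict, stock_code, target_date_str):
    """True if the stock's name contained 'ST' on the given 'YYYYMMDD' date."""
    target_date = datetime.strptime(target_date_str, '%Y%m%d')
    spans = name_change_dict.get(stock_code)
    if spans is None:
        return False
    for _, row in spans.iterrows():
        start, end = row['start_date'], row['end_date']
        if pd.isna(end):                 # open interval: assume the stock is still ST
            end = datetime.now()
        if start <= target_date <= end:
            return True
    return False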

File diff suppressed because it is too large

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
@@ -9,73 +10,57 @@
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import tushare as ts"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "48ae71ed02d61819",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"cell_type": "code",
"source": [
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date '])\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
],
"id": "48ae71ed02d61819",
"outputs": [],
"execution_count": 2
"source": [
"daily_basic = pd.read_hdf('../../../data/daily_basic.h5', key='daily_basic')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e6606a96e5728b8",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"cell_type": "code",
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "e6606a96e5728b8",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"# 判断股票是否为 ST 的函数\n",
"#stock_code = 'xxxxxx.SH'\n",
"#target_date = '20200830'\n",
"#若为ST返回True否则返回False\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
@@ -84,15 +69,129 @@
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
" if eds == None:\n",
" ed = datetime.now()\n",
" # else:\n",
" # ed = datetime.strptime(eds, '%Y%m%d')\n",
" if eds is None or eds is pd.NaT:\n",
" eds = datetime.now()\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" # st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" st_data = group[group['name'].str.contains('ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "41bc125d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 603848.SH 20250430 14.36 0.5401 4.6897 \n",
"1 300290.SZ 20250430 16.30 2.8540 3.5686 \n",
"2 603877.SH 20250430 15.90 0.3794 1.2707 \n",
"3 834639.BJ 20250430 8.37 6.1158 7.8866 \n",
"4 000909.SZ 20250430 5.72 0.6104 1.0424 \n",
"... ... ... ... ... ... \n",
"8594006 600708.SH 20170103 9.03 0.7694 1.0169 \n",
"8594007 600712.SH 20170103 10.29 0.5859 0.8028 \n",
"8594008 001872.SZ 20170103 19.33 1.0970 5.4258 \n",
"8594009 001914.SZ 20170103 12.37 3.2627 6.6991 \n",
"8594010 302132.SZ 20170103 23.28 0.4912 1.5149 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm \\\n",
"0 1.31 23.3421 25.6176 2.3433 3.7254 3.8065 \n",
"1 1.00 NaN NaN 13.1076 13.5867 13.5756 \n",
"2 0.98 29.1494 33.6975 1.6522 1.1075 1.1304 \n",
"3 0.87 70.0984 215.1863 2.0171 0.8405 0.8329 \n",
"4 0.55 NaN NaN 2.3539 7.7727 8.2925 \n",
"... ... ... ... ... ... ... \n",
"8594006 0.85 23.3367 22.2458 1.4847 0.9613 0.9248 \n",
"8594007 0.67 202.4855 287.1454 5.1852 2.3682 2.5386 \n",
"8594008 0.77 23.6158 23.1883 2.7052 6.6556 6.5584 \n",
"8594009 1.02 20.5631 15.1595 2.1186 1.4950 1.2600 \n",
"8594010 0.74 91.3908 84.6980 6.9391 8.9531 8.8570 \n",
"\n",
" dv_ratio dv_ttm total_share float_share free_share total_mv \\\n",
"0 2.0904 2.0904 40391.1511 40240.6511 4634.6511 5.800169e+05 \n",
"1 0.0000 NaN 63973.2569 63922.1969 51122.1969 1.042764e+06 \n",
"2 3.7471 3.7471 47382.5333 46932.3226 14014.3219 7.533823e+05 \n",
"3 NaN NaN 20160.0000 11721.5883 9089.7537 1.687392e+05 \n",
"4 0.0000 NaN 43771.4245 43771.0570 25634.2299 2.503725e+05 \n",
"... ... ... ... ... ... ... \n",
"8594006 1.1074 1.1074 131871.9966 75088.9215 56812.2811 1.190804e+06 \n",
"8594007 0.1555 0.1555 54465.5360 53795.9475 39266.3119 5.604504e+05 \n",
"8594008 2.1211 2.1211 64476.3730 46486.6050 9398.8050 1.246328e+06 \n",
"8594009 0.4042 0.4042 66696.1416 66678.0666 32475.1786 8.250313e+05 \n",
"8594010 0.2291 0.2291 39384.0333 30419.3588 9862.3809 9.168603e+05 \n",
"\n",
" circ_mv is_st \n",
"0 5.778557e+05 False \n",
"1 1.041932e+06 False \n",
"2 7.462239e+05 False \n",
"3 9.810969e+04 False \n",
"4 2.503704e+05 True \n",
"... ... ... \n",
"8594006 6.780530e+05 False \n",
"8594007 5.535603e+05 False \n",
"8594008 8.985861e+05 False \n",
"8594009 8.248077e+05 False \n",
"8594010 7.081627e+05 False \n",
"\n",
"[8594011 rows x 19 columns]\n"
]
}
],
"source": [
"print(daily_basic)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
},
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603848.SH 20250430 False\n",
"1 300290.SZ 20250430 False\n",
"2 603877.SH 20250430 False\n",
"3 834639.BJ 20250430 False\n",
"4 000909.SZ 20250430 True\n"
]
}
],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"\n",
"print('is st...')\n",
"# 创建一个新的列 is_st判断每只股票是否是 ST\n",
@@ -101,47 +200,30 @@
")\n",
"\n",
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
"daily_basic.to_hdf('../../../data/daily_basic.h5', key='daily_basic', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603429.SH 20250127 False\n",
"1 300917.SZ 20250127 False\n",
"2 301266.SZ 20250127 False\n",
"3 688399.SH 20250127 False\n",
"4 603737.SH 20250127 False\n"
]
}
],
"execution_count": 4
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
@@ -9,17 +10,17 @@
"start_time": "2025-04-09T14:57:39.137312Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
@@ -27,6 +28,26 @@
"start_time": "2025-04-09T14:57:40.190466Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8435700 entries, 0 to 40956\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 193.1+ MB\n",
"None\n",
"20250430\n",
"start_date: 20250506\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -40,44 +61,85 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250720')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8353711 entries, 0 to 5126\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 191.2+ MB\n",
"None\n",
"20250408\n",
"start_date: 20250409\n"
"任务 20250717 完成\n",
"任务 20250718 完成\n",
"任务 20250715 完成\n",
"任务 20250716 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250710 完成\n",
"任务 20250709 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250704 完成\n",
"任务 20250703 完成\n",
"任务 20250702 完成\n",
"任务 20250701 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250527 完成\n",
"任务 20250528 完成\n",
"任务 20250523 完成\n",
"任务 20250526 完成\n",
"任务 20250521 完成\n",
"任务 20250522 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n",
"任务 20250506 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -107,27 +169,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
@@ -135,14 +181,14 @@
"start_time": "2025-04-09T14:58:17.210734Z"
}
},
"outputs": [],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
],
"outputs": [],
"execution_count": 4
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
@@ -150,15 +196,6 @@
"start_time": "2025-04-09T14:58:17.229837Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -168,12 +205,20 @@
]
}
],
"execution_count": 5
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
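Note: the notebook above follows the same incremental-update pattern as the other files in this commit: read the newest trade_date already stored, list the open trading days after it from pro.trade_cal, fetch each missing day concurrently, then append to the HDF5 table. A condensed sketch of that flow follows; the HDF5 path and the per-date fetch call are assumptions (the actual per-date interface sits in an elided hunk), and the token is assumed to have been set with ts.set_token as in the first cell.

import pandas as pd
import tushare as ts
from concurrent.futures import ThreadPoolExecutor, as_completed

pro = ts.pro_api()                       # token assumed already set
h5_filename = '../../data/daily_data.h5' # assumed path for the sketch
key = 'money_flow'

# 1) Last date already stored.
with pd.HDFStore(h5_filename, mode='r') as store:
    max_date = store[key]['trade_date'].max()

# 2) Open trading days after it.
trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250720')
trade_cal = trade_cal[trade_cal['is_open'] == 1]  # 只保留交易日
trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()

def fetch_one_day(trade_date):
    # Stand-in for whichever per-date Tushare call the notebook uses.
    return pro.moneyflow(trade_date=trade_date)

# 3) Fetch the missing days concurrently, then append to the HDF5 table.
all_daily_data = []
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(fetch_one_day, d): d for d in trade_dates}
    for fut in as_completed(futures):
        try:
            all_daily_data.append(fut.result())
            print(f"任务 {futures[fut]} 完成")
        except Exception as e:
            print(f"获取 {futures[fut]} 数据时出错: {e}")

if all_daily_data:  # nothing new means nothing to write
    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)
    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table',
                             append=True, data_columns=True)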

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
@@ -9,17 +10,17 @@
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
@@ -27,6 +28,32 @@
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4745 600276.SH 20250506\n",
"4746 600278.SH 20250506\n",
"4747 600279.SH 20250506\n",
"4736 600262.SH 20250506\n",
"281 000791.SZ 20250506\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10436295 entries, 0 to 113592\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 238.9+ MB\n",
"None\n",
"20250506\n",
"20250507\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -41,50 +68,84 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250720')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4721 600284.SH 20250408\n",
"4722 600285.SH 20250408\n",
"4723 600287.SH 20250408\n",
"4712 600272.SH 20250408\n",
"5 000008.SZ 20250408\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10315620 entries, 0 to 14151\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 236.1+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250715 完成\n",
"任务 20250716 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250709 完成\n",
"任务 20250710 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250703 完成\n",
"任务 20250704 完成\n",
"任务 20250701 完成\n",
"任务 20250702 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -115,27 +176,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250411 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
@@ -143,37 +188,38 @@
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250409 000001.SZ 11.90 9.74\n",
"1 20250409 000002.SZ 7.48 6.12\n",
"2 20250409 000004.SZ 9.53 7.79\n",
"3 20250409 000006.SZ 6.28 5.14\n",
"4 20250409 000007.SZ 5.91 4.83\n",
"... ... ... ... ...\n",
"7077 20250409 920108.BJ 26.55 14.31\n",
"7078 20250409 920111.BJ 30.84 16.62\n",
"7079 20250409 920116.BJ 100.29 54.01\n",
"7080 20250409 920118.BJ 31.62 17.04\n",
"7081 20250409 920128.BJ 35.26 19.00\n",
"\n",
"[7082 rows x 4 columns]]\n"
"[]\n"
]
},
{
"ename": "ValueError",
"evalue": "No objects to concatenate",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(all_daily_data)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m all_daily_data_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(all_daily_data, ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[0;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[0;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m _Concatenator(\n\u001b[0;32m 383\u001b[0m objs,\n\u001b[0;32m 384\u001b[0m axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[0;32m 385\u001b[0m ignore_index\u001b[38;5;241m=\u001b[39mignore_index,\n\u001b[0;32m 386\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m 387\u001b[0m keys\u001b[38;5;241m=\u001b[39mkeys,\n\u001b[0;32m 388\u001b[0m levels\u001b[38;5;241m=\u001b[39mlevels,\n\u001b[0;32m 389\u001b[0m names\u001b[38;5;241m=\u001b[39mnames,\n\u001b[0;32m 390\u001b[0m verify_integrity\u001b[38;5;241m=\u001b[39mverify_integrity,\n\u001b[0;32m 391\u001b[0m copy\u001b[38;5;241m=\u001b[39mcopy,\n\u001b[0;32m 392\u001b[0m sort\u001b[38;5;241m=\u001b[39msort,\n\u001b[0;32m 393\u001b[0m )\n\u001b[0;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[0;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[1;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_clean_keys_and_objs(objs, keys)\n\u001b[0;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[0;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:507\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[1;34m(self, objs, keys)\u001b[0m\n\u001b[0;32m 504\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(objs)\n\u001b[0;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 507\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo objects to concatenate\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 510\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(com\u001b[38;5;241m.\u001b[39mnot_none(\u001b[38;5;241m*\u001b[39mobjs_list))\n",
"\u001b[1;31mValueError\u001b[0m: No objects to concatenate"
]
}
],
"execution_count": 4
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
},
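Note: the traceback above comes from all_daily_data being empty when this cell ran (the preceding print shows []), so pd.concat has nothing to combine. A minimal guard, reusing the names from the cells above, turns that hard failure into a no-op:

# Skip the merge and the HDF5 append when no new per-day frames were collected.
if all_daily_data:
    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)
    all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a',
                             format='table', append=True, data_columns=True)
else:
    print("没有新的每日数据,跳过合并与写入")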
{
"cell_type": "code",
"execution_count": null,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
@@ -181,14 +227,6 @@
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -198,10 +236,18 @@
]
}
],
"execution_count": 5
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
@@ -209,14 +255,13 @@
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},