多线程rank6.0,赚钱,回撤略微减小

This commit is contained in:
liaozhaorun
2025-04-08 20:32:51 +08:00
parent e0087aa6e1
commit dc1e62c77c
9 changed files with 3737 additions and 4668 deletions

View File

@@ -2,30 +2,55 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:31.596637Z",
"start_time": "2025-03-30T16:42:30.883319Z"
"end_time": "2025-04-06T15:33:48.836794Z",
"start_time": "2025-04-06T15:33:48.098706Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:37.590148Z",
"start_time": "2025-03-30T16:42:31.596637Z"
"end_time": "2025-04-06T15:33:55.800360Z",
"start_time": "2025-04-06T15:33:49.011404Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"21567 920108.BJ 20250401\n",
"21568 920111.BJ 20250401\n",
"21569 920116.BJ 20250401\n",
"21570 920118.BJ 20250401\n",
"21571 920128.BJ 20250401\n",
"\n",
"[7551938 rows x 2 columns]\n",
"20250403\n",
"start_date: 20250407\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -44,42 +69,36 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.293636Z",
"start_time": "2025-04-06T15:33:55.806283Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"32304 920108.BJ 20250314\n",
"32305 920111.BJ 20250314\n",
"32306 920116.BJ 20250314\n",
"32307 920118.BJ 20250314\n",
"32308 920128.BJ 20250314\n",
"\n",
"[7503415 rows x 2 columns]\n",
"20250321\n",
"start_date: 20250324\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:29.275885Z",
"start_time": "2025-03-30T16:42:37.858763Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -109,55 +128,18 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250409 完成\n",
"任务 20250410 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:30.100678Z",
"start_time": "2025-03-30T16:43:29.311710Z"
"end_time": "2025-04-06T15:33:57.874278Z",
"start_time": "2025-04-06T15:33:57.304371Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -167,7 +149,16 @@
]
}
],
"execution_count": 4
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {

View File

@@ -2,30 +2,55 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:32.996500Z",
"start_time": "2025-03-30T16:42:32.209631Z"
"end_time": "2025-04-06T15:33:36.424263Z",
"start_time": "2025-04-06T15:33:36.048334Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:34.591433Z",
"start_time": "2025-03-30T16:42:32.996500Z"
"end_time": "2025-04-06T15:33:37.871314Z",
"start_time": "2025-04-06T15:33:36.424263Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"1751 859811.SI 20250401\n",
"1752 859821.SI 20250401\n",
"1753 859822.SI 20250401\n",
"1754 859852.SI 20250401\n",
"1755 859951.SI 20250401\n",
"\n",
"[1057124 rows x 2 columns]\n",
"20250403\n",
"start_date: 20250407\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -44,42 +69,36 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:40.123037Z",
"start_time": "2025-04-06T15:33:37.881433Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"2629 859811.SI 20250314\n",
"2630 859821.SI 20250314\n",
"2631 859822.SI 20250314\n",
"2632 859852.SI 20250314\n",
"2633 859951.SI 20250314\n",
"\n",
"[1053173 rows x 2 columns]\n",
"20250321\n",
"start_date: 20250324\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250409 完成\n",
"任务 20250410 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:37.718270Z",
"start_time": "2025-03-30T16:42:34.817305Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -109,55 +128,18 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:37.922827Z",
"start_time": "2025-03-30T16:42:37.739040Z"
"end_time": "2025-04-06T15:33:40.286614Z",
"start_time": "2025-04-06T15:33:40.138227Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -167,7 +149,16 @@
]
}
],
"execution_count": 4
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {

View File

@@ -2,30 +2,32 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:34.194992Z",
"start_time": "2025-03-30T16:42:33.440178Z"
"end_time": "2025-04-06T15:33:43.537483Z",
"start_time": "2025-04-06T15:33:42.844004Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:36.432691Z",
"start_time": "2025-03-30T16:42:34.197998Z"
"end_time": "2025-04-06T15:33:45.387772Z",
"start_time": "2025-04-06T15:33:43.537483Z"
}
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
@@ -73,19 +75,38 @@
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
],
"outputs": [],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.790361Z",
"start_time": "2025-03-30T16:42:36.633554Z"
"end_time": "2025-04-06T15:33:54.089114Z",
"start_time": "2025-04-06T15:33:45.576286Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8502128 entries, 0 to 21571\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.6+ MB\n",
"None\n",
"20250403\n",
"20250407\n"
]
}
],
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
@@ -104,39 +125,37 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:33:57.041254Z",
"start_time": "2025-04-06T15:33:54.103322Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8453605 entries, 0 to 32308\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 193.5+ MB\n",
"None\n",
"20250321\n",
"20250324\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:07.947442Z",
"start_time": "2025-03-30T16:43:03.827519Z"
}
},
"source": [
"\n",
"\n",
@@ -186,202 +205,169 @@
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250331 完成\n",
"任务 20250401 完成\n",
"任务 20250327 完成\n",
"任务 20250328 完成\n",
"任务 20250326 完成\n",
"任务 20250324 完成\n",
"任务 20250325 完成\n"
]
}
],
"execution_count": 4
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:07.962318Z",
"start_time": "2025-03-30T16:43:07.951757Z"
"end_time": "2025-04-06T15:33:57.072796Z",
"start_time": "2025-04-06T15:33:57.061670Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 000059.SZ 20250407 4.54 1.8414 3.4767 \n",
"1 600830.SH 20250407 8.33 2.5217 3.6802 \n",
"2 688061.SH 20250407 24.45 3.1011 3.1011 \n",
"3 600868.SH 20250407 2.79 3.8477 4.1435 \n",
"4 605168.SH 20250407 25.98 1.3857 2.8470 \n",
"... ... ... ... ... ... \n",
"5386 688259.SH 20250407 34.99 5.9799 11.4393 \n",
"5387 301316.SZ 20250407 19.20 7.2272 7.9512 \n",
"5388 601116.SH 20250407 10.37 2.3317 7.1579 \n",
"5389 605016.SH 20250407 17.20 1.4773 3.9134 \n",
"5390 600148.SH 20250407 16.07 2.0776 4.5745 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.84 103.2927 NaN 0.5851 0.1574 0.1928 0.3084 \n",
"1 0.69 71.1750 71.1750 1.7467 11.2902 11.2902 0.1801 \n",
"2 2.31 292.8121 NaN 1.1504 6.1795 4.9755 NaN \n",
"3 1.16 NaN NaN 2.3425 16.8832 16.0274 0.0000 \n",
"4 1.56 10.3735 14.0394 1.9988 1.0366 1.2218 4.5870 \n",
"... ... ... ... ... ... ... ... \n",
"5386 1.10 66.8795 64.8845 2.6173 5.9119 6.5930 NaN \n",
"5387 1.30 94.0750 110.9182 7.1350 5.7094 4.8530 0.4126 \n",
"5388 1.78 41.2451 36.3656 1.7811 1.4576 1.4350 1.9286 \n",
"5389 1.05 28.7938 22.2858 3.3051 6.4003 4.8254 1.3640 \n",
"5390 2.12 3441.4901 274.8323 4.8916 3.2666 3.3043 0.1867 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"0 0.3084 159944.2537 159944.2537 84712.3362 726146.9118 726146.9118 \n",
"1 0.1801 45432.2747 45432.2747 31131.0133 378450.8483 378450.8483 \n",
"2 NaN 11488.9391 4329.7770 4329.7770 280904.5610 105863.0477 \n",
"3 NaN 189814.8679 189814.8679 176264.8506 529583.4814 529583.4814 \n",
"4 4.5870 21081.6986 21081.6986 10260.7016 547702.5296 547702.5296 \n",
"... ... ... ... ... ... ... \n",
"5386 NaN 11170.0000 11170.0000 5839.1660 390838.3000 390838.3000 \n",
"5387 0.4126 40400.0000 24282.6503 22071.3403 775680.0000 466226.8858 \n",
"5388 1.9286 54767.8400 54767.8400 17840.9208 567942.5008 567942.5008 \n",
"5389 1.3640 32308.6400 32308.6400 12196.5716 555708.6080 555708.6080 \n",
"5390 0.1867 14151.6450 14151.6450 6427.3300 227416.9352 227416.9352 \n",
"\n",
" is_st \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False \n",
"... ... \n",
"5386 False \n",
"5387 False \n",
"5388 False \n",
"5389 False \n",
"5390 False \n",
"\n",
"[5391 rows x 19 columns]\n"
]
}
],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 603328.SH 20250327 10.44 1.0910 2.6596 \n",
"1 603989.SH 20250327 15.66 0.9036 2.6145 \n",
"2 603194.SH 20250327 38.03 14.0348 14.0348 \n",
"3 600884.SH 20250327 7.13 1.9769 2.1153 \n",
"4 688325.SH 20250327 47.26 1.5250 1.8078 \n",
"... ... ... ... ... ... \n",
"26946 688539.SH 20250325 26.70 1.0257 1.3011 \n",
"26947 688479.SH 20250325 18.73 0.9840 1.2588 \n",
"26948 000552.SZ 20250325 2.63 1.8147 3.0665 \n",
"26949 688719.SH 20250325 31.64 4.2998 5.1737 \n",
"26950 002709.SZ 20250325 19.50 1.2468 1.4268 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.79 29.3625 23.3887 2.5786 3.2807 2.9727 1.8582 \n",
"1 0.79 17.8968 27.7940 1.7060 1.8591 1.6666 1.6823 \n",
"2 1.87 18.9266 18.3213 3.2891 2.5755 2.4322 NaN \n",
"3 0.52 20.9930 NaN 0.7305 0.8425 0.9106 2.7224 \n",
"4 0.93 67.1638 50.1073 2.3433 16.1029 10.2149 NaN \n",
"... ... ... ... ... ... ... ... \n",
"26946 0.56 51.5254 83.3548 2.8475 14.5500 13.9718 NaN \n",
"26947 0.61 23.5448 33.4921 1.4043 3.6736 4.5444 NaN \n",
"26948 1.42 8.0989 11.6324 0.8431 1.2501 1.3463 3.8023 \n",
"26949 1.64 26.3323 49.9921 2.0474 4.4195 3.6954 NaN \n",
"26950 0.76 19.7447 78.2248 2.9106 2.4233 3.0741 1.5444 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 1.8582 99844.2611 99844.2611 40955.5563 1.042374e+06 \n",
"1 1.6823 40113.0603 40113.0603 13863.2102 6.281705e+05 \n",
"2 NaN 40100.0000 4982.8436 4982.8436 1.525003e+06 \n",
"3 2.7224 225339.6168 175723.6492 164220.4548 1.606671e+06 \n",
"4 NaN 8494.7740 3830.4117 3231.0886 4.014630e+05 \n",
"... ... ... ... ... ... \n",
"26946 NaN 18592.0000 10286.0800 8109.0800 4.964064e+05 \n",
"26947 NaN 14431.7400 6087.4224 4758.2224 2.703065e+05 \n",
"26948 3.8023 535180.1936 372577.7383 220477.9354 1.407524e+06 \n",
"26949 NaN 11538.5418 7349.9938 6108.5305 3.650795e+05 \n",
"26950 1.5444 191434.3762 138501.6891 121034.9868 3.732970e+06 \n",
"\n",
" circ_mv is_st \n",
"0 1.042374e+06 False \n",
"1 6.281705e+05 False \n",
"2 1.894975e+05 False \n",
"3 1.252910e+06 False \n",
"4 1.810253e+05 False \n",
"... ... ... \n",
"26946 2.746383e+05 False \n",
"26947 1.140174e+05 False \n",
"26948 9.798795e+05 False \n",
"26949 2.325538e+05 False \n",
"26950 2.700783e+06 False \n",
"\n",
"[26951 rows x 19 columns]\n"
]
}
],
"execution_count": 5
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:08.000073Z",
"start_time": "2025-03-30T16:43:07.984082Z"
"end_time": "2025-04-06T15:33:57.104132Z",
"start_time": "2025-04-06T15:33:57.095010Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"100 002528.SZ 20250327 2.53 0.6855 1.4642 \n",
"128 300163.SZ 20250327 3.15 3.0563 3.2999 \n",
"129 300205.SZ 20250327 4.34 0.9211 1.5246 \n",
"147 000851.SZ 20250327 2.53 2.2990 2.6472 \n",
"299 300097.SZ 20250327 4.88 3.1648 3.6912 \n",
"... ... ... ... ... ... \n",
"26750 000506.SZ 20250325 5.21 1.2689 1.8939 \n",
"26770 002592.SZ 20250325 5.22 1.0547 1.6712 \n",
"26786 600603.SH 20250325 7.63 0.4610 1.0776 \n",
"26828 002528.SZ 20250325 2.51 0.9799 2.0928 \n",
"26906 300097.SZ 20250325 4.92 3.2717 3.8159 \n",
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"16 000656.SZ 20250407 1.28 0.9982 1.1644 \n",
"62 002748.SZ 20250407 7.32 0.5503 1.1888 \n",
"114 002490.SZ 20250407 3.49 0.7559 1.3380 \n",
"128 300165.SZ 20250407 2.78 4.0431 4.7932 \n",
"278 600303.SH 20250407 3.22 1.1873 1.4918 \n",
"... ... ... ... ... ... \n",
"5263 002217.SZ 20250407 2.07 0.1251 0.1569 \n",
"5267 002808.SZ 20250407 2.99 4.0901 4.7924 \n",
"5290 002602.SZ 20250407 6.44 0.2276 0.2634 \n",
"5315 002501.SZ 20250407 1.92 1.5653 2.0207 \n",
"5375 300376.SZ 20250407 2.96 1.4873 3.4865 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"100 0.43 NaN NaN 7.3528 2.1714 2.7257 0.0000 \n",
"128 0.87 NaN NaN 3.0547 5.9187 5.8999 0.0000 \n",
"129 0.63 94.7108 NaN 1.3743 1.0976 1.5538 0.4608 \n",
"147 0.64 NaN NaN 1.0360 0.4939 0.8666 0.0000 \n",
"299 0.70 10.0614 NaN 2.2055 2.9549 3.1999 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"26750 0.37 725.4828 NaN 8.2869 17.0204 21.9262 0.0000 \n",
"26770 0.94 14.0192 61.1217 1.6387 2.7253 2.3121 0.0000 \n",
"26786 0.56 15.6086 24.2223 1.3160 1.8461 2.4398 0.0000 \n",
"26828 0.58 NaN NaN 7.2947 2.1542 2.7042 0.0000 \n",
"26906 0.53 10.1438 NaN 2.2236 2.9791 3.2261 0.0000 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"16 0.44 NaN NaN NaN 0.1081 0.1637 0.0000 \n",
"62 0.61 96.0467 49.7297 1.3328 0.8402 0.8839 1.3661 \n",
"114 0.19 NaN NaN 5.6564 2.0529 2.0529 0.0000 \n",
"128 2.22 NaN NaN 0.9988 1.3542 1.4288 0.0000 \n",
"278 0.77 NaN NaN 1.4997 1.6142 1.6353 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"5263 0.23 NaN NaN NaN 3.3436 10.3100 0.0000 \n",
"5267 0.79 NaN NaN 2.5039 5.2047 4.8881 0.6689 \n",
"5290 0.20 91.5846 53.4453 1.8455 3.6128 2.5226 0.0000 \n",
"5315 0.58 NaN NaN 7.1559 14.2934 20.0240 0.0000 \n",
"5375 4.52 12.2436 36.2242 0.9837 1.4380 2.0320 1.6554 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"100 NaN 119867.5082 105021.9577 49171.2582 303264.7957 265705.5530 \n",
"128 NaN 47400.0000 41596.4553 38525.5904 149310.0000 131028.8342 \n",
"129 0.4608 43005.6000 42599.1218 25737.4813 186644.3040 184880.1886 \n",
"147 NaN 115786.0020 113197.7266 98311.5254 292938.5851 286390.2483 \n",
"299 NaN 28854.9669 27000.9948 23150.5534 140812.2385 131764.8546 \n",
"... ... ... ... ... ... ... \n",
"26750 NaN 92901.7761 92867.0961 62218.8027 484018.2535 483837.5707 \n",
"26770 NaN 28333.1157 26271.6370 16580.1814 147898.8640 137137.9451 \n",
"26786 NaN 119332.9151 119332.9151 51048.6002 910510.1422 910510.1422 \n",
"26828 NaN 119867.5082 105021.9577 49171.2582 300867.4456 263605.1138 \n",
"26906 NaN 28854.9669 27000.9948 23150.5534 141966.4371 132844.8944 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"16 NaN 533971.5816 531174.3236 455354.2392 6.834836e+05 \n",
"62 1.3661 24000.0000 24000.0000 11108.5000 1.756800e+05 \n",
"114 NaN 79784.8400 54161.3625 30599.6625 2.784491e+05 \n",
"128 NaN 49551.1725 42053.2110 35472.8422 1.377523e+05 \n",
"278 NaN 68360.4211 67560.4211 53770.9211 2.201206e+05 \n",
"... ... ... ... ... ... \n",
"5263 NaN 747939.8928 568036.4278 453036.0995 1.548236e+06 \n",
"5267 0.6689 26880.0000 18638.3713 15907.0731 8.037120e+04 \n",
"5290 NaN 745255.6968 687870.8273 594244.1179 4.799447e+06 \n",
"5315 NaN 355000.0000 354999.9006 274999.9006 6.816000e+05 \n",
"5375 1.6554 232824.0476 232743.4901 99284.6609 6.891592e+05 \n",
"\n",
" is_st \n",
"100 True \n",
"128 True \n",
"129 True \n",
"147 True \n",
"299 True \n",
"... ... \n",
"26750 True \n",
"26770 True \n",
"26786 True \n",
"26828 True \n",
"26906 True \n",
" circ_mv is_st \n",
"16 6.799031e+05 True \n",
"62 1.756800e+05 True \n",
"114 1.890232e+05 True \n",
"128 1.169079e+05 True \n",
"278 2.175446e+05 True \n",
"... ... ... \n",
"5263 1.175835e+06 True \n",
"5267 5.572873e+04 True \n",
"5290 4.429888e+06 True \n",
"5315 6.815998e+05 True \n",
"5375 6.889207e+05 True \n",
"\n",
"[540 rows x 19 columns]\n"
"[106 rows x 19 columns]\n"
]
}
],
"execution_count": 6
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:08.703938Z",
"start_time": "2025-03-30T16:43:08.021067Z"
"end_time": "2025-04-06T15:33:57.927188Z",
"start_time": "2025-04-06T15:33:57.127166Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
@@ -391,29 +377,30 @@
]
}
],
"execution_count": 7
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:15.188800Z",
"start_time": "2025-03-30T16:43:08.725449Z"
"end_time": "2025-04-06T15:34:06.721517Z",
"start_time": "2025-04-06T15:33:57.951119Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8480556 entries, 0 to 26950\n",
"Index: 8507519 entries, 0 to 5390\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -421,12 +408,16 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 202.2+ MB\n",
"memory usage: 202.8+ MB\n",
"None\n"
]
}
],
"execution_count": 8
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
]
}
],
"metadata": {

File diff suppressed because it is too large Load Diff

View File

@@ -2,31 +2,52 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:37.847407Z",
"start_time": "2025-03-30T16:42:36.773187Z"
"end_time": "2025-04-06T15:34:19.686298Z",
"start_time": "2025-04-06T15:34:19.679462Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:59.016187Z",
"start_time": "2025-03-30T16:42:37.850022Z"
"end_time": "2025-04-06T15:34:29.569406Z",
"start_time": "2025-04-06T15:34:19.711970Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8343458 entries, 0 to 20511\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 191.0+ MB\n",
"None\n",
"20250403\n",
"start_date: 20250407\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -45,39 +66,37 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:34:32.842166Z",
"start_time": "2025-04-06T15:34:29.601368Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8297316 entries, 0 to 30724\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 189.9+ MB\n",
"None\n",
"20250321\n",
"start_date: 20250324\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250409 完成\n",
"任务 20250410 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.168764Z",
"start_time": "2025-03-30T16:42:59.422934Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -107,69 +126,33 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.181032Z",
"start_time": "2025-03-30T16:43:03.173867Z"
"end_time": "2025-04-06T15:34:32.851075Z",
"start_time": "2025-04-06T15:34:32.844866Z"
}
},
"outputs": [],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
],
"outputs": [],
"execution_count": 4
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:05.401668Z",
"start_time": "2025-03-30T16:43:03.197033Z"
"end_time": "2025-04-06T15:34:35.261741Z",
"start_time": "2025-04-06T15:34:32.864789Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -179,7 +162,15 @@
]
}
],
"execution_count": 5
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {

View File

@@ -2,31 +2,58 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:39.056767Z",
"start_time": "2025-03-30T16:42:37.817887Z"
"end_time": "2025-04-06T15:34:34.020485Z",
"start_time": "2025-04-06T15:34:33.497731Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:42:59.784780Z",
"start_time": "2025-03-30T16:42:39.056767Z"
"end_time": "2025-04-06T15:34:46.227924Z",
"start_time": "2025-04-06T15:34:34.042810Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4717 600285.SH 20250403\n",
"4718 600287.SH 20250403\n",
"4719 600288.SH 20250403\n",
"4708 600273.SH 20250403\n",
"5309 601121.SH 20250403\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10301468 entries, 0 to 28272\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 235.8+ MB\n",
"None\n",
"20250403\n",
"20250407\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -46,45 +73,37 @@
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-06T15:34:48.652346Z",
"start_time": "2025-04-06T15:34:46.236695Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4705 600289.SH 20250321\n",
"4706 600292.SH 20250321\n",
"4707 600293.SH 20250321\n",
"4696 600279.SH 20250321\n",
"7051 920116.BJ 20250321\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10237887 entries, 0 to 35266\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 234.3+ MB\n",
"None\n",
"20250321\n",
"20250324\n"
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.372001Z",
"start_time": "2025-03-30T16:43:00.012140Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -115,143 +134,69 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250409 完成\n",
"任务 20250410 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250327 完成\n",
"任务 20250328 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.397757Z",
"start_time": "2025-03-30T16:43:03.384786Z"
"end_time": "2025-04-06T15:34:48.680504Z",
"start_time": "2025-04-06T15:34:48.665530Z"
}
},
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250327 000001.SZ 12.52 10.24\n",
"1 20250327 000002.SZ 7.92 6.48\n",
"2 20250327 000004.SZ 11.40 9.32\n",
"3 20250327 000006.SZ 7.44 6.08\n",
"4 20250327 000007.SZ 7.00 5.72\n",
"0 20250408 000001.SZ 11.77 9.63\n",
"1 20250408 000002.SZ 7.26 5.94\n",
"2 20250408 000004.SZ 9.72 7.96\n",
"3 20250408 000006.SZ 6.90 5.64\n",
"4 20250408 000007.SZ 6.14 5.02\n",
"... ... ... ... ...\n",
"7059 20250327 920108.BJ 33.56 18.08\n",
"7060 20250327 920111.BJ 40.57 21.85\n",
"7061 20250327 920116.BJ 126.29 68.01\n",
"7062 20250327 920118.BJ 44.14 23.78\n",
"7063 20250327 920128.BJ 47.35 25.51\n",
"7072 20250408 920108.BJ 25.33 13.65\n",
"7073 20250408 920111.BJ 29.38 15.82\n",
"7074 20250408 920116.BJ 96.40 51.92\n",
"7075 20250408 920118.BJ 30.16 16.24\n",
"7076 20250408 920128.BJ 34.15 18.39\n",
"\n",
"[7064 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250328 000001.SZ 12.53 10.25\n",
"1 20250328 000002.SZ 7.89 6.45\n",
"2 20250328 000004.SZ 11.19 9.15\n",
"3 20250328 000006.SZ 8.18 6.70\n",
"4 20250328 000007.SZ 6.99 5.72\n",
"[7077 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250407 000001.SZ 12.47 10.21\n",
"1 20250407 000002.SZ 7.85 6.43\n",
"2 20250407 000004.SZ 10.80 8.84\n",
"3 20250407 000006.SZ 7.67 6.27\n",
"4 20250407 000007.SZ 6.82 5.58\n",
"... ... ... ... ...\n",
"7060 20250328 920108.BJ 31.03 16.71\n",
"7061 20250328 920111.BJ 39.65 21.35\n",
"7062 20250328 920116.BJ 115.67 62.29\n",
"7063 20250328 920118.BJ 41.00 22.08\n",
"7064 20250328 920128.BJ 44.83 24.15\n",
"7070 20250407 920108.BJ 31.72 17.08\n",
"7071 20250407 920111.BJ 36.85 19.85\n",
"7072 20250407 920116.BJ 116.05 62.49\n",
"7073 20250407 920118.BJ 38.07 20.51\n",
"7074 20250407 920128.BJ 43.38 23.36\n",
"\n",
"[7065 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250326 000001.SZ 12.57 10.29\n",
"1 20250326 000002.SZ 7.91 6.47\n",
"2 20250326 000004.SZ 11.28 9.23\n",
"3 20250326 000006.SZ 7.17 5.87\n",
"4 20250326 000007.SZ 6.67 5.45\n",
"... ... ... ... ...\n",
"7056 20250326 920108.BJ 33.96 18.30\n",
"7057 20250326 920111.BJ 41.92 22.58\n",
"7058 20250326 920116.BJ 133.64 71.96\n",
"7059 20250326 920118.BJ 41.93 22.59\n",
"7060 20250326 920128.BJ 49.40 26.60\n",
"\n",
"[7061 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250325 000001.SZ 12.52 10.24\n",
"1 20250325 000002.SZ 7.90 6.46\n",
"2 20250325 000004.SZ 11.55 9.45\n",
"3 20250325 000006.SZ 7.13 5.83\n",
"4 20250325 000007.SZ 6.60 5.40\n",
"... ... ... ... ...\n",
"7055 20250325 920108.BJ 33.30 17.94\n",
"7056 20250325 920111.BJ 39.97 21.53\n",
"7057 20250325 920116.BJ 137.78 74.20\n",
"7058 20250325 920118.BJ 39.52 21.28\n",
"7059 20250325 920128.BJ 46.22 24.90\n",
"\n",
"[7060 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250324 000001.SZ 12.56 10.28\n",
"1 20250324 000002.SZ 8.10 6.62\n",
"2 20250324 000004.SZ 12.82 10.49\n",
"3 20250324 000006.SZ 7.44 6.08\n",
"4 20250324 000007.SZ 6.89 5.63\n",
"... ... ... ... ...\n",
"7053 20250324 920108.BJ 34.84 18.76\n",
"7054 20250324 920111.BJ 40.41 21.77\n",
"7055 20250324 920116.BJ 134.55 72.45\n",
"7056 20250324 920118.BJ 38.67 20.83\n",
"7057 20250324 920128.BJ 45.86 24.70\n",
"\n",
"[7058 rows x 4 columns]]\n"
"[7075 rows x 4 columns]]\n"
]
}
],
"execution_count": 4
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.696614Z",
"start_time": "2025-03-30T16:43:03.411036Z"
"end_time": "2025-04-06T15:34:48.966102Z",
"start_time": "2025-04-06T15:34:48.693158Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -261,20 +206,27 @@
]
}
],
"execution_count": 5
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.713628Z",
"start_time": "2025-03-30T16:43:03.711521Z"
"end_time": "2025-04-06T15:34:48.980659Z",
"start_time": "2025-04-06T15:34:48.977771Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
"source": []
}
],
"metadata": {