Files
NewStock/main/data/update/update_stk_limit.ipynb
liaozhaorun e407225d29 feat(qmt): 优化定时重连机制避免与健康检查冲突
- 添加 is_scheduled_reconnecting 标志位协调重连逻辑
- 增强定时重连任务的日志前缀便于追踪
- 改进异常处理和资源清理日志
- 优化代码格式和注释
2026-02-09 22:12:14 +08:00

304 lines
10 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the TUSHARE_TOKEN environment variable\n",
"# instead of hard-coding it: a token committed with the notebook is visible\n",
"# to everyone who can read the repository, so a token that was ever committed\n",
"# should be revoked and replaced.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('TUSHARE_TOKEN environment variable is not set')\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4971 600208.SH 20260130\n",
"4972 600210.SH 20260130\n",
"4973 600211.SH 20260130\n",
"4961 600195.SH 20260130\n",
"7441 920964.BJ 20260130\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 11775527 entries, 0 to 37233\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 269.5+ MB\n",
"None\n",
"20260130\n",
"20260202\n"
]
}
],
"source": [
"import time\n",
"\n",
"import pandas as pd\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"# Read-only pass over the existing store to find the newest trade_date that\n",
"# has already been saved.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.sort_values(by='trade_date', ascending=True).tail())\n",
"    print(df.info())\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# Query the trading calendar up to today (local time) rather than a\n",
"# hand-edited end date, so the cell needs no manual change between runs and\n",
"# no requests are issued for days that have not happened yet.\n",
"end_date = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=end_date)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"# Dates are 'YYYYMMDD' strings, so string comparison orders them by date.\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# min() raises ValueError on an empty list, which is the case whenever the\n",
"# store already contains every trading day up to end_date.\n",
"if trade_dates:\n",
"    start_date = min(trade_dates)\n",
"    print(start_date)\n",
"else:\n",
"    start_date = None\n",
"    print('没有需要更新的交易日')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20260310 完成\n",
"任务 20260309 完成\n",
"任务 20260306 完成\n",
"任务 20260305 完成\n",
"任务 20260303 完成\n",
"任务 20260304 完成\n",
"任务 20260302 完成\n",
"任务 20260227 完成\n",
"任务 20260226 完成\n",
"任务 20260225 完成\n",
"任务 20260224 完成\n",
"任务 20260213 完成\n",
"任务 20260212 完成\n",
"任务 20260211 完成\n",
"任务 20260210 完成\n",
"任务 20260209 完成\n",
"任务 20260206 完成\n",
"任务 20260205 完成\n",
"任务 20260204 完成\n",
"任务 20260203 完成\n",
"任务 20260202 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"# Trading days whose download raised an exception; reported after the pool\n",
"# has drained so that a gap is noticed before anything is saved.\n",
"failed_dates = []\n",
"\n",
"# API call counter and batch timer (not referenced anywhere else in this cell).\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the price-limit rows for one trading day.\n",
"\n",
"    Args:\n",
"        trade_date: Trading day passed through to ``pro.stk_limit``.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by ``pro.stk_limit``, or None when the API\n",
"        returns None or an empty frame.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before every request\n",
"    stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
"    if stk_limit_data is None or stk_limit_data.empty:\n",
"        return None\n",
"    return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trading day this future was submitted for\n",
"        try:\n",
"            result = future.result()  # re-raises any exception raised in the worker\n",
"        except Exception as e:\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"        else:\n",
"            if result is not None:\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"\n",
"if failed_dates:\n",
"    # The update resumes after the newest trade_date already in the store, so\n",
"    # a day that failed here is never requested again once later days have\n",
"    # been saved. Make the gap visible instead of only logging each error.\n",
"    print(f\"以下交易日获取失败, 保存前请重试: {sorted(failed_dates)}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20260206 000001.SZ 12.20 9.98\n",
"1 20260206 000002.SZ 5.37 4.39\n",
"2 20260206 000004.SZ 10.79 9.77\n",
"3 20260206 000006.SZ 9.90 8.10\n",
"4 20260206 000007.SZ 12.31 10.07\n",
"... ... ... ... ...\n",
"7459 20260206 920978.BJ 41.60 22.40\n",
"7460 20260206 920981.BJ 41.63 22.43\n",
"7461 20260206 920982.BJ 298.63 160.81\n",
"7462 20260206 920985.BJ 11.37 6.13\n",
"7463 20260206 920992.BJ 22.32 12.02\n",
"\n",
"[7464 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260205 000001.SZ 12.07 9.87\n",
"1 20260205 000002.SZ 5.39 4.41\n",
"2 20260205 000004.SZ 11.36 10.28\n",
"3 20260205 000006.SZ 9.97 8.15\n",
"4 20260205 000007.SZ 12.32 10.08\n",
"... ... ... ... ...\n",
"7454 20260205 920978.BJ 41.48 22.34\n",
"7455 20260205 920981.BJ 42.00 22.62\n",
"7456 20260205 920982.BJ 289.43 155.85\n",
"7457 20260205 920985.BJ 12.16 6.56\n",
"7458 20260205 920992.BJ 22.32 12.02\n",
"\n",
"[7459 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260204 000001.SZ 11.92 9.76\n",
"1 20260204 000002.SZ 5.17 4.23\n",
"2 20260204 000004.SZ 11.96 10.82\n",
"3 20260204 000006.SZ 9.77 7.99\n",
"4 20260204 000007.SZ 12.32 10.08\n",
"... ... ... ... ...\n",
"7453 20260204 920978.BJ 38.15 20.55\n",
"7454 20260204 920981.BJ 43.01 23.17\n",
"7455 20260204 920982.BJ 279.50 150.50\n",
"7456 20260204 920985.BJ 10.66 5.74\n",
"7457 20260204 920992.BJ 22.28 12.00\n",
"\n",
"[7458 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260203 000001.SZ 11.95 9.77\n",
"1 20260203 000002.SZ 5.15 4.21\n",
"2 20260203 000004.SZ 12.59 11.39\n",
"3 20260203 000006.SZ 9.58 7.84\n",
"4 20260203 000007.SZ 12.09 9.89\n",
"... ... ... ... ...\n",
"7453 20260203 920978.BJ 37.93 20.43\n",
"7454 20260203 920981.BJ 42.52 22.90\n",
"7455 20260203 920982.BJ 279.35 150.43\n",
"7456 20260203 920985.BJ 10.15 5.47\n",
"7457 20260203 920992.BJ 21.95 11.83\n",
"\n",
"[7458 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260202 000001.SZ 11.91 9.75\n",
"1 20260202 000002.SZ 5.37 4.39\n",
"2 20260202 000004.SZ 13.25 11.99\n",
"3 20260202 000006.SZ 9.86 8.06\n",
"4 20260202 000007.SZ 11.97 9.79\n",
"... ... ... ... ...\n",
"7450 20260202 920978.BJ 40.36 21.74\n",
"7451 20260202 920981.BJ 45.50 24.50\n",
"7452 20260202 920982.BJ 287.80 154.98\n",
"7453 20260202 920985.BJ 10.33 5.57\n",
"7454 20260202 920992.BJ 22.63 12.19\n",
"\n",
"[7455 rows x 4 columns]]\n"
]
}
],
"source": [
"print(all_daily_data)\n",
"# Merge the per-day frames into a single DataFrame. pd.concat raises\n",
"# ValueError when given an empty list, so fall back to an empty frame when\n",
"# no trading day returned any data.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"else:\n",
"    all_daily_data_df = pd.DataFrame()\n",
"    print('没有获取到新的数据')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"# Append the merged rows to the 'stk_limit' node of the HDF5 file, using the\n",
"# table format with data_columns=True.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='stk_limit',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}