Files
NewStock/main/data/update/update_stk_limit.ipynb

299 lines
9.9 KiB
Plaintext
Raw Normal View History

2025-02-12 00:21:33 +08:00
{
"cells": [
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 1,
2025-02-12 00:21:33 +08:00
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [],
2025-02-12 00:21:33 +08:00
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 2,
2025-02-12 00:21:33 +08:00
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
2026-02-24 13:06:14 +08:00
"4979 600202.SH 20260206\n",
"4980 600203.SH 20260206\n",
"4981 600206.SH 20260206\n",
"4969 600189.SH 20260206\n",
"7454 920964.BJ 20260206\n",
2025-05-06 23:42:40 +08:00
"<class 'pandas.core.frame.DataFrame'>\n",
2026-02-24 13:06:14 +08:00
"Index: 11812821 entries, 0 to 37293\n",
2025-05-06 23:42:40 +08:00
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
2026-02-24 13:06:14 +08:00
"memory usage: 270.4+ MB\n",
2025-05-06 23:42:40 +08:00
"None\n",
2026-02-24 13:06:14 +08:00
"20260206\n",
"20260209\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-15 23:33:34 +08:00
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
2025-06-02 22:23:44 +08:00
"h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n",
2025-02-15 23:33:34 +08:00
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20260310')\n",
2025-02-15 23:33:34 +08:00
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
2025-05-06 23:42:40 +08:00
]
2025-04-09 22:57:01 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 3,
2025-04-09 22:57:01 +08:00
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
2025-05-06 23:42:40 +08:00
},
"scrolled": true
2025-04-09 22:57:01 +08:00
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20260310 完成\n",
"任务 20260309 完成\n",
"任务 20260306 完成\n",
"任务 20260305 完成\n",
"任务 20260304 完成\n",
2026-02-24 13:06:14 +08:00
"任务 20260303 完成\n",
"任务 20260302 完成\n",
"任务 20260227 完成\n",
"任务 20260226 完成\n",
"任务 20260225 完成\n",
"任务 20260224 完成\n",
"任务 20260213 完成\n",
"任务 20260212 完成\n",
"任务 20260211 完成\n",
"任务 20260210 完成\n",
2026-02-24 13:06:14 +08:00
"任务 20260209 完成\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-12 00:21:33 +08:00
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
" return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" if result is not None:\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
2025-02-15 23:33:34 +08:00
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 4,
2025-02-15 23:33:34 +08:00
"id": "96a81aa5890ea3c3",
2025-02-12 00:21:33 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
2025-02-12 00:21:33 +08:00
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-05-08 15:42:17 +08:00
"[ trade_date ts_code up_limit down_limit\n",
2026-02-24 13:06:14 +08:00
"0 20260213 000001.SZ 12.06 9.86\n",
"1 20260213 000002.SZ 5.41 4.43\n",
"2 20260213 000004.SZ 8.36 7.56\n",
"3 20260213 000006.SZ 10.12 8.28\n",
"4 20260213 000007.SZ 13.44 11.00\n",
"... ... ... ... ...\n",
2026-02-24 13:06:14 +08:00
"7476 20260213 920978.BJ 39.09 21.05\n",
"7477 20260213 920981.BJ 41.61 22.41\n",
"7478 20260213 920982.BJ 284.06 152.96\n",
"7479 20260213 920985.BJ 11.34 6.12\n",
"7480 20260213 920992.BJ 21.77 11.73\n",
"\n",
2026-02-24 13:06:14 +08:00
"[7481 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260212 000001.SZ 12.18 9.96\n",
"1 20260212 000002.SZ 5.38 4.40\n",
"2 20260212 000004.SZ 8.80 7.96\n",
"3 20260212 000006.SZ 10.29 8.42\n",
"4 20260212 000007.SZ 13.31 10.89\n",
"... ... ... ... ...\n",
2026-02-24 13:06:14 +08:00
"7476 20260212 920978.BJ 39.93 21.51\n",
"7477 20260212 920981.BJ 41.60 22.40\n",
"7478 20260212 920982.BJ 284.29 153.09\n",
"7479 20260212 920985.BJ 11.01 5.93\n",
"7480 20260212 920992.BJ 21.86 11.78\n",
"\n",
2026-02-24 13:06:14 +08:00
"[7481 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260211 000001.SZ 12.17 9.95\n",
"1 20260211 000002.SZ 5.37 4.39\n",
"2 20260211 000004.SZ 9.26 8.38\n",
"3 20260211 000006.SZ 10.18 8.33\n",
"4 20260211 000007.SZ 13.37 10.94\n",
"... ... ... ... ...\n",
2026-02-24 13:06:14 +08:00
"7469 20260211 920978.BJ 41.02 22.10\n",
"7470 20260211 920981.BJ 41.91 22.57\n",
"7471 20260211 920982.BJ 286.35 154.19\n",
"7472 20260211 920985.BJ 11.34 6.12\n",
"7473 20260211 920992.BJ 22.04 11.88\n",
"\n",
2026-02-24 13:06:14 +08:00
"[7474 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260210 000001.SZ 12.18 9.96\n",
"1 20260210 000002.SZ 5.46 4.46\n",
"2 20260210 000004.SZ 9.74 8.82\n",
"3 20260210 000006.SZ 10.32 8.44\n",
"4 20260210 000007.SZ 13.53 11.07\n",
"... ... ... ... ...\n",
2026-02-24 13:06:14 +08:00
"7466 20260210 920978.BJ 41.71 22.47\n",
"7467 20260210 920981.BJ 42.05 22.65\n",
"7468 20260210 920982.BJ 289.90 156.10\n",
"7469 20260210 920985.BJ 11.83 6.37\n",
"7470 20260210 920992.BJ 22.33 12.03\n",
"\n",
2026-02-24 13:06:14 +08:00
"[7471 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260209 000001.SZ 12.16 9.95\n",
"1 20260209 000002.SZ 5.29 4.33\n",
"2 20260209 000004.SZ 10.26 9.28\n",
"3 20260209 000006.SZ 9.88 8.08\n",
"4 20260209 000007.SZ 13.00 10.64\n",
2025-06-02 22:23:44 +08:00
"... ... ... ... ...\n",
2026-02-24 13:06:14 +08:00
"7462 20260209 920978.BJ 42.45 22.87\n",
"7463 20260209 920981.BJ 41.40 22.30\n",
"7464 20260209 920982.BJ 291.70 157.08\n",
"7465 20260209 920985.BJ 11.32 6.10\n",
"7466 20260209 920992.BJ 22.13 11.93\n",
2025-06-02 22:23:44 +08:00
"\n",
2026-02-24 13:06:14 +08:00
"[7467 rows x 4 columns]]\n"
2025-02-12 00:21:33 +08:00
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-08 15:42:17 +08:00
"execution_count": 5,
2025-02-12 00:21:33 +08:00
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
2025-02-12 00:21:33 +08:00
}
},
2025-02-15 23:33:34 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": null,
2025-02-12 00:21:33 +08:00
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
2025-03-31 23:08:03 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
2025-03-31 23:08:03 +08:00
}
},
2025-02-12 00:21:33 +08:00
"outputs": [],
2025-05-06 23:42:40 +08:00
"source": []
2025-02-12 00:21:33 +08:00
}
],
"metadata": {
"kernelspec": {
2025-06-02 22:23:44 +08:00
"display_name": "stock",
2025-02-12 00:21:33 +08:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2026-02-24 13:06:14 +08:00
"version": "3.13.2"
2025-02-12 00:21:33 +08:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}