Files
NewStock/main/data/update/update_stk_limit.ipynb

299 lines
9.9 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Never commit the Tushare API token: take it from the TUSHARE_TOKEN environment\n",
"# variable. The token that used to be hard-coded here should be treated as leaked\n",
"# and regenerated.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if tushare_token:\n",
"    ts.set_token(tushare_token)\n",
"\n",
"# Without an explicit token, pro_api() uses the token previously stored on this\n",
"# machine by ts.set_token(), and raises if none is available.\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4979 600202.SH 20260206\n",
"4980 600203.SH 20260206\n",
"4981 600206.SH 20260206\n",
"4969 600189.SH 20260206\n",
"7454 920964.BJ 20260206\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 11812821 entries, 0 to 37293\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 270.4+ MB\n",
"None\n",
"20260206\n",
"20260209\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"\n",
"# Read only the identifying columns of the existing store to find the latest\n",
"# trade date that has already been saved.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.sort_values(by='trade_date', ascending=True).tail())\n",
"    # DataFrame.info() prints its report itself and returns None, so it must not\n",
"    # be wrapped in print().\n",
"    df.info()\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"\n",
"# Query the trading calendar up to today instead of a hard-coded end date, so the\n",
"# notebook keeps finding new trading days without being edited.\n",
"end_date = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=end_date)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"\n",
"# Trading days that are newer than anything already in the store.\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"if trade_dates:\n",
"    start_date = min(trade_dates)\n",
"else:\n",
"    # The store is already up to date; min() of an empty list would raise ValueError.\n",
"    start_date = None\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20260310 完成\n",
"任务 20260309 完成\n",
"任务 20260306 完成\n",
"任务 20260305 完成\n",
"任务 20260304 完成\n",
"任务 20260303 完成\n",
"任务 20260302 完成\n",
"任务 20260227 完成\n",
"任务 20260226 完成\n",
"任务 20260225 完成\n",
"任务 20260224 完成\n",
"任务 20260213 完成\n",
"任务 20260212 完成\n",
"任务 20260211 完成\n",
"任务 20260210 完成\n",
"任务 20260209 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []  # one DataFrame per trade date that returned rows\n",
"failed_dates = []  # trade dates whose request raised an exception\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the limit-price rows for one trade date from Tushare.\n",
"\n",
"    Args:\n",
"        trade_date: Trade date string passed straight to ``pro.stk_limit``.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by ``pro.stk_limit``, or ``None`` when the API\n",
"        returned nothing or an empty frame.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before each request\n",
"    stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
"    if stk_limit_data is not None and not stk_limit_data.empty:\n",
"        return stk_limit_data\n",
"    return None\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
"        try:\n",
"            result = future.result()  # re-raises any exception from the worker\n",
"        except Exception as e:\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"\n",
"        if result is None:\n",
"            # Report empty responses separately so the log does not claim data\n",
"            # was fetched for dates that returned nothing.\n",
"            print(f\"任务 {trade_date} 无数据\")\n",
"        else:\n",
"            all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"\n",
"# Surface failures explicitly: the next run only fetches dates after the newest\n",
"# saved trade date, so a failed date older than that would never be retried.\n",
"if failed_dates:\n",
"    print(f\"以下交易日获取失败, 保存前请重试: {sorted(failed_dates)}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20260213 000001.SZ 12.06 9.86\n",
"1 20260213 000002.SZ 5.41 4.43\n",
"2 20260213 000004.SZ 8.36 7.56\n",
"3 20260213 000006.SZ 10.12 8.28\n",
"4 20260213 000007.SZ 13.44 11.00\n",
"... ... ... ... ...\n",
"7476 20260213 920978.BJ 39.09 21.05\n",
"7477 20260213 920981.BJ 41.61 22.41\n",
"7478 20260213 920982.BJ 284.06 152.96\n",
"7479 20260213 920985.BJ 11.34 6.12\n",
"7480 20260213 920992.BJ 21.77 11.73\n",
"\n",
"[7481 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260212 000001.SZ 12.18 9.96\n",
"1 20260212 000002.SZ 5.38 4.40\n",
"2 20260212 000004.SZ 8.80 7.96\n",
"3 20260212 000006.SZ 10.29 8.42\n",
"4 20260212 000007.SZ 13.31 10.89\n",
"... ... ... ... ...\n",
"7476 20260212 920978.BJ 39.93 21.51\n",
"7477 20260212 920981.BJ 41.60 22.40\n",
"7478 20260212 920982.BJ 284.29 153.09\n",
"7479 20260212 920985.BJ 11.01 5.93\n",
"7480 20260212 920992.BJ 21.86 11.78\n",
"\n",
"[7481 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260211 000001.SZ 12.17 9.95\n",
"1 20260211 000002.SZ 5.37 4.39\n",
"2 20260211 000004.SZ 9.26 8.38\n",
"3 20260211 000006.SZ 10.18 8.33\n",
"4 20260211 000007.SZ 13.37 10.94\n",
"... ... ... ... ...\n",
"7469 20260211 920978.BJ 41.02 22.10\n",
"7470 20260211 920981.BJ 41.91 22.57\n",
"7471 20260211 920982.BJ 286.35 154.19\n",
"7472 20260211 920985.BJ 11.34 6.12\n",
"7473 20260211 920992.BJ 22.04 11.88\n",
"\n",
"[7474 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260210 000001.SZ 12.18 9.96\n",
"1 20260210 000002.SZ 5.46 4.46\n",
"2 20260210 000004.SZ 9.74 8.82\n",
"3 20260210 000006.SZ 10.32 8.44\n",
"4 20260210 000007.SZ 13.53 11.07\n",
"... ... ... ... ...\n",
"7466 20260210 920978.BJ 41.71 22.47\n",
"7467 20260210 920981.BJ 42.05 22.65\n",
"7468 20260210 920982.BJ 289.90 156.10\n",
"7469 20260210 920985.BJ 11.83 6.37\n",
"7470 20260210 920992.BJ 22.33 12.03\n",
"\n",
"[7471 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20260209 000001.SZ 12.16 9.95\n",
"1 20260209 000002.SZ 5.29 4.33\n",
"2 20260209 000004.SZ 10.26 9.28\n",
"3 20260209 000006.SZ 9.88 8.08\n",
"4 20260209 000007.SZ 13.00 10.64\n",
"... ... ... ... ...\n",
"7462 20260209 920978.BJ 42.45 22.87\n",
"7463 20260209 920981.BJ 41.40 22.30\n",
"7464 20260209 920982.BJ 291.70 157.08\n",
"7465 20260209 920985.BJ 11.32 6.10\n",
"7466 20260209 920992.BJ 22.13 11.93\n",
"\n",
"[7467 rows x 4 columns]]\n"
]
}
],
"source": [
"# Print a compact summary rather than dumping every fetched frame.\n",
"print(f\"fetched frames: {len(all_daily_data)}\")\n",
"\n",
"# Merge all per-date frames into one DataFrame. pd.concat raises ValueError on an\n",
"# empty list, so fall back to an empty frame with the same columns when nothing\n",
"# was fetched.\n",
"if all_daily_data:\n",
"    # Worker threads finish in arbitrary order; sort so the result is deterministic.\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True).sort_values(\n",
"        ['trade_date', 'ts_code'], ignore_index=True\n",
"    )\n",
"else:\n",
"    all_daily_data_df = pd.DataFrame(columns=['trade_date', 'ts_code', 'up_limit', 'down_limit'])\n",
"print(all_daily_data_df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"\n",
"# Append the newly fetched rows to the HDF5 store in table format, using the same\n",
"# `key` the store was read with so the read and write targets cannot drift apart.\n",
"# NOTE: this cell is not idempotent - running it twice appends the same rows twice.\n",
"# Re-run the whole notebook rather than this cell alone.\n",
"if all_daily_data_df.empty:\n",
"    # Nothing new was fetched, so do not claim that anything was saved.\n",
"    print(\"没有新的数据需要保存\")\n",
"else:\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"    print(\"所有涨跌停价格数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}