Files
NewStock/main/data/update/update_stk_limit.ipynb

252 lines
7.1 KiB
Plaintext
Raw Normal View History

2025-02-12 00:21:33 +08:00
{
"cells": [
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 1,
2025-02-12 00:21:33 +08:00
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [],
2025-02-12 00:21:33 +08:00
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 2,
2025-02-12 00:21:33 +08:00
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
2025-11-29 00:23:12 +08:00
"4915 600221.SH 20251120\n",
"4916 600222.SH 20251120\n",
"4917 600223.SH 20251120\n",
"4919 600227.SH 20251120\n",
"3693 301448.SZ 20251120\n",
2025-05-06 23:42:40 +08:00
"<class 'pandas.core.frame.DataFrame'>\n",
2025-11-29 00:23:12 +08:00
"Index: 11412627 entries, 0 to 29456\n",
2025-05-06 23:42:40 +08:00
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
2025-11-29 00:23:12 +08:00
"memory usage: 261.2+ MB\n",
2025-05-06 23:42:40 +08:00
"None\n",
2025-11-29 00:23:12 +08:00
"20251120\n",
"20251121\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-15 23:33:34 +08:00
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
2025-06-02 22:23:44 +08:00
"h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n",
2025-02-15 23:33:34 +08:00
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
2025-11-29 00:23:12 +08:00
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
2025-02-15 23:33:34 +08:00
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
2025-05-06 23:42:40 +08:00
]
2025-04-09 22:57:01 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 3,
2025-04-09 22:57:01 +08:00
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
2025-05-06 23:42:40 +08:00
},
"scrolled": true
2025-04-09 22:57:01 +08:00
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-11-29 00:23:12 +08:00
"任务 20251219 完成\n",
"任务 20251218 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-12 00:21:33 +08:00
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
" return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" if result is not None:\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
2025-02-15 23:33:34 +08:00
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 4,
2025-02-15 23:33:34 +08:00
"id": "96a81aa5890ea3c3",
2025-02-12 00:21:33 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
2025-02-12 00:21:33 +08:00
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-05-08 15:42:17 +08:00
"[ trade_date ts_code up_limit down_limit\n",
2025-11-29 00:23:12 +08:00
"0 20251121 000001.SZ 13.04 10.67\n",
"1 20251121 000002.SZ 6.82 5.58\n",
"2 20251121 000004.SZ 11.64 10.54\n",
"3 20251121 000006.SZ 12.07 9.87\n",
"4 20251121 000007.SZ 11.00 9.00\n",
2025-06-02 22:23:44 +08:00
"... ... ... ... ...\n",
2025-11-29 00:23:12 +08:00
"7363 20251121 920978.BJ 49.06 26.42\n",
"7364 20251121 920981.BJ 46.99 25.31\n",
"7365 20251121 920982.BJ 300.67 161.91\n",
"7366 20251121 920985.BJ 11.75 6.33\n",
"7367 20251121 920992.BJ 24.06 12.96\n",
2025-06-02 22:23:44 +08:00
"\n",
2025-11-29 00:23:12 +08:00
"[7368 rows x 4 columns]]\n"
2025-02-12 00:21:33 +08:00
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-08 15:42:17 +08:00
"execution_count": 5,
2025-02-12 00:21:33 +08:00
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
2025-02-12 00:21:33 +08:00
}
},
2025-02-15 23:33:34 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": null,
2025-02-12 00:21:33 +08:00
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
2025-03-31 23:08:03 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
2025-03-31 23:08:03 +08:00
}
},
2025-02-12 00:21:33 +08:00
"outputs": [],
2025-05-06 23:42:40 +08:00
"source": []
2025-02-12 00:21:33 +08:00
}
],
"metadata": {
"kernelspec": {
2025-06-02 22:23:44 +08:00
"display_name": "stock",
2025-02-12 00:21:33 +08:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2025-11-29 00:23:12 +08:00
"version": "3.12.11"
2025-02-12 00:21:33 +08:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}