Files
NewStock/code/data/update/update_stk_limit.ipynb
liaozhaorun 49fa31ac63 2.15
2025-02-15 23:33:34 +08:00

246 lines
7.3 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:18:36.892437Z",
"start_time": "2025-02-11T15:18:36.020822Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:12.573607Z",
"start_time": "2025-02-11T15:20:00.110127Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4682 600310.SH 20250211\n",
"4683 600312.SH 20250211\n",
"4684 600313.SH 20250211\n",
"4673 600299.SH 20250211\n",
"0 000001.SZ 20250211\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10040878 entries, 0 to 10040877\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 229.8+ MB\n",
"None\n",
"20250211\n",
"20250212\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:27.831699Z",
"start_time": "2025-02-11T15:21:26.665039Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250219 完成\n",
"任务 20250220 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
" return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" if result is not None:\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:29.294283Z",
"start_time": "2025-02-11T15:21:29.247112Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250213 000001.SZ 12.56 10.28\n",
"1 20250213 000002.SZ 8.76 7.16\n",
"2 20250213 000004.SZ 15.40 12.60\n",
"3 20250213 000006.SZ 7.92 6.48\n",
"4 20250213 000007.SZ 7.39 6.05\n",
"... ... ... ... ...\n",
"7014 20250213 920108.BJ 27.22 14.66\n",
"7015 20250213 920111.BJ 35.98 19.38\n",
"7016 20250213 920116.BJ 80.44 43.32\n",
"7017 20250213 920118.BJ 34.46 18.56\n",
"7018 20250213 920128.BJ 39.84 21.46\n",
"\n",
"[7019 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250212 000001.SZ 12.56 10.28\n",
"1 20250212 000002.SZ 7.96 6.52\n",
"2 20250212 000004.SZ 15.07 12.33\n",
"3 20250212 000006.SZ 7.74 6.34\n",
"4 20250212 000007.SZ 7.40 6.06\n",
"... ... ... ... ...\n",
"7014 20250212 920108.BJ 27.41 14.77\n",
"7015 20250212 920111.BJ 34.51 18.59\n",
"7016 20250212 920116.BJ 79.66 42.90\n",
"7017 20250212 920118.BJ 34.81 18.75\n",
"7018 20250212 920128.BJ 38.98 21.00\n",
"\n",
"[7019 rows x 4 columns]]\n"
]
}
],
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:37.999493Z",
"start_time": "2025-02-11T15:20:37.375220Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}