Files
NewStock/code/data/update/index_data.ipynb

195 lines
5.7 KiB
Plaintext
Raw Normal View History

2025-04-03 00:45:07 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# The Tushare API token is a credential and must not be committed with the notebook.\n",
"# Export it before starting the kernel, e.g. `export TUSHARE_TOKEN=<your token>`.\n",
"# Any token that was previously hard-coded here should be treated as leaked and rotated.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"1044388 857344.SI 20170103\n",
"1044389 857411.SI 20170103\n",
"1044390 857421.SI 20170103\n",
"1044391 857431.SI 20170103\n",
"1044392 858811.SI 20170103\n",
"\n",
"[1044393 rows x 2 columns]\n",
"20250221\n",
"start_date: 20250224\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"\n",
"# Find the most recent trade date already stored so that only newer days are fetched.\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df)\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"\n",
"# Query the calendar up to today rather than a fixed end date; with a fixed date\n",
"# the notebook stops discovering new trading days once that date has passed.\n",
"today = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"\n",
"# min() raises ValueError on an empty list, which is the case when the store is already up to date.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250403 完成\n",
"任务 20250407 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n",
"任务 20250321 完成\n",
"任务 20250320 完成\n",
"任务 20250319 完成\n",
"任务 20250317 完成\n",
"任务 20250314 完成\n",
"任务 20250318 完成\n",
"任务 20250313 完成\n",
"任务 20250312 完成\n",
"任务 20250311 完成\n",
"任务 20250310 完成\n",
"任务 20250307 完成\n",
"任务 20250306 完成\n",
"任务 20250305 完成\n",
"任务 20250304 完成\n",
"任务 20250303 完成\n",
"任务 20250228 完成\n",
"任务 20250227 完成\n",
"任务 20250226 完成\n",
"任务 20250225 完成\n",
"任务 20250224 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the `pro.sw_daily` rows for a single trade date.\n",
"\n",
"    Returns the DataFrame, or None when the API returns nothing or an empty frame.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before every request\n",
"    data = pro.sw_daily(trade_date=trade_date)\n",
"    if data is None or data.empty:\n",
"        return None\n",
"    return data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
"        try:\n",
"            result = future.result()\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"\n",
"        # Days without data come back as None; keep them out of the list that is\n",
"        # concatenated later and report them separately from real successes.\n",
"        if result is None:\n",
"            print(f\"任务 {trade_date} 无数据，已跳过\")\n",
"            continue\n",
"\n",
"        all_daily_data.append(result)\n",
"        print(f\"任务 {trade_date} 完成\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"# Keep only real, non-empty frames so a day without data cannot break the merge.\n",
"fetched_frames = [frame for frame in all_daily_data if frame is not None and not frame.empty]\n",
"\n",
"if fetched_frames:\n",
"    # Merge every fetched day into a single DataFrame.\n",
"    all_daily_data_df = pd.concat(fetched_frames, ignore_index=True)\n",
"\n",
"    # Append the new rows to the HDF5 file (table format).\n",
"    # NOTE: because append=True, re-running this cell writes the same rows again.\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"    print(\"所有每日基础数据获取并保存完毕!\")\n",
"else:\n",
"    # pd.concat raises ValueError when given nothing to concatenate, so skip the write.\n",
"    print(\"没有新的数据需要保存\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}