Files
NewStock/main/data/update/sw_daily.ipynb

206 lines
5.7 KiB
Plaintext
Raw Normal View History

2025-04-03 00:45:07 +08:00
{
"cells": [
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 1,
2025-04-03 00:45:07 +08:00
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:35.618124Z",
"start_time": "2025-04-09T14:57:34.837095Z"
2025-04-03 00:45:07 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [],
2025-04-03 00:45:07 +08:00
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the environment so it is never stored in the notebook.\n",
"# NOTE(review): the token that used to be hard-coded here should be treated as leaked\n",
"# and revoked/reissued.\n",
"token = os.environ.get('TUSHARE_TOKEN')\n",
"if not token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook')\n",
"ts.set_token(token)\n",
"pro = ts.pro_api()"
2025-05-06 23:42:40 +08:00
]
2025-04-03 00:45:07 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 2,
2025-04-03 00:45:07 +08:00
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:38.089531Z",
"start_time": "2025-04-09T14:57:35.854308Z"
2025-04-03 00:45:07 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-05-08 15:42:17 +08:00
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
".. ... ...\n",
2025-05-13 15:30:06 +08:00
"434 859811.SI 20250508\n",
"435 859821.SI 20250508\n",
"436 859822.SI 20250508\n",
"437 859852.SI 20250508\n",
"438 859951.SI 20250508\n",
2025-05-06 23:42:40 +08:00
"\n",
2025-05-13 15:30:06 +08:00
"[1066343 rows x 2 columns]\n",
"20250508\n",
"start_date: 20250509\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-04-03 00:45:07 +08:00
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"\n",
"# Only trade_date is needed to find where the stored history ends, so request that\n",
"# single column from the table instead of materialising every column of every row.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store.select(key, columns=['trade_date'])\n",
"\n",
"# A short tail is enough to eyeball the most recently stored rows.\n",
"print(df.tail())\n",
"\n",
"# Latest trade date already saved; later cells fetch only dates after it.\n",
"max_date = df['trade_date'].max()\n",
"print(max_date)\n",
2025-05-06 23:42:40 +08:00
"# Query the calendar up to today instead of a hard-coded end date: a fixed date goes\n",
"# stale once it has passed and, until then, makes the fetch loop request sessions that\n",
"# have not happened yet.\n",
"# NOTE(review): uses the machine's local date; confirm it matches the exchange's date.\n",
"today = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
2025-04-03 00:45:07 +08:00
"# Keep only the days on which the exchange is open.\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]\n",
"# Dates strictly after the last stored one are the ones still to be fetched.\n",
"trade_dates = trade_cal.loc[trade_cal['cal_date'] > max_date, 'cal_date'].tolist()\n",
"# min() raises ValueError on an empty list, which happens whenever the store is already\n",
"# up to date, so fall back to None instead of crashing the cell.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(f'start_date: {start_date}')"
2025-05-06 23:42:40 +08:00
]
2025-04-09 22:57:01 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 3,
2025-04-09 22:57:01 +08:00
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:40.754159Z",
"start_time": "2025-04-09T14:57:38.104541Z"
2025-04-09 22:57:01 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250617 完成\n",
2025-05-13 15:30:06 +08:00
"任务 20250618 完成\n",
2025-05-06 23:42:40 +08:00
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
2025-05-08 15:42:17 +08:00
"任务 20250611 完成\n",
2025-05-06 23:42:40 +08:00
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
2025-05-08 15:42:17 +08:00
"任务 20250603 完成\n",
2025-05-06 23:42:40 +08:00
"任务 20250530 完成\n",
2025-05-08 15:42:17 +08:00
"任务 20250529 完成\n",
2025-05-13 15:30:06 +08:00
"任务 20250528 完成\n",
2025-05-08 15:42:17 +08:00
"任务 20250527 完成\n",
2025-05-13 15:30:06 +08:00
"任务 20250526 完成\n",
2025-05-06 23:42:40 +08:00
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
2025-05-13 15:30:06 +08:00
"任务 20250515 完成\n",
2025-05-06 23:42:40 +08:00
"任务 20250516 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
2025-05-13 15:30:06 +08:00
"任务 20250509 完成\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-04-03 00:45:07 +08:00
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []  # one DataFrame per trade date that returned rows\n",
"failed_dates = []  # trade dates whose request raised an exception\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the sw_daily rows for a single trade date.\n",
"\n",
"    Args:\n",
"        trade_date: Date value passed straight through to ``pro.sw_daily``.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by the API, or None when the API returned\n",
"        nothing or an empty frame.\n",
"    \"\"\"\n",
"    # Short pause before every request -- presumably to stay under the API rate limit.\n",
"    time.sleep(0.1)\n",
"    data = pro.sw_daily(trade_date=trade_date)\n",
"    if data is None or data.empty:\n",
"        return None\n",
"    return data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # date this future was submitted for\n",
"        try:\n",
"            result = future.result()\n",
"        except Exception as e:\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"        if result is None:\n",
"            # Nothing came back for this date: do not queue a None for the later\n",
"            # concat, and do not report the date as completed.\n",
"            print(f\"任务 {trade_date} 无数据\")\n",
"            continue\n",
"        all_daily_data.append(result)\n",
"        print(f\"任务 {trade_date} 完成\")\n",
"\n",
"if failed_dates:\n",
"    # The update resumes from the latest stored trade_date, so a failed date that is\n",
"    # older than a date that does get saved would not be requested again; surface the\n",
"    # failures so they can be re-fetched before the results are saved.\n",
"    print(f\"以下交易日获取失败, 保存前请重新获取: {sorted(failed_dates)}\")"
2025-05-06 23:42:40 +08:00
]
2025-04-09 22:57:01 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 4,
2025-04-09 22:57:01 +08:00
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:40.994975Z",
"start_time": "2025-04-09T14:57:40.773783Z"
2025-04-09 22:57:01 +08:00
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"# Dates without data may have left None entries in the list; drop them before merging.\n",
"frames = [frame for frame in all_daily_data if frame is not None]\n",
"\n",
"if frames:\n",
"    # Merge the per-date results into a single DataFrame.\n",
"    all_daily_data_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"    # Append to the existing HDF5 store (table format, which supports appending).\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"    print(\"所有每日基础数据获取并保存完毕!\")\n",
"else:\n",
"    # pd.concat raises ValueError when given no frames, so skip the write entirely\n",
"    # when nothing new was fetched.\n",
"    print(\"没有新的数据需要保存\")"
]
2025-04-03 00:45:07 +08:00
}
],
"metadata": {
"kernelspec": {
2025-05-06 23:42:40 +08:00
"display_name": "new_trader",
2025-04-03 00:45:07 +08:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}