{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "sw-daily-update-intro",
   "metadata": {},
   "source": [
    "# Incremental update of `sw_daily.h5`\n",
    "\n",
    "Appends newly available rows from the Tushare `sw_daily` endpoint to the local HDF5 table.\n",
    "\n",
    "1. Connect to Tushare. The token is taken from the `TUSHARE_TOKEN` environment variable when it is set.\n",
    "2. Find the latest `trade_date` already stored and list the open trading days after it (up to today).\n",
    "3. Download every missing trading day.\n",
    "4. Append the new rows to the HDF5 table, skipping anything that would create a gap or a duplicate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:57:35.618124Z",
     "start_time": "2025-04-09T14:57:34.837095Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "\n",
    "import pandas as pd\n",
    "import tushare as ts\n",
    "\n",
    "# SECURITY: the API token used to be hard-coded in this cell. That token has been\n",
    "# exposed and should be rotated; provide the new one through the TUSHARE_TOKEN\n",
    "# environment variable instead of committing it with the notebook.\n",
    "tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
    "if tushare_token:\n",
    "    ts.set_token(tushare_token)\n",
    "# Without TUSHARE_TOKEN, pro_api() relies on a token saved by an earlier\n",
    "# ts.set_token() call on this machine.\n",
    "pro = ts.pro_api()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:57:38.089531Z",
     "start_time": "2025-04-09T14:57:35.854308Z"
    }
   },
   "outputs": [],
   "source": [
    "h5_filename = '/mnt/d/PyProject/NewStock/data/sw_daily.h5'\n",
    "key = '/sw_daily'\n",
    "\n",
    "# Only trade_date is needed to find where the stored data ends, so select that\n",
    "# single column instead of loading (and printing) the whole table.\n",
    "with pd.HDFStore(h5_filename, mode='r') as store:\n",
    "    stored_dates = store.select(key, columns=['trade_date'])['trade_date']\n",
    "\n",
    "max_date = stored_dates.max()\n",
    "print(max_date)\n",
    "\n",
    "# Query the calendar from the last stored day up to today. A hard-coded end date\n",
    "# would make the notebook silently stop finding new trading days once it passed.\n",
    "today = pd.Timestamp.today().strftime('%Y%m%d')\n",
    "trade_cal = pro.trade_cal(exchange='', start_date=max_date, end_date=today)\n",
    "trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep only open trading days\n",
    "# Sort ascending so nothing below depends on the order the API returns rows in.\n",
    "trade_dates = sorted(trade_cal.loc[trade_cal['cal_date'] > max_date, 'cal_date'])\n",
    "\n",
    "if trade_dates:\n",
    "    start_date = trade_dates[0]\n",
    "    print(f'start_date: {start_date}')\n",
    "else:\n",
    "    # min() on an empty list raises ValueError; an up-to-date store is not an error.\n",
    "    start_date = None\n",
    "    print(f'数据已是最新(最新交易日: {max_date}),无需更新')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "747acc47-0884-4f76-90fb-276f6494e31d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:57:40.754159Z",
     "start_time": "2025-04-09T14:57:38.104541Z"
    }
   },
   "outputs": [],
   "source": [
    "# Request throttling knobs (presumably chosen to respect Tushare rate limits).\n",
    "MAX_WORKERS = 2\n",
    "REQUEST_PAUSE_SECONDS = 0.1\n",
    "\n",
    "\n",
    "def get_data(trade_date):\n",
    "    \"\"\"Download the sw_daily rows of one trading day.\n",
    "\n",
    "    Args:\n",
    "        trade_date: Trading day passed to ``pro.sw_daily`` (a 'YYYYMMDD' string\n",
    "            taken from the trading calendar).\n",
    "\n",
    "    Returns:\n",
    "        The DataFrame returned by ``pro.sw_daily``, or ``None`` when the API\n",
    "        returns nothing or an empty frame for that day.\n",
    "    \"\"\"\n",
    "    time.sleep(REQUEST_PAUSE_SECONDS)\n",
    "    data = pro.sw_daily(trade_date=trade_date)\n",
    "    if data is None or data.empty:\n",
    "        return None\n",
    "    return data\n",
    "\n",
    "\n",
    "all_daily_data = []  # one DataFrame per successfully downloaded day\n",
    "empty_dates = []     # days for which the API returned no rows\n",
    "failed_dates = []    # days whose request raised an exception\n",
    "\n",
    "with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:\n",
    "    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
    "\n",
    "    for future in as_completed(future_to_date):\n",
    "        trade_date = future_to_date[future]  # trading day this future belongs to\n",
    "        try:\n",
    "            result = future.result()\n",
    "        except Exception as e:\n",
    "            # Remember the failure so the save step can avoid leaving a gap.\n",
    "            failed_dates.append(trade_date)\n",
    "            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
    "            continue\n",
    "        if result is None:\n",
    "            # Never append None: pd.concat raises when every entry is None.\n",
    "            empty_dates.append(trade_date)\n",
    "            print(f'{trade_date} 无数据,已跳过')\n",
    "            continue\n",
    "        all_daily_data.append(result)\n",
    "        print(f\"任务 {trade_date} 完成\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6765638-481f-40d8-a259-2e7b25362618",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:57:40.994975Z",
     "start_time": "2025-04-09T14:57:40.773783Z"
    }
   },
   "outputs": [],
   "source": [
    "if not all_daily_data:\n",
    "    # pd.concat([]) raises ValueError, so stop here when nothing was downloaded.\n",
    "    print('没有需要保存的新数据')\n",
    "else:\n",
    "    # Merge the per-day frames into a single DataFrame.\n",
    "    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
    "    rows_to_save = all_daily_data_df\n",
    "\n",
    "    if failed_dates:\n",
    "        # Saving days after a failed one would move the stored maximum trade_date\n",
    "        # past the gap, and later runs (which only fetch newer days) would never\n",
    "        # fill it. Keep only the days before the earliest failure.\n",
    "        first_failed = min(failed_dates)\n",
    "        rows_to_save = rows_to_save[rows_to_save['trade_date'] < first_failed]\n",
    "        print(f'以下日期获取失败: {sorted(failed_dates)};仅保存 {first_failed} 之前的数据,请重新运行以补全')\n",
    "\n",
    "    # Re-read the stored maximum so that re-running this cell cannot append the\n",
    "    # same rows twice.\n",
    "    with pd.HDFStore(h5_filename, mode='r') as store:\n",
    "        stored_max = store.select(key, columns=['trade_date'])['trade_date'].max()\n",
    "\n",
    "    rows_to_save = (\n",
    "        rows_to_save[rows_to_save['trade_date'] > stored_max]\n",
    "        .sort_values(['trade_date', 'ts_code'])\n",
    "        .reset_index(drop=True)\n",
    "    )\n",
    "\n",
    "    if rows_to_save.empty:\n",
    "        print('没有需要保存的新数据')\n",
    "    else:\n",
    "        # Append to the existing HDF5 table (table format is required for append).\n",
    "        rows_to_save.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
    "        print(\"所有每日基础数据获取并保存完毕!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "stock",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}