{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cyq-perf-overview",
   "metadata": {},
   "source": [
    "# Incremental update of the `cyq_perf` HDF5 store\n",
    "\n",
    "1. Read the latest `trade_date` already stored in the HDF5 file.\n",
    "2. Download `cyq_perf` from Tushare for every later trading day up to today.\n",
    "3. Append the new rows to the same HDF5 table.\n",
    "\n",
    "Set the `TUSHARE_TOKEN` environment variable before starting the kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "\n",
    "import pandas as pd\n",
    "import tushare as ts\n",
    "\n",
    "# The API token must never be committed with the notebook (a token that was\n",
    "# previously hard-coded here should be revoked and re-issued).\n",
    "# It is read from the TUSHARE_TOKEN environment variable; when that is unset an\n",
    "# empty string is passed, which should make tushare fall back to a token saved\n",
    "# earlier with ts.set_token() -- confirm against the installed tushare version.\n",
    "pro = ts.pro_api(os.environ.get('TUSHARE_TOKEN', ''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "h5_filename = '/mnt/d/PyProject/NewStock/data/cyq_perf.h5'\n",
    "key = '/cyq_perf'\n",
    "\n",
    "CAL_START_DATE = '20170101'\n",
    "# Use today's date so the end of the range does not have to be edited by hand.\n",
    "CAL_END_DATE = pd.Timestamp.today().strftime('%Y%m%d')\n",
    "\n",
    "\n",
    "def read_max_trade_date():\n",
    "    \"\"\"Return the largest ``trade_date`` currently stored under ``key``.\n",
    "\n",
    "    Only the ``trade_date`` column is requested from the store instead of\n",
    "    materialising every column of the table.\n",
    "    \"\"\"\n",
    "    with pd.HDFStore(h5_filename, mode='r') as store:\n",
    "        stored_dates = store.select(key, columns=['trade_date'])['trade_date']\n",
    "    return stored_dates.max()\n",
    "\n",
    "\n",
    "max_date = read_max_trade_date()\n",
    "print(max_date)\n",
    "\n",
    "trade_cal = pro.trade_cal(exchange='', start_date=CAL_START_DATE, end_date=CAL_END_DATE)\n",
    "trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
    "# Dates are compared as strings, exactly as they are stored and returned.\n",
    "trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
    "\n",
    "# min() raises on an empty list, which happens whenever the store is already\n",
    "# up to date, so fall back to None in that case.\n",
    "start_date = min(trade_dates) if trade_dates else None\n",
    "print(f'start_date: {start_date}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "747acc47-0884-4f76-90fb-276f6494e31d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data(trade_date):\n",
    "    \"\"\"Fetch ``cyq_perf`` rows for one trade date.\n",
    "\n",
    "    Returns the frame returned by ``pro.cyq_perf``, or ``None`` when the API\n",
    "    returned ``None`` or an empty frame. Exceptions raised by the API call\n",
    "    propagate to the caller.\n",
    "    \"\"\"\n",
    "    time.sleep(0.1)  # short pause before every request\n",
    "    data = pro.cyq_perf(trade_date=trade_date)\n",
    "    if data is not None and not data.empty:\n",
    "        return data\n",
    "    return None\n",
    "\n",
    "\n",
    "frames_by_date = {}\n",
    "failed_dates = []\n",
    "\n",
    "with ThreadPoolExecutor(max_workers=2) as executor:\n",
    "    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
    "\n",
    "    for future in as_completed(future_to_date):\n",
    "        trade_date = future_to_date[future]  # trade date this future belongs to\n",
    "        try:\n",
    "            result = future.result()\n",
    "        except Exception as e:\n",
    "            failed_dates.append(trade_date)\n",
    "            print(f'获取 {trade_date} 数据时出错: {e}')\n",
    "            continue\n",
    "        if result is None:\n",
    "            # Nothing returned for this date: do not collect a None entry.\n",
    "            print(f'任务 {trade_date} 无数据')\n",
    "            continue\n",
    "        frames_by_date[trade_date] = result\n",
    "        print(f'任务 {trade_date} 完成')\n",
    "\n",
    "# The next run starts after the newest stored trade_date, so saving dates that\n",
    "# lie after a failed one would leave a gap that is never retried. Keep only the\n",
    "# dates before the earliest failure; the rest is fetched again on the next run.\n",
    "first_failed = min(failed_dates) if failed_dates else None\n",
    "all_daily_data = [\n",
    "    frames_by_date[d]\n",
    "    for d in sorted(frames_by_date)\n",
    "    if first_failed is None or d < first_failed\n",
    "]\n",
    "if first_failed is not None:\n",
    "    print(f'{first_failed} 起的数据本次不保存, 请重新运行以补齐')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6765638-481f-40d8-a259-2e7b25362618",
   "metadata": {},
   "outputs": [],
   "source": [
    "if all_daily_data:\n",
    "    # Merge all fetched frames into a single DataFrame.\n",
    "    fetched_df = pd.concat(all_daily_data, ignore_index=True)\n",
    "    # Re-read the stored maximum so that re-running this cell cannot append\n",
    "    # the same rows a second time.\n",
    "    stored_max_date = read_max_trade_date()\n",
    "    all_daily_data_df = fetched_df[fetched_df['trade_date'] > stored_max_date].reset_index(drop=True)\n",
    "else:\n",
    "    # pd.concat raises on an empty list, so handle 'nothing fetched' explicitly.\n",
    "    all_daily_data_df = pd.DataFrame()\n",
    "\n",
    "if all_daily_data_df.empty:\n",
    "    print('没有需要保存的新数据')\n",
    "else:\n",
    "    # Append to the existing HDF5 table (table format).\n",
    "    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
    "    print('所有每日基础数据获取并保存完毕!')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "stock",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}