{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "500802dc-7a20-48b7-a470-a4bae3ec534b", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:57:41.532210Z", "start_time": "2025-04-09T14:57:40.584930Z" } }, "outputs": [], "source": [ "import tushare as ts\n", "\n", "ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n", "pro = ts.pro_api()" ] }, { "cell_type": "code", "execution_count": 2, "id": "5a84bc9da6d54868", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:58:04.911924Z", "start_time": "2025-04-09T14:57:41.540345Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " ts_code trade_date\n", "4959 600211.SH 20260116\n", "4960 600212.SH 20260116\n", "4961 600215.SH 20260116\n", "4949 600197.SH 20260116\n", "3732 301501.SZ 20260116\n", "\n", "Index: 11701107 entries, 0 to 37139\n", "Data columns (total 2 columns):\n", " # Column Dtype \n", "--- ------ ----- \n", " 0 ts_code object\n", " 1 trade_date object\n", "dtypes: object(2)\n", "memory usage: 267.8+ MB\n", "None\n", "20260116\n", "20260119\n" ] } ], "source": [ "import pandas as pd\n", "import time\n", "\n", "h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n", "key = '/stk_limit'\n", "max_date = None\n", "with pd.HDFStore(h5_filename, mode='r') as store:\n", " df = store[key][['ts_code', 'trade_date']]\n", " print(df.sort_values(by='trade_date', ascending=True).tail())\n", " print(df.info())\n", " max_date = df['trade_date'].max()\n", "\n", "print(max_date)\n", "trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20260201')\n", "trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n", "trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n", "start_date = min(trade_dates)\n", "print(start_date)" ] }, { "cell_type": "code", "execution_count": 3, "id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:58:09.342522Z", 
"start_time": "2025-04-09T14:58:05.259974Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "任务 20260130 完成\n", "任务 20260129 完成\n", "任务 20260128 完成\n", "任务 20260127 完成\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "任务 20260126 完成\n", "任务 20260123 完成\n", "任务 20260122 完成\n", "任务 20260121 完成\n", "任务 20260120 完成\n", "任务 20260119 完成\n" ] } ], "source": [ "from concurrent.futures import ThreadPoolExecutor, as_completed\n", "\n", "all_daily_data = []\n", "\n", "# API 调用计数和时间控制变量\n", "api_call_count = 0\n", "batch_start_time = time.time()\n", "\n", "\n", "def get_data(trade_date):\n", " time.sleep(0.1)\n", " stk_limit_data = pro.stk_limit(trade_date=trade_date)\n", " if stk_limit_data is not None and not stk_limit_data.empty:\n", " return stk_limit_data\n", "\n", "\n", "with ThreadPoolExecutor(max_workers=2) as executor:\n", " future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n", "\n", " for future in as_completed(future_to_date):\n", " trade_date = future_to_date[future] # 获取对应的交易日期\n", " try:\n", " result = future.result() # 获取任务执行的结果\n", " if result is not None:\n", " all_daily_data.append(result)\n", " print(f\"任务 {trade_date} 完成\")\n", " except Exception as e:\n", " print(f\"获取 {trade_date} 数据时出错: {e}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "96a81aa5890ea3c3", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:58:09.353560Z", "start_time": "2025-04-09T14:58:09.346528Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ trade_date ts_code up_limit down_limit\n", "0 20260123 000001.SZ 12.18 9.96\n", "1 20260123 000002.SZ 5.45 4.46\n", "2 20260123 000004.SZ 12.59 11.39\n", "3 20260123 000006.SZ 10.62 8.69\n", "4 20260123 000007.SZ 12.47 10.21\n", "... ... ... ... 
...\n", "7435 20260123 920978.BJ 44.56 24.00\n", "7436 20260123 920981.BJ 45.61 24.57\n", "7437 20260123 920982.BJ 295.08 158.90\n", "7438 20260123 920985.BJ 10.20 5.50\n", "7439 20260123 920992.BJ 24.32 13.10\n", "\n", "[7440 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20260122 000001.SZ 12.18 9.96\n", "1 20260122 000002.SZ 5.51 4.51\n", "2 20260122 000004.SZ 11.99 10.85\n", "3 20260122 000006.SZ 10.62 8.69\n", "4 20260122 000007.SZ 12.41 10.15\n", "... ... ... ... ...\n", "7433 20260122 920978.BJ 45.48 24.50\n", "7434 20260122 920981.BJ 45.61 24.57\n", "7435 20260122 920982.BJ 301.76 162.50\n", "7436 20260122 920985.BJ 9.84 5.30\n", "7437 20260122 920992.BJ 23.95 12.91\n", "\n", "[7438 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20260121 000001.SZ 12.28 10.04\n", "1 20260121 000002.SZ 5.27 4.31\n", "2 20260121 000004.SZ 12.02 10.88\n", "3 20260121 000006.SZ 10.27 8.41\n", "4 20260121 000007.SZ 12.08 9.88\n", "... ... ... ... ...\n", "7433 20260121 920978.BJ 45.60 24.56\n", "7434 20260121 920981.BJ 43.81 23.59\n", "7435 20260121 920982.BJ 304.34 163.88\n", "7436 20260121 920985.BJ 9.90 5.34\n", "7437 20260121 920992.BJ 24.11 12.99\n", "\n", "[7438 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20260120 000001.SZ 12.23 10.01\n", "1 20260120 000002.SZ 5.20 4.26\n", "2 20260120 000004.SZ 11.46 10.36\n", "3 20260120 000006.SZ 10.07 8.24\n", "4 20260120 000007.SZ 12.49 10.22\n", "... ... ... ... ...\n", "7431 20260120 920978.BJ 46.41 24.99\n", "7432 20260120 920981.BJ 44.26 23.84\n", "7433 20260120 920982.BJ 310.42 167.16\n", "7434 20260120 920985.BJ 9.97 5.37\n", "7435 20260120 920992.BJ 24.49 13.19\n", "\n", "[7436 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20260119 000001.SZ 12.31 10.07\n", "1 20260119 000002.SZ 5.20 4.26\n", "2 20260119 000004.SZ 11.70 10.58\n", "3 20260119 000006.SZ 10.13 8.29\n", "4 20260119 000007.SZ 12.43 10.17\n", "... ... ... ... 
...\n", "7429 20260119 920978.BJ 46.26 24.92\n", "7430 20260119 920981.BJ 45.51 24.51\n", "7431 20260119 920982.BJ 305.50 164.50\n", "7432 20260119 920985.BJ 9.88 5.32\n", "7433 20260119 920992.BJ 24.28 13.08\n", "\n", "[7434 rows x 4 columns]]\n" ] } ], "source": [
 "# Print a compact summary instead of dumping every fetched DataFrame.\n",
 "print(f\"共获取 {len(all_daily_data)} 个交易日, {sum(len(frame) for frame in all_daily_data)} 行数据\")\n",
 "# Merge all fetched frames into a single DataFrame. pd.concat raises ValueError on\n",
 "# an empty list, so fall back to an empty frame when nothing was fetched.\n",
 "all_daily_data_df = pd.concat(all_daily_data, ignore_index=True) if all_daily_data else pd.DataFrame()"
] }, { "cell_type": "code", "execution_count": 5, "id": "ad9733a1-2f42-43ee-a98c-0bf699304c21", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:58:09.674078Z", "start_time": "2025-04-09T14:58:09.366441Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "所有每日基础数据获取并保存完毕!\n" ] } ], "source": [
 "# Drop trade dates that are already stored, so re-running this cell cannot append\n",
 "# duplicate rows.\n",
 "rows_to_save = all_daily_data_df\n",
 "if not rows_to_save.empty:\n",
 "    first_new_date = rows_to_save['trade_date'].min()\n",
 "    with pd.HDFStore(h5_filename, mode='r') as store:\n",
 "        # Assumes trade_date is a queryable data column of the existing table, as it\n",
 "        # is when the table was written with data_columns=True like the append below.\n",
 "        stored_dates = store.select(\n",
 "            'stk_limit', where=f\"trade_date >= '{first_new_date}'\", columns=['trade_date']\n",
 "        )['trade_date'].unique()\n",
 "    rows_to_save = rows_to_save[~rows_to_save['trade_date'].isin(stored_dates)]\n",
 "\n",
 "if rows_to_save.empty:\n",
 "    print(\"没有需要保存的新数据\")\n",
 "else:\n",
 "    # Append the new rows to the HDF5 file (table format).\n",
 "    rows_to_save.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
 "    print(\"所有每日基础数据获取并保存完毕!\")"
] }, { "cell_type": "code", "execution_count": null, "id": "7e777f1f-4d54-4a74-b916-691ede6af055", "metadata": { "ExecuteTime": { "end_time": "2025-04-09T14:58:09.689422Z", "start_time": "2025-04-09T14:58:09.686524Z" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "stock", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 5 }