# ---------------------------------------------------------------------------
# Setup: Tushare client
# ---------------------------------------------------------------------------
import os
import time

import pandas as pd
import tushare as ts

# Name of the environment variable that holds the Tushare API token.
# The token used to be hard-coded in this cell; any token that was committed
# with the notebook should be treated as leaked and regenerated.
TUSHARE_TOKEN_ENV = 'TUSHARE_TOKEN'

tushare_token = os.environ.get(TUSHARE_TOKEN_ENV)
if not tushare_token:
    raise RuntimeError(
        f"Environment variable {TUSHARE_TOKEN_ENV} is not set; "
        "export your Tushare token before running this notebook."
    )

ts.set_token(tushare_token)
pro = ts.pro_api()

# ---------------------------------------------------------------------------
# Find the newest trade date that is already in the local HDF5 store
# ---------------------------------------------------------------------------
h5_filename = '../../../data/stk_limit.h5'
key = '/stk_limit'
max_date = None
with pd.HDFStore(h5_filename, mode='r') as store:
    df = store[key][['ts_code', 'trade_date']]
    print(df.sort_values(by='trade_date', ascending=True).tail())
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that only adds a stray "None" line).
    df.info()
    max_date = df['trade_date'].max()

print(max_date)

# ---------------------------------------------------------------------------
# Build the list of open trading days that are newer than the stored data
# ---------------------------------------------------------------------------
CAL_START_DATE = '20170101'
# How many calendar days past "today" the trading calendar is requested for.
# A rolling window replaces the previous fixed end date ('20250420'), which
# would leave `trade_dates` empty once the store caught up with it.
CAL_LOOKAHEAD_DAYS = 14
cal_end_date = (pd.Timestamp.today() + pd.Timedelta(days=CAL_LOOKAHEAD_DAYS)).strftime('%Y%m%d')

trade_cal = pro.trade_cal(exchange='', start_date=CAL_START_DATE, end_date=cal_end_date)
trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only
# Dates are 'YYYYMMDD' strings, so string comparison orders them correctly.
trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()

# min() on an empty list raises ValueError; an up-to-date store is a normal
# situation, so report None instead of crashing.
start_date = min(trade_dates) if trade_dates else None
print(start_date)
from concurrent.futures import ThreadPoolExecutor, as_completed

# Number of concurrent API requests and the pause taken before each request.
MAX_WORKERS = 2
REQUEST_DELAY_SECONDS = 0.1


def get_data(trade_date):
    """Fetch the ``stk_limit`` rows for a single trade date.

    Args:
        trade_date: Trade date passed through unchanged to
            ``pro.stk_limit(trade_date=...)``.

    Returns:
        The DataFrame returned by the API, or ``None`` when the API returned
        ``None`` or an empty frame.
    """
    time.sleep(REQUEST_DELAY_SECONDS)  # short pause before every request
    stk_limit_data = pro.stk_limit(trade_date=trade_date)
    if stk_limit_data is None or stk_limit_data.empty:
        return None
    return stk_limit_data


# Results are keyed by date so the final list can be put in date order;
# as_completed() yields futures in whatever order the threads finish.
results_by_date = {}
failed_dates = []

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}

    for future in as_completed(future_to_date):
        trade_date = future_to_date[future]  # date this future was submitted for
        try:
            result = future.result()
        except Exception as e:
            failed_dates.append(trade_date)
            print(f"获取 {trade_date} 数据时出错: {e}")
            continue

        if result is None:
            # Distinguish "request succeeded but returned nothing" from a
            # date that actually produced rows.
            print(f"任务 {trade_date} 无数据")
        else:
            results_by_date[trade_date] = result
            print(f"任务 {trade_date} 完成")

if failed_dates:
    # The notebook resumes from the newest trade_date found in the store, so
    # saving dates that come after a failed one would leave a gap that is
    # never re-fetched. Keep only the dates before the earliest failure.
    first_failed = min(failed_dates)
    dropped_dates = sorted(d for d in results_by_date if d > first_failed)
    results_by_date = {d: frame for d, frame in results_by_date.items() if d < first_failed}
    print(f"获取失败的日期: {sorted(failed_dates)};为避免数据缺口,已丢弃其后的日期: {dropped_dates}")

# Fetched frames in ascending trade-date order ('YYYYMMDD' strings sort correctly).
all_daily_data = [results_by_date[d] for d in sorted(results_by_date)]
# Report how much was fetched instead of printing every DataFrame in full.
print(f"共获取 {len(all_daily_data)} 个交易日的数据")

if all_daily_data:
    # Merge the per-date frames into one DataFrame.
    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)
    # Row count per trade date as a quick sanity check of what will be written.
    print(all_daily_data_df['trade_date'].value_counts().sort_index())

    # Append to the existing HDF5 table.
    # NOTE: append=True means re-running this cell writes the same rows
    # again; run the notebook top to bottom once per update.
    all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)

    print("所有每日基础数据获取并保存完毕!")
else:
    # pd.concat() raises ValueError on an empty list, so handle "nothing new"
    # explicitly and leave the store untouched.
    all_daily_data_df = pd.DataFrame()
    print("没有新的数据需要保存")
"3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }