{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-overview",
   "metadata": {},
   "source": [
    "# Download Tushare `cyq_perf` data\n",
    "\n",
    "Fetches the `cyq_perf` table from Tushare Pro for every open trading day in a fixed date range, stores the combined result in an HDF5 file, and reads the stored keys back.\n",
    "\n",
    "**Prerequisite:** set the `TUSHARE_TOKEN` environment variable to your Tushare Pro token before starting the kernel. Never paste the token into the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "from operator import index  # NOTE(review): appears unused in this notebook\n",
    "\n",
    "import pandas as pd\n",
    "import tushare as ts\n",
    "\n",
    "# The API token is read from the environment so that it is never stored in the notebook.\n",
    "# A missing variable raises KeyError('TUSHARE_TOKEN') right here instead of failing later.\n",
    "ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
    "pro = ts.pro_api()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-sample",
   "metadata": {},
   "source": [
    "## Sample request\n",
    "\n",
    "One trade date, to check the connection and inspect the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "972a5ac9f79fe373",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_df = pro.cyq_perf(trade_date='20180104')\n",
    "sample_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config",
   "metadata": {},
   "source": [
    "## Configuration and trading calendar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b5a82fbf4e380de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single definition of the output location, used for both writing and reading.\n",
    "h5_filename = '../../data/cyq_perf.h5'\n",
    "key = '/cyq_perf'\n",
    "\n",
    "trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
    "# Keep trading days only.\n",
    "trade_dates = trade_cal.loc[trade_cal['is_open'] == 1, 'cal_date'].tolist()\n",
    "len(trade_dates)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-download",
   "metadata": {},
   "source": [
    "## Download every trading day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "get-data-def",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data(trade_date):\n",
    "    \"\"\"Fetch the cyq_perf rows for a single trade date.\n",
    "\n",
    "    Args:\n",
    "        trade_date: The date value passed through to `pro.cyq_perf(trade_date=...)`.\n",
    "\n",
    "    Returns:\n",
    "        The DataFrame returned by the API, or None when the API returned\n",
    "        None or an empty frame.\n",
    "    \"\"\"\n",
    "    # Short pause before each request (presumably to stay under the API rate limit).\n",
    "    time.sleep(0.1)\n",
    "    data = pro.cyq_perf(trade_date=trade_date)\n",
    "    if data is None or data.empty:\n",
    "        return None\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f448da220816bf98",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "results_by_date = {}\n",
    "failed_dates = []\n",
    "\n",
    "with ThreadPoolExecutor(max_workers=2) as executor:\n",
    "    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
    "\n",
    "    for future in as_completed(future_to_date):\n",
    "        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
    "        try:\n",
    "            result = future.result()  # re-raises any exception from the worker\n",
    "        except Exception as e:\n",
    "            # Best effort: remember the failure and keep downloading the other dates.\n",
    "            failed_dates.append(trade_date)\n",
    "            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
    "            continue\n",
    "        if result is not None:  # dates with no data are skipped, not stored as None\n",
    "            results_by_date[trade_date] = result\n",
    "        print(f\"任务 {trade_date} 完成\")\n",
    "\n",
    "# as_completed yields in completion order, which varies between runs; rebuild the\n",
    "# list in calendar order so the saved table has a deterministic row order.\n",
    "all_daily_data = [results_by_date[td] for td in trade_dates if td in results_by_date]\n",
    "\n",
    "# Dates whose request raised; their rows are missing from all_daily_data.\n",
    "failed_dates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-save",
   "metadata": {},
   "source": [
    "## Save to HDF5\n",
    "\n",
    "The file is opened with `mode='w'`, so any existing file at `h5_filename` is replaced."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "907f732d3c397bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not all_daily_data:\n",
    "    # Fail before touching the file rather than with pandas' generic concat error.\n",
    "    raise ValueError('No cyq_perf data was fetched; not overwriting ' + h5_filename)\n",
    "\n",
    "# Combine all per-day frames into a single DataFrame.\n",
    "all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
    "\n",
    "# Save as an HDF5 file in table format.\n",
    "all_daily_data_df.to_hdf(h5_filename, key=key, mode='w', format='table', data_columns=True)\n",
    "\n",
    "print(\"所有每日基础数据获取并保存完毕!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-verify",
   "metadata": {},
   "source": [
    "## Read back the stored keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73e829ac-ff3d-408e-beb3-0b87f5b00b19",
   "metadata": {},
   "outputs": [],
   "source": [
    "with pd.HDFStore(h5_filename, mode='r') as store:\n",
    "    df = store[key][['ts_code', 'trade_date']]\n",
    "\n",
    "# Latest trade date present in the file (the original used .min() under this name).\n",
    "max_date = df['trade_date'].max()\n",
    "\n",
    "display(df.head())\n",
    "max_date"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}