{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:57:40.184418Z",
     "start_time": "2025-04-09T14:57:39.137312Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "\n",
    "import pandas as pd\n",
    "import tushare as ts\n",
    "\n",
    "# The Tushare token is a credential, so it is read from the TUSHARE_TOKEN\n",
    "# environment variable instead of being stored in the notebook.\n",
    "tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
    "if not tushare_token:\n",
    "    raise RuntimeError('未设置环境变量 TUSHARE_TOKEN')\n",
    "ts.set_token(tushare_token)\n",
    "pro = ts.pro_api()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "742c29d453b9bb38",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:58:10.515830Z",
     "start_time": "2025-04-09T14:57:40.190466Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Index: 9439640 entries, 0 to 25891\n",
      "Data columns (total 2 columns):\n",
      " # Column Dtype \n",
      "--- ------ ----- \n",
      " 0 ts_code object\n",
      " 1 trade_date object\n",
      "dtypes: object(2)\n",
      "memory usage: 216.1+ MB\n",
      "None\n",
      "20260213\n",
      "start_date: 20260224\n"
     ]
    }
   ],
   "source": [
    "# Find the newest trade_date already stored in the HDF5 file, then list the\n",
    "# open trading days after it that still have to be downloaded.\n",
    "h5_filename = '/mnt/d/PyProject/NewStock/data/money_flow.h5'\n",
    "key = '/money_flow'\n",
    "max_date = None\n",
    "with pd.HDFStore(h5_filename, mode='r') as store:\n",
    "    df = store[key][['ts_code', 'trade_date']]\n",
    "    # info() prints its report itself and returns None, so wrapping it in\n",
    "    # print() only adds a stray 'None' line to the output.\n",
    "    df.info()\n",
    "    max_date = df['trade_date'].max()\n",
    "\n",
    "print(max_date)\n",
    "trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20260310')\n",
    "trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
    "trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
    "# min() raises ValueError on an empty list, which happens when the store\n",
    "# already holds every trading day up to end_date.\n",
    "start_date = min(trade_dates) if trade_dates else None\n",
    "if start_date is None:\n",
    "    print('数据已是最新,没有需要获取的交易日')\n",
    "print(f'start_date: {start_date}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:58:17.197319Z",
     "start_time": "2025-04-09T14:58:10.724923Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "任务 20260310 完成\n",
      "任务 20260309 完成\n",
      "任务 20260306 完成\n",
      "任务 20260305 完成\n",
      "任务 
20260304 完成\n",
      "任务 20260303 完成\n",
      "任务 20260302 完成\n",
      "任务 20260227 完成\n",
      "任务 20260226 完成\n",
      "任务 20260225 完成\n",
      "任务 20260224 完成\n"
     ]
    }
   ],
   "source": [
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "\n",
    "all_daily_data = []\n",
    "# Trading days whose request raised or came back empty. The merge step uses\n",
    "# this list so that no rows are written past a day that is still missing.\n",
    "missing_dates = []\n",
    "\n",
    "\n",
    "def get_data(trade_date):\n",
    "    \"\"\"Fetch the money-flow rows of one trading day from Tushare.\n",
    "\n",
    "    Args:\n",
    "        trade_date: Trading day, passed unchanged to ``pro.moneyflow``.\n",
    "\n",
    "    Returns:\n",
    "        The DataFrame returned by ``pro.moneyflow``, or ``None`` when the API\n",
    "        returns ``None`` or an empty frame.\n",
    "    \"\"\"\n",
    "    time.sleep(0.1)  # short pause before every request\n",
    "    money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
    "    if money_flow_data is None or money_flow_data.empty:\n",
    "        return None\n",
    "    return money_flow_data\n",
    "\n",
    "\n",
    "with ThreadPoolExecutor(max_workers=2) as executor:\n",
    "    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
    "\n",
    "    for future in as_completed(future_to_date):\n",
    "        trade_date = future_to_date[future]  # trading day this future was submitted for\n",
    "        try:\n",
    "            result = future.result()\n",
    "        except Exception as e:\n",
    "            missing_dates.append(trade_date)\n",
    "            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
    "            continue\n",
    "        if result is None:\n",
    "            # An empty day is not a finished task; keep None out of the list.\n",
    "            missing_dates.append(trade_date)\n",
    "            print(f\"任务 {trade_date} 无数据\")\n",
    "            continue\n",
    "        all_daily_data.append(result)\n",
    "        print(f\"任务 {trade_date} 完成\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9af80516849d4e80",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:58:17.214168Z",
     "start_time": "2025-04-09T14:58:17.210734Z"
    }
   },
   "outputs": [],
   "source": [
    "# Merge the per-day frames into one DataFrame. pd.concat raises ValueError on\n",
    "# an empty list, so fall back to an empty frame when nothing was fetched.\n",
    "if all_daily_data:\n",
    "    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
    "else:\n",
    "    all_daily_data_df = pd.DataFrame()\n",
    "\n",
    "# The next run only requests trading days after the newest stored trade_date,\n",
    "# so saving rows dated after a missing day would leave that day out for good.\n",
    "# Keep only the rows dated before the earliest missing day.\n",
    "if missing_dates and not all_daily_data_df.empty:\n",
    "    first_missing = min(missing_dates)\n",
    "    keep_mask = all_daily_data_df['trade_date'] < first_missing\n",
    "    all_daily_data_df = all_daily_data_df[keep_mask].reset_index(drop=True)\n",
    "    print(f\"以下交易日缺少数据: {sorted(missing_dates)},仅保留 {first_missing} 之前的数据\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-09T14:58:19.633456Z",
     "start_time": "2025-04-09T14:58:17.229837Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "所有每日基础数据获取并保存完毕!\n"
     ]
    }
   ],
   "source": [
    "# Append the merged rows to the HDF5 file in table format.\n",
    "# NOTE: append=True means re-running this cell writes the same rows again,\n",
    "# so run it once per fetch.\n",
    "if all_daily_data_df.empty:\n",
    "    print(\"没有可保存的新数据\")\n",
    "else:\n",
    "    all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
    "    print(\"所有每日基础数据获取并保存完毕!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e6f2a2fe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
"output_type": "stream", "text": [ " ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n", "0 002968.SZ 20260227 21302 2803.57 22552 \n", "1 600822.SH 20260227 20371 2267.76 18994 \n", "2 688003.SH 20260227 10527 8037.01 11183 \n", "3 300894.SZ 20260227 15100 1751.27 16491 \n", "4 603026.SH 20260227 35310 23574.59 36976 \n", "... ... ... ... ... ... \n", "20704 002587.SZ 20260224 114009 8175.31 115996 \n", "20705 002474.SZ 20260224 132136 11662.67 104912 \n", "20706 002742.SZ 20260224 9048 381.92 6158 \n", "20707 601298.SH 20260224 95226 8736.97 102361 \n", "20708 301413.SZ 20260224 11648 17813.73 7614 \n", "\n", " sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n", "0 2971.99 12991 1715.40 11827 1560.91 \n", "1 2114.45 11869 1321.48 13242 1474.72 \n", "2 8535.14 6168 4707.52 5857 4472.05 \n", "3 1914.17 12204 1416.49 10014 1161.98 \n", "4 24680.03 25979 17356.95 23472 15689.34 \n", "... ... ... ... ... ... \n", "20704 8318.63 70581 5059.80 77960 5594.40 \n", "20705 9262.36 65247 5760.34 70730 6242.71 \n", "20706 259.97 12268 517.35 10575 446.72 \n", "20707 9387.51 17890 1640.43 21755 1995.51 \n", "20708 11690.25 18481 28263.21 16404 25116.12 \n", "\n", " buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n", "0 4640 616.17 5428 716.74 873 \n", "1 7412 824.96 7836 871.98 420 \n", "2 5007 3817.18 4258 3252.13 792 \n", "3 5426 629.63 6225 721.24 0 \n", "4 14372 9614.17 12027 8050.76 2859 \n", "... ... ... ... ... ... \n", "20704 59178 4247.14 57492 4122.70 13717 \n", "20705 28382 2504.08 47970 4231.98 8727 \n", "20706 8183 345.88 10348 436.35 2869 \n", "20707 15594 1429.59 14207 1303.96 12463 \n", "20708 11245 17223.41 14418 22047.20 2604 \n", "\n", " buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol \\\n", "0 114.50 0 0.00 8754 \n", "1 46.96 0 0.00 -2636 \n", "2 604.59 1196 906.98 -2186 \n", "3 0.00 0 0.00 -2514 \n", "4 1908.59 6045 4034.17 -2211 \n", "... ... ... ... ... 
\n",
      "20704 987.82 6038 434.35 32850 \n",
      "20705 770.48 10880 960.53 -24208 \n",
      "20706 120.50 5287 222.60 -5084 \n",
      "20707 1140.48 2850 260.49 4058 \n",
      "20708 3986.99 5543 8433.77 -7520 \n",
      "\n",
      " net_mf_amount \n",
      "0 1160.00 \n",
      "1 -291.18 \n",
      "2 -1656.29 \n",
      "3 -289.80 \n",
      "4 -1416.49 \n",
      "... ... \n",
      "20704 2367.64 \n",
      "20705 -2127.88 \n",
      "20706 -213.09 \n",
      "20707 379.12 \n",
      "20708 -11475.67 \n",
      "\n",
      "[20709 rows x 20 columns]\n"
     ]
    }
   ],
   "source": [
    "# Leave the frame as the last expression of the cell so Jupyter shows its\n",
    "# rich table display instead of the wrapped plain text that print() produces.\n",
    "all_daily_data_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "stock",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}