# %% Setup: imports and tushare client
import os
import time  # used by the download cell further down
from datetime import date

import pandas as pd
import tushare as ts

# SECURITY: the tushare token used to be hard-coded in this cell. It has been
# committed with the notebook and should be rotated. The token is now taken
# from the TUSHARE_TOKEN environment variable; when that is unset,
# ts.pro_api() falls back to the token tushare saved locally on an earlier
# ts.set_token() call, so existing machines keep working.
_tushare_token = os.environ.get('TUSHARE_TOKEN', '')
pro = ts.pro_api(_tushare_token) if _tushare_token else ts.pro_api()

# %% Find the trade dates that are not yet in the local store
# Location of the local HDF5 store; override with STK_LIMIT_H5 on other machines.
h5_filename = os.environ.get('STK_LIMIT_H5', '/mnt/d/PyProject/NewStock/data/stk_limit.h5')
key = '/stk_limit'
# Earliest calendar date requested when the store holds no rows yet.
CALENDAR_START = '20170101'

# Only the trade_date column is needed to find the resume point, so avoid
# materialising (and sorting) the whole table.
with pd.HDFStore(h5_filename, mode='r') as store:
    stored_trade_dates = store.select(key, columns=['trade_date'])['trade_date']

max_date = stored_trade_dates.max() if not stored_trade_dates.empty else None
print(f"rows in store: {len(stored_trade_dates)}")
print(max_date)

# Ask for the calendar up to today instead of a hard-coded end date that
# silently goes stale.
trade_cal = pro.trade_cal(
    exchange='',
    start_date=max_date or CALENDAR_START,
    end_date=date.today().strftime('%Y%m%d'),
)
trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only
if max_date is not None:
    # Dates are 'YYYYMMDD' strings, so lexicographic comparison is chronological.
    trade_cal = trade_cal[trade_cal['cal_date'] > max_date]
trade_dates = sorted(trade_cal['cal_date'].tolist())

# min() on an empty list raised ValueError when the store was already current.
if trade_dates:
    start_date = trade_dates[0]
    print(start_date)
else:
    start_date = None
    print(f"{max_date} 之后没有新的交易日，数据已是最新")
# %% Download stk_limit data for every missing trade date
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

MAX_WORKERS = 2              # concurrent API requests
REQUEST_PAUSE_SECONDS = 0.1  # pause before each request to stay under the rate limit
MAX_ATTEMPTS = 3             # tries per trade date before giving up


def get_data(trade_date, max_attempts=MAX_ATTEMPTS, pause=REQUEST_PAUSE_SECONDS):
    """Fetch the stk_limit rows for one trade date.

    Args:
        trade_date: Trade date string passed to ``pro.stk_limit``.
        max_attempts: How many times to call the API before re-raising the
            last error.
        pause: Base delay in seconds before a request; it grows linearly with
            the attempt number so retries back off.

    Returns:
        The DataFrame returned by the API, or ``None`` when the API returned
        nothing or an empty frame.

    Raises:
        Exception: Whatever the API call raised on the final attempt.
    """
    for attempt in range(1, max_attempts + 1):
        time.sleep(pause * attempt)
        try:
            stk_limit_data = pro.stk_limit(trade_date=trade_date)
        except Exception:
            if attempt == max_attempts:
                raise
            continue
        if stk_limit_data is not None and not stk_limit_data.empty:
            return stk_limit_data
        return None
    return None


frames_by_date = {}
failed_dates = []

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}

    for future in as_completed(future_to_date):
        trade_date = future_to_date[future]  # trade date this future belongs to
        try:
            result = future.result()
        except Exception as e:
            failed_dates.append(trade_date)
            print(f"获取 {trade_date} 数据时出错: {e}")
            continue
        if result is not None:
            frames_by_date[trade_date] = result
        print(f"任务 {trade_date} 完成")

if failed_dates:
    # The next run resumes from the newest date in the store, so saving dates
    # later than a failed one would leave a gap that is never re-fetched.
    # Keep only the contiguous prefix before the first failure.
    first_failed = min(failed_dates)
    frames_by_date = {d: frame for d, frame in frames_by_date.items() if d < first_failed}
    print(
        f"以下日期获取失败: {sorted(failed_dates)}；"
        f"仅保留 {first_failed} 之前的数据，其余日期将在下次运行时重新获取"
    )

# Chronological order (dates are 'YYYYMMDD' strings) instead of completion order.
all_daily_data = [frames_by_date[d] for d in sorted(frames_by_date)]
"text": [ "[ trade_date ts_code up_limit down_limit\n", "0 20250606 000001.SZ 12.84 10.50\n", "1 20250606 000002.SZ 7.30 5.98\n", "2 20250606 000004.SZ 10.35 9.37\n", "3 20250606 000006.SZ 7.48 6.12\n", "4 20250606 000007.SZ 9.06 7.42\n", "... ... ... ... ...\n", "7144 20250606 920445.BJ 13.36 7.20\n", "7145 20250606 920489.BJ 31.46 16.94\n", "7146 20250606 920682.BJ 13.14 7.08\n", "7147 20250606 920799.BJ 77.80 41.90\n", "7148 20250606 920819.BJ 5.70 3.08\n", "\n", "[7149 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20250605 000001.SZ 13.02 10.66\n", "1 20250605 000002.SZ 7.28 5.96\n", "2 20250605 000004.SZ 10.63 9.61\n", "3 20250605 000006.SZ 7.41 6.07\n", "4 20250605 000007.SZ 9.19 7.52\n", "... ... ... ... ...\n", "7143 20250605 920445.BJ 13.49 7.27\n", "7144 20250605 920489.BJ 31.00 16.70\n", "7145 20250605 920682.BJ 13.22 7.12\n", "7146 20250605 920799.BJ 76.24 41.06\n", "7147 20250605 920819.BJ 5.70 3.08\n", "\n", "[7148 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20250603 000001.SZ 12.72 10.40\n", "1 20250603 000002.SZ 7.30 5.98\n", "2 20250603 000004.SZ 10.90 9.86\n", "3 20250603 000006.SZ 7.62 6.24\n", "4 20250603 000007.SZ 8.65 7.07\n", "... ... ... ... ...\n", "7137 20250603 920445.BJ 13.18 7.10\n", "7138 20250603 920489.BJ 31.25 16.83\n", "7139 20250603 920682.BJ 13.20 7.12\n", "7140 20250603 920799.BJ 76.31 41.09\n", "7141 20250603 920819.BJ 5.72 3.08\n", "\n", "[7142 rows x 4 columns], trade_date ts_code up_limit down_limit\n", "0 20250604 000001.SZ 12.99 10.63\n", "1 20250604 000002.SZ 7.24 5.92\n", "2 20250604 000004.SZ 10.77 9.75\n", "3 20250604 000006.SZ 7.41 6.07\n", "4 20250604 000007.SZ 8.88 7.26\n", "... ... ... ... 
# %% Merge the downloaded frames
import pandas as pd

STK_LIMIT_KEY = 'stk_limit'

# pd.concat raises on an empty list, which happens whenever there was nothing
# new to download; fall back to an empty frame with the stk_limit columns.
if all_daily_data:
    all_daily_data_df = (
        pd.concat(all_daily_data, ignore_index=True)
        .sort_values(['trade_date', 'ts_code'], ignore_index=True)
    )
else:
    all_daily_data_df = pd.DataFrame(columns=['trade_date', 'ts_code', 'up_limit', 'down_limit'])

# Print a short summary rather than dumping every DataFrame into the output.
print(f"frames: {len(all_daily_data)}, rows: {len(all_daily_data_df)}")
if not all_daily_data_df.empty:
    print(all_daily_data_df['trade_date'].min(), all_daily_data_df['trade_date'].max())

# %% Append the new rows to the HDF5 store (table format)
if all_daily_data_df.empty:
    print("没有新的数据需要保存")
else:
    with pd.HDFStore(h5_filename, mode='a') as store:
        new_rows = all_daily_data_df
        if STK_LIMIT_KEY in store:
            # Re-read the newest stored date so that re-running this cell does
            # not append the same rows a second time.
            stored_max = store.select(STK_LIMIT_KEY, columns=['trade_date'])['trade_date'].max()
            if not pd.isna(stored_max):
                new_rows = all_daily_data_df[all_daily_data_df['trade_date'] > stored_max]

        if new_rows.empty:
            print("没有新的数据需要保存")
        else:
            store.append(STK_LIMIT_KEY, new_rows, format='table', data_columns=True)
            print("所有每日基础数据获取并保存完毕!")