302 lines
10 KiB
Plaintext
302 lines
10 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:42:39.056767Z",
|
||
"start_time": "2025-03-30T16:42:37.817887Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare Pro token from the TUSHARE_TOKEN environment variable rather than\n",
"# committing it to the notebook; the token that used to be hardcoded here should be rotated.\n",
"# With an empty token, pro_api() falls back to one saved earlier by ts.set_token().\n",
"pro = ts.pro_api(os.environ.get('TUSHARE_TOKEN', ''))"
|
||
],
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "5a84bc9da6d54868",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:42:59.784780Z",
|
||
"start_time": "2025-03-30T16:42:39.056767Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"# Find the newest trade_date already stored so that only later dates are fetched.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.sort_values(by='trade_date', ascending=True).tail())\n",
"    df.info()  # info() prints its own report and returns None, so it is not wrapped in print()\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# End the calendar query at today: a fixed end date stops picking up new trading days once it\n",
"# has passed, and before that it makes the notebook request dates that have no data yet.\n",
"today = pd.Timestamp.today().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# trade_dates is empty when the store is already up to date; min() would raise on an empty list.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(start_date)"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" ts_code trade_date\n",
|
||
"4705 600289.SH 20250321\n",
|
||
"4706 600292.SH 20250321\n",
|
||
"4707 600293.SH 20250321\n",
|
||
"4696 600279.SH 20250321\n",
|
||
"7051 920116.BJ 20250321\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"Index: 10237887 entries, 0 to 35266\n",
|
||
"Data columns (total 2 columns):\n",
|
||
" # Column Dtype \n",
|
||
"--- ------ ----- \n",
|
||
" 0 ts_code object\n",
|
||
" 1 trade_date object\n",
|
||
"dtypes: object(2)\n",
|
||
"memory usage: 234.3+ MB\n",
|
||
"None\n",
|
||
"20250321\n",
|
||
"20250324\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 2
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
|
||
"metadata": {
|
||
"scrolled": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:43:03.372001Z",
|
||
"start_time": "2025-03-30T16:43:00.012140Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||
"\n",
|
||
"# Accumulates one DataFrame per trade date for which the API returned rows.\n",
"all_daily_data = []\n",
|
||
"\n",
|
||
"\n",
|
||
"def get_data(trade_date):\n",
"    \"\"\"Fetch the stk_limit table for a single trade date.\n",
"\n",
"    Sleeps 0.1 s before issuing the request. Returns the DataFrame from\n",
"    pro.stk_limit, or None when the response is None or has no rows.\n",
"    \"\"\"\n",
"    time.sleep(0.1)\n",
"    frame = pro.stk_limit(trade_date=trade_date)\n",
"    if frame is None or frame.empty:\n",
"        return None\n",
"    return frame\n",
|
||
"\n",
|
||
"\n",
|
||
"# Trade dates whose request raised. They must not be ignored: the next run resumes after the\n",
"# newest stored trade_date, so a date skipped here would never be fetched again.\n",
"failed_dates = []\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
"        try:\n",
"            result = future.result()  # re-raises any exception raised inside the worker\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            failed_dates.append(trade_date)\n",
"            continue\n",
"        if result is not None:\n",
"            all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        else:\n",
"            # get_data returns None when the API had no rows for this date.\n",
"            print(f\"任务 {trade_date} 无数据\")\n",
"\n",
"# Stop here so that the save step does not append a batch with missing dates.\n",
"if failed_dates:\n",
"    raise RuntimeError(f\"failed to fetch stk_limit for: {sorted(failed_dates)}\")\n",
|
||
"\n"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"任务 20250418 完成\n",
|
||
"任务 20250417 完成\n",
|
||
"任务 20250416 完成\n",
|
||
"任务 20250415 完成\n",
|
||
"任务 20250411 完成\n",
|
||
"任务 20250414 完成\n",
|
||
"任务 20250409 完成\n",
|
||
"任务 20250410 完成\n",
|
||
"任务 20250408 完成\n",
|
||
"任务 20250407 完成\n",
|
||
"任务 20250403 完成\n",
|
||
"任务 20250402 完成\n",
|
||
"任务 20250401 完成\n",
|
||
"任务 20250331 完成\n",
|
||
"任务 20250327 完成\n",
|
||
"任务 20250328 完成\n",
|
||
"任务 20250326 完成\n",
|
||
"任务 20250325 完成\n",
|
||
"任务 20250324 完成\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 3
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "96a81aa5890ea3c3",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:43:03.397757Z",
|
||
"start_time": "2025-03-30T16:43:03.384786Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"# Summarise the fetched frames instead of dumping every frame into the cell output.\n",
"print(f\"{len(all_daily_data)} frames, {sum(len(frame) for frame in all_daily_data)} rows\")\n",
"# Merge all frames into one DataFrame. Worker threads finish in arbitrary order, so a stable\n",
"# sort by trade_date keeps the stored row order reproducible between runs.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"    all_daily_data_df = all_daily_data_df.sort_values('trade_date', kind='stable', ignore_index=True)\n",
"else:\n",
"    # pd.concat raises ValueError on an empty list, e.g. when no date returned any rows.\n",
"    all_daily_data_df = pd.DataFrame()"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[ trade_date ts_code up_limit down_limit\n",
|
||
"0 20250327 000001.SZ 12.52 10.24\n",
|
||
"1 20250327 000002.SZ 7.92 6.48\n",
|
||
"2 20250327 000004.SZ 11.40 9.32\n",
|
||
"3 20250327 000006.SZ 7.44 6.08\n",
|
||
"4 20250327 000007.SZ 7.00 5.72\n",
|
||
"... ... ... ... ...\n",
|
||
"7059 20250327 920108.BJ 33.56 18.08\n",
|
||
"7060 20250327 920111.BJ 40.57 21.85\n",
|
||
"7061 20250327 920116.BJ 126.29 68.01\n",
|
||
"7062 20250327 920118.BJ 44.14 23.78\n",
|
||
"7063 20250327 920128.BJ 47.35 25.51\n",
|
||
"\n",
|
||
"[7064 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||
"0 20250328 000001.SZ 12.53 10.25\n",
|
||
"1 20250328 000002.SZ 7.89 6.45\n",
|
||
"2 20250328 000004.SZ 11.19 9.15\n",
|
||
"3 20250328 000006.SZ 8.18 6.70\n",
|
||
"4 20250328 000007.SZ 6.99 5.72\n",
|
||
"... ... ... ... ...\n",
|
||
"7060 20250328 920108.BJ 31.03 16.71\n",
|
||
"7061 20250328 920111.BJ 39.65 21.35\n",
|
||
"7062 20250328 920116.BJ 115.67 62.29\n",
|
||
"7063 20250328 920118.BJ 41.00 22.08\n",
|
||
"7064 20250328 920128.BJ 44.83 24.15\n",
|
||
"\n",
|
||
"[7065 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||
"0 20250326 000001.SZ 12.57 10.29\n",
|
||
"1 20250326 000002.SZ 7.91 6.47\n",
|
||
"2 20250326 000004.SZ 11.28 9.23\n",
|
||
"3 20250326 000006.SZ 7.17 5.87\n",
|
||
"4 20250326 000007.SZ 6.67 5.45\n",
|
||
"... ... ... ... ...\n",
|
||
"7056 20250326 920108.BJ 33.96 18.30\n",
|
||
"7057 20250326 920111.BJ 41.92 22.58\n",
|
||
"7058 20250326 920116.BJ 133.64 71.96\n",
|
||
"7059 20250326 920118.BJ 41.93 22.59\n",
|
||
"7060 20250326 920128.BJ 49.40 26.60\n",
|
||
"\n",
|
||
"[7061 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||
"0 20250325 000001.SZ 12.52 10.24\n",
|
||
"1 20250325 000002.SZ 7.90 6.46\n",
|
||
"2 20250325 000004.SZ 11.55 9.45\n",
|
||
"3 20250325 000006.SZ 7.13 5.83\n",
|
||
"4 20250325 000007.SZ 6.60 5.40\n",
|
||
"... ... ... ... ...\n",
|
||
"7055 20250325 920108.BJ 33.30 17.94\n",
|
||
"7056 20250325 920111.BJ 39.97 21.53\n",
|
||
"7057 20250325 920116.BJ 137.78 74.20\n",
|
||
"7058 20250325 920118.BJ 39.52 21.28\n",
|
||
"7059 20250325 920128.BJ 46.22 24.90\n",
|
||
"\n",
|
||
"[7060 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||
"0 20250324 000001.SZ 12.56 10.28\n",
|
||
"1 20250324 000002.SZ 8.10 6.62\n",
|
||
"2 20250324 000004.SZ 12.82 10.49\n",
|
||
"3 20250324 000006.SZ 7.44 6.08\n",
|
||
"4 20250324 000007.SZ 6.89 5.63\n",
|
||
"... ... ... ... ...\n",
|
||
"7053 20250324 920108.BJ 34.84 18.76\n",
|
||
"7054 20250324 920111.BJ 40.41 21.77\n",
|
||
"7055 20250324 920116.BJ 134.55 72.45\n",
|
||
"7056 20250324 920118.BJ 38.67 20.83\n",
|
||
"7057 20250324 920128.BJ 45.86 24.70\n",
|
||
"\n",
|
||
"[7058 rows x 4 columns]]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 4
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:43:03.696614Z",
|
||
"start_time": "2025-03-30T16:43:03.411036Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"\n",
|
||
"\n",
|
||
"# Append the merged frame to the HDF5 store in table format.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='stk_limit',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"所有每日基础数据获取并保存完毕!\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 5
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-03-30T16:43:03.713628Z",
|
||
"start_time": "2025-03-30T16:43:03.711521Z"
|
||
}
|
||
},
|
||
"source": [],
|
||
"outputs": [],
|
||
"execution_count": null
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|