Files
NewStock/main/data/update/update_stk_limit.ipynb
liaozhaorun dc29f153ca 1、load model
2、修改update data相关函数
2025-10-13 15:04:48 +08:00

280 lines
9.0 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the environment so the credential is never\n",
"# stored in the notebook or committed to version control.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4872 600206.SH 20250926\n",
"4873 600207.SH 20250926\n",
"4874 600208.SH 20250926\n",
"4876 600211.SH 20250926\n",
"7280 920037.BJ 20250926\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 11170571 entries, 0 to 36462\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 255.7+ MB\n",
"None\n",
"20250926\n",
"20250929\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.sort_values(by='trade_date', ascending=True).tail())\n",
"    # DataFrame.info() prints its report itself and returns None, so it is not wrapped in print().\n",
"    df.info()\n",
"    # Latest trade_date already stored; only later trading days are downloaded.\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# Query the calendar up to today rather than a fixed end date, so the notebook keeps\n",
"# finding new trading days without being edited.\n",
"today = pd.Timestamp.today().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# min() raises ValueError on an empty list, which is the case when the store is already up to date.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"if start_date is None:\n",
"    print('No new trading dates after', max_date)\n",
"else:\n",
"    print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251015 完成\n",
"任务 20251016 完成\n",
"任务 20251013 完成\n",
"任务 20251014 完成\n",
"任务 20251010 完成\n",
"任务 20251009 完成\n",
"任务 20250929 完成\n",
"任务 20250930 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"# Per-date DataFrames collected by the download loop in this cell.\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the ``pro.stk_limit`` rows for one trade date.\n",
"\n",
"    Args:\n",
"        trade_date: Trade date passed through to ``pro.stk_limit``.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by the API, or ``None`` when the response is\n",
"        ``None`` or empty.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before each request (presumably API throttling)\n",
"    limit_frame = pro.stk_limit(trade_date=trade_date)\n",
"    if limit_frame is None or limit_frame.empty:\n",
"        return None\n",
"    return limit_frame\n",
"\n",
"\n",
"# Trade dates whose download raised, collected so that gaps are reported explicitly.\n",
"failed_dates = []\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
"        try:\n",
"            result = future.result()  # re-raises any exception raised inside get_data\n",
"        except Exception as e:\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"        else:\n",
"            # get_data returns None for an empty response; such dates contribute no rows.\n",
"            if result is not None:\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"\n",
"# The notebook resumes from the largest trade_date already in the store, so a date that\n",
"# failed here would not be requested again once later dates are saved. Stop before saving.\n",
"if failed_dates:\n",
"    raise RuntimeError(f'stk_limit download failed for {sorted(failed_dates)}; re-run this cell before saving')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20251010 000001.SZ 12.54 10.26\n",
"1 20251010 000002.SZ 7.47 6.11\n",
"2 20251010 000004.SZ 12.26 11.10\n",
"3 20251010 000006.SZ 11.94 9.77\n",
"4 20251010 000007.SZ 8.12 6.64\n",
"... ... ... ... ...\n",
"7309 20251010 920978.BJ 50.08 26.98\n",
"7310 20251010 920981.BJ 48.04 25.88\n",
"7311 20251010 920982.BJ 354.64 190.96\n",
"7312 20251010 920985.BJ 11.86 6.40\n",
"7313 20251010 920992.BJ 27.87 15.01\n",
"\n",
"[7314 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20251009 000001.SZ 12.47 10.21\n",
"1 20251009 000002.SZ 7.58 6.20\n",
"2 20251009 000004.SZ 11.68 10.56\n",
"3 20251009 000006.SZ 11.32 9.26\n",
"4 20251009 000007.SZ 8.02 6.56\n",
"... ... ... ... ...\n",
"7306 20251009 920978.BJ 50.44 27.16\n",
"7307 20251009 920981.BJ 48.11 25.91\n",
"7308 20251009 920982.BJ 366.06 197.12\n",
"7309 20251009 920985.BJ 12.01 6.47\n",
"7310 20251009 920992.BJ 27.39 14.75\n",
"\n",
"[7311 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250929 000001.SZ 12.54 10.26\n",
"1 20250929 000002.SZ 7.48 6.12\n",
"2 20250929 000004.SZ 11.00 9.96\n",
"3 20250929 000006.SZ 10.46 8.56\n",
"4 20250929 000007.SZ 7.63 6.25\n",
"... ... ... ... ...\n",
"7302 20250929 920445.BJ 14.37 7.75\n",
"7303 20250929 920489.BJ 29.34 15.80\n",
"7304 20250929 920682.BJ 13.10 7.06\n",
"7305 20250929 920799.BJ 70.78 38.12\n",
"7306 20250929 920819.BJ 5.52 2.98\n",
"\n",
"[7307 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250930 000001.SZ 12.51 10.23\n",
"1 20250930 000002.SZ 7.49 6.13\n",
"2 20250930 000004.SZ 11.12 10.06\n",
"3 20250930 000006.SZ 10.29 8.42\n",
"4 20250930 000007.SZ 7.92 6.48\n",
"... ... ... ... ...\n",
"7305 20250930 920445.BJ 14.67 7.91\n",
"7306 20250930 920489.BJ 29.26 15.76\n",
"7307 20250930 920682.BJ 12.92 6.96\n",
"7308 20250930 920799.BJ 73.19 39.41\n",
"7309 20250930 920819.BJ 5.55 2.99\n",
"\n",
"[7310 rows x 4 columns]]\n"
]
}
],
"source": [
"# Summarise what was fetched instead of dumping every DataFrame into the cell output.\n",
"fetched_rows = sum(len(frame) for frame in all_daily_data)\n",
"print(f'Fetched {len(all_daily_data)} daily frames, {fetched_rows} rows in total')\n",
"\n",
"# Merge all per-date frames into one DataFrame. pd.concat raises ValueError on an\n",
"# empty list, so fall back to an empty frame when nothing new was fetched.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"else:\n",
"    all_daily_data_df = pd.DataFrame(columns=['trade_date', 'ts_code', 'up_limit', 'down_limit'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"\n",
"# Append the combined frame to the HDF5 file, using the table format.\n",
"hdf_append_options = {\n",
"    'key': 'stk_limit',\n",
"    'mode': 'a',\n",
"    'format': 'table',\n",
"    'append': True,\n",
"    'data_columns': True,\n",
"}\n",
"all_daily_data_df.to_hdf(h5_filename, **hdf_append_options)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}