This commit is contained in:
liaozhaorun
2025-02-12 00:21:33 +08:00
commit 71c9496df8
24 changed files with 34783 additions and 0 deletions

View File

@@ -0,0 +1,424 @@
{
"cells": [
{
"cell_type": "code",
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:43:54.745322Z",
"start_time": "2025-02-11T15:43:53.837662Z"
}
},
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:08.235573Z",
"start_time": "2025-02-11T15:53:07.753701Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" if eds is None or eds is pd.NaT:\n",
" eds = datetime.now()\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "14671a7f72de2564",
"outputs": [],
"execution_count": 31
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:19.812860Z",
"start_time": "2025-02-11T15:53:09.614377Z"
}
},
"cell_type": "code",
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"h5_filename = '../../../data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"id": "e7f8cce2f80e2f20",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8295494 entries, 0 to 8295493\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 189.9+ MB\n",
"None\n",
"20250210\n",
"20250211\n"
]
}
],
"execution_count": 32
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T15:53:24.100612Z",
"start_time": "2025-02-11T15:53:22.361257Z"
}
},
"source": [
"\n",
"\n",
"# 使用 HDFStore 存储数据\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
" if daily_basic_data is not None and not daily_basic_data.empty:\n",
" # 添加交易日期列标识\n",
" daily_basic_data['trade_date'] = trade_date\n",
" daily_basic_data['is_st'] = daily_basic_data.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
" )\n",
" time.sleep(0.2)\n",
" # print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
" return daily_basic_data\n",
"\n",
"\n",
"# 遍历每个交易日期并获取数据\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
" # 计数一次 API 调用\n",
" api_call_count += 1\n",
"\n",
" # 每调用 300 次,检查时间是否少于 1 分钟,如果少于则等待剩余时间\n",
" if api_call_count % 150 == 0:\n",
" elapsed = time.time() - batch_start_time\n",
" if elapsed < 60:\n",
" sleep_time = 60 - elapsed\n",
" print(f\"已调用 150 次 API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
" time.sleep(sleep_time)\n",
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250218 完成\n",
"任务 20250217 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n",
"任务 20250211 完成\n"
]
}
],
"execution_count": 33
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:25.913933Z",
"start_time": "2025-02-11T15:53:25.902629Z"
}
},
"cell_type": "code",
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"id": "919023c693d7a47a",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 002512.SZ 20250211 5.03 5.9759 7.8713 \n",
"1 600966.SH 20250211 4.83 0.6904 1.3494 \n",
"2 600358.SH 20250211 3.68 8.5826 11.3780 \n",
"3 002893.SZ 20250211 9.73 1.9217 2.6415 \n",
"4 300648.SZ 20250211 22.90 1.7775 2.3188 \n",
"... ... ... ... ... ... \n",
"5380 300886.SZ 20250211 21.80 8.9341 13.4176 \n",
"5381 600050.SH 20250211 5.48 2.3899 5.6722 \n",
"5382 300149.SZ 20250211 6.73 3.5271 5.3077 \n",
"5383 002197.SZ 20250211 4.42 4.0058 4.6595 \n",
"5384 688270.SH 20250211 37.34 2.9212 2.9212 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.87 NaN NaN 12.8888 2.9340 3.0625 0.0000 \n",
"1 1.16 35.5101 15.2315 0.9534 0.3454 0.3402 0.5633 \n",
"2 1.38 NaN NaN 15.2661 3.4220 4.2041 0.0000 \n",
"3 0.85 48.9883 41.5405 2.2074 2.3641 2.3637 0.8222 \n",
"4 0.69 NaN NaN 4.1442 3.7325 3.3186 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"5380 3.00 NaN 111.0678 2.9043 6.0326 4.9204 0.0000 \n",
"5381 1.15 21.3231 19.5079 1.0668 0.4677 0.4574 2.6625 \n",
"5382 1.34 NaN NaN 2.5009 2.9440 3.3158 0.0000 \n",
"5383 1.41 NaN NaN 1.1195 2.0851 2.5837 0.0000 \n",
"5384 0.75 110.2738 170.0477 3.7594 28.4642 27.3030 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 NaN 1.147095e+05 1.048455e+05 7.959795e+04 5.769885e+05 \n",
"1 0.5633 1.336844e+05 1.336844e+05 6.839785e+04 6.456958e+05 \n",
"2 NaN 5.049367e+04 5.049367e+04 3.808829e+04 1.858167e+05 \n",
"3 0.8222 2.636400e+04 2.027786e+04 1.475173e+04 2.565217e+05 \n",
"4 NaN 1.477839e+04 1.061894e+04 8.140048e+03 3.384251e+05 \n",
"... ... ... ... ... ... \n",
"5380 NaN 7.455500e+03 4.346405e+03 2.894040e+03 1.625299e+05 \n",
"5381 2.6625 3.180058e+06 3.128014e+06 1.317969e+06 1.742672e+07 \n",
"5382 NaN 4.979640e+04 4.970844e+04 3.303210e+04 3.351298e+05 \n",
"5383 NaN 6.143629e+04 5.340007e+04 4.590857e+04 2.715484e+05 \n",
"5384 NaN 2.140516e+04 1.442317e+04 1.442317e+04 7.992687e+05 \n",
"\n",
" circ_mv is_st \n",
"0 5.273728e+05 False \n",
"1 6.456958e+05 False \n",
"2 1.858167e+05 True \n",
"3 1.973036e+05 False \n",
"4 2.431738e+05 False \n",
"... ... ... \n",
"5380 9.475163e+04 False \n",
"5381 1.714152e+07 False \n",
"5382 3.345378e+05 False \n",
"5383 2.360283e+05 True \n",
"5384 5.385612e+05 False \n",
"\n",
"[5385 rows x 19 columns]\n"
]
}
],
"execution_count": 34
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:42.062142Z",
"start_time": "2025-02-11T15:53:42.044324Z"
}
},
"cell_type": "code",
"source": "print(all_daily_data_df[all_daily_data_df['is_st']])",
"id": "28cb78d032671b20",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"2 600358.SH 20250211 3.68 8.5826 11.3780 \n",
"20 000889.SZ 20250211 2.48 2.0846 2.8167 \n",
"50 603879.SH 20250211 3.58 1.7126 2.7285 \n",
"62 002024.SZ 20250211 1.99 0.2997 0.8575 \n",
"65 600078.SH 20250211 5.77 1.0536 1.8102 \n",
"... ... ... ... ... ... \n",
"5327 688309.SH 20250211 13.80 0.5594 1.0928 \n",
"5328 002800.SZ 20250211 10.57 2.0449 3.9025 \n",
"5342 300368.SZ 20250211 4.50 1.5755 2.2505 \n",
"5375 600515.SH 20250211 3.64 0.4111 0.6804 \n",
"5383 002197.SZ 20250211 4.42 4.0058 4.6595 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"2 1.38 NaN NaN 15.2661 3.4220 4.2041 0.0000 \n",
"20 1.08 NaN NaN 20.6126 1.6250 1.6047 0.0000 \n",
"50 1.08 NaN NaN 3.4116 3.8093 3.5391 0.0000 \n",
"62 1.01 NaN NaN 1.5246 0.2944 0.3546 0.0000 \n",
"65 0.97 NaN NaN 2.1866 1.2329 1.2311 0.5373 \n",
"... ... ... ... ... ... ... ... \n",
"5327 0.73 60.8452 186.0174 1.5353 6.7361 13.4432 NaN \n",
"5328 0.72 NaN NaN 3.0468 1.6938 1.3629 0.0000 \n",
"5342 0.99 NaN NaN 7.1301 6.7544 11.8519 0.0000 \n",
"5375 0.91 43.6494 110.6536 1.7765 6.1506 7.8214 0.0000 \n",
"5383 1.41 NaN NaN 1.1195 2.0851 2.5837 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"2 NaN 5.049367e+04 50493.6660 38088.2934 1.858167e+05 \n",
"20 NaN 9.362911e+04 86984.9676 64375.7658 2.322002e+05 \n",
"50 NaN 3.593444e+04 35934.4440 22555.6496 1.286453e+05 \n",
"62 NaN 9.264768e+05 919834.5068 321453.1001 1.843689e+06 \n",
"65 0.5373 6.625729e+04 66257.2861 38563.8247 3.823045e+05 \n",
"... ... ... ... ... ... \n",
"5327 NaN 8.001073e+03 8001.0733 4095.6641 1.104148e+05 \n",
"5328 NaN 1.522531e+04 14165.4100 7422.5200 1.609315e+05 \n",
"5342 NaN 5.289435e+04 52894.3475 37030.2475 2.380246e+05 \n",
"5375 NaN 1.142531e+06 917601.2508 554411.0843 4.158813e+06 \n",
"5383 NaN 6.143629e+04 53400.0687 45908.5733 2.715484e+05 \n",
"\n",
" circ_mv is_st \n",
"2 1.858167e+05 True \n",
"20 2.157227e+05 True \n",
"50 1.286453e+05 True \n",
"62 1.830471e+06 True \n",
"65 3.823045e+05 True \n",
"... ... ... \n",
"5327 1.104148e+05 True \n",
"5328 1.497284e+05 True \n",
"5342 2.380246e+05 True \n",
"5375 3.340069e+06 True \n",
"5383 2.360283e+05 True \n",
"\n",
"[318 rows x 19 columns]\n"
]
}
],
"execution_count": 37
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:33.693894Z",
"start_time": "2025-02-11T15:53:33.609884Z"
}
},
"cell_type": "code",
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"id": "692b58674b7462c9",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 36
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:54:27.868021Z",
"start_time": "2025-02-11T15:54:18.853803Z"
}
},
"cell_type": "code",
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"id": "d7a773fc20293477",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8300879 entries, 0 to 5384\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 197.9+ MB\n",
"None\n"
]
}
],
"execution_count": 39
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:19.819017Z",
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"source": [
"import pandas as pd\n",
"import tushare as ts"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"cell_type": "code",
"source": [
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date '])\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
],
"id": "48ae71ed02d61819",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"cell_type": "code",
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "e6606a96e5728b8",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"# 判断股票是否为 ST 的函数\n",
"#stock_code = 'xxxxxx.SH'\n",
"#target_date = '20200830'\n",
"#若为ST返回True否则返回False\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
" if eds == None:\n",
" ed = datetime.now()\n",
" # else:\n",
" # ed = datetime.strptime(eds, '%Y%m%d')\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"\n",
"print('is st...')\n",
"# 创建一个新的列 is_st判断每只股票是否是 ST\n",
"daily_basic['is_st'] = daily_basic.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
")\n",
"\n",
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603429.SH 20250127 False\n",
"1 300917.SZ 20250127 False\n",
"2 301266.SZ 20250127 False\n",
"3 688399.SH 20250127 False\n",
"4 603737.SH 20250127 False\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "code",
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:54.821950Z",
"start_time": "2025-02-11T15:21:54.050569Z"
}
},
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:32.726905Z",
"start_time": "2025-02-11T15:22:25.018135Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/money_flow.h5'\n",
"key = '/money_flow'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"id": "742c29d453b9bb38",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8153941 entries, 0 to 5120\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 186.6+ MB\n",
"None\n",
"20250211\n",
"start_date: 20250212\n"
]
}
],
"execution_count": 6
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T15:22:14.513527Z",
"start_time": "2025-02-11T15:22:12.973331Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
" if money_flow_data is not None and not money_flow_data.empty:\n",
" return money_flow_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250219 完成\n",
"任务 20250220 完成\n",
"任务 20250218 完成\n",
"任务 20250217 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n",
"任务 20250211 完成\n"
]
}
],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:16.656650Z",
"start_time": "2025-02-11T15:22:16.639271Z"
}
},
"cell_type": "code",
"source": "all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"id": "9af80516849d4e80",
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:20.447350Z",
"start_time": "2025-02-11T15:22:19.145561Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 5
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,238 @@
{
"cells": [
{
"cell_type": "code",
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:18:36.892437Z",
"start_time": "2025-02-11T15:18:36.020822Z"
}
},
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:12.573607Z",
"start_time": "2025-02-11T15:20:00.110127Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4682 600310.SH 20250211\n",
"4683 600312.SH 20250211\n",
"4684 600313.SH 20250211\n",
"4673 600299.SH 20250211\n",
"0 000001.SZ 20250211\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10040878 entries, 0 to 10040877\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 229.8+ MB\n",
"None\n",
"20250211\n",
"20250212\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T15:21:27.831699Z",
"start_time": "2025-02-11T15:21:26.665039Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
" return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" if result is not None:\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
]
}
],
"execution_count": 10
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:29.294283Z",
"start_time": "2025-02-11T15:21:29.247112Z"
}
},
"cell_type": "code",
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"id": "96a81aa5890ea3c3",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
},
{
"ename": "ValueError",
"evalue": "No objects to concatenate",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[11], line 3\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28mprint\u001B[39m(all_daily_data)\n\u001B[0;32m 2\u001B[0m \u001B[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001B[39;00m\n\u001B[1;32m----> 3\u001B[0m all_daily_data_df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconcat\u001B[49m\u001B[43m(\u001B[49m\u001B[43mall_daily_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m)\u001B[49m\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:372\u001B[0m, in \u001B[0;36mconcat\u001B[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001B[0m\n\u001B[0;32m 369\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m copy \u001B[38;5;129;01mand\u001B[39;00m using_copy_on_write():\n\u001B[0;32m 370\u001B[0m copy \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m--> 372\u001B[0m op \u001B[38;5;241m=\u001B[39m \u001B[43m_Concatenator\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 373\u001B[0m \u001B[43m \u001B[49m\u001B[43mobjs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 374\u001B[0m \u001B[43m \u001B[49m\u001B[43maxis\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maxis\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 375\u001B[0m \u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mignore_index\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 376\u001B[0m \u001B[43m \u001B[49m\u001B[43mjoin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mjoin\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 377\u001B[0m \u001B[43m \u001B[49m\u001B[43mkeys\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkeys\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 378\u001B[0m \u001B[43m \u001B[49m\u001B[43mlevels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlevels\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 379\u001B[0m \u001B[43m \u001B[49m\u001B[43mnames\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnames\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 380\u001B[0m \u001B[43m \u001B[49m\u001B[43mverify_integrity\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverify_integrity\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 381\u001B[0m \u001B[43m \u001B[49m\u001B[43mcopy\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcopy\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 382\u001B[0m \u001B[43m \u001B[49m\u001B[43msort\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msort\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 383\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 385\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result()\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:429\u001B[0m, in \u001B[0;36m_Concatenator.__init__\u001B[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001B[0m\n\u001B[0;32m 426\u001B[0m objs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(objs)\n\u001B[0;32m 428\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(objs) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m--> 429\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo objects to concatenate\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 431\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m keys \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 432\u001B[0m objs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(com\u001B[38;5;241m.\u001B[39mnot_none(\u001B[38;5;241m*\u001B[39mobjs))\n",
"\u001B[1;31mValueError\u001B[0m: No objects to concatenate"
]
}
],
"execution_count": 11
},
{
"cell_type": "code",
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:37.999493Z",
"start_time": "2025-02-11T15:20:37.375220Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"ename": "ValueError",
"evalue": "All objects passed were None",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[7], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m all_daily_data_df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconcat\u001B[49m\u001B[43m(\u001B[49m\u001B[43mall_daily_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m)\u001B[49m\n\u001B[0;32m 4\u001B[0m \u001B[38;5;66;03m# 将数据保存为 HDF5 文件table 格式)\u001B[39;00m\n\u001B[0;32m 5\u001B[0m all_daily_data_df\u001B[38;5;241m.\u001B[39mto_hdf(h5_filename, key\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mstk_limit\u001B[39m\u001B[38;5;124m'\u001B[39m, mode\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124ma\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;28mformat\u001B[39m\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtable\u001B[39m\u001B[38;5;124m'\u001B[39m, append\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, data_columns\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:372\u001B[0m, in \u001B[0;36mconcat\u001B[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001B[0m\n\u001B[0;32m 369\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m copy \u001B[38;5;129;01mand\u001B[39;00m using_copy_on_write():\n\u001B[0;32m 370\u001B[0m copy \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m--> 372\u001B[0m op \u001B[38;5;241m=\u001B[39m \u001B[43m_Concatenator\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 373\u001B[0m \u001B[43m \u001B[49m\u001B[43mobjs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 374\u001B[0m \u001B[43m \u001B[49m\u001B[43maxis\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maxis\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 375\u001B[0m \u001B[43m \u001B[49m\u001B[43mignore_index\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mignore_index\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 376\u001B[0m \u001B[43m \u001B[49m\u001B[43mjoin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mjoin\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 377\u001B[0m \u001B[43m \u001B[49m\u001B[43mkeys\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mkeys\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 378\u001B[0m \u001B[43m \u001B[49m\u001B[43mlevels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlevels\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 379\u001B[0m \u001B[43m \u001B[49m\u001B[43mnames\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnames\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 380\u001B[0m \u001B[43m \u001B[49m\u001B[43mverify_integrity\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverify_integrity\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 381\u001B[0m \u001B[43m \u001B[49m\u001B[43mcopy\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcopy\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 382\u001B[0m \u001B[43m \u001B[49m\u001B[43msort\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msort\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 383\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 385\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result()\n",
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\try_trader\\lib\\site-packages\\pandas\\core\\reshape\\concat.py:452\u001B[0m, in \u001B[0;36m_Concatenator.__init__\u001B[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001B[0m\n\u001B[0;32m 449\u001B[0m keys \u001B[38;5;241m=\u001B[39m Index(clean_keys, name\u001B[38;5;241m=\u001B[39mname, dtype\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mgetattr\u001B[39m(keys, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdtype\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m))\n\u001B[0;32m 451\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(objs) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m--> 452\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll objects passed were None\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 454\u001B[0m \u001B[38;5;66;03m# figure out what our result ndim is going to be\u001B[39;00m\n\u001B[0;32m 455\u001B[0m ndims \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m()\n",
"\u001B[1;31mValueError\u001B[0m: All objects passed were None"
]
}
],
"execution_count": 7
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}