Files
NewStock/main/data/update/update_daily_basic.ipynb
liaozhaorun 7bb0a0537b feat: 添加 Redis 消息展示功能到监控面板
- 新增 /api/messages API 接口,支持从 Redis Stream 读取消息
- 支持按策略筛选消息和分页展示
- 前端新增消息列表卡片,展示时间、策略、股票代码、动作、价格和状态
- 自动判断消息处理状态(已处理/待处理)
- 消息列表每30秒自动刷新,支持手动刷新
2026-03-01 22:06:42 +08:00

447 lines
17 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:36.913044Z",
"start_time": "2025-04-09T14:57:36.159612Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:39.128278Z",
"start_time": "2025-04-09T14:57:36.918051Z"
}
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" if eds is None or eds is pd.NaT:\n",
" eds = datetime.now()\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"name_change_df = pd.read_hdf('/mnt/d/PyProject/NewStock/data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"# name_change_df = name_change_df[name_change_df.name.str.contains('ST') ]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" # st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" st_data = group[(group['name'].str.contains('ST')) | (group['name'].str.contains('退'))]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.296046Z",
"start_time": "2025-04-09T14:57:39.339423Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9656995 entries, 0 to 27354\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 221.0+ MB\n",
"None\n",
"20260213\n",
"20260224\n"
]
}
],
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20260310')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.817010Z",
"start_time": "2025-04-09T14:58:09.326485Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20260310 完成\n",
"任务 20260309 完成\n",
"任务 20260306 完成\n",
"任务 20260305 完成\n",
"任务 20260304 完成\n",
"任务 20260303 完成\n",
"任务 20260302 完成\n",
"任务 20260227 完成\n",
"任务 20260226 完成\n",
"任务 20260225 完成\n",
"任务 20260224 完成\n"
]
}
],
"source": [
"\n",
"\n",
"# 使用 HDFStore 存储数据\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
" if daily_basic_data is not None and not daily_basic_data.empty:\n",
" # 添加交易日期列标识\n",
" daily_basic_data['trade_date'] = trade_date\n",
" daily_basic_data['is_st'] = daily_basic_data.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
" )\n",
" time.sleep(0.2)\n",
" # print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
" return daily_basic_data\n",
"\n",
"\n",
"# 遍历每个交易日期并获取数据\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
" # 计数一次 API 调用\n",
" api_call_count += 1\n",
"\n",
" # 每调用 300 次,检查时间是否少于 1 分钟,如果少于则等待剩余时间\n",
" if api_call_count % 150 == 0:\n",
" elapsed = time.time() - batch_start_time\n",
" if elapsed < 60:\n",
" sleep_time = 60 - elapsed\n",
" print(f\"已调用 150 次 API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
" time.sleep(sleep_time)\n",
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.864178Z",
"start_time": "2025-04-09T14:58:16.855084Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 300819.SZ 20260227 58.92 9.7287 24.1597 \n",
"1 001378.SZ 20260227 25.85 3.5842 3.5842 \n",
"2 688505.SH 20260227 8.68 0.4993 1.1491 \n",
"3 605158.SH 20260227 8.58 0.8147 2.9625 \n",
"4 688193.SH 20260227 54.23 0.9385 1.7187 \n",
"... ... ... ... ... ... \n",
"21882 002289.SZ 20260224 27.97 1.5075 2.5139 \n",
"21883 600901.SH 20260224 6.38 0.3122 0.7895 \n",
"21884 002353.SZ 20260224 103.70 2.8704 3.4515 \n",
"21885 300435.SZ 20260224 30.50 3.6153 5.4615 \n",
"21886 300084.SZ 20260224 11.38 9.3900 10.1377 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.99 136.4038 148.8393 10.2870 14.6635 15.5540 0.6789 \n",
"1 1.20 37.0409 42.9823 1.8561 2.2661 2.3080 2.5094 \n",
"2 0.90 226.4426 NaN 3.9842 12.6831 12.6434 0.3456 \n",
"3 0.94 19.1275 26.8054 1.7756 0.6334 0.6516 1.7306 \n",
"4 0.89 NaN NaN 2.3813 12.2524 13.1011 0.4458 \n",
"... ... ... ... ... ... ... ... \n",
"21882 2.01 NaN NaN 35.8237 35.5848 28.2125 NaN \n",
"21883 0.78 12.5570 11.6882 1.5234 7.0010 6.2032 6.2700 \n",
"21884 1.07 40.4159 37.4306 4.7938 7.9502 6.7506 0.8096 \n",
"21885 1.10 NaN 172.8196 3.5518 4.3304 4.1720 0.6557 \n",
"21886 2.25 NaN NaN 4.6382 9.6453 9.5673 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.6789 14920.5000 13476.2250 5426.6150 8.791159e+05 \n",
"1 2.5094 13333.3600 6450.5766 6450.5766 3.446674e+05 \n",
"2 0.3456 103657.2100 71057.2100 30872.7971 8.997446e+05 \n",
"3 1.7306 51142.0000 51142.0000 14064.0840 4.387984e+05 \n",
"4 0.4458 4006.9870 4006.9870 2188.0251 2.172989e+05 \n",
"... ... ... ... ... ... \n",
"21882 NaN 28025.3733 27995.3733 16788.3130 7.838697e+05 \n",
"21883 6.2700 579186.6431 579045.2431 228992.3580 3.695211e+06 \n",
"21884 0.8096 102385.5833 69324.3890 57652.8100 1.061738e+07 \n",
"21885 0.6557 38571.3000 36881.4630 24414.0100 1.176425e+06 \n",
"21886 NaN 50838.9899 34610.1638 32057.6638 5.785477e+05 \n",
"\n",
" circ_mv is_st \n",
"0 7.940192e+05 False \n",
"1 1.667474e+05 False \n",
"2 6.167766e+05 False \n",
"3 4.387984e+05 False \n",
"4 2.172989e+05 False \n",
"... ... ... \n",
"21882 7.830306e+05 True \n",
"21883 3.694309e+06 False \n",
"21884 7.188939e+06 False \n",
"21885 1.124885e+06 False \n",
"21886 3.938637e+05 False \n",
"\n",
"[21887 rows x 19 columns]\n"
]
}
],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.881685Z",
"start_time": "2025-04-09T14:58:16.871184Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"7 600360.SH 20260227 9.58 2.1195 2.7285 \n",
"77 000669.SZ 20260227 4.56 7.5951 9.5786 \n",
"108 603580.SH 20260227 17.19 0.8104 2.4841 \n",
"111 603557.SH 20260227 3.51 5.2656 7.8297 \n",
"127 603377.SH 20260227 3.70 1.1320 1.7903 \n",
"... ... ... ... ... ... \n",
"21722 603843.SH 20260224 5.66 2.1676 3.0441 \n",
"21751 002253.SZ 20260224 10.51 1.2118 1.4935 \n",
"21808 000903.SZ 20260224 2.70 1.2350 1.8672 \n",
"21813 000995.SZ 20260224 15.83 11.1164 16.4343 \n",
"21882 002289.SZ 20260224 27.97 1.5075 2.5139 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"7 1.08 72.0218 52.4452 2.7077 4.4710 4.1388 0.7724 \n",
"77 3.02 NaN NaN NaN 2.3735 2.4518 NaN \n",
"108 0.56 NaN NaN 5.3358 13.4080 9.7500 0.4770 \n",
"111 2.82 NaN NaN 27.9422 7.0938 8.9069 NaN \n",
"127 1.37 NaN NaN 3.2661 3.2762 4.0899 NaN \n",
"... ... ... ... ... ... ... ... \n",
"21722 0.61 NaN NaN 14.1532 2.9075 3.3288 NaN \n",
"21751 0.62 NaN NaN 2.2278 14.3426 20.5492 NaN \n",
"21808 0.95 NaN NaN 3.9295 1.1041 1.1752 NaN \n",
"21813 0.54 101.3839 NaN 19.0447 16.2904 19.2344 NaN \n",
"21882 2.01 NaN NaN 35.8237 35.5848 28.2125 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"7 0.7724 96029.5304 96029.5304 74596.8648 919962.9012 \n",
"77 NaN 68040.8797 68040.8797 53950.9653 310266.4114 \n",
"108 0.4770 13067.3200 13067.3200 4263.2200 224627.2308 \n",
"111 NaN 62343.6888 62343.6888 41926.6552 218826.3477 \n",
"127 NaN 71491.6351 71491.6351 45201.8751 264519.0499 \n",
"... ... ... ... ... ... \n",
"21722 NaN 69962.3237 69962.3237 49817.1582 395986.7521 \n",
"21751 NaN 22562.6095 20830.4044 16901.0873 237133.0258 \n",
"21808 NaN 193484.4410 192026.1648 127011.9255 522407.9907 \n",
"21813 NaN 17740.8000 17740.8000 12000.1709 280836.8640 \n",
"21882 NaN 28025.3733 27995.3733 16788.3130 783869.6912 \n",
"\n",
" circ_mv is_st \n",
"7 919962.9012 True \n",
"77 310266.4114 True \n",
"108 224627.2308 True \n",
"111 218826.3477 True \n",
"127 264519.0499 True \n",
"... ... ... \n",
"21722 395986.7521 True \n",
"21751 218927.5502 True \n",
"21808 518470.6450 True \n",
"21813 280836.8640 True \n",
"21882 783030.5912 True \n",
"\n",
"[692 rows x 19 columns]\n"
]
}
],
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.773453Z",
"start_time": "2025-04-09T14:58:16.903459Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:24.305403Z",
"start_time": "2025-04-09T14:58:17.816332Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9678882 entries, 0 to 21886\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 230.8+ MB\n",
"None\n"
]
}
],
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}