init
This commit is contained in:
441
code/data/cyq_perf.ipynb
Normal file
441
code/data/cyq_perf.ipynb
Normal file
@@ -0,0 +1,441 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:31:25.004019Z",
|
||||
"start_time": "2025-03-12T15:31:24.322440Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from operator import index\n",
|
||||
"\n",
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "972a5ac9f79fe373",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:31:40.917015Z",
|
||||
"start_time": "2025-03-12T15:31:35.958771Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date his_low his_high cost_5pct cost_15pct \\\n",
|
||||
"0 000001.SZ 20180104 0.2 12.7 7.2 7.9 \n",
|
||||
"1 000002.SZ 20180104 0.3 31.8 14.1 15.6 \n",
|
||||
"2 000004.SZ 20180104 0.8 53.2 21.6 22.0 \n",
|
||||
"3 000008.SZ 20180104 0.1 13.9 7.2 7.8 \n",
|
||||
"4 000009.SZ 20180104 0.3 15.0 5.9 5.9 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"3091 603991.SH 20180104 12.0 67.8 26.4 27.0 \n",
|
||||
"3092 603993.SH 20180104 1.5 8.1 5.6 5.8 \n",
|
||||
"3093 603997.SH 20180104 5.4 31.5 9.9 10.2 \n",
|
||||
"3094 603998.SH 20180104 3.9 18.9 9.8 10.1 \n",
|
||||
"3095 603999.SH 20180104 5.4 30.9 6.9 7.2 \n",
|
||||
"\n",
|
||||
" cost_50pct cost_85pct cost_95pct weight_avg winner_rate \n",
|
||||
"0 10.6 11.3 11.9 9.93 71.97 \n",
|
||||
"1 20.1 23.1 24.3 19.62 99.34 \n",
|
||||
"2 23.6 27.6 29.6 24.71 45.41 \n",
|
||||
"3 8.6 9.2 10.5 8.64 47.04 \n",
|
||||
"4 6.6 7.6 7.9 6.76 38.14 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"3091 27.6 30.6 34.2 28.54 57.36 \n",
|
||||
"3092 6.3 7.1 7.6 6.34 73.50 \n",
|
||||
"3093 10.5 11.7 11.7 10.84 11.28 \n",
|
||||
"3094 11.9 13.5 15.7 12.13 17.93 \n",
|
||||
"3095 7.8 9.6 9.9 8.17 21.83 \n",
|
||||
"\n",
|
||||
"[3096 rows x 11 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"df = pro.cyq_perf(trade_date='20180104')\n",
|
||||
"print(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1b5a82fbf4e380de",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:30:20.421604Z",
|
||||
"start_time": "2025-03-12T15:30:20.224851Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/sw_daily.h5'\n",
|
||||
"\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal['cal_date'].tolist()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f448da220816bf98",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2025-03-12T15:30:20.436796Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n",
|
||||
"任务 20250403 完成\n",
|
||||
"任务 20250402 完成\n",
|
||||
"任务 20250401 完成\n",
|
||||
"任务 20250331 完成\n",
|
||||
"任务 20250328 完成\n",
|
||||
"任务 20250327 完成\n",
|
||||
"任务 20250326 完成\n",
|
||||
"任务 20250325 完成\n",
|
||||
"任务 20250324 完成\n",
|
||||
"任务 20250321 完成\n",
|
||||
"任务 20250320 完成\n",
|
||||
"任务 20250319 完成\n",
|
||||
"任务 20250318 完成\n",
|
||||
"任务 20250317 完成\n",
|
||||
"任务 20250314 完成\n",
|
||||
"任务 20250313 完成\n",
|
||||
"任务 20250312 完成\n",
|
||||
"任务 20250311 完成\n",
|
||||
"任务 20250310 完成\n",
|
||||
"任务 20250307 完成\n",
|
||||
"任务 20250306 完成\n",
|
||||
"任务 20250305 完成\n",
|
||||
"任务 20250304 完成\n",
|
||||
"任务 20250303 完成\n",
|
||||
"任务 20250228 完成\n",
|
||||
"任务 20250227 完成\n",
|
||||
"任务 20250226 完成\n",
|
||||
"任务 20250225 完成\n",
|
||||
"任务 20250224 完成\n",
|
||||
"任务 20250221 完成\n",
|
||||
"任务 20250220 完成\n",
|
||||
"任务 20250219 完成\n",
|
||||
"任务 20250218 完成\n",
|
||||
"任务 20250217 完成\n",
|
||||
"任务 20250214 完成\n",
|
||||
"任务 20250213 完成\n",
|
||||
"任务 20250212 完成\n",
|
||||
"任务 20250211 完成\n",
|
||||
"任务 20250210 完成\n",
|
||||
"任务 20250207 完成\n",
|
||||
"任务 20250206 完成\n",
|
||||
"任务 20250205 完成\n",
|
||||
"任务 20250127 完成\n",
|
||||
"任务 20250124 完成\n",
|
||||
"任务 20250123 完成\n",
|
||||
"任务 20250122 完成\n",
|
||||
"任务 20250121 完成\n",
|
||||
"任务 20250120 完成\n",
|
||||
"任务 20250117 完成\n",
|
||||
"任务 20250116 完成\n",
|
||||
"任务 20250115 完成\n",
|
||||
"任务 20250114 完成\n",
|
||||
"任务 20250113 完成\n",
|
||||
"任务 20250110 完成\n",
|
||||
"任务 20250109 完成\n",
|
||||
"任务 20250108 完成\n",
|
||||
"任务 20250107 完成\n",
|
||||
"任务 20250106 完成\n",
|
||||
"任务 20250103 完成\n",
|
||||
"任务 20250102 完成\n",
|
||||
"任务 20241231 完成\n",
|
||||
"任务 20241230 完成\n",
|
||||
"任务 20241227 完成\n",
|
||||
"任务 20241226 完成\n",
|
||||
"任务 20241225 完成\n",
|
||||
"任务 20241224 完成\n",
|
||||
"任务 20241223 完成\n",
|
||||
"任务 20241220 完成\n",
|
||||
"任务 20241219 完成\n",
|
||||
"任务 20241218 完成\n",
|
||||
"任务 20241217 完成\n",
|
||||
"任务 20241216 完成\n",
|
||||
"任务 20241213 完成\n",
|
||||
"任务 20241212 完成\n",
|
||||
"任务 20241211 完成\n",
|
||||
"任务 20241210 完成\n",
|
||||
"任务 20241209 完成\n",
|
||||
"任务 20241206 完成\n",
|
||||
"任务 20241205 完成\n",
|
||||
"任务 20241204 完成\n",
|
||||
"任务 20241203 完成\n",
|
||||
"任务 20241202 完成\n",
|
||||
"任务 20241129 完成\n",
|
||||
"任务 20241128 完成\n",
|
||||
"任务 20241127 完成\n",
|
||||
"任务 20241126 完成\n",
|
||||
"任务 20241125 完成\n",
|
||||
"任务 20241122 完成\n",
|
||||
"任务 20241121 完成\n",
|
||||
"任务 20241120 完成\n",
|
||||
"任务 20241119 完成\n",
|
||||
"任务 20241118 完成\n",
|
||||
"任务 20241115 完成\n",
|
||||
"任务 20241114 完成\n",
|
||||
"任务 20241113 完成\n",
|
||||
"任务 20241112 完成\n",
|
||||
"任务 20241111 完成\n",
|
||||
"任务 20241108 完成\n",
|
||||
"任务 20241107 完成\n",
|
||||
"任务 20241106 完成\n",
|
||||
"任务 20241105 完成\n",
|
||||
"任务 20241104 完成\n",
|
||||
"任务 20241101 完成\n",
|
||||
"任务 20241031 完成\n",
|
||||
"任务 20241030 完成\n",
|
||||
"任务 20241029 完成\n",
|
||||
"任务 20241028 完成\n",
|
||||
"任务 20241025 完成\n",
|
||||
"任务 20241024 完成\n",
|
||||
"任务 20241022 完成\n",
|
||||
"任务 20241023 完成\n",
|
||||
"任务 20241021 完成\n",
|
||||
"任务 20241018 完成\n",
|
||||
"任务 20241017 完成\n",
|
||||
"任务 20241016 完成\n",
|
||||
"任务 20241015 完成\n",
|
||||
"任务 20241014 完成\n",
|
||||
"任务 20241010 完成\n",
|
||||
"任务 20241011 完成\n",
|
||||
"任务 20241009 完成\n",
|
||||
"任务 20241008 完成\n",
|
||||
"任务 20240930 完成\n",
|
||||
"任务 20240927 完成\n",
|
||||
"任务 20240926 完成\n",
|
||||
"任务 20240925 完成\n",
|
||||
"任务 20240924 完成\n",
|
||||
"任务 20240923 完成\n",
|
||||
"任务 20240919 完成\n",
|
||||
"任务 20240920 完成\n",
|
||||
"任务 20240913 完成\n",
|
||||
"任务 20240918 完成\n",
|
||||
"任务 20240911 完成\n",
|
||||
"任务 20240912 完成\n",
|
||||
"任务 20240910 完成\n",
|
||||
"任务 20240909 完成\n",
|
||||
"任务 20240905 完成\n",
|
||||
"任务 20240906 完成\n",
|
||||
"任务 20240904 完成\n",
|
||||
"任务 20240903 完成\n",
|
||||
"任务 20240902 完成\n",
|
||||
"任务 20240830 完成\n",
|
||||
"任务 20240829 完成\n",
|
||||
"任务 20240828 完成\n",
|
||||
"任务 20240827 完成\n",
|
||||
"任务 20240826 完成\n",
|
||||
"任务 20240823 完成\n",
|
||||
"任务 20240822 完成\n",
|
||||
"任务 20240821 完成\n",
|
||||
"任务 20240820 完成\n",
|
||||
"任务 20240819 完成\n",
|
||||
"任务 20240816 完成\n",
|
||||
"任务 20240815 完成\n",
|
||||
"任务 20240814 完成\n",
|
||||
"任务 20240813 完成\n",
|
||||
"任务 20240812 完成\n",
|
||||
"任务 20240809 完成\n",
|
||||
"任务 20240808 完成\n",
|
||||
"任务 20240807 完成\n",
|
||||
"任务 20240806 完成\n",
|
||||
"任务 20240805 完成\n",
|
||||
"任务 20240802 完成\n",
|
||||
"任务 20240801 完成\n",
|
||||
"任务 20240731 完成\n",
|
||||
"任务 20240730 完成\n",
|
||||
"任务 20240729 完成\n",
|
||||
"任务 20240726 完成\n",
|
||||
"任务 20240725 完成\n",
|
||||
"任务 20240724 完成\n",
|
||||
"任务 20240723 完成\n",
|
||||
"任务 20240722 完成\n",
|
||||
"任务 20240719 完成\n",
|
||||
"任务 20240718 完成\n",
|
||||
"任务 20240717 完成\n",
|
||||
"任务 20240716 完成\n",
|
||||
"任务 20240715 完成\n",
|
||||
"任务 20240712 完成\n",
|
||||
"任务 20240711 完成\n",
|
||||
"任务 20240710 完成\n",
|
||||
"任务 20240709 完成\n",
|
||||
"任务 20240708 完成\n",
|
||||
"任务 20240705 完成\n",
|
||||
"任务 20240704 完成\n",
|
||||
"任务 20240703 完成\n",
|
||||
"任务 20240702 完成\n",
|
||||
"任务 20240701 完成\n",
|
||||
"任务 20240628 完成\n",
|
||||
"任务 20240627 完成\n",
|
||||
"任务 20240626 完成\n",
|
||||
"任务 20240625 完成\n",
|
||||
"任务 20240624 完成\n",
|
||||
"任务 20240621 完成\n",
|
||||
"任务 20240620 完成\n",
|
||||
"任务 20240619 完成\n",
|
||||
"任务 20240618 完成\n",
|
||||
"任务 20240617 完成\n",
|
||||
"任务 20240614 完成\n",
|
||||
"任务 20240613 完成\n",
|
||||
"任务 20240612 完成\n",
|
||||
"任务 20240611 完成\n",
|
||||
"任务 20240607 完成\n",
|
||||
"任务 20240606 完成\n",
|
||||
"任务 20240605 完成\n",
|
||||
"任务 20240604 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.cyq_perf(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "907f732d3c397bf",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:31:10.381348500Z",
|
||||
"start_time": "2025-03-12T15:23:41.345460Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf('../../data/cyq_perf.h5', key='cyq_perf', mode='w', format='table', data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "73e829ac-ff3d-408e-beb3-0b87f5b00b19",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 000001.SZ 20250312\n",
|
||||
"1 000002.SZ 20250312\n",
|
||||
"2 000004.SZ 20250312\n",
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"7465732 603991.SH 20180102\n",
|
||||
"7465733 603993.SH 20180102\n",
|
||||
"7465734 603997.SH 20180102\n",
|
||||
"7465735 603998.SH 20180102\n",
|
||||
"7465736 603999.SH 20180102\n",
|
||||
"\n",
|
||||
"[7465737 rows x 2 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"h5_filename = '../../data/cyq_perf.h5'\n",
|
||||
"key = '/cyq_perf'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].min()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
148
code/data/index_and_industry.ipynb
Normal file
148
code/data/index_and_industry.ipynb
Normal file
@@ -0,0 +1,148 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:23.864275Z",
|
||||
"start_time": "2025-03-30T16:42:22.963221Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from operator import index\n",
|
||||
"\n",
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "f448da220816bf98",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:25.559047Z",
|
||||
"start_time": "2025-03-30T16:42:23.868783Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# 定义四个指数\n",
|
||||
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
|
||||
"\n",
|
||||
"# 获取并存储数据\n",
|
||||
"all_data = []\n",
|
||||
"\n",
|
||||
"for ts_code in index_list:\n",
|
||||
" df = pro.index_daily(ts_code=ts_code) # 可根据需要设置日期\n",
|
||||
" df['ts_code'] = ts_code # 添加ts_code列来区分数据\n",
|
||||
" all_data.append(df)\n",
|
||||
"\n",
|
||||
"# 合并所有数据\n",
|
||||
"final_df = pd.concat(all_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 存储到H5文件\n",
|
||||
"final_df.to_hdf('../../data/index_data.h5', key='index_data', mode='w')\n",
|
||||
"\n",
|
||||
"print(\"数据已经成功存储到index_data.h5文件中\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"数据已经成功存储到index_data.h5文件中\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_6192\\3209233630.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
|
||||
" final_df = pd.concat(all_data, ignore_index=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "907f732d3c397bf",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:25.802535Z",
|
||||
"start_time": "2025-03-30T16:42:25.766399Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"h5_filename = '../../data/index_data.h5'\n",
|
||||
"key = '/index_data'\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key]\n",
|
||||
" print(df)\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close open high low \\\n",
|
||||
"0 000905.SH 20250328 5916.0314 5954.7297 5973.8015 5904.9159 \n",
|
||||
"1 000905.SH 20250327 5957.6017 5932.5165 6000.6615 5891.7664 \n",
|
||||
"2 000905.SH 20250326 5948.4986 5935.8537 5983.4739 5935.8537 \n",
|
||||
"3 000905.SH 20250325 5946.9510 5969.4164 5993.9312 5929.6734 \n",
|
||||
"4 000905.SH 20250324 5969.0789 5973.0466 5987.0606 5882.8780 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"13423 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
|
||||
"13424 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
|
||||
"13425 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
|
||||
"13426 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
|
||||
"13427 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
|
||||
"\n",
|
||||
" pre_close change pct_chg vol amount \n",
|
||||
"0 5957.6017 -41.5703 -0.6978 1.342619e+08 1.688995e+08 \n",
|
||||
"1 5948.4986 9.1031 0.1530 1.347089e+08 1.765905e+08 \n",
|
||||
"2 5946.9510 1.5476 0.0260 1.367021e+08 1.716958e+08 \n",
|
||||
"3 5969.0789 -22.1279 -0.3707 1.474839e+08 1.922270e+08 \n",
|
||||
"4 5971.9302 -2.8513 -0.0477 1.691924e+08 2.200943e+08 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"13423 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
|
||||
"13424 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
|
||||
"13425 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
|
||||
"13426 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
|
||||
"13427 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
|
||||
"\n",
|
||||
"[13428 rows x 11 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
2167
code/data/industry_daily.ipynb
Normal file
2167
code/data/industry_daily.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
5894
code/data/industry_data.ipynb
Normal file
5894
code/data/industry_data.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
273
code/data/kpl_concept.ipynb
Normal file
273
code/data/kpl_concept.ipynb
Normal file
@@ -0,0 +1,273 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:28:49.275220Z",
|
||||
"start_time": "2025-03-12T15:28:48.624632Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from operator import index\n",
|
||||
"\n",
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "972a5ac9f79fe373",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:28:49.280632Z",
|
||||
"start_time": "2025-03-12T15:28:49.275220Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# df = pro.cyq_perf(start_date='20220101', end_date='20220429')\n",
|
||||
"# print(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f448da220816bf98",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:39:50.128089Z",
|
||||
"start_time": "2025-03-12T15:28:49.437760Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"成功获取并保存 20250228 的每日基础数据\n",
|
||||
"成功获取并保存 20250227 的每日基础数据\n",
|
||||
"成功获取并保存 20250226 的每日基础数据\n",
|
||||
"成功获取并保存 20250225 的每日基础数据\n",
|
||||
"成功获取并保存 20250224 的每日基础数据\n",
|
||||
"成功获取并保存 20250221 的每日基础数据\n",
|
||||
"成功获取并保存 20250220 的每日基础数据\n",
|
||||
"成功获取并保存 20250219 的每日基础数据\n",
|
||||
"成功获取并保存 20250218 的每日基础数据\n",
|
||||
"成功获取并保存 20250217 的每日基础数据\n",
|
||||
"成功获取并保存 20250214 的每日基础数据\n",
|
||||
"成功获取并保存 20250213 的每日基础数据\n",
|
||||
"成功获取并保存 20250212 的每日基础数据\n",
|
||||
"成功获取并保存 20250211 的每日基础数据\n",
|
||||
"成功获取并保存 20250210 的每日基础数据\n",
|
||||
"成功获取并保存 20250207 的每日基础数据\n",
|
||||
"成功获取并保存 20250206 的每日基础数据\n",
|
||||
"成功获取并保存 20250205 的每日基础数据\n",
|
||||
"成功获取并保存 20250127 的每日基础数据\n",
|
||||
"成功获取并保存 20250124 的每日基础数据\n",
|
||||
"成功获取并保存 20250123 的每日基础数据\n",
|
||||
"成功获取并保存 20250122 的每日基础数据\n",
|
||||
"成功获取并保存 20250121 的每日基础数据\n",
|
||||
"成功获取并保存 20250120 的每日基础数据\n",
|
||||
"成功获取并保存 20250117 的每日基础数据\n",
|
||||
"成功获取并保存 20250116 的每日基础数据\n",
|
||||
"成功获取并保存 20250115 的每日基础数据\n",
|
||||
"成功获取并保存 20250114 的每日基础数据\n",
|
||||
"成功获取并保存 20250113 的每日基础数据\n",
|
||||
"成功获取并保存 20250110 的每日基础数据\n",
|
||||
"成功获取并保存 20250109 的每日基础数据\n",
|
||||
"成功获取并保存 20250108 的每日基础数据\n",
|
||||
"成功获取并保存 20250107 的每日基础数据\n",
|
||||
"成功获取并保存 20250106 的每日基础数据\n",
|
||||
"成功获取并保存 20250103 的每日基础数据\n",
|
||||
"成功获取并保存 20250102 的每日基础数据\n",
|
||||
"成功获取并保存 20241231 的每日基础数据\n",
|
||||
"成功获取并保存 20241230 的每日基础数据\n",
|
||||
"成功获取并保存 20241227 的每日基础数据\n",
|
||||
"成功获取并保存 20241226 的每日基础数据\n",
|
||||
"成功获取并保存 20241225 的每日基础数据\n",
|
||||
"成功获取并保存 20241224 的每日基础数据\n",
|
||||
"成功获取并保存 20241223 的每日基础数据\n",
|
||||
"成功获取并保存 20241220 的每日基础数据\n",
|
||||
"成功获取并保存 20241219 的每日基础数据\n",
|
||||
"成功获取并保存 20241218 的每日基础数据\n",
|
||||
"成功获取并保存 20241217 的每日基础数据\n",
|
||||
"成功获取并保存 20241216 的每日基础数据\n",
|
||||
"成功获取并保存 20241213 的每日基础数据\n",
|
||||
"成功获取并保存 20241212 的每日基础数据\n",
|
||||
"成功获取并保存 20241211 的每日基础数据\n",
|
||||
"成功获取并保存 20241210 的每日基础数据\n",
|
||||
"成功获取并保存 20241209 的每日基础数据\n",
|
||||
"成功获取并保存 20241206 的每日基础数据\n",
|
||||
"成功获取并保存 20241205 的每日基础数据\n",
|
||||
"成功获取并保存 20241204 的每日基础数据\n",
|
||||
"成功获取并保存 20241203 的每日基础数据\n",
|
||||
"成功获取并保存 20241202 的每日基础数据\n",
|
||||
"成功获取并保存 20241129 的每日基础数据\n",
|
||||
"成功获取并保存 20241128 的每日基础数据\n",
|
||||
"成功获取并保存 20241127 的每日基础数据\n",
|
||||
"成功获取并保存 20241126 的每日基础数据\n",
|
||||
"成功获取并保存 20241125 的每日基础数据\n",
|
||||
"成功获取并保存 20241122 的每日基础数据\n",
|
||||
"成功获取并保存 20241121 的每日基础数据\n",
|
||||
"成功获取并保存 20241120 的每日基础数据\n",
|
||||
"成功获取并保存 20241119 的每日基础数据\n",
|
||||
"成功获取并保存 20241118 的每日基础数据\n",
|
||||
"成功获取并保存 20241115 的每日基础数据\n",
|
||||
"成功获取并保存 20241114 的每日基础数据\n",
|
||||
"成功获取并保存 20241113 的每日基础数据\n",
|
||||
"成功获取并保存 20241112 的每日基础数据\n",
|
||||
"成功获取并保存 20241111 的每日基础数据\n",
|
||||
"成功获取并保存 20241108 的每日基础数据\n",
|
||||
"成功获取并保存 20241107 的每日基础数据\n",
|
||||
"成功获取并保存 20241106 的每日基础数据\n",
|
||||
"成功获取并保存 20241105 的每日基础数据\n",
|
||||
"成功获取并保存 20241104 的每日基础数据\n",
|
||||
"成功获取并保存 20241101 的每日基础数据\n",
|
||||
"成功获取并保存 20241031 的每日基础数据\n",
|
||||
"成功获取并保存 20241030 的每日基础数据\n",
|
||||
"成功获取并保存 20241029 的每日基础数据\n",
|
||||
"成功获取并保存 20241028 的每日基础数据\n",
|
||||
"成功获取并保存 20241025 的每日基础数据\n",
|
||||
"成功获取并保存 20241024 的每日基础数据\n",
|
||||
"成功获取并保存 20241023 的每日基础数据\n",
|
||||
"成功获取并保存 20241022 的每日基础数据\n",
|
||||
"成功获取并保存 20241021 的每日基础数据\n",
|
||||
"成功获取并保存 20241014 的每日基础数据\n",
|
||||
"150 1741835004.3988936 1741834982.2357981\n",
|
||||
"已调用 150 次 API,等待 37.84 秒以满足速率限制...\n",
|
||||
"300 1741835064.0700593 1741835042.2372077\n",
|
||||
"已调用 150 次 API,等待 38.17 秒以满足速率限制...\n",
|
||||
"450 1741835124.4976892 1741835102.2381623\n",
|
||||
"已调用 150 次 API,等待 37.74 秒以满足速率限制...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[4], line 22\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m trade_date \u001b[38;5;129;01min\u001b[39;00m trade_dates:\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m# 获取每日基础数据\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m kpl_concept \u001b[38;5;241m=\u001b[39m pro\u001b[38;5;241m.\u001b[39mkpl_concept(trade_date\u001b[38;5;241m=\u001b[39mtrade_date)\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kpl_concept \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kpl_concept\u001b[38;5;241m.\u001b[39mempty:\n\u001b[0;32m 24\u001b[0m all_daily_data\u001b[38;5;241m.\u001b[39mappend(kpl_concept)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\tushare\\pro\\client.py:41\u001b[0m, in \u001b[0;36mDataApi.query\u001b[1;34m(self, api_name, fields, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mquery\u001b[39m(\u001b[38;5;28mself\u001b[39m, api_name, fields\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 34\u001b[0m req_params \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 35\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mapi_name\u001b[39m\u001b[38;5;124m'\u001b[39m: api_name,\n\u001b[0;32m 36\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtoken\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__token,\n\u001b[0;32m 37\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparams\u001b[39m\u001b[38;5;124m'\u001b[39m: kwargs,\n\u001b[0;32m 38\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfields\u001b[39m\u001b[38;5;124m'\u001b[39m: fields\n\u001b[0;32m 39\u001b[0m }\n\u001b[1;32m---> 41\u001b[0m res \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__http_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mapi_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, json\u001b[38;5;241m=\u001b[39mreq_params, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__timeout)\n\u001b[0;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m res:\n\u001b[0;32m 43\u001b[0m result \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mloads(res\u001b[38;5;241m.\u001b[39mtext)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[1;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[0;32m 105\u001b[0m \n\u001b[0;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m request(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url, data\u001b[38;5;241m=\u001b[39mdata, json\u001b[38;5;241m=\u001b[39mjson, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[0;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m session\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[0;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[0;32m 587\u001b[0m }\n\u001b[0;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[1;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(prep, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msend_kwargs)\n\u001b[0;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:724\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 721\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_redirects:\n\u001b[0;32m 722\u001b[0m \u001b[38;5;66;03m# Redirect resolving generator.\u001b[39;00m\n\u001b[0;32m 723\u001b[0m gen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresolve_redirects(r, request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 724\u001b[0m history \u001b[38;5;241m=\u001b[39m [resp \u001b[38;5;28;01mfor\u001b[39;00m resp \u001b[38;5;129;01min\u001b[39;00m gen]\n\u001b[0;32m 725\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 726\u001b[0m history \u001b[38;5;241m=\u001b[39m []\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:724\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 721\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_redirects:\n\u001b[0;32m 722\u001b[0m \u001b[38;5;66;03m# Redirect resolving generator.\u001b[39;00m\n\u001b[0;32m 723\u001b[0m gen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresolve_redirects(r, request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 724\u001b[0m history \u001b[38;5;241m=\u001b[39m [resp \u001b[38;5;28;01mfor\u001b[39;00m resp \u001b[38;5;129;01min\u001b[39;00m gen]\n\u001b[0;32m 725\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 726\u001b[0m history \u001b[38;5;241m=\u001b[39m []\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:265\u001b[0m, in \u001b[0;36mSessionRedirectMixin.resolve_redirects\u001b[1;34m(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)\u001b[0m\n\u001b[0;32m 263\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m req\n\u001b[0;32m 264\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 265\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(\n\u001b[0;32m 266\u001b[0m req,\n\u001b[0;32m 267\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream,\n\u001b[0;32m 268\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 269\u001b[0m verify\u001b[38;5;241m=\u001b[39mverify,\n\u001b[0;32m 270\u001b[0m cert\u001b[38;5;241m=\u001b[39mcert,\n\u001b[0;32m 271\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 272\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 273\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39madapter_kwargs,\n\u001b[0;32m 274\u001b[0m )\n\u001b[0;32m 276\u001b[0m extract_cookies_to_jar(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcookies, prepared_request, resp\u001b[38;5;241m.\u001b[39mraw)\n\u001b[0;32m 278\u001b[0m \u001b[38;5;66;03m# extract redirect url, if any, for the next loop\u001b[39;00m\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[0;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[1;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m adapter\u001b[38;5;241m.\u001b[39msend(request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[0;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[0;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39murlopen(\n\u001b[0;32m 668\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m 669\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 670\u001b[0m body\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mbody,\n\u001b[0;32m 671\u001b[0m headers\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[0;32m 672\u001b[0m redirect\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 673\u001b[0m assert_same_host\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 674\u001b[0m preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 675\u001b[0m decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 676\u001b[0m retries\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_retries,\n\u001b[0;32m 677\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 678\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 679\u001b[0m )\n\u001b[0;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connectionpool.py:787\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 784\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 786\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[1;32m--> 787\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(\n\u001b[0;32m 788\u001b[0m conn,\n\u001b[0;32m 789\u001b[0m method,\n\u001b[0;32m 790\u001b[0m url,\n\u001b[0;32m 791\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout_obj,\n\u001b[0;32m 792\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 793\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 794\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 795\u001b[0m retries\u001b[38;5;241m=\u001b[39mretries,\n\u001b[0;32m 796\u001b[0m response_conn\u001b[38;5;241m=\u001b[39mresponse_conn,\n\u001b[0;32m 797\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 798\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 799\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mresponse_kw,\n\u001b[0;32m 800\u001b[0m )\n\u001b[0;32m 802\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[0;32m 803\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connectionpool.py:534\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[1;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 532\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[0;32m 533\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 534\u001b[0m response \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39mgetresponse()\n\u001b[0;32m 535\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 536\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connection.py:516\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 513\u001b[0m _shutdown \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msock, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshutdown\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 515\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[1;32m--> 516\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mgetresponse()\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 519\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1393\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1395\u001b[0m response\u001b[38;5;241m.\u001b[39mbegin()\n\u001b[0;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[0;32m 1397\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 323\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[0;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_read_status()\n\u001b[0;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[0;32m 327\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:286\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 285\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m--> 286\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfp\u001b[38;5;241m.\u001b[39mreadline(_MAXLINE \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[0;32m 288\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\socket.py:718\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[1;34m(self, b)\u001b[0m\n\u001b[0;32m 716\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 717\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sock\u001b[38;5;241m.\u001b[39mrecv_into(b)\n\u001b[0;32m 719\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[0;32m 720\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 获取交易日历\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250301')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal['cal_date'].tolist() # 获取所有交易日期列表\n",
|
||||
"\n",
|
||||
"# 使用 HDFStore 存储数据\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"# 遍历每个交易日期并获取数据\n",
|
||||
"for trade_date in trade_dates:\n",
|
||||
" try:\n",
|
||||
" # 获取每日基础数据\n",
|
||||
" kpl_concept = pro.kpl_concept(trade_date=trade_date)\n",
|
||||
" if kpl_concept is not None and not kpl_concept.empty:\n",
|
||||
" all_daily_data.append(kpl_concept)\n",
|
||||
" print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
|
||||
"\n",
|
||||
" # 计数一次 API 调用\n",
|
||||
" api_call_count += 1\n",
|
||||
"\n",
|
||||
" # 每调用 150 次,检查时间是否少于 1 分钟,如果少于则等待剩余时间\n",
|
||||
" if api_call_count % 150 == 0:\n",
|
||||
" print(api_call_count,time.time(), batch_start_time)\n",
|
||||
" elapsed = time.time() - batch_start_time\n",
|
||||
" if elapsed < 60:\n",
|
||||
" sleep_time = 60 - elapsed\n",
|
||||
" print(f\"已调用 150 次 API,等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
|
||||
" time.sleep(sleep_time)\n",
|
||||
" # 重置批次起始时间\n",
|
||||
" batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "907f732d3c397bf",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-12T15:39:50.141920800Z",
|
||||
"start_time": "2025-03-12T15:23:41.345460Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf('../../data/kpl_concept.h5', key='kpl_concept', mode='w', format='table', data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
194
code/data/update/cyq-perf.ipynb
Normal file
194
code/data/update/cyq-perf.ipynb
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:31.596637Z",
|
||||
"start_time": "2025-03-30T16:42:30.883319Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:37.590148Z",
|
||||
"start_time": "2025-03-30T16:42:31.596637Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/cyq_perf.h5'\n",
|
||||
"key = '/cyq_perf'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 000001.SZ 20250312\n",
|
||||
"1 000002.SZ 20250312\n",
|
||||
"2 000004.SZ 20250312\n",
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"32304 920108.BJ 20250314\n",
|
||||
"32305 920111.BJ 20250314\n",
|
||||
"32306 920116.BJ 20250314\n",
|
||||
"32307 920118.BJ 20250314\n",
|
||||
"32308 920128.BJ 20250314\n",
|
||||
"\n",
|
||||
"[7503415 rows x 2 columns]\n",
|
||||
"20250321\n",
|
||||
"start_date: 20250324\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:43:29.275885Z",
|
||||
"start_time": "2025-03-30T16:42:37.858763Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.cyq_perf(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n",
|
||||
"任务 20250403 完成\n",
|
||||
"任务 20250402 完成\n",
|
||||
"任务 20250401 完成\n",
|
||||
"任务 20250331 完成\n",
|
||||
"任务 20250328 完成\n",
|
||||
"任务 20250327 完成\n",
|
||||
"任务 20250326 完成\n",
|
||||
"任务 20250325 完成\n",
|
||||
"任务 20250324 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:43:30.100678Z",
|
||||
"start_time": "2025-03-30T16:43:29.311710Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
194
code/data/update/index_data.ipynb
Normal file
194
code/data/update/index_data.ipynb
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
"... ... ...\n",
|
||||
"1044388 857344.SI 20170103\n",
|
||||
"1044389 857411.SI 20170103\n",
|
||||
"1044390 857421.SI 20170103\n",
|
||||
"1044391 857431.SI 20170103\n",
|
||||
"1044392 858811.SI 20170103\n",
|
||||
"\n",
|
||||
"[1044393 rows x 2 columns]\n",
|
||||
"20250221\n",
|
||||
"start_date: 20250224\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/sw_daily.h5'\n",
|
||||
"key = '/sw_daily'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250403 完成\n",
|
||||
"任务 20250407 完成\n",
|
||||
"任务 20250402 完成\n",
|
||||
"任务 20250401 完成\n",
|
||||
"任务 20250331 完成\n",
|
||||
"任务 20250328 完成\n",
|
||||
"任务 20250327 完成\n",
|
||||
"任务 20250326 完成\n",
|
||||
"任务 20250325 完成\n",
|
||||
"任务 20250324 完成\n",
|
||||
"任务 20250321 完成\n",
|
||||
"任务 20250320 完成\n",
|
||||
"任务 20250319 完成\n",
|
||||
"任务 20250317 完成\n",
|
||||
"任务 20250314 完成\n",
|
||||
"任务 20250318 完成\n",
|
||||
"任务 20250313 完成\n",
|
||||
"任务 20250312 完成\n",
|
||||
"任务 20250311 完成\n",
|
||||
"任务 20250310 完成\n",
|
||||
"任务 20250307 完成\n",
|
||||
"任务 20250306 完成\n",
|
||||
"任务 20250305 完成\n",
|
||||
"任务 20250304 完成\n",
|
||||
"任务 20250303 完成\n",
|
||||
"任务 20250228 完成\n",
|
||||
"任务 20250227 完成\n",
|
||||
"任务 20250226 完成\n",
|
||||
"任务 20250225 完成\n",
|
||||
"任务 20250224 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.sw_daily(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
194
code/data/update/sw_daily.ipynb
Normal file
194
code/data/update/sw_daily.ipynb
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:32.996500Z",
|
||||
"start_time": "2025-03-30T16:42:32.209631Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:34.591433Z",
|
||||
"start_time": "2025-03-30T16:42:32.996500Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/sw_daily.h5'\n",
|
||||
"key = '/sw_daily'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
"... ... ...\n",
|
||||
"2629 859811.SI 20250314\n",
|
||||
"2630 859821.SI 20250314\n",
|
||||
"2631 859822.SI 20250314\n",
|
||||
"2632 859852.SI 20250314\n",
|
||||
"2633 859951.SI 20250314\n",
|
||||
"\n",
|
||||
"[1053173 rows x 2 columns]\n",
|
||||
"20250321\n",
|
||||
"start_date: 20250324\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:37.718270Z",
|
||||
"start_time": "2025-03-30T16:42:34.817305Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.sw_daily(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n",
|
||||
"任务 20250403 完成\n",
|
||||
"任务 20250402 完成\n",
|
||||
"任务 20250401 完成\n",
|
||||
"任务 20250331 完成\n",
|
||||
"任务 20250328 完成\n",
|
||||
"任务 20250327 完成\n",
|
||||
"任务 20250326 完成\n",
|
||||
"任务 20250325 完成\n",
|
||||
"任务 20250324 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-30T16:42:37.922827Z",
|
||||
"start_time": "2025-03-30T16:42:37.739040Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
BIN
code/model/lightgbm_model_UpdateRegression_2025-2-25.pkl
Normal file
BIN
code/model/lightgbm_model_UpdateRegression_2025-2-25.pkl
Normal file
Binary file not shown.
5
code/test.py
Normal file
5
code/test.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import torch
|
||||
print(torch.__version__)
|
||||
|
||||
print(torch.version.cuda)
|
||||
print(torch.backends.cudnn.version())
|
||||
1317
code/train/ClassifyLR.ipynb
Normal file
1317
code/train/ClassifyLR.ipynb
Normal file
File diff suppressed because one or more lines are too long
967
code/train/DoubleQuntile.ipynb
Normal file
967
code/train/DoubleQuntile.ipynb
Normal file
File diff suppressed because one or more lines are too long
1385
code/train/DoubleRank.ipynb
Normal file
1385
code/train/DoubleRank.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1016
code/train/PlUpdateClassify.ipynb
Normal file
1016
code/train/PlUpdateClassify.ipynb
Normal file
File diff suppressed because one or more lines are too long
1250
code/train/RollingRank.py
Normal file
1250
code/train/RollingRank.py
Normal file
File diff suppressed because it is too large
Load Diff
1723
code/train/TRank.ipynb
Normal file
1723
code/train/TRank.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1362
code/train/Transformer.ipynb
Normal file
1362
code/train/Transformer.ipynb
Normal file
File diff suppressed because one or more lines are too long
1570
code/train/UpdateRank.ipynb
Normal file
1570
code/train/UpdateRank.ipynb
Normal file
File diff suppressed because one or more lines are too long
896
code/train/V1-copy.ipynb
Normal file
896
code/train/V1-copy.ipynb
Normal file
@@ -0,0 +1,896 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:52:54.170824Z",
|
||||
"start_time": "2025-02-09T14:52:53.544850Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"\n",
|
||||
"from utils.utils import read_and_merge_h5_data"
|
||||
],
|
||||
"id": "79a7758178bafdd3",
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:36.873700Z",
|
||||
"start_time": "2025-02-09T14:52:54.170824Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic_with_st',\n",
|
||||
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
|
||||
" 'is_st'], df=df)\n",
|
||||
"\n",
|
||||
"print('stk limit')\n",
|
||||
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
|
||||
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
|
||||
" df=df)\n",
|
||||
"print('money flow')\n",
|
||||
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
|
||||
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
|
||||
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
|
||||
" df=df)"
|
||||
],
|
||||
"id": "a79cafb06a7e0e43",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"daily data\n",
|
||||
"daily basic\n",
|
||||
"stk limit\n",
|
||||
"money flow\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:37.426404Z",
|
||||
"start_time": "2025-02-09T14:53:36.955552Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "origin_columns = df.columns.tolist()",
|
||||
"id": "c4e9e1d31da6dba6",
|
||||
"outputs": [],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:38.164112Z",
|
||||
"start_time": "2025-02-09T14:53:38.070007Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import talib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_technical_factor(df):\n",
|
||||
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
|
||||
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
|
||||
"\n",
|
||||
" df['atr_14'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=14)\n",
|
||||
" df['atr_6'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=6)\n",
|
||||
"\n",
|
||||
" df['obv'] = talib.OBV(df['close'], df['vol'])\n",
|
||||
" df['maobv_6'] = talib.SMA(df['obv'], timeperiod=6)\n",
|
||||
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
|
||||
"\n",
|
||||
" df['rsi_3'] = talib.RSI(df['close'], timeperiod=3)\n",
|
||||
" df['rsi_6'] = talib.RSI(df['close'], timeperiod=6)\n",
|
||||
" df['rsi_9'] = talib.RSI(df['close'], timeperiod=9)\n",
|
||||
"\n",
|
||||
" df['return_10'] = df['close'] / df['close'].shift(10) - 1\n",
|
||||
" df['return_20'] = df['close'] / df['close'].shift(20) - 1\n",
|
||||
"\n",
|
||||
" # # 计算 _rank_return_10 和 _rank_return_20\n",
|
||||
" # df['_rank_return_10'] = df['return_10'].rank(pct=True)\n",
|
||||
" # df['_rank_return_20'] = df['return_20'].rank(pct=True)\n",
|
||||
"\n",
|
||||
" # 计算 avg_close_5\n",
|
||||
" df['avg_close_5'] = df['close'].rolling(window=5).mean() / df['close']\n",
|
||||
"\n",
|
||||
" # 计算 std_return_5, std_return_15, std_return_25, std_return_252, std_return_2522\n",
|
||||
" df['std_return_5'] = df['close'].pct_change().shift(-1).rolling(window=5).std()\n",
|
||||
" df['std_return_15'] = df['close'].pct_change().shift(-1).rolling(window=15).std()\n",
|
||||
" df['std_return_25'] = df['close'].pct_change().shift(-1).rolling(window=25).std()\n",
|
||||
" df['std_return_90'] = df['close'].pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
" df['std_return_90_2'] = df['close'].shift(10).pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
"\n",
|
||||
" # 计算 std_return_5 / std_return_252 和 std_return_5 / std_return_25\n",
|
||||
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
|
||||
" df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
|
||||
"\n",
|
||||
" # 计算 std_return_252 - std_return_2522\n",
|
||||
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_act_factor(df):\n",
|
||||
" # 计算 m_ta_ema(close, 5), m_ta_ema(close, 13), m_ta_ema(close, 20), m_ta_ema(close, 60)\n",
|
||||
" df['ema_5'] = talib.EMA(df['close'], timeperiod=5)\n",
|
||||
" df['ema_13'] = talib.EMA(df['close'], timeperiod=13)\n",
|
||||
" df['ema_20'] = talib.EMA(df['close'], timeperiod=20)\n",
|
||||
" df['ema_60'] = talib.EMA(df['close'], timeperiod=60)\n",
|
||||
"\n",
|
||||
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
|
||||
" df['act_factor1'] = np.arctan((df['ema_5'] / df['ema_5'].shift(1) - 1) * 100) * 57.3 / 50\n",
|
||||
" df['act_factor2'] = np.arctan((df['ema_13'] / df['ema_13'].shift(1) - 1) * 100) * 57.3 / 40\n",
|
||||
" df['act_factor3'] = np.arctan((df['ema_20'] / df['ema_20'].shift(1) - 1) * 100) * 57.3 / 21\n",
|
||||
" df['act_factor4'] = np.arctan((df['ema_60'] / df['ema_60'].shift(1) - 1) * 100) * 57.3 / 10\n",
|
||||
"\n",
|
||||
" # 计算 act_factor5 和 act_factor6\n",
|
||||
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
|
||||
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
|
||||
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
|
||||
"\n",
|
||||
" # 根据 'trade_date' 进行分组,在每个组内分别计算 'act_factor1', 'act_factor2', 'act_factor3' 的排名\n",
|
||||
" df['rank_act_factor1'] = df.groupby('trade_date')['act_factor1'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor2'] = df.groupby('trade_date')['act_factor2'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor3'] = df.groupby('trade_date')['act_factor3'].rank(ascending=False, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_money_flow_factor(df):\n",
|
||||
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['buy_lg_vol - sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
|
||||
" df['buy_elg_vol - sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" # # 你还提到了一些其他字段:\n",
|
||||
" # df['net_active_buy_volume_main'] = df['net_mf_vol'] / df['buy_sm_vol']\n",
|
||||
" # df['netflow_amount_main'] = df['net_mf_vol'] / df['buy_sm_vol'] # 这里假设 'net_mf_vol' 是主流资金流\n",
|
||||
"\n",
|
||||
" # df['active_sell_volume_large'] = df['sell_lg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_big'] = df['sell_elg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_small'] = df['sell_sm_vol'] / df['sell_sm_vol']\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_alpha_factor(df):\n",
|
||||
" df['alpha_022'] = df['close'] - df['close'].shift(5)\n",
|
||||
"\n",
|
||||
" # alpha_003: (close - open) / (high - low)\n",
|
||||
" df['alpha_003'] = (df['close'] - df['open']) / (df['high'] - df['low'])\n",
|
||||
"\n",
|
||||
" # alpha_007: rank(correlation(close, volume, 5))\n",
|
||||
" df['alpha_007'] = df['close'].rolling(5).corr(df['vol']).rank(axis=1)\n",
|
||||
"\n",
|
||||
" # alpha_013: rank(sum(close, 5) - sum(close, 20))\n",
|
||||
" df['alpha_013'] = (df['close'].rolling(5).sum() - df['close'].rolling(20).sum()).rank(axis=1)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_future_data(df):\n",
|
||||
" df['future_return1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_return2'] = (df['open'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return3'] = (df['close'].shift(-2) - df['close'].shift(-1)) / df['close'].shift(-1)\n",
|
||||
" df['future_return4'] = (df['close'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return5'] = (df['close'].shift(-5) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return6'] = (df['close'].shift(-10) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return7'] = (df['close'].shift(-20) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_close1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_close2'] = (df['close'].shift(-2) - df['close']) / df['close']\n",
|
||||
" df['future_close3'] = (df['close'].shift(-3) - df['close']) / df['close']\n",
|
||||
" df['future_close4'] = (df['close'].shift(-4) - df['close']) / df['close']\n",
|
||||
" df['future_close5'] = (df['close'].shift(-5) - df['close']) / df['close']\n",
|
||||
" df['future_af11'] = df['act_factor1'].shift(-1)\n",
|
||||
" df['future_af12'] = df['act_factor1'].shift(-2)\n",
|
||||
" df['future_af13'] = df['act_factor1'].shift(-3)\n",
|
||||
" df['future_af14'] = df['act_factor1'].shift(-4)\n",
|
||||
" df['future_af15'] = df['act_factor1'].shift(-5)\n",
|
||||
" df['future_af21'] = df['act_factor2'].shift(-1)\n",
|
||||
" df['future_af22'] = df['act_factor2'].shift(-2)\n",
|
||||
" df['future_af23'] = df['act_factor2'].shift(-3)\n",
|
||||
" df['future_af24'] = df['act_factor2'].shift(-4)\n",
|
||||
" df['future_af25'] = df['act_factor2'].shift(-5)\n",
|
||||
" df['future_af31'] = df['act_factor3'].shift(-1)\n",
|
||||
" df['future_af32'] = df['act_factor3'].shift(-2)\n",
|
||||
" df['future_af33'] = df['act_factor3'].shift(-3)\n",
|
||||
" df['future_af34'] = df['act_factor3'].shift(-4)\n",
|
||||
" df['future_af35'] = df['act_factor3'].shift(-5)\n",
|
||||
"\n",
|
||||
" return df\n"
|
||||
],
|
||||
"id": "a735bc02ceb4d872",
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:49.153376Z",
|
||||
"start_time": "2025-02-09T14:53:38.164112Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"df = get_technical_factor(df)\n",
|
||||
"df = get_act_factor(df)\n",
|
||||
"df = get_money_flow_factor(df)\n",
|
||||
"df = get_future_data(df)\n",
|
||||
"# df = df.drop(columns=origin_columns)\n",
|
||||
"\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "53f86ddc0677a6d7",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 8364308 entries, 0 to 8364307\n",
|
||||
"Data columns (total 83 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object \n",
|
||||
" 1 trade_date datetime64[ns]\n",
|
||||
" 2 open float64 \n",
|
||||
" 3 close float64 \n",
|
||||
" 4 high float64 \n",
|
||||
" 5 low float64 \n",
|
||||
" 6 vol float64 \n",
|
||||
" 7 is_st object \n",
|
||||
" 8 up_limit float64 \n",
|
||||
" 9 down_limit float64 \n",
|
||||
" 10 buy_sm_vol float64 \n",
|
||||
" 11 sell_sm_vol float64 \n",
|
||||
" 12 buy_lg_vol float64 \n",
|
||||
" 13 sell_lg_vol float64 \n",
|
||||
" 14 buy_elg_vol float64 \n",
|
||||
" 15 sell_elg_vol float64 \n",
|
||||
" 16 net_mf_vol float64 \n",
|
||||
" 17 up float64 \n",
|
||||
" 18 down float64 \n",
|
||||
" 19 atr_14 float64 \n",
|
||||
" 20 atr_6 float64 \n",
|
||||
" 21 obv float64 \n",
|
||||
" 22 maobv_6 float64 \n",
|
||||
" 23 obv-maobv_6 float64 \n",
|
||||
" 24 rsi_3 float64 \n",
|
||||
" 25 rsi_6 float64 \n",
|
||||
" 26 rsi_9 float64 \n",
|
||||
" 27 return_10 float64 \n",
|
||||
" 28 return_20 float64 \n",
|
||||
" 29 avg_close_5 float64 \n",
|
||||
" 30 std_return_5 float64 \n",
|
||||
" 31 std_return_15 float64 \n",
|
||||
" 32 std_return_25 float64 \n",
|
||||
" 33 std_return_90 float64 \n",
|
||||
" 34 std_return_90_2 float64 \n",
|
||||
" 35 std_return_5 / std_return_90 float64 \n",
|
||||
" 36 std_return_5 / std_return_25 float64 \n",
|
||||
" 37 std_return_90 - std_return_90_2 float64 \n",
|
||||
" 38 ema_5 float64 \n",
|
||||
" 39 ema_13 float64 \n",
|
||||
" 40 ema_20 float64 \n",
|
||||
" 41 ema_60 float64 \n",
|
||||
" 42 act_factor1 float64 \n",
|
||||
" 43 act_factor2 float64 \n",
|
||||
" 44 act_factor3 float64 \n",
|
||||
" 45 act_factor4 float64 \n",
|
||||
" 46 act_factor5 float64 \n",
|
||||
" 47 act_factor6 float64 \n",
|
||||
" 48 rank_act_factor1 float64 \n",
|
||||
" 49 rank_act_factor2 float64 \n",
|
||||
" 50 rank_act_factor3 float64 \n",
|
||||
" 51 active_buy_volume_large float64 \n",
|
||||
" 52 active_buy_volume_big float64 \n",
|
||||
" 53 active_buy_volume_small float64 \n",
|
||||
" 54 buy_lg_vol - sell_lg_vol float64 \n",
|
||||
" 55 buy_elg_vol - sell_elg_vol float64 \n",
|
||||
" 56 future_return1 float64 \n",
|
||||
" 57 future_return2 float64 \n",
|
||||
" 58 future_return3 float64 \n",
|
||||
" 59 future_return4 float64 \n",
|
||||
" 60 future_return5 float64 \n",
|
||||
" 61 future_return6 float64 \n",
|
||||
" 62 future_return7 float64 \n",
|
||||
" 63 future_close1 float64 \n",
|
||||
" 64 future_close2 float64 \n",
|
||||
" 65 future_close3 float64 \n",
|
||||
" 66 future_close4 float64 \n",
|
||||
" 67 future_close5 float64 \n",
|
||||
" 68 future_af11 float64 \n",
|
||||
" 69 future_af12 float64 \n",
|
||||
" 70 future_af13 float64 \n",
|
||||
" 71 future_af14 float64 \n",
|
||||
" 72 future_af15 float64 \n",
|
||||
" 73 future_af21 float64 \n",
|
||||
" 74 future_af22 float64 \n",
|
||||
" 75 future_af23 float64 \n",
|
||||
" 76 future_af24 float64 \n",
|
||||
" 77 future_af25 float64 \n",
|
||||
" 78 future_af31 float64 \n",
|
||||
" 79 future_af32 float64 \n",
|
||||
" 80 future_af33 float64 \n",
|
||||
" 81 future_af34 float64 \n",
|
||||
" 82 future_af35 float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(80), object(2)\n",
|
||||
"memory usage: 5.2+ GB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:55:28.712343Z",
|
||||
"start_time": "2025-02-09T14:53:49.279168Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def filter_data(df):\n",
|
||||
" df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor3'))\n",
|
||||
" df = df[df['is_st'] == False]\n",
|
||||
" df = df[df['is_st'] == False]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('30')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('68')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('8')]\n",
|
||||
" df = df.reset_index(drop=True)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = filter_data(df)\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "dbe2fd8021b9417f",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 1136157 entries, 0 to 1136156\n",
|
||||
"Data columns (total 83 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 ts_code 1136157 non-null object \n",
|
||||
" 1 trade_date 1136157 non-null datetime64[ns]\n",
|
||||
" 2 open 1136157 non-null float64 \n",
|
||||
" 3 close 1136157 non-null float64 \n",
|
||||
" 4 high 1136157 non-null float64 \n",
|
||||
" 5 low 1136157 non-null float64 \n",
|
||||
" 6 vol 1136157 non-null float64 \n",
|
||||
" 7 is_st 1136157 non-null object \n",
|
||||
" 8 up_limit 1135878 non-null float64 \n",
|
||||
" 9 down_limit 1135878 non-null float64 \n",
|
||||
" 10 buy_sm_vol 1135663 non-null float64 \n",
|
||||
" 11 sell_sm_vol 1135663 non-null float64 \n",
|
||||
" 12 buy_lg_vol 1135663 non-null float64 \n",
|
||||
" 13 sell_lg_vol 1135663 non-null float64 \n",
|
||||
" 14 buy_elg_vol 1135663 non-null float64 \n",
|
||||
" 15 sell_elg_vol 1135663 non-null float64 \n",
|
||||
" 16 net_mf_vol 1135663 non-null float64 \n",
|
||||
" 17 up 1136157 non-null float64 \n",
|
||||
" 18 down 1136157 non-null float64 \n",
|
||||
" 19 atr_14 1136157 non-null float64 \n",
|
||||
" 20 atr_6 1136157 non-null float64 \n",
|
||||
" 21 obv 1136157 non-null float64 \n",
|
||||
" 22 maobv_6 1136157 non-null float64 \n",
|
||||
" 23 obv-maobv_6 1136157 non-null float64 \n",
|
||||
" 24 rsi_3 1136157 non-null float64 \n",
|
||||
" 25 rsi_6 1136157 non-null float64 \n",
|
||||
" 26 rsi_9 1136157 non-null float64 \n",
|
||||
" 27 return_10 1136157 non-null float64 \n",
|
||||
" 28 return_20 1136157 non-null float64 \n",
|
||||
" 29 avg_close_5 1136157 non-null float64 \n",
|
||||
" 30 std_return_5 1136157 non-null float64 \n",
|
||||
" 31 std_return_15 1136157 non-null float64 \n",
|
||||
" 32 std_return_25 1136157 non-null float64 \n",
|
||||
" 33 std_return_90 1136131 non-null float64 \n",
|
||||
" 34 std_return_90_2 1136129 non-null float64 \n",
|
||||
" 35 std_return_5 / std_return_90 1136131 non-null float64 \n",
|
||||
" 36 std_return_5 / std_return_25 1136157 non-null float64 \n",
|
||||
" 37 std_return_90 - std_return_90_2 1136129 non-null float64 \n",
|
||||
" 38 ema_5 1136157 non-null float64 \n",
|
||||
" 39 ema_13 1136157 non-null float64 \n",
|
||||
" 40 ema_20 1136157 non-null float64 \n",
|
||||
" 41 ema_60 1136153 non-null float64 \n",
|
||||
" 42 act_factor1 1136157 non-null float64 \n",
|
||||
" 43 act_factor2 1136157 non-null float64 \n",
|
||||
" 44 act_factor3 1136157 non-null float64 \n",
|
||||
" 45 act_factor4 1136152 non-null float64 \n",
|
||||
" 46 act_factor5 1136152 non-null float64 \n",
|
||||
" 47 act_factor6 1136157 non-null float64 \n",
|
||||
" 48 rank_act_factor1 1136157 non-null float64 \n",
|
||||
" 49 rank_act_factor2 1136157 non-null float64 \n",
|
||||
" 50 rank_act_factor3 1136157 non-null float64 \n",
|
||||
" 51 active_buy_volume_large 1135659 non-null float64 \n",
|
||||
" 52 active_buy_volume_big 1135636 non-null float64 \n",
|
||||
" 53 active_buy_volume_small 1135663 non-null float64 \n",
|
||||
" 54 buy_lg_vol - sell_lg_vol 1135660 non-null float64 \n",
|
||||
" 55 buy_elg_vol - sell_elg_vol 1135640 non-null float64 \n",
|
||||
" 56 future_return1 1136157 non-null float64 \n",
|
||||
" 57 future_return2 1136157 non-null float64 \n",
|
||||
" 58 future_return3 1136157 non-null float64 \n",
|
||||
" 59 future_return4 1136157 non-null float64 \n",
|
||||
" 60 future_return5 1136157 non-null float64 \n",
|
||||
" 61 future_return6 1136157 non-null float64 \n",
|
||||
" 62 future_return7 1136157 non-null float64 \n",
|
||||
" 63 future_close1 1136157 non-null float64 \n",
|
||||
" 64 future_close2 1136157 non-null float64 \n",
|
||||
" 65 future_close3 1136157 non-null float64 \n",
|
||||
" 66 future_close4 1136157 non-null float64 \n",
|
||||
" 67 future_close5 1136157 non-null float64 \n",
|
||||
" 68 future_af11 1136157 non-null float64 \n",
|
||||
" 69 future_af12 1136157 non-null float64 \n",
|
||||
" 70 future_af13 1136157 non-null float64 \n",
|
||||
" 71 future_af14 1136157 non-null float64 \n",
|
||||
" 72 future_af15 1136157 non-null float64 \n",
|
||||
" 73 future_af21 1136157 non-null float64 \n",
|
||||
" 74 future_af22 1136157 non-null float64 \n",
|
||||
" 75 future_af23 1136157 non-null float64 \n",
|
||||
" 76 future_af24 1136157 non-null float64 \n",
|
||||
" 77 future_af25 1136157 non-null float64 \n",
|
||||
" 78 future_af31 1136157 non-null float64 \n",
|
||||
" 79 future_af32 1136157 non-null float64 \n",
|
||||
" 80 future_af33 1136157 non-null float64 \n",
|
||||
" 81 future_af34 1136157 non-null float64 \n",
|
||||
" 82 future_af35 1136157 non-null float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(80), object(2)\n",
|
||||
"memory usage: 719.5+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T15:00:45.828404Z",
|
||||
"start_time": "2025-02-09T15:00:45.294830Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def remove_outliers_iqr(series, lower_quantile=0.05, upper_quantile=0.95, threshold=1.5):\n",
|
||||
" Q1 = series.quantile(lower_quantile)\n",
|
||||
" Q3 = series.quantile(upper_quantile)\n",
|
||||
" IQR = Q3 - Q1\n",
|
||||
" lower_bound = Q1 - threshold * IQR\n",
|
||||
" upper_bound = Q3 + threshold * IQR\n",
|
||||
" # 过滤掉低于下边界或高于上边界的极值\n",
|
||||
" return (series >= lower_bound) & (series <= upper_bound)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def neutralize_labels(labels, features, feature_columns, z_threshold=3, method='regression'):\n",
|
||||
" labels_no_outliers = remove_outliers_iqr(labels)\n",
|
||||
" return labels_no_outliers\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data = df[df['trade_date'] <= '2023-01-01']\n",
|
||||
"test_data = df[df['trade_date'] >= '2023-01-01']\n",
|
||||
"\n",
|
||||
"feature_columns = [col for col in df.columns if col not in ['trade_date',\n",
|
||||
" 'ts_code',\n",
|
||||
" 'label']]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
|
||||
"\n",
|
||||
"# for column in [column for column in train_data.columns if 'future' in column]:\n",
|
||||
"# label_index = neutralize_labels(train_data[column], train_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# train_data = train_data[label_index]\n",
|
||||
"# label_index = neutralize_labels(test_data[column], test_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# test_data = test_data[label_index]\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "5f3d9aece75318cd",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['up', 'down', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'obv-maobv_6', 'rsi_3', 'rsi_6', 'rsi_9', 'return_10', 'return_20', 'avg_close_5', 'std_return_5', 'std_return_15', 'std_return_25', 'std_return_90', 'std_return_90_2', 'std_return_5 / std_return_90', 'std_return_5 / std_return_25', 'std_return_90 - std_return_90_2', 'ema_5', 'ema_13', 'ema_20', 'ema_60', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol - sell_lg_vol', 'buy_elg_vol - sell_elg_vol']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 19
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.319915Z",
|
||||
"start_time": "2025-02-09T14:56:03.355725Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def get_qcuts(series, quantiles):\n",
|
||||
" q = pd.qcut(series, q=quantiles, labels=False, duplicates='drop')\n",
|
||||
" return q[-1] # 返回窗口最后一个元素的分位数标签\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"window = 5\n",
|
||||
"quantiles = 20\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_label(df):\n",
|
||||
" labels = df['future_af13'] - df['act_factor1']\n",
|
||||
" # labels = df['future_close3']\n",
|
||||
" return labels\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
|
||||
"\n",
|
||||
"train_data, test_data = train_data.dropna(subset=['label']), test_data.dropna(subset=['label'])\n",
|
||||
"train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan).dropna(), test_data.replace([np.inf, -np.inf],\n",
|
||||
" np.nan).dropna()\n",
|
||||
"train_data, test_data = train_data.reset_index(drop=True), test_data.reset_index(drop=True)\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "f4f16d63ad18d1bc",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"875004\n",
|
||||
"最小日期: 2017-01-03\n",
|
||||
"最大日期: 2022-12-30\n",
|
||||
"260581\n",
|
||||
"最小日期: 2023-01-03\n",
|
||||
"最大日期: 2025-01-27\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 13
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.480695Z",
|
||||
"start_time": "2025-02-09T14:56:05.367238Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import lightgbm as lgb\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import optuna\n",
|
||||
"from sklearn.model_selection import KFold\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import pickle\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def objective(trial, X, y, num_boost_round, params):\n",
|
||||
" # 参数网格\n",
|
||||
" X, y = X.reset_index(drop=True), y.reset_index(drop=True)\n",
|
||||
" param_grid = {\n",
|
||||
" \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [10000]),\n",
|
||||
" \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3),\n",
|
||||
" \"num_leaves\": trial.suggest_int(\"num_leaves\", 20, 3000, step=25),\n",
|
||||
" \"max_depth\": trial.suggest_int(\"max_depth\", 3, 16),\n",
|
||||
" \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 200, 10000, step=100),\n",
|
||||
" \"lambda_l1\": trial.suggest_int(\"lambda_l1\", 0, 100, step=5),\n",
|
||||
" \"lambda_l2\": trial.suggest_int(\"lambda_l2\", 0, 100, step=5),\n",
|
||||
" \"min_gain_to_split\": trial.suggest_float(\"min_gain_to_split\", 0, 15),\n",
|
||||
" \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"bagging_freq\": trial.suggest_categorical(\"bagging_freq\", [1]),\n",
|
||||
" \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"random_state\": 1,\n",
|
||||
" \"objective\": 'regression',\n",
|
||||
" 'verbosity': -1\n",
|
||||
" }\n",
|
||||
" # 5折交叉验证\n",
|
||||
" cv = KFold(n_splits=5, shuffle=False)\n",
|
||||
"\n",
|
||||
" cv_scores = np.empty(5)\n",
|
||||
" for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):\n",
|
||||
" X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
|
||||
" y_train, y_test = y[train_idx], y[test_idx]\n",
|
||||
"\n",
|
||||
" # LGBM建模\n",
|
||||
" model = lgb.LGBMRegressor(**param_grid, num_boost_round=num_boost_round)\n",
|
||||
" model.fit(\n",
|
||||
" X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=[(X_test, y_test)],\n",
|
||||
" eval_metric=\"l2\",\n",
|
||||
" callbacks=[\n",
|
||||
" # LightGBMPruningCallback(trial, \"l2\"),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True),\n",
|
||||
" lgb.log_evaluation(period=-1)\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" # 模型预测\n",
|
||||
" preds = model.predict(X_test)\n",
|
||||
" # 优化指标logloss最小\n",
|
||||
" cv_scores[idx] = mean_absolute_error(y_test, preds)\n",
|
||||
"\n",
|
||||
" return np.mean(cv_scores)\n",
|
||||
"\n",
|
||||
"def generate_key(params, feature_columns, num_boost_round):\n",
|
||||
" key_data = {\n",
|
||||
" \"params\": params,\n",
|
||||
" \"feature_columns\": feature_columns,\n",
|
||||
" \"num_boost_round\": num_boost_round\n",
|
||||
" }\n",
|
||||
" # 转换成排序后的 JSON 字符串,再生成 md5 hash\n",
|
||||
" key_str = json.dumps(key_data, sort_keys=True)\n",
|
||||
" return hashlib.md5(key_str.encode('utf-8')).hexdigest()\n",
|
||||
"\n",
|
||||
"def train_light_model(df, params, feature_columns, callbacks, evals,\n",
|
||||
" print_feature_importance=True, num_boost_round=100,\n",
|
||||
" use_optuna=False):\n",
|
||||
" cache_file = 'light_model.pkl'\n",
|
||||
" cache_key = generate_key(params, feature_columns, num_boost_round)\n",
|
||||
"\n",
|
||||
" # 检查缓存文件是否存在\n",
|
||||
" if os.path.exists(cache_file):\n",
|
||||
" try:\n",
|
||||
" with open(cache_file, 'rb') as f:\n",
|
||||
" cache_data = pickle.load(f)\n",
|
||||
" if cache_data.get('key') == cache_key:\n",
|
||||
" print(\"加载缓存模型...\")\n",
|
||||
" return cache_data.get('model')\n",
|
||||
" else:\n",
|
||||
" print(\"缓存模型的参数与当前参数不匹配,重新训练模型。\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"加载缓存失败: {e},重新训练模型。\")\n",
|
||||
" else:\n",
|
||||
" print(\"未发现缓存模型,开始训练新模型。\")\n",
|
||||
" # 确保数据按照 date 和 label 排序\n",
|
||||
" df_sorted = df.sort_values(by=['trade_date', 'label'], ascending=[True, False]) # 按日期升序、标签降序排序\n",
|
||||
" df_sorted = df_sorted.sort_values(by='trade_date')\n",
|
||||
" unique_dates = df_sorted['trade_date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices]\n",
|
||||
" val_df = df_sorted.iloc[val_indices]\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" train_data = lgb.Dataset(X_train, label=y_train)\n",
|
||||
" val_data = lgb.Dataset(X_val, label=y_val)\n",
|
||||
" if use_optuna:\n",
|
||||
" # study = optuna.create_study(direction='minimize' if classify else 'maximize')\n",
|
||||
" study = optuna.create_study(direction='minimize')\n",
|
||||
" study.optimize(lambda trial: objective(trial, X_train, y_train, num_boost_round, params), n_trials=20)\n",
|
||||
"\n",
|
||||
" print(f\"Best parameters: {study.best_trial.params}\")\n",
|
||||
" print(f\"Best score: {study.best_trial.value}\")\n",
|
||||
"\n",
|
||||
" params.update(study.best_trial.params)\n",
|
||||
" model = lgb.train(\n",
|
||||
" params, train_data, num_boost_round=num_boost_round,\n",
|
||||
" valid_sets=[train_data, val_data], valid_names=['train', 'valid'],\n",
|
||||
" callbacks=callbacks\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 打印特征重要性(如果需要)\n",
|
||||
" if print_feature_importance:\n",
|
||||
" lgb.plot_metric(evals)\n",
|
||||
" lgb.plot_tree(model, figsize=(20, 8))\n",
|
||||
" lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
|
||||
" plt.show()\n",
|
||||
" # with open(cache_file, 'wb') as f:\n",
|
||||
" # pickle.dump({'key': cache_key,\n",
|
||||
" # 'model': model,\n",
|
||||
" # 'feature_columns': feature_columns}, f)\n",
|
||||
" # print(\"模型训练完成并已保存缓存。\")\n",
|
||||
" return model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from catboost import CatBoostRegressor\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def train_catboost(df, num_boost_round, params=None):\n",
|
||||
" \"\"\"\n",
|
||||
" 训练 CatBoost 排序模型\n",
|
||||
" - df: 包含因子、date、instrument 和 label 的 DataFrame\n",
|
||||
" - num_boost_round: 训练的轮数\n",
|
||||
" - print_feature_importance: 是否打印特征重要性\n",
|
||||
" - plot: 是否绘制特征重要性图\n",
|
||||
" - split_date: 用于划分训练集和验证集的日期(比如 '2020-01-01')\n",
|
||||
"\n",
|
||||
" 返回训练好的模型\n",
|
||||
" \"\"\"\n",
|
||||
" df_sorted = df.sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" # 提取特征和标签\n",
|
||||
" feature_columns = [col for col in df.columns if col not in ['date',\n",
|
||||
" 'instrument',\n",
|
||||
" 'label']]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"\n",
|
||||
" df_sorted = df_sorted.sort_values(by='date')\n",
|
||||
" unique_dates = df_sorted['date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
" val_df = df_sorted.iloc[val_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" model = CatBoostRegressor(**params, iterations=num_boost_round)\n",
|
||||
" model.fit(X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=(X_val, y_val))\n",
|
||||
"\n",
|
||||
" return model"
|
||||
],
|
||||
"id": "8f134d435f71e9e2",
|
||||
"outputs": [],
|
||||
"execution_count": 14
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.576927Z",
|
||||
"start_time": "2025-02-09T14:56:05.480695Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"light_params = {\n",
|
||||
" 'objective': 'regression',\n",
|
||||
" 'metric': 'l2',\n",
|
||||
" 'learning_rate': 0.05,\n",
|
||||
" 'is_unbalance': True,\n",
|
||||
" 'num_leaves': 2048,\n",
|
||||
" 'min_data_in_leaf': 16,\n",
|
||||
" 'max_depth': 32,\n",
|
||||
" 'max_bin': 1024,\n",
|
||||
" 'nthread': 2,\n",
|
||||
" 'feature_fraction': 0.7,\n",
|
||||
" 'bagging_fraction': 0.7,\n",
|
||||
" 'bagging_freq': 5,\n",
|
||||
" 'lambda_l1': 80,\n",
|
||||
" 'lambda_l2': 65,\n",
|
||||
" 'verbosity': -1\n",
|
||||
"}"
|
||||
],
|
||||
"id": "4a4542e1ed6afe7d",
|
||||
"outputs": [],
|
||||
"execution_count": 15
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:25.341222Z",
|
||||
"start_time": "2025-02-09T14:56:05.640256Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"print('train data size: ', len(train_data))\n",
|
||||
"df = train_data\n",
|
||||
"\n",
|
||||
"evals = {}\n",
|
||||
"light_model = train_light_model(train_data, light_params, feature_columns,\n",
|
||||
" [lgb.log_evaluation(period=500),\n",
|
||||
" lgb.callback.record_evaluation(evals),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True)\n",
|
||||
" ], evals,\n",
|
||||
" num_boost_round=1000, use_optuna=False,\n",
|
||||
" print_feature_importance=False)"
|
||||
],
|
||||
"id": "beeb098799ecfa6a",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"train data size: 875004\n",
|
||||
"未发现缓存模型,开始训练新模型。\n",
|
||||
"Training until validation scores don't improve for 50 rounds\n",
|
||||
"Early stopping, best iteration is:\n",
|
||||
"[378]\ttrain's l2: 0.435049\tvalid's l2: 0.589178\n",
|
||||
"Evaluated only: l2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 16
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:27.394697Z",
|
||||
"start_time": "2025-02-09T14:57:25.373274Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"test_data['score'] = light_model.predict(test_data[feature_columns])\n",
|
||||
"predictions = test_data.loc[test_data.groupby('trade_date')['score'].idxmax()]"
|
||||
],
|
||||
"id": "5bb96ca8492e74d",
|
||||
"outputs": [],
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:27.489570Z",
|
||||
"start_time": "2025-02-09T14:57:27.397368Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions.csv', index=False)",
|
||||
"id": "5d1522a7538db91b",
|
||||
"outputs": [],
|
||||
"execution_count": 18
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
BIN
code/train/best_model.pth
Normal file
BIN
code/train/best_model.pth
Normal file
Binary file not shown.
5004
code/train/catboost_info/catboost_training.json
Normal file
5004
code/train/catboost_info/catboost_training.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
code/train/catboost_info/learn/events.out.tfevents
Normal file
BIN
code/train/catboost_info/learn/events.out.tfevents
Normal file
Binary file not shown.
5001
code/train/catboost_info/learn_error.tsv
Normal file
5001
code/train/catboost_info/learn_error.tsv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
code/train/catboost_info/test/events.out.tfevents
Normal file
BIN
code/train/catboost_info/test/events.out.tfevents
Normal file
Binary file not shown.
BIN
code/train/catboost_info/test1/events.out.tfevents
Normal file
BIN
code/train/catboost_info/test1/events.out.tfevents
Normal file
Binary file not shown.
5001
code/train/catboost_info/test_error.tsv
Normal file
5001
code/train/catboost_info/test_error.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5001
code/train/catboost_info/time_left.tsv
Normal file
5001
code/train/catboost_info/time_left.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
384
code/train/code.ipynb
Normal file
384
code/train/code.ipynb
Normal file
@@ -0,0 +1,384 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_technical_factor(df):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)\n",
|
||||
" df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)\n",
|
||||
"\n",
|
||||
" # 因子 1:短期成交量变化率\n",
|
||||
" df['volume_change_rate'] = (\n",
|
||||
" grouped['vol'].rolling(window=2).mean() /\n",
|
||||
" grouped['vol'].rolling(window=5).mean() - 1\n",
|
||||
" ).reset_index(level=0, drop=True) # 确保索引对齐\n",
|
||||
"\n",
|
||||
" # 因子 2:成交量突破信号\n",
|
||||
" max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True) # 确保索引对齐\n",
|
||||
" df['cat_volume_breakout'] = (df['vol'] > max_volume)\n",
|
||||
"\n",
|
||||
" # 因子 3:换手率均线偏离度\n",
|
||||
" mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
|
||||
" std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)\n",
|
||||
" df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover\n",
|
||||
"\n",
|
||||
" # 因子 4:换手率激增信号\n",
|
||||
" df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)\n",
|
||||
"\n",
|
||||
" # 因子 5:量比均值\n",
|
||||
" df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 因子 6:量比突破信号\n",
|
||||
" max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)\n",
|
||||
" df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)\n",
|
||||
"\n",
|
||||
" # 因子 7:成交量与换手率的综合动量因子\n",
|
||||
" alpha = 0.5\n",
|
||||
" df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']\n",
|
||||
"\n",
|
||||
" # 因子 8:量价共振因子\n",
|
||||
" df['price_change_rate'] = grouped['close'].pct_change()\n",
|
||||
" df['resonance_factor'] = df['volume_ratio'] * df['price_change_rate']\n",
|
||||
"\n",
|
||||
" # 计算 up 和 down\n",
|
||||
" df['log_close'] = np.log(df['close'])\n",
|
||||
"\n",
|
||||
" df['vol_spike'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']\n",
|
||||
" df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()\n",
|
||||
"\n",
|
||||
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
|
||||
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
|
||||
"\n",
|
||||
" # 计算 ATR\n",
|
||||
" df['atr_14'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),\n",
|
||||
" index=x.index)\n",
|
||||
" )\n",
|
||||
" df['atr_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),\n",
|
||||
" index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 OBV 及其均线\n",
|
||||
" df['obv'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['maobv_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
|
||||
"\n",
|
||||
" # 计算 RSI\n",
|
||||
" df['rsi_3'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['rsi_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['rsi_9'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 return_10 和 return_20\n",
|
||||
" df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
|
||||
" df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)\n",
|
||||
" df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
|
||||
"\n",
|
||||
" # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)\n",
|
||||
"\n",
|
||||
" # 计算标准差指标\n",
|
||||
" df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())\n",
|
||||
" df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())\n",
|
||||
" df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())\n",
|
||||
" df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())\n",
|
||||
" df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())\n",
|
||||
"\n",
|
||||
" # 计算比值指标\n",
|
||||
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
|
||||
" df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
|
||||
"\n",
|
||||
" # 计算标准差差值\n",
|
||||
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_act_factor(df, cat=True):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
" # 计算 EMA 指标\n",
|
||||
" df['_ema_5'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_13'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_20'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_60'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
|
||||
" df['act_factor1'] = grouped['_ema_5'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50\n",
|
||||
" )\n",
|
||||
" df['act_factor2'] = grouped['_ema_13'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40\n",
|
||||
" )\n",
|
||||
" df['act_factor3'] = grouped['_ema_20'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21\n",
|
||||
" )\n",
|
||||
" df['act_factor4'] = grouped['_ema_60'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if cat:\n",
|
||||
" df['cat_af1'] = df['act_factor1'] > 0\n",
|
||||
" df['cat_af2'] = df['act_factor2'] > df['act_factor1']\n",
|
||||
" df['cat_af3'] = df['act_factor3'] > df['act_factor2']\n",
|
||||
" df['cat_af4'] = df['act_factor4'] > df['act_factor3']\n",
|
||||
"\n",
|
||||
" # 计算 act_factor5 和 act_factor6\n",
|
||||
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
|
||||
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
|
||||
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
|
||||
"\n",
|
||||
" # 根据 trade_date 截面计算排名\n",
|
||||
" df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_money_flow_factor(df):\n",
|
||||
" # 计算资金流相关因子(字段名称见 tushare 数据说明)\n",
|
||||
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
|
||||
" df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['log(circ_mv)'] = np.log(df['circ_mv'])\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_alpha_factor(df):\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code')\n",
|
||||
"\n",
|
||||
" # alpha_022: 当前 close 与 5 日前 close 差值\n",
|
||||
" df['alpha_022'] = grouped['close'].transform(lambda x: x - x.shift(5))\n",
|
||||
"\n",
|
||||
" # alpha_003: (close - open) / (high - low)\n",
|
||||
" df['alpha_003'] = np.where(df['high'] != df['low'],\n",
|
||||
" (df['close'] - df['open']) / (df['high'] - df['low']),\n",
|
||||
" 0)\n",
|
||||
"\n",
|
||||
" # alpha_007: 计算过去5日 close 与 vol 的相关性,并按 trade_date 排名\n",
|
||||
" df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)\n",
|
||||
" df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)\n",
|
||||
"\n",
|
||||
" # alpha_013: 计算过去5日 close 之和 - 20日 close 之和,并按 trade_date 排名\n",
|
||||
" df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())\n",
|
||||
" df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_limit_factor(df):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 分组处理\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" # 1. 今日是否涨停/跌停\n",
|
||||
" df['cat_up_limit'] = (df['close'] == df['up_limit']).astype(int) # 是否涨停(1表示涨停,0表示未涨停)\n",
|
||||
" df['cat_down_limit'] = (df['close'] == df['down_limit']).astype(int) # 是否跌停(1表示跌停,0表示未跌停)\n",
|
||||
"\n",
|
||||
" # 2. 最近涨跌停次数(过去20个交易日)\n",
|
||||
" df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
|
||||
" drop=True)\n",
|
||||
" df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
|
||||
" drop=True)\n",
|
||||
"\n",
|
||||
" # 3. 最近连续涨跌停天数\n",
|
||||
" def calculate_consecutive_limits(series):\n",
|
||||
" \"\"\"\n",
|
||||
" 计算连续涨停/跌停天数。\n",
|
||||
" \"\"\"\n",
|
||||
" consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
|
||||
" consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
|
||||
" return consecutive_up, consecutive_down\n",
|
||||
"\n",
|
||||
" # 连续涨停天数\n",
|
||||
" df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(\n",
|
||||
" lambda x: calculate_consecutive_limits(x)[0]\n",
|
||||
" ).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 连续跌停天数\n",
|
||||
" # df['consecutive_down_limit'] = grouped['cat_down_limit'].apply(\n",
|
||||
" # lambda x: calculate_consecutive_limits(x)[1]\n",
|
||||
" # ).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_cyp_perf_factor(df):\n",
|
||||
" # 预处理:按股票代码和时间排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 按股票代码分组处理\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])\n",
|
||||
"\n",
|
||||
" df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
|
||||
"\n",
|
||||
" df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
|
||||
"\n",
|
||||
" df['lock_factor'] = df['turnover_rate'] * (\n",
|
||||
" 1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))\n",
|
||||
"\n",
|
||||
" df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)\n",
|
||||
"\n",
|
||||
" df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))\n",
|
||||
"\n",
|
||||
" def rolling_corr(group):\n",
|
||||
" roc_close = group['close'].pct_change()\n",
|
||||
" roc_weight = group['weight_avg'].pct_change()\n",
|
||||
" return roc_close.rolling(10).corr(roc_weight)\n",
|
||||
"\n",
|
||||
" df['price_cost_divergence'] = grouped.apply(rolling_corr)\n",
|
||||
"\n",
|
||||
" def calc_atr(group):\n",
|
||||
" high, low, close = group['high'], group['low'], group['close']\n",
|
||||
" tr = np.maximum(high - low,\n",
|
||||
" np.maximum(abs(high - close.shift()),\n",
|
||||
" abs(low - close.shift())))\n",
|
||||
" return tr.rolling(14).mean()\n",
|
||||
"\n",
|
||||
" df['atr_14'] = grouped.apply(calc_atr)\n",
|
||||
" df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']\n",
|
||||
"\n",
|
||||
" # 12. 小盘股筹码集中度\n",
|
||||
" df['smallcap_concentration'] = (1 / df['circ_mv']) * (df['cost_85pct'] - df['cost_15pct'])\n",
|
||||
"\n",
|
||||
" # 16. 筹码稳定性指数 (20日波动率)\n",
|
||||
" df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())\n",
|
||||
" df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())\n",
|
||||
"\n",
|
||||
" # 17. 成本区间突破标记\n",
|
||||
" df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())\n",
|
||||
"\n",
|
||||
" # 18. 黄金筹码共振 (复合事件)\n",
|
||||
" df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &\n",
|
||||
" (df['volume_ratio'] > 1.5) &\n",
|
||||
" (df['winner_rate'] > 0.7))\n",
|
||||
"\n",
|
||||
" # 20. 筹码-流动性风险\n",
|
||||
" df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (\n",
|
||||
" 1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))\n",
|
||||
"\n",
|
||||
" df.drop(columns=['weight_std20'], inplace=True, errors='ignore')\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_mv_factors(df):\n",
|
||||
" \"\"\"\n",
|
||||
" 计算多个因子并生成最终的综合因子。\n",
|
||||
"\n",
|
||||
" 参数:\n",
|
||||
" df (pd.DataFrame): 包含 ts_code, trade_date, turnover_rate, pe_ttm, pb, ps, circ_mv, volume_ratio, vol 等列的数据框。\n",
|
||||
"\n",
|
||||
" 返回:\n",
|
||||
" pd.DataFrame: 包含新增因子和最终综合因子的数据框。\n",
|
||||
" \"\"\"\n",
|
||||
" # 按 ts_code 和 trade_date 排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 按 ts_code 分组\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" # 1. 市值流动比因子\n",
|
||||
" df['mv_turnover_ratio'] = df['turnover_rate'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 2. 市值调整成交量因子\n",
|
||||
" df['mv_adjusted_volume'] = df['vol'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 3. 市值加权换手率因子\n",
|
||||
" df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['circ_mv'])\n",
|
||||
"\n",
|
||||
" # 4. 非线性市值成交量因子\n",
|
||||
" df['nonlinear_mv_volume'] = df['vol'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 5. 市值量比因子\n",
|
||||
" df['mv_volume_ratio'] = df['volume_ratio'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 6. 市值动量因子\n",
|
||||
" df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 7. 市值波动率因子\n",
|
||||
" df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)\n",
|
||||
" df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 8. 市值成长性因子\n",
|
||||
" df['volume_growth'] = grouped['vol'].pct_change(periods=20).reset_index(level=0, drop=True)\n",
|
||||
" df['mv_growth'] = grouped.apply(lambda x: x['volume_growth'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # # 标准化因子\n",
|
||||
" # factor_columns = [\n",
|
||||
" # 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover',\n",
|
||||
" # 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum',\n",
|
||||
" # 'mv_volatility', 'mv_growth'\n",
|
||||
" # ]\n",
|
||||
" # scaler = StandardScaler()\n",
|
||||
" # df[factor_columns] = scaler.fit_transform(df[factor_columns])\n",
|
||||
" #\n",
|
||||
" # # 加权合成因子\n",
|
||||
" # weights = [0.2, 0.15, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1] # 各因子权重\n",
|
||||
" # df['final_combined_factor'] = df[factor_columns].dot(weights)\n",
|
||||
"\n",
|
||||
" return df"
|
||||
],
|
||||
"id": "505e825945e4b8cf"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
1359
code/train/predictions.tsv
Normal file
1359
code/train/predictions.tsv
Normal file
File diff suppressed because it is too large
Load Diff
262
code/train/predictions_test.tsv
Normal file
262
code/train/predictions_test.tsv
Normal file
@@ -0,0 +1,262 @@
|
||||
trade_date,score,ts_code
|
||||
2024-03-01,0.48944956369028353,600515.SH
|
||||
2024-03-04,0.4910973129007265,601377.SH
|
||||
2024-03-05,0.49755441935533035,601377.SH
|
||||
2024-03-06,0.48530227310531543,000950.SZ
|
||||
2024-03-07,0.4852382924633875,002252.SZ
|
||||
2024-03-08,0.4295690224121115,000650.SZ
|
||||
2024-03-11,0.4772666567296332,002539.SZ
|
||||
2024-03-12,0.44912255948878027,002030.SZ
|
||||
2024-03-13,0.48036470172022555,603366.SH
|
||||
2024-03-14,0.47455902892485136,002616.SZ
|
||||
2024-03-15,0.4898609892396381,002616.SZ
|
||||
2024-03-18,0.4898609892396381,002616.SZ
|
||||
2024-03-19,0.4585803401938102,600749.SH
|
||||
2024-03-20,0.4717940942781549,601616.SH
|
||||
2024-03-21,0.4898609892396381,002616.SZ
|
||||
2024-03-22,0.4921881826907514,600268.SH
|
||||
2024-03-25,0.4529948903273269,000910.SZ
|
||||
2024-03-26,0.468535149074664,600163.SH
|
||||
2024-03-27,0.45345883846621166,600268.SH
|
||||
2024-03-28,0.4629678911934166,603689.SH
|
||||
2024-03-29,0.4576699667200496,600479.SH
|
||||
2024-04-01,0.4880682627555449,000407.SZ
|
||||
2024-04-02,0.4658882567202438,002228.SZ
|
||||
2024-04-03,0.46661866733947643,600279.SH
|
||||
2024-04-08,0.4348058590086616,601199.SH
|
||||
2024-04-09,0.440404827771838,600279.SH
|
||||
2024-04-10,0.45436095343545857,600279.SH
|
||||
2024-04-11,0.4860610115557819,002807.SZ
|
||||
2024-04-12,0.49251811801038575,002807.SZ
|
||||
2024-04-15,0.45282134310413785,002267.SZ
|
||||
2024-04-16,0.44573083040693495,601200.SH
|
||||
2024-04-17,0.4755082230960848,600681.SH
|
||||
2024-04-18,0.46969708080904654,600681.SH
|
||||
2024-04-19,0.4763960575203315,601963.SH
|
||||
2024-04-22,0.4797611675632624,601006.SH
|
||||
2024-04-23,0.4417060612494253,600905.SH
|
||||
2024-04-24,0.467078149060861,600681.SH
|
||||
2024-04-25,0.48593234463506524,601016.SH
|
||||
2024-04-26,0.485704669384595,600248.SH
|
||||
2024-04-29,0.4856232616952012,000088.SZ
|
||||
2024-04-30,0.47658127432737996,600717.SH
|
||||
2024-05-06,0.485599247202529,600057.SH
|
||||
2024-05-07,0.48502517567793635,600928.SH
|
||||
2024-05-08,0.4875800938455082,601997.SH
|
||||
2024-05-09,0.4764814479757618,601990.SH
|
||||
2024-05-10,0.4767299756060024,601187.SH
|
||||
2024-05-13,0.48339009051626075,002252.SZ
|
||||
2024-05-14,0.49804720312809037,601236.SH
|
||||
2024-05-15,0.4754440788180303,601669.SH
|
||||
2024-05-16,0.47188100886784723,002252.SZ
|
||||
2024-05-17,0.4738812084576224,000505.SZ
|
||||
2024-05-20,0.47680745811007763,600300.SH
|
||||
2024-05-21,0.4914822821325402,000919.SZ
|
||||
2024-05-22,0.43568399742751623,603777.SH
|
||||
2024-05-23,0.48230237417866073,000919.SZ
|
||||
2024-05-24,0.45217189085141946,600846.SH
|
||||
2024-05-27,0.4822386303700534,000012.SZ
|
||||
2024-05-28,0.4911043379651643,000012.SZ
|
||||
2024-05-29,0.4593967921412618,000012.SZ
|
||||
2024-05-30,0.46327623228814613,600033.SH
|
||||
2024-05-31,0.48434217554317305,601006.SH
|
||||
2024-06-03,0.49496311907072943,601158.SH
|
||||
2024-06-04,0.49311491712360267,601158.SH
|
||||
2024-06-05,0.4662101696226407,600681.SH
|
||||
2024-06-06,0.44598437609141306,600507.SH
|
||||
2024-06-07,0.46683069546117434,002936.SZ
|
||||
2024-06-11,0.48392216170099567,002936.SZ
|
||||
2024-06-12,0.48392216170099567,002936.SZ
|
||||
2024-06-13,0.4856066420239958,601555.SH
|
||||
2024-06-14,0.4626378515738043,600273.SH
|
||||
2024-06-17,0.4930611701154713,601868.SH
|
||||
2024-06-18,0.4850760483461876,600909.SH
|
||||
2024-06-19,0.48789755402053436,600909.SH
|
||||
2024-06-20,0.47168404865503394,600918.SH
|
||||
2024-06-21,0.4780215106901423,002939.SZ
|
||||
2024-06-24,0.4670168740500108,600507.SH
|
||||
2024-06-25,0.4462181161419812,600507.SH
|
||||
2024-06-26,0.47871585166988867,601108.SH
|
||||
2024-06-27,0.4550530100208326,601108.SH
|
||||
2024-06-28,0.4682668022638408,600056.SH
|
||||
2024-07-01,0.4956846242134755,601555.SH
|
||||
2024-07-02,0.4855061855012652,601375.SH
|
||||
2024-07-03,0.48208040944932096,000166.SZ
|
||||
2024-07-04,0.4741422882624783,002763.SZ
|
||||
2024-07-05,0.4207747223564723,600279.SH
|
||||
2024-07-08,0.4505916830616898,000778.SZ
|
||||
2024-07-09,0.47552387696457243,600108.SH
|
||||
2024-07-10,0.48433484852590997,601990.SH
|
||||
2024-07-11,0.4670143394143526,600597.SH
|
||||
2024-07-12,0.48598259773635294,601228.SH
|
||||
2024-07-15,0.46747598383004435,600704.SH
|
||||
2024-07-16,0.46517324677556354,601228.SH
|
||||
2024-07-17,0.4669329317249588,600681.SH
|
||||
2024-07-18,0.4342287048693295,603886.SH
|
||||
2024-07-19,0.4688897638752658,002936.SZ
|
||||
2024-07-22,0.4655336697859469,601008.SH
|
||||
2024-07-23,0.4385299287889017,000959.SZ
|
||||
2024-07-24,0.40980850428167526,600059.SH
|
||||
2024-07-25,0.4354808457559223,600928.SH
|
||||
2024-07-26,0.43863748866816843,002936.SZ
|
||||
2024-07-29,0.4757869671100767,002936.SZ
|
||||
2024-07-30,0.45055950278777573,600056.SH
|
||||
2024-07-31,0.43441391812299834,000672.SZ
|
||||
2024-08-01,0.46163952351711074,600597.SH
|
||||
2024-08-02,0.45912773679288216,601528.SH
|
||||
2024-08-05,0.42637336325271274,000012.SZ
|
||||
2024-08-06,0.44417354831155165,000498.SZ
|
||||
2024-08-07,0.46761174293945373,002029.SZ
|
||||
2024-08-08,0.44084265961702834,600061.SH
|
||||
2024-08-09,0.44756951600219536,600061.SH
|
||||
2024-08-12,0.43865468410238356,600517.SH
|
||||
2024-08-13,0.40119132713124916,600798.SH
|
||||
2024-08-14,0.4147687048594177,600279.SH
|
||||
2024-08-15,0.4285641100025751,600283.SH
|
||||
2024-08-16,0.43481178678490995,600279.SH
|
||||
2024-08-19,0.4334514206803078,000055.SZ
|
||||
2024-08-20,0.4271544196243579,600925.SH
|
||||
2024-08-21,0.44140030057394675,601718.SH
|
||||
2024-08-22,0.4260043286627239,002266.SZ
|
||||
2024-08-23,0.4272469992425721,002266.SZ
|
||||
2024-08-26,0.43298605818458036,601718.SH
|
||||
2024-08-27,0.42993335252901094,600308.SH
|
||||
2024-08-28,0.44014909636073596,601216.SH
|
||||
2024-08-29,0.4549086267823673,002108.SZ
|
||||
2024-08-30,0.4432950051114155,603817.SH
|
||||
2024-09-02,0.44924217940667366,603759.SH
|
||||
2024-09-03,0.45929491873592476,603759.SH
|
||||
2024-09-04,0.4555291720504659,603817.SH
|
||||
2024-09-05,0.45486187663776934,000581.SZ
|
||||
2024-09-06,0.458767486527876,600016.SH
|
||||
2024-09-09,0.42622859802922114,000725.SZ
|
||||
2024-09-10,0.47344109719180894,002239.SZ
|
||||
2024-09-11,0.4602775423090333,600050.SH
|
||||
2024-09-12,0.4581305095531178,603856.SH
|
||||
2024-09-13,0.48196532955068866,002697.SZ
|
||||
2024-09-18,0.48196532955068866,002697.SZ
|
||||
2024-09-19,0.4448663482243619,600210.SH
|
||||
2024-09-20,0.4894480847057984,600293.SH
|
||||
2024-09-23,0.48392216170099567,002239.SZ
|
||||
2024-09-24,0.43493260807615913,603182.SH
|
||||
2024-09-25,0.4279590881784511,601369.SH
|
||||
2024-09-26,0.457735727285402,600526.SH
|
||||
2024-09-27,0.464687096497251,002818.SZ
|
||||
2024-09-30,0.21397500413643297,600081.SH
|
||||
2024-10-08,0.22763716829204592,605333.SH
|
||||
2024-10-09,0.44498910797127905,600495.SH
|
||||
2024-10-10,0.4635725472634731,600251.SH
|
||||
2024-10-11,0.46083590103602623,000967.SZ
|
||||
2024-10-14,0.47310129519500743,600251.SH
|
||||
2024-10-15,0.4432757845922387,601727.SH
|
||||
2024-10-16,0.49142274407028197,002267.SZ
|
||||
2024-10-17,0.4969633584025033,600032.SH
|
||||
2024-10-18,0.49050625194789943,002267.SZ
|
||||
2024-10-21,0.4839204785725789,000709.SZ
|
||||
2024-10-22,0.47843667953797847,002237.SZ
|
||||
2024-10-23,0.4785259848937853,601577.SH
|
||||
2024-10-24,0.48592022305116356,601577.SH
|
||||
2024-10-25,0.4799806860019888,600820.SH
|
||||
2024-10-28,0.4616976069244002,002135.SZ
|
||||
2024-10-29,0.48907368099203313,600330.SH
|
||||
2024-10-30,0.47786058189302977,600516.SH
|
||||
2024-10-31,0.46572447890972274,600969.SH
|
||||
2024-11-01,0.46269570933167936,600261.SH
|
||||
2024-11-04,0.4819410916205754,600249.SH
|
||||
2024-11-05,0.4866489103957071,603167.SH
|
||||
2024-11-06,0.485704669384595,600423.SH
|
||||
2024-11-07,0.4906974157098039,600249.SH
|
||||
2024-11-08,0.47099968069944914,002328.SZ
|
||||
2024-11-11,0.47924756292999116,600103.SH
|
||||
2024-11-12,0.4864515748363791,600103.SH
|
||||
2024-11-13,0.4682636704183581,600419.SH
|
||||
2024-11-14,0.47314641913653194,000533.SZ
|
||||
2024-11-15,0.48200633444426155,600103.SH
|
||||
2024-11-18,0.4734312305146418,600300.SH
|
||||
2024-11-19,0.481591587838489,601113.SH
|
||||
2024-11-20,0.485704669384595,600493.SH
|
||||
2024-11-21,0.48103812387602335,600515.SH
|
||||
2024-11-22,0.48907368099203313,600284.SH
|
||||
2024-11-25,0.49351892138415066,603111.SH
|
||||
2024-11-26,0.49311491712360267,601187.SH
|
||||
2024-11-27,0.49823564043257407,601187.SH
|
||||
2024-11-28,0.49478522113574214,002091.SZ
|
||||
2024-11-29,0.4744243783704977,002390.SZ
|
||||
2024-12-02,0.47703304354620096,002390.SZ
|
||||
2024-12-03,0.48674050526244056,002566.SZ
|
||||
2024-12-04,0.4959275225246577,002753.SZ
|
||||
2024-12-05,0.48839819807517926,002753.SZ
|
||||
2024-12-06,0.4870618149295468,603128.SH
|
||||
2024-12-09,0.49249410351771356,600425.SH
|
||||
2024-12-10,0.4959275225246577,002772.SZ
|
||||
2024-12-11,0.48502517567793635,600035.SH
|
||||
2024-12-12,0.48907368099203313,600035.SH
|
||||
2024-12-13,0.4901095168698787,600035.SH
|
||||
2024-12-16,0.485704669384595,603577.SH
|
||||
2024-12-17,0.42271811250681296,000533.SZ
|
||||
2024-12-18,0.45253512664134277,000026.SZ
|
||||
2024-12-19,0.4668403018194925,601000.SH
|
||||
2024-12-20,0.47643624443298405,000026.SZ
|
||||
2024-12-23,0.4405301681206755,600305.SH
|
||||
2024-12-24,0.44052806294048863,000883.SZ
|
||||
2024-12-25,0.448810439825342,000589.SZ
|
||||
2024-12-26,0.47338854249265216,600582.SH
|
||||
2024-12-27,0.4495112908067394,000830.SZ
|
||||
2024-12-30,0.463164320301011,601006.SH
|
||||
2024-12-31,0.4773178744459276,600004.SH
|
||||
2025-01-02,0.47702414327290926,600572.SH
|
||||
2025-01-03,0.4474782636368997,601163.SH
|
||||
2025-01-06,0.4333771722554744,600821.SH
|
||||
2025-01-07,0.44770531932040636,600004.SH
|
||||
2025-01-08,0.47082512104142743,600116.SH
|
||||
2025-01-09,0.4541624257750102,600004.SH
|
||||
2025-01-10,0.4505883376349118,600905.SH
|
||||
2025-01-13,0.4505883376349118,600905.SH
|
||||
2025-01-14,0.4629277092655053,000088.SZ
|
||||
2025-01-15,0.4849467618585074,601222.SH
|
||||
2025-01-16,0.4904213884330184,600273.SH
|
||||
2025-01-17,0.49427977331421474,002267.SZ
|
||||
2025-01-20,0.4921881826907514,000088.SZ
|
||||
2025-01-21,0.49980885843191936,002233.SZ
|
||||
2025-01-22,0.49226959038014517,603817.SH
|
||||
2025-01-23,0.4845623679956794,600731.SH
|
||||
2025-01-24,0.4894480847057984,002443.SZ
|
||||
2025-01-27,0.480828304199342,600475.SH
|
||||
2025-02-05,0.49381191740852215,600475.SH
|
||||
2025-02-06,0.48051107405830695,600219.SH
|
||||
2025-02-07,0.4912337545022996,002365.SZ
|
||||
2025-02-10,0.4891404807504195,002606.SZ
|
||||
2025-02-11,0.48918316877240914,002454.SZ
|
||||
2025-02-12,0.48841224882795287,605138.SH
|
||||
2025-02-13,0.49427977331421474,603022.SH
|
||||
2025-02-14,0.4961990011809636,002454.SZ
|
||||
2025-02-17,0.49980885843191936,603172.SH
|
||||
2025-02-18,0.4859761480409009,600526.SH
|
||||
2025-02-19,0.47951904158629705,600526.SH
|
||||
2025-02-20,0.4865640468808261,002972.SZ
|
||||
2025-02-21,0.4878226668596109,002972.SZ
|
||||
2025-02-24,0.49427977331421474,002972.SZ
|
||||
2025-02-25,0.4959275225246577,002972.SZ
|
||||
2025-02-26,0.49559758720502334,000850.SZ
|
||||
2025-02-27,0.4962265008275085,600969.SH
|
||||
2025-02-28,0.45857905703475876,000931.SZ
|
||||
2025-03-03,0.475929514181944,600704.SH
|
||||
2025-03-04,0.4855766924008325,603176.SH
|
||||
2025-03-05,0.48947041607005387,600749.SH
|
||||
2025-03-06,0.48947041607005387,600749.SH
|
||||
2025-03-07,0.48852295338866114,002948.SZ
|
||||
2025-03-10,0.48013938012027435,600749.SH
|
||||
2025-03-11,0.4697442502219659,603916.SH
|
||||
2025-03-12,0.46825376442882116,600969.SH
|
||||
2025-03-13,0.485704669384595,601311.SH
|
||||
2025-03-14,0.47290759442027386,600784.SH
|
||||
2025-03-17,0.48236846890282975,002204.SZ
|
||||
2025-03-18,0.4753161955607809,600784.SH
|
||||
2025-03-19,0.4898609892396381,002627.SZ
|
||||
2025-03-20,0.4898609892396381,002627.SZ
|
||||
2025-03-21,0.47660773064492085,000589.SZ
|
||||
2025-03-24,0.4753538392607698,000589.SZ
|
||||
2025-03-25,0.4628733846203298,000589.SZ
|
||||
2025-03-26,0.45948501496487415,603367.SH
|
||||
2025-03-27,0.47591884404751766,600017.SH
|
||||
2025-03-28,0.4765671044505851,600925.SH
|
||||
|
1116
code/train/predictions_train.tsv
Normal file
1116
code/train/predictions_train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
738
code/train/utils/factor.py
Normal file
738
code/train/utils/factor.py
Normal file
@@ -0,0 +1,738 @@
|
||||
import numpy as np
|
||||
import talib
|
||||
import pandas as pd
|
||||
|
||||
def get_technical_factor(df: pd.DataFrame) -> pd.DataFrame:
    """Add per-ticker rolling technical factors to a tushare daily-bar frame.

    Expects columns: ts_code, trade_date, open, high, low, close, vol,
    pct_chg, turnover_rate, volume_ratio.  Sorts by (ts_code, trade_date),
    mutates the frame in place and returns it.  Columns prefixed ``cat_``
    are boolean event flags.
    """
    # Sort so every rolling window runs forward in time within each ticker.
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code', group_keys=False)

    # Distribution shape of the last 5 daily returns.
    df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)
    df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)

    # Factor 1: short-term volume change rate (2-day vs 5-day mean volume).
    df['volume_change_rate'] = (
            grouped['vol'].rolling(window=2).mean() /
            grouped['vol'].rolling(window=5).mean() - 1
    ).reset_index(level=0, drop=True)  # drop the ts_code index level to realign with df

    # Factor 2: volume breakout flag.
    # NOTE(review): the 5-day window includes today, so vol can never exceed
    # its own window max — this flag is always False.  A trailing ``.shift(1)``
    # on the rolling max was probably intended; confirm before relying on it.
    max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True)  # realign index
    df['cat_volume_breakout'] = (df['vol'] > max_volume)

    # Factor 3: turnover deviation from its 3-day mean, in 3-day stds.
    mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)
    std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)
    df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover

    # Factor 4: turnover spike flag (above mean + 2 std).
    df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)

    # Factor 5: 3-day mean of the volume ratio.
    df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)

    # Factor 6: volume-ratio breakout flag.
    # NOTE(review): same window-includes-today issue as factor 2 — always False.
    max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)
    df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)

    # Factor 7: blended volume/turnover momentum.
    alpha = 0.5  # fixed blend weight between the two components
    df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']

    # Factor 8: volume-price resonance (volume ratio times daily return).
    df['price_change_rate'] = grouped['close'].pct_change()
    df['resonance_factor'] = df['volume_ratio'] * df['price_change_rate']

    # Log price level (scale-free feature).
    df['log_close'] = np.log(df['close'])

    # 20-day average volume per ticker, plus a >2x spike flag.
    df['vol_spike'] = grouped.apply(
        lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)
    )
    df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']
    # NOTE(review): computed over the whole frame, not per ticker — the window
    # crosses ticker boundaries at group edges; confirm this is intended.
    df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()

    # Upper/lower candle shadows, normalized by the close.
    df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']
    df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']

    # ATR over two horizons (talib, computed per ticker).
    df['atr_14'] = grouped.apply(
        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),
                            index=x.index)
    )
    df['atr_6'] = grouped.apply(
        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),
                            index=x.index)
    )

    # OBV, its 6-day SMA, and the gap between them.
    # NOTE(review): ``grouped`` was built before 'obv' existed; pandas GroupBy
    # slices its frame lazily, so the second apply does see the new column —
    # verify against the pinned pandas version.
    df['obv'] = grouped.apply(
        lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)
    )
    df['maobv_6'] = grouped.apply(
        lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)
    )
    df['obv-maobv_6'] = df['obv'] - df['maobv_6']

    # RSI over three horizons (talib, per ticker).
    df['rsi_3'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)
    )
    df['rsi_6'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)
    )
    df['rsi_9'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)
    )

    # Trailing simple returns over 5/10/20 days.
    df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)
    df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)
    df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)

    # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)

    # Realized volatility of daily returns over several windows;
    # std_return_90_2 is the same 90-day window lagged by 10 days.
    df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())
    df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())
    df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())
    df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())
    df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())

    # Volatility term-structure ratios (short vs long horizon).
    df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']
    df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']

    # 10-day change in the 90-day volatility level.
    df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']

    return df
|
||||
|
||||
|
||||
def get_act_factor(df, cat=True):
    """Compute EMA-slope ("activity") factors per ticker.

    Each ``act_factorN`` is the arctangent (in degrees, via the 57.3 scale)
    of the one-day percentage slope of an EMA, normalized by a fixed divisor.
    When *cat* is true, boolean ``cat_afN`` flags compare adjacent factors.
    The frame is sorted by (ts_code, trade_date), mutated in place and
    returned.
    """
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code', group_keys=False)

    # (EMA period, slope divisor) for act_factor1..4, in that order.
    specs = [(5, 50), (13, 40), (20, 21), (60, 10)]

    # Per-ticker EMAs of the close (talib works on raw ndarrays, so the
    # result is re-wrapped with the group's index).
    for period, _ in specs:
        df[f'_ema_{period}'] = grouped['close'].apply(
            lambda s, p=period: pd.Series(talib.EMA(s.values, timeperiod=p), index=s.index)
        )

    # Slope-angle factors: arctan of the EMA's daily % change, scaled.
    for idx, (period, divisor) in enumerate(specs, start=1):
        df[f'act_factor{idx}'] = grouped[f'_ema_{period}'].apply(
            lambda s, d=divisor: np.arctan((s / s.shift(1) - 1) * 100) * 57.3 / d
        )

    if cat:
        # Flags: factor 1 positive, and each longer-horizon factor above the
        # previous one.
        df['cat_af1'] = df['act_factor1'] > 0
        for k in (2, 3, 4):
            df[f'cat_af{k}'] = df[f'act_factor{k}'] > df[f'act_factor{k - 1}']

    # Composite factors: the plain sum, and the normalized short-vs-mid gap.
    af1, af2 = df['act_factor1'], df['act_factor2']
    df['act_factor5'] = af1 + af2 + df['act_factor3'] + df['act_factor4']
    df['act_factor6'] = (af1 - af2) / np.sqrt(af1 ** 2 + af2 ** 2)

    # Cross-sectional (per trade_date) percentile ranks, highest factor first.
    by_date = df.groupby('trade_date', group_keys=False)
    for k in (1, 2, 3):
        df[f'rank_act_factor{k}'] = by_date[f'act_factor{k}'].rank(ascending=False, pct=True)

    return df
|
||||
|
||||
|
||||
def get_money_flow_factor(df):
    """Money-flow factors from tushare ``moneyflow`` fields.

    Scales the small/large/extra-large buy volumes (and the large /
    extra-large net buy volumes) by the day's net money-flow volume, and
    adds the log float market cap.  Mutates and returns *df*.
    """
    net_flow = df['net_mf_vol']

    # Gross active buying by order size, as a share of net flow.
    for out_col, src_col in (('active_buy_volume_large', 'buy_lg_vol'),
                             ('active_buy_volume_big', 'buy_elg_vol'),
                             ('active_buy_volume_small', 'buy_sm_vol')):
        df[out_col] = df[src_col] / net_flow

    # Net buying (buy minus sell) for the two largest order buckets.
    df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / net_flow
    df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / net_flow

    # Log float market cap as a size control.
    df['log(circ_mv)'] = np.log(df['circ_mv'])
    return df
|
||||
|
||||
|
||||
def get_alpha_factor(df):
    """Compute a small set of price/volume "alpha" factors.

    Expects tushare daily bars (ts_code, trade_date, open/high/low/close,
    vol).  Sorts by (ts_code, trade_date), mutates the frame in place and
    returns it.  Adds: alpha_22_improved, alpha_003, alpha_007, alpha_013
    (plus the intermediate cov / delta_cov / _rank_stddev columns).
    """
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code')

    # df['alpha_022'] = grouped['close'].transform(lambda x: x - x.shift(5))
    def rolling_covariance(x, y, window):
        # Rolling covariance between two aligned series.
        return x.rolling(window).cov(y)

    def delta(series, period):
        # Difference against the value *period* rows back.
        return series.diff(period)

    def rank(series):
        # Percentile rank over the whole column.
        return series.rank(pct=True)

    def stddev(series, window):
        return series.rolling(window).std()

    # Improved "Alpha 22": -delta(cov(high, vol, 5), 5) * rank(std(close, 20)).
    # NOTE(review): these rolling windows run over the full sorted frame and
    # so cross ticker boundaries — kept as in the original implementation.
    window_high_volume = 5
    window_close_stddev = 20
    period_delta = 5

    # BUGFIX: the volume column is named 'vol' throughout this module
    # (tushare field name); 'volume' does not exist and raised a KeyError.
    df['cov'] = rolling_covariance(df['high'], df['vol'], window_high_volume)
    df['delta_cov'] = delta(df['cov'], period_delta)
    df['_rank_stddev'] = rank(stddev(df['close'], window_close_stddev))
    df['alpha_22_improved'] = -1 * df['delta_cov'] * df['_rank_stddev']

    # alpha_003: intraday body relative to the day's range (0 when high == low).
    df['alpha_003'] = np.where(df['high'] != df['low'],
                               (df['close'] - df['open']) / (df['high'] - df['low']),
                               0)

    # alpha_007: 5-day close/volume correlation per ticker, then ranked
    # cross-sectionally within each trade_date.
    df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)
    df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)

    # alpha_013: 5-day minus 20-day close sum per ticker, ranked per trade_date.
    df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())
    df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)

    return df
|
||||
|
||||
|
||||
def get_limit_factor(df):
    """Limit-up / limit-down (price-limit) factors, computed per ticker.

    Adds today's limit flags (close sitting exactly on the limit price),
    rolling 10-day limit counts, and the current consecutive limit-up
    streak length.  Mutates and returns *df* sorted by
    (ts_code, trade_date).
    """
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code', group_keys=False)

    # 1. Today's flags: 1 when the close equals the limit price, else 0.
    df['cat_up_limit'] = (df['close'] == df['up_limit']).astype(int)
    df['cat_down_limit'] = (df['close'] == df['down_limit']).astype(int)

    # 2. Rolling 10-day limit counts (min_periods=1 so early rows still count).
    for direction in ('up', 'down'):
        counts = grouped[f'cat_{direction}_limit'].rolling(window=10, min_periods=1).sum()
        df[f'{direction}_limit_count_10d'] = counts.reset_index(level=0, drop=True)

    # 3. Current consecutive limit-up streak length.
    def _streak_length(flags):
        """Length of the current run of 1s in a 0/1 series (0 where off)."""
        run_id = (flags != flags.shift()).cumsum()
        return flags * (flags.groupby(run_id).cumcount() + 1)

    df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(
        _streak_length
    ).reset_index(level=0, drop=True)

    # The symmetric consecutive limit-down streak was deliberately left
    # disabled in the original implementation.

    return df
|
||||
|
||||
|
||||
def get_cyp_perf_factor(df):
    """Chip-distribution (cyq) performance factors, one row per ticker/day.

    Expects tushare ``cyq_perf`` cost-distribution columns (cost_*pct,
    his_high / his_low, weight_avg, winner_rate) alongside daily bars.
    Mutates and returns *df* sorted by (ts_code, trade_date).
    """
    df = df.sort_values(by=['ts_code', 'trade_date'])
    by_code = df.groupby('ts_code', group_keys=False)

    # Shared cost-distribution spans, reused by several factors below.
    his_range = df['his_high'] - df['his_low']
    core_span = df['cost_50pct'] - df['cost_5pct']
    wide_span = df['cost_95pct'] - df['cost_5pct']
    concentration = df['cost_85pct'] - df['cost_15pct']

    # How tightly the middle 70% of chips sit inside the historical range.
    df['ctrl_strength'] = concentration / his_range

    # Where the close sits between the cheapest chips and the median cost.
    df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / core_span

    # Skew of the cost distribution around its median.
    df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / core_span

    # Turnover damped by how much of the range the chips occupy.
    df['lock_factor'] = df['turnover_rate'] * (1 - wide_span / his_range)

    # Breakout flag: close above the 85% cost line on heavy relative volume.
    df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)

    # 5-day rate of change of the weighted average cost, per ticker.
    df['weight_roc5'] = by_code['weight_avg'].apply(lambda s: s.pct_change(5))

    def _price_cost_corr(g):
        # 10-day correlation between price returns and avg-cost returns.
        return g['close'].pct_change().rolling(10).corr(g['weight_avg'].pct_change())

    df['price_cost_divergence'] = by_code.apply(_price_cost_corr)

    def _true_range_ma(g):
        # Classic 14-day ATR from the three true-range candidates.
        prev_close = g['close'].shift()
        tr = np.maximum(g['high'] - g['low'],
                        np.maximum(abs(g['high'] - prev_close),
                                   abs(g['low'] - prev_close)))
        return tr.rolling(14).mean()

    df['atr_14'] = by_code.apply(_true_range_ma)
    df['cost_atr_adj'] = wide_span / df['atr_14']

    # 12. Chip concentration scaled by the inverse of float market cap.
    df['smallcap_concentration'] = (1 / df['circ_mv']) * concentration

    # 16. 20-day stability of the weighted average cost (std / mean).
    weight_std20 = by_code['weight_avg'].apply(lambda s: s.rolling(20).std())
    df['cost_stability'] = weight_std20 / by_code['weight_avg'].transform(lambda s: s.rolling(20).mean())

    # 17. Days in the last 5 the close broke above the 95% cost line.
    df['high_cost_break_days'] = by_code.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())

    # 18. Composite "golden resonance": price above average cost, heavy
    # relative volume, most holders in profit.
    df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &
                                  (df['volume_ratio'] > 1.5) &
                                  (df['winner_rate'] > 0.7))

    # 20. Wide cost span relative to 10-day average volume (illiquidity risk).
    df['liquidity_risk'] = wide_span * (1 / by_code['vol'].transform(lambda s: s.rolling(10).mean()))

    return df
|
||||
|
||||
|
||||
def get_mv_factors(df):
    """
    Market-cap-scaled liquidity factors.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ts_code, trade_date, turnover_rate, circ_mv,
        volume_ratio and vol (tushare daily / daily_basic fields).

    Returns
    -------
    pd.DataFrame
        The input frame, sorted by (ts_code, trade_date), with the new
        factor columns added in place.
    """
    # Sort by ticker and date so per-ticker windows run forward in time.
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code', group_keys=False)

    # 1. Turnover per unit of float market cap.
    df['mv_turnover_ratio'] = df['turnover_rate'] / df['circ_mv']

    # 2. Volume per unit of float market cap.
    df['mv_adjusted_volume'] = df['vol'] / df['circ_mv']

    # 3. Market-cap weighted turnover (same value as factor 1 by construction).
    df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['circ_mv'])

    # 4. "Non-linear" cap/volume factor.
    # NOTE(review): identical to mv_adjusted_volume — presumably a non-linear
    # cap transform was intended; kept as-is for output compatibility.
    df['nonlinear_mv_volume'] = df['vol'] / df['circ_mv']

    # 5. Volume ratio per unit of float market cap.
    df['mv_volume_ratio'] = df['volume_ratio'] / df['circ_mv']

    # 6. Momentum proxy: turnover x volume ratio, cap-scaled.
    df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['circ_mv']

    # 7. Cap-scaled 20-day turnover volatility.
    # Rolling over a groupby yields a (ts_code, index) MultiIndex; dropping
    # level 0 restores label alignment with df.
    df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)
    # BUGFIX: the old grouped.apply(...).reset_index(level=0, drop=True)
    # replaced the label index with a positional RangeIndex, misaligning rows
    # whenever the sorted frame's labels were not already 0..n-1.  The
    # division is element-wise, so compute it directly (and faster).
    df['mv_volatility'] = df['turnover_std'] / df['circ_mv']

    # 8. Cap-scaled 20-day volume growth.
    # BUGFIX: SeriesGroupBy.pct_change already returns a label-aligned series;
    # the old reset_index(level=0, drop=True) could misalign rows (same issue
    # as factor 7).
    df['volume_growth'] = grouped['vol'].pct_change(periods=20)
    df['mv_growth'] = df['volume_growth'] / df['circ_mv']

    return df
|
||||
|
||||
|
||||
import numpy as np
|
||||
import talib
|
||||
|
||||
|
||||
def get_rolling_factor(df: pd.DataFrame):
    """Compute the full rolling feature set for model training.

    Consolidates the factors from the other ``get_*_factor`` helpers into a
    single pass.  Expects tushare daily bars merged with daily_basic,
    moneyflow, stk_limit and cyq_perf fields.  Sorts by (ts_code,
    trade_date), mutates the frame in place and returns ``(df,
    new_columns)`` where ``new_columns`` lists only the columns added here.
    """
    # Snapshot the incoming columns so new feature names can be reported.
    old_columns = df.columns.tolist()[:]
    # Sort so every rolling window runs forward in time within each ticker.
    df = df.sort_values(by=['ts_code', 'trade_date'])
    grouped = df.groupby('ts_code', group_keys=False)

    # Next-day open gap relative to today's close.
    # NOTE(review): this is forward-looking (uses tomorrow's open — a label,
    # not a feature, unless shifted) and the ungrouped shift(-1) leaks across
    # ticker boundaries at group edges.  Confirm intended usage.
    df["gap_next_open"] = (df["open"].shift(-1) - df["close"]) / df["close"]

    # Distribution shape of the last 5 daily returns.
    df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)
    df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)

    # Factor 1: short-term volume change rate (2-day vs 10-day mean volume;
    # note the longer denominator window than get_technical_factor's 5).
    df['volume_change_rate'] = (
            grouped['vol'].rolling(window=2).mean() /
            grouped['vol'].rolling(window=10).mean() - 1
    ).reset_index(level=0, drop=True)  # drop the ts_code index level to realign

    # Factor 2: volume breakout flag.
    # NOTE(review): the 5-day window includes today, so vol can never exceed
    # its own window max — this flag is always False; a trailing ``.shift(1)``
    # was probably intended.
    max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True)
    df['cat_volume_breakout'] = (df['vol'] > max_volume)

    # Factor 3: turnover deviation from its 3-day mean, in 3-day stds.
    mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)
    std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)
    df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover

    # Factor 4: turnover spike flag (above mean + 2 std).
    df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)

    # Factor 5: 3-day mean of the volume ratio.
    df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)

    # Factor 6: volume-ratio breakout flag.
    # NOTE(review): same window-includes-today issue as factor 2 — always False.
    max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)
    df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)

    # 20-day average volume per ticker.
    df['vol_spike'] = grouped.apply(
        lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)
    )
    # NOTE(review): computed on the whole frame, not per ticker — the window
    # crosses ticker boundaries at group edges; confirm this is intended.
    df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()

    # ATR over two horizons (talib, per ticker).
    df['atr_14'] = grouped.apply(
        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),
                            index=x.index)
    )
    df['atr_6'] = grouped.apply(
        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),
                            index=x.index)
    )

    # OBV and its 6-day SMA.
    # NOTE(review): ``grouped`` was built before 'obv' existed; pandas GroupBy
    # slices its frame lazily, so the second apply does see the new column —
    # verify against the pinned pandas version.
    df['obv'] = grouped.apply(
        lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)
    )
    df['maobv_6'] = grouped.apply(
        lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)
    )

    # RSI over three horizons (talib, per ticker).
    df['rsi_3'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)
    )
    df['rsi_6'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)
    )
    df['rsi_9'] = grouped.apply(
        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)
    )

    # Trailing simple returns over 5/10/20 days.
    df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)
    df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)
    df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)

    # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)

    # Realized volatility of daily returns over several windows;
    # std_return_90_2 is the same 90-day window lagged by 10 days.
    df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())
    df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())
    df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())
    df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())
    df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())

    # Per-ticker EMAs of the close (talib).
    df['_ema_5'] = grouped['close'].apply(
        lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)
    )
    df['_ema_13'] = grouped['close'].apply(
        lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)
    )
    df['_ema_20'] = grouped['close'].apply(
        lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)
    )
    df['_ema_60'] = grouped['close'].apply(
        lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)
    )

    # EMA slope-angle factors: arctan of each EMA's daily % change, scaled
    # per horizon (divisors 50/40/21/10 mirror get_act_factor).
    df['act_factor1'] = grouped['_ema_5'].apply(
        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50
    )
    df['act_factor2'] = grouped['_ema_13'].apply(
        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40
    )
    df['act_factor3'] = grouped['_ema_20'].apply(
        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21
    )
    df['act_factor4'] = grouped['_ema_60'].apply(
        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10
    )

    # Cross-sectional (per trade_date) percentile ranks, highest factor first.
    df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)
    df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)
    df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)

    # Log float market cap as a size control.
    df['log(circ_mv)'] = np.log(df['circ_mv'])

    def rolling_covariance(x, y, window):
        # Rolling covariance between two aligned series.
        return x.rolling(window).cov(y)

    def delta(series, period):
        # Difference against the value *period* rows back.
        return series.diff(period)

    def rank(series):
        # Percentile rank over the whole column.
        return series.rank(pct=True)

    def stddev(series, window):
        return series.rolling(window).std()

    # Improved "Alpha 22": -delta(cov(high, vol, 5), 5) * rank(std(close, 20)).
    # NOTE(review): these rolling windows run over the full sorted frame and
    # so cross ticker boundaries.
    window_high_volume = 5
    window_close_stddev = 20
    period_delta = 5
    df['cov'] = rolling_covariance(df['high'], df['vol'], window_high_volume)
    df['delta_cov'] = delta(df['cov'], period_delta)
    df['_rank_stddev'] = rank(stddev(df['close'], window_close_stddev))
    df['alpha_22_improved'] = -1 * df['delta_cov'] * df['_rank_stddev']

    # alpha_003: intraday body relative to the day's range (0 when high == low).
    df['alpha_003'] = np.where(df['high'] != df['low'],
                               (df['close'] - df['open']) / (df['high'] - df['low']),
                               0)

    # alpha_007: 5-day close/volume correlation per ticker, ranked per day.
    df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)
    df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)

    # alpha_013: 5-day minus 20-day close sum per ticker, ranked per day.
    df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())
    df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)

    # Price-limit flags (True when the close sits exactly on the limit price;
    # kept as booleans here, unlike get_limit_factor's ints).
    df['cat_up_limit'] = (df['close'] == df['up_limit'])  # limit-up flag
    df['cat_down_limit'] = (df['close'] == df['down_limit'])  # limit-down flag
    # Rolling 10-day limit counts (min_periods=1 so early rows still count).
    df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,
                                                                                                           drop=True)
    df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,
                                                                                                               drop=True)

    # 3. Current consecutive limit streak length.
    def calculate_consecutive_limits(series):
        """
        Return the running streak length of truthy values in *series*
        (both tuple members are computed identically; only [0] is used).
        """
        consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)
        consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)
        return consecutive_up, consecutive_down

    # Consecutive limit-up days per ticker.
    df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(
        lambda x: calculate_consecutive_limits(x)[0]
    ).reset_index(level=0, drop=True)

    # Breakout flag: close above the 85% cost line on heavy relative volume.
    df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)

    # 5-day rate of change of the weighted average chip cost.
    df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))

    def rolling_corr(group):
        # 10-day correlation between price returns and avg-cost returns.
        roc_close = group['close'].pct_change()
        roc_weight = group['weight_avg'].pct_change()
        return roc_close.rolling(10).corr(roc_weight)

    df['price_cost_divergence'] = grouped.apply(rolling_corr)

    # Chip concentration scaled by the inverse of float market cap.
    df['smallcap_concentration'] = (1 / df['circ_mv']) * (df['cost_85pct'] - df['cost_15pct'])

    # 16. 20-day stability of the weighted average cost (std / mean).
    df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())
    df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())

    # 17. Days in the last 5 the close broke above the 95% cost line.
    df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())

    # 20. Wide cost span relative to 10-day average volume (illiquidity risk).
    df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (
            1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))

    # 7. Cap-scaled 20-day turnover volatility.
    # NOTE(review): the trailing reset_index(level=0, drop=True) on the
    # element-wise apply / pct_change results below replaces the label index
    # with a positional RangeIndex — rows can misalign if the sorted frame's
    # labels are not already 0..n-1.  Confirm the callers always pass a
    # default-indexed frame.
    df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)
    df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['circ_mv']).reset_index(level=0, drop=True)

    # 8. Cap-scaled 20-day volume growth (same indexing caveat as above).
    df['volume_growth'] = grouped['vol'].pct_change(periods=20).reset_index(level=0, drop=True)
    df['mv_growth'] = grouped.apply(lambda x: x['volume_growth'] / x['circ_mv']).reset_index(level=0, drop=True)

    # weight_std20 is an intermediate only; report the genuinely new columns.
    df.drop(columns=['weight_std20'], inplace=True, errors='ignore')
    new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]

    return df, new_columns
|
||||
|
||||
|
||||
def get_simple_factor(df):
    """
    Derive simple cross-sectional factor columns from precomputed per-stock fields.

    Assumes `df` already carries the upstream columns referenced below
    (price/volume fields, `std_return_*`, `act_factor1..4`, money-flow
    `buy_*/sell_*/net_mf_vol`, chip-cost `cost_*pct` / `his_high` / `his_low`,
    `weight_avg`, `winner_rate`, `atr_14`, `circ_mv`, …) — TODO confirm against
    the producing pipeline; missing columns raise KeyError.

    Parameters:
        df (pd.DataFrame): per-stock daily rows keyed by ('ts_code', 'trade_date').

    Returns:
        tuple[pd.DataFrame, list[str]]: the frame (sorted by ts_code/trade_date)
        with factor columns appended, and the list of newly added column names
        in creation order.
    """
    # Snapshot the existing columns so new factor names can be reported at the end.
    old_columns = df.columns.tolist()[:]
    df = df.sort_values(by=['ts_code', 'trade_date'])

    # Blend weight between volume change and turnover deviation.
    alpha = 0.5
    df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']
    df['resonance_factor'] = df['volume_ratio'] * df['pct_chg']
    df['log_close'] = np.log(df['close'])

    # Boolean flag: today's volume more than twice the `vol_spike` reference
    # (presumably a rolling baseline computed upstream — verify).
    df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']

    # Upper/lower candle shadows, normalized by close.
    df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']
    df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']

    # OBV deviation from its 6-period moving average.
    df['obv-maobv_6'] = df['obv'] - df['maobv_6']

    # Ratio indicators between return-volatility horizons.
    df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']
    df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']

    # Difference between the two 90-period volatility variants.
    df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']

    # Categorical flags on the activity-factor ladder (each vs. the previous level).
    df['cat_af1'] = df['act_factor1'] > 0
    df['cat_af2'] = df['act_factor2'] > df['act_factor1']
    df['cat_af3'] = df['act_factor3'] > df['act_factor2']
    df['cat_af4'] = df['act_factor4'] > df['act_factor3']

    # Composite activity factors: sum, and normalized 1-vs-2 contrast.
    df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']
    df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(
        df['act_factor1'] ** 2 + df['act_factor2'] ** 2)

    # Active-buy volume shares by order size, scaled by net money flow.
    # NOTE(review): net_mf_vol can be zero or negative -> inf/sign flips; confirm
    # downstream consumers tolerate that.
    df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']
    df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']
    df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']

    # Net large / extra-large order imbalance, scaled by net money flow.
    df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']
    df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']

    # Log of circulating market value.
    df['log(circ_mv)'] = np.log(df['circ_mv'])

    # Chip-concentration width relative to the stock's historical price range.
    df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])

    # Position of close within the lower half of the cost distribution.
    df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])

    # Skew of the cost distribution: upper-half width over lower-half width.
    df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])

    # Turnover damped by how wide the chip-cost band is vs. the historical range.
    df['lock_factor'] = df['turnover_rate'] * (
        1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))

    # Breakout above the 85th-percentile cost level on elevated volume.
    df['cat_vol_break'] = (df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2)

    # Cost-band width normalized by 14-period ATR.
    df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']

    # 12. Small-cap chip concentration (narrow cost band, small float).
    df['smallcap_concentration'] = (1 / df['circ_mv']) * (df['cost_85pct'] - df['cost_15pct'])

    # "Golden resonance": price above weighted-average cost, high relative
    # volume, and most holders in profit.
    df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &
                                  (df['volume_ratio'] > 1.5) &
                                  (df['winner_rate'] > 0.7))

    # Market-value-scaled liquidity/momentum variants.
    df['mv_turnover_ratio'] = df['turnover_rate'] / df['circ_mv']

    df['mv_adjusted_volume'] = df['vol'] / df['circ_mv']

    # NOTE(review): identical formula to mv_turnover_ratio above — kept because
    # downstream feature lists may reference both names.
    df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['circ_mv'])

    # NOTE(review): identical formula to mv_adjusted_volume above — kept for the
    # same reason.
    df['nonlinear_mv_volume'] = df['vol'] / df['circ_mv']

    df['mv_volume_ratio'] = df['volume_ratio'] / df['circ_mv']

    df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['circ_mv']

    # Drop any scratch columns produced with a leading-underscore convention.
    drop_columns = [col for col in df.columns if col.startswith('_')]
    df.drop(columns=drop_columns, inplace=True, errors='ignore')

    # Report only the columns added by this function, preserving creation order.
    new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]
    return df, new_columns
|
||||
|
||||
|
||||
def calculate_indicators(df):
    """
    Compute daily technical and sentiment indicators for one instrument.

    Adds: daily_return, RSI (14-day, simple rolling averages), MACD /
    Signal_line / MACD_hist (12/26/9 EMA), up_ratio / up_ratio_20d,
    volume_mean / volume_change_rate, volatility (20-day return std),
    amount_mean / amount_change_rate.

    Parameters:
        df (pd.DataFrame): rows for a single instrument; must contain
            'trade_date', 'close', 'pre_close', 'vol' and 'amount'.

    Returns:
        pd.DataFrame: the frame sorted by 'trade_date' with the indicator
        columns appended.
    """
    df = df.sort_values('trade_date')

    # Daily percentage return relative to the previous close.
    df['daily_return'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100

    # RSI(14) using simple rolling means of gains/losses (Cutler's variant).
    delta = df['close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss  # avg_loss == 0 -> rs = inf -> RSI = 100
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD(12, 26) with a 9-period EMA signal line.
    ema12 = df['close'].ewm(span=12, adjust=False).mean()
    ema26 = df['close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = ema12 - ema26
    df['Signal_line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_hist'] = df['MACD'] - df['Signal_line']

    # 4. Sentiment factor 1: up-day flag and its 20-day mean (up-day ratio).
    # Vectorized comparison replaces the original row-wise apply(lambda);
    # NaN returns compare False and map to 0 in both forms.
    df['up_ratio'] = (df['daily_return'] > 0).astype(int)
    df['up_ratio_20d'] = df['up_ratio'].rolling(window=20).mean()

    # 5. Sentiment factor 2: volume deviation from its 20-day mean (%).
    df['volume_mean'] = df['vol'].rolling(window=20).mean()
    df['volume_change_rate'] = (df['vol'] - df['volume_mean']) / df['volume_mean'] * 100

    # 6. Sentiment factor 3: 20-day volatility of daily returns.
    df['volatility'] = df['daily_return'].rolling(window=20).std()

    # 7. Sentiment factor 4: turnover-amount deviation from its 20-day mean (%).
    df['amount_mean'] = df['amount'].rolling(window=20).mean()
    df['amount_change_rate'] = (df['amount'] - df['amount_mean']) / df['amount_mean'] * 100

    return df
|
||||
|
||||
|
||||
def generate_index_indicators(h5_filename):
    """
    Build a wide per-date indicator table from an HDF5 index-data store.

    Loads key 'index_data', runs calculate_indicators() for each ts_code
    separately, then pivots so each trade_date becomes one row with
    '<ts_code>_<indicator>' columns.

    Parameters:
        h5_filename (str): path to the HDF5 file holding key 'index_data'.

    Returns:
        pd.DataFrame: one row per trade_date; indicator values taken via
        aggfunc='last' where duplicates exist.
    """
    raw = pd.read_hdf(h5_filename, key='index_data')
    raw['trade_date'] = pd.to_datetime(raw['trade_date'], format='%Y%m%d')
    raw = raw.sort_values('trade_date')

    # Compute indicators independently per index code, preserving the order
    # in which codes first appear in the data.
    per_code_frames = [
        calculate_indicators(raw[raw['ts_code'] == code].copy())
        for code in raw['ts_code'].unique()
    ]

    # Stack all per-code results back into one long frame.
    combined = pd.concat(per_code_frames, ignore_index=True)

    # Keep trade_date as the row key and spread each (indicator, ts_code)
    # pair into its own column.
    wide = combined.pivot_table(
        index='trade_date',
        columns='ts_code',
        values=['daily_return', 'RSI', 'MACD', 'Signal_line',
                'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',
                'amount_change_rate', 'amount_mean'],
        aggfunc='last'
    )

    # Flatten the (indicator, ts_code) MultiIndex into '<ts_code>_<indicator>'.
    wide.columns = [f"{code}_{indicator}" for indicator, code in wide.columns]
    return wide.reset_index()
|
||||
Reference in New Issue
Block a user