Classify2

2025-05-13 15:30:06 +08:00
parent 791c84aba6
commit a4b05bb62f
20 changed files with 10737 additions and 7456 deletions
--- a/main/data/index_and_industry.ipynb
+++ b/main/data/index_and_industry.ipynb
@@ -44,7 +44,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_28220\\1832869062.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
+      "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_16940\\1832869062.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
      "  final_df = pd.concat(all_data, ignore_index=True)\n"
     ]
    }
@@ -86,32 +86,32 @@
     "output_type": "stream",
     "text": [
      "         ts_code trade_date      close       open       high        low  \\\n",
-      "0      000905.SH   20250506  5740.3338  5668.8762  5740.3338  5666.4698   \n",
-      "1      000905.SH   20250430  5631.8249  5604.6537  5647.7821  5603.1718   \n",
-      "2      000905.SH   20250429  5604.9057  5583.7186  5622.0220  5571.2363   \n",
-      "3      000905.SH   20250428  5598.2951  5624.4166  5628.0778  5587.7857   \n",
-      "4      000905.SH   20250425  5627.1804  5613.1407  5661.5869  5596.5266   \n",
+      "0      000905.SH   20250509  5721.7225  5770.4410  5770.4410  5705.1654   \n",
+      "1      000905.SH   20250508  5773.8056  5731.7157  5783.7915  5724.9511   \n",
+      "2      000905.SH   20250507  5750.2911  5805.6560  5819.2422  5713.2734   \n",
+      "3      000905.SH   20250506  5740.3338  5668.8762  5740.3338  5666.4698   \n",
+      "4      000905.SH   20250430  5631.8249  5604.6537  5647.7821  5603.1718   \n",
      "...          ...        ...        ...        ...        ...        ...   \n",
-      "13492  399006.SZ   20100607  1069.4680  1005.0280  1075.2250  1001.7020   \n",
-      "13493  399006.SZ   20100604  1027.6810   989.6810  1027.6810   986.5040   \n",
-      "13494  399006.SZ   20100603   998.3940  1002.3550  1026.7020   997.7750   \n",
-      "13495  399006.SZ   20100602   997.1190   967.6090   997.1190   952.6110   \n",
-      "13496  399006.SZ   20100601   973.2330   986.0150   994.7930   948.1180   \n",
+      "13501  399006.SZ   20100607  1069.4680  1005.0280  1075.2250  1001.7020   \n",
+      "13502  399006.SZ   20100604  1027.6810   989.6810  1027.6810   986.5040   \n",
+      "13503  399006.SZ   20100603   998.3940  1002.3550  1026.7020   997.7750   \n",
+      "13504  399006.SZ   20100602   997.1190   967.6090   997.1190   952.6110   \n",
+      "13505  399006.SZ   20100601   973.2330   986.0150   994.7930   948.1180   \n",
      "\n",
      "       pre_close    change  pct_chg           vol        amount  \n",
-      "0      5631.8249  108.5089   1.9267  1.627736e+08  2.170600e+08  \n",
-      "1      5604.9057   26.9192   0.4803  1.383866e+08  1.816166e+08  \n",
-      "2      5598.2951    6.6106   0.1181  1.267429e+08  1.580330e+08  \n",
-      "3      5627.1804  -28.8853  -0.5133  1.362181e+08  1.676163e+08  \n",
-      "4      5605.8796   21.3008   0.3800  1.400008e+08  1.719338e+08  \n",
+      "0      5773.8056  -52.0831  -0.9021  1.239390e+08  1.781623e+08  \n",
+      "1      5750.2911   23.5145   0.4089  1.361403e+08  1.870326e+08  \n",
+      "2      5740.3338    9.9573   0.1735  1.710118e+08  2.275662e+08  \n",
+      "3      5631.8249  108.5089   1.9267  1.627736e+08  2.170600e+08  \n",
+      "4      5604.9057   26.9192   0.4803  1.383866e+08  1.816166e+08  \n",
      "...          ...       ...      ...           ...           ...  \n",
-      "13492  1027.6810   41.7870   4.0661  2.655275e+06  9.106095e+06  \n",
-      "13493   998.3940   29.2870   2.9334  1.500295e+06  5.269441e+06  \n",
-      "13494   997.1190    1.2750   0.1279  1.616805e+06  6.240835e+06  \n",
-      "13495   973.2330   23.8860   2.4543  1.074628e+06  4.001206e+06  \n",
-      "13496  1000.0000  -26.7670  -2.6767  1.356285e+06  4.924177e+06  \n",
+      "13501  1027.6810   41.7870   4.0661  2.655275e+06  9.106095e+06  \n",
+      "13502   998.3940   29.2870   2.9334  1.500295e+06  5.269441e+06  \n",
+      "13503   997.1190    1.2750   0.1279  1.616805e+06  6.240835e+06  \n",
+      "13504   973.2330   23.8860   2.4543  1.074628e+06  4.001206e+06  \n",
+      "13505  1000.0000  -26.7670  -2.6767  1.356285e+06  4.924177e+06  \n",
      "\n",
-      "[13497 rows x 11 columns]\n"
+      "[13506 rows x 11 columns]\n"
     ]
    }
   ],
--- a/main/data/update/cyq-perf.ipynb
+++ b/main/data/update/cyq-perf.ipynb
@@ -39,15 +39,15 @@
      "3     000006.SZ   20250312\n",
      "4     000007.SZ   20250312\n",
      "...         ...        ...\n",
-      "5381  920445.BJ   20250506\n",
-      "5382  920489.BJ   20250506\n",
-      "5383  920682.BJ   20250506\n",
-      "5384  920799.BJ   20250506\n",
-      "5385  920819.BJ   20250506\n",
+      "5384  920445.BJ   20250508\n",
+      "5385  920489.BJ   20250508\n",
+      "5386  920682.BJ   20250508\n",
+      "5387  920799.BJ   20250508\n",
+      "5388  920819.BJ   20250508\n",
      "\n",
-      "[7654317 rows x 2 columns]\n",
-      "20250506\n",
-      "start_date: 20250507\n"
+      "[7665071 rows x 2 columns]\n",
+      "20250508\n",
+      "start_date: 20250509\n"
     ]
    }
   ],
@@ -88,8 +88,8 @@
     "text": [
      "任务 20250619 完成\n",
      "任务 20250620 完成\n",
-      "任务 20250617 完成\n",
      "任务 20250618 完成\n",
+      "任务 20250617 完成\n",
      "任务 20250616 完成\n",
      "任务 20250613 完成\n",
      "任务 20250612 完成\n",
@@ -104,20 +104,18 @@
      "任务 20250529 完成\n",
      "任务 20250528 完成\n",
      "任务 20250527 完成\n",
-      "任务 20250526 完成\n",
      "任务 20250523 完成\n",
-      "任务 20250522 完成\n",
+      "任务 20250526 完成\n",
      "任务 20250521 完成\n",
-      "任务 20250520 完成\n",
+      "任务 20250522 完成\n",
      "任务 20250519 完成\n",
+      "任务 20250520 完成\n",
      "任务 20250516 完成\n",
      "任务 20250515 完成\n",
      "任务 20250514 完成\n",
      "任务 20250513 完成\n",
      "任务 20250512 完成\n",
-      "任务 20250509 完成\n",
-      "任务 20250508 完成\n",
-      "任务 20250507 完成\n"
+      "任务 20250509 完成\n"
     ]
    }
   ],
--- a/main/data/update/sw_daily.ipynb
+++ b/main/data/update/sw_daily.ipynb
@@ -39,15 +39,15 @@
      "3    801005.SI   20250221\n",
      "4    801010.SI   20250221\n",
      "..         ...        ...\n",
-      "434  859811.SI   20250506\n",
-      "435  859821.SI   20250506\n",
-      "436  859822.SI   20250506\n",
-      "437  859852.SI   20250506\n",
-      "438  859951.SI   20250506\n",
+      "434  859811.SI   20250508\n",
+      "435  859821.SI   20250508\n",
+      "436  859822.SI   20250508\n",
+      "437  859852.SI   20250508\n",
+      "438  859951.SI   20250508\n",
      "\n",
-      "[1065465 rows x 2 columns]\n",
-      "20250506\n",
-      "start_date: 20250507\n"
+      "[1066343 rows x 2 columns]\n",
+      "20250508\n",
+      "start_date: 20250509\n"
     ]
    }
   ],
@@ -88,8 +88,8 @@
     "text": [
      "任务 20250619 完成\n",
      "任务 20250620 完成\n",
-      "任务 20250618 完成\n",
      "任务 20250617 完成\n",
+      "任务 20250618 完成\n",
      "任务 20250616 完成\n",
      "任务 20250613 完成\n",
      "任务 20250612 完成\n",
@@ -101,23 +101,21 @@
      "任务 20250604 完成\n",
      "任务 20250603 完成\n",
      "任务 20250530 完成\n",
-      "任务 20250528 完成\n",
      "任务 20250529 完成\n",
-      "任务 20250526 完成\n",
+      "任务 20250528 完成\n",
      "任务 20250527 完成\n",
+      "任务 20250526 完成\n",
      "任务 20250523 完成\n",
      "任务 20250522 完成\n",
      "任务 20250521 完成\n",
      "任务 20250520 完成\n",
      "任务 20250519 完成\n",
+      "任务 20250515 完成\n",
      "任务 20250516 完成\n",
      "任务 20250514 完成\n",
-      "任务 20250515 完成\n",
      "任务 20250513 完成\n",
      "任务 20250512 完成\n",
-      "任务 20250509 完成\n",
-      "任务 20250508 完成\n",
-      "任务 20250507 完成\n"
+      "任务 20250509 完成\n"
     ]
    }
   ],
--- a/main/data/update/update_daily_basic.ipynb
+++ b/main/data/update/update_daily_basic.ipynb
@@ -19,7 +19,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "id": "14671a7f72de2564",
   "metadata": {
    "ExecuteTime": {
@@ -80,7 +80,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "id": "e7f8cce2f80e2f20",
   "metadata": {
    "ExecuteTime": {
@@ -94,17 +94,17 @@
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 8599138 entries, 0 to 8599137\n",
+      "Index: 8615301 entries, 0 to 5388\n",
      "Data columns (total 2 columns):\n",
      " #   Column      Dtype \n",
      "---  ------      ----- \n",
      " 0   ts_code     object\n",
      " 1   trade_date  object\n",
      "dtypes: object(2)\n",
-      "memory usage: 196.8+ MB\n",
+      "memory usage: 197.2+ MB\n",
      "None\n",
-      "20250430\n",
-      "20250506\n"
+      "20250508\n",
+      "20250509\n"
     ]
    }
   ],
@@ -130,7 +130,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
   "id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
   "metadata": {
    "ExecuteTime": {
@@ -148,8 +148,8 @@
      "任务 20250717 完成\n",
      "任务 20250716 完成\n",
      "任务 20250715 完成\n",
-      "任务 20250714 完成\n",
      "任务 20250711 完成\n",
+      "任务 20250714 完成\n",
      "任务 20250710 完成\n",
      "任务 20250709 完成\n",
      "任务 20250708 完成\n",
@@ -178,14 +178,14 @@
      "任务 20250605 完成\n",
      "任务 20250604 完成\n",
      "任务 20250603 完成\n",
-      "任务 20250529 完成\n",
      "任务 20250530 完成\n",
-      "任务 20250527 完成\n",
+      "任务 20250529 完成\n",
      "任务 20250528 完成\n",
+      "任务 20250527 完成\n",
      "任务 20250526 完成\n",
      "任务 20250523 完成\n",
-      "任务 20250521 完成\n",
      "任务 20250522 完成\n",
+      "任务 20250521 完成\n",
      "任务 20250520 完成\n",
      "任务 20250519 完成\n",
      "任务 20250516 完成\n",
@@ -193,10 +193,7 @@
      "任务 20250514 完成\n",
      "任务 20250513 完成\n",
      "任务 20250512 完成\n",
-      "任务 20250509 完成\n",
-      "任务 20250508 完成\n",
-      "任务 20250507 完成\n",
-      "任务 20250506 完成\n"
+      "任务 20250509 完成\n"
     ]
    }
   ],
@@ -253,7 +250,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
   "id": "919023c693d7a47a",
   "metadata": {
    "ExecuteTime": {
@@ -266,59 +263,59 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "         ts_code trade_date  close  turnover_rate  turnover_rate_f  \\\n",
-      "0      301261.SZ   20250507  97.25        15.5042          19.6511   \n",
-      "1      002643.SZ   20250507  11.12         1.3481           2.3303   \n",
-      "2      001211.SZ   20250507  22.11         3.5506           6.1239   \n",
-      "3      002466.SZ   20250507  28.98         1.0588           1.5771   \n",
-      "4      603005.SH   20250507  29.32         5.1961           6.1690   \n",
-      "...          ...        ...    ...            ...              ...   \n",
-      "10769  000551.SZ   20250506  12.39         2.0213           3.1432   \n",
-      "10770  600792.SH   20250506   3.17         0.8036           2.3531   \n",
-      "10771  300176.SZ   20250506   6.62         1.7530           2.5325   \n",
-      "10772  000016.SZ   20250506   5.57        13.9545          20.7669   \n",
-      "10773  300339.SZ   20250506  56.53        11.3184          11.9579   \n",
+      "        ts_code trade_date  close  turnover_rate  turnover_rate_f  \\\n",
+      "0     300575.SZ   20250509   6.05         1.9284           2.1880   \n",
+      "1     300247.SZ   20250509   3.77         2.1735           2.5437   \n",
+      "2     603038.SH   20250509  15.80        17.5702          32.3972   \n",
+      "3     002030.SZ   20250509   5.82         0.8252           1.2070   \n",
+      "4     600157.SH   20250509   1.36         0.8369           1.0222   \n",
+      "...         ...        ...    ...            ...              ...   \n",
+      "5384  600841.SH   20250509   5.57         1.0271           3.2670   \n",
+      "5385  300968.SZ   20250509  14.76         1.2857           2.7636   \n",
+      "5386  300634.SZ   20250509  25.79         5.2551           9.4581   \n",
+      "5387  300295.SZ   20250509  15.73         3.0347           3.2458   \n",
+      "5388  688370.SH   20250509  19.15         1.2008           1.2008   \n",
      "\n",
-      "       volume_ratio        pe    pe_ttm       pb       ps   ps_ttm  dv_ratio  \\\n",
-      "0              0.84  122.6810  146.2352   5.5730   8.2774   8.3189    0.4627   \n",
-      "1              0.79   41.9902   45.3885   1.4569   2.8000   2.8594    2.6982   \n",
-      "2              0.83   56.0080   58.9563   1.8078   1.1637   1.1399    0.0000   \n",
-      "3              0.92       NaN       NaN   1.1380   3.6409   3.6410    4.6569   \n",
-      "4              1.35   75.6520   71.1174   4.4020  16.9225  16.2060    0.1570   \n",
-      "...             ...       ...       ...      ...      ...      ...       ...   \n",
-      "10769          1.20   19.9692   18.7030   1.8602   1.1939   1.1927    0.5650   \n",
-      "10770          0.89       NaN       NaN   1.1995   0.5271   0.5777    2.1767   \n",
-      "10771          1.12   92.1443   96.5538   2.7208   1.4839   1.4627    0.0000   \n",
-      "10772          3.66       NaN       NaN   5.6643   1.2067   1.1979    0.0000   \n",
-      "10773          2.40  279.4392  270.1037  12.8967  13.2445  13.0061    0.0000   \n",
+      "      volume_ratio        pe    pe_ttm      pb       ps   ps_ttm  dv_ratio  \\\n",
+      "0             0.71  239.8914       NaN  1.3451   1.1608   1.1259    1.9835   \n",
+      "1             0.96   64.6952   53.1680  2.7649   4.4008   3.9673    0.0000   \n",
+      "2             4.47  183.7603  154.4297  3.1047   4.0259   3.7692    0.2434   \n",
+      "3             0.62       NaN       NaN  1.0296   9.5754   9.9145    0.2577   \n",
+      "4             0.55   19.3625   26.3896  0.6394   1.0656   1.1327    0.4044   \n",
+      "...            ...       ...       ...     ...      ...      ...       ...   \n",
+      "5384          0.77       NaN       NaN  2.3362   1.1952   1.2860    0.0000   \n",
+      "5385          0.71  115.0812  181.8721  3.2254   4.9990   5.1146    0.3388   \n",
+      "5386          1.01   50.5639   52.9222  4.1166   7.0433   6.7806    0.8063   \n",
+      "5387          0.65       NaN       NaN  2.6398  24.2982  28.1758    0.0000   \n",
+      "5388          1.25   29.1668   36.1111  0.9812   4.4106   4.4983       NaN   \n",
      "\n",
-      "       dv_ttm  total_share  float_share   free_share      total_mv  \\\n",
-      "0      0.4627    8789.0196    3748.3321    2957.3203  8.547322e+05   \n",
-      "1      2.6982   92996.9005   90932.5570   52604.5851  1.034126e+06   \n",
-      "2         NaN    7200.0000    6699.6575    3884.4502  1.591920e+05   \n",
-      "3      4.6569  164122.1583  147584.5634   99084.9325  4.756260e+06   \n",
-      "4      0.1570   65217.1706   65217.1706   54932.1940  1.912167e+06   \n",
-      "...       ...          ...          ...          ...           ...   \n",
-      "10769  0.5650   40394.4205   40263.2044   25893.0990  5.004869e+05   \n",
-      "10770  2.1767  110992.3600  105986.8113   36194.3684  3.518458e+05   \n",
-      "10771     NaN   38728.0800   38728.0800   26808.2764  2.563799e+05   \n",
-      "10772     NaN  240794.5408  159659.3800  107284.6868  1.341226e+06   \n",
-      "10773     NaN   79641.0841   77768.6667   73609.4256  4.502110e+06   \n",
+      "      dv_ttm   total_share   float_share    free_share      total_mv  \\\n",
+      "0     1.9835  4.647564e+04  3.427082e+04  3.020469e+04  2.811776e+05   \n",
+      "1        NaN  8.040403e+04  8.032753e+04  6.863630e+04  3.031232e+05   \n",
+      "2     0.2434  2.686771e+04  2.686771e+04  1.457134e+04  4.245098e+05   \n",
+      "3     0.2577  1.403446e+05  1.403446e+05  9.595371e+04  8.168056e+05   \n",
+      "4     0.4044  2.221776e+06  2.221776e+06  1.819047e+06  3.021616e+06   \n",
+      "...      ...           ...           ...           ...           ...   \n",
+      "5384     NaN  1.387822e+05  1.043024e+05  3.279094e+04  7.730167e+05   \n",
+      "5385  0.3388  4.133800e+04  4.133800e+04  1.923185e+04  6.101489e+05   \n",
+      "5386  0.8063  4.512109e+04  4.346809e+04  2.415175e+04  1.163673e+06   \n",
+      "5387     NaN  1.896137e+04  1.675486e+04  1.566518e+04  2.982624e+05   \n",
+      "5388     NaN  1.371079e+04  4.374912e+03  4.374912e+03  2.625616e+05   \n",
      "\n",
-      "            circ_mv  is_st  \n",
-      "0      3.645253e+05  False  \n",
-      "1      1.011170e+06  False  \n",
-      "2      1.481294e+05  False  \n",
-      "3      4.277001e+06  False  \n",
-      "4      1.912167e+06  False  \n",
-      "...             ...    ...  \n",
-      "10769  4.988611e+05  False  \n",
-      "10770  3.359782e+05  False  \n",
-      "10771  2.563799e+05  False  \n",
-      "10772  8.893027e+05  False  \n",
-      "10773  4.396263e+06  False  \n",
+      "           circ_mv  is_st  \n",
+      "0     2.073385e+05  False  \n",
+      "1     3.028348e+05  False  \n",
+      "2     4.245098e+05  False  \n",
+      "3     8.168056e+05  False  \n",
+      "4     3.021616e+06  False  \n",
+      "...            ...    ...  \n",
+      "5384  5.809646e+05  False  \n",
+      "5385  6.101489e+05  False  \n",
+      "5386  1.121042e+06  False  \n",
+      "5387  2.635540e+05  False  \n",
+      "5388  8.377956e+04  False  \n",
      "\n",
-      "[10774 rows x 19 columns]\n"
+      "[5389 rows x 19 columns]\n"
     ]
    }
   ],
@@ -329,7 +326,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
   "id": "28cb78d032671b20",
   "metadata": {
    "ExecuteTime": {
@@ -342,59 +339,59 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "         ts_code trade_date  close  turnover_rate  turnover_rate_f  \\\n",
-      "8      300147.SZ   20250507   6.58         5.3209           6.8857   \n",
-      "19     002501.SZ   20250507   2.10         2.8874           3.7273   \n",
-      "52     600238.SH   20250507   4.55        11.2843          13.8699   \n",
-      "63     300391.SZ   20250507   5.58         5.5505           7.0395   \n",
-      "73     600421.SH   20250507   4.99         2.8571           6.1511   \n",
-      "...          ...        ...    ...            ...              ...   \n",
-      "10647  600243.SH   20250506   2.43         6.7484           8.1172   \n",
-      "10652  002528.SZ   20250506   2.35         2.0592           4.3961   \n",
-      "10682  300044.SZ   20250506   3.31        12.8866          13.4490   \n",
-      "10712  300097.SZ   20250506   4.36         2.5814           3.0107   \n",
-      "10733  600200.SH   20250506   3.04         0.2013           0.2433   \n",
+      "        ts_code trade_date  close  turnover_rate  turnover_rate_f  \\\n",
+      "54    002496.SZ   20250509   1.43         3.1262           3.2341   \n",
+      "148   603828.SH   20250509   5.04         3.5674           7.1692   \n",
+      "166   600599.SH   20250509   7.70        10.8623          27.2882   \n",
+      "193   000820.SZ   20250509   2.16         5.5698           5.7239   \n",
+      "203   300506.SZ   20250509   3.28         0.6710           0.9449   \n",
+      "...         ...        ...    ...            ...              ...   \n",
+      "5204  002602.SZ   20250509   8.00         1.3867           1.7044   \n",
+      "5253  300147.SZ   20250509   7.37         7.2159           9.3379   \n",
+      "5264  002501.SZ   20250509   2.08         2.4301           3.1371   \n",
+      "5317  600421.SH   20250509   5.27         2.7391           5.8971   \n",
+      "5345  600289.SH   20250509   5.78         1.3847           2.0115   \n",
      "\n",
-      "       volume_ratio       pe  pe_ttm        pb       ps   ps_ttm  dv_ratio  \\\n",
-      "8              1.62      NaN     NaN    4.4991   2.3410   2.5434       0.0   \n",
-      "19             1.28      NaN     NaN   22.7988  22.3498  26.2757       0.0   \n",
-      "52             2.57      NaN     NaN   20.0224  11.6394  12.3461       0.0   \n",
-      "63             1.35      NaN     NaN       NaN  17.5129  12.5138       0.0   \n",
-      "73             0.80      NaN     NaN  135.5854   8.3301   8.4697       0.0   \n",
-      "...             ...      ...     ...       ...      ...      ...       ...   \n",
-      "10647          0.73      NaN     NaN    1.6685   4.5071   4.6210       0.0   \n",
-      "10652          1.52      NaN     NaN   15.5269   2.9812   3.6083       0.0   \n",
-      "10682          2.91      NaN     NaN   24.3171  17.6463  26.1361       0.0   \n",
-      "10712          0.99      NaN     NaN    2.7137   3.2758   3.8102       0.0   \n",
-      "10733          0.05  30.7156     NaN    1.2351   1.3543   1.7858       0.0   \n",
+      "      volume_ratio        pe     pe_ttm        pb       ps   ps_ttm  dv_ratio  \\\n",
+      "54            0.73       NaN        NaN    1.6044   7.6992   7.2633       0.0   \n",
+      "148           1.65  349.9490  1691.0271    3.9734   1.2211   1.3170       0.0   \n",
+      "166           4.51       NaN        NaN   11.5933   3.9468   4.0472       0.0   \n",
+      "193           1.00       NaN        NaN    9.5443  11.2714  14.3393       0.0   \n",
+      "203           0.87       NaN        NaN   28.5909  19.5183  19.3088       0.0   \n",
+      "...            ...       ...        ...       ...      ...      ...       ...   \n",
+      "5204          0.78   49.1432    31.1887    2.2169   2.6358   2.2496       0.0   \n",
+      "5253          1.74       NaN        NaN    5.0393   2.6221   2.8487       0.0   \n",
+      "5264          0.87       NaN        NaN   22.5816  22.1370  26.0255       0.0   \n",
+      "5317          0.74       NaN        NaN  143.1934   8.7976   8.9449       0.0   \n",
+      "5345          0.55       NaN        NaN    2.9752  11.3890  11.6628       0.0   \n",
      "\n",
-      "       dv_ttm  total_share  float_share   free_share     total_mv  \\\n",
-      "8         NaN   66127.9045   65745.9042   50804.9121  435121.6116   \n",
-      "19        NaN  355000.0000  354999.9006  274999.9006  745500.0000   \n",
-      "52        NaN   44820.0000   44500.1580   36204.3908  203931.0000   \n",
-      "63        NaN   35033.6112   35033.6112   27623.1259  195487.5505   \n",
-      "73        NaN   19560.0000   19560.0000    9085.2748   97604.4000   \n",
-      "...       ...          ...          ...          ...          ...   \n",
-      "10647     NaN   43885.0000   43885.0000   36485.0000  106640.5500   \n",
-      "10652     NaN  119867.5082  104974.0608   49171.2582  281688.6443   \n",
-      "10682     NaN   76386.9228   76375.7508   73182.1277  252840.7145   \n",
-      "10712     NaN   28854.9669   27000.9948   23150.5534  125807.6557   \n",
-      "10733     NaN   71215.1832   71087.9480   58808.3718  216494.1569   \n",
+      "      dv_ttm  total_share  float_share   free_share      total_mv  \\\n",
+      "54       NaN  150758.9677  118138.6559  114196.4999  2.155853e+05   \n",
+      "148      NaN   59596.0158   59593.9625   29654.2988  3.003639e+05   \n",
+      "166      NaN   16600.0000   16600.0000    6607.7948  1.278200e+05   \n",
+      "193      NaN   64362.0201   29403.1899   28611.4718  1.390220e+05   \n",
+      "203      NaN   69559.6569   57572.5450   40880.9749  2.281557e+05   \n",
+      "...      ...          ...          ...          ...           ...   \n",
+      "5204     NaN  745255.6968  687870.8273  559649.7754  5.962046e+06   \n",
+      "5253     NaN   66127.9045   65745.9042   50804.9121  4.873627e+05   \n",
+      "5264     NaN  355000.0000  354999.9006  274999.9006  7.384000e+05   \n",
+      "5317     NaN   19560.0000   19560.0000    9085.2748  1.030812e+05   \n",
+      "5345     NaN   63105.2069   56592.2684   38956.2787  3.647481e+05   \n",
      "\n",
      "           circ_mv  is_st  \n",
-      "8      432608.0496   True  \n",
-      "19     745499.7913   True  \n",
-      "52     202475.7189   True  \n",
-      "63     195487.5505   True  \n",
-      "73      97604.4000   True  \n",
+      "54    1.689383e+05   True  \n",
+      "148   3.003536e+05   True  \n",
+      "166   1.278200e+05   True  \n",
+      "193   6.351089e+04   True  \n",
+      "203   1.888379e+05   True  \n",
      "...            ...    ...  \n",
-      "10647  106640.5500   True  \n",
-      "10652  246689.0429   True  \n",
-      "10682  252803.7351   True  \n",
-      "10712  117724.3373   True  \n",
-      "10733  216107.3619   True  \n",
+      "5204  5.502967e+06   True  \n",
+      "5253  4.845473e+05   True  \n",
+      "5264  7.383998e+05   True  \n",
+      "5317  1.030812e+05   True  \n",
+      "5345  3.271033e+05   True  \n",
      "\n",
-      "[394 rows x 19 columns]\n"
+      "[197 rows x 19 columns]\n"
     ]
    }
   ],
@@ -404,7 +401,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
   "id": "692b58674b7462c9",
   "metadata": {
    "ExecuteTime": {
@@ -430,7 +427,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
   "id": "d7a773fc20293477",
   "metadata": {
    "ExecuteTime": {
@@ -444,7 +441,7 @@
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 8609912 entries, 0 to 10773\n",
+      "Index: 8620690 entries, 0 to 5388\n",
      "Data columns (total 3 columns):\n",
      " #   Column      Dtype \n",
      "---  ------      ----- \n",
@@ -452,7 +449,7 @@
      " 1   trade_date  object\n",
      " 2   is_st       bool  \n",
      "dtypes: bool(1), object(2)\n",
-      "memory usage: 205.3+ MB\n",
+      "memory usage: 205.5+ MB\n",
      "None\n"
     ]
    }
--- a/main/data/update/update_daily_data.ipynb
+++ b/main/data/update/update_daily_data.ipynb
--- a/main/data/update/update_money_flow.ipynb
+++ b/main/data/update/update_money_flow.ipynb
@@ -34,17 +34,17 @@
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 8440821 entries, 0 to 5120\n",
+      "Index: 8451068 entries, 0 to 5123\n",
      "Data columns (total 2 columns):\n",
      " #   Column      Dtype \n",
      "---  ------      ----- \n",
      " 0   ts_code     object\n",
      " 1   trade_date  object\n",
      "dtypes: object(2)\n",
-      "memory usage: 193.2+ MB\n",
+      "memory usage: 193.4+ MB\n",
      "None\n",
-      "20250506\n",
-      "start_date: 20250507\n"
+      "20250508\n",
+      "start_date: 20250509\n"
     ]
    }
   ],
@@ -84,30 +84,30 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "任务 20250718 完成\n",
      "任务 20250717 完成\n",
-      "任务 20250716 完成\n",
+      "任务 20250718 完成\n",
      "任务 20250715 完成\n",
+      "任务 20250716 完成\n",
      "任务 20250714 完成\n",
      "任务 20250711 完成\n",
-      "任务 20250709 完成\n",
      "任务 20250710 完成\n",
+      "任务 20250709 完成\n",
      "任务 20250708 完成\n",
      "任务 20250707 完成\n",
-      "任务 20250703 完成\n",
      "任务 20250704 完成\n",
+      "任务 20250703 完成\n",
      "任务 20250702 完成\n",
      "任务 20250701 完成\n",
      "任务 20250630 完成\n",
      "任务 20250627 完成\n",
-      "任务 20250626 完成\n",
      "任务 20250625 完成\n",
+      "任务 20250626 完成\n",
      "任务 20250624 完成\n",
      "任务 20250623 完成\n",
      "任务 20250620 完成\n",
      "任务 20250619 完成\n",
-      "任务 20250617 完成\n",
      "任务 20250618 完成\n",
+      "任务 20250617 完成\n",
      "任务 20250616 完成\n",
      "任务 20250613 完成\n",
      "任务 20250612 完成\n",
@@ -126,16 +126,14 @@
      "任务 20250523 完成\n",
      "任务 20250522 完成\n",
      "任务 20250521 完成\n",
-      "任务 20250519 完成\n",
      "任务 20250520 完成\n",
+      "任务 20250519 完成\n",
      "任务 20250516 完成\n",
      "任务 20250515 完成\n",
      "任务 20250514 完成\n",
      "任务 20250513 完成\n",
      "任务 20250512 完成\n",
-      "任务 20250509 完成\n",
-      "任务 20250508 完成\n",
-      "任务 20250507 完成\n"
+      "任务 20250509 完成\n"
     ]
    }
   ],
--- a/main/data/update/update_stk_limit.ipynb
+++ b/main/data/update/update_stk_limit.ipynb
@@ -34,23 +34,23 @@
     "output_type": "stream",
     "text": [
      "        ts_code trade_date\n",
-      "4745  600276.SH   20250506\n",
-      "4746  600278.SH   20250506\n",
-      "4747  600279.SH   20250506\n",
-      "4736  600262.SH   20250506\n",
-      "281   000791.SZ   20250506\n",
+      "2364  300067.SZ   20250508\n",
+      "2363  300066.SZ   20250508\n",
+      "2362  300065.SZ   20250508\n",
+      "2373  300076.SZ   20250508\n",
+      "7111  920819.BJ   20250508\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 10436295 entries, 0 to 113592\n",
+      "Index: 10450519 entries, 0 to 7111\n",
      "Data columns (total 2 columns):\n",
      " #   Column      Dtype \n",
      "---  ------      ----- \n",
      " 0   ts_code     object\n",
      " 1   trade_date  object\n",
      "dtypes: object(2)\n",
-      "memory usage: 238.9+ MB\n",
+      "memory usage: 239.2+ MB\n",
      "None\n",
-      "20250506\n",
-      "20250507\n"
+      "20250508\n",
+      "20250509\n"
     ]
    }
   ],
@@ -123,11 +123,11 @@
      "任务 20250609 完成\n",
      "任务 20250606 完成\n",
      "任务 20250605 完成\n",
-      "任务 20250603 完成\n",
      "任务 20250604 完成\n",
-      "任务 20250529 完成\n",
+      "任务 20250603 完成\n",
      "任务 20250530 完成\n",
      "任务 20250528 完成\n",
+      "任务 20250529 完成\n",
      "任务 20250527 完成\n",
      "任务 20250526 完成\n",
      "任务 20250523 完成\n",
@@ -140,9 +140,7 @@
      "任务 20250514 完成\n",
      "任务 20250513 完成\n",
      "任务 20250512 完成\n",
-      "任务 20250509 完成\n",
-      "任务 20250508 完成\n",
-      "任务 20250507 完成\n"
+      "任务 20250509 完成\n"
     ]
    }
   ],
@@ -194,19 +192,19 @@
     "output_type": "stream",
     "text": [
      "[     trade_date    ts_code  up_limit  down_limit\n",
-      "0      20250507  000001.SZ     12.06        9.86\n",
-      "1      20250507  000002.SZ      7.51        6.15\n",
-      "2      20250507  000004.SZ      7.95        7.19\n",
-      "3      20250507  000006.SZ      7.11        5.81\n",
-      "4      20250507  000007.SZ      7.50        6.14\n",
+      "0      20250509  000001.SZ     12.19        9.97\n",
+      "1      20250509  000002.SZ      7.57        6.19\n",
+      "2      20250509  000004.SZ      7.86        7.12\n",
+      "3      20250509  000006.SZ      7.33        5.99\n",
+      "4      20250509  000007.SZ      7.66        6.26\n",
      "...         ...        ...       ...         ...\n",
-      "7107   20250507  920445.BJ     13.42        7.24\n",
-      "7108   20250507  920489.BJ     31.69       17.07\n",
-      "7109   20250507  920682.BJ     16.41        8.85\n",
-      "7110   20250507  920799.BJ     78.58       42.32\n",
-      "7111   20250507  920819.BJ      5.82        3.14\n",
+      "7109   20250509  920445.BJ     13.14        7.08\n",
+      "7110   20250509  920489.BJ     31.70       17.08\n",
+      "7111   20250509  920682.BJ     16.17        8.71\n",
+      "7112   20250509  920799.BJ     78.39       42.21\n",
+      "7113   20250509  920819.BJ      5.74        3.10\n",
      "\n",
-      "[7112 rows x 4 columns]]\n"
+      "[7114 rows x 4 columns]]\n"
     ]
    }
   ],
--- a/main/factor/pycache/factor.cpython-311.pyc
+++ b/main/factor/pycache/factor.cpython-311.pyc
--- a/main/factor/factor.py
+++ b/main/factor/factor.py
@@ -2547,3 +2547,305 @@ def limit_factor(df: pd.DataFrame) -> pd.DataFrame:
        lambda x: calculate_consecutive_limits(x)[0]
    )
    return df
+
+
+import pandas as pd
+import numpy as np
+
+# 假设 df 已经加载并包含 'ts_code', 'trade_date', 'pct_chg' 列
+# 并且已经按照 'ts_code' 和 'trade_date' 进行了排序
+
+def daily_momentum_benchmark(df):
+    """
+    Attach market-wide daily momentum benchmarks derived from 'pct_chg'.
+
+    For every 'trade_date', the mean 'pct_chg' of the rows with
+    pct_chg > 0 and the mean of the rows with pct_chg < 0 are computed and
+    broadcast back onto every row of that date.
+
+    Args:
+        df (pd.DataFrame): Daily stock data containing at least the
+            'trade_date' and 'pct_chg' columns. Row order does not matter.
+
+    Returns:
+        pd.DataFrame: A new frame (the result of a left merge on
+        'trade_date', so it carries a fresh default integer index) with two
+        extra columns, 'daily_positive_benchmark' and
+        'daily_negative_benchmark'. A date with no rising (or no falling)
+        rows gets 0 for that benchmark. If 'pct_chg' is missing, an error is
+        printed and the input is returned unchanged.
+    """
+    print("--- 计算日级别动量基准 (使用 pct_chg) ---")
+
+    # Nothing can be computed without the daily percentage change.
+    if 'pct_chg' not in df.columns:
+        print("错误: DataFrame中没有'pct_chg'列，无法计算日级别动量基准。")
+        return df
+
+    benchmark_cols = ['daily_positive_benchmark', 'daily_negative_benchmark']
+
+    # Mask out the rows of the opposite sign, then take one vectorised
+    # per-date mean. NaN is skipped by mean(), so this equals
+    # x[x > 0].mean() / x[x < 0].mean() without a Python call per group.
+    pct = df['pct_chg']
+    daily_benchmarks = (
+        df[['trade_date']]
+        .assign(
+            daily_positive_benchmark=pct.where(pct > 0),
+            daily_negative_benchmark=pct.where(pct < 0),
+        )
+        .groupby('trade_date', as_index=False)
+        .mean()
+    )
+
+    # Broadcast the per-date benchmarks back onto every row.
+    df = pd.merge(
+        df,
+        daily_benchmarks,
+        on='trade_date',
+        how='left'
+    )
+
+    # 0 means "no same-direction benchmark that day". Assign the result
+    # instead of calling fillna(inplace=True) on a column selection, which is
+    # deprecated chained assignment and does nothing under Copy-on-Write.
+    df[benchmark_cols] = df[benchmark_cols].fillna(0)
+
+    print("日级别动量基准计算完成 (使用 pct_chg)。")
+    return df
+
+def daily_deviation(df):
+    """
+    Compute each row's deviation from the same-direction daily benchmark.
+
+    The benchmark columns 'daily_positive_benchmark' and
+    'daily_negative_benchmark' are computed via daily_momentum_benchmark(df)
+    when they are not both already present, and in that case they are removed
+    again before returning. Benchmarks supplied by the caller are used as-is
+    and left in the result.
+
+    Rule:
+        pct_chg > 0 and positive benchmark > 0 -> pct_chg - positive benchmark
+        pct_chg < 0 and negative benchmark < 0 -> pct_chg - negative benchmark
+        otherwise                              -> 0
+
+    Args:
+        df (pd.DataFrame): Daily stock data with 'trade_date' and 'pct_chg'
+            (and optionally the two benchmark columns).
+
+    Returns:
+        pd.DataFrame: A new frame with an added 'daily_deviation' column. If
+        the required columns cannot be obtained, an error is printed and the
+        frame is returned without that column.
+    """
+    print("--- 计算日级别偏离度 (使用 pct_chg) ---")
+
+    benchmark_cols = ['daily_positive_benchmark', 'daily_negative_benchmark']
+
+    # Only compute the benchmarks when the caller has not provided them:
+    # merging them in a second time would yield *_x / *_y suffixed columns
+    # and make the required-column check below fail.
+    computed_here = not all(col in df.columns for col in benchmark_cols)
+    if computed_here:
+        df = daily_momentum_benchmark(df)
+    else:
+        # Work on a copy so the caller's frame is not mutated.
+        df = df.copy()
+
+    required_cols = ['pct_chg'] + benchmark_cols
+    if not all(col in df.columns for col in required_cols):
+        print(f"错误: 计算日级别偏离度需要以下列: {required_cols}。请先运行 daily_momentum_benchmark(df)。")
+        return df
+
+    conditions = [
+        (df['pct_chg'] > 0) & (df['daily_positive_benchmark'] > 0),
+        (df['pct_chg'] < 0) & (df['daily_negative_benchmark'] < 0)
+    ]
+    choices = [
+        df['pct_chg'] - df['daily_positive_benchmark'],
+        df['pct_chg'] - df['daily_negative_benchmark']
+    ]
+    df['daily_deviation'] = np.select(conditions, choices, default=0)
+
+    # Remove only the helper columns this call created.
+    if computed_here:
+        df = df.drop(columns=benchmark_cols)
+    print("日级别偏离度计算完成 (使用 pct_chg)。")
+    return df
+
+
+def daily_industry_momentum_benchmark(df):
+    """
+    Attach per-industry daily momentum benchmarks derived from 'pct_chg'.
+
+    For every ('trade_date', 'cat_l2_code') pair, the mean 'pct_chg' of the
+    rows with pct_chg > 0 and the mean of the rows with pct_chg < 0 are
+    computed and broadcast back onto every row of that pair.
+
+    Args:
+        df (pd.DataFrame): Daily stock data containing the 'ts_code',
+            'trade_date', 'pct_chg' and 'cat_l2_code' columns. Row order does
+            not matter.
+
+    Returns:
+        pd.DataFrame: A new frame (the result of a left merge on
+        ['trade_date', 'cat_l2_code'], so it carries a fresh default integer
+        index) with two extra columns, 'daily_industry_positive_benchmark'
+        and 'daily_industry_negative_benchmark'. A group with no rising (or
+        no falling) rows gets 0 for that benchmark. If any required column
+        is missing, an error is printed and the input is returned unchanged.
+    """
+    print("--- 计算日级别行业动量基准 (使用 pct_chg 和 cat_l2_code) ---")
+
+    # All four columns are demanded up front, matching the error message.
+    required_cols = ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']
+    if not all(col in df.columns for col in required_cols):
+        print(f"错误: 计算日级别行业动量基准需要以下列: {required_cols}。")
+        return df
+
+    group_keys = ['trade_date', 'cat_l2_code']
+    benchmark_cols = [
+        'daily_industry_positive_benchmark',
+        'daily_industry_negative_benchmark',
+    ]
+
+    # Mask out the rows of the opposite sign, then take one vectorised
+    # per-group mean. NaN is skipped by mean(), so this equals
+    # x[x > 0].mean() / x[x < 0].mean() without a Python call per group.
+    pct = df['pct_chg']
+    industry_daily_benchmarks = (
+        df[group_keys]
+        .assign(
+            daily_industry_positive_benchmark=pct.where(pct > 0),
+            daily_industry_negative_benchmark=pct.where(pct < 0),
+        )
+        .groupby(group_keys, as_index=False)
+        .mean()
+    )
+
+    # Broadcast the per-group benchmarks back onto every row.
+    df = pd.merge(
+        df,
+        industry_daily_benchmarks,
+        on=group_keys,
+        how='left'
+    )
+
+    # 0 means "no same-direction benchmark for that industry and day".
+    # Assign the result instead of calling fillna(inplace=True) on a column
+    # selection, which is deprecated chained assignment and does nothing
+    # under Copy-on-Write.
+    df[benchmark_cols] = df[benchmark_cols].fillna(0)
+
+    print("日级别行业动量基准计算完成 (使用 pct_chg 和 cat_l2_code)。")
+    return df
+
+def daily_industry_deviation(df):
+    """
+    Compute each row's deviation from its industry's same-direction benchmark.
+
+    The benchmark columns 'daily_industry_positive_benchmark' and
+    'daily_industry_negative_benchmark' are computed via
+    daily_industry_momentum_benchmark(df) when they are not both already
+    present, and in that case they are removed again before returning.
+    Benchmarks supplied by the caller are used as-is and left in the result.
+
+    Rule:
+        pct_chg > 0 and positive benchmark > 0 -> pct_chg - positive benchmark
+        pct_chg < 0 and negative benchmark < 0 -> pct_chg - negative benchmark
+        otherwise                              -> 0
+
+    Args:
+        df (pd.DataFrame): Daily stock data with 'ts_code', 'trade_date',
+            'pct_chg' and 'cat_l2_code' (and optionally the two benchmark
+            columns).
+
+    Returns:
+        pd.DataFrame: A new frame with an added 'daily_industry_deviation'
+        column. If the required columns cannot be obtained, an error is
+        printed and the frame is returned without that column.
+    """
+    print("--- 计算日级别行业偏离度 (使用 pct_chg 和行业基准) ---")
+
+    benchmark_cols = [
+        'daily_industry_positive_benchmark',
+        'daily_industry_negative_benchmark',
+    ]
+
+    # Only compute the benchmarks when the caller has not provided them:
+    # merging them in a second time would yield *_x / *_y suffixed columns
+    # and make the required-column check below fail.
+    computed_here = not all(col in df.columns for col in benchmark_cols)
+    if computed_here:
+        df = daily_industry_momentum_benchmark(df)
+    else:
+        # Work on a copy so the caller's frame is not mutated.
+        df = df.copy()
+
+    required_cols = ['pct_chg'] + benchmark_cols
+    if not all(col in df.columns for col in required_cols):
+        print(f"错误: 计算日级别行业偏离度需要以下列: {required_cols}。请先运行 daily_industry_momentum_benchmark(df)。")
+        return df
+
+    conditions = [
+        (df['pct_chg'] > 0) & (df['daily_industry_positive_benchmark'] > 0),
+        (df['pct_chg'] < 0) & (df['daily_industry_negative_benchmark'] < 0)
+    ]
+    choices = [
+        df['pct_chg'] - df['daily_industry_positive_benchmark'],
+        df['pct_chg'] - df['daily_industry_negative_benchmark']
+    ]
+    df['daily_industry_deviation'] = np.select(conditions, choices, default=0)
+
+    # Remove only the helper columns this call created.
+    if computed_here:
+        df = df.drop(columns=benchmark_cols)
+    print("日级别行业偏离度计算完成 (使用 pct_chg 和行业基准)。")
+    return df
+
+
+def sentiment_panic_greed_index(df: pd.DataFrame, window_atr: int = 14, window_smooth: int = 5, factor_name: str = 'senti_panic_greed'):
+    """
+    Compute a panic/greed sentiment factor and store it in df[factor_name].
+
+    raw = (true range / (ATR + epsilon)) * sign(pct_chg) + 2 * opening gap
+    factor = rolling mean of raw over `window_smooth` rows (min_periods=1)
+
+    where the opening gap is open / previous close - 1 (0 where undefined).
+    The rolling and shift operations run over the frame as a whole.
+    NOTE(review): this presumably expects a single instrument in date order;
+    confirm against the caller.
+
+    WARNING: modifies `df` in place (adds/overwrites `factor_name`).
+
+    Args:
+        df: Frame with 'open', 'high', 'low', 'close', 'vol' and 'pct_chg'.
+        window_atr: ATR period passed to talib.ATR.
+        window_smooth: Window of the final rolling mean.
+        factor_name: Name of the output column.
+
+    Returns:
+        The same `df` object. The factor column is all NaN when required
+        columns are missing or the computation raises.
+    """
+    print(f"Calculating {factor_name}...")
+    if not all(col in df.columns for col in ['open', 'high', 'low', 'close', 'vol']):
+        print(f"Error: DataFrame 缺少必需的 OHLCV 列。将为 {factor_name} 填充 NaN。")
+        df[factor_name] = np.nan
+        # Return the frame (not None) so `df = f(df)` call sites keep working.
+        return df
+
+    try:
+        # TA-Lib derives the previous close internally, so it must be fed the
+        # unshifted close; passing a shifted series would compare against the
+        # close from two bars back.
+        true_range = talib.TRANGE(df['high'], df['low'], df['close'])
+        atr = talib.ATR(df['high'], df['low'], df['close'], timeperiod=window_atr)
+
+        # Opening gap relative to the previous close; first row has no
+        # previous close and is treated as "no gap".
+        gap = (df['open'] / df['close'].shift(1) - 1).fillna(0)
+
+        # Volatility relative to recent ATR, signed by the day's direction,
+        # plus the gap with double weight.
+        direction = np.sign(df['pct_chg'].fillna(0))
+        raw_senti = (true_range / (atr + epsilon)) * direction + gap * 2
+        df[factor_name] = raw_senti.rolling(window_smooth, min_periods=1).mean()
+    except Exception as e:
+        # Best-effort factor: report and fall back to NaN instead of aborting
+        # the whole factor pipeline.
+        print(f"Error calculating {factor_name}: {e}")
+        df[factor_name] = np.nan
+
+    # Returning after the try block (rather than inside `finally`) lets
+    # KeyboardInterrupt/SystemExit propagate instead of being swallowed.
+    print(f"Finished {factor_name}.")
+    return df
+
+
+def sentiment_market_breadth_proxy(df: pd.DataFrame, window_vol: int = 20, window_smooth: int = 3, factor_name: str = 'senti_breadth_proxy'):
+    """
+    Compute a price/volume agreement factor and store it in df[factor_name].
+
+    raw = pct_chg * vol / (rolling mean of vol over `window_vol` + epsilon)
+    factor = rolling mean of raw over `window_smooth` rows (min_periods=1)
+
+    The volume average uses min_periods = max(1, window_vol // 2).
+
+    WARNING: modifies `df` in place (adds/overwrites `factor_name`).
+
+    Args:
+        df: Frame with 'pct_chg' and 'vol' columns.
+        window_vol: Window of the rolling average volume.
+        window_smooth: Window of the final rolling mean.
+        factor_name: Name of the output column.
+
+    Returns:
+        The same `df` object. The factor column is all NaN when required
+        columns are missing or the computation raises.
+    """
+    print(f"Calculating {factor_name}...")
+    if not all(col in df.columns for col in ['pct_chg', 'vol']):
+        print(f"Error: DataFrame 缺少 'pct_chg' 或 'vol' 列。将为 {factor_name} 填充 NaN。")
+        df[factor_name] = np.nan
+        # Return the frame (not None) so `df = f(df)` call sites keep working.
+        return df
+
+    try:
+        rolling_avg_vol = df['vol'].rolling(window_vol, min_periods=max(1, window_vol//2)).mean()
+        # Daily change weighted by how heavy today's volume is versus recent
+        # average volume.
+        raw_breadth = df['pct_chg'] * (df['vol'] / (rolling_avg_vol + epsilon))
+        df[factor_name] = raw_breadth.rolling(window_smooth, min_periods=1).mean()
+    except Exception as e:
+        # Best-effort factor: report and fall back to NaN instead of aborting
+        # the whole factor pipeline.
+        print(f"Error calculating {factor_name}: {e}")
+        df[factor_name] = np.nan
+
+    # Returning after the try block (rather than inside `finally`) lets
+    # KeyboardInterrupt/SystemExit propagate instead of being swallowed.
+    print(f"Finished {factor_name}.")
+    return df
+    
+def sentiment_reversal_indicator(df: pd.DataFrame, window_ret: int = 5, window_vol: int = 5, factor_name: str = 'senti_reversal'):
+    """
+    Compute a short-term reversal factor and store it in df[factor_name].
+
+    factor = -(close / close.shift(window_ret) - 1)
+             * rolling std of pct_chg over `window_vol` rows
+
+    The rolling std uses min_periods = max(1, window_vol // 2).
+
+    WARNING: modifies `df` in place (adds/overwrites `factor_name`).
+
+    Args:
+        df: Frame with 'pct_chg' and 'close' columns.
+        window_ret: Look-back (in rows) of the cumulative return.
+        window_vol: Window of the realised-volatility estimate.
+        factor_name: Name of the output column.
+
+    Returns:
+        The same `df` object. The factor column is all NaN when 'pct_chg' is
+        missing or the computation raises (e.g. 'close' is missing).
+    """
+    print(f"Calculating {factor_name}...")
+    if 'pct_chg' not in df.columns:
+        print(f"Error: DataFrame 缺少 'pct_chg' 列。将为 {factor_name} 填充 NaN。")
+        df[factor_name] = np.nan
+        # Return the frame (not None) so `df = f(df)` call sites keep working.
+        return df
+
+    try:
+        # Cumulative return over the last `window_ret` rows.
+        return_m = (df['close'] / df['close'].shift(window_ret)) - 1
+
+        # Realised volatility of the daily changes.
+        volatility_m = df['pct_chg'].rolling(window_vol, min_periods=max(1, window_vol//2)).std()
+
+        # Negative sign: a strong recent move scores as a reversal candidate
+        # in the opposite direction, scaled by volatility.
+        df[factor_name] = -return_m * volatility_m
+    except Exception as e:
+        # Best-effort factor: report and fall back to NaN instead of aborting
+        # the whole factor pipeline.
+        print(f"Error calculating {factor_name}: {e}")
+        df[factor_name] = np.nan
+
+    # Returning after the try block (rather than inside `finally`) lets
+    # KeyboardInterrupt/SystemExit propagate instead of being swallowed.
+    print(f"Finished {factor_name}.")
+    return df
--- a/main/test.txt
+++ b/main/test.txt
--- a/main/train/Classify2.ipynb
+++ b/main/train/Classify2.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
   "id": "79a7758178bafdd3",
   "metadata": {
    "ExecuteTime": {
@@ -18,6 +18,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n",
      "e:\\PyProject\\NewStock\\main\\train\n"
     ]
    }
@@ -44,7 +46,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 9,
   "id": "a79cafb06a7e0e43",
   "metadata": {
    "ExecuteTime": {
@@ -68,7 +70,7 @@
      "cyq perf\n",
      "left merge on ['ts_code', 'trade_date']\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 8601132 entries, 0 to 8601131\n",
+      "RangeIndex: 8611848 entries, 0 to 8611847\n",
      "Data columns (total 32 columns):\n",
      " #   Column         Dtype         \n",
      "---  ------         -----         \n",
@@ -143,7 +145,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
   "id": "cac01788dac10678",
   "metadata": {
    "ExecuteTime": {
@@ -211,7 +213,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 11,
   "id": "c4e9e1d31da6dba6",
   "metadata": {
    "ExecuteTime": {
@@ -224,6 +226,8 @@
   },
   "outputs": [],
   "source": [
+    "from main.factor.factor import *\n",
+    "\n",
    "def calculate_indicators(df):\n",
    "    \"\"\"\n",
    "    计算四个指标：当日涨跌幅、5日移动平均、RSI、MACD。\n",
@@ -261,6 +265,10 @@
    "    df['amount_mean'] = df['amount'].rolling(window=20).mean()  # 过去20天的平均成交额\n",
    "    df['amount_change_rate'] = (df['amount'] - df['amount_mean']) / df['amount_mean'] * 100  # 成交额变化率\n",
    "\n",
+    "    # df = sentiment_panic_greed_index(df)\n",
+    "    # df = sentiment_market_breadth_proxy(df)\n",
+    "    # df = sentiment_reversal_indicator(df)\n",
+    "\n",
    "    return df\n",
    "\n",
    "\n",
@@ -283,8 +291,10 @@
    "    df_final = df_all_indicators.pivot_table(\n",
    "        index='trade_date',\n",
    "        columns='ts_code',\n",
-    "        values=['daily_return', 'RSI', 'MACD', 'Signal_line',\n",
-    "                'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',\n",
+    "        values=['daily_return', \n",
+    "                'RSI', 'MACD', 'Signal_line', 'MACD_hist', \n",
+    "                # 'sentiment_panic_greed_index',\n",
+    "                'up_ratio_20d', 'volume_change_rate', 'volatility',\n",
    "                'amount_change_rate', 'amount_mean'],\n",
    "        aggfunc='last'\n",
    "    )\n",
@@ -303,7 +313,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 12,
   "id": "a735bc02ceb4d872",
   "metadata": {
    "ExecuteTime": {
@@ -319,7 +329,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 30,
   "id": "53f86ddc0677a6d7",
   "metadata": {
    "ExecuteTime": {
@@ -368,6 +378,10 @@
    "        lambda x: x.rank(pct=True))\n",
    "    industry_data['return_20_percentile'] = industry_data.groupby('trade_date')['return_20'].transform(\n",
    "        lambda x: x.rank(pct=True))\n",
+    "\n",
+    "    # cs_rank_intraday_range(industry_data)\n",
+    "    # cs_rank_close_pos_in_range(industry_data)\n",
+    "\n",
    "    industry_data = industry_data.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])\n",
    "\n",
    "    industry_data = industry_data.rename(\n",
@@ -382,7 +396,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 14,
   "id": "dbe2fd8021b9417f",
   "metadata": {
    "ExecuteTime": {
@@ -410,7 +424,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 15,
   "id": "85c3e3d0235ffffa",
   "metadata": {
    "ExecuteTime": {
@@ -433,7 +447,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 16,
   "id": "92d84ce15a562ec6",
   "metadata": {
    "ExecuteTime": {
@@ -459,6 +473,14 @@
      "使用 'ann_date' 作为财务数据生效日期。\n",
      "警告: 从 financial_data_subset 中移除了 366 行，因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
      "计算 BBI...\n",
+      "--- 计算日级别偏离度 (使用 pct_chg) ---\n",
+      "--- 计算日级别动量基准 (使用 pct_chg) ---\n",
+      "日级别动量基准计算完成 (使用 pct_chg)。\n",
+      "日级别偏离度计算完成 (使用 pct_chg)。\n",
+      "--- 计算日级别行业偏离度 (使用 pct_chg 和行业基准) ---\n",
+      "--- 计算日级别行业动量基准 (使用 pct_chg 和 cat_l2_code) ---\n",
+      "错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
+      "错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
      "Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
      "       'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
      "       'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol',\n",
@@ -468,9 +490,9 @@
      "       'winner_rate', 'l2_code', 'undist_profit_ps', 'ocfps', 'AR', 'BR',\n",
      "       'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio',\n",
      "       'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor',\n",
-      "       'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol',\n",
-      "       'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol',\n",
-      "       'lg_elg_buy_prop', 'flow_struct_buy_change',\n",
+      "       'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity',\n",
+      "       'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio',\n",
+      "       'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change',\n",
      "       'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel',\n",
      "       'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy',\n",
      "       'cost_support_15pct_change', 'cat_winner_price_zone',\n",
@@ -519,7 +541,7 @@
      "Calculating cs_rank_flow_divergence...\n",
      "Finished cs_rank_flow_divergence.\n",
      "Calculating cs_rank_ind_adj_lg_flow...\n",
-      "Error calculating cs_rank_ind_adj_lg_flow: Missing 'cat_l2_code' column. Assigning NaN.\n",
+      "Finished cs_rank_ind_adj_lg_flow.\n",
      "Calculating cs_rank_elg_buy_ratio...\n",
      "Finished cs_rank_elg_buy_ratio.\n",
      "Calculating cs_rank_rel_profit_margin...\n",
@@ -555,12 +577,12 @@
      "Calculating cs_rank_size...\n",
      "Finished cs_rank_size.\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 4503567 entries, 0 to 4503566\n",
-      "Columns: 177 entries, ts_code to cs_rank_size\n",
-      "dtypes: bool(10), datetime64[ns](1), float64(161), int32(3), object(2)\n",
+      "RangeIndex: 4509585 entries, 0 to 4509584\n",
+      "Columns: 178 entries, ts_code to cs_rank_size\n",
+      "dtypes: bool(10), datetime64[ns](1), float64(162), int32(3), object(2)\n",
      "memory usage: 5.6+ GB\n",
      "None\n",
-      "['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 
'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
+      "['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 
'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
     ]
    }
   ],
@@ -595,9 +617,14 @@
    "df = turnover_rate_n(df, n=5)\n",
    "df = variance_n(df, n=20)\n",
    "df = bbi_ratio_factor(df)\n",
+    "df = daily_deviation(df)\n",
+    "df = daily_industry_deviation(df)\n",
    "df, _ = get_rolling_factor(df)\n",
    "df, _ = get_simple_factor(df)\n",
    "\n",
+    "df = df.rename(columns={'l1_code': 'cat_l1_code'})\n",
+    "df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
+    "\n",
    "lg_flow_mom_corr(df, N=20, M=60)\n",
    "lg_flow_accel(df)\n",
    "profit_pressure(df)\n",
@@ -636,8 +663,6 @@
    "cs_rank_cost_dist_vol_ratio(df) # Needs volume_ratio\n",
    "cs_rank_size(df) # Needs circ_mv\n",
    "\n",
-    "df = df.rename(columns={'l1_code': 'cat_l1_code'})\n",
-    "df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
    "\n",
    "# df = df.merge(index_data, on='trade_date', how='left')\n",
    "\n",
@@ -647,7 +672,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 17,
   "id": "b87b938028afa206",
   "metadata": {
    "ExecuteTime": {
@@ -685,7 +710,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 18,
   "id": "f4f16d63ad18d1bc",
   "metadata": {
    "ExecuteTime": {
@@ -931,7 +956,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 19,
   "id": "40e6b68a91b30c79",
   "metadata": {
    "ExecuteTime": {
@@ -1217,12 +1242,48 @@
    "    n_actual = min(n, len(rankic_series))\n",
    "    top_features = rankic_series.sort_values(ascending=False).head(n_actual).index.tolist()\n",
    "    top_features = [col for col in feature_columns if col in top_features or col not in numeric_columns]\n",
-    "    return top_features"
+    "    return top_features\n",
+    "\n",
+    "def create_deviation_within_dates(df, feature_columns):\n",
+    "    groupby_col = 'cat_l2_code'  # industry column; deviations are taken within each (trade_date, industry) group\n",
+    "    new_columns = {}\n",
+    "    ret_feature_columns = feature_columns[:]\n",
+    "\n",
+    "    # 自动选择所有数值型特征\n",
+    "    num_features = [col for col in feature_columns if 'cat' not in col and 'index' not in col]\n",
+    "\n",
+    "    # num_features = ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'cat_vol_spike', 'obv', 'maobv_6', 'return_5', 'return_10', 'return_20', 'std_return_5', 'std_return_15', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'alpha_022', 'alpha_003', 'alpha_007', 'alpha_013']\n",
+    "    num_features = [col for col in num_features if 'cat' not in col and 'industry' not in col]\n",
+    "    num_features = [col for col in num_features if 'limit' not in col]\n",
+    "    num_features = [col for col in num_features if 'cyq' not in col]\n",
+    "\n",
+    "    # 遍历所有数值型特征\n",
+    "    for feature in num_features:\n",
+    "        if feature == 'trade_date':  # 不需要对 'trade_date' 计算偏差\n",
+    "            continue\n",
+    "\n",
+    "        # grouped_mean = df.groupby(['trade_date'])[feature].transform('mean')\n",
+    "        # deviation_col_name = f'deviation_mean_{feature}'\n",
+    "        # new_columns[deviation_col_name] = df[feature] - grouped_mean\n",
+    "        # ret_feature_columns.append(deviation_col_name)\n",
+    "\n",
+    "        grouped_mean = df.groupby(['trade_date', groupby_col])[feature].transform('mean')\n",
+    "        deviation_col_name = f'deviation_mean_{feature}'\n",
+    "        new_columns[deviation_col_name] = df[feature] - grouped_mean\n",
+    "        ret_feature_columns.append(deviation_col_name)\n",
+    "\n",
+    "    # 将新计算的偏差特征与原始 DataFrame 合并\n",
+    "    df = pd.concat([df, pd.DataFrame(new_columns)], axis=1)\n",
+    "\n",
+    "    # for feature in ['obv', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4']:\n",
+    "    #     df[f'deviation_industry_{feature}'] = df[feature] - df[f'industry_{feature}']\n",
+    "\n",
+    "    return df, ret_feature_columns\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 20,
   "id": "47c12bb34062ae7a",
   "metadata": {
    "ExecuteTime": {
@@ -1256,7 +1317,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
   "id": "29221dde",
   "metadata": {},
   "outputs": [],
@@ -1288,13 +1349,13 @@
    "\n",
    "# df = fill_nan_with_daily_median(df, feature_columns)\n",
    "for feature_col in [col for col in feature_columns if col in df.columns]:\n",
-    "    median_val = df[feature_col].median()\n",
+    "    # median_val = df[feature_col].median()\n",
    "    df[feature_col].fillna(0, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 22,
   "id": "b76ea08a",
   "metadata": {},
   "outputs": [
@@ -1302,11 +1363,11 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "        ts_code trade_date  log_circ_mv\n",
-      "0     000001.SZ 2019-01-02    16.574219\n",
-      "2738  000001.SZ 2019-01-03    16.583965\n",
-      "5477  000001.SZ 2019-01-04    16.633371\n",
-      "['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 
'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', '399006.SZ_volume_change_rate']\n",
+      "     ts_code trade_date  log_circ_mv\n",
+      "0  000001.SZ 2019-01-02    16.574219\n",
+      "1  000001.SZ 2019-01-03    16.583965\n",
+      "2  000001.SZ 2019-01-04    16.633371\n",
+      "['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', 'industry_cs_rank_intraday_range', 'industry_cs_rank_close_pos_in_range', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', 
'000852.SH_volume_change_rate', '000905.SH_volume_change_rate', '399006.SZ_volume_change_rate']\n",
      "去除极值\n",
      "开始截面 MAD 去极值处理 (k=3.0)...\n"
     ]
@@ -1315,7 +1376,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "MAD Filtering: 100%|██████████| 130/130 [00:28<00:00,  4.62it/s]\n"
+      "MAD Filtering: 100%|██████████| 131/131 [00:27<00:00,  4.69it/s]\n"
     ]
    },
    {
@@ -1330,7 +1391,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "MAD Filtering: 100%|██████████| 130/130 [00:23<00:00,  5.55it/s]\n"
+      "MAD Filtering: 100%|██████████| 131/131 [00:23<00:00,  5.52it/s]\n"
     ]
    },
    {
@@ -1368,25 +1429,26 @@
     "output_type": "stream",
     "text": [
      "截面 MAD 去极值处理完成。\n",
-      "feature_columns: ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', 
'399006.SZ_volume_change_rate']\n",
+      "feature_columns: ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 
'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', 'industry_cs_rank_intraday_range', 'industry_cs_rank_close_pos_in_range', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', 
'399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', '399006.SZ_volume_change_rate']\n",
      "df最小日期: 2019-01-02\n",
-      "df最大日期: 2025-05-07\n",
-      "2057678\n",
+      "df最大日期: 2025-05-09\n",
+      "2057671\n",
      "train_data最小日期: 2020-01-02\n",
      "train_data最大日期: 2022-12-30\n",
-      "1730630\n",
+      "1736644\n",
      "test_data最小日期: 2023-01-03\n",
-      "test_data最大日期: 2025-05-07\n",
-      "        ts_code trade_date  log_circ_mv\n",
-      "0     000001.SZ 2019-01-02    16.574219\n",
-      "2738  000001.SZ 2019-01-03    16.583965\n",
-      "5477  000001.SZ 2019-01-04    16.633371\n"
+      "test_data最大日期: 2025-05-09\n",
+      "     ts_code trade_date  log_circ_mv\n",
+      "0  000001.SZ 2019-01-02    16.574219\n",
+      "1  000001.SZ 2019-01-03    16.583965\n",
+      "2  000001.SZ 2019-01-04    16.633371\n"
     ]
    }
   ],
   "source": [
-    "train_data = df[filter_index & (df['trade_date'] <= '2023-01-01') & (df['trade_date'] >= '2020-01-01')]\n",
-    "test_data = df[(df['trade_date'] >= '2023-01-01')]\n",
+    "split_date = '2023-01-01'\n",
+    "train_data = df[filter_index & (df['trade_date'] <= split_date) & (df['trade_date'] >= '2020-01-01')]\n",
+    "test_data = df[(df['trade_date'] >= split_date)]\n",
    "\n",
    "print(df[['ts_code', 'trade_date', 'log_circ_mv']].head(3))\n",
    "\n",
@@ -1401,8 +1463,8 @@
    "train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan), test_data.replace([np.inf, -np.inf], np.nan)\n",
    "\n",
    "# feature_columns_new = feature_columns[:]\n",
-    "# train_data, _ = create_deviation_within_dates(train_data, feature_columns)\n",
-    "# test_data, _ = create_deviation_within_dates(test_data, feature_columns)\n",
+    "# train_data, _ = create_deviation_within_dates(train_data, [col for col in feature_columns if col in train_data.columns])\n",
+    "# test_data, _ = create_deviation_within_dates(test_data, [col for col in feature_columns if col in train_data.columns])\n",
    "\n",
    "# feature_columns = [\n",
    "#        'undist_profit_ps', \n",
@@ -1511,75 +1573,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
-   "id": "e23d1759",
+   "execution_count": 34,
+   "id": "3ff2d1c5",
   "metadata": {},
   "outputs": [],
-   "source": [
-    "# feature_columns = [\n",
-    "#        'undist_profit_ps', \n",
-    "#        'AR_BR', \n",
-    "#     #    'pe_ttm',\n",
-    "#     #    'alpha_22_improved', \n",
-    "#     #    'alpha_003', \n",
-    "#     #    'alpha_007', \n",
-    "#     #    'alpha_013', \n",
-    "#     #    'cat_up_limit', \n",
-    "#     #    'cat_down_limit', \n",
-    "#     #    'up_limit_count_10d', \n",
-    "#     #    'down_limit_count_10d', \n",
-    "#     #    'consecutive_up_limit', \n",
-    "#     #    'vol_break', \n",
-    "#     #    'weight_roc5', \n",
-    "#     #    'price_cost_divergence', \n",
-    "#     #    'smallcap_concentration', \n",
-    "#     #    'cost_stability', \n",
-    "#     #    'high_cost_break_days', \n",
-    "#     #    'liquidity_risk', \n",
-    "#     #    'turnover_std', \n",
-    "#     #    'mv_volatility', \n",
-    "#     #    'volume_growth', \n",
-    "#     #    'mv_growth', \n",
-    "#     #    'lg_flow_mom_corr_20_60', \n",
-    "#     #    'lg_flow_accel', \n",
-    "#     #    'profit_pressure', \n",
-    "#     #    'underwater_resistance', \n",
-    "#     #    'cost_conc_std_20', \n",
-    "#     #    'profit_decay_20', \n",
-    "#     #    'vol_amp_loss_20', \n",
-    "#     #    'vol_drop_profit_cnt_5', \n",
-    "#     #    'lg_flow_vol_interact_20', \n",
-    "#     #    'cost_break_confirm_cnt_5', \n",
-    "#     #    'atr_norm_channel_pos_14', \n",
-    "#     #    'turnover_diff_skew_20', \n",
-    "#     #    'lg_sm_flow_diverge_20', \n",
-    "#     #    'pullback_strong_20_20', \n",
-    "#     #    'vol_wgt_hist_pos_20', \n",
-    "#     #    'vol_adj_roc_20',\n",
-    "#        'cashflow_to_ev_factor',\n",
-    "#        'ocfps',\n",
-    "#        'book_to_price_ratio',\n",
-    "#        'turnover_rate_mean_5',\n",
-    "#        'variance_20',\n",
-    "#        'bbi_ratio_factor'\n",
-    "# ]\n",
-    "# feature_columns = [col for col in feature_columns if col in train_data.columns]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "8f134d435f71e9e2",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-04-03T14:57:51.050696Z",
-     "start_time": "2025-04-03T14:57:51.034030Z"
-    },
-    "jupyter": {
-     "source_hidden": true
-    }
-   },
-   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
@@ -1590,12 +1587,12 @@
    "import datetime # 用于日期计算\n",
    "from catboost import CatBoostClassifier\n",
    "from catboost import Pool\n",
-    "\n",
+    "import lightgbm as lgb\n",
    "\n",
    "def train_model(train_data_df, feature_columns,\n",
    "                                  print_info=True, # 调整参数名，更通用\n",
    "                                  validation_days=180, use_pca=False, split_date=None,\n",
-    "                                  target_column='label'): # 增加目标列参数\n",
+    "                                  target_column='label', type='light'): # 增加目标列参数\n",
    "\n",
    "    print('train data size: ', len(train_data_df))\n",
    "    print(train_data_df[['ts_code', 'trade_date', 'log_circ_mv']])\n",
@@ -1636,38 +1633,80 @@
    "    \n",
    "    # # 使用处理后的特征和样本权重进行训练\n",
    "    # model.fit(X_train, y_train)\n",
-    "    \n",
-    "    \n",
-    "    cat_features = [i for i, col in enumerate(feature_columns) if col.startswith('cat')]\n",
-    "    print(f'cat_features: {cat_features}')\n",
-    "    # cat_features = []\n",
-    "\n",
-    "    params = {\n",
-    "        'loss_function': 'Logloss',  # 适用于二分类\n",
-    "        'eval_metric': 'Logloss',  # 评估指标\n",
-    "        'iterations': 1500,\n",
-    "        'learning_rate': 0.01,\n",
-    "        'depth': 8,  # 控制模型复杂度\n",
-    "        'l2_leaf_reg': 5,  # L2 正则化\n",
-    "        'verbose': 5000,\n",
-    "        'early_stopping_rounds': 3000,\n",
-    "        'one_hot_max_size': 50,\n",
-    "        'class_weights': [0.6, 1.2],\n",
-    "        'task_type': 'GPU',\n",
-    "        'has_time': True,\n",
-    "        'random_seed': 7\n",
-    "    }\n",
-    "\n",
-    "    train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)\n",
-    "    val_pool = Pool(data=X_val, label=y_val, cat_features=cat_features)\n",
    "\n",
    "\n",
-    "    model = CatBoostClassifier(**params)\n",
-    "    model.fit(train_pool,\n",
-    "              eval_set=val_pool, \n",
-    "              plot=True, \n",
-    "              use_best_model=True\n",
-    "              )\n",
+    "    if type == 'cat':\n",
+    "        params = {\n",
+    "            'loss_function': 'Logloss',  # 适用于二分类\n",
+    "            'eval_metric': 'Logloss',  # 评估指标\n",
+    "            'iterations': 1500,\n",
+    "            'learning_rate': 0.01,\n",
+    "            'depth': 10,  # 控制模型复杂度\n",
+    "            'l2_leaf_reg': 50,  # L2 正则化\n",
+    "            'verbose': 5000,\n",
+    "            'early_stopping_rounds': 3000,\n",
+    "            'one_hot_max_size': 50,\n",
+    "            'class_weights': [0.6, 1.2],\n",
+    "            'task_type': 'GPU',\n",
+    "            'has_time': True,\n",
+    "            'random_seed': 7\n",
+    "        }\n",
+    "        cat_features = [i for i, col in enumerate(feature_columns) if col.startswith('cat')]\n",
+    "        train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)\n",
+    "        val_pool = Pool(data=X_val, label=y_val, cat_features=cat_features)\n",
+    "\n",
+    "\n",
+    "        model = CatBoostClassifier(**params)\n",
+    "        model.fit(train_pool,\n",
+    "                eval_set=val_pool, \n",
+    "                plot=True, \n",
+    "                use_best_model=True\n",
+    "                )\n",
+    "    elif type == 'light':\n",
+    "        params = {\n",
+    "            'objective': 'binary',\n",
+    "            'metric': 'average_precision',\n",
+    "            'learning_rate': 0.01,\n",
+    "            'is_unbalance': True,\n",
+    "            'num_leaves': 2048,\n",
+    "            'min_data_in_leaf': 1024,\n",
+    "            'max_depth': 32,\n",
+    "            'max_bin': 1024,\n",
+    "            'feature_fraction': 0.5,\n",
+    "            'bagging_fraction': 0.5,\n",
+    "            'bagging_freq': 1,\n",
+    "            'lambda_l1': 50,\n",
+    "            'lambda_l2': 50,\n",
+    "            'verbosity': -1,\n",
+    "            'num_threads' : 8\n",
+    "        }\n",
+    "        categorical_feature = [col for col in feature_columns if 'cat' in col]\n",
+    "        train_dataset = lgb.Dataset(\n",
+    "            X_train, label=y_train,\n",
+    "            categorical_feature=categorical_feature\n",
+    "        )\n",
+    "        val_dataset = lgb.Dataset(\n",
+    "            X_val, label=y_val,\n",
+    "            categorical_feature=categorical_feature\n",
+    "        )\n",
+    "\n",
+    "        evals = {}\n",
+    "        callbacks = [lgb.log_evaluation(period=1000),\n",
+    "                        lgb.callback.record_evaluation(evals),\n",
+    "                        lgb.early_stopping(100, first_metric_only=True)\n",
+    "                    ]\n",
+    "        # 训练模型\n",
+    "        model = lgb.train(\n",
+    "            params, train_dataset, num_boost_round=1000,\n",
+    "            valid_sets=[train_dataset, val_dataset], valid_names=['train', 'valid'],\n",
+    "            callbacks=callbacks\n",
+    "        )\n",
+    "\n",
+    "        # 打印特征重要性（如果需要）\n",
+    "        if True:\n",
+    "            lgb.plot_metric(evals)\n",
+    "            lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
+    "            plt.show()\n",
    "\n",
    "\n",
    "    return model, scaler, None # 返回训练好的模型、scaler 和 pca 对象"
@@ -1675,7 +1714,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 35,
   "id": "c6eb5cd4-e714-420a-ac48-39af3e11ee81",
   "metadata": {
    "ExecuteTime": {
@@ -1703,14 +1742,13 @@
      "36399  600561.SH 2022-12-30    11.858571\n",
      "\n",
      "[36400 rows x 3 columns]\n",
-      "原始样本数: 36400, 去除标签为空后样本数: 36400\n",
-      "cat_features: [27, 30, 37, 39, 41, 80, 86, 87, 88, 100, 102, 141]\n"
+      "原始样本数: 36400, 去除标签为空后样本数: 36400\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "73e4fa876f004bafb847cea54620f732",
+       "model_id": "0acc9aa66b564c16ba0dfdaa7dab6a9e",
       "version_major": 2,
       "version_minor": 0
      },
@@ -1725,11 +1763,11 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0:\tlearn: 0.6886803\ttest: 0.6892921\tbest: 0.6892921 (0)\ttotal: 141ms\tremaining: 3m 32s\n",
-      "1499:\tlearn: 0.3359066\ttest: 0.5174742\tbest: 0.5163721 (873)\ttotal: 1m 51s\tremaining: 0us\n",
-      "bestTest = 0.5163720682\n",
-      "bestIteration = 873\n",
-      "Shrink model to first 874 iterations.\n"
+      "0:\tlearn: 0.6886094\ttest: 0.6894541\tbest: 0.6894541 (0)\ttotal: 255ms\tremaining: 6m 22s\n",
+      "1499:\tlearn: 0.3197977\ttest: 0.5228570\tbest: 0.5197799 (414)\ttotal: 5m 23s\tremaining: 0us\n",
+      "bestTest = 0.5197798592\n",
+      "bestIteration = 414\n",
+      "Shrink model to first 415 iterations.\n"
     ]
    }
   ],
@@ -1738,6 +1776,7 @@
    "gc.collect()\n",
    "\n",
    "use_pca = False\n",
+    "type = 'cat'\n",
    "# feature_contri = [2 if feat.startswith('act_factor') or 'buy' in feat or 'sell' in feat else 1 for feat in feature_columns]\n",
    "# light_params['feature_contri'] = feature_contri\n",
    "# print(f'feature_contri: {feature_contri}')\n",
@@ -1745,71 +1784,12 @@
    "                                 .dropna(subset=['label']).groupby('trade_date', group_keys=False)\n",
    "                                 .apply(lambda x: x.nsmallest(50, 'total_mv'))\n",
    "                                 .merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
-    "                                 .merge(index_data, on='trade_date', how='left'), feature_columns)\n"
+    "                                 .merge(index_data, on='trade_date', how='left'), feature_columns, type=type)\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
-   "id": "ec189398",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# if True:\n",
-    "#     train_data_df = train_data.dropna(subset=['label']).groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(50, 'total_mv'))\n",
-    "#     # 识别数值型特征列\n",
-    "\n",
-    "#     # 去除标签为空的样本\n",
-    "#     initial_len = len(train_data_df)\n",
-    "#     train_data_df = train_data_df.dropna(subset=['label'])\n",
-    "\n",
-    "\n",
-    "#     # 提取特征和标签，只取数值型特征用于线性回归\n",
-    "        \n",
-    "#     all_dates = train_data_df['trade_date'].unique()  # 获取所有唯一的 trade_date\n",
-    "#     split_date = all_dates[-validation_days]  # 划分点为倒数第 validation_days 天\n",
-    "#     val_data_split = train_data_df[train_data_df['trade_date'] >= split_date]  # 验证集\n",
-    "    \n",
-    "\n",
-    "#     score_df = val_data_split\n",
-    "#     score_df = fill_nan_with_daily_median(score_df, ['pe_ttm'])\n",
-    "#     score_df = score_df[score_df['pe_ttm'] > 0]\n",
-    "#     score_df = score_df.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
-    "#     score_df = score_df.merge(index_data, on='trade_date', how='left')\n",
-    "#     # score_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(50, 'total_mv')).reset_index()\n",
-    "#     numeric_columns = score_df.select_dtypes(include=['float64', 'int64']).columns\n",
-    "#     numeric_columns = [col for col in feature_columns if col in numeric_columns]\n",
-    "#     # score_df.loc[:, numeric_columns] = scaler.transform(score_df[numeric_columns])\n",
-    "#     # score_df = cross_sectional_standardization(score_df, numeric_columns)\n",
-    "#     print(score_df.columns.tolist())\n",
-    "\n",
-    "#     score_df['score'] = model.predict_proba(score_df[feature_columns])[:, 1]\n",
-    "#     score_df['score_ranks'] = score_df.groupby('trade_date')['score'].rank(ascending=True)\n",
-    "\n",
-    "#     score_df = score_df.groupby('trade_date', group_keys=False).apply(\n",
-    "#         lambda x: x[x['score'] >= x['score'].quantile(0.90)] # 计算90%分位数作为阈值，筛选分数>=阈值的行\n",
-    "#     ).reset_index(drop=True) # drop=True 避免添加旧索引列\n",
-    "#     # save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nlargest(1, 'score')).reset_index()\n",
-    "#     save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(1, 'total_mv')).reset_index()\n",
-    "#     # save_df[['trade_date', 'score', 'ts_code']].to_csv('predictions_test.tsv', index=False)\n",
-    "#     import pandas as pd\n",
-    "#     from sklearn.metrics import accuracy_score\n",
-    "\n",
-    "#     # 假设 df 是你的 DataFrame\n",
-    "#     # df = pd.read_csv('your_data.csv')\n",
-    "\n",
-    "#     # 将预测分数转换为类别预测（例如：0.5 为阈值）\n",
-    "#     save_df['pred'] = (save_df['score'] >= 0.5).astype(int)\n",
-    "\n",
-    "#     # 计算准确率\n",
-    "#     acc = accuracy_score(save_df['label'], save_df['pred'])\n",
-    "\n",
-    "#     print(f\"准确率为：{acc:.4f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 36,
   "id": "5d1522a7538db91b",
   "metadata": {
    "ExecuteTime": {
@@ -1819,13 +1799,6 @@
   },
   "outputs": [],
   "source": [
-    "# train_data = train_data.sort_values(by='trade_date')\n",
-    "# all_dates = train_data['trade_date'].unique()  # 获取所有唯一的 trade_date\n",
-    "# split_date = all_dates[-120]  # 划分点为倒数第 validation_days 天\n",
-    "# print(split_date)\n",
-    "# print(all_dates)\n",
-    "# val_data_split = train_data[train_data['trade_date'] >= split_date]  # 验证集\n",
-    "\n",
    "score_df = test_data.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(500, 'total_mv'))\n",
    "# score_df = fill_nan_with_daily_median(score_df, ['pe_ttm'])\n",
    "# score_df = score_df[score_df['pe_ttm'] > 0]\n",
@@ -1837,21 +1810,24 @@
    "# score_df.loc[:, numeric_columns] = scaler.transform(score_df[numeric_columns])\n",
    "# score_df = cross_sectional_standardization(score_df, numeric_columns)\n",
    "\n",
-    "score_df['score'] = model.predict_proba(score_df[feature_columns])[:, 1]\n",
+    "if type == 'cat':\n",
+    "    score_df['score'] = model.predict_proba(score_df[feature_columns])[:, 1]\n",
+    "elif type == 'light':\n",
+    "    score_df['score'] = model.predict(score_df[feature_columns])\n",
    "score_df['score_ranks'] = score_df.groupby('trade_date')['score'].rank(ascending=True)\n",
    "\n",
    "score_df = score_df.groupby('trade_date', group_keys=False).apply(\n",
    "    lambda x: x[x['score'] >= x['score'].quantile(0.90)] # 计算90%分位数作为阈值，筛选分数>=阈值的行\n",
    ").reset_index(drop=True) # drop=True 避免添加旧索引列\n",
    "# save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nlargest(1, 'score')).reset_index()\n",
-    "save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(1, 'total_mv')).reset_index()\n",
+    "save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(2, 'total_mv')).reset_index()\n",
    "save_df = save_df.sort_values(['trade_date', 'score'])\n",
    "save_df[['trade_date', 'score', 'ts_code']].to_csv('predictions_test.tsv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 37,
   "id": "09b1799e",
   "metadata": {},
   "outputs": [
@@ -1859,8 +1835,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "190\n",
-      "['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 
'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', '399006.SZ_volume_change_rate']\n"
+      "191\n",
+      "['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', 
'399006.SZ_volume_change_rate']\n"
     ]
    }
   ],
@@ -1871,7 +1847,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 38,
   "id": "7e9023cc",
   "metadata": {},
   "outputs": [],
@@ -2071,7 +2047,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 39,
   "id": "a0000d75",
   "metadata": {},
   "outputs": [
@@ -2081,7 +2057,7 @@
     "text": [
      "开始分析 'score' 在 'circ_mv' 和 'future_return' 下的表现...\n",
      "准备数据，处理 NaN 值...\n",
-      "原始数据 28200 行，移除 NaN 后剩余 27807 行用于分析。\n",
+      "原始数据 28300 行，移除 NaN 后剩余 27850 行用于分析。\n",
      "对 'circ_mv' 和 'future_return' 进行 100 分位数分箱...\n",
      "按二维分箱分组计算 Spearman Rank IC...\n",
      "整理结果用于绘图...\n",
@@ -2319,7 +2295,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 40,
   "id": "a436dba4",
   "metadata": {},
   "outputs": [
--- a/main/train/Classify3.ipynb
+++ b/main/train/Classify3.ipynb
--- a/main/train/catboost_info/catboost_training.json
+++ b/main/train/catboost_info/catboost_training.json
--- a/main/train/catboost_info/learn/events.out.tfevents
+++ b/main/train/catboost_info/learn/events.out.tfevents
--- a/main/train/catboost_info/learn_error.tsv
+++ b/main/train/catboost_info/learn_error.tsv
--- a/main/train/catboost_info/test/events.out.tfevents
+++ b/main/train/catboost_info/test/events.out.tfevents
--- a/main/train/catboost_info/test_error.tsv
+++ b/main/train/catboost_info/test_error.tsv
--- a/main/train/catboost_info/time_left.tsv
+++ b/main/train/catboost_info/time_left.tsv
--- a/main/train/predictions_test.tsv
+++ b/main/train/predictions_test.tsv
--- a/main/train/test.py
+++ b/main/train/test.py
@@ -10,6 +10,6 @@ from main.factor.factor import calculate_arbr
 ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')
 pro = ts.pro_api()

-df = pro.dc_member(trade_date='20250102', ts_code='BK1184.DC')
+df = pro.dc_member(trade_date='20190105')

-print(df.sort_values('end_date'))
+print(df)