Files
NewStock/main/train/RollingRank.ipynb
liaozhaorun 791c84aba6 Classify2
2025-05-08 15:42:17 +08:00

4657 lines
267 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"id": "79a7758178bafdd3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:40:18.436038Z",
"start_time": "2025-04-11T11:40:17.948261Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"e:\\PyProject\\NewStock\\main\\train\n"
]
}
],
"source": [
"# %load_ext autoreload\n",
"# %autoreload 2\n",
"\n",
"import gc\n",
"import os\n",
"import sys\n",
"sys.path.append('../../')\n",
"print(os.getcwd())\n",
"import pandas as pd\n",
"from main.factor.factor import get_rolling_factor, get_simple_factor\n",
"from main.utils.factor import read_industry_data\n",
"from main.utils.factor_processor import calculate_score\n",
"from main.utils.utils import read_and_merge_h5_data, merge_with_industry_data\n",
"\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "a79cafb06a7e0e43",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:04.815882Z",
"start_time": "2025-04-11T11:40:18.445044Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n",
"daily basic\n",
"inner merge on ['ts_code', 'trade_date']\n",
"stk limit\n",
"left merge on ['ts_code', 'trade_date']\n",
"money flow\n",
"left merge on ['ts_code', 'trade_date']\n",
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5209903 entries, 0 to 5209902\n",
"Data columns (total 32 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
" 1 trade_date datetime64[ns]\n",
" 2 open float64 \n",
" 3 close float64 \n",
" 4 high float64 \n",
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 pct_chg float64 \n",
" 8 turnover_rate float64 \n",
" 9 pe_ttm float64 \n",
" 10 circ_mv float64 \n",
" 11 total_mv float64 \n",
" 12 volume_ratio float64 \n",
" 13 is_st bool \n",
" 14 up_limit float64 \n",
" 15 down_limit float64 \n",
" 16 buy_sm_vol float64 \n",
" 17 sell_sm_vol float64 \n",
" 18 buy_lg_vol float64 \n",
" 19 sell_lg_vol float64 \n",
" 20 buy_elg_vol float64 \n",
" 21 sell_elg_vol float64 \n",
" 22 net_mf_vol float64 \n",
" 23 his_low float64 \n",
" 24 his_high float64 \n",
" 25 cost_5pct float64 \n",
" 26 cost_15pct float64 \n",
" 27 cost_50pct float64 \n",
" 28 cost_85pct float64 \n",
" 29 cost_95pct float64 \n",
" 30 weight_avg float64 \n",
" 31 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(29), object(1)\n",
"memory usage: 1.2+ GB\n",
"None\n"
]
}
],
"source": [
"from main.utils.utils import read_and_merge_h5_data\n",
"\n",
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"df = df[df['trade_date'] >= '2021-01-01']\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "cac01788dac10678",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:09.893735Z",
"start_time": "2025-04-11T11:41:05.012569Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"industry\n"
]
}
],
"source": [
"print('industry')\n",
"industry_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',\n",
" columns=['ts_code', 'l1_code', 'l2_code', 'in_date'],\n",
" df=None, on=['ts_code'], join='left')\n",
"\n",
"\n",
"def merge_with_industry_data(df, industry_df):\n",
" # 确保日期字段是 datetime 类型\n",
" df['trade_date'] = pd.to_datetime(df['trade_date'])\n",
" industry_df['in_date'] = pd.to_datetime(industry_df['in_date'])\n",
"\n",
" # 对 industry_df 按 ts_code 和 in_date 排序\n",
" industry_df_sorted = industry_df.sort_values(['in_date', 'ts_code'])\n",
"\n",
" # 对原始 df 按 ts_code 和 trade_date 排序\n",
" df_sorted = df.sort_values(['trade_date', 'ts_code'])\n",
"\n",
" # 使用 merge_asof 进行向后合并\n",
" merged = pd.merge_asof(\n",
" df_sorted,\n",
" industry_df_sorted,\n",
" by='ts_code', # 按 ts_code 分组\n",
" left_on='trade_date',\n",
" right_on='in_date',\n",
" direction='backward'\n",
" )\n",
"\n",
" # 获取每个 ts_code 的最早 in_date 记录\n",
" min_in_date_per_ts = (industry_df_sorted\n",
" .groupby('ts_code')\n",
" .first()\n",
" .reset_index()[['ts_code', 'l1_code']])\n",
"\n",
" # 填充未匹配到的记录trade_date 早于所有 in_date 的情况)\n",
" merged['l1_code'] = merged['l1_code'].fillna(\n",
" merged['ts_code'].map(min_in_date_per_ts.set_index('ts_code')['l1_code'])\n",
" )\n",
"\n",
" # 获取每个 ts_code 的最早 in_date 记录\n",
" min_in_date_per_ts = (industry_df_sorted\n",
" .groupby('ts_code')\n",
" .first()\n",
" .reset_index()[['ts_code', 'l2_code']])\n",
"\n",
" # 填充未匹配到的记录trade_date 早于所有 in_date 的情况)\n",
" merged['l2_code'] = merged['l2_code'].fillna(\n",
" merged['ts_code'].map(min_in_date_per_ts.set_index('ts_code')['l2_code'])\n",
" )\n",
"\n",
" # 保留需要的列并重置索引\n",
" result = merged.reset_index(drop=True)\n",
" return result\n",
"\n",
"\n",
"# 使用示例\n",
"df = merge_with_industry_data(df, industry_df)\n",
"# print(mdf[mdf['ts_code'] == '600751.SH'][['ts_code', 'trade_date', 'l1_code']])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c4e9e1d31da6dba6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:10.010631Z",
"start_time": "2025-04-11T11:41:09.918750Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"def calculate_indicators(df):\n",
" \"\"\"\n",
" 计算四个指标当日涨跌幅、5日移动平均、RSI、MACD。\n",
" \"\"\"\n",
" df = df.sort_values('trade_date')\n",
" df['daily_return'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100\n",
" # df['5_day_ma'] = df['close'].rolling(window=5).mean()\n",
" delta = df['close'].diff()\n",
" gain = delta.where(delta > 0, 0)\n",
" loss = -delta.where(delta < 0, 0)\n",
" avg_gain = gain.rolling(window=14).mean()\n",
" avg_loss = loss.rolling(window=14).mean()\n",
" rs = avg_gain / avg_loss\n",
" df['RSI'] = 100 - (100 / (1 + rs))\n",
"\n",
" # 计算MACD\n",
" ema12 = df['close'].ewm(span=12, adjust=False).mean()\n",
" ema26 = df['close'].ewm(span=26, adjust=False).mean()\n",
" df['MACD'] = ema12 - ema26\n",
" df['Signal_line'] = df['MACD'].ewm(span=9, adjust=False).mean()\n",
" df['MACD_hist'] = df['MACD'] - df['Signal_line']\n",
"\n",
" # 4. 情绪因子1市场上涨比例Up Ratio\n",
" df['up_ratio'] = df['daily_return'].apply(lambda x: 1 if x > 0 else 0)\n",
" df['up_ratio_20d'] = df['up_ratio'].rolling(window=20).mean() # 过去20天上涨比例\n",
"\n",
" # 5. 情绪因子2成交量变化率Volume Change Rate\n",
" df['volume_mean'] = df['vol'].rolling(window=20).mean() # 过去20天的平均成交量\n",
" df['volume_change_rate'] = (df['vol'] - df['volume_mean']) / df['volume_mean'] * 100 # 成交量变化率\n",
"\n",
" # 6. 情绪因子3波动率Volatility\n",
" df['volatility'] = df['daily_return'].rolling(window=20).std() # 过去20天的日收益率标准差\n",
"\n",
" # 7. 情绪因子4成交额变化率Amount Change Rate\n",
" df['amount_mean'] = df['amount'].rolling(window=20).mean() # 过去20天的平均成交额\n",
" df['amount_change_rate'] = (df['amount'] - df['amount_mean']) / df['amount_mean'] * 100 # 成交额变化率\n",
"\n",
" return df\n",
"\n",
"\n",
"def generate_index_indicators(h5_filename):\n",
" df = pd.read_hdf(h5_filename, key='index_data')\n",
" df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')\n",
" df = df.sort_values('trade_date')\n",
"\n",
" # 计算每个ts_code的相关指标\n",
" df_indicators = []\n",
" for ts_code in df['ts_code'].unique():\n",
" df_index = df[df['ts_code'] == ts_code].copy()\n",
" df_index = calculate_indicators(df_index)\n",
" df_indicators.append(df_index)\n",
"\n",
" # 合并所有指数的结果\n",
" df_all_indicators = pd.concat(df_indicators, ignore_index=True)\n",
"\n",
" # 保留trade_date列并将同一天的数据按ts_code合并成一行\n",
" df_final = df_all_indicators.pivot_table(\n",
" index='trade_date',\n",
" columns='ts_code',\n",
" values=['daily_return', 'RSI', 'MACD', 'Signal_line',\n",
" 'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',\n",
" 'amount_change_rate', 'amount_mean'],\n",
" aggfunc='last'\n",
" )\n",
"\n",
" df_final.columns = [f\"{col[1]}_{col[0]}\" for col in df_final.columns]\n",
" df_final = df_final.reset_index()\n",
"\n",
" return df_final\n",
"\n",
"\n",
"# 使用函数\n",
"h5_filename = '../../data/index_data.h5'\n",
"index_data = generate_index_indicators(h5_filename)\n",
"index_data = index_data.dropna()\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a735bc02ceb4d872",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:10.069433Z",
"start_time": "2025-04-11T11:41:10.018146Z"
}
},
"outputs": [],
"source": [
"\n",
"import talib\n",
"\n",
"\n",
"def get_rolling_factor(df):\n",
" old_columns = df.columns.tolist()[:]\n",
"\n",
" # 按股票和日期排序(如果尚未排序)\n",
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
" grouped = df.groupby('ts_code', group_keys=False)\n",
"\n",
" epsilon = 1e-8\n",
" df['lg_elg_net_buy_vol'] = df['buy_lg_vol'] + df['buy_elg_vol'] - df['sell_lg_vol'] - df['sell_elg_vol']\n",
" # 检查 'volume' 列是否存在且有效\n",
" df['flow_lg_elg_intensity'] = df['lg_elg_net_buy_vol'] / (df['vol'] + epsilon)\n",
" \n",
" \n",
" # 2. 散户与主力背离度 (Retail vs Institutional Divergence)\n",
" # 衡量小单净流入与(大单+超大单)净流入的差异或比率\n",
" df['sm_net_buy_vol'] = df['buy_sm_vol'] - df['sell_sm_vol']\n",
" df['flow_divergence_diff'] = df['sm_net_buy_vol'] - df['lg_elg_net_buy_vol']\n",
" # 比率形式可能更稳定\n",
" df['flow_divergence_ratio'] = df['sm_net_buy_vol'] / (df['lg_elg_net_buy_vol'] + np.sign(df['lg_elg_net_buy_vol']) * epsilon + epsilon) # 复杂处理避免0/0\n",
" \n",
" # 3. 资金流结构变动 (Flow Structure Change - Relative Strength of Large Flow)\n",
" # 大单+超大单买入额占总买入额的比例的变化\n",
" df['total_buy_vol'] = df['buy_sm_vol'] + df['buy_lg_vol'] + df['buy_elg_vol']\n",
" df['lg_elg_buy_prop'] = (df['buy_lg_vol'] + df['buy_elg_vol']) / (df['total_buy_vol'] + epsilon)\n",
" df['flow_struct_buy_change'] = grouped['lg_elg_buy_prop'].diff(1) # 1日变化\n",
" \n",
" # 4. 资金流加速度 (Flow Acceleration)\n",
" # 净主力资金流的变化率(二阶导)\n",
" df['lg_elg_net_buy_vol_change'] = grouped['lg_elg_net_buy_vol'].diff(1)\n",
" df['flow_lg_elg_accel'] = grouped['lg_elg_net_buy_vol_change'].diff(1)\n",
" \n",
" # # 5. 极端资金流事件 (Categorical: Extreme Flow Event)\n",
" # # 定义主力资金流强度是否处于其历史极端水平例如过去N天的90分位数以上或10分位数以下\n",
" # rolling_window = 20 # 可调整窗口期\n",
" \n",
" # # Step 1: Calculate the rolling quantiles separately\n",
" # rolling_high = grouped['flow_lg_elg_intensity'].rolling(rolling_window, min_periods=1).quantile(0.9) # min_periods=1 保证窗口未满时也有输出\n",
" # rolling_low = grouped['flow_lg_elg_intensity'].rolling(rolling_window, min_periods=1).quantile(0.1)\n",
" \n",
" # # Step 2: Assign the results to the DataFrame\n",
" # # 确保 df 和 rolling_high/low 的索引是一致的\n",
" # # 如果 df 的索引在此期间没有被修改过,这通常是安全的\n",
" # df['flow_lg_elg_intensity_rolling_high'] = rolling_high\n",
" # df['flow_lg_elg_intensity_rolling_low'] = rolling_low\n",
" \n",
" # # Step 3: Continue with the logic using the new columns\n",
" # conditions_flow = [\n",
" # df['flow_lg_elg_intensity'] > df['flow_lg_elg_intensity_rolling_high'],\n",
" # df['flow_lg_elg_intensity'] < df['flow_lg_elg_intensity_rolling_low']\n",
" # ]\n",
" # choices_flow = [1, -1] # 1: 极端流入, -1: 极端流出\n",
" # df['cat_extreme_flow'] = np.select(conditions_flow, choices_flow, default=0)\n",
" \n",
" # --- 筹码分布因子 ---\n",
" \n",
" # 6. 筹码集中度 (Chip Concentration)\n",
" # 衡量筹码分布的紧密程度,例如 95% 与 5% 成本价的差距,相对于当前价格进行标准化\n",
" # 检查 'close' 列是否存在且有效\n",
" df['chip_concentration_range'] = (df['cost_95pct'] - df['cost_5pct']) / (df['close'] + epsilon)\n",
" \n",
" \n",
" # 7. 筹码分布偏度 (Chip Distribution Skewness Proxy)\n",
" # 比较中位数成本 (cost_50pct) 和加权平均成本 (weight_avg)\n",
" # weight_avg > cost_50pct 暗示高成本区有较多筹码(右偏)\n",
" df['chip_skewness'] = (df['weight_avg'] - df['cost_50pct']) / (df['cost_50pct'] + epsilon)\n",
" \n",
" # 8. 浮筹比例 (Floating Chips Proxy)\n",
" # 衡量短期内例如15%成本线以下)的筹码比例与总获利盘比例的关系\n",
" # winner_rate 高但 cost_15pct 接近当前价,可能意味着大部分获利盘成本不高,易浮动\n",
" # 这里简化为:获利盘比例 与 (当前价-15%成本价)/当前价 的乘积\n",
" price_dist_cost15 = (df['close'] - df['cost_15pct']) / (df['close'] + epsilon)\n",
" df['floating_chip_proxy'] = df['winner_rate'] * np.maximum(0, price_dist_cost15) # 只考虑价格高于15%成本线的情况\n",
" \n",
" # 9. 成本支撑强度变化 (Cost Support Strength Change)\n",
" # 观察低位筹码成本(如 5% 或 15% 分位点)的变化率,看支撑位是上移还是下移\n",
" df['cost_support_15pct_change'] = grouped['cost_15pct'].pct_change(1) * 100 # 百分比变化\n",
" \n",
" # 10. 获利盘压力/支撑区 (Categorical: Winner Rate Zone & Price Position)\n",
" # 结合获利盘比例和当前价格相对于筹码成本的位置\n",
" # 例如: 价格在 85% 成本线之上 & 获利盘 > 0.8 -> 高位派发风险区?\n",
" # 价格在 15% 成本线之下 & 获利盘 < 0.2 -> 低位吸筹潜力区?\n",
" conditions_winner = [\n",
" (df['close'] > df['cost_85pct']) & (df['winner_rate'] > 0.8), # 高位 & 高获利盘\n",
" (df['close'] < df['cost_15pct']) & (df['winner_rate'] < 0.2), # 低位 & 低获利盘\n",
" (df['close'] > df['cost_50pct']) & (df['winner_rate'] > 0.5), # 中高位 & 多数获利\n",
" (df['close'] < df['cost_50pct']) & (df['winner_rate'] < 0.5), # 中低位 & 多数亏损\n",
" ]\n",
" choices_winner = [1, 2, 3, 4] # 1:高风险区, 2:低潜力区, 3:中上获利区, 4:中下亏损区\n",
" df['cat_winner_price_zone'] = np.select(conditions_winner, choices_winner, default=0) # 0: 其他\n",
" \n",
" \n",
" # --- 结合因子 ---\n",
" \n",
" # 11. 主力行为与筹码结构一致性 (Flow-Chip Consistency)\n",
" # 例如:主力净买入发生在价格接近下方筹码密集区(如 cost_15pct 到 cost_50pct时\n",
" price_near_low_support = (df['close'] > df['cost_15pct']) & (df['close'] < df['cost_50pct'])\n",
" df['flow_chip_consistency'] = df['lg_elg_net_buy_vol'] * price_near_low_support.astype(int)\n",
" # 可以进一步标准化或做成 categorical\n",
" \n",
" # 12. 获利了结压力/承接盘强度 (Profit-Taking Pressure vs Absorption)\n",
" # 在高获利盘(winner_rate > 0.7)的情况下,观察主力资金是净流出(了结)还是净流入(高位换手/承接)\n",
" high_winner_rate_flag = (df['winner_rate'] > 0.7).astype(int)\n",
" df['profit_taking_vs_absorb'] = df['lg_elg_net_buy_vol'] * high_winner_rate_flag\n",
" # 正值表示高获利盘下主力仍在买入(承接),负值表示主力在卖出(了结)\n",
" \n",
" \n",
" # 清理临时列和可能产生的 NaN (可选,根据需要处理)\n",
" cols_to_drop = ['lg_elg_net_buy_vol', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop',\n",
" 'lg_elg_net_buy_vol_change', 'flow_lg_elg_intensity_rolling_high',\n",
" 'flow_lg_elg_intensity_rolling_low']\n",
" # df = df.drop(columns=cols_to_drop)\n",
"\n",
"\n",
" window = 20\n",
" df['_is_positive'] = (df['pct_chg'] > 0).astype(int)\n",
" df['_is_negative'] = (df['pct_chg'] < 0).astype(int)\n",
" df['cat_is_positive'] = (df['pct_chg'] > 0).astype(int)\n",
"\n",
" # 分离正负收益率 (用于计算各自的均值和平方均值)\n",
" # 注意:这里我们保留原始收益率用于计算,而不是 clip 到 0\n",
" df['_pos_returns'] = df['pct_chg'].where(df['pct_chg'] > 0, 0) # 非正设为0便于求和\n",
" df['_neg_returns'] = df['pct_chg'].where(df['pct_chg'] < 0, 0) # 非负设为0便于求和\n",
"\n",
" # 计算收益率的平方 (用于计算 E[X^2])\n",
" df['_pos_returns_sq'] = np.square(df['_pos_returns'])\n",
" df['_neg_returns_sq'] = np.square(df['_neg_returns']) # 平方后负数变正\n",
"\n",
" # 4. 计算滚动统计量 (使用内置函数,速度较快)\n",
" # 计算正收益日的统计量\n",
" rolling_pos_count = grouped['_is_positive'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
" rolling_pos_sum = grouped['_pos_returns'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
" rolling_pos_sum_sq = grouped['_pos_returns_sq'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
"\n",
" # 计算负收益日的统计量\n",
" rolling_neg_count = grouped['_is_negative'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
" rolling_neg_sum = grouped['_neg_returns'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
" rolling_neg_sum_sq = grouped['_neg_returns_sq'].rolling(window, min_periods=max(1, window // 2)).sum()\n",
"\n",
" # 5. 计算方差和标准差\n",
" pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count\n",
" pos_mean = rolling_pos_sum / rolling_pos_count\n",
" pos_var = pos_mean_sq - np.square(pos_mean)\n",
" pos_var = pos_var.where(rolling_pos_count >= 2, np.nan).clip(lower=0)\n",
" upside_vol = np.sqrt(pos_var)\n",
"\n",
" neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count\n",
" neg_mean = rolling_neg_sum / rolling_neg_count # 注意 neg_mean 是负数\n",
" neg_var = neg_mean_sq - np.square(neg_mean)\n",
" neg_var = neg_var.where(rolling_neg_count >= 2, np.nan).clip(lower=0)\n",
" downside_vol = np.sqrt(neg_var)\n",
"\n",
" # rolling 操作后结果带有 MultiIndex需要去除股票代码层级以便合并\n",
" df['upside_vol'] = upside_vol.reset_index(level=0, drop=True)\n",
" df['downside_vol'] = downside_vol.reset_index(level=0, drop=True)\n",
"\n",
" df['vol_ratio'] = df['upside_vol'] / df['downside_vol']\n",
" df['vol_ratio'] = df['vol_ratio'].replace([np.inf, -np.inf], np.nan).fillna(0) # 或 fillna(np.nan)\n",
"\n",
" df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)\n",
" df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)\n",
"\n",
" # 因子 1短期成交量变化率\n",
" df['volume_change_rate'] = (\n",
" grouped['vol'].rolling(window=2).mean() /\n",
" grouped['vol'].rolling(window=10).mean() - 1\n",
" ).reset_index(level=0, drop=True) # 确保索引对齐\n",
"\n",
" # 因子 2成交量突破信号\n",
" max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True) # 确保索引对齐\n",
" df['cat_volume_breakout'] = (df['vol'] > max_volume)\n",
"\n",
" # 因子 3换手率均线偏离度\n",
" mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
" std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)\n",
" df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover\n",
"\n",
" # 因子 4换手率激增信号\n",
" df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)\n",
"\n",
" # 因子 5量比均值\n",
" df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
"\n",
" # 因子 6量比突破信号\n",
" max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)\n",
" df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)\n",
"\n",
" df['vol_spike'] = grouped.apply(\n",
" lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)\n",
" )\n",
" df['vol_std_5'] = grouped['vol'].pct_change().rolling(window=5).std()\n",
"\n",
" # 计算 ATR\n",
" df['atr_14'] = grouped.apply(\n",
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),\n",
" index=x.index)\n",
" )\n",
" df['atr_6'] = grouped.apply(\n",
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),\n",
" index=x.index)\n",
" )\n",
"\n",
" # 计算 OBV 及其均线\n",
" df['obv'] = grouped.apply(\n",
" lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
" )\n",
" print(df.columns)\n",
" df['maobv_6'] = grouped.apply(\n",
" lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)\n",
" )\n",
"\n",
" df['rsi_3'] = grouped.apply(\n",
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)\n",
" )\n",
" # df['rsi_6'] = grouped.apply(\n",
" # lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)\n",
" # )\n",
" # df['rsi_9'] = grouped.apply(\n",
" # lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)\n",
" # )\n",
"\n",
" # 计算 return_10 和 return_20\n",
" df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
" # df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)\n",
" df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
"\n",
" # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)\n",
"\n",
" # 计算标准差指标\n",
" df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())\n",
" # df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())\n",
" # df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())\n",
" df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())\n",
" df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())\n",
"\n",
" # 计算 EMA 指标\n",
" df['_ema_5'] = grouped['close'].apply(\n",
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)\n",
" )\n",
" df['_ema_13'] = grouped['close'].apply(\n",
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)\n",
" )\n",
" df['_ema_20'] = grouped['close'].apply(\n",
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)\n",
" )\n",
" df['_ema_60'] = grouped['close'].apply(\n",
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)\n",
" )\n",
"\n",
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
" df['act_factor1'] = grouped['_ema_5'].apply(\n",
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50\n",
" )\n",
" df['act_factor2'] = grouped['_ema_13'].apply(\n",
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40\n",
" )\n",
" df['act_factor3'] = grouped['_ema_20'].apply(\n",
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21\n",
" )\n",
" df['act_factor4'] = grouped['_ema_60'].apply(\n",
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10\n",
" )\n",
"\n",
" # 根据 trade_date 截面计算排名\n",
" df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)\n",
" df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)\n",
" df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)\n",
"\n",
" df['log(circ_mv)'] = np.log(df['circ_mv'])\n",
"\n",
" window_high_volume = 5\n",
" window_close_stddev = 20\n",
" period_delta = 5\n",
"\n",
" # 计算每只股票的滚动协方差\n",
" def calculate_rolling_cov(group):\n",
" return group['high'].rolling(window_high_volume).cov(group['vol'])\n",
"\n",
" df['cov'] = grouped.apply(calculate_rolling_cov)\n",
"\n",
" # 计算每只股票的协方差差分\n",
" def calculate_delta_cov(group):\n",
" return group['cov'].diff(period_delta)\n",
"\n",
" df['delta_cov'] = grouped.apply(calculate_delta_cov)\n",
"\n",
" # 计算每只股票的滚动标准差\n",
" def calculate_stddev_close(group):\n",
" return group['close'].rolling(window_close_stddev).std()\n",
"\n",
" df['_stddev_close'] = grouped.apply(calculate_stddev_close)\n",
" df['_rank_stddev'] = df.groupby('trade_date')['_stddev_close'].rank(pct=True)\n",
" df['alpha_22_improved'] = -1 * df['delta_cov'] * df['_rank_stddev']\n",
"\n",
" df['alpha_003'] = np.where(df['high'] != df['low'],\n",
" (df['close'] - df['open']) / (df['high'] - df['low']),\n",
" 0)\n",
"\n",
" df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol']))\n",
" df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)\n",
"\n",
" df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())\n",
" df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)\n",
"\n",
" df['cat_up_limit'] = (df['close'] == df['up_limit']) # 是否涨停1表示涨停0表示未涨停\n",
" df['cat_down_limit'] = (df['close'] == df['down_limit']) # 是否跌停1表示跌停0表示未跌停\n",
" df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
" drop=True)\n",
" df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
" drop=True)\n",
"\n",
" # 3. 最近连续涨跌停天数\n",
" def calculate_consecutive_limits(series):\n",
" \"\"\"\n",
" 计算连续涨停/跌停天数。\n",
" \"\"\"\n",
" consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
" consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
" return consecutive_up, consecutive_down\n",
"\n",
" # 连续涨停天数\n",
" df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(\n",
" lambda x: calculate_consecutive_limits(x)[0]\n",
" )\n",
"\n",
" df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)\n",
"\n",
" df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))\n",
"\n",
" def rolling_corr(group):\n",
" roc_close = group['close'].pct_change()\n",
" roc_weight = group['weight_avg'].pct_change()\n",
" return roc_close.rolling(10).corr(roc_weight)\n",
"\n",
" df['price_cost_divergence'] = grouped.apply(rolling_corr)\n",
"\n",
" df['smallcap_concentration'] = (1 / df['log(circ_mv)']) * (df['cost_85pct'] - df['cost_15pct'])\n",
"\n",
" # 16. 筹码稳定性指数 (20日波动率)\n",
" df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())\n",
" df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())\n",
"\n",
" # 17. 成本区间突破标记\n",
" df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())\n",
"\n",
" # 20. 筹码-流动性风险\n",
" df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (\n",
" 1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))\n",
"\n",
" # 7. 市值波动率因子 (使用 grouped)\n",
" df['turnover_std'] = grouped['turnover_rate'].transform(lambda x: x.rolling(window=20).std())\n",
" df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['log(circ_mv)'])\n",
"\n",
" # 8. 市值成长性因子\n",
" df['volume_growth'] = grouped['vol'].pct_change(periods=20)\n",
" df['mv_growth'] = df['volume_growth'] / df['log(circ_mv)']\n",
"\n",
" # AR 指标\n",
" df[\"ar\"] = grouped.apply(\n",
" lambda x: (x[\"high\"].div(x[\"open\"]).rolling(3).sum()) / (x[\"open\"].div(x[\"low\"]).rolling(3).sum()) * 100)\n",
"\n",
" # BR 指标\n",
" df[\"pre_close\"] = grouped[\"close\"].shift(1)\n",
" df[\"br_up\"] = (df[\"high\"] - df[\"pre_close\"]).clip(lower=0)\n",
" df[\"br_down\"] = (df[\"pre_close\"] - df[\"low\"]).clip(lower=0)\n",
" df[\"br\"] = grouped.apply(lambda x: (x[\"br_up\"].rolling(3).sum()) / (x[\"br_down\"].rolling(3).sum()) * 100)\n",
"\n",
" # ARBR\n",
" df['arbr'] = df['ar'] - df['br']\n",
" df.drop(columns=[\"pre_close\", \"br_up\", \"br_down\", 'ar', 'br'], inplace=True)\n",
"\n",
" df.drop(columns=['weight_std20'], inplace=True, errors='ignore')\n",
" df.drop(\n",
" columns=['_is_positive', '_is_negative', '_pos_returns', '_neg_returns', '_pos_returns_sq', '_neg_returns_sq'],\n",
" inplace=True, errors='ignore')\n",
" new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]\n",
"\n",
" return df, new_columns\n",
"\n",
"\n",
"def get_simple_factor(df):\n",
" old_columns = df.columns.tolist()[:]\n",
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
" alpha = 0.5\n",
" df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']\n",
" df['resonance_factor'] = df['volume_ratio'] * df['pct_chg']\n",
" df['log_close'] = np.log(df['close'])\n",
"\n",
" df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']\n",
"\n",
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
"\n",
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
"\n",
" # 计算比值指标\n",
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
" # df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
"\n",
" # 计算标准差差值\n",
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
"\n",
" # df['cat_af1'] = df['act_factor1'] > 0\n",
" df['cat_af2'] = df['act_factor2'] > df['act_factor1']\n",
" df['cat_af3'] = df['act_factor3'] > df['act_factor2']\n",
" df['cat_af4'] = df['act_factor4'] > df['act_factor3']\n",
"\n",
" # 计算 act_factor5 和 act_factor6\n",
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
"\n",
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
"\n",
" df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
" df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
"\n",
" df['log(circ_mv)'] = np.log(df['circ_mv'])\n",
"\n",
" df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])\n",
"\n",
" df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
"\n",
" df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
"\n",
" df['lock_factor'] = df['turnover_rate'] * (\n",
" 1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))\n",
"\n",
" df['cat_vol_break'] = (df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2)\n",
"\n",
" df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']\n",
"\n",
" # 12. 小盘股筹码集中度\n",
" df['smallcap_concentration'] = (1 / df['log(circ_mv)']) * (df['cost_85pct'] - df['cost_15pct'])\n",
"\n",
" df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &\n",
" (df['volume_ratio'] > 1.5) &\n",
" (df['winner_rate'] > 0.7))\n",
"\n",
" df['mv_turnover_ratio'] = df['turnover_rate'] / df['log(circ_mv)']\n",
"\n",
" df['mv_adjusted_volume'] = df['vol'] / df['log(circ_mv)']\n",
"\n",
" df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['log(circ_mv)'])\n",
"\n",
" df['nonlinear_mv_volume'] = df['vol'] / df['log(circ_mv)']\n",
"\n",
" df['mv_volume_ratio'] = df['volume_ratio'] / df['log(circ_mv)']\n",
"\n",
" df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['log(circ_mv)']\n",
"\n",
" drop_columns = [col for col in df.columns if col.startswith('_')]\n",
" df.drop(columns=drop_columns, inplace=True, errors='ignore')\n",
"\n",
" new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]\n",
" return df, new_columns\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "53f86ddc0677a6d7",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:15.152455Z",
"start_time": "2025-04-11T11:41:10.084099Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"from main.utils.factor import get_act_factor\n",
"\n",
"\n",
"def read_industry_data(h5_filename):\n",
" # 读取 H5 文件中所有的行业数据\n",
" industry_data = pd.read_hdf(h5_filename, key='sw_daily', columns=[\n",
" 'ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'pe', 'pb', 'vol'\n",
" ]) # 假设 H5 文件的键是 'industry_data'\n",
" industry_data = industry_data.sort_values(by=['ts_code', 'trade_date'])\n",
" industry_data = industry_data.reindex()\n",
" industry_data['trade_date'] = pd.to_datetime(industry_data['trade_date'], format='%Y%m%d')\n",
"\n",
" grouped = industry_data.groupby('ts_code', group_keys=False)\n",
" industry_data['obv'] = grouped.apply(\n",
" lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
" )\n",
" industry_data['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
" industry_data['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
"\n",
" industry_data = get_act_factor(industry_data, cat=False)\n",
" industry_data = industry_data.sort_values(by=['trade_date', 'ts_code'])\n",
"\n",
" # # 计算每天每个 ts_code 的因子和当天所有 ts_code 的中位数的偏差\n",
" # factor_columns = ['obv', 'return_5', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4'] # 因子列\n",
" #\n",
" # for factor in factor_columns:\n",
" # if factor in industry_data.columns:\n",
" # # 计算每天每个 ts_code 的因子值与当天所有 ts_code 的中位数的偏差\n",
" # industry_data[f'{factor}_deviation'] = industry_data.groupby('trade_date')[factor].transform(\n",
" # lambda x: x - x.mean())\n",
"\n",
" industry_data['return_5_percentile'] = industry_data.groupby('trade_date')['return_5'].transform(\n",
" lambda x: x.rank(pct=True))\n",
" industry_data['return_20_percentile'] = industry_data.groupby('trade_date')['return_20'].transform(\n",
" lambda x: x.rank(pct=True))\n",
" industry_data = industry_data.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])\n",
"\n",
" industry_data = industry_data.rename(\n",
" columns={col: f'industry_{col}' for col in industry_data.columns if col not in ['ts_code', 'trade_date']})\n",
"\n",
" industry_data = industry_data.rename(columns={'ts_code': 'cat_l2_code'})\n",
" return industry_data\n",
"\n",
"industry_df = read_industry_data('../../data/sw_daily.h5')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "dbe2fd8021b9417f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:41:15.172103Z",
"start_time": "2025-04-11T11:41:15.167533Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'open', 'close', 'high', 'low', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'l2_code', 'in_date']\n"
]
}
],
"source": [
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if\n",
" col not in ['turnover_rate', 'pe_ttm', 'volume_ratio', 'vol', 'pct_chg', 'l1_code', 'winner_rate']]\n",
"origin_columns = [col for col in origin_columns if col not in index_data.columns]\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "85c3e3d0235ffffa",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:42:31.593231Z",
"start_time": "2025-04-11T11:41:15.188575Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
" 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol',\n",
" 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol',\n",
" 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct',\n",
" 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg',\n",
" 'winner_rate', 'l1_code', 'l2_code', 'lg_elg_net_buy_vol',\n",
" 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff',\n",
" 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop',\n",
" 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change',\n",
" 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness',\n",
" 'floating_chip_proxy', 'cost_support_15pct_change',\n",
" 'cat_winner_price_zone', 'flow_chip_consistency',\n",
" 'profit_taking_vs_absorb', '_is_positive', '_is_negative',\n",
" 'cat_is_positive', '_pos_returns', '_neg_returns', '_pos_returns_sq',\n",
" '_neg_returns_sq', 'upside_vol', 'downside_vol', 'vol_ratio',\n",
" 'return_skew', 'return_kurtosis', 'volume_change_rate',\n",
" 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike',\n",
" 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike',\n",
" 'vol_std_5', 'atr_14', 'atr_6', 'obv'],\n",
" dtype='object')\n",
"Calculating lg_flow_mom_corr_20_60...\n",
"Finished lg_flow_mom_corr_20_60.\n",
"Calculating lg_buy_consolidation_20...\n",
"Finished lg_buy_consolidation_20.\n",
"Calculating lg_flow_accel...\n",
"Finished lg_flow_accel.\n",
"Calculating profit_pressure...\n",
"Finished profit_pressure.\n",
"Calculating underwater_resistance...\n",
"Finished underwater_resistance.\n",
"Calculating cost_conc_std_20...\n",
"Finished cost_conc_std_20.\n",
"Calculating profit_decay_20...\n",
"Finished profit_decay_20.\n",
"Calculating vol_amp_loss_20...\n",
"Finished vol_amp_loss_20.\n",
"Calculating vol_drop_profit_cnt_5...\n",
"Finished vol_drop_profit_cnt_5.\n",
"Calculating lg_flow_vol_interact_20...\n",
"Finished lg_flow_vol_interact_20.\n",
"Calculating cost_break_confirm_cnt_5...\n",
"Finished cost_break_confirm_cnt_5.\n",
"Calculating atr_norm_channel_pos_14...\n",
"Finished atr_norm_channel_pos_14.\n",
"Calculating turnover_diff_skew_20...\n",
"Finished turnover_diff_skew_20.\n",
"Calculating lg_sm_flow_diverge_20...\n",
"Finished lg_sm_flow_diverge_20.\n",
"Calculating pullback_strong_20_20...\n",
"Finished pullback_strong_20_20.\n",
"Calculating vol_wgt_hist_pos_20...\n",
"Finished vol_wgt_hist_pos_20.\n",
"Calculating vol_adj_roc_20...\n",
"Finished vol_adj_roc_20.\n",
"Calculating intraday_lg_flow_corr_20 (Placeholder - complex implementation)...\n",
"Finished intraday_lg_flow_corr_20 (Placeholder).\n",
"Calculating cap_neutral_cost_metric (Placeholder - requires statsmodels)...\n",
"Finished cap_neutral_cost_metric (Placeholder).\n",
"Calculating hurst_net_mf_vol_60 (Placeholder - requires hurst library)...\n",
"Error: 'hurst' library not installed. Cannot calculate factor.\n",
"Finished hurst_net_mf_vol_60 (Placeholder).\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 3166509 entries, 0 to 3166508\n",
"Columns: 158 entries, ts_code to hurst_net_mf_vol_60\n",
"dtypes: bool(12), datetime64[ns](1), float64(138), int32(3), int64(1), object(3)\n",
"memory usage: 3.5+ GB\n",
"None\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"def filter_data(df):\n",
" # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n",
" df = df[~df['is_st']]\n",
" df = df[~df['ts_code'].str.endswith('BJ')]\n",
" df = df[~df['ts_code'].str.startswith('30')]\n",
" df = df[~df['ts_code'].str.startswith('68')]\n",
" df = df[~df['ts_code'].str.startswith('8')]\n",
" df = df[df['trade_date'] >= '2021-01-01']\n",
" if 'in_date' in df.columns:\n",
" df = df.drop(columns=['in_date'])\n",
" df = df.reset_index(drop=True)\n",
" return df\n",
"\n",
"\n",
"df = filter_data(df)\n",
"# df = get_technical_factor(df)\n",
"# df = get_act_factor(df)\n",
"# df = get_money_flow_factor(df)\n",
"# df = get_alpha_factor(df)\n",
"# df = get_limit_factor(df)\n",
"# df = get_cyp_perf_factor(df)\n",
"# df = get_mv_factors(df)\n",
"df, _ = get_rolling_factor(df)\n",
"df, _ = get_simple_factor(df)\n",
"from main.factor.factor import *\n",
"lg_flow_mom_corr(df, N=20, M=60)\n",
"lg_buy_consolidation(df, N=20)\n",
"lg_flow_accel(df)\n",
"profit_pressure(df)\n",
"underwater_resistance(df)\n",
"cost_conc_std(df, N=20)\n",
"profit_decay(df, N=20)\n",
"vol_amp_loss(df, N=20)\n",
"vol_drop_profit_cnt(df, N=20, M=5)\n",
"lg_flow_vol_interact(df, N=20)\n",
"cost_break_confirm_cnt(df, M=5)\n",
"atr_norm_channel_pos(df, N=14)\n",
"turnover_diff_skew(df, N=20)\n",
"lg_sm_flow_diverge(df, N=20)\n",
"pullback_strong(df, N=20, M=20)\n",
"vol_wgt_hist_pos(df, N=20)\n",
"vol_adj_roc(df, N=20)\n",
"intraday_lg_flow_corr(df, N=20) # Placeholder\n",
"cap_neutral_cost_metric(df) # Placeholder\n",
"hurst_exponent_flow(df, N=60) # Placeholder\n",
"# calculate_complex_factor(df)\n",
"# cs_rank_net_lg_flow_val(df)\n",
"# cs_rank_flow_divergence(df)\n",
"# cs_rank_industry_adj_lg_flow(df) # Needs cat_l2_code\n",
"# cs_rank_elg_buy_ratio(df)\n",
"# cs_rank_rel_profit_margin(df)\n",
"# cs_rank_cost_breadth(df)\n",
"# cs_rank_dist_to_upper_cost(df)\n",
"# cs_rank_winner_rate(df)\n",
"# cs_rank_intraday_range(df)\n",
"# cs_rank_close_pos_in_range(df)\n",
"# cs_rank_opening_gap(df) # Needs pre_close\n",
"# cs_rank_pos_in_hist_range(df) # Needs his_low, his_high\n",
"# cs_rank_vol_x_profit_margin(df)\n",
"# cs_rank_lg_flow_price_concordance(df)\n",
"# cs_rank_turnover_per_winner(df)\n",
"# cs_rank_ind_cap_neutral_pe(df) # Placeholder - needs external libraries\n",
"# cs_rank_volume_ratio(df) # Needs volume_ratio\n",
"# cs_rank_elg_buy_sell_sm_ratio(df)\n",
"# cs_rank_cost_dist_vol_ratio(df) # Needs volume_ratio\n",
"# cs_rank_size(df) # Needs circ_mv\n",
"# df = df.merge(industry_df, on=['l1_code', 'trade_date'], how='left')\n",
"df = df.rename(columns={'l1_code': 'cat_l1_code'})\n",
"df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
"\n",
"# df = df.merge(index_data, on='trade_date', how='left')\n",
"\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "f4f16d63ad18d1bc",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:42:31.775937Z",
"start_time": "2025-04-11T11:42:31.765571Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"def create_deviation_within_dates(df, feature_columns):\n",
" groupby_col = 'cat_l2_code' # 使用 trade_date 进行分组\n",
" new_columns = {}\n",
" ret_feature_columns = feature_columns[:]\n",
"\n",
" # 自动选择所有数值型特征\n",
" num_features = [col for col in feature_columns if 'cat' not in col and 'index' not in col]\n",
"\n",
" # num_features = ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'cat_vol_spike', 'obv', 'maobv_6', 'return_5', 'return_10', 'return_20', 'std_return_5', 'std_return_15', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'alpha_022', 'alpha_003', 'alpha_007', 'alpha_013']\n",
" num_features = [col for col in num_features if 'cat' not in col and 'industry' not in col]\n",
" num_features = [col for col in num_features if 'limit' not in col]\n",
" num_features = [col for col in num_features if 'cyq' not in col]\n",
"\n",
" # 遍历所有数值型特征\n",
" for feature in num_features:\n",
" if feature == 'trade_date': # 不需要对 'trade_date' 计算偏差\n",
" continue\n",
"\n",
" # grouped_mean = df.groupby(['trade_date'])[feature].transform('mean')\n",
" # deviation_col_name = f'deviation_mean_{feature}'\n",
" # new_columns[deviation_col_name] = df[feature] - grouped_mean\n",
" # ret_feature_columns.append(deviation_col_name)\n",
"\n",
" grouped_mean = df.groupby(['trade_date', groupby_col])[feature].transform('mean')\n",
" deviation_col_name = f'deviation_mean_{feature}'\n",
" new_columns[deviation_col_name] = df[feature] - grouped_mean\n",
" ret_feature_columns.append(deviation_col_name)\n",
"\n",
" # 将新计算的偏差特征与原始 DataFrame 合并\n",
" df = pd.concat([df, pd.DataFrame(new_columns)], axis=1)\n",
"\n",
" # for feature in ['obv', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4']:\n",
" # df[f'deviation_industry_{feature}'] = df[feature] - df[f'industry_{feature}']\n",
"\n",
" return df, ret_feature_columns\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "40e6b68a91b30c79",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:42:33.375897Z",
"start_time": "2025-04-11T11:42:31.854118Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"from scipy.stats import ks_2samp\n",
"from sklearn.discriminant_analysis import StandardScaler\n",
"\n",
"\n",
"def remove_shifted_features(train_data, feature_columns, ks_threshold=0.05, wasserstein_threshold=0.1, size=0.8,\n",
" log=True, val_data=None):\n",
" dropped_features = []\n",
"\n",
" if val_data is None:\n",
" all_dates = sorted(train_data['trade_date'].unique().tolist()) # 获取所有唯一的 trade_date\n",
" split_date = all_dates[int(len(all_dates) * size)] # 划分点为倒数第 validation_days 天\n",
" train_data_split = train_data[train_data['trade_date'] < split_date] # 训练集\n",
" val_data_split = train_data[train_data['trade_date'] >= split_date] # 验证集\n",
" else:\n",
" train_data_split = train_data\n",
" val_data_split = val_data\n",
"\n",
" # **统计数据漂移**\n",
" numeric_columns = train_data_split.select_dtypes(include=['float64', 'int64']).columns\n",
" numeric_columns = [col for col in numeric_columns if col in feature_columns]\n",
" for feature in numeric_columns:\n",
" ks_stat, p_value = ks_2samp(train_data_split[feature], val_data_split[feature])\n",
" # wasserstein_dist = wasserstein_distance(train_data_split[feature], val_data_split[feature])\n",
"\n",
" # if p_value < ks_threshold or wasserstein_dist > wasserstein_threshold:\n",
" if p_value < ks_threshold:\n",
" dropped_features.append(feature)\n",
" if log:\n",
" print(f\"检测到 {len(dropped_features)} 个可能漂移的特征: {dropped_features}\")\n",
"\n",
" # **应用阈值进行最终筛选**\n",
" filtered_features = [f for f in feature_columns if f not in dropped_features]\n",
"\n",
" return filtered_features, dropped_features\n",
"\n",
"\n",
"def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,\n",
" log=True):\n",
" if not (0 <= lower_percentile < upper_percentile <= 1):\n",
" raise ValueError(\"Percentile values must satisfy 0 <= lower_percentile < upper_percentile <= 1.\")\n",
"\n",
" # Calculate lower and upper bounds based on percentiles\n",
" lower_bound = label.quantile(lower_percentile)\n",
" upper_bound = label.quantile(upper_percentile)\n",
"\n",
" # Filter out values outside the bounds\n",
" filtered_label = label[(label >= lower_bound) & (label <= upper_bound)]\n",
"\n",
" # Print the number of removed outliers\n",
" if log:\n",
" print(f\"Removed {len(label) - len(filtered_label)} outliers.\")\n",
" return filtered_label\n",
"\n",
"\n",
"def calculate_risk_adjusted_target(df, days=5):\n",
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
" df['future_close'] = df.groupby('ts_code')['close'].shift(-days)\n",
" df['future_open'] = df.groupby('ts_code')['open'].shift(-1)\n",
" df['future_return'] = (df['future_close'] - df['future_open']) / df['future_open']\n",
"\n",
" df['future_volatility'] = df.groupby('ts_code')['future_return'].rolling(days, min_periods=1).std().reset_index(\n",
" level=0, drop=True)\n",
" sharpe_ratio = df['future_return'] * df['future_volatility']\n",
" sharpe_ratio.replace([np.inf, -np.inf], np.nan, inplace=True)\n",
"\n",
" return sharpe_ratio\n",
"\n",
"\n",
"def calculate_score(df, days=5, lambda_param=1.0):\n",
" def calculate_max_drawdown(prices):\n",
" peak = prices.iloc[0] # 初始化峰值\n",
" max_drawdown = 0 # 初始化最大回撤\n",
"\n",
" for price in prices:\n",
" if price > peak:\n",
" peak = price # 更新峰值\n",
" else:\n",
" drawdown = (peak - price) / peak # 计算当前回撤\n",
" max_drawdown = max(max_drawdown, drawdown) # 更新最大回撤\n",
"\n",
" return max_drawdown\n",
"\n",
" def compute_stock_score(stock_df):\n",
" stock_df = stock_df.sort_values(by=['trade_date'])\n",
" future_return = stock_df['future_return']\n",
" # 使用已有的 pct_chg 字段计算波动率\n",
" volatility = stock_df['pct_chg'].rolling(days).std().shift(-days)\n",
" max_drawdown = stock_df['close'].rolling(days).apply(calculate_max_drawdown, raw=False).shift(-days)\n",
" score = future_return - lambda_param * max_drawdown\n",
" return score\n",
"\n",
" # # 确保 DataFrame 按照股票代码和交易日期排序\n",
" # df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
" # 对每个股票分别计算 score\n",
" df['score'] = df.groupby('ts_code').apply(compute_stock_score).reset_index(level=0, drop=True)\n",
"\n",
" return df['score']\n",
"\n",
"\n",
"def remove_highly_correlated_features(df, feature_columns, threshold=0.9):\n",
" numeric_features = df[feature_columns].select_dtypes(include=[np.number]).columns.tolist()\n",
" if not numeric_features:\n",
" raise ValueError(\"No numeric features found in the provided data.\")\n",
"\n",
" corr_matrix = df[numeric_features].corr().abs()\n",
" upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n",
" to_drop = [column for column in upper.columns if any(upper[column] > threshold)]\n",
" remaining_features = [col for col in feature_columns if col not in to_drop\n",
" or 'act' in col or 'af' in col]\n",
" return remaining_features\n",
"\n",
"\n",
"def cross_sectional_standardization(df, features):\n",
" df_sorted = df.sort_values(by='trade_date') # 按时间排序\n",
" df_standardized = df_sorted.copy()\n",
"\n",
" for date in df_sorted['trade_date'].unique():\n",
" # 获取当前时间点的数据\n",
" current_data = df_standardized[df_standardized['trade_date'] == date]\n",
"\n",
" # 只对指定特征进行标准化\n",
" scaler = StandardScaler()\n",
" standardized_values = scaler.fit_transform(current_data[features])\n",
"\n",
" # 将标准化结果重新赋值回去\n",
" df_standardized.loc[df_standardized['trade_date'] == date, features] = standardized_values\n",
"\n",
" return df_standardized\n",
"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"def neutralize_manual(df, features, industry_col, mkt_cap_col):\n",
" \"\"\" 手动实现简单回归以提升速度 \"\"\"\n",
"\n",
" for col in features:\n",
" residuals = []\n",
" for _, group in df.groupby(industry_col):\n",
" if len(group) > 1:\n",
" x = np.log(group[mkt_cap_col]) # 市值对数\n",
" y = group[col] # 因子值\n",
" beta = np.cov(y, x)[0, 1] / np.var(x) # 计算斜率\n",
" alpha = np.mean(y) - beta * np.mean(x) # 计算截距\n",
" resid = y - (alpha + beta * x) # 计算残差\n",
" residuals.extend(resid)\n",
" else:\n",
" residuals.extend(group[col]) # 样本不足时保留原值\n",
"\n",
" df[col] = residuals\n",
"\n",
" return df\n",
"\n",
"\n",
"import gc\n",
"\n",
"gc.collect()\n",
"\n",
"\n",
"def mad_filter(df, features, n=3):\n",
" for col in features:\n",
" median = df[col].median()\n",
" mad = np.median(np.abs(df[col] - median))\n",
" upper = median + n * mad\n",
" lower = median - n * mad\n",
" df[col] = np.clip(df[col], lower, upper) # 截断极值\n",
" return df\n",
"\n",
"\n",
"def percentile_filter(df, features, lower_percentile=0.01, upper_percentile=0.99):\n",
" for col in features:\n",
" # 按日期分组计算上下百分位数\n",
" lower_bound = df.groupby('trade_date')[col].transform(\n",
" lambda x: x.quantile(lower_percentile)\n",
" )\n",
" upper_bound = df.groupby('trade_date')[col].transform(\n",
" lambda x: x.quantile(upper_percentile)\n",
" )\n",
" # 截断超出范围的值\n",
" df[col] = np.clip(df[col], lower_bound, upper_bound)\n",
" return df\n",
"\n",
"\n",
"from scipy.stats import iqr\n",
"\n",
"\n",
"def iqr_filter(df, features):\n",
" for col in features:\n",
" df[col] = df.groupby('trade_date')[col].transform(\n",
" lambda x: (x - x.median()) / iqr(x) if iqr(x) != 0 else x\n",
" )\n",
" return df\n",
"\n",
"\n",
"def quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99, window=60):\n",
" df = df.copy()\n",
" for col in features:\n",
" # 计算 rolling 统计量,需要按日期进行 groupby\n",
" rolling_lower = df.groupby('trade_date')[col].transform(\n",
" lambda x: x.rolling(window=min(len(x), window)).quantile(lower_quantile))\n",
" rolling_upper = df.groupby('trade_date')[col].transform(\n",
" lambda x: x.rolling(window=min(len(x), window)).quantile(upper_quantile))\n",
"\n",
" # 对数据进行裁剪\n",
" df[col] = np.clip(df[col], rolling_lower, rolling_upper)\n",
"\n",
" return df\n",
"\n",
"\n",
"def time_series_quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99, window=60):\n",
" df = df.copy()\n",
" # 确保按股票和时间排序\n",
" df = df.sort_values(['ts_code', 'trade_date'])\n",
" grouped = df.groupby('ts_code')\n",
" for col in features:\n",
" # 对每个股票的时间序列计算滚动分位数\n",
" rolling_lower = grouped[col].rolling(window=window, min_periods=window // 2).quantile(lower_quantile)\n",
" rolling_upper = grouped[col].rolling(window=window, min_periods=window // 2).quantile(upper_quantile)\n",
" # rolling结果带有多重索引需要对齐\n",
" rolling_lower = rolling_lower.reset_index(level=0, drop=True)\n",
" rolling_upper = rolling_upper.reset_index(level=0, drop=True)\n",
" # 应用 clip\n",
" df[col] = np.clip(df[col], rolling_lower, rolling_upper)\n",
" return df\n",
"\n",
"\n",
"def cross_sectional_quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99):\n",
" df = df.copy()\n",
" grouped = df.groupby('trade_date')\n",
" for col in features:\n",
" # 计算每日截面的分位数边界\n",
" lower_bound = grouped[col].transform(lambda x: x.quantile(lower_quantile))\n",
" upper_bound = grouped[col].transform(lambda x: x.quantile(upper_quantile))\n",
" # 应用 clip\n",
" df[col] = np.clip(df[col], lower_bound, upper_bound)\n",
" return df\n",
"\n",
"from scipy.stats import spearmanr\n",
"\n",
"def select_top_rankic_features(train_data: pd.DataFrame, feature_columns: list, target_column: str = 'future_return', n: int = 10):\n",
" rankic_values = {}\n",
" numeric_columns = train_data.select_dtypes(include=['float64', 'int64']).columns\n",
" feature_columns = [col for col in numeric_columns if col in feature_columns]\n",
" if target_column not in train_data.columns:\n",
" print(f\"警告: 目标列 '{target_column}' 不存在于 train_data 中。\")\n",
" return []\n",
"\n",
" for feature in feature_columns:\n",
" if feature in train_data.columns:\n",
" factor_values = train_data[feature].values\n",
" target_values = train_data[target_column].values\n",
"\n",
" # 处理 NaN 值\n",
" valid_indices = ~np.isnan(factor_values) & ~np.isnan(target_values)\n",
" factor_values_valid = factor_values[valid_indices]\n",
" target_values_valid = target_values[valid_indices]\n",
"\n",
" if len(factor_values_valid) >= 2:\n",
" correlation, p_value = spearmanr(factor_values_valid, target_values_valid)\n",
" rankic_values[feature] = correlation\n",
" else:\n",
" rankic_values[feature] = np.nan\n",
" print(f\"警告: 特征 '{feature}' 和目标列 '{target_column}' 共同有效的非 NaN 数据点少于 2 个,无法计算 Rank IC。\")\n",
" else:\n",
" print(f\"警告: 特征列 '{feature}' 不存在于 train_data 中。\")\n",
"\n",
" # 根据 Rank IC 的绝对值进行排序\n",
" sorted_rankic = sorted(rankic_values.items(), key=lambda item: abs(item[1]), reverse=True)\n",
"\n",
" # 保留前 n 个特征\n",
" top_n_features = [item[0] for item in sorted_rankic[:n] if not np.isnan(item[1])]\n",
"\n",
" return top_n_features"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "da2bb202843d9275",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T11:42:33.974040Z",
"start_time": "2025-04-11T11:42:33.430374Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"import lightgbm as lgb\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"def train_light_model(train_data_df, params, feature_columns, callbacks, evals,\n",
" print_feature_importance=True, num_boost_round=100,\n",
" validation_days=180, use_pca=False, split_date=None): # 新增参数validation_days\n",
" # 确保数据按时间排序\n",
" train_data_df = train_data_df.sort_values(by='trade_date')\n",
"\n",
" numeric_columns = train_data_df.select_dtypes(include=['float64', 'int64']).columns\n",
" numeric_columns = [col for col in numeric_columns if col in feature_columns]\n",
" # X_train.loc[:, numeric_columns] = scaler.fit_transform(X_train[numeric_columns])\n",
" # X_val.loc[:, numeric_columns] = scaler.transform(X_val[numeric_columns])\n",
" # train_data_df = cross_sectional_standardization(train_data_df, numeric_columns)\n",
"\n",
" # 去除标签为空的样本\n",
" train_data_df = train_data_df.dropna(subset=['label'])\n",
" # print('原始训练集大小: ', len(train_data_df))\n",
"\n",
" # 按时间顺序划分训练集和验证集\n",
" if split_date is None:\n",
" all_dates = train_data_df['trade_date'].unique() # 获取所有唯一的 trade_date\n",
" if validation_days == 0:\n",
" split_date = all_dates[-1]\n",
" else:\n",
" split_date = all_dates[-validation_days] # 划分点为倒数第 validation_days 天\n",
" if validation_days == 0:\n",
" train_data_split = train_data_df\n",
" else:\n",
" train_data_split = train_data_df[train_data_df['trade_date'] < split_date] # 训练集\n",
" val_data_split = train_data_df[train_data_df['trade_date'] >= split_date] # 验证集\n",
"\n",
" # 打印划分结果\n",
" print(f\"划分后的训练集大小: {len(train_data_split)}, 验证集大小: {len(val_data_split)}\")\n",
"\n",
" # 提取特征和标签\n",
" X_train = train_data_split[feature_columns]\n",
" y_train = train_data_split['label']\n",
"\n",
" # 标准化数值特征\n",
" scaler = StandardScaler()\n",
"\n",
" # 计算每个 trade_date 内的样本数LTR 需要 group 信息)\n",
" train_groups = train_data_split.groupby('trade_date').size().tolist()\n",
" val_groups = val_data_split.groupby('trade_date').size().tolist()\n",
"\n",
" # 处理类别特征\n",
" categorical_feature = [col for col in feature_columns if 'cat' in col]\n",
"\n",
" # 计算权重(基于时间)\n",
" # trade_date = train_data_split['trade_date'] # 交易日期\n",
" # weights = (trade_date - trade_date.min()).dt.days / (trade_date.max() - trade_date.min()).days + 1\n",
" # weights = train_data_split.groupby('trade_date')['std_return_5'].transform(\n",
" # lambda x: x / x.mean()\n",
" # )\n",
" ud = sorted(train_data_split[\"trade_date\"].unique().tolist())\n",
" date_weights = {date: weight * weight for date, weight in zip(ud, np.linspace(1, 10, len(ud)))}\n",
" params['weight'] = train_data_split[\"trade_date\"].map(date_weights).tolist()\n",
"\n",
" train_dataset = lgb.Dataset(\n",
" X_train, label=y_train, group=train_groups,\n",
" categorical_feature=categorical_feature\n",
" )\n",
"\n",
" if validation_days > 0:\n",
" X_val = val_data_split[feature_columns]\n",
" y_val = val_data_split['label']\n",
" val_groups = val_data_split.groupby('trade_date').size().tolist()\n",
" val_dataset = lgb.Dataset(\n",
" X_val, label=y_val, group=val_groups,\n",
" categorical_feature=categorical_feature\n",
" )\n",
" # 训练模型\n",
" model = lgb.train(\n",
" params, train_dataset, num_boost_round=num_boost_round,\n",
" valid_sets=[train_dataset, val_dataset], valid_names=['train', 'valid'],\n",
" callbacks=callbacks\n",
" )\n",
" else:\n",
" model = lgb.train(\n",
" params, train_dataset, num_boost_round=num_boost_round, callbacks=callbacks\n",
" )\n",
"\n",
" # 打印特征重要性(如果需要)\n",
" if print_feature_importance:\n",
" lgb.plot_metric(evals)\n",
" lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
" plt.show()\n",
"\n",
" return model, scaler, None"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "ff19e3f1e051a489",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:50:59.056134Z",
"start_time": "2025-04-11T17:49:54.975048Z"
}
},
"outputs": [],
"source": [
"\n",
"days = 1\n",
"df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
" df.groupby('ts_code')['open'].shift(-1)\n",
"# df['future_return'] = df.groupby('ts_code')['pct_chg'].shift(-1)\n",
"df['future_return2'] = (df.groupby('ts_code')['close'].shift(-1) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
" df.groupby('ts_code')['open'].shift(-1)\n",
"\n",
"df['future_volatility'] = (\n",
" df.groupby('ts_code')['pct_chg']\n",
" .transform(lambda x: x.rolling(days).std().shift(-days))\n",
")\n",
"# df['future_score'] = calculate_score(df, days=days, lambda_param=0.3)\n",
"# df['future_score'] = df['future_return'] + 0.3 * df['future_volatility']\n",
"df['label'] = df.groupby('trade_date', group_keys=False)['future_return'].transform(\n",
" lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')\n",
")\n",
"df['label2'] = df.groupby('trade_date', group_keys=False)['future_return2'].transform(\n",
" lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')\n",
")\n",
"\n",
"# df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
"# lambda x: pd.qcut(x.rank(method='first'), q=20, labels=False, duplicates='raise')\n",
"# )\n",
"# df['future_score'] = (\n",
"# 0.7 * df['future_return']\n",
"# * 0.3 * df['future_volatility']\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "27dba27b2e108316",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:51:05.124540Z",
"start_time": "2025-04-11T17:50:59.121885Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2025-05-07 00:00:00\n"
]
}
],
"source": [
"# def select_stocks(stock_df: pd.DataFrame) -> pd.DataFrame:\n",
"# \"\"\"\n",
"# 筛选出当日所属行业5日平均涨幅排名前5的行业的股票。\n",
"# 假设输入的 stock_df 包含多日数据,并有 'trade_date', 'return_5', 'cat_l2_code' 列。\n",
"# 筛选时会过滤掉行业中股票数量小于等于5的行业。\n",
"# \"\"\"\n",
"\n",
"# def select_func(day_df: pd.DataFrame):\n",
"# # day_df 是某一个交易日的股票数据\n",
"\n",
"# # 检查必需列是否存在\n",
"# required_cols = ['return_5', 'cat_l2_code']\n",
"# if not all(col in day_df.columns for col in required_cols):\n",
"# # 如果必需列缺失返回一个列名与原始输入一致的空DataFrame\n",
"# return pd.DataFrame(columns=day_df.columns)\n",
"\n",
"# # --- 新增逻辑:计算每个行业的股票数量并过滤小行业 ---\n",
"# stock_count_by_industry = day_df.groupby('cat_l2_code').size()\n",
"\n",
"# # 找出股票数量大于5的行业代码\n",
"# industries_to_consider = stock_count_by_industry[stock_count_by_industry >= 5].index.tolist()\n",
"\n",
"# # 过滤掉股票数量不足的行业,只保留要考虑的行业数据\n",
"# filtered_day_df = day_df[day_df['cat_l2_code'].isin(industries_to_consider)]\n",
"# # 如果过滤后没有数据直接返回空DataFrame\n",
"# if filtered_day_df.empty:\n",
"# return pd.DataFrame(columns=day_df.columns)\n",
"\n",
"# industry_avg_return = filtered_day_df.groupby('cat_l2_code')['return_5'].mean()\n",
"\n",
"# industry_avg_return = industry_avg_return.dropna()\n",
"\n",
"# if industry_avg_return.empty:\n",
"# return pd.DataFrame(columns=day_df.columns)\n",
"# top_industries = industry_avg_return.nlargest(5).index.tolist()\n",
"# return filtered_day_df[filtered_day_df['cat_l2_code'].isin(top_industries)].copy()\n",
"\n",
"# stock_df['trade_date'] = pd.to_datetime(stock_df['trade_date'])\n",
"# selected_stocks_df = stock_df.groupby('trade_date', group_keys=False).apply(select_func)\n",
"# return selected_stocks_df\n",
"\n",
"def select_stocks(stock_df):\n",
" def select(group):\n",
" max_stocks = 150\n",
" initial_data = group.nlargest(150, 'return_5')\n",
" unique_labels = initial_data['label'].nunique()\n",
" if unique_labels >= 20 or unique_labels == 0: # 包含标签种类为0的情况\n",
" return initial_data\n",
" for i in range(110, max_stocks + 1, 10):\n",
" data = group.nlargest(i, 'return_5')\n",
" unique_labels = data['label'].nunique()\n",
" if unique_labels >= 20:\n",
" return data\n",
" return group.nlargest(max_stocks, 'return_5') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
" stock_df = stock_df.groupby('trade_date', group_keys=False).apply(select)\n",
" return stock_df\n",
"\n",
"gc.collect()\n",
"\n",
"pdf = select_stocks(df[(df['trade_date'] >= '2022-01-01') & (df['trade_date'] <= '2029-04-07')])\n",
"print(pdf['trade_date'].max())\n",
"\n",
"# pdf['label'] = pdf.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
"# lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "ca96fb81e17c4a90",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:51:06.005791Z",
"start_time": "2025-04-11T17:51:05.133551Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d', 'consecutive_up_limit', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'arbr', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv-maobv_6', 'std_return_5 / std_return_90', 'std_return_90 - std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 
'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile']\n"
]
}
],
"source": [
"pdf = pdf.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
"pdf = pdf.replace([np.inf, -np.inf], np.nan)\n",
"\n",
"feature_columns = [col for col in pdf.columns if col in pdf.columns]\n",
"feature_columns = [col for col in feature_columns if col not in ['trade_date',\n",
" 'ts_code',\n",
" 'label']]\n",
"feature_columns = [col for col in feature_columns if 'future' not in col]\n",
"feature_columns = [col for col in feature_columns if 'label' not in col]\n",
"feature_columns = [col for col in feature_columns if 'score' not in col]\n",
"feature_columns = [col for col in feature_columns if 'gen' not in col]\n",
"feature_columns = [col for col in feature_columns if 'is_st' not in col]\n",
"feature_columns = [col for col in feature_columns if 'pe_ttm' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'volatility' not in col]\n",
"feature_columns = [col for col in feature_columns if 'circ_mv' not in col]\n",
"feature_columns = [col for col in feature_columns if 'code' not in col]\n",
"feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
"feature_columns = [col for col in feature_columns if not col.startswith('_')]\n",
"# feature_columns = [col for col in feature_columns if col not in ['ts_code', 'trade_date', 'vol_std_5', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_007', 'consecutive_up_limit', 'mv_volatility', 'volume_growth', 'mv_growth', 'arbr']]\n",
"feature_columns = [col for col in feature_columns if col not in ['intraday_lg_flow_corr_20', \n",
" 'cap_neutral_cost_metric', \n",
" 'hurst_net_mf_vol_60', \n",
" 'complex_factor_deap_1', \n",
" 'lg_buy_consolidation_20',\n",
" 'cs_rank_ind_cap_neutral_pe',\n",
" 'cs_rank_opening_gap',\n",
" 'cs_rank_ind_adj_lg_flow']]\n",
"print(feature_columns)\n",
"numeric_columns = pdf.select_dtypes(include=['float64', 'int64']).columns\n",
"numeric_columns = [col for col in numeric_columns if col in feature_columns]\n",
"\n",
"\n",
"# filter_index = pdf['future_volatility'].between(pdf['future_volatility'].quantile(0.01),\n",
"# pdf['future_volatility'].quantile(0.99)) | filter_index"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "6746b3d1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"特征列分析:\n",
"特征: vol 最大值: 56161348.41 最小值: 412.4 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: pct_chg 最大值: 10.64 最小值: -10.26 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: turnover_rate 最大值: 86.2547 最小值: 0.0238 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: volume_ratio 最大值: 147.72 最小值: 0.01 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: winner_rate 最大值: 100.59 最小值: 0.0 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: lg_elg_net_buy_vol 最大值: 6033873.0 最小值: -9918925.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: flow_lg_elg_intensity 最大值: 1.0001389979829125 最小值: -0.8916193146876132 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: sm_net_buy_vol 最大值: 6829932.0 最小值: -3215196.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: total_buy_vol 最大值: 39152290.0 最小值: 5.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: lg_elg_buy_prop 最大值: 0.999999999999989 最小值: 0.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: flow_struct_buy_change 最大值: 0.9999999999996841 最小值: -0.9999999999988995 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: lg_elg_net_buy_vol_change 最大值: 8937693.0 最小值: -15389140.0 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: flow_lg_elg_accel 最大值: 23486299.0 最小值: -20413545.0 NaN 数量: 10 NaN 占比: 8.271298593879239e-05\n",
"特征: chip_concentration_range 最大值: 2.528409090460155 最小值: 1.8163778247505023e-05 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: chip_skewness 最大值: 0.5899999941000001 最小值: -0.24642857134856214 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: floating_chip_proxy 最大值: 99.9998754669836 最小值: 0.0 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: cost_support_15pct_change 最大值: 362.06896551724145 最小值: -34.48275862068966 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: flow_chip_consistency 最大值: 202833.0 最小值: -366952.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: profit_taking_vs_absorb 最大值: 6033873.0 最小值: -9918925.0 NaN 数量: 4 NaN 占比: 3.3085194375516956e-05\n",
"特征: upside_vol 最大值: 764.4778008549366 最小值: 0.0165334933205142 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: downside_vol 最大值: 13.530263354653558 最小值: 0.0 NaN 数量: 349 NaN 占比: 0.0028866832092638546\n",
"特征: vol_ratio 最大值: 2247.2826795128544 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: return_skew 最大值: 2.236072323764041 最小值: -2.2360835630243616 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: return_kurtosis 最大值: 5.176755179819847 最小值: -3.3969791567236354 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: volume_change_rate 最大值: 3.6858958043807935 最小值: -0.9853275631981724 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: turnover_deviation 最大值: 1.1547005383792526 最小值: -1.1547005383793087 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: avg_volume_ratio 最大值: 51.17666666666667 最小值: 0.016666666666666077 NaN 数量: 117 NaN 占比: 0.000967741935483871\n",
"特征: vol_spike 最大值: 17304365.832 最小值: 2255.2485 NaN 数量: 468 NaN 占比: 0.003870967741935484\n",
"特征: vol_std_5 最大值: 195.41495999867848 最小值: 0.014991524185255834 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: atr_14 最大值: 2724.9261989170695 最小值: 0.03221843258883346 NaN 数量: 370 NaN 占比: 0.0030603804797353184\n",
"特征: atr_6 最大值: 3297.064666161383 最小值: 0.047258943138373116 NaN 数量: 71 NaN 占比: 0.000587262200165426\n",
"特征: obv 最大值: 331434005.2600001 最小值: -65316063.65999998 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: maobv_6 最大值: 301175240.9416665 最小值: -63824730.77333332 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: rsi_3 最大值: 100.0 最小值: 20.533752609951414 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: return_5 最大值: 5.017205781142464 最小值: -0.02557183243382155 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: return_20 最大值: 5.826058201058201 最小值: -0.5717581910475312 NaN 数量: 487 NaN 占比: 0.004028122415219189\n",
"特征: std_return_5 最大值: 1.6582378585020816 最小值: 2.3779477130116365e-05 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: std_return_90 最大值: 0.39102330156826715 最小值: 0.006668710967306505 NaN 数量: 1770 NaN 占比: 0.014640198511166254\n",
"特征: std_return_90_2 最大值: 0.3907567413557471 最小值: 0.005959840082236695 NaN 数量: 1951 NaN 占比: 0.016137303556658395\n",
"特征: act_factor1 最大值: 1.7903251438438559 最小值: -1.4752711929884126 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: act_factor2 最大值: 2.2207580059881558 最小值: -1.7526892749536007 NaN 数量: 346 NaN 占比: 0.0028618693134822167\n",
"特征: act_factor3 最大值: 4.202337904307388 最小值: -3.3460071644261493 NaN 数量: 487 NaN 占比: 0.004028122415219189\n",
"特征: act_factor4 最大值: 8.541255729360902 最小值: -5.575401659184761 NaN 数量: 1146 NaN 占比: 0.009478908188585608\n",
"特征: rank_act_factor1 最大值: 0.9993552546744036 最小值: 0.0003222687721559781 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: rank_act_factor2 最大值: 0.9993455497382199 最小值: 0.0003223726627981947 NaN 数量: 346 NaN 占比: 0.0028618693134822167\n",
"特征: rank_act_factor3 最大值: 1.0 最小值: 0.0003224766204450177 NaN 数量: 487 NaN 占比: 0.004028122415219189\n",
"特征: cov 最大值: 9211640500.850372 最小值: -1025351031.6684246 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: delta_cov 最大值: 9207544083.643394 最小值: -4494222099.3107605 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: alpha_22_improved 最大值: 4494222099.3107605 最小值: -9207544083.643394 NaN 数量: 468 NaN 占比: 0.003870967741935484\n",
"特征: alpha_003 最大值: 1.0 最小值: -1.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: alpha_007 最大值: 1.0 最小值: 0.0003223726627981947 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: alpha_013 最大值: 1.0 最小值: 0.0003225806451612903 NaN 数量: 468 NaN 占比: 0.003870967741935484\n",
"特征: up_limit_count_10d 最大值: 10.0 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: down_limit_count_10d 最大值: 3.0 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: consecutive_up_limit 最大值: 17 最小值: 0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: weight_roc5 最大值: 4.360439560439561 最小值: -0.4019607843137255 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: smallcap_concentration 最大值: 31.905964365688405 最小值: 0.0 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: cost_stability 最大值: 0.9735383316685713 最小值: 0.0 NaN 数量: 1510 NaN 占比: 0.01248966087675765\n",
"特征: high_cost_break_days 最大值: 5.0 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: liquidity_risk 最大值: 0.02727568902683877 最小值: 1.5428810759666794e-08 NaN 数量: 1252 NaN 占比: 0.010355665839536808\n",
"特征: turnover_std 最大值: 29.89517932088497 最小值: 0.014570369174964438 NaN 数量: 468 NaN 占比: 0.003870967741935484\n",
"特征: mv_volatility 最大值: 2.394008567880757 最小值: 0.0008012062079915182 NaN 数量: 468 NaN 占比: 0.003870967741935484\n",
"特征: volume_growth 最大值: 293.7911250760605 最小值: -0.9934612815303644 NaN 数量: 487 NaN 占比: 0.004028122415219189\n",
"特征: mv_growth 最大值: 24.5789647232902 最小值: -0.08328263450104802 NaN 数量: 487 NaN 占比: 0.004028122415219189\n",
"特征: arbr 最大值: 100.95433748749065 最小值: -168097.89086977823 NaN 数量: 3190 NaN 占比: 0.026385442514474774\n",
"特征: momentum_factor 最大值: 4.193633393631664 最小值: -1.50529286642075 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: resonance_factor 最大值: 932.1131999999999 最小值: -700.3989 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: log_close 最大值: 10.555153146092515 最小值: 0.5068176023684519 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: up 最大值: 0.20949720670391064 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: down 最大值: 0.1827803785874212 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: obv-maobv_6 最大值: 91437345.39833337 最小值: -27497457.346666686 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: std_return_5 / std_return_90 最大值: 4.303704383077534 最小值: 0.00044776171350482643 NaN 数量: 1770 NaN 占比: 0.014640198511166254\n",
"特征: std_return_90 - std_return_90_2 最大值: 0.11510243684648414 最小值: -0.05541087845752879 NaN 数量: 1951 NaN 占比: 0.016137303556658395\n",
"特征: act_factor5 最大值: 16.7546767835003 最小值: -11.517387087951299 NaN 数量: 1146 NaN 占比: 0.009478908188585608\n",
"特征: act_factor6 最大值: 1.4142127937678 最小值: -1.414213562373081 NaN 数量: 346 NaN 占比: 0.0028618693134822167\n",
"特征: active_buy_volume_large 最大值: 46116.0 最小值: -39192.25 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: active_buy_volume_big 最大值: 24021.0 最小值: -16309.69642857143 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: active_buy_volume_small 最大值: 101231.0 最小值: -60896.75 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: buy_lg_vol_minus_sell_lg_vol 最大值: 6315.5 最小值: -10406.0 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: buy_elg_vol_minus_sell_elg_vol 最大值: 15169.0 最小值: -4364.428571428572 NaN 数量: 7 NaN 占比: 5.789909015715467e-05\n",
"特征: ctrl_strength 最大值: 0.8142857142857142 最小值: 0.0 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: low_cost_dev 最大值: 159371.40000000023 最小值: -2.2444444444444476 NaN 数量: 1091 NaN 占比: 0.00902398676592225\n",
"特征: asymmetry 最大值: 44.00000000000096 最小值: 0.0 NaN 数量: 1091 NaN 占比: 0.00902398676592225\n",
"特征: lock_factor 最大值: 76.68641720430107 最小值: 0.008677083333333335 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: cost_atr_adj 最大值: 88.61860174093074 最小值: 0.0002894966414380616 NaN 数量: 1402 NaN 占比: 0.011596360628618694\n",
"特征: mv_turnover_ratio 最大值: 7.519401708236303 最小值: 0.001323022901502634 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: mv_adjusted_volume 最大值: 3675084.7886439813 最小值: 33.57880521115875 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: mv_weighted_turnover 最大值: 7.519401708236303 最小值: 0.001323022901502634 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: nonlinear_mv_volume 最大值: 3675084.7886439813 最小值: 33.57880521115875 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: mv_volume_ratio 最大值: 11.5095150992187 最小值: 0.0007549215390555348 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: mv_momentum 最大值: 829.2617138502171 最小值: 5.7669216057474964e-05 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: lg_flow_mom_corr_20_60 最大值: 0.9909560531477801 最小值: -0.9776689270625863 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: lg_flow_accel 最大值: 23486299.0 最小值: -20413545.0 NaN 数量: 10 NaN 占比: 8.271298593879239e-05\n",
"特征: profit_pressure 最大值: 1219604.64459375 最小值: -32.29264392059554 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: underwater_resistance 最大值: 0.09745390693548084 最小值: -0.4039719240263924 NaN 数量: 1034 NaN 占比: 0.008552522746071134\n",
"特征: cost_conc_std_20 最大值: 0.5663669515051302 最小值: 0.0 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: profit_decay_20 最大值: 63.63523419055014 最小值: -201.87032418942292 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: vol_amp_loss_20 最大值: 5.1986896779851355 最小值: 0.0 NaN 数量: 1252 NaN 占比: 0.010355665839536808\n",
"特征: vol_drop_profit_cnt_5 最大值: 3.0 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: lg_flow_vol_interact_20 最大值: 42.845555877280596 最小值: 0.018505969076852875 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: cost_break_confirm_cnt_5 最大值: 5.0 最小值: -4.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: atr_norm_channel_pos_14 最大值: 14.000000000000002 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: turnover_diff_skew_20 最大值: 4.459639535682866 最小值: -3.4969687384726353 NaN 数量: 250 NaN 占比: 0.0020678246484698098\n",
"特征: lg_sm_flow_diverge_20 最大值: 1.999775245257143 最小值: -0.3913656926280843 NaN 数量: 218 NaN 占比: 0.0018031430934656741\n",
"特征: pullback_strong_20_20 最大值: 253.95565410196681 最小值: -675.6615306894897 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: vol_wgt_hist_pos_20 最大值: 13.234987955447918 最小值: 0.0 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: vol_adj_roc_20 最大值: 1.152339990791505 最小值: -0.3411549434026346 NaN 数量: 0 NaN 占比: 0.0\n",
"特征: industry_obv 最大值: 33399643.0 最小值: -17512962.0 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_return_5 最大值: 0.524069457577581 最小值: -0.2267495964295888 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_return_20 最大值: 0.8652496476530473 最小值: -0.3099103385178408 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry__ema_5 最大值: 73886.17770955434 最小值: 378.62741122827816 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry__ema_13 最大值: 73322.71553688897 最小值: 379.5203678371781 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry__ema_20 最大值: 74222.1571225974 最小值: 381.359643019974 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry__ema_60 最大值: 74769.33183235777 最小值: 401.1538611414743 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor1 最大值: 1.6986610274753051 最小值: -1.5833811847067532 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor2 最大值: 2.0328341745548633 最小值: -1.7615460391418363 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor3 最大值: 3.7292424474002375 最小值: -3.0674619011216553 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor4 最大值: 6.509680946056449 最小值: -4.337297946027389 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor5 最大值: 13.628812636988545 最小值: -10.685144264799936 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_act_factor6 最大值: 1.4142134956701664 最小值: -1.4142135617846938 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_rank_act_factor1 最大值: 1.0 最小值: 0.002277904328018223 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_rank_act_factor2 最大值: 1.0 最小值: 0.002277904328018223 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_rank_act_factor3 最大值: 1.0 最小值: 0.002277904328018223 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_return_5_percentile 最大值: 1.0 最小值: 0.002277904328018223 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n",
"特征: industry_return_20_percentile 最大值: 1.0 最小值: 0.002277904328018223 NaN 数量: 446 NaN 占比: 0.0036889991728701406\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"def analyze_features(df: pd.DataFrame, feature_columns: list):\n",
" \"\"\"\n",
" 分析 DataFrame 中指定特征列的基本信息,包括最大值、最小值和 NaN 数量。\n",
"\n",
" Args:\n",
" df (pd.DataFrame): 要分析的数据框。\n",
" feature_columns (list): 需要分析的特征列名列表。\n",
" \"\"\"\n",
" print(\"特征列分析:\")\n",
" for col in feature_columns:\n",
" if col in df.columns:\n",
" max_val = df[col].max()\n",
" min_val = df[col].min()\n",
" nan_count = df[col].isnull().sum()\n",
" df_count = len(df)\n",
" print(f\"特征: {col} 最大值: {max_val} 最小值: {min_val} NaN 数量: {nan_count} NaN 占比: {nan_count / df_count}\")\n",
" else:\n",
" print(f\"警告: 特征列 '{col}' 不存在于 DataFrame 中。\")\n",
"\n",
"\n",
"analyze_features(pdf, numeric_columns)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "81d4570663ae21d7",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:52:34.201657Z",
"start_time": "2025-04-11T17:51:34.426871Z"
}
},
"outputs": [],
"source": [
"\n",
"# pdf = time_series_quantile_filter(pdf, numeric_columns)\n",
"pdf = cross_sectional_quantile_filter(pdf, numeric_columns)\n",
"pdf = cross_sectional_standardization(pdf, numeric_columns)\n",
"\n",
"pdf = pdf.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
"filter_index = pdf['future_return'].between(pdf['future_return'].quantile(0.01), pdf['future_return'].quantile(0.99))\n",
"\n",
"feature_columns = remove_highly_correlated_features(pdf, feature_columns)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "92428d543f4727ad",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:54:48.103331Z",
"start_time": "2025-04-11T17:54:47.906668Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# print('train data size: ', len(train_data))\n",
"\n",
"label_gain = list(range(len(df['label'].unique())))\n",
"label_gain = [gain * gain for gain in label_gain]\n",
"light_params = {\n",
" 'label_gain': label_gain,\n",
" 'objective': 'lambdarank',\n",
" 'metric': 'ndcg',\n",
" 'learning_rate': 0.03,\n",
" 'num_leaves': 32,\n",
" # 'min_data_in_leaf': 128,\n",
" 'max_depth': 8,\n",
" 'max_bin': 32,\n",
" 'feature_fraction': 0.7,\n",
" 'bagging_fraction': 0.7,\n",
" 'bagging_freq': 1,\n",
" 'lambda_l1': 0.1,\n",
" 'lambda_l2': 0.1,\n",
" 'boosting': 'gbdt',\n",
" 'verbosity': -1,\n",
" 'extra_trees': True,\n",
" 'max_position': 20,\n",
" 'ndcg_at': 1,\n",
" 'quant_train_renew_leaf': True,\n",
" 'lambdarank_truncation_level': 20,\n",
" # 'lambdarank_position_bias_regularization': 1,\n",
" 'seed': 7\n",
"}\n",
"evals = {}\n",
"\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "8f134d435f71e9e2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T17:54:48.336088Z",
"start_time": "2025-04-11T17:54:48.146675Z"
}
},
"outputs": [],
"source": [
"gc.collect()\n",
"\n",
"\n",
"def rolling_train_predict(df, train_days, test_days, feature_columns, days=5, use_pca=False, validation_days=60,\n",
" filter_index=None, params=None):\n",
" # 1. 按照交易日期排序\n",
" unique_dates = df[df['trade_date'] >= '2020-01-01']['trade_date'].unique().tolist()\n",
" unique_dates = sorted(unique_dates)\n",
" n = len(unique_dates)\n",
"\n",
" # 2. 计算需要跳过的天数,使后续窗口对齐\n",
" extra_days = (n - train_days) % test_days\n",
" start_index = extra_days # 从此索引开始滚动\n",
"\n",
" predictions_list = []\n",
"\n",
" for start in range(start_index, n - train_days - test_days + 1, test_days):\n",
" try:\n",
" # train_dates = unique_dates[start: start + train_days]\n",
" train_dates = unique_dates[max(0, start - days + 1): start + train_days - days + 1]\n",
" test_dates = unique_dates[start + train_days: start + train_days + test_days]\n",
"\n",
" # 根据日期筛选数据\n",
" # train_data = df[df['trade_date'].isin(train_dates)]\n",
" train_data = df[filter_index & df['trade_date'].isin(train_dates)]\n",
" val_data = df[(df['trade_date'] == unique_dates[start + train_days - days + 1])]\n",
" val_data['label'] = val_data['label2']\n",
" # train_data = pd.concat([train_data, val_data], axis=0)\n",
" test_data = df[df['trade_date'].isin(test_dates)]\n",
"\n",
" train_data = train_data.sort_values('trade_date')\n",
" test_data = test_data.sort_values('trade_date')\n",
"\n",
" final_feature_columns = [col for col in feature_columns]\n",
" # final_feature_columns = select_top_rankic_features(train_data, final_feature_columns, n=50)\n",
" # final_feature_columns, _ = remove_shifted_features(train_data, final_feature_columns, size=0.8, log=False,\n",
" # val_data=val_data)\n",
"\n",
" train_data = train_data.dropna(subset=final_feature_columns)\n",
" train_data = train_data.dropna(subset=['label'])\n",
" train_data = train_data.reset_index(drop=True)\n",
"\n",
"\n",
" # print(test_data.tail())\n",
" test_data = test_data.dropna(subset=final_feature_columns)\n",
" # test_data = test_data.dropna(subset=['label'])\n",
" test_data = test_data.reset_index(drop=True)\n",
"\n",
" # print(len(train_data))\n",
" # print(f\"最小日期: {train_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n",
" print(\n",
" f\"train_data最大日期: {train_data['trade_date'].max().strftime('%Y-%m-%d')}, 训练天数:{train_data['trade_date'].nunique()}, feat size:{len(final_feature_columns)}\")\n",
" # # print(len(test_data))\n",
" # print(f\"最小日期: {test_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n",
" print(f\"test_data最大日期: {test_data['trade_date'].max().strftime('%Y-%m-%d')}\")\n",
"\n",
" cat_columns = [col for col in df.columns if col.startswith('cat')]\n",
" for col in cat_columns:\n",
" train_data[col] = train_data[col].astype('category')\n",
" test_data[col] = test_data[col].astype('category')\n",
"\n",
" label_gain = list(range(len(train_data['label'].unique())))\n",
" label_gain = [(gain + 1) * (gain + 1) for gain in label_gain]\n",
" params['label_gain'] = label_gain\n",
"\n",
" # ud = train_data[\"trade_date\"].unique()\n",
" # date_weights = {date: weight for date, weight in zip(ud, np.linspace(1, 2, len(unique_dates)))}\n",
" # params['weight'] = train_data[\"trade_date\"].map(date_weights).tolist()\n",
"\n",
" # print(f'feature_columns: {feature_columns}')\n",
" # feature_contri = [2 if feat.startswith('act_factor') else 1 for feat in feature_columns]\n",
" # params['feature_contri'] = feature_contri\n",
" try:\n",
" model, _, _ = train_light_model(train_data.dropna(subset=['label']),\n",
" params, final_feature_columns,\n",
" [lgb.log_evaluation(period=100),\n",
" lgb.callback.record_evaluation(evals),\n",
" # lgb.early_stopping(100, first_metric_only=True)\n",
" ], evals,\n",
" num_boost_round=100, validation_days=validation_days,\n",
" print_feature_importance=False, use_pca=False)\n",
"\n",
" score_df = test_data.copy()\n",
" score_df['score'] = model.predict(score_df[final_feature_columns])\n",
" # score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]\n",
"\n",
" score_df = score_df.groupby('trade_date', group_keys=False).apply(\n",
" lambda x: x[x['score'] >= x['score'].quantile(0.90)] # 计算90%分位数作为阈值,筛选分数>=阈值的行\n",
" ).reset_index(drop=True) # drop=True 避免添加旧索引列\n",
" # save_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nlargest(1, 'score')).reset_index()\n",
" score_df = score_df.groupby('trade_date', group_keys=False).apply(lambda x: x.nsmallest(1, 'total_mv')).reset_index()\n",
" score_df = score_df.sort_values(['trade_date', 'score'])\n",
" score_df = score_df[['trade_date', 'score', 'ts_code']]\n",
" predictions_list.append(score_df)\n",
" except Exception as e:\n",
" print(e)\n",
" print(train_data['label'].unique().tolist())\n",
" except Exception as e:\n",
" print(df[df['trade_date'].isin(test_dates)])\n",
" print(train_dates, test_dates)\n",
" raise e \n",
"\n",
"\n",
" final_predictions = pd.concat(predictions_list, ignore_index=True)\n",
" return final_predictions\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "777822bd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 120900 entries, 2237 to 120811\n",
"Columns: 181 entries, ts_code to industry_return_20_percentile\n",
"dtypes: bool(12), datetime64[ns](1), float64(162), int32(3), object(3)\n",
"memory usage: 156.8+ MB\n",
"None\n"
]
}
],
"source": [
"print(pdf.info())"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "63235069-dc59-48fb-961a-e80373e41a61",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:03:45.666269Z",
"start_time": "2025-04-11T18:02:13.319322Z"
},
"editable": true,
"scrolled": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"finish\n",
"train_data最大日期: 2022-02-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-08\n",
"划分后的训练集大小: 2752, 验证集大小: 145\n",
"train_data最大日期: 2022-02-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-09\n",
"划分后的训练集大小: 2763, 验证集大小: 136\n",
"train_data最大日期: 2022-02-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-10\n",
"划分后的训练集大小: 2767, 验证集大小: 131\n",
"train_data最大日期: 2022-02-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-11\n",
"划分后的训练集大小: 2765, 验证集大小: 133\n",
"train_data最大日期: 2022-02-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-14\n",
"划分后的训练集大小: 2762, 验证集大小: 135\n",
"train_data最大日期: 2022-02-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-15\n",
"划分后的训练集大小: 2769, 验证集大小: 140\n",
"train_data最大日期: 2022-02-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-16\n",
"划分后的训练集大小: 2776, 验证集大小: 144\n",
"train_data最大日期: 2022-02-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-17\n",
"划分后的训练集大小: 2775, 验证集大小: 140\n",
"train_data最大日期: 2022-02-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-18\n",
"划分后的训练集大小: 2779, 验证集大小: 142\n",
"train_data最大日期: 2022-02-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-21\n",
"划分后的训练集大小: 2781, 验证集大小: 138\n",
"train_data最大日期: 2022-02-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-22\n",
"划分后的训练集大小: 2790, 验证集大小: 139\n",
"train_data最大日期: 2022-02-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-23\n",
"划分后的训练集大小: 2778, 验证集大小: 125\n",
"train_data最大日期: 2022-02-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-24\n",
"划分后的训练集大小: 2773, 验证集大小: 134\n",
"train_data最大日期: 2022-02-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-25\n",
"划分后的训练集大小: 2760, 验证集大小: 131\n",
"train_data最大日期: 2022-02-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-02-28\n",
"划分后的训练集大小: 2748, 验证集大小: 131\n",
"train_data最大日期: 2022-02-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-01\n",
"划分后的训练集大小: 2751, 验证集大小: 141\n",
"train_data最大日期: 2022-03-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-02\n",
"划分后的训练集大小: 2751, 验证集大小: 139\n",
"train_data最大日期: 2022-03-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-03\n",
"划分后的训练集大小: 2740, 验证集大小: 132\n",
"train_data最大日期: 2022-03-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-04\n",
"划分后的训练集大小: 2733, 验证集大小: 132\n",
"train_data最大日期: 2022-03-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-07\n",
"划分后的训练集大小: 2721, 验证集大小: 133\n",
"train_data最大日期: 2022-03-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-08\n",
"划分后的训练集大小: 2702, 验证集大小: 126\n",
"train_data最大日期: 2022-03-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-09\n",
"划分后的训练集大小: 2689, 验证集大小: 123\n",
"train_data最大日期: 2022-03-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-10\n",
"划分后的训练集大小: 2686, 验证集大小: 128\n",
"train_data最大日期: 2022-03-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-11\n",
"划分后的训练集大小: 2681, 验证集大小: 128\n",
"train_data最大日期: 2022-03-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-14\n",
"划分后的训练集大小: 2684, 验证集大小: 138\n",
"train_data最大日期: 2022-03-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-15\n",
"划分后的训练集大小: 2681, 验证集大小: 137\n",
"train_data最大日期: 2022-03-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-16\n",
"划分后的训练集大小: 2677, 验证集大小: 140\n",
"train_data最大日期: 2022-03-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-17\n",
"划分后的训练集大小: 2681, 验证集大小: 144\n",
"train_data最大日期: 2022-03-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-18\n",
"划分后的训练集大小: 2681, 验证集大小: 142\n",
"train_data最大日期: 2022-03-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-21\n",
"划分后的训练集大小: 2676, 验证集大小: 133\n",
"train_data最大日期: 2022-03-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-22\n",
"划分后的训练集大小: 2665, 验证集大小: 128\n",
"train_data最大日期: 2022-03-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-23\n",
"划分后的训练集大小: 2673, 验证集大小: 133\n",
"train_data最大日期: 2022-03-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-24\n",
"划分后的训练集大小: 2665, 验证集大小: 126\n",
"train_data最大日期: 2022-03-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-25\n",
"划分后的训练集大小: 2663, 验证集大小: 129\n",
"train_data最大日期: 2022-03-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-28\n",
"划分后的训练集大小: 2662, 验证集大小: 130\n",
"train_data最大日期: 2022-03-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-29\n",
"划分后的训练集大小: 2653, 验证集大小: 132\n",
"train_data最大日期: 2022-03-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-30\n",
"划分后的训练集大小: 2649, 验证集大小: 135\n",
"train_data最大日期: 2022-03-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-03-31\n",
"划分后的训练集大小: 2652, 验证集大小: 135\n",
"train_data最大日期: 2022-03-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-01\n",
"划分后的训练集大小: 2649, 验证集大小: 129\n",
"train_data最大日期: 2022-04-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-06\n",
"划分后的训练集大小: 2648, 验证集大小: 132\n",
"train_data最大日期: 2022-04-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-07\n",
"划分后的训练集大小: 2653, 验证集大小: 131\n",
"train_data最大日期: 2022-04-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-08\n",
"划分后的训练集大小: 2665, 验证集大小: 135\n",
"train_data最大日期: 2022-04-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-11\n",
"划分后的训练集大小: 2671, 验证集大小: 134\n",
"train_data最大日期: 2022-04-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-12\n",
"划分后的训练集大小: 2674, 验证集大小: 131\n",
"train_data最大日期: 2022-04-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-13\n",
"划分后的训练集大小: 2674, 验证集大小: 138\n",
"train_data最大日期: 2022-04-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-14\n",
"划分后的训练集大小: 2676, 验证集大小: 139\n",
"train_data最大日期: 2022-04-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-15\n",
"划分后的训练集大小: 2670, 验证集大小: 134\n",
"train_data最大日期: 2022-04-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-18\n",
"划分后的训练集大小: 2661, 验证集大小: 135\n",
"train_data最大日期: 2022-04-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-19\n",
"划分后的训练集大小: 2660, 验证集大小: 141\n",
"train_data最大日期: 2022-04-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-20\n",
"划分后的训练集大小: 2664, 验证集大小: 137\n",
"train_data最大日期: 2022-04-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-21\n",
"划分后的训练集大小: 2675, 验证集大小: 139\n",
"train_data最大日期: 2022-04-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-22\n",
"划分后的训练集大小: 2675, 验证集大小: 133\n",
"train_data最大日期: 2022-04-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-25\n",
"划分后的训练集大小: 2681, 验证集大小: 132\n",
"train_data最大日期: 2022-04-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-26\n",
"划分后的训练集大小: 2682, 验证集大小: 130\n",
"train_data最大日期: 2022-04-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-27\n",
"划分后的训练集大小: 2692, 验证集大小: 140\n",
"train_data最大日期: 2022-04-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-28\n",
"划分后的训练集大小: 2698, 验证集大小: 138\n",
"train_data最大日期: 2022-04-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-04-29\n",
"划分后的训练集大小: 2704, 验证集大小: 141\n",
"train_data最大日期: 2022-04-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-05\n",
"划分后的训练集大小: 2710, 验证集大小: 141\n",
"train_data最大日期: 2022-05-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-06\n",
"划分后的训练集大小: 2723, 验证集大小: 142\n",
"train_data最大日期: 2022-05-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-09\n",
"划分后的训练集大小: 2724, 验证集大小: 133\n",
"train_data最大日期: 2022-05-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-10\n",
"划分后的训练集大小: 2723, 验证集大小: 130\n",
"train_data最大日期: 2022-05-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-11\n",
"划分后的训练集大小: 2715, 验证集大小: 127\n",
"train_data最大日期: 2022-05-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-12\n",
"划分后的训练集大小: 2707, 验证集大小: 126\n",
"train_data最大日期: 2022-05-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-13\n",
"划分后的训练集大小: 2712, 验证集大小: 136\n",
"train_data最大日期: 2022-05-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-16\n",
"划分后的训练集大小: 2711, 验证集大小: 137\n",
"train_data最大日期: 2022-05-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-17\n",
"划分后的训练集大小: 2709, 验证集大小: 137\n",
"train_data最大日期: 2022-05-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-18\n",
"划分后的训练集大小: 2718, 验证集大小: 143\n",
"train_data最大日期: 2022-05-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-19\n",
"划分后的训练集大小: 2720, 验证集大小: 137\n",
"train_data最大日期: 2022-05-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-20\n",
"划分后的训练集大小: 2712, 验证集大小: 133\n",
"train_data最大日期: 2022-05-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-23\n",
"划分后的训练集大小: 2705, 验证集大小: 130\n",
"train_data最大日期: 2022-05-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-24\n",
"划分后的训练集大小: 2692, 验证集大小: 126\n",
"train_data最大日期: 2022-05-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-25\n",
"划分后的训练集大小: 2685, 验证集大小: 126\n",
"train_data最大日期: 2022-05-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-26\n",
"划分后的训练集大小: 2687, 验证集大小: 134\n",
"train_data最大日期: 2022-05-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-27\n",
"划分后的训练集大小: 2693, 验证集大小: 136\n",
"train_data最大日期: 2022-05-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-30\n",
"划分后的训练集大小: 2684, 验证集大小: 131\n",
"train_data最大日期: 2022-05-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-05-31\n",
"划分后的训练集大小: 2677, 验证集大小: 131\n",
"train_data最大日期: 2022-05-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-01\n",
"划分后的训练集大小: 2665, 验证集大小: 129\n",
"train_data最大日期: 2022-06-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-02\n",
"划分后的训练集大小: 2656, 验证集大小: 132\n",
"train_data最大日期: 2022-06-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-06\n",
"划分后的训练集大小: 2648, 验证集大小: 134\n",
"train_data最大日期: 2022-06-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-07\n",
"划分后的训练集大小: 2655, 验证集大小: 140\n",
"train_data最大日期: 2022-06-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-08\n",
"划分后的训练集大小: 2668, 验证集大小: 143\n",
"train_data最大日期: 2022-06-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-09\n",
"划分后的训练集大小: 2679, 验证集大小: 138\n",
"train_data最大日期: 2022-06-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-10\n",
"划分后的训练集大小: 2695, 验证集大小: 142\n",
"train_data最大日期: 2022-06-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-13\n",
"划分后的训练集大小: 2698, 验证集大小: 139\n",
"train_data最大日期: 2022-06-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-14\n",
"划分后的训练集大小: 2696, 验证集大小: 135\n",
"train_data最大日期: 2022-06-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-15\n",
"划分后的训练集大小: 2697, 验证集大小: 138\n",
"train_data最大日期: 2022-06-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-16\n",
"划分后的训练集大小: 2692, 验证集大小: 138\n",
"train_data最大日期: 2022-06-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-17\n",
"划分后的训练集大小: 2694, 验证集大小: 139\n",
"train_data最大日期: 2022-06-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-20\n",
"划分后的训练集大小: 2700, 验证集大小: 139\n",
"train_data最大日期: 2022-06-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-21\n",
"划分后的训练集大小: 2708, 验证集大小: 138\n",
"train_data最大日期: 2022-06-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-22\n",
"划分后的训练集大小: 2721, 验证集大小: 139\n",
"train_data最大日期: 2022-06-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-23\n",
"划分后的训练集大小: 2726, 验证集大小: 131\n",
"train_data最大日期: 2022-06-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-24\n",
"划分后的训练集大小: 2730, 验证集大小: 138\n",
"train_data最大日期: 2022-06-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-27\n",
"划分后的训练集大小: 2729, 验证集大小: 135\n",
"train_data最大日期: 2022-06-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-28\n",
"划分后的训练集大小: 2724, 验证集大小: 126\n",
"train_data最大日期: 2022-06-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-29\n",
"划分后的训练集大小: 2710, 验证集大小: 117\n",
"train_data最大日期: 2022-06-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-06-30\n",
"划分后的训练集大小: 2711, 验证集大小: 130\n",
"train_data最大日期: 2022-06-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-01\n",
"划分后的训练集大小: 2718, 验证集大小: 139\n",
"train_data最大日期: 2022-07-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-04\n",
"划分后的训练集大小: 2725, 验证集大小: 141\n",
"train_data最大日期: 2022-07-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-05\n",
"划分后的训练集大小: 2723, 验证集大小: 138\n",
"train_data最大日期: 2022-07-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-06\n",
"划分后的训练集大小: 2721, 验证集大小: 141\n",
"train_data最大日期: 2022-07-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-07\n",
"划分后的训练集大小: 2722, 验证集大小: 139\n",
"train_data最大日期: 2022-07-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-08\n",
"划分后的训练集大小: 2725, 验证集大小: 145\n",
"train_data最大日期: 2022-07-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-11\n",
"划分后的训练集大小: 2725, 验证集大小: 139\n",
"train_data最大日期: 2022-07-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-12\n",
"划分后的训练集大小: 2729, 验证集大小: 139\n",
"train_data最大日期: 2022-07-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-13\n",
"划分后的训练集大小: 2728, 验证集大小: 137\n",
"train_data最大日期: 2022-07-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-14\n",
"划分后的训练集大小: 2720, 验证集大小: 130\n",
"train_data最大日期: 2022-07-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-15\n",
"划分后的训练集大小: 2717, 验证集大小: 136\n",
"train_data最大日期: 2022-07-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-18\n",
"划分后的训练集大小: 2716, 验证集大小: 138\n",
"train_data最大日期: 2022-07-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-19\n",
"划分后的训练集大小: 2713, 验证集大小: 135\n",
"train_data最大日期: 2022-07-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-20\n",
"划分后的训练集大小: 2704, 验证集大小: 130\n",
"train_data最大日期: 2022-07-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-21\n",
"划分后的训练集大小: 2706, 验证集大小: 133\n",
"train_data最大日期: 2022-07-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-22\n",
"划分后的训练集大小: 2707, 验证集大小: 139\n",
"train_data最大日期: 2022-07-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-25\n",
"划分后的训练集大小: 2710, 验证集大小: 138\n",
"train_data最大日期: 2022-07-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-26\n",
"划分后的训练集大小: 2724, 验证集大小: 140\n",
"train_data最大日期: 2022-07-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-27\n",
"划分后的训练集大小: 2743, 验证集大小: 136\n",
"train_data最大日期: 2022-07-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-28\n",
"划分后的训练集大小: 2750, 验证集大小: 137\n",
"train_data最大日期: 2022-07-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-07-29\n",
"划分后的训练集大小: 2745, 验证集大小: 134\n",
"train_data最大日期: 2022-07-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-01\n",
"划分后的训练集大小: 2741, 验证集大小: 137\n",
"train_data最大日期: 2022-08-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-02\n",
"划分后的训练集大小: 2737, 验证集大小: 134\n",
"train_data最大日期: 2022-08-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-03\n",
"划分后的训练集大小: 2729, 验证集大小: 133\n",
"train_data最大日期: 2022-08-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-04\n",
"划分后的训练集大小: 2724, 验证集大小: 134\n",
"train_data最大日期: 2022-08-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-05\n",
"划分后的训练集大小: 2711, 验证集大小: 132\n",
"train_data最大日期: 2022-08-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-08\n",
"划分后的训练集大小: 2703, 验证集大小: 131\n",
"train_data最大日期: 2022-08-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-09\n",
"划分后的训练集大小: 2688, 验证集大小: 124\n",
"train_data最大日期: 2022-08-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-10\n",
"划分后的训练集大小: 2689, 验证集大小: 138\n",
"train_data最大日期: 2022-08-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-11\n",
"划分后的训练集大小: 2700, 验证集大小: 141\n",
"train_data最大日期: 2022-08-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-12\n",
"划分后的训练集大小: 2705, 验证集大小: 141\n",
"train_data最大日期: 2022-08-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-15\n",
"划分后的训练集大小: 2701, 验证集大小: 134\n",
"train_data最大日期: 2022-08-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-16\n",
"划分后的训练集大小: 2701, 验证集大小: 135\n",
"train_data最大日期: 2022-08-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-17\n",
"划分后的训练集大小: 2705, 验证集大小: 134\n",
"train_data最大日期: 2022-08-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-18\n",
"划分后的训练集大小: 2708, 验证集大小: 136\n",
"train_data最大日期: 2022-08-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-19\n",
"划分后的训练集大小: 2703, 验证集大小: 134\n",
"train_data最大日期: 2022-08-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-22\n",
"划分后的训练集大小: 2705, 验证集大小: 140\n",
"train_data最大日期: 2022-08-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-23\n",
"划分后的训练集大小: 2710, 验证集大小: 145\n",
"train_data最大日期: 2022-08-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-24\n",
"划分后的训练集大小: 2716, 验证集大小: 142\n",
"train_data最大日期: 2022-08-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-25\n",
"划分后的训练集大小: 2716, 验证集大小: 137\n",
"train_data最大日期: 2022-08-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-26\n",
"划分后的训练集大小: 2715, 验证集大小: 133\n",
"train_data最大日期: 2022-08-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-29\n",
"划分后的训练集大小: 2711, 验证集大小: 133\n",
"train_data最大日期: 2022-08-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-30\n",
"划分后的训练集大小: 2708, 验证集大小: 131\n",
"train_data最大日期: 2022-08-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-08-31\n",
"划分后的训练集大小: 2708, 验证集大小: 133\n",
"train_data最大日期: 2022-08-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-01\n",
"划分后的训练集大小: 2707, 验证集大小: 133\n",
"train_data最大日期: 2022-09-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-02\n",
"划分后的训练集大小: 2713, 验证集大小: 138\n",
"train_data最大日期: 2022-09-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-05\n",
"划分后的训练集大小: 2721, 验证集大小: 139\n",
"train_data最大日期: 2022-09-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-06\n",
"划分后的训练集大小: 2740, 验证集大小: 143\n",
"train_data最大日期: 2022-09-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-07\n",
"划分后的训练集大小: 2738, 验证集大小: 136\n",
"train_data最大日期: 2022-09-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-08\n",
"划分后的训练集大小: 2738, 验证集大小: 141\n",
"train_data最大日期: 2022-09-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-09\n",
"划分后的训练集大小: 2739, 验证集大小: 142\n",
"train_data最大日期: 2022-09-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-13\n",
"划分后的训练集大小: 2749, 验证集大小: 144\n",
"train_data最大日期: 2022-09-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-14\n",
"划分后的训练集大小: 2755, 验证集大小: 141\n",
"train_data最大日期: 2022-09-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-15\n",
"划分后的训练集大小: 2755, 验证集大小: 134\n",
"train_data最大日期: 2022-09-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-16\n",
"划分后的训练集大小: 2758, 验证集大小: 139\n",
"train_data最大日期: 2022-09-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-19\n",
"划分后的训练集大小: 2761, 验证集大小: 137\n",
"train_data最大日期: 2022-09-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-20\n",
"划分后的训练集大小: 2761, 验证集大小: 140\n",
"train_data最大日期: 2022-09-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-21\n",
"划分后的训练集大小: 2760, 验证集大小: 144\n",
"train_data最大日期: 2022-09-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-22\n",
"划分后的训练集大小: 2758, 验证集大小: 140\n",
"train_data最大日期: 2022-09-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-23\n",
"划分后的训练集大小: 2755, 验证集大小: 134\n",
"train_data最大日期: 2022-09-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-26\n",
"划分后的训练集大小: 2759, 验证集大小: 137\n",
"train_data最大日期: 2022-09-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-27\n",
"划分后的训练集大小: 2763, 验证集大小: 137\n",
"train_data最大日期: 2022-09-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-28\n",
"划分后的训练集大小: 2772, 验证集大小: 140\n",
"train_data最大日期: 2022-09-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-29\n",
"划分后的训练集大小: 2778, 验证集大小: 139\n",
"train_data最大日期: 2022-09-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-09-30\n",
"划分后的训练集大小: 2787, 验证集大小: 142\n",
"train_data最大日期: 2022-09-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-10\n",
"划分后的训练集大小: 2792, 验证集大小: 143\n",
"train_data最大日期: 2022-10-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-11\n",
"划分后的训练集大小: 2789, 验证集大小: 136\n",
"train_data最大日期: 2022-10-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-12\n",
"划分后的训练集大小: 2786, 验证集大小: 140\n",
"train_data最大日期: 2022-10-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-13\n",
"划分后的训练集大小: 2790, 验证集大小: 140\n",
"train_data最大日期: 2022-10-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-14\n",
"划分后的训练集大小: 2791, 验证集大小: 142\n",
"train_data最大日期: 2022-10-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-17\n",
"划分后的训练集大小: 2786, 验证集大小: 137\n",
"train_data最大日期: 2022-10-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-18\n",
"划分后的训练集大小: 2782, 验证集大小: 140\n",
"train_data最大日期: 2022-10-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-19\n",
"划分后的训练集大小: 2778, 验证集大小: 137\n",
"train_data最大日期: 2022-10-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-20\n",
"划分后的训练集大小: 2776, 验证集大小: 132\n",
"train_data最大日期: 2022-10-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-21\n",
"划分后的训练集大小: 2772, 验证集大小: 135\n",
"train_data最大日期: 2022-10-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-24\n",
"划分后的训练集大小: 2767, 验证集大小: 132\n",
"train_data最大日期: 2022-10-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-25\n",
"划分后的训练集大小: 2766, 验证集大小: 139\n",
"train_data最大日期: 2022-10-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-26\n",
"划分后的训练集大小: 2758, 验证集大小: 136\n",
"train_data最大日期: 2022-10-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-27\n",
"划分后的训练集大小: 2752, 验证集大小: 134\n",
"train_data最大日期: 2022-10-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-28\n",
"划分后的训练集大小: 2760, 验证集大小: 142\n",
"train_data最大日期: 2022-10-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-10-31\n",
"划分后的训练集大小: 2763, 验证集大小: 140\n",
"train_data最大日期: 2022-10-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-01\n",
"划分后的训练集大小: 2771, 验证集大小: 145\n",
"train_data最大日期: 2022-11-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-02\n",
"划分后的训练集大小: 2775, 验证集大小: 144\n",
"train_data最大日期: 2022-11-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-03\n",
"划分后的训练集大小: 2777, 验证集大小: 141\n",
"train_data最大日期: 2022-11-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-04\n",
"划分后的训练集大小: 2777, 验证集大小: 142\n",
"train_data最大日期: 2022-11-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-07\n",
"划分后的训练集大小: 2775, 验证集大小: 141\n",
"train_data最大日期: 2022-11-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-08\n",
"划分后的训练集大小: 2776, 验证集大小: 137\n",
"train_data最大日期: 2022-11-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-09\n",
"划分后的训练集大小: 2770, 验证集大小: 134\n",
"train_data最大日期: 2022-11-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-10\n",
"划分后的训练集大小: 2766, 验证集大小: 136\n",
"train_data最大日期: 2022-11-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-11\n",
"划分后的训练集大小: 2752, 验证集大小: 128\n",
"train_data最大日期: 2022-11-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-14\n",
"划分后的训练集大小: 2747, 验证集大小: 132\n",
"train_data最大日期: 2022-11-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-15\n",
"划分后的训练集大小: 2746, 验证集大小: 139\n",
"train_data最大日期: 2022-11-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-16\n",
"划分后的训练集大小: 2740, 验证集大小: 131\n",
"train_data最大日期: 2022-11-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-17\n",
"划分后的训练集大小: 2745, 验证集大小: 137\n",
"train_data最大日期: 2022-11-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-18\n",
"划分后的训练集大小: 2748, 验证集大小: 138\n",
"train_data最大日期: 2022-11-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-21\n",
"划分后的训练集大小: 2757, 验证集大小: 141\n",
"train_data最大日期: 2022-11-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-22\n",
"划分后的训练集大小: 2761, 验证集大小: 143\n",
"train_data最大日期: 2022-11-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-23\n",
"划分后的训练集大小: 2769, 验证集大小: 144\n",
"train_data最大日期: 2022-11-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-24\n",
"划分后的训练集大小: 2779, 验证集大小: 144\n",
"train_data最大日期: 2022-11-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-25\n",
"划分后的训练集大小: 2781, 验证集大小: 144\n",
"train_data最大日期: 2022-11-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-28\n",
"划分后的训练集大小: 2774, 验证集大小: 133\n",
"train_data最大日期: 2022-11-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-29\n",
"划分后的训练集大小: 2761, 验证集大小: 132\n",
"train_data最大日期: 2022-11-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-11-30\n",
"划分后的训练集大小: 2746, 验证集大小: 129\n",
"train_data最大日期: 2022-11-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-01\n",
"划分后的训练集大小: 2734, 验证集大小: 129\n",
"train_data最大日期: 2022-12-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-02\n",
"划分后的训练集大小: 2722, 验证集大小: 130\n",
"train_data最大日期: 2022-12-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-05\n",
"划分后的训练集大小: 2720, 验证集大小: 139\n",
"train_data最大日期: 2022-12-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-06\n",
"划分后的训练集大小: 2714, 验证集大小: 131\n",
"train_data最大日期: 2022-12-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-07\n",
"划分后的训练集大小: 2721, 验证集大小: 141\n",
"train_data最大日期: 2022-12-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-08\n",
"划分后的训练集大小: 2721, 验证集大小: 136\n",
"train_data最大日期: 2022-12-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-09\n",
"划分后的训练集大小: 2730, 验证集大小: 137\n",
"train_data最大日期: 2022-12-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-12\n",
"划分后的训练集大小: 2739, 验证集大小: 141\n",
"train_data最大日期: 2022-12-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-13\n",
"划分后的训练集大小: 2740, 验证集大小: 140\n",
"train_data最大日期: 2022-12-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-14\n",
"划分后的训练集大小: 2749, 验证集大小: 140\n",
"train_data最大日期: 2022-12-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-15\n",
"划分后的训练集大小: 2759, 验证集大小: 147\n",
"train_data最大日期: 2022-12-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-16\n",
"划分后的训练集大小: 2761, 验证集大小: 140\n",
"train_data最大日期: 2022-12-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-19\n",
"划分后的训练集大小: 2760, 验证集大小: 140\n",
"train_data最大日期: 2022-12-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-20\n",
"划分后的训练集大小: 2757, 验证集大小: 140\n",
"train_data最大日期: 2022-12-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-21\n",
"划分后的训练集大小: 2754, 验证集大小: 141\n",
"train_data最大日期: 2022-12-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-22\n",
"划分后的训练集大小: 2753, 验证集大小: 143\n",
"train_data最大日期: 2022-12-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-23\n",
"划分后的训练集大小: 2749, 验证集大小: 140\n",
"train_data最大日期: 2022-12-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-26\n",
"划分后的训练集大小: 2759, 验证集大小: 143\n",
"train_data最大日期: 2022-12-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-27\n",
"划分后的训练集大小: 2770, 验证集大小: 143\n",
"train_data最大日期: 2022-12-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-28\n",
"划分后的训练集大小: 2777, 验证集大小: 136\n",
"train_data最大日期: 2022-12-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-29\n",
"划分后的训练集大小: 2789, 验证集大小: 141\n",
"train_data最大日期: 2022-12-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2022-12-30\n",
"划分后的训练集大小: 2799, 验证集大小: 140\n",
"train_data最大日期: 2022-12-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-03\n",
"划分后的训练集大小: 2801, 验证集大小: 141\n",
"train_data最大日期: 2023-01-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-04\n",
"划分后的训练集大小: 2810, 验证集大小: 140\n",
"train_data最大日期: 2023-01-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-05\n",
"划分后的训练集大小: 2809, 验证集大小: 140\n",
"train_data最大日期: 2023-01-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-06\n",
"划分后的训练集大小: 2812, 验证集大小: 139\n",
"train_data最大日期: 2023-01-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-09\n",
"划分后的训练集大小: 2816, 验证集大小: 141\n",
"train_data最大日期: 2023-01-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-10\n",
"划分后的训练集大小: 2817, 验证集大小: 142\n",
"train_data最大日期: 2023-01-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-11\n",
"划分后的训练集大小: 2821, 验证集大小: 144\n",
"train_data最大日期: 2023-01-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-12\n",
"划分后的训练集大小: 2820, 验证集大小: 139\n",
"train_data最大日期: 2023-01-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-13\n",
"划分后的训练集大小: 2814, 验证集大小: 141\n",
"train_data最大日期: 2023-01-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-16\n",
"划分后的训练集大小: 2810, 验证集大小: 136\n",
"train_data最大日期: 2023-01-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-17\n",
"划分后的训练集大小: 2805, 验证集大小: 135\n",
"train_data最大日期: 2023-01-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-18\n",
"划分后的训练集大小: 2798, 验证集大小: 133\n",
"train_data最大日期: 2023-01-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-19\n",
"划分后的训练集大小: 2791, 验证集大小: 134\n",
"train_data最大日期: 2023-01-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-20\n",
"划分后的训练集大小: 2787, 验证集大小: 139\n",
"train_data最大日期: 2023-01-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-30\n",
"划分后的训练集大小: 2794, 验证集大小: 147\n",
"train_data最大日期: 2023-01-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-01-31\n",
"划分后的训练集大小: 2797, 验证集大小: 146\n",
"train_data最大日期: 2023-01-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-01\n",
"划分后的训练集大小: 2792, 验证集大小: 138\n",
"train_data最大日期: 2023-02-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-02\n",
"划分后的训练集大小: 2793, 验证集大小: 137\n",
"train_data最大日期: 2023-02-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-03\n",
"划分后的训练集大小: 2793, 验证集大小: 141\n",
"train_data最大日期: 2023-02-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-06\n",
"划分后的训练集大小: 2796, 验证集大小: 143\n",
"train_data最大日期: 2023-02-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-07\n",
"划分后的训练集大小: 2800, 验证集大小: 145\n",
"train_data最大日期: 2023-02-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-08\n",
"划分后的训练集大小: 2804, 验证集大小: 144\n",
"train_data最大日期: 2023-02-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-09\n",
"划分后的训练集大小: 2804, 验证集大小: 140\n",
"train_data最大日期: 2023-02-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-10\n",
"划分后的训练集大小: 2808, 验证集大小: 143\n",
"train_data最大日期: 2023-02-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-13\n",
"划分后的训练集大小: 2808, 验证集大小: 141\n",
"train_data最大日期: 2023-02-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-14\n",
"划分后的训练集大小: 2804, 验证集大小: 138\n",
"train_data最大日期: 2023-02-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-15\n",
"划分后的训练集大小: 2803, 验证集大小: 143\n",
"train_data最大日期: 2023-02-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-16\n",
"划分后的训练集大小: 2806, 验证集大小: 142\n",
"train_data最大日期: 2023-02-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-17\n",
"划分后的训练集大小: 2811, 验证集大小: 146\n",
"train_data最大日期: 2023-02-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-20\n",
"划分后的训练集大小: 2820, 验证集大小: 145\n",
"train_data最大日期: 2023-02-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-21\n",
"划分后的训练集大小: 2831, 验证集大小: 146\n",
"train_data最大日期: 2023-02-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-22\n",
"划分后的训练集大小: 2838, 验证集大小: 140\n",
"train_data最大日期: 2023-02-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-23\n",
"划分后的训练集大小: 2848, 验证集大小: 144\n",
"train_data最大日期: 2023-02-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-24\n",
"划分后的训练集大小: 2849, 验证集大小: 140\n",
"train_data最大日期: 2023-02-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-27\n",
"划分后的训练集大小: 2843, 验证集大小: 141\n",
"train_data最大日期: 2023-02-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-02-28\n",
"划分后的训练集大小: 2838, 验证集大小: 141\n",
"train_data最大日期: 2023-02-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-01\n",
"划分后的训练集大小: 2843, 验证集大小: 143\n",
"train_data最大日期: 2023-03-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-02\n",
"划分后的训练集大小: 2849, 验证集大小: 143\n",
"train_data最大日期: 2023-03-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-03\n",
"划分后的训练集大小: 2846, 验证集大小: 138\n",
"train_data最大日期: 2023-03-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-06\n",
"划分后的训练集大小: 2849, 验证集大小: 146\n",
"train_data最大日期: 2023-03-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-07\n",
"划分后的训练集大小: 2852, 验证集大小: 148\n",
"train_data最大日期: 2023-03-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-08\n",
"划分后的训练集大小: 2853, 验证集大小: 145\n",
"train_data最大日期: 2023-03-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-09\n",
"划分后的训练集大小: 2857, 验证集大小: 144\n",
"train_data最大日期: 2023-03-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-10\n",
"划分后的训练集大小: 2858, 验证集大小: 144\n",
"train_data最大日期: 2023-03-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-13\n",
"划分后的训练集大小: 2859, 验证集大小: 142\n",
"train_data最大日期: 2023-03-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-14\n",
"划分后的训练集大小: 2863, 验证集大小: 142\n",
"train_data最大日期: 2023-03-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-15\n",
"划分后的训练集大小: 2863, 验证集大小: 143\n",
"train_data最大日期: 2023-03-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-16\n",
"划分后的训练集大小: 2864, 验证集大小: 143\n",
"train_data最大日期: 2023-03-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-17\n",
"划分后的训练集大小: 2862, 验证集大小: 144\n",
"train_data最大日期: 2023-03-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-20\n",
"划分后的训练集大小: 2858, 验证集大小: 141\n",
"train_data最大日期: 2023-03-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-21\n",
"划分后的训练集大小: 2850, 验证集大小: 138\n",
"train_data最大日期: 2023-03-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-22\n",
"划分后的训练集大小: 2846, 验证集大小: 136\n",
"train_data最大日期: 2023-03-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-23\n",
"划分后的训练集大小: 2843, 验证集大小: 141\n",
"train_data最大日期: 2023-03-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-24\n",
"划分后的训练集大小: 2846, 验证集大小: 143\n",
"train_data最大日期: 2023-03-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-27\n",
"划分后的训练集大小: 2847, 验证集大小: 142\n",
"train_data最大日期: 2023-03-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-28\n",
"划分后的训练集大小: 2850, 验证集大小: 144\n",
"train_data最大日期: 2023-03-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-29\n",
"划分后的训练集大小: 2849, 验证集大小: 142\n",
"train_data最大日期: 2023-03-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-30\n",
"划分后的训练集大小: 2848, 验证集大小: 142\n",
"train_data最大日期: 2023-03-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-03-31\n",
"划分后的训练集大小: 2854, 验证集大小: 144\n",
"train_data最大日期: 2023-03-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-03\n",
"划分后的训练集大小: 2852, 验证集大小: 144\n",
"train_data最大日期: 2023-04-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-04\n",
"划分后的训练集大小: 2848, 验证集大小: 144\n",
"train_data最大日期: 2023-04-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-06\n",
"划分后的训练集大小: 2847, 验证集大小: 144\n",
"train_data最大日期: 2023-04-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-07\n",
"划分后的训练集大小: 2849, 验证集大小: 146\n",
"train_data最大日期: 2023-04-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-10\n",
"划分后的训练集大小: 2852, 验证集大小: 147\n",
"train_data最大日期: 2023-04-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-11\n",
"划分后的训练集大小: 2854, 验证集大小: 144\n",
"train_data最大日期: 2023-04-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-12\n",
"划分后的训练集大小: 2854, 验证集大小: 142\n",
"train_data最大日期: 2023-04-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-13\n",
"划分后的训练集大小: 2857, 验证集大小: 146\n",
"train_data最大日期: 2023-04-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-14\n",
"划分后的训练集大小: 2857, 验证集大小: 143\n",
"train_data最大日期: 2023-04-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-17\n",
"划分后的训练集大小: 2855, 验证集大小: 142\n",
"train_data最大日期: 2023-04-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-18\n",
"划分后的训练集大小: 2858, 验证集大小: 144\n",
"train_data最大日期: 2023-04-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-19\n",
"划分后的训练集大小: 2864, 验证集大小: 144\n",
"train_data最大日期: 2023-04-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-20\n",
"划分后的训练集大小: 2869, 验证集大小: 141\n",
"train_data最大日期: 2023-04-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-21\n",
"划分后的训练集大小: 2872, 验证集大小: 144\n",
"train_data最大日期: 2023-04-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-24\n",
"划分后的训练集大小: 2871, 验证集大小: 142\n",
"train_data最大日期: 2023-04-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-25\n",
"划分后的训练集大小: 2877, 验证集大小: 148\n",
"train_data最大日期: 2023-04-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-26\n",
"划分后的训练集大小: 2879, 验证集大小: 146\n",
"train_data最大日期: 2023-04-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-27\n",
"划分后的训练集大小: 2886, 验证集大小: 149\n",
"train_data最大日期: 2023-04-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-04-28\n",
"划分后的训练集大小: 2887, 验证集大小: 143\n",
"train_data最大日期: 2023-04-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-04\n",
"划分后的训练集大小: 2889, 验证集大小: 146\n",
"train_data最大日期: 2023-05-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-05\n",
"划分后的训练集大小: 2890, 验证集大小: 145\n",
"train_data最大日期: 2023-05-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-08\n",
"划分后的训练集大小: 2884, 验证集大小: 138\n",
"train_data最大日期: 2023-05-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-09\n",
"划分后的训练集大小: 2880, 验证集大小: 140\n",
"train_data最大日期: 2023-05-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-10\n",
"划分后的训练集大小: 2872, 验证集大小: 138\n",
"train_data最大日期: 2023-05-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-11\n",
"划分后的训练集大小: 2863, 验证集大小: 138\n",
"train_data最大日期: 2023-05-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-12\n",
"划分后的训练集大小: 2856, 验证集大小: 137\n",
"train_data最大日期: 2023-05-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-15\n",
"划分后的训练集大小: 2847, 验证集大小: 133\n",
"train_data最大日期: 2023-05-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-16\n",
"划分后的训练集大小: 2841, 验证集大小: 140\n",
"train_data最大日期: 2023-05-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-17\n",
"划分后的训练集大小: 2843, 验证集大小: 145\n",
"train_data最大日期: 2023-05-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-18\n",
"划分后的训练集大小: 2843, 验证集大小: 142\n",
"train_data最大日期: 2023-05-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-19\n",
"划分后的训练集大小: 2842, 验证集大小: 143\n",
"train_data最大日期: 2023-05-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-22\n",
"划分后的训练集大小: 2841, 验证集大小: 143\n",
"train_data最大日期: 2023-05-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-23\n",
"划分后的训练集大小: 2841, 验证集大小: 141\n",
"train_data最大日期: 2023-05-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-24\n",
"划分后的训练集大小: 2835, 验证集大小: 138\n",
"train_data最大日期: 2023-05-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-25\n",
"划分后的训练集大小: 2830, 验证集大小: 137\n",
"train_data最大日期: 2023-05-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-26\n",
"划分后的训练集大小: 2819, 验证集大小: 137\n",
"train_data最大日期: 2023-05-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-29\n",
"划分后的训练集大小: 2811, 验证集大小: 138\n",
"train_data最大日期: 2023-05-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-30\n",
"划分后的训练集大小: 2798, 验证集大小: 136\n",
"train_data最大日期: 2023-05-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-05-31\n",
"划分后的训练集大小: 2791, 验证集大小: 136\n",
"train_data最大日期: 2023-05-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-01\n",
"划分后的训练集大小: 2783, 验证集大小: 138\n",
"train_data最大日期: 2023-06-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-02\n",
"划分后的训练集大小: 2778, 验证集大小: 140\n",
"train_data最大日期: 2023-06-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-05\n",
"划分后的训练集大小: 2782, 验证集大小: 142\n",
"train_data最大日期: 2023-06-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-06\n",
"划分后的训练集大小: 2782, 验证集大小: 140\n",
"train_data最大日期: 2023-06-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-07\n",
"划分后的训练集大小: 2777, 验证集大小: 133\n",
"train_data最大日期: 2023-06-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-08\n",
"划分后的训练集大小: 2780, 验证集大小: 141\n",
"train_data最大日期: 2023-06-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-09\n",
"划分后的训练集大小: 2784, 验证集大小: 141\n",
"train_data最大日期: 2023-06-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-12\n",
"划分后的训练集大小: 2795, 验证集大小: 144\n",
"train_data最大日期: 2023-06-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-13\n",
"划分后的训练集大小: 2799, 验证集大小: 144\n",
"train_data最大日期: 2023-06-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-14\n",
"划分后的训练集大小: 2792, 验证集大小: 138\n",
"train_data最大日期: 2023-06-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-15\n",
"划分后的训练集大小: 2790, 验证集大小: 140\n",
"train_data最大日期: 2023-06-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-16\n",
"划分后的训练集大小: 2789, 验证集大小: 142\n",
"train_data最大日期: 2023-06-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-19\n",
"划分后的训练集大小: 2793, 验证集大小: 147\n",
"train_data最大日期: 2023-06-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-20\n",
"划分后的训练集大小: 2796, 验证集大小: 144\n",
"train_data最大日期: 2023-06-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-21\n",
"划分后的训练集大小: 2801, 验证集大小: 143\n",
"train_data最大日期: 2023-06-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-26\n",
"划分后的训练集大小: 2807, 验证集大小: 143\n",
"train_data最大日期: 2023-06-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-27\n",
"划分后的训练集大小: 2814, 验证集大小: 144\n",
"train_data最大日期: 2023-06-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-28\n",
"划分后的训练集大小: 2820, 验证集大小: 144\n",
"train_data最大日期: 2023-06-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-29\n",
"划分后的训练集大小: 2827, 验证集大小: 143\n",
"train_data最大日期: 2023-06-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-06-30\n",
"划分后的训练集大小: 2832, 验证集大小: 141\n",
"train_data最大日期: 2023-06-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-03\n",
"划分后的训练集大小: 2837, 验证集大小: 143\n",
"train_data最大日期: 2023-07-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-04\n",
"划分后的训练集大小: 2841, 验证集大小: 144\n",
"train_data最大日期: 2023-07-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-05\n",
"划分后的训练集大小: 2844, 验证集大小: 145\n",
"train_data最大日期: 2023-07-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-06\n",
"划分后的训练集大小: 2846, 验证集大小: 142\n",
"train_data最大日期: 2023-07-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-07\n",
"划分后的训练集大小: 2857, 验证集大小: 144\n",
"train_data最大日期: 2023-07-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-10\n",
"划分后的训练集大小: 2862, 验证集大小: 146\n",
"train_data最大日期: 2023-07-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-11\n",
"划分后的训练集大小: 2863, 验证集大小: 142\n",
"train_data最大日期: 2023-07-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-12\n",
"划分后的训练集大小: 2861, 验证集大小: 142\n",
"train_data最大日期: 2023-07-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-13\n",
"划分后的训练集大小: 2857, 验证集大小: 140\n",
"train_data最大日期: 2023-07-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-14\n",
"划分后的训练集大小: 2861, 验证集大小: 142\n",
"train_data最大日期: 2023-07-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-17\n",
"划分后的训练集大小: 2865, 验证集大小: 144\n",
"train_data最大日期: 2023-07-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-18\n",
"划分后的训练集大小: 2865, 验证集大小: 142\n",
"train_data最大日期: 2023-07-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-19\n",
"划分后的训练集大小: 2860, 验证集大小: 142\n",
"train_data最大日期: 2023-07-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-20\n",
"划分后的训练集大小: 2857, 验证集大小: 141\n",
"train_data最大日期: 2023-07-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-21\n",
"划分后的训练集大小: 2854, 验证集大小: 140\n",
"train_data最大日期: 2023-07-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-24\n",
"划分后的训练集大小: 2846, 验证集大小: 135\n",
"train_data最大日期: 2023-07-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-25\n",
"划分后的训练集大小: 2842, 验证集大小: 140\n",
"train_data最大日期: 2023-07-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-26\n",
"划分后的训练集大小: 2837, 验证集大小: 139\n",
"train_data最大日期: 2023-07-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-27\n",
"划分后的训练集大小: 2831, 验证集大小: 137\n",
"train_data最大日期: 2023-07-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-28\n",
"划分后的训练集大小: 2832, 验证集大小: 142\n",
"train_data最大日期: 2023-07-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-07-31\n",
"划分后的训练集大小: 2835, 验证集大小: 146\n",
"train_data最大日期: 2023-07-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-01\n",
"划分后的训练集大小: 2833, 验证集大小: 142\n",
"train_data最大日期: 2023-08-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-02\n",
"划分后的训练集大小: 2831, 验证集大小: 143\n",
"train_data最大日期: 2023-08-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-03\n",
"划分后的训练集大小: 2835, 验证集大小: 146\n",
"train_data最大日期: 2023-08-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-04\n",
"划分后的训练集大小: 2834, 验证集大小: 143\n",
"train_data最大日期: 2023-08-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-07\n",
"划分后的训练集大小: 2832, 验证集大小: 144\n",
"train_data最大日期: 2023-08-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-08\n",
"划分后的训练集大小: 2834, 验证集大小: 144\n",
"train_data最大日期: 2023-08-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-09\n",
"划分后的训练集大小: 2839, 验证集大小: 147\n",
"train_data最大日期: 2023-08-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-10\n",
"划分后的训练集大小: 2843, 验证集大小: 144\n",
"train_data最大日期: 2023-08-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-11\n",
"划分后的训练集大小: 2843, 验证集大小: 142\n",
"train_data最大日期: 2023-08-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-14\n",
"划分后的训练集大小: 2838, 验证集大小: 139\n",
"train_data最大日期: 2023-08-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-15\n",
"划分后的训练集大小: 2836, 验证集大小: 140\n",
"train_data最大日期: 2023-08-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-16\n",
"划分后的训练集大小: 2831, 验证集大小: 137\n",
"train_data最大日期: 2023-08-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-17\n",
"划分后的训练集大小: 2828, 验证集大小: 138\n",
"train_data最大日期: 2023-08-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-18\n",
"划分后的训练集大小: 2825, 验证集大小: 137\n",
"train_data最大日期: 2023-08-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-21\n",
"划分后的训练集大小: 2825, 验证集大小: 135\n",
"train_data最大日期: 2023-08-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-22\n",
"划分后的训练集大小: 2817, 验证集大小: 132\n",
"train_data最大日期: 2023-08-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-23\n",
"划分后的训练集大小: 2818, 验证集大小: 140\n",
"train_data最大日期: 2023-08-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-24\n",
"划分后的训练集大小: 2814, 验证集大小: 133\n",
"train_data最大日期: 2023-08-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-25\n",
"划分后的训练集大小: 2813, 验证集大小: 141\n",
"train_data最大日期: 2023-08-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-28\n",
"划分后的训练集大小: 2812, 验证集大小: 145\n",
"train_data最大日期: 2023-08-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-29\n",
"划分后的训练集大小: 2815, 验证集大小: 145\n",
"train_data最大日期: 2023-08-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-30\n",
"划分后的训练集大小: 2813, 验证集大小: 141\n",
"train_data最大日期: 2023-08-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-08-31\n",
"划分后的训练集大小: 2808, 验证集大小: 141\n",
"train_data最大日期: 2023-08-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-01\n",
"划分后的训练集大小: 2812, 验证集大小: 147\n",
"train_data最大日期: 2023-09-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-04\n",
"划分后的训练集大小: 2810, 验证集大小: 142\n",
"train_data最大日期: 2023-09-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-05\n",
"划分后的训练集大小: 2812, 验证集大小: 146\n",
"train_data最大日期: 2023-09-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-06\n",
"划分后的训练集大小: 2810, 验证集大小: 145\n",
"train_data最大日期: 2023-09-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-07\n",
"划分后的训练集大小: 2810, 验证集大小: 144\n",
"train_data最大日期: 2023-09-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-08\n",
"划分后的训练集大小: 2812, 验证集大小: 144\n",
"train_data最大日期: 2023-09-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-11\n",
"划分后的训练集大小: 2817, 验证集大小: 144\n",
"train_data最大日期: 2023-09-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-12\n",
"划分后的训练集大小: 2821, 验证集大小: 144\n",
"train_data最大日期: 2023-09-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-13\n",
"划分后的训练集大小: 2830, 验证集大小: 146\n",
"train_data最大日期: 2023-09-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-14\n",
"划分后的训练集大小: 2839, 验证集大小: 147\n",
"train_data最大日期: 2023-09-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-15\n",
"划分后的训练集大小: 2848, 验证集大小: 146\n",
"train_data最大日期: 2023-09-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-18\n",
"划分后的训练集大小: 2859, 验证集大小: 146\n",
"train_data最大日期: 2023-09-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-19\n",
"划分后的训练集大小: 2868, 验证集大小: 141\n",
"train_data最大日期: 2023-09-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-20\n",
"划分后的训练集大小: 2870, 验证集大小: 142\n",
"train_data最大日期: 2023-09-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-21\n",
"划分后的训练集大小: 2881, 验证集大小: 144\n",
"train_data最大日期: 2023-09-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-22\n",
"划分后的训练集大小: 2884, 验证集大小: 144\n",
"train_data最大日期: 2023-09-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-25\n",
"划分后的训练集大小: 2885, 验证集大小: 146\n",
"train_data最大日期: 2023-09-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-26\n",
"划分后的训练集大小: 2881, 验证集大小: 141\n",
"train_data最大日期: 2023-09-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-27\n",
"划分后的训练集大小: 2882, 验证集大小: 142\n",
"train_data最大日期: 2023-09-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-09-28\n",
"划分后的训练集大小: 2883, 验证集大小: 142\n",
"train_data最大日期: 2023-09-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-09\n",
"划分后的训练集大小: 2880, 验证集大小: 144\n",
"train_data最大日期: 2023-10-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-10\n",
"划分后的训练集大小: 2881, 验证集大小: 143\n",
"train_data最大日期: 2023-10-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-11\n",
"划分后的训练集大小: 2876, 验证集大小: 141\n",
"train_data最大日期: 2023-10-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-12\n",
"划分后的训练集大小: 2871, 验证集大小: 140\n",
"train_data最大日期: 2023-10-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-13\n",
"划分后的训练集大小: 2873, 验证集大小: 146\n",
"train_data最大日期: 2023-10-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-16\n",
"划分后的训练集大小: 2873, 验证集大小: 144\n",
"train_data最大日期: 2023-10-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-17\n",
"划分后的训练集大小: 2870, 验证集大小: 141\n",
"train_data最大日期: 2023-10-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-18\n",
"划分后的训练集大小: 2869, 验证集大小: 143\n",
"train_data最大日期: 2023-10-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-19\n",
"划分后的训练集大小: 2865, 验证集大小: 142\n",
"train_data最大日期: 2023-10-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-20\n",
"划分后的训练集大小: 2860, 验证集大小: 142\n",
"train_data最大日期: 2023-10-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-23\n",
"划分后的训练集大小: 2857, 验证集大小: 143\n",
"train_data最大日期: 2023-10-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-24\n",
"划分后的训练集大小: 2855, 验证集大小: 144\n",
"train_data最大日期: 2023-10-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-25\n",
"划分后的训练集大小: 2853, 验证集大小: 139\n",
"train_data最大日期: 2023-10-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-26\n",
"划分后的训练集大小: 2849, 验证集大小: 138\n",
"train_data最大日期: 2023-10-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-27\n",
"划分后的训练集大小: 2840, 验证集大小: 135\n",
"train_data最大日期: 2023-10-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-30\n",
"划分后的训练集大小: 2835, 验证集大小: 139\n",
"train_data最大日期: 2023-10-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-10-31\n",
"划分后的训练集大小: 2832, 验证集大小: 143\n",
"train_data最大日期: 2023-10-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-01\n",
"划分后的训练集大小: 2831, 验证集大小: 140\n",
"train_data最大日期: 2023-11-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-02\n",
"划分后的训练集大小: 2834, 验证集大小: 145\n",
"train_data最大日期: 2023-11-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-03\n",
"划分后的训练集大小: 2836, 验证集大小: 144\n",
"train_data最大日期: 2023-11-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-06\n",
"划分后的训练集大小: 2837, 验证集大小: 145\n",
"train_data最大日期: 2023-11-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-07\n",
"划分后的训练集大小: 2839, 验证集大小: 145\n",
"train_data最大日期: 2023-11-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-08\n",
"划分后的训练集大小: 2832, 验证集大小: 134\n",
"train_data最大日期: 2023-11-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-09\n",
"划分后的训练集大小: 2830, 验证集大小: 138\n",
"train_data最大日期: 2023-11-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-10\n",
"划分后的训练集大小: 2826, 验证集大小: 142\n",
"train_data最大日期: 2023-11-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-13\n",
"划分后的训练集大小: 2824, 验证集大小: 142\n",
"train_data最大日期: 2023-11-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-14\n",
"划分后的训练集大小: 2828, 验证集大小: 145\n",
"train_data最大日期: 2023-11-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-15\n",
"划分后的训练集大小: 2827, 验证集大小: 142\n",
"train_data最大日期: 2023-11-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-16\n",
"划分后的训练集大小: 2822, 验证集大小: 137\n",
"train_data最大日期: 2023-11-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-17\n",
"划分后的训练集大小: 2821, 验证集大小: 141\n",
"train_data最大日期: 2023-11-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-20\n",
"划分后的训练集大小: 2824, 验证集大小: 146\n",
"train_data最大日期: 2023-11-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-21\n",
"划分后的训练集大小: 2824, 验证集大小: 144\n",
"train_data最大日期: 2023-11-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-22\n",
"划分后的训练集大小: 2822, 验证集大小: 137\n",
"train_data最大日期: 2023-11-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-23\n",
"划分后的训练集大小: 2820, 验证集大小: 136\n",
"train_data最大日期: 2023-11-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-24\n",
"划分后的训练集大小: 2824, 验证集大小: 139\n",
"train_data最大日期: 2023-11-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-27\n",
"划分后的训练集大小: 2828, 验证集大小: 143\n",
"train_data最大日期: 2023-11-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-28\n",
"划分后的训练集大小: 2829, 验证集大小: 144\n",
"train_data最大日期: 2023-11-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-29\n",
"划分后的训练集大小: 2835, 验证集大小: 146\n",
"train_data最大日期: 2023-11-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-11-30\n",
"划分后的训练集大小: 2834, 验证集大小: 144\n",
"train_data最大日期: 2023-11-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-01\n",
"划分后的训练集大小: 2838, 验证集大小: 148\n",
"train_data最大日期: 2023-12-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-04\n",
"划分后的训练集大小: 2839, 验证集大小: 146\n",
"train_data最大日期: 2023-12-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-05\n",
"划分后的训练集大小: 2836, 验证集大小: 142\n",
"train_data最大日期: 2023-12-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-06\n",
"划分后的训练集大小: 2841, 验证集大小: 139\n",
"train_data最大日期: 2023-12-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-07\n",
"划分后的训练集大小: 2839, 验证集大小: 136\n",
"train_data最大日期: 2023-12-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-08\n",
"划分后的训练集大小: 2837, 验证集大小: 140\n",
"train_data最大日期: 2023-12-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-11\n",
"划分后的训练集大小: 2838, 验证集大小: 143\n",
"train_data最大日期: 2023-12-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-12\n",
"划分后的训练集大小: 2836, 验证集大小: 143\n",
"train_data最大日期: 2023-12-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-13\n",
"划分后的训练集大小: 2838, 验证集大小: 144\n",
"train_data最大日期: 2023-12-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-14\n",
"划分后的训练集大小: 2845, 验证集大小: 144\n",
"train_data最大日期: 2023-12-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-15\n",
"划分后的训练集大小: 2846, 验证集大小: 142\n",
"train_data最大日期: 2023-12-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-18\n",
"划分后的训练集大小: 2841, 验证集大小: 141\n",
"train_data最大日期: 2023-12-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-19\n",
"划分后的训练集大小: 2840, 验证集大小: 143\n",
"train_data最大日期: 2023-12-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-20\n",
"划分后的训练集大小: 2849, 验证集大小: 146\n",
"train_data最大日期: 2023-12-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-21\n",
"划分后的训练集大小: 2856, 验证集大小: 143\n",
"train_data最大日期: 2023-12-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-22\n",
"划分后的训练集大小: 2856, 验证集大小: 139\n",
"train_data最大日期: 2023-12-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-25\n",
"划分后的训练集大小: 2856, 验证集大小: 143\n",
"train_data最大日期: 2023-12-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-26\n",
"划分后的训练集大小: 2856, 验证集大小: 144\n",
"train_data最大日期: 2023-12-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-27\n",
"划分后的训练集大小: 2851, 验证集大小: 141\n",
"train_data最大日期: 2023-12-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-28\n",
"划分后的训练集大小: 2848, 验证集大小: 141\n",
"train_data最大日期: 2023-12-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2023-12-29\n",
"划分后的训练集大小: 2843, 验证集大小: 143\n",
"train_data最大日期: 2023-12-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-02\n",
"划分后的训练集大小: 2842, 验证集大小: 145\n",
"train_data最大日期: 2024-01-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-03\n",
"划分后的训练集大小: 2844, 验证集大小: 144\n",
"train_data最大日期: 2024-01-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-04\n",
"划分后的训练集大小: 2844, 验证集大小: 139\n",
"train_data最大日期: 2024-01-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-05\n",
"划分后的训练集大小: 2847, 验证集大小: 139\n",
"train_data最大日期: 2024-01-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-08\n",
"划分后的训练集大小: 2849, 验证集大小: 142\n",
"train_data最大日期: 2024-01-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-09\n",
"划分后的训练集大小: 2842, 验证集大小: 136\n",
"train_data最大日期: 2024-01-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-10\n",
"划分后的训练集大小: 2839, 验证集大小: 140\n",
"train_data最大日期: 2024-01-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-11\n",
"划分后的训练集大小: 2831, 验证集大小: 136\n",
"train_data最大日期: 2024-01-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-12\n",
"划分后的训练集大小: 2823, 验证集大小: 136\n",
"train_data最大日期: 2024-01-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-15\n",
"划分后的训练集大小: 2821, 验证集大小: 140\n",
"train_data最大日期: 2024-01-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-16\n",
"划分后的训练集大小: 2811, 验证集大小: 131\n",
"train_data最大日期: 2024-01-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-17\n",
"划分后的训练集大小: 2806, 验证集大小: 138\n",
"train_data最大日期: 2024-01-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-18\n",
"划分后的训练集大小: 2800, 验证集大小: 140\n",
"train_data最大日期: 2024-01-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-19\n",
"划分后的训练集大小: 2797, 验证集大小: 140\n",
"train_data最大日期: 2024-01-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-22\n",
"划分后的训练集大小: 2799, 验证集大小: 141\n",
"train_data最大日期: 2024-01-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-23\n",
"划分后的训练集大小: 2795, 验证集大小: 139\n",
"train_data最大日期: 2024-01-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-24\n",
"划分后的训练集大小: 2793, 验证集大小: 142\n",
"train_data最大日期: 2024-01-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-25\n",
"划分后的训练集大小: 2793, 验证集大小: 141\n",
"train_data最大日期: 2024-01-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-26\n",
"划分后的训练集大小: 2791, 验证集大小: 139\n",
"train_data最大日期: 2024-01-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-29\n",
"划分后的训练集大小: 2773, 验证集大小: 125\n",
"train_data最大日期: 2024-01-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-30\n",
"划分后的训练集大小: 2759, 验证集大小: 131\n",
"train_data最大日期: 2024-01-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-01-31\n",
"划分后的训练集大小: 2757, 验证集大小: 142\n",
"train_data最大日期: 2024-01-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-01\n",
"划分后的训练集大小: 2761, 验证集大小: 143\n",
"train_data最大日期: 2024-02-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-02\n",
"划分后的训练集大小: 2767, 验证集大小: 145\n",
"train_data最大日期: 2024-02-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-05\n",
"划分后的训练集大小: 2771, 验证集大小: 146\n",
"train_data最大日期: 2024-02-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-06\n",
"划分后的训练集大小: 2782, 验证集大小: 147\n",
"train_data最大日期: 2024-02-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-07\n",
"划分后的训练集大小: 2789, 验证集大小: 147\n",
"train_data最大日期: 2024-02-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-08\n",
"划分后的训练集大小: 2800, 验证集大小: 147\n",
"train_data最大日期: 2024-02-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-19\n",
"划分后的训练集大小: 2808, 验证集大小: 144\n",
"train_data最大日期: 2024-02-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-20\n",
"划分后的训练集大小: 2809, 验证集大小: 141\n",
"train_data最大日期: 2024-02-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-21\n",
"划分后的训练集大小: 2820, 验证集大小: 142\n",
"train_data最大日期: 2024-02-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-22\n",
"划分后的训练集大小: 2819, 验证集大小: 137\n",
"train_data最大日期: 2024-02-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-23\n",
"划分后的训练集大小: 2814, 验证集大小: 135\n",
"train_data最大日期: 2024-02-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-26\n",
"划分后的训练集大小: 2804, 验证集大小: 130\n",
"train_data最大日期: 2024-02-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-27\n",
"划分后的训练集大小: 2788, 验证集大小: 125\n",
"train_data最大日期: 2024-02-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-28\n",
"划分后的训练集大小: 2752, 验证集大小: 103\n",
"train_data最大日期: 2024-02-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-02-29\n",
"划分后的训练集大小: 2736, 验证集大小: 126\n",
"train_data最大日期: 2024-02-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-01\n",
"划分后的训练集大小: 2729, 验证集大小: 134\n",
"train_data最大日期: 2024-03-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-04\n",
"划分后的训练集大小: 2730, 验证集大小: 140\n",
"train_data最大日期: 2024-03-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-05\n",
"划分后的训练集大小: 2746, 验证集大小: 141\n",
"train_data最大日期: 2024-03-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-06\n",
"划分后的训练集大小: 2752, 验证集大小: 137\n",
"train_data最大日期: 2024-03-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-07\n",
"划分后的训练集大小: 2751, 验证集大小: 141\n",
"train_data最大日期: 2024-03-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-08\n",
"划分后的训练集大小: 2749, 验证集大小: 141\n",
"train_data最大日期: 2024-03-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-11\n",
"划分后的训练集大小: 2741, 验证集大小: 137\n",
"train_data最大日期: 2024-03-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-12\n",
"划分后的训练集大小: 2735, 验证集大小: 140\n",
"train_data最大日期: 2024-03-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-13\n",
"划分后的训练集大小: 2731, 验证集大小: 143\n",
"train_data最大日期: 2024-03-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-14\n",
"划分后的训练集大小: 2722, 验证集大小: 138\n",
"train_data最大日期: 2024-03-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-15\n",
"划分后的训练集大小: 2719, 验证集大小: 144\n",
"train_data最大日期: 2024-03-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-18\n",
"划分后的训练集大小: 2722, 验证集大小: 147\n",
"train_data最大日期: 2024-03-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-19\n",
"划分后的训练集大小: 2722, 验证集大小: 141\n",
"train_data最大日期: 2024-03-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-20\n",
"划分后的训练集大小: 2720, 验证集大小: 140\n",
"train_data最大日期: 2024-03-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-21\n",
"划分后的训练集大小: 2718, 验证集大小: 135\n",
"train_data最大日期: 2024-03-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-22\n",
"划分后的训练集大小: 2714, 验证集大小: 131\n",
"train_data最大日期: 2024-03-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-25\n",
"划分后的训练集大小: 2717, 验证集大小: 133\n",
"train_data最大日期: 2024-03-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-26\n",
"划分后的训练集大小: 2732, 验证集大小: 140\n",
"train_data最大日期: 2024-03-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-27\n",
"划分后的训练集大小: 2767, 验证集大小: 138\n",
"train_data最大日期: 2024-03-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-28\n",
"划分后的训练集大小: 2783, 验证集大小: 142\n",
"train_data最大日期: 2024-03-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-03-29\n",
"划分后的训练集大小: 2793, 验证集大小: 144\n",
"train_data最大日期: 2024-03-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-01\n",
"划分后的训练集大小: 2795, 验证集大小: 142\n",
"train_data最大日期: 2024-04-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-02\n",
"划分后的训练集大小: 2789, 验证集大小: 135\n",
"train_data最大日期: 2024-04-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-03\n",
"划分后的训练集大小: 2784, 验证集大小: 132\n",
"train_data最大日期: 2024-04-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-08\n",
"划分后的训练集大小: 2782, 验证集大小: 139\n",
"train_data最大日期: 2024-04-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-09\n",
"划分后的训练集大小: 2783, 验证集大小: 142\n",
"train_data最大日期: 2024-04-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-10\n",
"划分后的训练集大小: 2792, 验证集大小: 146\n",
"train_data最大日期: 2024-04-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-11\n",
"划分后的训练集大小: 2797, 验证集大小: 145\n",
"train_data最大日期: 2024-04-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-12\n",
"划分后的训练集大小: 2794, 验证集大小: 140\n",
"train_data最大日期: 2024-04-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-15\n",
"划分后的训练集大小: 2796, 验证集大小: 140\n",
"train_data最大日期: 2024-04-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-16\n",
"划分后的训练集大小: 2790, 验证集大小: 138\n",
"train_data最大日期: 2024-04-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-17\n",
"划分后的训练集大小: 2786, 验证集大小: 143\n",
"train_data最大日期: 2024-04-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-18\n",
"划分后的训练集大小: 2784, 验证集大小: 139\n",
"train_data最大日期: 2024-04-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-19\n",
"划分后的训练集大小: 2783, 验证集大小: 139\n",
"train_data最大日期: 2024-04-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-22\n",
"划分后的训练集大小: 2791, 验证集大小: 143\n",
"train_data最大日期: 2024-04-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-23\n",
"划分后的训练集大小: 2807, 验证集大小: 147\n",
"train_data最大日期: 2024-04-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-24\n",
"划分后的训练集大小: 2815, 验证集大小: 141\n",
"train_data最大日期: 2024-04-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-25\n",
"划分后的训练集大小: 2815, 验证集大小: 140\n",
"train_data最大日期: 2024-04-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-26\n",
"划分后的训练集大小: 2822, 验证集大小: 145\n",
"train_data最大日期: 2024-04-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-29\n",
"划分后的训练集大小: 2825, 验证集大小: 145\n",
"train_data最大日期: 2024-04-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-04-30\n",
"划分后的训练集大小: 2825, 验证集大小: 144\n",
"train_data最大日期: 2024-04-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-06\n",
"划分后的训练集大小: 2827, 验证集大小: 144\n",
"train_data最大日期: 2024-05-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-07\n",
"划分后的训练集大小: 2827, 验证集大小: 135\n",
"train_data最大日期: 2024-05-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-08\n",
"划分后的训练集大小: 2834, 验证集大小: 139\n",
"train_data最大日期: 2024-05-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-09\n",
"划分后的训练集大小: 2834, 验证集大小: 139\n",
"train_data最大日期: 2024-05-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-10\n",
"划分后的训练集大小: 2833, 验证集大小: 141\n",
"train_data最大日期: 2024-05-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-13\n",
"划分后的训练集大小: 2830, 验证集大小: 143\n",
"train_data最大日期: 2024-05-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-14\n",
"划分后的训练集大小: 2829, 验证集大小: 144\n",
"train_data最大日期: 2024-05-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-15\n",
"划分后的训练集大小: 2835, 验证集大小: 146\n",
"train_data最大日期: 2024-05-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-16\n",
"划分后的训练集大小: 2845, 验证集大小: 150\n",
"train_data最大日期: 2024-05-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-17\n",
"划分后的训练集大小: 2853, 验证集大小: 146\n",
"train_data最大日期: 2024-05-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-20\n",
"划分后的训练集大小: 2854, 验证集大小: 144\n",
"train_data最大日期: 2024-05-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-21\n",
"划分后的训练集大小: 2856, 验证集大小: 141\n",
"train_data最大日期: 2024-05-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-22\n",
"划分后的训练集大小: 2862, 验证集大小: 145\n",
"train_data最大日期: 2024-05-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-23\n",
"划分后的训练集大小: 2863, 验证集大小: 144\n",
"train_data最大日期: 2024-05-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-24\n",
"划分后的训练集大小: 2860, 验证集大小: 144\n",
"train_data最大日期: 2024-05-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-27\n",
"划分后的训练集大小: 2867, 验证集大小: 148\n",
"train_data最大日期: 2024-05-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-28\n",
"划分后的训练集大小: 2875, 验证集大小: 148\n",
"train_data最大日期: 2024-05-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-29\n",
"划分后的训练集大小: 2873, 验证集大小: 143\n",
"train_data最大日期: 2024-05-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-30\n",
"划分后的训练集大小: 2870, 验证集大小: 142\n",
"train_data最大日期: 2024-05-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-05-31\n",
"划分后的训练集大小: 2874, 验证集大小: 148\n",
"train_data最大日期: 2024-05-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-03\n",
"划分后的训练集大小: 2877, 验证集大小: 147\n",
"train_data最大日期: 2024-06-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-04\n",
"划分后的训练集大小: 2889, 验证集大小: 147\n",
"train_data最大日期: 2024-06-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-05\n",
"划分后的训练集大小: 2895, 验证集大小: 145\n",
"train_data最大日期: 2024-06-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-06\n",
"划分后的训练集大小: 2901, 验证集大小: 145\n",
"train_data最大日期: 2024-06-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-07\n",
"划分后的训练集大小: 2908, 验证集大小: 148\n",
"train_data最大日期: 2024-06-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-11\n",
"划分后的训练集大小: 2913, 验证集大小: 148\n",
"train_data最大日期: 2024-06-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-12\n",
"划分后的训练集大小: 2916, 验证集大小: 147\n",
"train_data最大日期: 2024-06-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-13\n",
"划分后的训练集大小: 2913, 验证集大小: 143\n",
"train_data最大日期: 2024-06-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-14\n",
"划分后的训练集大小: 2907, 验证集大小: 144\n",
"train_data最大日期: 2024-06-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-17\n",
"划分后的训练集大小: 2909, 验证集大小: 148\n",
"train_data最大日期: 2024-06-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-18\n",
"划分后的训练集大小: 2912, 验证集大小: 147\n",
"train_data最大日期: 2024-06-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-19\n",
"划分后的训练集大小: 2917, 验证集大小: 146\n",
"train_data最大日期: 2024-06-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-20\n",
"划分后的训练集大小: 2916, 验证集大小: 144\n",
"train_data最大日期: 2024-06-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-21\n",
"划分后的训练集大小: 2917, 验证集大小: 145\n",
"train_data最大日期: 2024-06-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-24\n",
"划分后的训练集大小: 2921, 验证集大小: 148\n",
"train_data最大日期: 2024-06-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-25\n",
"划分后的训练集大小: 2919, 验证集大小: 146\n",
"train_data最大日期: 2024-06-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-26\n",
"划分后的训练集大小: 2917, 验证集大小: 146\n",
"train_data最大日期: 2024-06-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-27\n",
"划分后的训练集大小: 2920, 验证集大小: 146\n",
"train_data最大日期: 2024-06-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-06-28\n",
"划分后的训练集大小: 2925, 验证集大小: 147\n",
"train_data最大日期: 2024-06-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-01\n",
"划分后的训练集大小: 2923, 验证集大小: 146\n",
"train_data最大日期: 2024-07-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-02\n",
"划分后的训练集大小: 2923, 验证集大小: 147\n",
"train_data最大日期: 2024-07-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-03\n",
"划分后的训练集大小: 2919, 验证集大小: 143\n",
"train_data最大日期: 2024-07-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-04\n",
"划分后的训练集大小: 2917, 验证集大小: 143\n",
"train_data最大日期: 2024-07-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-05\n",
"划分后的训练集大小: 2917, 验证集大小: 145\n",
"train_data最大日期: 2024-07-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-08\n",
"划分后的训练集大小: 2913, 验证集大小: 144\n",
"train_data最大日期: 2024-07-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-09\n",
"划分后的训练集大小: 2911, 验证集大小: 146\n",
"train_data最大日期: 2024-07-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-10\n",
"划分后的训练集大小: 2912, 验证集大小: 148\n",
"train_data最大日期: 2024-07-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-11\n",
"划分后的训练集大小: 2917, 验证集大小: 148\n",
"train_data最大日期: 2024-07-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-12\n",
"划分后的训练集大小: 2917, 验证集大小: 144\n",
"train_data最大日期: 2024-07-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-15\n",
"划分后的训练集大小: 2911, 验证集大小: 142\n",
"train_data最大日期: 2024-07-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-16\n",
"划分后的训练集大小: 2907, 验证集大小: 143\n",
"train_data最大日期: 2024-07-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-17\n",
"划分后的训练集大小: 2909, 验证集大小: 148\n",
"train_data最大日期: 2024-07-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-18\n",
"划分后的训练集大小: 2907, 验证集大小: 142\n",
"train_data最大日期: 2024-07-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-19\n",
"划分后的训练集大小: 2909, 验证集大小: 147\n",
"train_data最大日期: 2024-07-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-22\n",
"划分后的训练集大小: 2909, 验证集大小: 148\n",
"train_data最大日期: 2024-07-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-23\n",
"划分后的训练集大小: 2908, 验证集大小: 145\n",
"train_data最大日期: 2024-07-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-24\n",
"划分后的训练集大小: 2905, 验证集大小: 143\n",
"train_data最大日期: 2024-07-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-25\n",
"划分后的训练集大小: 2903, 验证集大小: 144\n",
"train_data最大日期: 2024-07-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-26\n",
"划分后的训练集大小: 2904, 验证集大小: 148\n",
"train_data最大日期: 2024-07-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-29\n",
"划分后的训练集大小: 2904, 验证集大小: 146\n",
"train_data最大日期: 2024-07-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-30\n",
"划分后的训练集大小: 2906, 验证集大小: 149\n",
"train_data最大日期: 2024-07-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-07-31\n",
"划分后的训练集大小: 2900, 验证集大小: 137\n",
"train_data最大日期: 2024-07-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-01\n",
"划分后的训练集大小: 2902, 验证集大小: 145\n",
"train_data最大日期: 2024-08-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-02\n",
"划分后的训练集大小: 2898, 验证集大小: 141\n",
"train_data最大日期: 2024-08-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-05\n",
"划分后的训练集大小: 2892, 验证集大小: 138\n",
"train_data最大日期: 2024-08-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-06\n",
"划分后的训练集大小: 2888, 验证集大小: 142\n",
"train_data最大日期: 2024-08-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-07\n",
"划分后的训练集大小: 2885, 验证集大小: 145\n",
"train_data最大日期: 2024-08-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-08\n",
"划分后的训练集大小: 2878, 验证集大小: 141\n",
"train_data最大日期: 2024-08-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-09\n",
"划分后的训练集大小: 2872, 验证集大小: 138\n",
"train_data最大日期: 2024-08-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-12\n",
"划分后的训练集大小: 2872, 验证集大小: 142\n",
"train_data最大日期: 2024-08-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-13\n",
"划分后的训练集大小: 2876, 验证集大小: 147\n",
"train_data最大日期: 2024-08-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-14\n",
"划分后的训练集大小: 2875, 验证集大小: 147\n",
"train_data最大日期: 2024-08-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-15\n",
"划分后的训练集大小: 2882, 验证集大小: 149\n",
"train_data最大日期: 2024-08-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-16\n",
"划分后的训练集大小: 2883, 验证集大小: 148\n",
"train_data最大日期: 2024-08-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-19\n",
"划分后的训练集大小: 2882, 验证集大小: 147\n",
"train_data最大日期: 2024-08-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-20\n",
"划分后的训练集大小: 2886, 验证集大小: 149\n",
"train_data最大日期: 2024-08-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-21\n",
"划分后的训练集大小: 2891, 验证集大小: 148\n",
"train_data最大日期: 2024-08-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-22\n",
"划分后的训练集大小: 2893, 验证集大小: 146\n",
"train_data最大日期: 2024-08-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-23\n",
"划分后的训练集大小: 2892, 验证集大小: 147\n",
"train_data最大日期: 2024-08-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-26\n",
"划分后的训练集大小: 2893, 验证集大小: 147\n",
"train_data最大日期: 2024-08-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-27\n",
"划分后的训练集大小: 2889, 验证集大小: 145\n",
"train_data最大日期: 2024-08-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-28\n",
"划分后的训练集大小: 2900, 验证集大小: 148\n",
"train_data最大日期: 2024-08-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-29\n",
"划分后的训练集大小: 2899, 验证集大小: 144\n",
"train_data最大日期: 2024-08-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-08-30\n",
"划分后的训练集大小: 2904, 验证集大小: 146\n",
"train_data最大日期: 2024-08-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-02\n",
"划分后的训练集大小: 2912, 验证集大小: 146\n",
"train_data最大日期: 2024-09-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-03\n",
"划分后的训练集大小: 2915, 验证集大小: 145\n",
"train_data最大日期: 2024-09-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-04\n",
"划分后的训练集大小: 2913, 验证集大小: 143\n",
"train_data最大日期: 2024-09-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-05\n",
"划分后的训练集大小: 2917, 验证集大小: 145\n",
"train_data最大日期: 2024-09-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-06\n",
"划分后的训练集大小: 2924, 验证集大小: 145\n",
"train_data最大日期: 2024-09-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-09\n",
"划分后的训练集大小: 2920, 验证集大小: 138\n",
"train_data最大日期: 2024-09-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-10\n",
"划分后的训练集大小: 2915, 验证集大小: 142\n",
"train_data最大日期: 2024-09-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-11\n",
"划分后的训练集大小: 2914, 验证集大小: 146\n",
"train_data最大日期: 2024-09-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-12\n",
"划分后的训练集大小: 2912, 验证集大小: 147\n",
"train_data最大日期: 2024-09-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-13\n",
"划分后的训练集大小: 2910, 验证集大小: 146\n",
"train_data最大日期: 2024-09-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-18\n",
"划分后的训练集大小: 2908, 验证集大小: 145\n",
"train_data最大日期: 2024-09-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-19\n",
"划分后的训练集大小: 2904, 验证集大小: 145\n",
"train_data最大日期: 2024-09-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-20\n",
"划分后的训练集大小: 2900, 验证集大小: 144\n",
"train_data最大日期: 2024-09-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-23\n",
"划分后的训练集大小: 2898, 验证集大小: 144\n",
"train_data最大日期: 2024-09-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-24\n",
"划分后的训练集大小: 2887, 验证集大小: 136\n",
"train_data最大日期: 2024-09-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-25\n",
"划分后的训练集大小: 2880, 验证集大小: 140\n",
"train_data最大日期: 2024-09-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-26\n",
"划分后的训练集大小: 2879, 验证集大小: 144\n",
"train_data最大日期: 2024-09-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-27\n",
"划分后的训练集大小: 2866, 验证集大小: 135\n",
"train_data最大日期: 2024-09-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-09-30\n",
"划分后的训练集大小: 2849, 验证集大小: 127\n",
"train_data最大日期: 2024-09-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-08\n",
"划分后的训练集大小: 2823, 验证集大小: 120\n",
"train_data最大日期: 2024-10-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-09\n",
"划分后的训练集大小: 2719, 验证集大小: 42\n",
"train_data最大日期: 2024-10-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-10\n",
"划分后的训练集大小: 2695, 验证集大小: 121\n",
"train_data最大日期: 2024-10-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-11\n",
"划分后的训练集大小: 2694, 验证集大小: 142\n",
"train_data最大日期: 2024-10-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-14\n",
"划分后的训练集大小: 2693, 验证集大小: 144\n",
"train_data最大日期: 2024-10-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-15\n",
"划分后的训练集大小: 2692, 验证集大小: 144\n",
"train_data最大日期: 2024-10-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-16\n",
"划分后的训练集大小: 2700, 验证集大小: 146\n",
"train_data最大日期: 2024-10-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-17\n",
"划分后的训练集大小: 2698, 验证集大小: 140\n",
"train_data最大日期: 2024-10-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-18\n",
"划分后的训练集大小: 2692, 验证集大小: 140\n",
"train_data最大日期: 2024-10-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-21\n",
"划分后的训练集大小: 2687, 验证集大小: 142\n",
"train_data最大日期: 2024-10-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-22\n",
"划分后的训练集大小: 2676, 验证集大小: 135\n",
"train_data最大日期: 2024-10-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-23\n",
"划分后的训练集大小: 2657, 验证集大小: 126\n",
"train_data最大日期: 2024-10-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-24\n",
"划分后的训练集大小: 2639, 验证集大小: 127\n",
"train_data最大日期: 2024-10-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-25\n",
"划分后的训练集大小: 2632, 验证集大小: 137\n",
"train_data最大日期: 2024-10-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-28\n",
"划分后的训练集大小: 2620, 验证集大小: 132\n",
"train_data最大日期: 2024-10-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-29\n",
"划分后的训练集大小: 2606, 验证集大小: 122\n",
"train_data最大日期: 2024-10-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-30\n",
"划分后的训练集大小: 2552, 验证集大小: 86\n",
"train_data最大日期: 2024-10-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-10-31\n",
"划分后的训练集大小: 2529, 验证集大小: 121\n",
"train_data最大日期: 2024-10-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-01\n",
"划分后的训练集大小: 2488, 验证集大小: 94\n",
"train_data最大日期: 2024-11-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-04\n",
"划分后的训练集大小: 2490, 验证集大小: 129\n",
"train_data最大日期: 2024-11-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-05\n",
"划分后的训练集大小: 2503, 验证集大小: 133\n",
"train_data最大日期: 2024-11-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-06\n",
"划分后的训练集大小: 2600, 验证集大小: 139\n",
"train_data最大日期: 2024-11-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-07\n",
"划分后的训练集大小: 2597, 验证集大小: 118\n",
"train_data最大日期: 2024-11-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-08\n",
"划分后的训练集大小: 2583, 验证集大小: 128\n",
"train_data最大日期: 2024-11-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-11\n",
"划分后的训练集大小: 2570, 验证集大小: 131\n",
"train_data最大日期: 2024-11-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-12\n",
"划分后的训练集大小: 2558, 验证集大小: 132\n",
"train_data最大日期: 2024-11-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-13\n",
"划分后的训练集大小: 2542, 验证集大小: 130\n",
"train_data最大日期: 2024-11-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-14\n",
"划分后的训练集大小: 2539, 验证集大小: 137\n",
"train_data最大日期: 2024-11-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-15\n",
"划分后的训练集大小: 2539, 验证集大小: 140\n",
"train_data最大日期: 2024-11-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-18\n",
"划分后的训练集大小: 2531, 验证集大小: 134\n",
"train_data最大日期: 2024-11-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-19\n",
"划分后的训练集大小: 2533, 验证集大小: 137\n",
"train_data最大日期: 2024-11-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-20\n",
"划分后的训练集大小: 2544, 验证集大小: 137\n",
"train_data最大日期: 2024-11-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-21\n",
"划分后的训练集大小: 2558, 验证集大小: 141\n",
"train_data最大日期: 2024-11-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-22\n",
"划分后的训练集大小: 2550, 验证集大小: 129\n",
"train_data最大日期: 2024-11-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-25\n",
"划分后的训练集大小: 2548, 验证集大小: 130\n",
"train_data最大日期: 2024-11-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-26\n",
"划分后的训练集大小: 2544, 验证集大小: 118\n",
"train_data最大日期: 2024-11-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-27\n",
"划分后的训练集大小: 2593, 验证集大小: 135\n",
"train_data最大日期: 2024-11-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-28\n",
"划分后的训练集大小: 2610, 验证集大小: 138\n",
"train_data最大日期: 2024-11-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-11-29\n",
"划分后的训练集大小: 2652, 验证集大小: 136\n",
"train_data最大日期: 2024-11-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-02\n",
"划分后的训练集大小: 2658, 验证集大小: 135\n",
"train_data最大日期: 2024-12-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-03\n",
"划分后的训练集大小: 2659, 验证集大小: 134\n",
"train_data最大日期: 2024-12-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-04\n",
"划分后的训练集大小: 2645, 验证集大小: 125\n",
"train_data最大日期: 2024-12-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-05\n",
"划分后的训练集大小: 2654, 验证集大小: 127\n",
"train_data最大日期: 2024-12-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-06\n",
"划分后的训练集大小: 2659, 验证集大小: 133\n",
"train_data最大日期: 2024-12-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-09\n",
"划分后的训练集大小: 2662, 验证集大小: 134\n",
"train_data最大日期: 2024-12-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-10\n",
"划分后的训练集大小: 2666, 验证集大小: 136\n",
"train_data最大日期: 2024-12-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-11\n",
"划分后的训练集大小: 2674, 验证集大小: 138\n",
"train_data最大日期: 2024-12-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-12\n",
"划分后的训练集大小: 2680, 验证集大小: 143\n",
"train_data最大日期: 2024-12-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-13\n",
"划分后的训练集大小: 2664, 验证集大小: 124\n",
"train_data最大日期: 2024-12-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-16\n",
"划分后的训练集大小: 2656, 验证集大小: 126\n",
"train_data最大日期: 2024-12-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-17\n",
"划分后的训练集大小: 2643, 验证集大小: 124\n",
"train_data最大日期: 2024-12-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-18\n",
"划分后的训练集大小: 2640, 验证集大小: 134\n",
"train_data最大日期: 2024-12-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-19\n",
"划分后的训练集大小: 2637, 验证集大小: 138\n",
"train_data最大日期: 2024-12-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-20\n",
"划分后的训练集大小: 2651, 验证集大小: 143\n",
"train_data最大日期: 2024-12-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-23\n",
"划分后的训练集大小: 2664, 验证集大小: 143\n",
"train_data最大日期: 2024-12-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-24\n",
"划分后的训练集大小: 2688, 验证集大小: 142\n",
"train_data最大日期: 2024-12-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-25\n",
"划分后的训练集大小: 2695, 验证集大小: 142\n",
"train_data最大日期: 2024-12-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-26\n",
"划分后的训练集大小: 2697, 验证集大小: 140\n",
"train_data最大日期: 2024-12-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-27\n",
"划分后的训练集大小: 2701, 验证集大小: 140\n",
"train_data最大日期: 2024-12-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-30\n",
"划分后的训练集大小: 2713, 验证集大小: 147\n",
"train_data最大日期: 2024-12-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2024-12-31\n",
"划分后的训练集大小: 2723, 验证集大小: 144\n",
"train_data最大日期: 2024-12-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-02\n",
"划分后的训练集大小: 2736, 验证集大小: 138\n",
"train_data最大日期: 2025-01-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-03\n",
"划分后的训练集大小: 2747, 验证集大小: 138\n",
"train_data最大日期: 2025-01-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-06\n",
"划分后的训练集大小: 2758, 验证集大小: 144\n",
"train_data最大日期: 2025-01-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-07\n",
"划分后的训练集大小: 2763, 验证集大小: 139\n",
"train_data最大日期: 2025-01-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-08\n",
"划分后的训练集大小: 2770, 验证集大小: 143\n",
"train_data最大日期: 2025-01-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-09\n",
"划分后的训练集大小: 2777, 验证集大小: 145\n",
"train_data最大日期: 2025-01-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-10\n",
"划分后的训练集大小: 2772, 验证集大小: 138\n",
"train_data最大日期: 2025-01-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-13\n",
"划分后的训练集大小: 2793, 验证集大小: 145\n",
"train_data最大日期: 2025-01-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-14\n",
"划分后的训练集大小: 2813, 验证集大小: 146\n",
"train_data最大日期: 2025-01-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-15\n",
"划分后的训练集大小: 2830, 验证集大小: 141\n",
"train_data最大日期: 2025-01-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-16\n",
"划分后的训练集大小: 2843, 验证集大小: 147\n",
"train_data最大日期: 2025-01-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-17\n",
"划分后的训练集大小: 2838, 验证集大小: 133\n",
"train_data最大日期: 2025-01-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-20\n",
"划分后的训练集大小: 2836, 验证集大小: 141\n",
"train_data最大日期: 2025-01-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-21\n",
"划分后的训练集大小: 2840, 验证集大小: 147\n",
"train_data最大日期: 2025-01-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-22\n",
"划分后的训练集大小: 2840, 验证集大小: 142\n",
"train_data最大日期: 2025-01-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-23\n",
"划分后的训练集大小: 2836, 验证集大小: 138\n",
"train_data最大日期: 2025-01-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-24\n",
"划分后的训练集大小: 2840, 验证集大小: 144\n",
"train_data最大日期: 2025-01-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-01-27\n",
"划分后的训练集大小: 2847, 验证集大小: 147\n",
"train_data最大日期: 2025-01-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-05\n",
"划分后的训练集大小: 2843, 验证集大小: 143\n",
"train_data最大日期: 2025-02-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-06\n",
"划分后的训练集大小: 2837, 验证集大小: 138\n",
"train_data最大日期: 2025-02-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-07\n",
"划分后的训练集大小: 2839, 验证集大小: 140\n",
"train_data最大日期: 2025-02-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-10\n",
"划分后的训练集大小: 2837, 验证集大小: 136\n",
"train_data最大日期: 2025-02-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-11\n",
"划分后的训练集大小: 2834, 验证集大小: 141\n",
"train_data最大日期: 2025-02-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-12\n",
"划分后的训练集大小: 2830, 验证集大小: 135\n",
"train_data最大日期: 2025-02-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-13\n",
"划分后的训练集大小: 2826, 验证集大小: 139\n",
"train_data最大日期: 2025-02-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-14\n",
"划分后的训练集大小: 2821, 验证集大小: 140\n",
"train_data最大日期: 2025-02-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-17\n",
"划分后的训练集大小: 2824, 验证集大小: 141\n",
"train_data最大日期: 2025-02-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-18\n",
"划分后的训练集大小: 2819, 验证集大小: 140\n",
"train_data最大日期: 2025-02-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-19\n",
"划分后的训练集大小: 2817, 验证集大小: 144\n",
"train_data最大日期: 2025-02-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-20\n",
"划分后的训练集大小: 2819, 验证集大小: 143\n",
"train_data最大日期: 2025-02-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-21\n",
"划分后的训练集大小: 2815, 验证集大小: 143\n",
"train_data最大日期: 2025-02-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-24\n",
"划分后的训练集大小: 2822, 验证集大小: 140\n",
"train_data最大日期: 2025-02-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-25\n",
"划分后的训练集大小: 2819, 验证集大小: 138\n",
"train_data最大日期: 2025-02-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-26\n",
"划分后的训练集大小: 2819, 验证集大小: 147\n",
"train_data最大日期: 2025-02-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-27\n",
"划分后的训练集大小: 2820, 验证集大小: 143\n",
"train_data最大日期: 2025-02-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-02-28\n",
"划分后的训练集大小: 2823, 验证集大小: 141\n",
"train_data最大日期: 2025-02-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-03\n",
"划分后的训练集大小: 2818, 验证集大小: 139\n",
"train_data最大日期: 2025-03-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-04\n",
"划分后的训练集大小: 2812, 验证集大小: 141\n",
"train_data最大日期: 2025-03-04, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-05\n",
"划分后的训练集大小: 2814, 验证集大小: 145\n",
"train_data最大日期: 2025-03-05, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-06\n",
"划分后的训练集大小: 2818, 验证集大小: 142\n",
"train_data最大日期: 2025-03-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-07\n",
"划分后的训练集大小: 2825, 验证集大小: 147\n",
"train_data最大日期: 2025-03-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-10\n",
"划分后的训练集大小: 2834, 验证集大小: 145\n",
"train_data最大日期: 2025-03-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-11\n",
"划分后的训练集大小: 2832, 验证集大小: 139\n",
"train_data最大日期: 2025-03-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-12\n",
"划分后的训练集大小: 2843, 验证集大小: 146\n",
"train_data最大日期: 2025-03-12, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-13\n",
"划分后的训练集大小: 2847, 验证集大小: 143\n",
"train_data最大日期: 2025-03-13, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-14\n",
"划分后的训练集大小: 2854, 验证集大小: 147\n",
"train_data最大日期: 2025-03-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-17\n",
"划分后的训练集大小: 2856, 验证集大小: 143\n",
"train_data最大日期: 2025-03-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-18\n",
"划分后的训练集大小: 2861, 验证集大小: 145\n",
"train_data最大日期: 2025-03-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-19\n",
"划分后的训练集大小: 2856, 验证集大小: 139\n",
"train_data最大日期: 2025-03-19, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-20\n",
"划分后的训练集大小: 2851, 验证集大小: 138\n",
"train_data最大日期: 2025-03-20, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-21\n",
"划分后的训练集大小: 2851, 验证集大小: 143\n",
"train_data最大日期: 2025-03-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-24\n",
"划分后的训练集大小: 2849, 验证集大小: 138\n",
"train_data最大日期: 2025-03-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-25\n",
"划分后的训练集大小: 2854, 验证集大小: 143\n",
"train_data最大日期: 2025-03-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-26\n",
"划分后的训练集大小: 2847, 验证集大小: 140\n",
"train_data最大日期: 2025-03-26, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-27\n",
"划分后的训练集大小: 2844, 验证集大小: 140\n",
"train_data最大日期: 2025-03-27, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-28\n",
"划分后的训练集大小: 2844, 验证集大小: 141\n",
"train_data最大日期: 2025-03-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-03-31\n",
"划分后的训练集大小: 2852, 验证集大小: 147\n",
"train_data最大日期: 2025-03-31, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-01\n",
"划分后的训练集大小: 2853, 验证集大小: 142\n",
"train_data最大日期: 2025-04-01, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-02\n",
"划分后的训练集大小: 2852, 验证集大小: 144\n",
"train_data最大日期: 2025-04-02, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-03\n",
"划分后的训练集大小: 2853, 验证集大小: 143\n",
"train_data最大日期: 2025-04-03, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-07\n",
"划分后的训练集大小: 2847, 验证集大小: 141\n",
"train_data最大日期: 2025-04-07, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-08\n",
"划分后的训练集大小: 2837, 验证集大小: 135\n",
"train_data最大日期: 2025-04-08, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-09\n",
"划分后的训练集大小: 2838, 验证集大小: 140\n",
"train_data最大日期: 2025-04-09, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-10\n",
"划分后的训练集大小: 2827, 验证集大小: 135\n",
"train_data最大日期: 2025-04-10, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-11\n",
"划分后的训练集大小: 2827, 验证集大小: 143\n",
"train_data最大日期: 2025-04-11, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-14\n",
"划分后的训练集大小: 2820, 验证集大小: 140\n",
"train_data最大日期: 2025-04-14, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-15\n",
"划分后的训练集大小: 2813, 验证集大小: 136\n",
"train_data最大日期: 2025-04-15, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-16\n",
"划分后的训练集大小: 2814, 验证集大小: 146\n",
"train_data最大日期: 2025-04-16, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-17\n",
"划分后的训练集大小: 2817, 验证集大小: 142\n",
"train_data最大日期: 2025-04-17, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-18\n",
"划分后的训练集大小: 2823, 验证集大小: 144\n",
"train_data最大日期: 2025-04-18, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-21\n",
"划分后的训练集大小: 2822, 验证集大小: 142\n",
"train_data最大日期: 2025-04-21, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-22\n",
"划分后的训练集大小: 2826, 验证集大小: 142\n",
"train_data最大日期: 2025-04-22, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-23\n",
"划分后的训练集大小: 2826, 验证集大小: 143\n",
"train_data最大日期: 2025-04-23, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-24\n",
"划分后的训练集大小: 2824, 验证集大小: 138\n",
"train_data最大日期: 2025-04-24, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-25\n",
"划分后的训练集大小: 2825, 验证集大小: 141\n",
"train_data最大日期: 2025-04-25, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-28\n",
"划分后的训练集大小: 2829, 验证集大小: 145\n",
"train_data最大日期: 2025-04-28, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-29\n",
"划分后的训练集大小: 2823, 验证集大小: 141\n",
"train_data最大日期: 2025-04-29, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-04-30\n",
"划分后的训练集大小: 2829, 验证集大小: 148\n",
"train_data最大日期: 2025-04-30, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-05-06\n",
"划分后的训练集大小: 2832, 验证集大小: 147\n",
"train_data最大日期: 2025-05-06, 训练天数:20, feat size:116\n",
"test_data最大日期: 2025-05-07\n",
"划分后的训练集大小: 2832, 验证集大小: 143\n"
]
}
],
"source": [
"\n",
"# Force a garbage-collection pass before the long rolling run.\n",
"gc.collect()\n",
"\n",
"print('finish')\n",
"# qdf = qdf[qdf['trade_date'] >= '2022-01-01']\n",
"\n",
"# Rolling train/predict over every row dated 2022-01-01 or later; the far-future upper\n",
"# bound ('2029-03-26') leaves the end of the range effectively open.\n",
"# NOTE(review): the positional 20 is presumably the training-window length in days (the\n",
"# logged output reports 20 training days) -- confirm against rolling_train_predict.\n",
"final_predictions = rolling_train_predict(\n",
"    pdf[(pdf['trade_date'] >= '2022-01-01') & (pdf['trade_date'] <= '2029-03-26')], 20, 1, feature_columns,\n",
"    days=days, validation_days=0, filter_index=filter_index, params=light_params)\n",
"# final_predictions2 = rolling_train_predict(\n",
"#     pdf[(pdf['trade_date'] >= '2022-01-01') & (pdf['trade_date'] <= '2029-03-26')], 20, 1, feature_columns,\n",
"#     days=days, validation_days=0, filter_index=filter_index, params=light_params)\n",
"# final_predictions['score'] = final_predictions.groupby('trade_date')['score'].rank(ascending=False)\n",
"# final_predictions2['score'] = final_predictions2.groupby('trade_date')['score'].rank(ascending=False)\n",
"# final_predictions = pd.merge(final_predictions, final_predictions2, on=['trade_date', 'ts_code'], suffixes=['_1', '_2'])\n",
"# final_predictions['score'] = final_predictions['score_1'] + final_predictions['score_2']\n",
"\n",
"# Keep only the highest-scoring row per trade_date. idxmax returns index *labels*, so rows\n",
"# without a score are dropped and the index is rebuilt first: with a non-unique index .loc\n",
"# would return several rows per day, and a day whose scores are all NaN has no valid label.\n",
"final_predictions = final_predictions.dropna(subset=['score']).reset_index(drop=True)\n",
"final_predictions = final_predictions.loc[final_predictions.groupby('trade_date')['score'].idxmax()]\n",
"# final_predictions = final_predictions[final_predictions['score'] <= 6]\n",
"\n",
"# NOTE(review): to_csv keeps its default ',' separator, so this file is comma-separated\n",
"# despite the .tsv extension; left unchanged in case downstream readers depend on it.\n",
"final_predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions_test.tsv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "e01fe33b-e30d-4bc6-bf40-de91e61862b4",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:00:15.486598Z",
"start_time": "2025-04-11T18:00:15.482042Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Empty DataFrame\n",
"Columns: [ts_code, trade_date, open, close, high, low, vol, pct_chg, turnover_rate, pe_ttm, circ_mv, total_mv, volume_ratio, is_st, up_limit, down_limit, buy_sm_vol, sell_sm_vol, buy_lg_vol, sell_lg_vol, buy_elg_vol, sell_elg_vol, net_mf_vol, his_low, his_high, cost_5pct, cost_15pct, cost_50pct, cost_85pct, cost_95pct, weight_avg, winner_rate, cat_l1_code, cat_l2_code, lg_elg_net_buy_vol, flow_lg_elg_intensity, sm_net_buy_vol, flow_divergence_diff, flow_divergence_ratio, total_buy_vol, lg_elg_buy_prop, flow_struct_buy_change, lg_elg_net_buy_vol_change, flow_lg_elg_accel, chip_concentration_range, chip_skewness, floating_chip_proxy, cost_support_15pct_change, cat_winner_price_zone, flow_chip_consistency, profit_taking_vs_absorb, cat_is_positive, upside_vol, downside_vol, vol_ratio, return_skew, return_kurtosis, volume_change_rate, cat_volume_breakout, turnover_deviation, cat_turnover_spike, avg_volume_ratio, cat_volume_ratio_breakout, vol_spike, vol_std_5, atr_14, atr_6, obv, maobv_6, rsi_3, return_5, return_20, std_return_5, std_return_90, std_return_90_2, act_factor1, act_factor2, act_factor3, act_factor4, rank_act_factor1, rank_act_factor2, rank_act_factor3, log(circ_mv), cov, delta_cov, alpha_22_improved, alpha_003, alpha_007, alpha_013, cat_up_limit, cat_down_limit, up_limit_count_10d, down_limit_count_10d, consecutive_up_limit, vol_break, weight_roc5, price_cost_divergence, smallcap_concentration, cost_stability, high_cost_break_days, ...]\n",
"Index: []\n",
"\n",
"[0 rows x 181 columns]\n"
]
}
],
"source": [
"# Sanity check: show any rows dated 2025-03-08 (a Saturday, so an empty frame is expected).\n",
"print(pdf.loc[pdf['trade_date'].eq('2025-03-08')])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "0dc75517-c857-4f1d-8815-e807400a6d33",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:00:15.508026Z",
"start_time": "2025-04-11T18:00:15.503740Z"
}
},
"outputs": [],
"source": [
"# pdf1 = select_pre_zt_stocks_dynamic(df[(df['trade_date'] >= '2022-03-26') & (df['trade_date'] <= '2029-03-26')])\n",
"# pdf1 = pdf1.merge(industry_df, on=['cat_l1_code', 'trade_date'], how='left')\n",
"# pdf1 = pdf1.replace([np.inf, -np.inf], np.nan)\n",
"#\n",
"# feature_columns = [col for col in pdf1.columns if col in pdf.columns]\n",
"# feature_columns = [col for col in feature_columns if col not in ['trade_date',\n",
"# 'ts_code',\n",
"# 'label']]\n",
"# feature_columns = [col for col in feature_columns if 'future' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'label' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'score' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'gen' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'pe_ttm' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'volatility' not in col]\n",
"# feature_columns = [col for col in feature_columns if 'cat_l1_code' not in col]\n",
"# feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
"# feature_columns = [col for col in feature_columns if not col.startswith('_')]\n",
"# # feature_columns = [col for col in feature_columns if col not in ['ts_code', 'trade_date', 'vol_std_5', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_007', 'consecutive_up_limit', 'mv_volatility', 'volume_growth', 'mv_growth', 'arbr']]\n",
"#\n",
"# print(feature_columns)\n",
"# numeric_columns = pdf.select_dtypes(include=['float64', 'int64']).columns\n",
"# numeric_columns = [col for col in numeric_columns if col in feature_columns]\n",
"#\n",
"# pdf1 = cross_sectional_quantile_filter(pdf1, numeric_columns)\n",
"# pdf1 = cross_sectional_standardization(pdf1, numeric_columns)\n",
"#\n",
"# # pdf1 = cross_sectional_standardization(pdf1, numeric_columns)\n",
"# # pdf1 = pdf1[pdf1['trade_date'] <= '2025-03-26']\n",
"# pdf1 = pdf1.sort_values(by=['ts_code', 'trade_date'])\n",
"# filter_index1 = pdf1['future_return'].between(pdf1['future_return'].quantile(0.01), pdf1['future_return'].quantile(0.99))\n",
"#\n",
"# print('-----------------------------------------')\n",
"# pdf2 = select_pre_zt_stocks_dynamic(df[(df['trade_date'] >= '2022-03-26') & (df['trade_date'] <= '2025-03-26')])\n",
"# pdf2 = pdf2.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
"# pdf2 = pdf2.replace([np.inf, -np.inf], np.nan)\n",
"#\n",
"# pdf2 = cross_sectional_quantile_filter(pdf2, numeric_columns)\n",
"# pdf2 = cross_sectional_standardization(pdf2, numeric_columns)\n",
"#\n",
"# # pdf2 = cross_sectional_standardization(pdf2, numeric_columns)\n",
"#\n",
"# # pdf2 = pdf2[pdf2['trade_date'] <= '2025-03-26']\n",
"# pdf2 = pdf2.sort_values(by=['ts_code', 'trade_date'])\n",
"# filter_index2 = pdf2['future_return'].between(pdf2['future_return'].quantile(0.01), pdf2['future_return'].quantile(0.99))\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "8299a6f461097f14",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:00:15.552767Z",
"start_time": "2025-04-11T18:00:15.549412Z"
}
},
"outputs": [],
"source": [
"# are_equal = pdf1[filter_index1].equals(pdf2[filter_index2])\n",
"# print(are_equal) # 输出 True 或 False\n",
"#\n",
"# are_equal = pdf1.equals(pdf2)\n",
"# print(are_equal) # 输出 True 或 False\n",
"#\n",
"# are_equal = filter_index1.equals(filter_index2)\n",
"# print(are_equal) # 输出 True 或 False"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "3f5079aa2c937c22",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:00:15.630669Z",
"start_time": "2025-04-11T18:00:15.626474Z"
}
},
"outputs": [],
"source": [
"# final_predictions1 = rolling_train_predict(\n",
"# pdf1[(pdf1['trade_date'] >= '2024-12-01')], 5, 1, feature_columns,\n",
"# days=days, validation_days=0, filter_index=filter_index1, params=light_params)\n",
"# final_predictions1.to_csv('test1.tsv', index=False)\n",
"#\n",
"# final_predictions2 = rolling_train_predict(\n",
"# pdf2[(pdf2['trade_date'] >= '2024-12-01')], 5, 1, feature_columns,\n",
"# days=days, validation_days=0, filter_index=filter_index2, params=light_params)\n",
"# final_predictions2.to_csv('test2.tsv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "199b12e7e20e4e6a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-11T18:00:15.643137Z",
"start_time": "2025-04-11T18:00:15.640776Z"
}
},
"outputs": [],
"source": [
"# print(final_predictions1['trade_date'].max())\n",
"# print(final_predictions2['trade_date'].max())\n",
"#\n",
"# are_equal = final_predictions1[(final_predictions1['trade_date'] >= '2022-12-01') & (final_predictions1['trade_date'] <= '2025-03-26')].equals(final_predictions2[(final_predictions2['trade_date'] >= '2022-12-01') & (final_predictions2['trade_date'] <= '2025-03-26')])\n",
"# print(are_equal) # 输出 True 或 False"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}