{ "cells": [ { "cell_type": "code", "id": "79a7758178bafdd3", "metadata": { "jupyter": { "source_hidden": true }, "ExecuteTime": { "end_time": "2025-04-05T17:29:47.416680Z", "start_time": "2025-04-05T17:29:46.949355Z" } }, "source": [ "# %load_ext autoreload\n", "# %autoreload 2\n", "\n", "import pandas as pd\n", "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "pd.set_option('display.max_columns', None)\n" ], "outputs": [], "execution_count": 1 }, { "cell_type": "code", "id": "a79cafb06a7e0e43", "metadata": { "scrolled": true, "ExecuteTime": { "end_time": "2025-04-05T17:30:40.600943Z", "start_time": "2025-04-05T17:29:47.418440Z" } }, "source": [ "from utils.utils import read_and_merge_h5_data\n", "\n", "print('daily data')\n", "df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n", " columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n", " df=None)\n", "\n", "print('daily basic')\n", "df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n", " columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n", " 'is_st'], df=df, join='inner')\n", "\n", "print('stk limit')\n", "df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n", " columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n", " df=df)\n", "print('money flow')\n", "df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n", " columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n", " 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n", " df=df)\n", "print('cyq perf')\n", "df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',\n", " columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n", " 'cost_50pct',\n", " 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n", " df=df)\n", "print(df.info())" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "daily data\n", "daily basic\n", "inner merge on ['ts_code', 'trade_date']\n", "stk limit\n", "left merge on ['ts_code', 'trade_date']\n", "money flow\n", "left merge on ['ts_code', 'trade_date']\n", "cyq perf\n", "left merge on ['ts_code', 'trade_date']\n", "\n", "RangeIndex: 8477357 entries, 0 to 8477356\n", "Data columns (total 31 columns):\n", " # Column Dtype \n", "--- ------ ----- \n", " 0 ts_code object \n", " 1 trade_date datetime64[ns]\n", " 2 open float64 \n", " 3 close float64 \n", " 4 high float64 \n", " 5 low float64 \n", " 6 vol float64 \n", " 7 pct_chg float64 \n", " 8 turnover_rate float64 \n", " 9 pe_ttm float64 \n", " 10 circ_mv float64 \n", " 11 volume_ratio float64 \n", " 12 is_st bool \n", " 13 up_limit float64 \n", " 14 down_limit float64 \n", " 15 buy_sm_vol float64 \n", " 16 sell_sm_vol float64 \n", " 17 buy_lg_vol float64 \n", " 18 sell_lg_vol float64 \n", " 19 buy_elg_vol float64 \n", " 20 sell_elg_vol float64 \n", " 21 net_mf_vol float64 \n", " 22 his_low float64 \n", " 23 his_high float64 \n", " 24 cost_5pct float64 \n", " 25 cost_15pct float64 \n", " 26 cost_50pct float64 \n", " 27 cost_85pct float64 \n", " 28 cost_95pct float64 \n", " 29 weight_avg float64 \n", " 30 winner_rate float64 \n", "dtypes: bool(1), datetime64[ns](1), float64(28), object(1)\n", "memory usage: 1.9+ GB\n", "None\n" ] } ], "execution_count": 2 }, { "cell_type": "code", "id": "cac01788dac10678", "metadata": { "jupyter": { "source_hidden": true }, "ExecuteTime": { "end_time": "2025-04-05T17:30:55.594572Z", "start_time": "2025-04-05T17:30:40.882850Z" } }, "source": [ "print('industry')\n", "industry_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',\n", " columns=['ts_code', 'l2_code', 'in_date'],\n", " df=None, on=['ts_code'], join='left')\n", "\n", "\n", "def merge_with_industry_data(df, industry_df):\n", " # 确保日期字段是 datetime 类型\n", " df['trade_date'] = pd.to_datetime(df['trade_date'])\n", " industry_df['in_date'] = pd.to_datetime(industry_df['in_date'])\n", "\n", " # 对 industry_df 按 ts_code 和 in_date 排序\n", " industry_df_sorted = industry_df.sort_values(['in_date', 'ts_code'])\n", "\n", " # 对原始 df 按 ts_code 和 trade_date 排序\n", " df_sorted = df.sort_values(['trade_date', 'ts_code'])\n", "\n", " # 使用 merge_asof 进行向后合并\n", " merged = pd.merge_asof(\n", " df_sorted,\n", " industry_df_sorted,\n", " by='ts_code', # 按 ts_code 分组\n", " left_on='trade_date',\n", " right_on='in_date',\n", " direction='backward'\n", " )\n", "\n", " # 获取每个 ts_code 的最早 in_date 记录\n", " min_in_date_per_ts = (industry_df_sorted\n", " .groupby('ts_code')\n", " .first()\n", " .reset_index()[['ts_code', 'l2_code']])\n", "\n", " # 填充未匹配到的记录(trade_date 早于所有 in_date 的情况)\n", " merged['l2_code'] = merged['l2_code'].fillna(\n", " merged['ts_code'].map(min_in_date_per_ts.set_index('ts_code')['l2_code'])\n", " )\n", "\n", " # 保留需要的列并重置索引\n", " result = merged.reset_index(drop=True)\n", " return result\n", "\n", "\n", "# 使用示例\n", "df = merge_with_industry_data(df, industry_df)\n", "# print(mdf[mdf['ts_code'] == '600751.SH'][['ts_code', 'trade_date', 'l2_code']])" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "industry\n" ] } ], "execution_count": 3 }, { "cell_type": "code", "id": "c4e9e1d31da6dba6", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:30:55.846316Z", "start_time": "2025-04-05T17:30:55.635945Z" } }, "source": [ "def calculate_indicators(df):\n", " \"\"\"\n", " 计算四个指标:当日涨跌幅、5日移动平均、RSI、MACD。\n", " \"\"\"\n", " df = df.sort_values('trade_date')\n", " df['daily_return'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100\n", " # df['5_day_ma'] = df['close'].rolling(window=5).mean()\n", " delta = df['close'].diff()\n", " gain = delta.where(delta > 0, 0)\n", " loss = -delta.where(delta < 0, 0)\n", " avg_gain = gain.rolling(window=14).mean()\n", " avg_loss = loss.rolling(window=14).mean()\n", " rs = avg_gain / avg_loss\n", " df['RSI'] = 100 - (100 / (1 + rs))\n", "\n", " # 计算MACD\n", " ema12 = df['close'].ewm(span=12, adjust=False).mean()\n", " ema26 = df['close'].ewm(span=26, adjust=False).mean()\n", " df['MACD'] = ema12 - ema26\n", " df['Signal_line'] = df['MACD'].ewm(span=9, adjust=False).mean()\n", " df['MACD_hist'] = df['MACD'] - df['Signal_line']\n", "\n", " # 4. 情绪因子1:市场上涨比例(Up Ratio)\n", " df['up_ratio'] = df['daily_return'].apply(lambda x: 1 if x > 0 else 0)\n", " df['up_ratio_20d'] = df['up_ratio'].rolling(window=20).mean() # 过去20天上涨比例\n", "\n", " # 5. 情绪因子2:成交量变化率(Volume Change Rate)\n", " df['volume_mean'] = df['vol'].rolling(window=20).mean() # 过去20天的平均成交量\n", " df['volume_change_rate'] = (df['vol'] - df['volume_mean']) / df['volume_mean'] * 100 # 成交量变化率\n", "\n", " # 6. 情绪因子3:波动率(Volatility)\n", " df['volatility'] = df['daily_return'].rolling(window=20).std() # 过去20天的日收益率标准差\n", "\n", " # 7. 情绪因子4:成交额变化率(Amount Change Rate)\n", " df['amount_mean'] = df['amount'].rolling(window=20).mean() # 过去20天的平均成交额\n", " df['amount_change_rate'] = (df['amount'] - df['amount_mean']) / df['amount_mean'] * 100 # 成交额变化率\n", "\n", " return df\n", "\n", "\n", "def generate_index_indicators(h5_filename):\n", " df = pd.read_hdf(h5_filename, key='index_data')\n", " df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')\n", " df = df.sort_values('trade_date')\n", "\n", " # 计算每个ts_code的相关指标\n", " df_indicators = []\n", " for ts_code in df['ts_code'].unique():\n", " df_index = df[df['ts_code'] == ts_code].copy()\n", " df_index = calculate_indicators(df_index)\n", " df_indicators.append(df_index)\n", "\n", " # 合并所有指数的结果\n", " df_all_indicators = pd.concat(df_indicators, ignore_index=True)\n", "\n", " # 保留trade_date列,并将同一天的数据按ts_code合并成一行\n", " df_final = df_all_indicators.pivot_table(\n", " index='trade_date',\n", " columns='ts_code',\n", " values=['daily_return', 'RSI', 'MACD', 'Signal_line',\n", " 'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',\n", " 'amount_change_rate', 'amount_mean'],\n", " aggfunc='last'\n", " )\n", "\n", " df_final.columns = [f\"{col[1]}_{col[0]}\" for col in df_final.columns]\n", " df_final = df_final.reset_index()\n", "\n", " return df_final\n", "\n", "\n", "# 使用函数\n", "h5_filename = '../../data/index_data.h5'\n", "index_data = generate_index_indicators(h5_filename)\n", "index_data = index_data.dropna()\n" ], "outputs": [], "execution_count": 4 }, { "cell_type": "code", "id": "a735bc02ceb4d872", "metadata": { "jupyter": { "source_hidden": true }, "ExecuteTime": { "end_time": "2025-04-05T17:30:55.912328Z", "start_time": "2025-04-05T17:30:55.863571Z" } }, "source": [ "import numpy as np\n", "import talib\n", "\n", "\n", "def get_rolling_factor(df):\n", " old_columns = df.columns.tolist()[:]\n", "\n", " # 按股票和日期排序(如果尚未排序)\n", " df = df.sort_values(by=['ts_code', 'trade_date'])\n", "\n", " grouped = df.groupby('ts_code', group_keys=False)\n", "\n", " window = 20\n", " df['_is_positive'] = (df['pct_chg'] > 0).astype(int)\n", " df['_is_negative'] = (df['pct_chg'] < 0).astype(int)\n", " df['cat_is_positive'] = (df['pct_chg'] > 0).astype(int)\n", "\n", " # 分离正负收益率 (用于计算各自的均值和平方均值)\n", " # 注意:这里我们保留原始收益率用于计算,而不是 clip 到 0\n", " df['_pos_returns'] = df['pct_chg'].where(df['pct_chg'] > 0, 0) # 非正设为0,便于求和\n", " df['_neg_returns'] = df['pct_chg'].where(df['pct_chg'] < 0, 0) # 非负设为0,便于求和\n", "\n", " # 计算收益率的平方 (用于计算 E[X^2])\n", " df['_pos_returns_sq'] = np.square(df['_pos_returns'])\n", " df['_neg_returns_sq'] = np.square(df['_neg_returns']) # 平方后负数变正\n", "\n", " # 4. 计算滚动统计量 (使用内置函数,速度较快)\n", " # 计算正收益日的统计量\n", " rolling_pos_count = grouped['_is_positive'].rolling(window, min_periods=max(1, window // 2)).sum()\n", " rolling_pos_sum = grouped['_pos_returns'].rolling(window, min_periods=max(1, window // 2)).sum()\n", " rolling_pos_sum_sq = grouped['_pos_returns_sq'].rolling(window, min_periods=max(1, window // 2)).sum()\n", "\n", " # 计算负收益日的统计量\n", " rolling_neg_count = grouped['_is_negative'].rolling(window, min_periods=max(1, window // 2)).sum()\n", " rolling_neg_sum = grouped['_neg_returns'].rolling(window, min_periods=max(1, window // 2)).sum()\n", " rolling_neg_sum_sq = grouped['_neg_returns_sq'].rolling(window, min_periods=max(1, window // 2)).sum()\n", "\n", " # 5. 计算方差和标准差\n", " pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count\n", " pos_mean = rolling_pos_sum / rolling_pos_count\n", " pos_var = pos_mean_sq - np.square(pos_mean)\n", " pos_var = pos_var.where(rolling_pos_count >= 2, np.nan).clip(lower=0)\n", " upside_vol = np.sqrt(pos_var)\n", "\n", " neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count\n", " neg_mean = rolling_neg_sum / rolling_neg_count # 注意 neg_mean 是负数\n", " neg_var = neg_mean_sq - np.square(neg_mean)\n", " neg_var = neg_var.where(rolling_neg_count >= 2, np.nan).clip(lower=0)\n", " downside_vol = np.sqrt(neg_var)\n", "\n", " # rolling 操作后结果带有 MultiIndex,需要去除股票代码层级以便合并\n", " df['upside_vol'] = upside_vol.reset_index(level=0, drop=True)\n", " df['downside_vol'] = downside_vol.reset_index(level=0, drop=True)\n", "\n", " df['vol_ratio'] = df['upside_vol'] / df['downside_vol']\n", " df['vol_ratio'] = df['vol_ratio'].replace([np.inf, -np.inf], np.nan).fillna(0) # 或 fillna(np.nan)\n", "\n", " df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)\n", " df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)\n", "\n", " # 因子 1:短期成交量变化率\n", " df['volume_change_rate'] = (\n", " grouped['vol'].rolling(window=2).mean() /\n", " grouped['vol'].rolling(window=10).mean() - 1\n", " ).reset_index(level=0, drop=True) # 确保索引对齐\n", "\n", " # 因子 2:成交量突破信号\n", " max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True) # 确保索引对齐\n", " df['cat_volume_breakout'] = (df['vol'] > max_volume)\n", "\n", " # 因子 3:换手率均线偏离度\n", " mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)\n", " std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)\n", " df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover\n", "\n", " # 因子 4:换手率激增信号\n", " df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)\n", "\n", " # 因子 5:量比均值\n", " df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)\n", "\n", " # 因子 6:量比突破信号\n", " max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)\n", " df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)\n", "\n", " df['vol_spike'] = grouped.apply(\n", " lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)\n", " )\n", " df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()\n", "\n", " # 计算 ATR\n", " df['atr_14'] = grouped.apply(\n", " lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),\n", " index=x.index)\n", " )\n", " df['atr_6'] = grouped.apply(\n", " lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),\n", " index=x.index)\n", " )\n", "\n", " # 计算 OBV 及其均线\n", " df['obv'] = grouped.apply(\n", " lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n", " )\n", " print(df.columns)\n", " df['maobv_6'] = grouped.apply(\n", " lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)\n", " )\n", "\n", " df['rsi_3'] = grouped.apply(\n", " lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)\n", " )\n", " # df['rsi_6'] = grouped.apply(\n", " # lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)\n", " # )\n", " # df['rsi_9'] = grouped.apply(\n", " # lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)\n", " # )\n", "\n", " # 计算 return_10 和 return_20\n", " df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n", " # df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)\n", " df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n", "\n", " # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)\n", "\n", " # 计算标准差指标\n", " df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())\n", " # df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())\n", " # df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())\n", " df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())\n", " df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())\n", "\n", " # 计算 EMA 指标\n", " df['_ema_5'] = grouped['close'].apply(\n", " lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)\n", " )\n", " df['_ema_13'] = grouped['close'].apply(\n", " lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)\n", " )\n", " df['_ema_20'] = grouped['close'].apply(\n", " lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)\n", " )\n", " df['_ema_60'] = grouped['close'].apply(\n", " lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)\n", " )\n", "\n", " # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n", " df['act_factor1'] = grouped['_ema_5'].apply(\n", " lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50\n", " )\n", " df['act_factor2'] = grouped['_ema_13'].apply(\n", " lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40\n", " )\n", " df['act_factor3'] = grouped['_ema_20'].apply(\n", " lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21\n", " )\n", " df['act_factor4'] = grouped['_ema_60'].apply(\n", " lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10\n", " )\n", "\n", " # 根据 trade_date 截面计算排名\n", " df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)\n", " df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)\n", " df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)\n", "\n", " df['log(circ_mv)'] = np.log(df['circ_mv'])\n", "\n", " def rolling_covariance(x, y, window):\n", " return x.rolling(window).cov(y)\n", "\n", " def delta(series, period):\n", " return series.diff(period)\n", "\n", " def rank(series):\n", " return series.rank(pct=True)\n", "\n", " def stddev(series, window):\n", " return series.rolling(window).std()\n", "\n", " window_high_volume = 5\n", " window_close_stddev = 20\n", " period_delta = 5\n", " df['cov'] = rolling_covariance(df['high'], df['vol'], window_high_volume)\n", " df['delta_cov'] = delta(df['cov'], period_delta)\n", " df['_rank_stddev'] = rank(stddev(df['close'], window_close_stddev))\n", " df['alpha_22_improved'] = -1 * df['delta_cov'] * df['_rank_stddev']\n", "\n", " df['alpha_003'] = np.where(df['high'] != df['low'],\n", " (df['close'] - df['open']) / (df['high'] - df['low']),\n", " 0)\n", "\n", " df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)\n", " df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)\n", "\n", " df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())\n", " df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)\n", "\n", " df['cat_up_limit'] = (df['close'] == df['up_limit']) # 是否涨停(1表示涨停,0表示未涨停)\n", " df['cat_down_limit'] = (df['close'] == df['down_limit']) # 是否跌停(1表示跌停,0表示未跌停)\n", " df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n", " drop=True)\n", " df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n", " drop=True)\n", "\n", " # 3. 最近连续涨跌停天数\n", " def calculate_consecutive_limits(series):\n", " \"\"\"\n", " 计算连续涨停/跌停天数。\n", " \"\"\"\n", " consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n", " consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n", " return consecutive_up, consecutive_down\n", "\n", " # 连续涨停天数\n", " df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(\n", " lambda x: calculate_consecutive_limits(x)[0]\n", " ).reset_index(level=0, drop=True)\n", "\n", " df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)\n", "\n", " df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))\n", "\n", " def rolling_corr(group):\n", " roc_close = group['close'].pct_change()\n", " roc_weight = group['weight_avg'].pct_change()\n", " return roc_close.rolling(10).corr(roc_weight)\n", "\n", " df['price_cost_divergence'] = grouped.apply(rolling_corr)\n", "\n", " df['smallcap_concentration'] = (1 / df['log(circ_mv)']) * (df['cost_85pct'] - df['cost_15pct'])\n", "\n", " # 16. 筹码稳定性指数 (20日波动率)\n", " df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())\n", " df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())\n", "\n", " # 17. 成本区间突破标记\n", " df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())\n", "\n", " # 20. 筹码-流动性风险\n", " df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (\n", " 1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))\n", "\n", " # 7. 市值波动率因子\n", " df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)\n", " df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['log(circ_mv)']).reset_index(level=0, drop=True)\n", "\n", " # 8. 市值成长性因子\n", " df['volume_growth'] = grouped['vol'].pct_change(periods=20).reset_index(level=0, drop=True)\n", " df['mv_growth'] = grouped.apply(lambda x: x['volume_growth'] / x['log(circ_mv)']).reset_index(level=0, drop=True)\n", "\n", " df[\"ar\"] = df[\"high\"].div(df[\"open\"]).rolling(3).sum() / df[\"open\"].div(df[\"low\"]).rolling(3).sum() * 100\n", " # 计算 BR 指标\n", " df[\"pre_close\"] = df[\"close\"].shift(1)\n", " df[\"br_up\"] = (df[\"high\"] - df[\"pre_close\"]).clip(lower=0)\n", " df[\"br_down\"] = (df[\"pre_close\"] - df[\"low\"]).clip(lower=0)\n", " df[\"br\"] = df[\"br_up\"].rolling(3).sum() / df[\"br_down\"].rolling(3).sum() * 100\n", " df['arbr'] = df['ar'] - df['br']\n", " df.drop(columns=[\"pre_close\", \"br_up\", \"br_down\", 'ar', 'br'], inplace=True)\n", "\n", " df.drop(columns=['weight_std20'], inplace=True, errors='ignore')\n", " df.drop(\n", " columns=['_is_positive', '_is_negative', '_pos_returns', '_neg_returns', '_pos_returns_sq', '_neg_returns_sq'],\n", " inplace=True, errors='ignore')\n", " new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]\n", "\n", " return df, new_columns\n", "\n", "\n", "def get_simple_factor(df):\n", " old_columns = df.columns.tolist()[:]\n", " df = df.sort_values(by=['ts_code', 'trade_date'])\n", "\n", " alpha = 0.5\n", " df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']\n", " df['resonance_factor'] = df['volume_ratio'] * df['pct_chg']\n", " df['log_close'] = np.log(df['close'])\n", "\n", " df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']\n", "\n", " df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n", " df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n", "\n", " df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n", "\n", " # 计算比值指标\n", " df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n", " # df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n", "\n", " # 计算标准差差值\n", " df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n", "\n", " # df['cat_af1'] = df['act_factor1'] > 0\n", " df['cat_af2'] = df['act_factor2'] > df['act_factor1']\n", " df['cat_af3'] = df['act_factor3'] > df['act_factor2']\n", " df['cat_af4'] = df['act_factor4'] > df['act_factor3']\n", "\n", " # 计算 act_factor5 和 act_factor6\n", " df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n", " df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n", " df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n", "\n", " df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n", " df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n", " df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n", "\n", " df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n", " df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n", "\n", " df['log(circ_mv)'] = np.log(df['circ_mv'])\n", "\n", " df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])\n", "\n", " df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])\n", "\n", " df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])\n", "\n", " df['lock_factor'] = df['turnover_rate'] * (\n", " 1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))\n", "\n", " df['cat_vol_break'] = (df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2)\n", "\n", " df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']\n", "\n", " # 12. 小盘股筹码集中度\n", " df['smallcap_concentration'] = (1 / df['log(circ_mv)']) * (df['cost_85pct'] - df['cost_15pct'])\n", "\n", " df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &\n", " (df['volume_ratio'] > 1.5) &\n", " (df['winner_rate'] > 0.7))\n", "\n", " df['mv_turnover_ratio'] = df['turnover_rate'] / df['log(circ_mv)']\n", "\n", " df['mv_adjusted_volume'] = df['vol'] / df['log(circ_mv)']\n", "\n", " df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['log(circ_mv)'])\n", "\n", " df['nonlinear_mv_volume'] = df['vol'] / df['log(circ_mv)']\n", "\n", " df['mv_volume_ratio'] = df['volume_ratio'] / df['log(circ_mv)']\n", "\n", " df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['log(circ_mv)']\n", "\n", " drop_columns = [col for col in df.columns if col.startswith('_')]\n", " df.drop(columns=drop_columns, inplace=True, errors='ignore')\n", "\n", " new_columns = [col for col in df.columns.tolist()[:] if col not in old_columns]\n", " return df, new_columns\n" ], "outputs": [], "execution_count": 5 }, { "cell_type": "code", "id": "53f86ddc0677a6d7", "metadata": { "jupyter": { "source_hidden": true }, "scrolled": true, "ExecuteTime": { "end_time": "2025-04-05T17:31:01.075820Z", "start_time": "2025-04-05T17:30:55.916344Z" } }, "source": [ "from utils.factor import get_act_factor\n", "\n", "\n", "def read_industry_data(h5_filename):\n", " # 读取 H5 文件中所有的行业数据\n", " industry_data = pd.read_hdf(h5_filename, key='sw_daily', columns=[\n", " 'ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'pe', 'pb', 'vol'\n", " ]) # 假设 H5 文件的键是 'industry_data'\n", " industry_data = industry_data.sort_values(by=['ts_code', 'trade_date'])\n", " industry_data = industry_data.reindex()\n", " industry_data['trade_date'] = pd.to_datetime(industry_data['trade_date'], format='%Y%m%d')\n", "\n", " grouped = industry_data.groupby('ts_code', group_keys=False)\n", " industry_data['obv'] = grouped.apply(\n", " lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n", " )\n", " industry_data['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n", " industry_data['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n", "\n", " industry_data = get_act_factor(industry_data, cat=False)\n", " industry_data = industry_data.sort_values(by=['trade_date', 'ts_code'])\n", "\n", " # # 计算每天每个 ts_code 的因子和当天所有 ts_code 的中位数的偏差\n", " # factor_columns = ['obv', 'return_5', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4'] # 因子列\n", " # \n", " # for factor in factor_columns:\n", " # if factor in industry_data.columns:\n", " # # 计算每天每个 ts_code 的因子值与当天所有 ts_code 的中位数的偏差\n", " # industry_data[f'{factor}_deviation'] = industry_data.groupby('trade_date')[factor].transform(\n", " # lambda x: x - x.mean())\n", "\n", " industry_data['return_5_percentile'] = industry_data.groupby('trade_date')['return_5'].transform(\n", " lambda x: x.rank(pct=True))\n", " industry_data['return_20_percentile'] = industry_data.groupby('trade_date')['return_20'].transform(\n", " lambda x: x.rank(pct=True))\n", " industry_data = industry_data.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])\n", "\n", " industry_data = industry_data.rename(\n", " columns={col: f'industry_{col}' for col in industry_data.columns if col not in ['ts_code', 'trade_date']})\n", "\n", " industry_data = industry_data.rename(columns={'ts_code': 'cat_l2_code'})\n", " return industry_data\n", "\n", "\n", "industry_df = read_industry_data('../../data/sw_daily.h5')\n" ], "outputs": [], "execution_count": 6 }, { "cell_type": "code", "id": "dbe2fd8021b9417f", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:31:01.095475Z", "start_time": "2025-04-05T17:31:01.090126Z" } }, "source": [ "origin_columns = df.columns.tolist()\n", "origin_columns = [col for col in origin_columns if\n", " col not in ['turnover_rate', 'pe_ttm', 'volume_ratio', 'vol', 'pct_chg', 'l2_code', 'winner_rate']]\n", "origin_columns = [col for col in origin_columns if col not in index_data.columns]\n", "origin_columns = [col for col in origin_columns if 'cyq' not in col]\n", "print(origin_columns)" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['ts_code', 'open', 'close', 'high', 'low', 'circ_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n" ] } ], "execution_count": 7 }, { "cell_type": "code", "id": "85c3e3d0235ffffa", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:33:15.657140Z", "start_time": "2025-04-05T17:31:01.114838Z" } }, "source": [ "def filter_data(df):\n", " # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n", " df = df[~df['is_st']]\n", " df = df[~df['ts_code'].str.endswith('BJ')]\n", " df = df[~df['ts_code'].str.startswith('30')]\n", " df = df[~df['ts_code'].str.startswith('68')]\n", " df = df[~df['ts_code'].str.startswith('8')]\n", " df = df[df['trade_date'] >= '20180101']\n", " if 'in_date' in df.columns:\n", " df = df.drop(columns=['in_date'])\n", " df = df.reset_index(drop=True)\n", " return df\n", "\n", "\n", "df = filter_data(df)\n", "# df = get_technical_factor(df)\n", "# df = get_act_factor(df)\n", "# df = get_money_flow_factor(df)\n", "# df = get_alpha_factor(df)\n", "# df = get_limit_factor(df)\n", "# df = get_cyp_perf_factor(df)\n", "# df = get_mv_factors(df)\n", "df, _ = get_rolling_factor(df)\n", "df, _ = get_simple_factor(df)\n", "# df = df.merge(industry_df, on=['l2_code', 'trade_date'], how='left')\n", "df = df.rename(columns={'l2_code': 'cat_l2_code'})\n", "# df = df.merge(index_data, on='trade_date', how='left')\n", "\n", "print(df.info())" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n", " 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n", " 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol',\n", " 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol',\n", " 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n", " 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate',\n", " 'l2_code', '_is_positive', '_is_negative', 'cat_is_positive',\n", " '_pos_returns', '_neg_returns', '_pos_returns_sq', '_neg_returns_sq',\n", " 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew',\n", " 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout',\n", " 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio',\n", " 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14',\n", " 'atr_6', 'obv'],\n", " dtype='object')\n", "\n", "Index: 5118212 entries, 0 to 5118211\n", "Columns: 119 entries, ts_code to mv_momentum\n", "dtypes: bool(12), datetime64[ns](1), float64(101), int32(2), int64(1), object(2)\n", "memory usage: 4.1+ GB\n", "None\n" ] } ], "execution_count": 8 }, { "cell_type": "code", "id": "f4f16d63ad18d1bc", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:33:16.052975Z", "start_time": "2025-04-05T17:33:16.039150Z" } }, "source": [ "def create_deviation_within_dates(df, feature_columns):\n", " groupby_col = 'cat_l2_code' # 使用 trade_date 进行分组\n", " new_columns = {}\n", " ret_feature_columns = feature_columns[:]\n", "\n", " # 自动选择所有数值型特征\n", " num_features = [col for col in feature_columns if 'cat' not in col and 'index' not in col]\n", "\n", " # num_features = ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'cat_vol_spike', 'obv', 'maobv_6', 'return_5', 'return_10', 'return_20', 'std_return_5', 'std_return_15', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'alpha_022', 'alpha_003', 'alpha_007', 'alpha_013']\n", " num_features = [col for col in num_features if 'cat' not in col and 'industry' not in col]\n", " num_features = [col for col in num_features if 'limit' not in col]\n", " num_features = [col for col in num_features if 'cyq' not in col]\n", "\n", " # 遍历所有数值型特征\n", " for feature in num_features:\n", " if feature == 'trade_date': # 不需要对 'trade_date' 计算偏差\n", " continue\n", "\n", " # grouped_mean = df.groupby(['trade_date'])[feature].transform('mean')\n", " # deviation_col_name = f'deviation_mean_{feature}'\n", " # new_columns[deviation_col_name] = df[feature] - grouped_mean\n", " # ret_feature_columns.append(deviation_col_name)\n", "\n", " grouped_mean = df.groupby(['trade_date', groupby_col])[feature].transform('mean')\n", " deviation_col_name = f'deviation_mean_{feature}'\n", " new_columns[deviation_col_name] = df[feature] - grouped_mean\n", " ret_feature_columns.append(deviation_col_name)\n", "\n", " # 将新计算的偏差特征与原始 DataFrame 合并\n", " df = pd.concat([df, pd.DataFrame(new_columns)], axis=1)\n", "\n", " # for feature in ['obv', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4']:\n", " # df[f'deviation_industry_{feature}'] = df[feature] - df[f'industry_{feature}']\n", "\n", " return df, ret_feature_columns\n" ], "outputs": [], "execution_count": 9 }, { "cell_type": "code", "id": "40e6b68a91b30c79", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:33:18.110752Z", "start_time": "2025-04-05T17:33:16.243912Z" } }, "source": [ "import pandas as pd\n", "\n", "from scipy.stats import ks_2samp, wasserstein_distance\n", "from sklearn.metrics import roc_auc_score\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "\n", "def remove_shifted_features(train_data, feature_columns, ks_threshold=0.05, wasserstein_threshold=0.1, size=0.8,\n", " log=True):\n", " dropped_features = []\n", "\n", " all_dates = sorted(train_data['trade_date'].unique().tolist()) # 获取所有唯一的 trade_date\n", " split_date = all_dates[int(len(all_dates) * size)] # 划分点为倒数第 validation_days 天\n", " train_data_split = train_data[train_data['trade_date'] < split_date] # 训练集\n", " val_data_split = train_data[train_data['trade_date'] >= split_date] # 验证集\n", "\n", " # **统计数据漂移**\n", " numeric_columns = train_data_split.select_dtypes(include=['float64', 'int64']).columns\n", " numeric_columns = [col for col in numeric_columns if col in feature_columns]\n", " for feature in numeric_columns:\n", " ks_stat, p_value = ks_2samp(train_data_split[feature], val_data_split[feature])\n", " wasserstein_dist = wasserstein_distance(train_data_split[feature], val_data_split[feature])\n", "\n", " if p_value < ks_threshold or wasserstein_dist > wasserstein_threshold:\n", " dropped_features.append(feature)\n", " if log:\n", " print(f\"检测到 {len(dropped_features)} 个可能漂移的特征: {dropped_features}\")\n", "\n", " # **应用阈值进行最终筛选**\n", " filtered_features = [f for f in feature_columns if f not in dropped_features]\n", "\n", " return filtered_features, dropped_features\n", "\n", "\n", "def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,\n", " log=True):\n", " if not (0 <= lower_percentile < upper_percentile <= 1):\n", " raise ValueError(\"Percentile values must satisfy 0 <= lower_percentile < upper_percentile <= 1.\")\n", "\n", " # Calculate lower and upper bounds based on percentiles\n", " lower_bound = label.quantile(lower_percentile)\n", " upper_bound = label.quantile(upper_percentile)\n", "\n", " # Filter out values outside the bounds\n", " filtered_label = label[(label >= lower_bound) & (label <= upper_bound)]\n", "\n", " # Print the number of removed outliers\n", " if log:\n", " print(f\"Removed {len(label) - len(filtered_label)} outliers.\")\n", " return filtered_label\n", "\n", "\n", "def calculate_risk_adjusted_target(df, days=5):\n", " df = df.sort_values(by=['ts_code', 'trade_date'])\n", "\n", " df['future_close'] = df.groupby('ts_code')['close'].shift(-days)\n", " df['future_open'] = df.groupby('ts_code')['open'].shift(-1)\n", " df['future_return'] = (df['future_close'] - df['future_open']) / df['future_open']\n", "\n", " df['future_volatility'] = df.groupby('ts_code')['future_return'].rolling(days, min_periods=1).std().reset_index(\n", " level=0, drop=True)\n", " sharpe_ratio = df['future_return'] * df['future_volatility']\n", " sharpe_ratio.replace([np.inf, -np.inf], np.nan, inplace=True)\n", "\n", " return sharpe_ratio\n", "\n", "\n", "def calculate_score(df, days=5, lambda_param=1.0):\n", " def calculate_max_drawdown(prices):\n", " peak = prices.iloc[0] # 初始化峰值\n", " max_drawdown = 0 # 初始化最大回撤\n", "\n", " for price in prices:\n", " if price > peak:\n", " peak = price # 更新峰值\n", " else:\n", " drawdown = (peak - price) / peak # 计算当前回撤\n", " max_drawdown = max(max_drawdown, drawdown) # 更新最大回撤\n", "\n", " return max_drawdown\n", "\n", " def compute_stock_score(stock_df):\n", " stock_df = stock_df.sort_values(by=['trade_date'])\n", " future_return = stock_df['future_return']\n", " # 使用已有的 pct_chg 字段计算波动率\n", " volatility = stock_df['pct_chg'].rolling(days).std().shift(-days)\n", " max_drawdown = stock_df['close'].rolling(days).apply(calculate_max_drawdown, raw=False).shift(-days)\n", " score = future_return - lambda_param * max_drawdown\n", " return score\n", "\n", " # # 确保 DataFrame 按照股票代码和交易日期排序\n", " # df = df.sort_values(by=['ts_code', 'trade_date'])\n", "\n", " # 对每个股票分别计算 score\n", " df['score'] = df.groupby('ts_code').apply(compute_stock_score).reset_index(level=0, drop=True)\n", "\n", " return df['score']\n", "\n", "\n", "def remove_highly_correlated_features(df, feature_columns, threshold=0.9):\n", " numeric_features = df[feature_columns].select_dtypes(include=[np.number]).columns.tolist()\n", " if not numeric_features:\n", " raise ValueError(\"No numeric features found in the provided data.\")\n", "\n", " corr_matrix = df[numeric_features].corr().abs()\n", " upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n", " to_drop = [column for column in upper.columns if any(upper[column] > threshold)]\n", " remaining_features = [col for col in feature_columns if col not in to_drop\n", " or 'act' in col or 'af' in col]\n", " return remaining_features\n", "\n", "\n", "import pandas as pd\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "\n", "def cross_sectional_standardization(df, features):\n", " df_sorted = df.sort_values(by='trade_date') # 按时间排序\n", " df_standardized = df_sorted.copy()\n", "\n", " for date in df_sorted['trade_date'].unique():\n", " # 获取当前时间点的数据\n", " current_data = df_standardized[df_standardized['trade_date'] == date]\n", "\n", " # 只对指定特征进行标准化\n", " scaler = StandardScaler()\n", " standardized_values = scaler.fit_transform(current_data[features])\n", "\n", " # 将标准化结果重新赋值回去\n", " df_standardized.loc[df_standardized['trade_date'] == date, features] = standardized_values\n", "\n", " return df_standardized\n", "\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import statsmodels.api as sm\n", "\n", "from concurrent.futures import ProcessPoolExecutor\n", "\n", "\n", "def neutralize_manual(df, features, industry_col, mkt_cap_col):\n", " \"\"\" 手动实现简单回归以提升速度 \"\"\"\n", "\n", " for col in features:\n", " residuals = []\n", " for _, group in df.groupby(industry_col):\n", " if len(group) > 1:\n", " x = np.log(group[mkt_cap_col]) # 市值对数\n", " y = group[col] # 因子值\n", " beta = np.cov(y, x)[0, 1] / np.var(x) # 计算斜率\n", " alpha = np.mean(y) - beta * np.mean(x) # 计算截距\n", " resid = y - (alpha + beta * x) # 计算残差\n", " residuals.extend(resid)\n", " else:\n", " residuals.extend(group[col]) # 样本不足时保留原值\n", "\n", " df[col] = residuals\n", "\n", " return df\n", "\n", "\n", "import gc\n", "\n", "gc.collect()\n", "\n", "\n", "def mad_filter(df, features, n=3):\n", " for col in features:\n", " median = df[col].median()\n", " mad = np.median(np.abs(df[col] - median))\n", " upper = median + n * mad\n", " lower = median - n * mad\n", " df[col] = np.clip(df[col], lower, upper) # 截断极值\n", " return df\n", "\n", "\n", "def percentile_filter(df, features, lower_percentile=0.01, upper_percentile=0.99):\n", " for col in features:\n", " # 按日期分组计算上下百分位数\n", " lower_bound = df.groupby('trade_date')[col].transform(\n", " lambda x: x.quantile(lower_percentile)\n", " )\n", " upper_bound = df.groupby('trade_date')[col].transform(\n", " lambda x: x.quantile(upper_percentile)\n", " )\n", " # 截断超出范围的值\n", " df[col] = np.clip(df[col], lower_bound, upper_bound)\n", " return df\n", "\n", "\n", "from scipy.stats import iqr\n", "\n", "\n", "def iqr_filter(df, features):\n", " for col in features:\n", " df[col] = df.groupby('trade_date')[col].transform(\n", " lambda x: (x - x.median()) / iqr(x) if iqr(x) != 0 else x\n", " )\n", " return df\n", "\n", "\n", "def quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99, window=60):\n", " df = df.copy()\n", " for col in features:\n", " # 计算 rolling 统计量,需要按日期进行 groupby\n", " rolling_lower = df.groupby('trade_date')[col].transform(\n", " lambda x: x.rolling(window=min(len(x), window)).quantile(lower_quantile))\n", " rolling_upper = df.groupby('trade_date')[col].transform(\n", " lambda x: x.rolling(window=min(len(x), window)).quantile(upper_quantile))\n", "\n", " # 对数据进行裁剪\n", " df[col] = np.clip(df[col], rolling_lower, rolling_upper)\n", "\n", " return df\n" ], "outputs": [], "execution_count": 10 }, { "cell_type": "code", "id": "1c46817a-b5dd-4bec-8bb4-e6e80bfd9d66", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:33:18.119789Z", "start_time": "2025-04-05T17:33:18.111760Z" } }, "source": [ "# print(test_data.head()[['act_factor1', 'act_factor2', 'ts_code', 'trade_date']])" ], "outputs": [], "execution_count": 11 }, { "cell_type": "code", "id": "da2bb202843d9275", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:33:18.721857Z", "start_time": "2025-04-05T17:33:18.147528Z" } }, "source": [ "from sklearn.preprocessing import StandardScaler\n", "import lightgbm as lgb\n", "import matplotlib.pyplot as plt\n", "from sklearn.decomposition import PCA\n", "\n", "\n", "def train_light_model(train_data_df, params, feature_columns, callbacks, evals,\n", " print_feature_importance=True, num_boost_round=100,\n", " validation_days=180, use_pca=False, split_date=None): # 新增参数:validation_days\n", " # 确保数据按时间排序\n", " train_data_df = train_data_df.sort_values(by='trade_date')\n", "\n", " numeric_columns = train_data_df.select_dtypes(include=['float64', 'int64']).columns\n", " numeric_columns = [col for col in numeric_columns if col in feature_columns]\n", " # X_train.loc[:, numeric_columns] = scaler.fit_transform(X_train[numeric_columns])\n", " # X_val.loc[:, numeric_columns] = scaler.transform(X_val[numeric_columns])\n", " # train_data_df = cross_sectional_standardization(train_data_df, numeric_columns)\n", "\n", " # 去除标签为空的样本\n", " train_data_df = train_data_df.dropna(subset=['label'])\n", " # print('原始训练集大小: ', len(train_data_df))\n", "\n", " # 按时间顺序划分训练集和验证集\n", " if split_date is None:\n", " all_dates = train_data_df['trade_date'].unique() # 获取所有唯一的 trade_date\n", " if validation_days == 0:\n", " split_date = all_dates[-1]\n", " else:\n", " split_date = all_dates[-validation_days] # 划分点为倒数第 validation_days 天\n", " if validation_days == 0:\n", " train_data_split = train_data_df\n", " else:\n", " train_data_split = train_data_df[train_data_df['trade_date'] < split_date] # 训练集\n", " val_data_split = train_data_df[train_data_df['trade_date'] >= split_date] # 验证集\n", "\n", " # 打印划分结果\n", " print(f\"划分后的训练集大小: {len(train_data_split)}, 验证集大小: {len(val_data_split)}\")\n", "\n", " # 提取特征和标签\n", " X_train = train_data_split[feature_columns]\n", " y_train = train_data_split['label']\n", "\n", " X_val = val_data_split[feature_columns]\n", " y_val = val_data_split['label']\n", "\n", " # 标准化数值特征\n", " scaler = StandardScaler()\n", "\n", " # 计算每个 trade_date 内的样本数(LTR 需要 group 信息)\n", " train_groups = train_data_split.groupby('trade_date').size().tolist()\n", " val_groups = val_data_split.groupby('trade_date').size().tolist()\n", "\n", " # 处理类别特征\n", " categorical_feature = [col for col in feature_columns if 'cat' in col]\n", "\n", " pca = None\n", " if use_pca:\n", " pca = PCA(n_components=0.95) # 或指定 n_components=固定值(如 10)\n", " numeric_features = [col for col in feature_columns if col not in categorical_feature]\n", " numeric_pca = pca.fit_transform(X_train[numeric_features])\n", " X_train = pd.concat([pd.DataFrame(numeric_pca, index=X_train.index), X_train[categorical_feature]], axis=1)\n", "\n", " numeric_pca = pca.transform(X_val[numeric_features])\n", " X_val = pd.concat([pd.DataFrame(numeric_pca, index=X_val.index), X_val[categorical_feature]], axis=1)\n", "\n", " # 计算权重(基于时间)\n", " # trade_date = train_data_split['trade_date'] # 交易日期\n", " # weights = (trade_date - trade_date.min()).dt.days / (trade_date.max() - trade_date.min()).days + 1\n", " # weights = train_data_split.groupby('trade_date')['std_return_5'].transform(\n", " # lambda x: x / x.mean()\n", " # )\n", " ud = sorted(train_data_split[\"trade_date\"].unique().tolist())\n", " date_weights = {date: weight * weight for date, weight in zip(ud, np.linspace(1, 10, len(ud)))}\n", " params['weight'] = train_data_split[\"trade_date\"].map(date_weights).tolist()\n", "\n", " train_dataset = lgb.Dataset(\n", " X_train, label=y_train, group=train_groups,\n", " categorical_feature=categorical_feature\n", " )\n", "\n", " if validation_days > 0:\n", " X_val = val_data_split[feature_columns]\n", " y_val = val_data_split['label']\n", " val_groups = val_data_split.groupby('trade_date').size().tolist()\n", " val_dataset = lgb.Dataset(\n", " X_val, label=y_val, group=val_groups,\n", " categorical_feature=categorical_feature\n", " )\n", " # 训练模型\n", " model = lgb.train(\n", " params, train_dataset, num_boost_round=num_boost_round,\n", " valid_sets=[train_dataset, val_dataset], valid_names=['train', 'valid'],\n", " callbacks=callbacks\n", " )\n", " else:\n", " model = lgb.train(\n", " params, train_dataset, num_boost_round=num_boost_round, callbacks=callbacks\n", " )\n", "\n", " # 打印特征重要性(如果需要)\n", " if print_feature_importance:\n", " lgb.plot_metric(evals)\n", " lgb.plot_importance(model, importance_type='split', max_num_features=20)\n", " plt.show()\n", "\n", " return model, scaler, pca" ], "outputs": [], "execution_count": 12 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:34:29.738328Z", "start_time": "2025-04-05T17:33:18.744104Z" } }, "cell_type": "code", "source": [ "\n", "days = 2\n", "df = df.sort_values(by=['ts_code', 'trade_date'])\n", "# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n", "df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n", " df.groupby('ts_code')['open'].shift(-1)\n", "df['future_volatility'] = (\n", " df.groupby('ts_code')['pct_chg']\n", " .transform(lambda x: x.rolling(days).std().shift(-days))\n", ")\n", "df['future_score'] = calculate_score(df, days=2, lambda_param=0.3)\n", "df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(\n", " lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')\n", ")\n", "# df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(\n", "# lambda x: pd.qcut(x.rank(method='first'), q=20, labels=False, duplicates='raise')\n", "# )\n", "# df['future_score'] = (\n", "# 0.7 * df['future_return']\n", "# * 0.3 * df['future_volatility']\n", "# )" ], "id": "ff19e3f1e051a489", "outputs": [], "execution_count": 13 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:56:23.772145Z", "start_time": "2025-04-05T17:55:42.635301Z" } }, "cell_type": "code", "source": [ "def select_pre_zt_stocks_dynamic(stock_df):\n", " def select_stocks(group):\n", " data = group.nlargest(100, 'return_20')\n", " unique_labels = data['label'].nunique()\n", " if unique_labels >= 20 or unique_labels == 0:\n", " return data\n", " else:\n", " for i in range(100, 1000, 10):\n", " data = group.nlargest(i, 'return_20')\n", " unique_labels = data['label'].nunique()\n", " if unique_labels >= 20:\n", " return data\n", "\n", " stock_df = stock_df.groupby('trade_date', group_keys=False).apply(select_stocks)\n", " return stock_df\n", "\n", "\n", "pdf = select_pre_zt_stocks_dynamic(df)\n", "print(pdf['trade_date'].max())\n", "\n", "# pdf['label'] = pdf.groupby('trade_date', group_keys=False)['future_score'].transform(\n", "# lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')\n", "# )" ], "id": "27dba27b2e108316", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2025-03-28 00:00:00\n" ] } ], "execution_count": 55 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:56:24.793211Z", "start_time": "2025-04-05T17:56:23.826074Z" } }, "cell_type": "code", "source": [ "pdf = pdf.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n", "pdf = pdf.sort_values(['trade_date'])\n", "pdf = pdf.replace([np.inf, -np.inf], np.nan)\n", "\n", "feature_columns = [col for col in pdf.columns if col in pdf.columns]\n", "feature_columns = [col for col in feature_columns if col not in ['trade_date',\n", " 'ts_code',\n", " 'label']]\n", "feature_columns = [col for col in feature_columns if 'future' not in col]\n", "feature_columns = [col for col in feature_columns if 'label' not in col]\n", "feature_columns = [col for col in feature_columns if 'score' not in col]\n", "feature_columns = [col for col in feature_columns if 'gen' not in col]\n", "feature_columns = [col for col in feature_columns if 'pe_ttm' not in col]\n", "feature_columns = [col for col in feature_columns if 'volatility' not in col]\n", "feature_columns = [col for col in feature_columns if 'cat_l2_code' not in col]\n", "feature_columns = [col for col in feature_columns if col not in origin_columns]\n", "feature_columns = [col for col in feature_columns if not col.startswith('_')]\n", "\n", "print(feature_columns)\n", "numeric_columns = pdf.select_dtypes(include=['float64', 'int64']).columns\n", "numeric_columns = [col for col in numeric_columns if col in feature_columns]\n", "\n", "filter_index = pdf['future_return'].between(pdf['future_return'].quantile(0.01), pdf['future_return'].quantile(0.99))\n", "\n", "# filter_index = pdf['future_volatility'].between(pdf['future_volatility'].quantile(0.01),\n", "# pdf['future_volatility'].quantile(0.99)) | filter_index" ], "id": "ca96fb81e17c4a90", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d', 'consecutive_up_limit', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'volume_growth', 'mv_growth', 'arbr', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv-maobv_6', 'std_return_5 / std_return_90', 'std_return_90 - std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile']\n" ] } ], "execution_count": 56 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:57:54.876778Z", "start_time": "2025-04-05T17:56:24.836264Z" } }, "cell_type": "code", "source": [ "pdf = quantile_filter(pdf, numeric_columns)\n", "\n", "pdf = cross_sectional_standardization(pdf, numeric_columns)\n", "\n", "feature_columns = remove_highly_correlated_features(pdf,\n", " feature_columns)\n", "print(len(pdf))" ], "id": "81d4570663ae21d7", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "253290\n" ] } ], "execution_count": 57 }, { "cell_type": "code", "id": "92428d543f4727ad", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:57:55.077967Z", "start_time": "2025-04-05T17:57:54.949402Z" } }, "source": [ "# print('train data size: ', len(train_data))\n", "\n", "label_gain = list(range(len(df['label'].unique())))\n", "label_gain = [gain * gain for gain in label_gain]\n", "light_params = {\n", " 'label_gain': label_gain,\n", " 'objective': 'lambdarank',\n", " 'metric': 'ndcg',\n", " 'learning_rate': 0.03,\n", " 'num_leaves': 32,\n", " # 'min_data_in_leaf': 128,\n", " 'max_depth': 8,\n", " 'max_bin': 32,\n", " 'feature_fraction': 0.7,\n", " # 'bagging_fraction': 0.7,\n", " 'bagging_freq': 5,\n", " 'lambda_l1': 0.1,\n", " 'lambda_l2': 0.1,\n", " 'boosting': 'gbdt',\n", " 'verbosity': -1,\n", " 'extra_trees': True,\n", " 'max_position': 5,\n", " 'ndcg_at': 1,\n", " 'quant_train_renew_leaf': True,\n", " 'lambdarank_truncation_level': 3,\n", " # 'lambdarank_position_bias_regularization': 1,\n", " 'seed': 7\n", "}\n", "evals = {}\n", "\n", "gc.collect()" ], "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 58 }, { "cell_type": "code", "id": "8f134d435f71e9e2", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T17:57:55.223213Z", "start_time": "2025-04-05T17:57:55.122064Z" } }, "source": [ "gc.collect()\n", "\n", "\n", "def rolling_train_predict(df, train_days, test_days, feature_columns_origin, days=5, use_pca=False, validation_days=60,\n", " filter_index=None):\n", " # 1. 按照交易日期排序\n", " unique_dates = df[df['trade_date'] >= '2020-01-01']['trade_date'].unique().tolist()\n", " unique_dates = sorted(unique_dates)\n", " n = len(unique_dates)\n", "\n", " # 2. 计算需要跳过的天数,使后续窗口对齐\n", " extra_days = (n - train_days) % test_days\n", " start_index = extra_days # 从此索引开始滚动\n", "\n", " predictions_list = []\n", "\n", " for start in range(start_index, n - train_days - test_days + 1, test_days):\n", "\n", " train_dates = unique_dates[start: start + train_days]\n", " test_dates = unique_dates[start + train_days: start + train_days + test_days]\n", "\n", " # 根据日期筛选数据\n", " train_data = df[filter_index & df['trade_date'].isin(train_dates)]\n", " test_data = df[df['trade_date'].isin(test_dates)]\n", "\n", " train_data = train_data.sort_values('trade_date')\n", " test_data = test_data.sort_values('trade_date')\n", "\n", " feature_columns, _ = remove_shifted_features(train_data, feature_columns_origin, size=0.8, log=False)\n", "\n", " train_data = train_data.dropna(subset=feature_columns)\n", " train_data = train_data.dropna(subset=['label'])\n", " train_data = train_data.reset_index(drop=True)\n", "\n", " # print(test_data.tail())\n", " test_data = test_data.dropna(subset=feature_columns)\n", " # test_data = test_data.dropna(subset=['label'])\n", " test_data = test_data.reset_index(drop=True)\n", "\n", " # print(len(train_data))\n", " # print(f\"最小日期: {train_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n", " # print(f\"最大日期: {train_data['trade_date'].max().strftime('%Y-%m-%d')}\")\n", " # # print(len(test_data))\n", " # print(f\"最小日期: {test_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n", " print(f\"最大日期: {test_data['trade_date'].max().strftime('%Y-%m-%d')}\")\n", "\n", " cat_columns = [col for col in df.columns if col.startswith('cat')]\n", " for col in cat_columns:\n", " train_data[col] = train_data[col].astype('category')\n", " test_data[col] = test_data[col].astype('category')\n", "\n", " label_gain = list(range(len(train_data['label'].unique())))\n", " label_gain = [(gain + 1) * (gain + 1) for gain in label_gain]\n", " light_params['label_gain'] = label_gain\n", "\n", " # ud = train_data[\"trade_date\"].unique()\n", " # date_weights = {date: weight for date, weight in zip(ud, np.linspace(1, 2, len(unique_dates)))}\n", " # light_params['weight'] = train_data[\"trade_date\"].map(date_weights).tolist()\n", "\n", " # print(f'feature_columns: {feature_columns}')\n", " # feature_contri = [2 if feat.startswith('act_factor') else 1 for feat in feature_columns]\n", " # light_params['feature_contri'] = feature_contri\n", " model, _, _ = train_light_model(train_data.dropna(subset=['label']),\n", " light_params, feature_columns,\n", " [lgb.log_evaluation(period=100),\n", " lgb.callback.record_evaluation(evals),\n", " # lgb.early_stopping(100, first_metric_only=True)\n", " ], evals,\n", " num_boost_round=100, validation_days=validation_days,\n", " print_feature_importance=False, use_pca=False)\n", "\n", " score_df = test_data.copy()\n", " score_df['score'] = model.predict(score_df[feature_columns])\n", " score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]\n", " score_df = score_df[['trade_date', 'score', 'ts_code']]\n", " predictions_list.append(score_df)\n", "\n", " final_predictions = pd.concat(predictions_list, ignore_index=True)\n", " return final_predictions\n", "\n", "\n", "from concurrent.futures import ThreadPoolExecutor\n", "\n", "\n", "def worker(train_data, test_data, train_days, test_days, feature_columns_origin, unique_dates, start, filter_index,\n", " validation_days):\n", " train_data = train_data.sort_values('trade_date')\n", " test_data = test_data.sort_values('trade_date')\n", "\n", " feature_columns, _ = remove_shifted_features(train_data, feature_columns_origin, size=0.8, log=False)\n", "\n", " train_data = train_data.dropna(subset=feature_columns + ['label'])\n", " train_data = train_data.reset_index(drop=True)\n", "\n", " test_data = test_data.dropna(subset=feature_columns)\n", " test_data = test_data.reset_index(drop=True)\n", "\n", " cat_columns = [col for col in df.columns if col.startswith('cat')]\n", " for col in cat_columns:\n", " if col in train_data.columns:\n", " train_data[col] = train_data[col].astype('category')\n", " if col in test_data.columns:\n", " test_data[col] = test_data[col].astype('category')\n", "\n", " label_gain = list(range(len(train_data['label'].unique())))\n", " label_gain = [(gain + 1) * (gain + 1) for gain in label_gain]\n", " current_light_params = light_params.copy()\n", " current_light_params['label_gain'] = label_gain\n", "\n", " model, _, _ = train_light_model(train_data.dropna(subset=['label']),\n", " light_params, feature_columns,\n", " [lgb.log_evaluation(period=100),\n", " lgb.callback.record_evaluation(evals),\n", " # lgb.early_stopping(100, first_metric_only=True)\n", " ], evals,\n", " num_boost_round=100, validation_days=validation_days,\n", " print_feature_importance=False, use_pca=False)\n", "\n", " score_df = test_data.copy()\n", " score_df['score'] = model.predict(score_df[feature_columns])\n", " if not score_df.empty:\n", " score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]\n", " score_df = score_df[['trade_date', 'score', 'ts_code']]\n", " return score_df\n", " return None\n", "\n", "\n", "def rolling_train_predict_mt(df, train_days, test_days, feature_columns_origin, days=5, use_pca=False,\n", " validation_days=60, filter_index=None, num_threads=4):\n", " unique_dates = df['trade_date'].unique().tolist()\n", " unique_dates = sorted(unique_dates)\n", " n = len(unique_dates)\n", " extra_days = (n - train_days) % test_days\n", " start_index = extra_days\n", "\n", " predictions_list = []\n", " with ThreadPoolExecutor(max_workers=num_threads) as executor:\n", " futures = []\n", " for start in range(start_index, n - train_days - test_days + 1, test_days):\n", " train_dates = unique_dates[start: start + train_days]\n", " test_dates = unique_dates[start + train_days: start + train_days + test_days]\n", "\n", " train_data = df[filter_index & df['trade_date'].isin(train_dates)].copy()\n", " test_data = df[df['trade_date'].isin(test_dates)].copy()\n", " future = executor.submit(worker, train_data, test_data, train_days, test_days, feature_columns_origin,\n", " unique_dates, start, filter_index, validation_days)\n", " futures.append(future)\n", "\n", " for future in futures:\n", " result = future.result()\n", " if result is not None:\n", " predictions_list.append(result)\n", "\n", " final_predictions = pd.concat(predictions_list, ignore_index=True)\n", " return final_predictions\n" ], "outputs": [], "execution_count": 59 }, { "cell_type": "code", "id": "63235069-dc59-48fb-961a-e80373e41a61", "metadata": { "editable": true, "scrolled": true, "slideshow": { "slide_type": "" }, "tags": [], "ExecuteTime": { "end_time": "2025-04-05T18:07:51.160629Z", "start_time": "2025-04-05T18:04:17.429621Z" } }, "source": [ "\n", "gc.collect()\n", "\n", "print(df[df['ts_code'] == '000001.SZ'].tail(1)[['act_factor1', 'act_factor2']])\n", "print('finish')\n", "# qdf = qdf[qdf['trade_date'] >= '2022-01-01']\n", "\n", "final_predictions = rolling_train_predict(pdf[(pdf['trade_date'] >= '2020-01-01') & (pdf['trade_date'] <= '2025-03-26')], 5, 1, feature_columns,\n", " days=days, validation_days=0, filter_index=filter_index)\n", "final_predictions.to_csv('predictions_test.tsv', index=False)\n" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " act_factor1 act_factor2\n", "5115129 -0.222482 -0.256946\n", "finish\n", "最大日期: 2020-01-09\n", "划分后的训练集大小: 636, 验证集大小: 167\n", "最大日期: 2020-01-10\n", "划分后的训练集大小: 648, 验证集大小: 132\n", "最大日期: 2020-01-13\n", "划分后的训练集大小: 702, 验证集大小: 145\n", "最大日期: 2020-01-14\n", "划分后的训练集大小: 708, 验证集大小: 145\n", "最大日期: 2020-01-15\n", "划分后的训练集大小: 793, 验证集大小: 204\n", "最大日期: 2020-01-16\n", "划分后的训练集大小: 816, 验证集大小: 190\n", "最大日期: 2020-01-17\n", "划分后的训练集大小: 774, 验证集大小: 90\n", "最大日期: 2020-01-20\n", "划分后的训练集大小: 718, 验证集大小: 89\n", "最大日期: 2020-01-21\n", "划分后的训练集大小: 783, 验证集大小: 210\n", "最大日期: 2020-01-22\n", "划分后的训练集大小: 684, 验证集大小: 105\n", "最大日期: 2020-01-23\n", "划分后的训练集大小: 578, 验证集大小: 84\n", "最大日期: 2020-02-03\n", "划分后的训练集大小: 829, 验证集大小: 341\n", "最大日期: 2020-02-04\n", "划分后的训练集大小: 887, 验证集大小: 147\n", "最大日期: 2020-02-05\n", "划分后的训练集大小: 779, 验证集大小: 102\n", "最大日期: 2020-02-06\n", "划分后的训练集大小: 827, 验证集大小: 153\n", "最大日期: 2020-02-07\n", "划分后的训练集大小: 942, 验证集大小: 199\n", "最大日期: 2020-02-10\n", "划分后的训练集大小: 747, 验证集大小: 146\n", "最大日期: 2020-02-11\n", "划分后的训练集大小: 692, 验证集大小: 92\n", "最大日期: 2020-02-12\n", "划分后的训练集大小: 681, 验证集大小: 91\n", "最大日期: 2020-02-13\n", "划分后的训练集大小: 706, 验证集大小: 178\n", "最大日期: 2020-02-14\n", "划分后的训练集大小: 629, 验证集大小: 122\n", "最大日期: 2020-02-17\n", "划分后的训练集大小: 587, 验证集大小: 104\n", "最大日期: 2020-02-18\n", "划分后的训练集大小: 622, 验证集大小: 127\n", "最大日期: 2020-02-19\n", "划分后的训练集大小: 711, 验证集大小: 180\n", "最大日期: 2020-02-20\n", "划分后的训练集大小: 621, 验证集大小: 88\n", "最大日期: 2020-02-21\n", "划分后的训练集大小: 671, 验证集大小: 172\n", "最大日期: 2020-02-24\n", "划分后的训练集大小: 831, 验证集大小: 264\n", "最大日期: 2020-02-25\n", "划分后的训练集大小: 896, 验证集大小: 192\n", "最大日期: 2020-02-26\n", "划分后的训练集大小: 865, 验证集大小: 149\n", "最大日期: 2020-02-27\n", "划分后的训练集大小: 901, 验证集大小: 124\n", "最大日期: 2020-02-28\n", "划分后的训练集大小: 861, 验证集大小: 132\n", "最大日期: 2020-03-02\n", "划分后的训练集大小: 687, 验证集大小: 90\n", "最大日期: 2020-03-03\n", "划分后的训练集大小: 599, 验证集大小: 104\n", "最大日期: 2020-03-04\n", "划分后的训练集大小: 544, 验证集大小: 94\n", "最大日期: 2020-03-05\n", "划分后的训练集大小: 574, 验证集大小: 154\n", "最大日期: 2020-03-06\n", "划分后的训练集大小: 544, 验证集大小: 102\n", "最大日期: 2020-03-09\n", "划分后的训练集大小: 533, 验证集大小: 79\n", "最大日期: 2020-03-10\n", "划分后的训练集大小: 519, 验证集大小: 90\n", "最大日期: 2020-03-11\n", "划分后的训练集大小: 570, 验证集大小: 145\n", "最大日期: 2020-03-12\n", "划分后的训练集大小: 578, 验证集大小: 162\n", "最大日期: 2020-03-13\n", "划分后的训练集大小: 607, 验证集大小: 131\n", "最大日期: 2020-03-16\n", "划分后的训练集大小: 672, 验证集大小: 144\n", "最大日期: 2020-03-17\n", "划分后的训练集大小: 673, 验证集大小: 91\n", "最大日期: 2020-03-18\n", "划分后的训练集大小: 613, 验证集大小: 85\n", "最大日期: 2020-03-19\n", "划分后的训练集大小: 574, 验证集大小: 123\n", "最大日期: 2020-03-20\n", "划分后的训练集大小: 587, 验证集大小: 144\n", "最大日期: 2020-03-23\n", "划分后的训练集大小: 544, 验证集大小: 101\n", "最大日期: 2020-03-24\n", "划分后的训练集大小: 557, 验证集大小: 104\n", "最大日期: 2020-03-25\n", "划分后的训练集大小: 602, 验证集大小: 130\n", "最大日期: 2020-03-26\n", "划分后的训练集大小: 665, 验证集大小: 186\n", "最大日期: 2020-03-27\n", "划分后的训练集大小: 603, 验证集大小: 82\n", "最大日期: 2020-03-30\n", "划分后的训练集大小: 595, 验证集大小: 93\n", "最大日期: 2020-03-31\n", "划分后的训练集大小: 589, 验证集大小: 98\n", "最大日期: 2020-04-01\n", "划分后的训练集大小: 642, 验证集大小: 183\n", "最大日期: 2020-04-02\n", "划分后的训练集大小: 553, 验证集大小: 97\n", "最大日期: 2020-04-03\n", "划分后的训练集大小: 603, 验证集大小: 132\n", "最大日期: 2020-04-07\n", "划分后的训练集大小: 604, 验证集大小: 94\n", "最大日期: 2020-04-08\n", "划分后的训练集大小: 611, 验证集大小: 105\n", "最大日期: 2020-04-09\n", "划分后的训练集大小: 621, 验证集大小: 193\n", "最大日期: 2020-04-10\n", "划分后的训练集大小: 642, 验证集大小: 118\n", "最大日期: 2020-04-13\n", "划分后的训练集大小: 667, 验证集大小: 157\n", "最大日期: 2020-04-14\n", "划分后的训练集大小: 759, 验证集大小: 186\n", "最大日期: 2020-04-15\n", "划分后的训练集大小: 784, 验证集大小: 130\n", "最大日期: 2020-04-16\n", "划分后的训练集大小: 744, 验证集大小: 153\n", "最大日期: 2020-04-17\n", "划分后的训练集大小: 723, 验证集大小: 97\n", "最大日期: 2020-04-20\n", "划分后的训练集大小: 683, 验证集大小: 117\n", "最大日期: 2020-04-21\n", "划分后的训练集大小: 585, 验证集大小: 88\n", "最大日期: 2020-04-22\n", "划分后的训练集大小: 538, 验证集大小: 83\n", "最大日期: 2020-04-23\n", "划分后的训练集大小: 490, 验证集大小: 105\n", "最大日期: 2020-04-24\n", "划分后的训练集大小: 490, 验证集大小: 97\n", "最大日期: 2020-04-27\n", "划分后的训练集大小: 458, 验证集大小: 85\n", "最大日期: 2020-04-28\n", "划分后的训练集大小: 475, 验证集大小: 105\n", "最大日期: 2020-04-29\n", "划分后的训练集大小: 525, 验证集大小: 133\n", "最大日期: 2020-04-30\n", "划分后的训练集大小: 584, 验证集大小: 164\n", "最大日期: 2020-05-06\n", "划分后的训练集大小: 577, 验证集大小: 90\n", "最大日期: 2020-05-07\n", "划分后的训练集大小: 576, 验证集大小: 84\n", "最大日期: 2020-05-08\n", "划分后的训练集大小: 562, 验证集大小: 91\n", "最大日期: 2020-05-11\n", "划分后的训练集大小: 551, 验证集大小: 122\n", "最大日期: 2020-05-12\n", "划分后的训练集大小: 494, 验证集大小: 107\n", "最大日期: 2020-05-13\n", "划分后的训练集大小: 613, 验证集大小: 209\n", "最大日期: 2020-05-14\n", "划分后的训练集大小: 648, 验证集大小: 119\n", "最大日期: 2020-05-15\n", "划分后的训练集大小: 784, 验证集大小: 227\n", "最大日期: 2020-05-18\n", "划分后的训练集大小: 841, 验证集大小: 179\n", "最大日期: 2020-05-19\n", "划分后的训练集大小: 820, 验证集大小: 86\n", "最大日期: 2020-05-20\n", "划分后的训练集大小: 764, 验证集大小: 153\n", "最大日期: 2020-05-21\n", "划分后的训练集大小: 727, 验证集大小: 82\n", "最大日期: 2020-05-22\n", "划分后的训练集大小: 584, 验证集大小: 84\n", "最大日期: 2020-05-25\n", "划分后的训练集大小: 489, 验证集大小: 84\n", "最大日期: 2020-05-26\n", "划分后的训练集大小: 571, 验证集大小: 168\n", "最大日期: 2020-05-27\n", "划分后的训练集大小: 509, 验证集大小: 91\n", "最大日期: 2020-05-28\n", "划分后的训练集大小: 577, 验证集大小: 150\n", "最大日期: 2020-05-29\n", "划分后的训练集大小: 678, 验证集大小: 185\n", "最大日期: 2020-06-01\n", "划分后的训练集大小: 791, 验证集大小: 197\n", "最大日期: 2020-06-02\n", "划分后的训练集大小: 719, 验证集大小: 96\n", "最大日期: 2020-06-03\n", "划分后的训练集大小: 760, 验证集大小: 132\n", "最大日期: 2020-06-04\n", "划分后的训练集大小: 745, 验证集大小: 135\n", "最大日期: 2020-06-05\n", "划分后的训练集大小: 741, 验证集大小: 181\n", "最大日期: 2020-06-08\n", "划分后的训练集大小: 627, 验证集大小: 83\n", "最大日期: 2020-06-09\n", "划分后的训练集大小: 613, 验证集大小: 82\n", "最大日期: 2020-06-10\n", "划分后的训练集大小: 627, 验证集大小: 146\n", "最大日期: 2020-06-11\n", "划分后的训练集大小: 585, 验证集大小: 93\n", "最大日期: 2020-06-12\n", "划分后的训练集大小: 589, 验证集大小: 185\n", "最大日期: 2020-06-15\n", "划分后的训练集大小: 634, 验证集大小: 128\n", "最大日期: 2020-06-16\n", "划分后的训练集大小: 638, 验证集大小: 86\n", "最大日期: 2020-06-17\n", "划分后的训练集大小: 796, 验证集大小: 304\n", "最大日期: 2020-06-18\n", "划分后的训练集大小: 842, 验证集大小: 139\n", "最大日期: 2020-06-19\n", "划分后的训练集大小: 751, 验证集大小: 94\n", "最大日期: 2020-06-22\n", "划分后的训练集大小: 714, 验证集大小: 91\n", "最大日期: 2020-06-23\n", "划分后的训练集大小: 715, 验证集大小: 87\n", "最大日期: 2020-06-24\n", "划分后的训练集大小: 519, 验证集大小: 108\n", "最大日期: 2020-06-29\n", "划分后的训练集大小: 552, 验证集大小: 172\n", "最大日期: 2020-06-30\n", "划分后的训练集大小: 606, 验证集大小: 148\n", "最大日期: 2020-07-01\n", "划分后的训练集大小: 610, 验证集大小: 95\n", "最大日期: 2020-07-02\n", "划分后的训练集大小: 646, 验证集大小: 123\n", "最大日期: 2020-07-03\n", "划分后的训练集大小: 657, 验证集大小: 119\n", "最大日期: 2020-07-06\n", "划分后的训练集大小: 690, 验证集大小: 205\n", "最大日期: 2020-07-07\n", "划分后的训练集大小: 620, 验证集大小: 78\n", "最大日期: 2020-07-08\n", "划分后的训练集大小: 620, 验证集大小: 95\n", "最大日期: 2020-07-09\n", "划分后的训练集大小: 575, 验证集大小: 78\n", "最大日期: 2020-07-10\n", "划分后的训练集大小: 531, 验证集大小: 75\n", "最大日期: 2020-07-13\n", "划分后的训练集大小: 472, 验证集大小: 146\n", "最大日期: 2020-07-14\n", "划分后的训练集大小: 492, 验证集大小: 98\n", "最大日期: 2020-07-15\n", "划分后的训练集大小: 508, 验证集大小: 111\n", "最大日期: 2020-07-16\n", "划分后的训练集大小: 503, 验证集大小: 73\n", "最大日期: 2020-07-17\n", "划分后的训练集大小: 618, 验证集大小: 190\n", "最大日期: 2020-07-20\n", "划分后的训练集大小: 679, 验证集大小: 207\n", "最大日期: 2020-07-21\n", "划分后的训练集大小: 738, 验证集大小: 157\n", "最大日期: 2020-07-22\n", "划分后的训练集大小: 741, 验证集大小: 114\n", "最大日期: 2020-07-23\n", "划分后的训练集大小: 870, 验证集大小: 202\n", "最大日期: 2020-07-24\n", "划分后的训练集大小: 765, 验证集大小: 85\n", "最大日期: 2020-07-27\n", "划分后的训练集大小: 647, 验证集大小: 89\n", "最大日期: 2020-07-28\n", "划分后的训练集大小: 651, 验证集大小: 161\n", "最大日期: 2020-07-29\n", "划分后的训练集大小: 643, 验证集大小: 106\n", "最大日期: 2020-07-30\n", "划分后的训练集大小: 566, 验证集大小: 125\n", "最大日期: 2020-07-31\n", "划分后的训练集大小: 614, 验证集大小: 133\n", "最大日期: 2020-08-03\n", "划分后的训练集大小: 705, 验证集大小: 180\n", "最大日期: 2020-08-04\n", "划分后的训练集大小: 626, 验证集大小: 82\n", "最大日期: 2020-08-05\n", "划分后的训练集大小: 628, 验证集大小: 108\n", "最大日期: 2020-08-06\n", "划分后的训练集大小: 585, 验证集大小: 82\n", "最大日期: 2020-08-07\n", "划分后的训练集大小: 584, 验证集大小: 132\n", "最大日期: 2020-08-10\n", "划分后的训练集大小: 564, 验证集大小: 160\n", "最大日期: 2020-08-11\n", "划分后的训练集大小: 676, 验证集大小: 194\n", "最大日期: 2020-08-12\n", "划分后的训练集大小: 767, 验证集大小: 199\n", "最大日期: 2020-08-13\n", "划分后的训练集大小: 932, 验证集大小: 247\n", "最大日期: 2020-08-14\n", "划分后的训练集大小: 924, 验证集大小: 124\n", "最大日期: 2020-08-17\n", "划分后的训练集大小: 863, 验证集大小: 99\n", "最大日期: 2020-08-18\n", "划分后的训练集大小: 812, 验证集大小: 143\n", "最大日期: 2020-08-19\n", "划分后的训练集大小: 727, 验证集大小: 114\n", "最大日期: 2020-08-20\n", "划分后的训练集大小: 672, 验证集大小: 192\n", "最大日期: 2020-08-21\n", "划分后的训练集大小: 716, 验证集大小: 168\n", "最大日期: 2020-08-24\n", "划分后的训练集大小: 738, 验证集大小: 121\n", "最大日期: 2020-08-25\n", "划分后的训练集大小: 708, 验证集大小: 113\n", "最大日期: 2020-08-26\n", "划分后的训练集大小: 719, 验证集大小: 125\n", "最大日期: 2020-08-27\n", "划分后的训练集大小: 653, 验证集大小: 126\n", "最大日期: 2020-08-28\n", "划分后的训练集大小: 606, 验证集大小: 121\n", "最大日期: 2020-08-31\n", "划分后的训练集大小: 595, 验证集大小: 110\n", "最大日期: 2020-09-01\n", "划分后的训练集大小: 704, 验证集大小: 222\n", "最大日期: 2020-09-02\n", "划分后的训练集大小: 664, 验证集大小: 85\n", "最大日期: 2020-09-03\n", "划分后的训练集大小: 638, 验证集大小: 100\n", "最大日期: 2020-09-04\n", "划分后的训练集大小: 687, 验证集大小: 170\n", "最大日期: 2020-09-07\n", "划分后的训练集大小: 805, 验证集大小: 228\n", "最大日期: 2020-09-08\n", "划分后的训练集大小: 677, 验证集大小: 94\n", "最大日期: 2020-09-09\n", "划分后的训练集大小: 781, 验证集大小: 189\n", "最大日期: 2020-09-10\n", "划分后的训练集大小: 772, 验证集大小: 91\n", "最大日期: 2020-09-11\n", "划分后的训练集大小: 691, 验证集大小: 89\n", "最大日期: 2020-09-14\n", "划分后的训练集大小: 556, 验证集大小: 93\n", "最大日期: 2020-09-15\n", "划分后的训练集大小: 552, 验证集大小: 90\n", "最大日期: 2020-09-16\n", "划分后的训练集大小: 514, 验证集大小: 151\n", "最大日期: 2020-09-17\n", "划分后的训练集大小: 509, 验证集大小: 86\n", "最大日期: 2020-09-18\n", "划分后的训练集大小: 545, 验证集大小: 125\n", "最大日期: 2020-09-21\n", "划分后的训练集大小: 540, 验证集大小: 88\n", "最大日期: 2020-09-22\n", "划分后的训练集大小: 536, 验证集大小: 86\n", "最大日期: 2020-09-23\n", "划分后的训练集大小: 466, 验证集大小: 81\n", "最大日期: 2020-09-24\n", "划分后的训练集大小: 460, 验证集大小: 80\n", "最大日期: 2020-09-25\n", "划分后的训练集大小: 453, 验证集大小: 118\n", "最大日期: 2020-09-28\n", "划分后的训练集大小: 519, 验证集大小: 154\n", "最大日期: 2020-09-29\n", "划分后的训练集大小: 558, 验证集大小: 125\n", "最大日期: 2020-09-30\n", "划分后的训练集大小: 598, 验证集大小: 121\n", "最大日期: 2020-10-09\n", "划分后的训练集大小: 675, 验证集大小: 157\n", "最大日期: 2020-10-12\n", "划分后的训练集大小: 669, 验证集大小: 112\n", "最大日期: 2020-10-13\n", "划分后的训练集大小: 738, 验证集大小: 223\n", "最大日期: 2020-10-14\n", "划分后的训练集大小: 698, 验证集大小: 85\n", "最大日期: 2020-10-15\n", "划分后的训练集大小: 668, 验证集大小: 91\n", "最大日期: 2020-10-16\n", "划分后的训练集大小: 654, 验证集大小: 143\n", "最大日期: 2020-10-19\n", "划分后的训练集大小: 635, 验证集大小: 93\n", "最大日期: 2020-10-20\n", "划分后的训练集大小: 515, 验证集大小: 103\n", "最大日期: 2020-10-21\n", "划分后的训练集大小: 574, 验证集大小: 144\n", "最大日期: 2020-10-22\n", "划分后的训练集大小: 621, 验证集大小: 138\n", "最大日期: 2020-10-23\n", "划分后的训练集大小: 567, 验证集大小: 89\n", "最大日期: 2020-10-26\n", "划分后的训练集大小: 560, 验证集大小: 86\n", "最大日期: 2020-10-27\n", "划分后的训练集大小: 572, 验证集大小: 115\n", "最大日期: 2020-10-28\n", "划分后的训练集大小: 533, 验证集大小: 105\n", "最大日期: 2020-10-29\n", "划分后的训练集大小: 482, 验证集大小: 87\n", "最大日期: 2020-10-30\n", "划分后的训练集大小: 480, 验证集大小: 87\n", "最大日期: 2020-11-02\n", "划分后的训练集大小: 485, 验证集大小: 91\n", "最大日期: 2020-11-03\n", "划分后的训练集大小: 455, 验证集大小: 85\n", "最大日期: 2020-11-04\n", "划分后的训练集大小: 449, 验证集大小: 99\n", "最大日期: 2020-11-05\n", "划分后的训练集大小: 488, 验证集大小: 126\n", "最大日期: 2020-11-06\n", "划分后的训练集大小: 624, 验证集大小: 223\n", "最大日期: 2020-11-09\n", "划分后的训练集大小: 672, 验证集大小: 139\n", "最大日期: 2020-11-10\n", "划分后的训练集大小: 760, 验证集大小: 173\n", "最大日期: 2020-11-11\n", "划分后的训练集大小: 767, 验证集大小: 106\n", "最大日期: 2020-11-12\n", "划分后的训练集大小: 736, 验证集大小: 95\n", "最大日期: 2020-11-13\n", "划分后的训练集大小: 690, 验证集大小: 177\n", "最大日期: 2020-11-16\n", "划分后的训练集大小: 639, 验证集大小: 88\n", "最大日期: 2020-11-17\n", "划分后的训练集大小: 685, 验证集大小: 219\n", "最大日期: 2020-11-18\n", "划分后的训练集大小: 688, 验证集大小: 109\n", "最大日期: 2020-11-19\n", "划分后的训练集大小: 703, 验证集大小: 110\n", "最大日期: 2020-11-20\n", "划分后的训练集大小: 612, 验证集大小: 86\n", "最大日期: 2020-11-23\n", "划分后的训练集大小: 613, 验证集大小: 89\n", "最大日期: 2020-11-24\n", "划分后的训练集大小: 551, 验证集大小: 157\n", "最大日期: 2020-11-25\n", "划分后的训练集大小: 532, 验证集大小: 90\n", "最大日期: 2020-11-26\n", "划分后的训练集大小: 511, 验证集大小: 89\n", "最大日期: 2020-11-27\n", "划分后的训练集大小: 552, 验证集大小: 127\n", "最大日期: 2020-11-30\n", "划分后的训练集大小: 569, 验证集大小: 106\n", "最大日期: 2020-12-01\n", "划分后的训练集大小: 493, 验证集大小: 81\n", "最大日期: 2020-12-02\n", "划分后的训练集大小: 544, 验证集大小: 141\n", "最大日期: 2020-12-03\n", "划分后的训练集大小: 535, 验证集大小: 80\n", "最大日期: 2020-12-04\n", "划分后的训练集大小: 486, 验证集大小: 78\n", "最大日期: 2020-12-07\n", "划分后的训练集大小: 481, 验证集大小: 101\n", "最大日期: 2020-12-08\n", "划分后的训练集大小: 484, 验证集大小: 84\n", "最大日期: 2020-12-09\n", "划分后的训练集大小: 429, 验证集大小: 86\n", "最大日期: 2020-12-10\n", "划分后的训练集大小: 438, 验证集大小: 89\n", "最大日期: 2020-12-11\n", "划分后的训练集大小: 487, 验证集大小: 127\n", "最大日期: 2020-12-14\n", "划分后的训练集大小: 480, 验证集大小: 94\n", "最大日期: 2020-12-15\n", "划分后的训练集大小: 481, 验证集大小: 85\n", "最大日期: 2020-12-16\n", "划分后的训练集大小: 480, 验证集大小: 85\n", "最大日期: 2020-12-17\n", "划分后的训练集大小: 477, 验证集大小: 86\n", "最大日期: 2020-12-18\n", "划分后的训练集大小: 535, 验证集大小: 185\n", "最大日期: 2020-12-21\n", "划分后的训练集大小: 562, 验证集大小: 121\n", "最大日期: 2020-12-22\n", "划分后的训练集大小: 605, 验证集大小: 128\n", "最大日期: 2020-12-23\n", "划分后的训练集大小: 666, 验证集大小: 146\n", "最大日期: 2020-12-24\n", "划分后的训练集大小: 664, 验证集大小: 84\n", "最大日期: 2020-12-25\n", "划分后的训练集大小: 624, 验证集大小: 145\n", "最大日期: 2020-12-28\n", "划分后的训练集大小: 653, 验证集大小: 150\n", "最大日期: 2020-12-29\n", "划分后的训练集大小: 633, 验证集大小: 108\n", "最大日期: 2020-12-30\n", "划分后的训练集大小: 605, 验证集大小: 118\n", "最大日期: 2020-12-31\n", "划分后的训练集大小: 647, 验证集大小: 126\n", "最大日期: 2021-01-04\n", "划分后的训练集大小: 612, 验证集大小: 110\n", "最大日期: 2021-01-05\n", "划分后的训练集大小: 584, 验证集大小: 122\n", "最大日期: 2021-01-06\n", "划分后的训练集大小: 597, 验证集大小: 121\n", "最大日期: 2021-01-07\n", "划分后的训练集大小: 589, 验证集大小: 110\n", "最大日期: 2021-01-08\n", "划分后的训练集大小: 598, 验证集大小: 135\n", "最大日期: 2021-01-11\n", "划分后的训练集大小: 614, 验证集大小: 126\n", "最大日期: 2021-01-12\n", "划分后的训练集大小: 595, 验证集大小: 103\n", "最大日期: 2021-01-13\n", "划分后的训练集大小: 719, 验证集大小: 245\n", "最大日期: 2021-01-14\n", "划分后的训练集大小: 917, 验证集大小: 308\n", "最大日期: 2021-01-15\n", "划分后的训练集大小: 899, 验证集大小: 117\n", "最大日期: 2021-01-18\n", "划分后的训练集大小: 955, 验证集大小: 182\n", "最大日期: 2021-01-19\n", "划分后的训练集大小: 944, 验证集大小: 92\n", "最大日期: 2021-01-20\n", "划分后的训练集大小: 890, 验证集大小: 191\n", "最大日期: 2021-01-21\n", "划分后的训练集大小: 673, 验证集大小: 91\n", "最大日期: 2021-01-22\n", "划分后的训练集大小: 643, 验证集大小: 87\n", "最大日期: 2021-01-25\n", "划分后的训练集大小: 593, 验证集大小: 132\n", "最大日期: 2021-01-26\n", "划分后的训练集大小: 615, 验证集大小: 114\n", "最大日期: 2021-01-27\n", "划分后的训练集大小: 589, 验证集大小: 165\n", "最大日期: 2021-01-28\n", "划分后的训练集大小: 595, 验证集大小: 97\n", "最大日期: 2021-01-29\n", "划分后的训练集大小: 671, 验证集大小: 163\n", "最大日期: 2021-02-01\n", "划分后的训练集大小: 630, 验证集大小: 91\n", "最大日期: 2021-02-02\n", "划分后的训练集大小: 604, 验证集大小: 88\n", "最大日期: 2021-02-03\n", "划分后的训练集大小: 545, 验证集大小: 106\n", "最大日期: 2021-02-04\n", "划分后的训练集大小: 530, 验证集大小: 82\n", "最大日期: 2021-02-05\n", "划分后的训练集大小: 459, 验证集大小: 92\n", "最大日期: 2021-02-08\n", "划分后的训练集大小: 460, 验证集大小: 92\n", "最大日期: 2021-02-09\n", "划分后的训练集大小: 475, 验证集大小: 103\n", "最大日期: 2021-02-10\n", "划分后的训练集大小: 454, 验证集大小: 85\n", "最大日期: 2021-02-18\n", "划分后的训练集大小: 541, 验证集大小: 169\n", "最大日期: 2021-02-19\n", "划分后的训练集大小: 552, 验证集大小: 103\n", "最大日期: 2021-02-22\n", "划分后的训练集大小: 580, 验证集大小: 120\n", "最大日期: 2021-02-23\n", "划分后的训练集大小: 586, 验证集大小: 109\n", "最大日期: 2021-02-24\n", "划分后的训练集大小: 580, 验证集大小: 79\n", "最大日期: 2021-02-25\n", "划分后的训练集大小: 622, 验证集大小: 211\n", "最大日期: 2021-02-26\n", "划分后的训练集大小: 607, 验证集大小: 88\n", "最大日期: 2021-03-01\n", "划分后的训练集大小: 603, 验证集大小: 116\n", "最大日期: 2021-03-02\n", "划分后的训练集大小: 585, 验证集大小: 91\n", "最大日期: 2021-03-03\n", "划分后的训练集大小: 591, 验证集大小: 85\n", "最大日期: 2021-03-04\n", "划分后的训练集大小: 484, 验证集大小: 104\n", "最大日期: 2021-03-05\n", "划分后的训练集大小: 525, 验证集大小: 129\n", "最大日期: 2021-03-08\n", "划分后的训练集大小: 547, 验证集大小: 138\n", "最大日期: 2021-03-09\n", "划分后的训练集大小: 538, 验证集大小: 82\n", "最大日期: 2021-03-10\n", "划分后的训练集大小: 548, 验证集大小: 95\n", "最大日期: 2021-03-11\n", "划分后的训练集大小: 528, 验证集大小: 84\n", "最大日期: 2021-03-12\n", "划分后的训练集大小: 487, 验证集大小: 88\n", "最大日期: 2021-03-15\n", "划分后的训练集大小: 458, 验证集大小: 109\n", "最大日期: 2021-03-16\n", "划分后的训练集大小: 480, 验证集大小: 104\n", "最大日期: 2021-03-17\n", "划分后的训练集大小: 477, 验证集大小: 92\n", "最大日期: 2021-03-18\n", "划分后的训练集大小: 473, 验证集大小: 80\n", "最大日期: 2021-03-19\n", "划分后的训练集大小: 505, 验证集大小: 120\n", "最大日期: 2021-03-22\n", "划分后的训练集大小: 636, 验证集大小: 240\n", "最大日期: 2021-03-23\n", "划分后的训练集大小: 692, 验证集大小: 160\n", "最大日期: 2021-03-24\n", "划分后的训练集大小: 699, 验证集大小: 99\n", "最大日期: 2021-03-25\n", "划分后的训练集大小: 770, 验证集大小: 151\n", "最大日期: 2021-03-26\n", "划分后的训练集大小: 746, 验证集大小: 96\n", "最大日期: 2021-03-29\n", "划分后的训练集大小: 607, 验证集大小: 101\n", "最大日期: 2021-03-30\n", "划分后的训练集大小: 554, 验证集大小: 107\n", "最大日期: 2021-03-31\n", "划分后的训练集大小: 568, 验证集大小: 113\n", "最大日期: 2021-04-01\n", "划分后的训练集大小: 607, 验证集大小: 190\n", "最大日期: 2021-04-02\n", "划分后的训练集大小: 691, 验证集大小: 180\n", "最大日期: 2021-04-06\n", "划分后的训练集大小: 702, 验证集大小: 112\n", "最大日期: 2021-04-07\n", "划分后的训练集大小: 693, 验证集大小: 98\n", "最大日期: 2021-04-08\n", "划分后的训练集大小: 735, 验证集大小: 155\n", "最大日期: 2021-04-09\n", "划分后的训练集大小: 652, 验证集大小: 107\n", "最大日期: 2021-04-12\n", "划分后的训练集大小: 598, 验证集大小: 126\n", "最大日期: 2021-04-13\n", "划分后的训练集大小: 594, 验证集大小: 108\n", "最大日期: 2021-04-14\n", "划分后的训练集大小: 582, 验证集大小: 86\n", "最大日期: 2021-04-15\n", "划分后的训练集大小: 547, 验证集大小: 120\n", "最大日期: 2021-04-16\n", "划分后的训练集大小: 561, 验证集大小: 121\n", "最大日期: 2021-04-19\n", "划分后的训练集大小: 582, 验证集大小: 147\n", "最大日期: 2021-04-20\n", "划分后的训练集大小: 628, 验证集大小: 154\n", "最大日期: 2021-04-21\n", "划分后的训练集大小: 736, 验证集大小: 194\n", "最大日期: 2021-04-22\n", "划分后的训练集大小: 719, 验证集大小: 103\n", "最大日期: 2021-04-23\n", "划分后的训练集大小: 682, 验证集大小: 84\n", "最大日期: 2021-04-26\n", "划分后的训练集大小: 642, 验证集大小: 107\n", "最大日期: 2021-04-27\n", "划分后的训练集大小: 591, 验证集大小: 103\n", "最大日期: 2021-04-28\n", "划分后的训练集大小: 500, 验证集大小: 103\n", "最大日期: 2021-04-29\n", "划分后的训练集大小: 558, 验证集大小: 161\n", "最大日期: 2021-04-30\n", "划分后的训练集大小: 573, 验证集大小: 99\n", "最大日期: 2021-05-06\n", "划分后的训练集大小: 766, 验证集大小: 300\n", "最大日期: 2021-05-07\n", "划分后的训练集大小: 765, 验证集大小: 102\n", "最大日期: 2021-05-10\n", "划分后的训练集大小: 753, 验证集大小: 91\n", "最大日期: 2021-05-11\n", "划分后的训练集大小: 704, 验证集大小: 112\n", "最大日期: 2021-05-12\n", "划分后的训练集大小: 690, 验证集大小: 85\n", "最大日期: 2021-05-13\n", "划分后的训练集大小: 519, 验证集大小: 129\n", "最大日期: 2021-05-14\n", "划分后的训练集大小: 540, 验证集大小: 123\n", "最大日期: 2021-05-17\n", "划分后的训练集大小: 552, 验证集大小: 103\n", "最大日期: 2021-05-18\n", "划分后的训练集大小: 532, 验证集大小: 92\n", "最大日期: 2021-05-19\n", "划分后的训练集大小: 579, 验证集大小: 132\n", "最大日期: 2021-05-20\n", "划分后的训练集大小: 538, 验证集大小: 88\n", "最大日期: 2021-05-21\n", "划分后的训练集大小: 520, 验证集大小: 105\n", "最大日期: 2021-05-24\n", "划分后的训练集大小: 547, 验证集大小: 130\n", "最大日期: 2021-05-25\n", "划分后的训练集大小: 578, 验证集大小: 123\n", "最大日期: 2021-05-26\n", "划分后的训练集大小: 545, 验证集大小: 99\n", "最大日期: 2021-05-27\n", "划分后的训练集大小: 587, 验证集大小: 130\n", "最大日期: 2021-05-28\n", "划分后的训练集大小: 677, 验证集大小: 195\n", "最大日期: 2021-05-31\n", "划分后的训练集大小: 754, 验证集大小: 207\n", "最大日期: 2021-06-01\n", "划分后的训练集大小: 740, 验证集大小: 109\n", "最大日期: 2021-06-02\n", "划分后的训练集大小: 761, 验证集大小: 120\n", "最大日期: 2021-06-03\n", "划分后的训练集大小: 813, 验证集大小: 182\n", "最大日期: 2021-06-04\n", "划分后的训练集大小: 758, 验证集大小: 140\n", "最大日期: 2021-06-07\n", "划分后的训练集大小: 684, 验证集大小: 133\n", "最大日期: 2021-06-08\n", "划分后的训练集大小: 758, 验证集大小: 183\n", "最大日期: 2021-06-09\n", "划分后的训练集大小: 771, 验证集大小: 133\n", "最大日期: 2021-06-10\n", "划分后的训练集大小: 684, 验证集大小: 95\n", "最大日期: 2021-06-11\n", "划分后的训练集大小: 659, 验证集大小: 115\n", "最大日期: 2021-06-15\n", "划分后的训练集大小: 796, 验证集大小: 270\n", "最大日期: 2021-06-16\n", "划分后的训练集大小: 857, 验证集大小: 244\n", "最大日期: 2021-06-17\n", "划分后的训练集大小: 837, 验证集大小: 113\n", "最大日期: 2021-06-18\n", "划分后的训练集大小: 847, 验证集大小: 105\n", "最大日期: 2021-06-21\n", "划分后的训练集大小: 873, 验证集大小: 141\n", "最大日期: 2021-06-22\n", "划分后的训练集大小: 771, 验证集大小: 168\n", "最大日期: 2021-06-23\n", "划分后的训练集大小: 619, 验证集大小: 92\n", "最大日期: 2021-06-24\n", "划分后的训练集大小: 619, 验证集大小: 113\n", "最大日期: 2021-06-25\n", "划分后的训练集大小: 617, 验证集大小: 103\n", "最大日期: 2021-06-28\n", "划分后的训练集大小: 609, 验证集大小: 133\n", "最大日期: 2021-06-29\n", "划分后的训练集大小: 548, 验证集大小: 107\n", "最大日期: 2021-06-30\n", "划分后的训练集大小: 551, 验证集大小: 95\n", "最大日期: 2021-07-01\n", "划分后的训练集大小: 524, 验证集大小: 86\n", "最大日期: 2021-07-02\n", "划分后的训练集大小: 504, 验证集大小: 83\n", "最大日期: 2021-07-05\n", "划分后的训练集大小: 596, 验证集大小: 225\n", "最大日期: 2021-07-06\n", "划分后的训练集大小: 600, 验证集大小: 111\n", "最大日期: 2021-07-07\n", "划分后的训练集大小: 677, 验证集大小: 172\n", "最大日期: 2021-07-08\n", "划分后的训练集大小: 697, 验证集大小: 106\n", "最大日期: 2021-07-09\n", "划分后的训练集大小: 713, 验证集大小: 99\n", "最大日期: 2021-07-12\n", "划分后的训练集大小: 590, 验证集大小: 102\n", "最大日期: 2021-07-13\n", "划分后的训练集大小: 579, 验证集大小: 100\n", "最大日期: 2021-07-14\n", "划分后的训练集大小: 496, 验证集大小: 89\n", "最大日期: 2021-07-15\n", "划分后的训练集大小: 589, 验证集大小: 199\n", "最大日期: 2021-07-16\n", "划分后的训练集大小: 605, 验证集大小: 115\n", "最大日期: 2021-07-19\n", "划分后的训练集大小: 660, 验证集大小: 157\n", "最大日期: 2021-07-20\n", "划分后的训练集大小: 749, 验证集大小: 189\n", "最大日期: 2021-07-21\n", "划分后的训练集大小: 875, 验证集大小: 215\n", "最大日期: 2021-07-22\n", "划分后的训练集大小: 778, 验证集大小: 102\n", "最大日期: 2021-07-23\n", "划分后的训练集大小: 808, 验证集大小: 145\n", "最大日期: 2021-07-26\n", "划分后的训练集大小: 862, 验证集大小: 211\n", "最大日期: 2021-07-27\n", "划分后的训练集大小: 754, 验证集大小: 81\n", "最大日期: 2021-07-28\n", "划分后的训练集大小: 660, 验证集大小: 121\n", "最大日期: 2021-07-29\n", "划分后的训练集大小: 665, 验证集大小: 107\n", "最大日期: 2021-07-30\n", "划分后的训练集大小: 656, 验证集大小: 136\n", "最大日期: 2021-08-02\n", "划分后的训练集大小: 891, 验证集大小: 446\n", "最大日期: 2021-08-03\n", "划分后的训练集大小: 956, 验证集大小: 146\n", "最大日期: 2021-08-04\n", "划分后的训练集大小: 980, 验证集大小: 145\n", "最大日期: 2021-08-05\n", "划分后的训练集大小: 1036, 验证集大小: 163\n", "最大日期: 2021-08-06\n", "划分后的训练集大小: 1000, 验证集大小: 100\n", "最大日期: 2021-08-09\n", "划分后的训练集大小: 716, 验证集大小: 162\n", "最大日期: 2021-08-10\n", "划分后的训练集大小: 694, 验证集大小: 124\n", "最大日期: 2021-08-11\n", "划分后的训练集大小: 708, 验证集大小: 159\n", "最大日期: 2021-08-12\n", "划分后的训练集大小: 651, 验证集大小: 106\n", "最大日期: 2021-08-13\n", "划分后的训练集大小: 734, 验证集大小: 183\n", "最大日期: 2021-08-16\n", "划分后的训练集大小: 715, 验证集大小: 143\n", "最大日期: 2021-08-17\n", "划分后的训练集大小: 677, 验证集大小: 86\n", "最大日期: 2021-08-18\n", "划分后的训练集大小: 606, 验证集大小: 88\n", "最大日期: 2021-08-19\n", "划分后的训练集大小: 588, 验证集大小: 88\n", "最大日期: 2021-08-20\n", "划分后的训练集大小: 492, 验证集大小: 87\n", "最大日期: 2021-08-23\n", "划分后的训练集大小: 469, 验证集大小: 120\n", "最大日期: 2021-08-24\n", "划分后的训练集大小: 544, 验证集大小: 161\n", "最大日期: 2021-08-25\n", "划分后的训练集大小: 544, 验证集大小: 88\n", "最大日期: 2021-08-26\n", "划分后的训练集大小: 545, 验证集大小: 89\n", "最大日期: 2021-08-27\n", "划分后的训练集大小: 579, 验证集大小: 121\n", "最大日期: 2021-08-30\n", "划分后的训练集大小: 675, 验证集大小: 216\n", "最大日期: 2021-08-31\n", "划分后的训练集大小: 712, 验证集大小: 198\n", "最大日期: 2021-09-01\n", "划分后的训练集大小: 741, 验证集大小: 117\n", "最大日期: 2021-09-02\n", "划分后的训练集大小: 776, 验证集大小: 124\n", "最大日期: 2021-09-03\n", "划分后的训练集大小: 904, 验证集大小: 249\n", "最大日期: 2021-09-06\n", "划分后的训练集大小: 871, 验证集大小: 183\n", "最大日期: 2021-09-07\n", "划分后的训练集大小: 790, 验证集大小: 117\n", "最大日期: 2021-09-08\n", "划分后的训练集大小: 772, 验证集大小: 99\n", "最大日期: 2021-09-09\n", "划分后的训练集大小: 764, 验证集大小: 116\n", "最大日期: 2021-09-10\n", "划分后的训练集大小: 674, 验证集大小: 159\n", "最大日期: 2021-09-13\n", "划分后的训练集大小: 632, 验证集大小: 141\n", "最大日期: 2021-09-14\n", "划分后的训练集大小: 751, 验证集大小: 236\n", "最大日期: 2021-09-15\n", "划分后的训练集大小: 786, 验证集大小: 134\n", "最大日期: 2021-09-16\n", "划分后的训练集大小: 893, 验证集大小: 223\n", "最大日期: 2021-09-17\n", "划分后的训练集大小: 853, 验证集大小: 119\n", "最大日期: 2021-09-22\n", "划分后的训练集大小: 799, 验证集大小: 87\n", "最大日期: 2021-09-23\n", "划分后的训练集大小: 723, 验证集大小: 160\n", "最大日期: 2021-09-24\n", "划分后的训练集大小: 682, 验证集大小: 93\n", "最大日期: 2021-09-27\n", "划分后的训练集大小: 679, 验证集大小: 220\n", "最大日期: 2021-09-28\n", "划分后的训练集大小: 730, 验证集大小: 170\n", "最大日期: 2021-09-29\n", "划分后的训练集大小: 776, 验证集大小: 133\n", "最大日期: 2021-09-30\n", "划分后的训练集大小: 782, 验证集大小: 166\n", "最大日期: 2021-10-08\n", "划分后的训练集大小: 869, 验证集大小: 180\n", "最大日期: 2021-10-11\n", "划分后的训练集大小: 824, 验证集大小: 175\n", "最大日期: 2021-10-12\n", "划分后的训练集大小: 830, 验证集大小: 176\n", "最大日期: 2021-10-13\n", "划分后的训练集大小: 861, 验证集大小: 164\n", "最大日期: 2021-10-14\n", "划分后的训练集大小: 819, 验证集大小: 124\n", "最大日期: 2021-10-15\n", "划分后的训练集大小: 769, 验证集大小: 130\n", "最大日期: 2021-10-18\n", "划分后的训练集大小: 685, 验证集大小: 91\n", "最大日期: 2021-10-19\n", "划分后的训练集大小: 624, 验证集大小: 115\n", "最大日期: 2021-10-20\n", "划分后的训练集大小: 696, 验证集大小: 236\n", "最大日期: 2021-10-21\n", "划分后的训练集大小: 660, 验证集大小: 88\n", "最大日期: 2021-10-22\n", "划分后的训练集大小: 616, 验证集大小: 86\n", "最大日期: 2021-10-25\n", "划分后的训练集大小: 643, 验证集大小: 118\n", "最大日期: 2021-10-26\n", "划分后的训练集大小: 622, 验证集大小: 94\n", "最大日期: 2021-10-27\n", "划分后的训练集大小: 477, 验证集大小: 91\n", "最大日期: 2021-10-28\n", "划分后的训练集大小: 490, 验证集大小: 101\n", "最大日期: 2021-10-29\n", "划分后的训练集大小: 532, 验证集大小: 128\n", "最大日期: 2021-11-01\n", "划分后的训练集大小: 508, 验证集大小: 94\n", "最大日期: 2021-11-02\n", "划分后的训练集大小: 622, 验证集大小: 208\n", "最大日期: 2021-11-03\n", "划分后的训练集大小: 657, 验证集大小: 126\n", "最大日期: 2021-11-04\n", "划分后的训练集大小: 644, 验证集大小: 88\n", "最大日期: 2021-11-05\n", "划分后的训练集大小: 608, 验证集大小: 92\n", "最大日期: 2021-11-08\n", "划分后的训练集大小: 616, 验证集大小: 102\n", "最大日期: 2021-11-09\n", "划分后的训练集大小: 627, 验证集大小: 219\n", "最大日期: 2021-11-10\n", "划分后的训练集大小: 659, 验证集大小: 158\n", "最大日期: 2021-11-11\n", "划分后的训练集大小: 739, 验证集大小: 168\n", "最大日期: 2021-11-12\n", "划分后的训练集大小: 936, 验证集大小: 289\n", "最大日期: 2021-11-15\n", "划分后的训练集大小: 965, 验证集大小: 131\n", "最大日期: 2021-11-16\n", "划分后的训练集大小: 874, 验证集大小: 128\n", "最大日期: 2021-11-17\n", "划分后的训练集大小: 816, 验证集大小: 100\n", "最大日期: 2021-11-18\n", "划分后的训练集大小: 785, 验证集大小: 137\n", "最大日期: 2021-11-19\n", "划分后的训练集大小: 682, 验证集大小: 186\n", "最大日期: 2021-11-22\n", "划分后的训练集大小: 698, 验证集大小: 147\n", "最大日期: 2021-11-23\n", "划分后的训练集大小: 665, 验证集大小: 95\n", "最大日期: 2021-11-24\n", "划分后的训练集大小: 657, 验证集大小: 92\n", "最大日期: 2021-11-25\n", "划分后的训练集大小: 611, 验证集大小: 91\n", "最大日期: 2021-11-26\n", "划分后的训练集大小: 536, 验证集大小: 111\n", "最大日期: 2021-11-29\n", "划分后的训练集大小: 643, 验证集大小: 254\n", "最大日期: 2021-11-30\n", "划分后的训练集大小: 801, 验证集大小: 253\n", "最大日期: 2021-12-01\n", "划分后的训练集大小: 804, 验证集大小: 95\n", "最大日期: 2021-12-02\n", "划分后的训练集大小: 806, 验证集大小: 93\n", "最大日期: 2021-12-03\n", "划分后的训练集大小: 831, 验证集大小: 136\n", "最大日期: 2021-12-06\n", "划分后的训练集大小: 698, 验证集大小: 121\n", "最大日期: 2021-12-07\n", "划分后的训练集大小: 556, 验证集大小: 111\n", "最大日期: 2021-12-08\n", "划分后的训练集大小: 644, 验证集大小: 183\n", "最大日期: 2021-12-09\n", "划分后的训练集大小: 688, 验证集大小: 137\n", "最大日期: 2021-12-10\n", "划分后的训练集大小: 649, 验证集大小: 97\n", "最大日期: 2021-12-13\n", "划分后的训练集大小: 709, 验证集大小: 181\n", "最大日期: 2021-12-14\n", "划分后的训练集大小: 691, 验证集大小: 93\n", "最大日期: 2021-12-15\n", "划分后的训练集大小: 637, 验证集大小: 129\n", "最大日期: 2021-12-16\n", "划分后的训练集大小: 585, 验证集大小: 85\n", "最大日期: 2021-12-17\n", "划分后的训练集大小: 591, 验证集大小: 103\n", "最大日期: 2021-12-20\n", "划分后的训练集大小: 515, 验证集大小: 105\n", "最大日期: 2021-12-21\n", "划分后的训练集大小: 552, 验证集大小: 130\n", "最大日期: 2021-12-22\n", "划分后的训练集大小: 536, 验证集大小: 113\n", "最大日期: 2021-12-23\n", "划分后的训练集大小: 601, 验证集大小: 150\n", "最大日期: 2021-12-24\n", "划分后的训练集大小: 584, 验证集大小: 86\n", "最大日期: 2021-12-27\n", "划分后的训练集大小: 612, 验证集大小: 133\n", "最大日期: 2021-12-28\n", "划分后的训练集大小: 645, 验证集大小: 163\n", "最大日期: 2021-12-29\n", "划分后的训练集大小: 651, 验证集大小: 119\n", "最大日期: 2021-12-30\n", "划分后的训练集大小: 747, 验证集大小: 246\n", "最大日期: 2021-12-31\n", "划分后的训练集大小: 827, 验证集大小: 166\n", "最大日期: 2022-01-04\n", "划分后的训练集大小: 805, 验证集大小: 111\n", "最大日期: 2022-01-05\n", "划分后的训练集大小: 731, 验证集大小: 89\n", "最大日期: 2022-01-06\n", "划分后的训练集大小: 777, 验证集大小: 165\n", "最大日期: 2022-01-07\n", "划分后的训练集大小: 667, 验证集大小: 136\n", "最大日期: 2022-01-10\n", "划分后的训练集大小: 660, 验证集大小: 159\n", "最大日期: 2022-01-11\n", "划分后的训练集大小: 652, 验证集大小: 103\n", "最大日期: 2022-01-12\n", "划分后的训练集大小: 671, 验证集大小: 108\n", "最大日期: 2022-01-13\n", "划分后的训练集大小: 603, 验证集大小: 97\n", "最大日期: 2022-01-14\n", "划分后的训练集大小: 556, 验证集大小: 89\n", "最大日期: 2022-01-17\n", "划分后的训练集大小: 620, 验证集大小: 223\n", "最大日期: 2022-01-18\n", "划分后的训练集大小: 699, 验证集大小: 182\n", "最大日期: 2022-01-19\n", "划分后的训练集大小: 779, 验证集大小: 188\n", "最大日期: 2022-01-20\n", "划分后的训练集大小: 876, 验证集大小: 194\n", "最大日期: 2022-01-21\n", "划分后的训练集大小: 914, 验证集大小: 127\n", "最大日期: 2022-01-24\n", "划分后的训练集大小: 778, 验证集大小: 87\n", "最大日期: 2022-01-25\n", "划分后的训练集大小: 715, 验证集大小: 119\n", "最大日期: 2022-01-26\n", "划分后的训练集大小: 656, 验证集大小: 129\n", "最大日期: 2022-01-27\n", "划分后的训练集大小: 555, 验证集大小: 93\n", "最大日期: 2022-01-28\n", "划分后的训练集大小: 515, 验证集大小: 87\n", "最大日期: 2022-02-07\n", "划分后的训练集大小: 536, 验证集大小: 108\n", "最大日期: 2022-02-08\n", "划分后的训练集大小: 527, 验证集大小: 110\n", "最大日期: 2022-02-09\n", "划分后的训练集大小: 527, 验证集大小: 129\n", "最大日期: 2022-02-10\n", "划分后的训练集大小: 527, 验证集大小: 93\n", "最大日期: 2022-02-11\n", "划分后的训练集大小: 581, 验证集大小: 141\n", "最大日期: 2022-02-14\n", "划分后的训练集大小: 730, 验证集大小: 257\n", "最大日期: 2022-02-15\n", "划分后的训练集大小: 747, 验证集大小: 127\n", "最大日期: 2022-02-16\n", "划分后的训练集大小: 775, 验证集大小: 157\n", "最大日期: 2022-02-17\n", "划分后的训练集大小: 803, 验证集大小: 121\n", "最大日期: 2022-02-18\n", "划分后的训练集大小: 760, 验证集大小: 98\n", "最大日期: 2022-02-21\n", "划分后的训练集大小: 637, 验证集大小: 134\n", "最大日期: 2022-02-22\n", "划分后的训练集大小: 790, 验证集大小: 280\n", "最大日期: 2022-02-23\n", "划分后的训练集大小: 799, 验证集大小: 166\n", "最大日期: 2022-02-24\n", "划分后的训练集大小: 820, 验证集大小: 142\n", "最大日期: 2022-02-25\n", "划分后的训练集大小: 831, 验证集大小: 109\n", "最大日期: 2022-02-28\n", "划分后的训练集大小: 818, 验证集大小: 121\n", "最大日期: 2022-03-01\n", "划分后的训练集大小: 784, 验证集大小: 246\n", "最大日期: 2022-03-02\n", "划分后的训练集大小: 705, 验证集大小: 87\n", "最大日期: 2022-03-03\n", "划分后的训练集大小: 659, 验证集大小: 96\n", "最大日期: 2022-03-04\n", "划分后的训练集大小: 666, 验证集大小: 116\n", "最大日期: 2022-03-07\n", "划分后的训练集大小: 653, 验证集大小: 108\n", "最大日期: 2022-03-08\n", "划分后的训练集大小: 502, 验证集大小: 95\n", "最大日期: 2022-03-09\n", "划分后的训练集大小: 548, 验证集大小: 133\n", "最大日期: 2022-03-10\n", "划分后的训练集大小: 584, 验证集大小: 132\n", "最大日期: 2022-03-11\n", "划分后的训练集大小: 573, 验证集大小: 105\n", "最大日期: 2022-03-14\n", "划分后的训练集大小: 563, 验证集大小: 98\n", "最大日期: 2022-03-15\n", "划分后的训练集大小: 553, 验证集大小: 85\n", "最大日期: 2022-03-16\n", "划分后的训练集大小: 537, 验证集大小: 117\n", "最大日期: 2022-03-17\n", "划分后的训练集大小: 619, 验证集大小: 214\n", "最大日期: 2022-03-18\n", "划分后的训练集大小: 632, 验证集大小: 118\n", "最大日期: 2022-03-21\n", "划分后的训练集大小: 638, 验证集大小: 104\n", "最大日期: 2022-03-22\n", "划分后的训练集大小: 713, 验证集大小: 160\n", "最大日期: 2022-03-23\n", "划分后的训练集大小: 790, 验证集大小: 194\n", "最大日期: 2022-03-24\n", "划分后的训练集大小: 729, 验证集大小: 153\n", "最大日期: 2022-03-25\n", "划分后的训练集大小: 694, 验证集大小: 83\n", "最大日期: 2022-03-28\n", "划分后的训练集大小: 712, 验证集大小: 122\n", "最大日期: 2022-03-29\n", "划分后的训练集大小: 739, 验证集大小: 187\n", "最大日期: 2022-03-30\n", "划分后的训练集大小: 666, 验证集大小: 121\n", "最大日期: 2022-03-31\n", "划分后的训练集大小: 716, 验证集大小: 203\n", "最大日期: 2022-04-01\n", "划分后的训练集大小: 730, 验证集大小: 97\n", "最大日期: 2022-04-06\n", "划分后的训练集大小: 744, 验证集大小: 136\n", "最大日期: 2022-04-07\n", "划分后的训练集大小: 665, 验证集大小: 108\n", "最大日期: 2022-04-08\n", "划分后的训练集大小: 629, 验证集大小: 85\n", "最大日期: 2022-04-11\n", "划分后的训练集大小: 527, 验证集大小: 101\n", "最大日期: 2022-04-12\n", "划分后的训练集大小: 575, 验证集大小: 145\n", "最大日期: 2022-04-13\n", "划分后的训练集大小: 522, 验证集大小: 83\n", "最大日期: 2022-04-14\n", "划分后的训练集大小: 495, 验证集大小: 81\n", "最大日期: 2022-04-15\n", "划分后的训练集大小: 599, 验证集大小: 189\n", "最大日期: 2022-04-18\n", "划分后的训练集大小: 629, 验证集大小: 131\n", "最大日期: 2022-04-19\n", "划分后的训练集大小: 598, 验证集大小: 114\n", "最大日期: 2022-04-20\n", "划分后的训练集大小: 609, 验证集大小: 94\n", "最大日期: 2022-04-21\n", "划分后的训练集大小: 727, 验证集大小: 199\n", "最大日期: 2022-04-22\n", "划分后的训练集大小: 635, 验证集大小: 97\n", "最大日期: 2022-04-25\n", "划分后的训练集大小: 581, 验证集大小: 77\n", "最大日期: 2022-04-26\n", "划分后的训练集大小: 560, 验证集大小: 93\n", "最大日期: 2022-04-27\n", "划分后的训练集大小: 554, 验证集大小: 88\n", "最大日期: 2022-04-28\n", "划分后的训练集大小: 464, 验证集大小: 109\n", "最大日期: 2022-04-29\n", "划分后的训练集大小: 518, 验证集大小: 151\n", "最大日期: 2022-05-05\n", "划分后的训练集大小: 596, 验证集大小: 155\n", "最大日期: 2022-05-06\n", "划分后的训练集大小: 592, 验证集大小: 89\n", "最大日期: 2022-05-09\n", "划分后的训练集大小: 591, 验证集大小: 87\n", "最大日期: 2022-05-10\n", "划分后的训练集大小: 565, 验证集大小: 83\n", "最大日期: 2022-05-11\n", "划分后的训练集大小: 562, 验证集大小: 148\n", "最大日期: 2022-05-12\n", "划分后的训练集大小: 539, 验证集大小: 132\n", "最大日期: 2022-05-13\n", "划分后的训练集大小: 549, 验证集大小: 99\n", "最大日期: 2022-05-16\n", "划分后的训练集大小: 589, 验证集大小: 127\n", "最大日期: 2022-05-17\n", "划分后的训练集大小: 671, 验证集大小: 165\n", "最大日期: 2022-05-18\n", "划分后的训练集大小: 642, 验证集大小: 119\n", "最大日期: 2022-05-19\n", "划分后的训练集大小: 603, 验证集大小: 93\n", "最大日期: 2022-05-20\n", "划分后的训练集大小: 595, 验证集大小: 91\n", "最大日期: 2022-05-23\n", "划分后的训练集大小: 596, 验证集大小: 128\n", "最大日期: 2022-05-24\n", "划分后的训练集大小: 521, 验证集大小: 90\n", "最大日期: 2022-05-25\n", "划分后的训练集大小: 511, 验证集大小: 109\n", "最大日期: 2022-05-26\n", "划分后的训练集大小: 524, 验证集大小: 106\n", "最大日期: 2022-05-27\n", "划分后的训练集大小: 543, 验证集大小: 110\n", "最大日期: 2022-05-30\n", "划分后的训练集大小: 537, 验证集大小: 122\n", "最大日期: 2022-05-31\n", "划分后的训练集大小: 565, 验证集大小: 118\n", "最大日期: 2022-06-01\n", "划分后的训练集大小: 544, 验证集大小: 88\n", "最大日期: 2022-06-02\n", "划分后的训练集大小: 563, 验证集大小: 125\n", "最大日期: 2022-06-06\n", "划分后的训练集大小: 550, 验证集大小: 97\n", "最大日期: 2022-06-07\n", "划分后的训练集大小: 548, 验证集大小: 120\n", "最大日期: 2022-06-08\n", "划分后的训练集大小: 560, 验证集大小: 130\n", "最大日期: 2022-06-09\n", "划分后的训练集大小: 587, 验证集大小: 115\n", "最大日期: 2022-06-10\n", "划分后的训练集大小: 606, 验证集大小: 144\n", "最大日期: 2022-06-13\n", "划分后的训练集大小: 613, 验证集大小: 104\n", "最大日期: 2022-06-14\n", "划分后的训练集大小: 602, 验证集大小: 109\n", "最大日期: 2022-06-15\n", "划分后的训练集大小: 648, 验证集大小: 176\n", "最大日期: 2022-06-16\n", "划分后的训练集大小: 652, 验证集大小: 119\n", "最大日期: 2022-06-17\n", "划分后的训练集大小: 705, 验证集大小: 197\n", "最大日期: 2022-06-20\n", "划分后的训练集大小: 762, 验证集大小: 161\n", "最大日期: 2022-06-21\n", "划分后的训练集大小: 745, 验证集大小: 92\n", "最大日期: 2022-06-22\n", "划分后的训练集大小: 769, 验证集大小: 200\n", "最大日期: 2022-06-23\n", "划分后的训练集大小: 739, 验证集大小: 89\n", "最大日期: 2022-06-24\n", "划分后的训练集大小: 640, 验证集大小: 98\n", "最大日期: 2022-06-27\n", "划分后的训练集大小: 570, 验证集大小: 91\n", "最大日期: 2022-06-28\n", "划分后的训练集大小: 766, 验证集大小: 288\n", "最大日期: 2022-06-29\n", "划分后的训练集大小: 727, 验证集大小: 161\n", "最大日期: 2022-06-30\n", "划分后的训练集大小: 720, 验证集大小: 82\n", "最大日期: 2022-07-01\n", "划分后的训练集大小: 724, 验证集大小: 102\n", "最大日期: 2022-07-04\n", "划分后的训练集大小: 758, 验证集大小: 125\n", "最大日期: 2022-07-05\n", "划分后的训练集大小: 564, 验证集大小: 94\n", "最大日期: 2022-07-06\n", "划分后的训练集大小: 510, 验证集大小: 107\n", "最大日期: 2022-07-07\n", "划分后的训练集大小: 573, 验证集大小: 145\n", "最大日期: 2022-07-08\n", "划分后的训练集大小: 584, 验证集大小: 113\n", "最大日期: 2022-07-11\n", "划分后的训练集大小: 594, 验证集大小: 135\n", "最大日期: 2022-07-12\n", "划分后的训练集大小: 616, 验证集大小: 116\n", "最大日期: 2022-07-13\n", "划分后的训练集大小: 622, 验证集大小: 113\n", "最大日期: 2022-07-14\n", "划分后的训练集大小: 644, 验证集大小: 167\n", "最大日期: 2022-07-15\n", "划分后的训练集大小: 625, 验证集大小: 94\n", "最大日期: 2022-07-18\n", "划分后的训练集大小: 638, 验证集大小: 148\n", "最大日期: 2022-07-19\n", "划分后的训练集大小: 665, 验证集大小: 143\n", "最大日期: 2022-07-20\n", "划分后的训练集大小: 675, 验证集大小: 123\n", "最大日期: 2022-07-21\n", "划分后的训练集大小: 596, 验证集大小: 88\n", "最大日期: 2022-07-22\n", "划分后的训练集大小: 622, 验证集大小: 120\n", "最大日期: 2022-07-25\n", "划分后的训练集大小: 613, 验证集大小: 139\n", "最大日期: 2022-07-26\n", "划分后的训练集大小: 561, 验证集大小: 91\n", "最大日期: 2022-07-27\n", "划分后的训练集大小: 591, 验证集大小: 153\n", "最大日期: 2022-07-28\n", "划分后的训练集大小: 648, 验证集大小: 145\n", "最大日期: 2022-07-29\n", "划分后的训练集大小: 713, 验证集大小: 185\n", "最大日期: 2022-08-01\n", "划分后的训练集大小: 733, 验证集大小: 159\n", "最大日期: 2022-08-02\n", "划分后的训练集大小: 806, 验证集大小: 164\n", "最大日期: 2022-08-03\n", "划分后的训练集大小: 871, 验证集大小: 218\n", "最大日期: 2022-08-04\n", "划分后的训练集大小: 890, 验证集大小: 164\n", "最大日期: 2022-08-05\n", "划分后的训练集大小: 833, 验证集大小: 128\n", "最大日期: 2022-08-08\n", "划分后的训练集大小: 813, 验证集大小: 139\n", "最大日期: 2022-08-09\n", "划分后的训练集大小: 869, 验证集大小: 220\n", "最大日期: 2022-08-10\n", "划分后的训练集大小: 969, 验证集大小: 318\n", "最大日期: 2022-08-11\n", "划分后的训练集大小: 985, 验证集大小: 180\n", "最大日期: 2022-08-12\n", "划分后的训练集大小: 948, 验证集大小: 91\n", "最大日期: 2022-08-15\n", "划分后的训练集大小: 985, 验证集大小: 176\n", "最大日期: 2022-08-16\n", "划分后的训练集大小: 894, 验证集大小: 129\n", "最大日期: 2022-08-17\n", "划分后的训练集大小: 735, 验证集大小: 159\n", "最大日期: 2022-08-18\n", "划分后的训练集大小: 740, 验证集大小: 185\n", "最大日期: 2022-08-19\n", "划分后的训练集大小: 906, 验证集大小: 257\n", "最大日期: 2022-08-22\n", "划分后的训练集大小: 934, 验证集大小: 204\n", "最大日期: 2022-08-23\n", "划分后的训练集大小: 1019, 验证集大小: 214\n", "最大日期: 2022-08-24\n", "划分后的训练集大小: 950, 验证集大小: 90\n", "最大日期: 2022-08-25\n", "划分后的训练集大小: 912, 验证集大小: 147\n", "最大日期: 2022-08-26\n", "划分后的训练集大小: 800, 验证集大小: 145\n", "最大日期: 2022-08-29\n", "划分后的训练集大小: 770, 验证集大小: 174\n", "最大日期: 2022-08-30\n", "划分后的训练集大小: 660, 验证集大小: 104\n", "最大日期: 2022-08-31\n", "划分后的训练集大小: 694, 验证集大小: 124\n", "最大日期: 2022-09-01\n", "划分后的训练集大小: 735, 验证集大小: 188\n", "最大日期: 2022-09-02\n", "划分后的训练集大小: 739, 验证集大小: 149\n", "最大日期: 2022-09-05\n", "划分后的训练集大小: 724, 验证集大小: 159\n", "最大日期: 2022-09-06\n", "划分后的训练集大小: 738, 验证集大小: 118\n", "最大日期: 2022-09-07\n", "划分后的训练集大小: 726, 验证集大小: 112\n", "最大日期: 2022-09-08\n", "划分后的训练集大小: 638, 验证集大小: 100\n", "最大日期: 2022-09-09\n", "划分后的训练集大小: 703, 验证集大小: 214\n", "最大日期: 2022-09-13\n", "划分后的训练集大小: 667, 验证集大小: 123\n", "最大日期: 2022-09-14\n", "划分后的训练集大小: 641, 验证集大小: 92\n", "最大日期: 2022-09-15\n", "划分后的训练集大小: 706, 验证集大小: 177\n", "最大日期: 2022-09-16\n", "划分后的训练集大小: 707, 验证集大小: 101\n", "最大日期: 2022-09-19\n", "划分后的训练集大小: 638, 验证集大小: 145\n", "最大日期: 2022-09-20\n", "划分后的训练集大小: 606, 验证集大小: 91\n", "最大日期: 2022-09-21\n", "划分后的训练集大小: 647, 验证集大小: 133\n", "最大日期: 2022-09-22\n", "划分后的训练集大小: 561, 验证集大小: 91\n", "最大日期: 2022-09-23\n", "划分后的训练集大小: 547, 验证集大小: 87\n", "最大日期: 2022-09-26\n", "划分后的训练集大小: 522, 验证集大小: 120\n", "最大日期: 2022-09-27\n", "划分后的训练集大小: 522, 验证集大小: 91\n", "最大日期: 2022-09-28\n", "划分后的训练集大小: 499, 验证集大小: 110\n", "最大日期: 2022-09-29\n", "划分后的训练集大小: 562, 验证集大小: 154\n", "最大日期: 2022-09-30\n", "划分后的训练集大小: 556, 验证集大小: 81\n", "最大日期: 2022-10-10\n", "划分后的训练集大小: 547, 验证集大小: 111\n", "最大日期: 2022-10-11\n", "划分后的训练集大小: 581, 验证集大小: 125\n", "最大日期: 2022-10-12\n", "划分后的训练集大小: 558, 验证集大小: 87\n", "最大日期: 2022-10-13\n", "划分后的训练集大小: 495, 验证集大小: 91\n", "最大日期: 2022-10-14\n", "划分后的训练集大小: 531, 验证集大小: 117\n", "最大日期: 2022-10-17\n", "划分后的训练集大小: 567, 验证集大小: 147\n", "最大日期: 2022-10-18\n", "划分后的训练集大小: 589, 验证集大小: 147\n", "最大日期: 2022-10-19\n", "划分后的训练集大小: 618, 验证集大小: 116\n", "最大日期: 2022-10-20\n", "划分后的训练集大小: 621, 验证集大小: 94\n", "最大日期: 2022-10-21\n", "划分后的训练集大小: 593, 验证集大小: 89\n", "最大日期: 2022-10-24\n", "划分后的训练集大小: 534, 验证集大小: 88\n", "最大日期: 2022-10-25\n", "划分后的训练集大小: 512, 验证集大小: 125\n", "最大日期: 2022-10-26\n", "划分后的训练集大小: 486, 验证集大小: 90\n", "最大日期: 2022-10-27\n", "划分后的训练集大小: 565, 验证集大小: 173\n", "最大日期: 2022-10-28\n", "划分后的训练集大小: 568, 验证集大小: 92\n", "最大日期: 2022-10-31\n", "划分后的训练集大小: 575, 验证集大小: 95\n", "最大日期: 2022-11-01\n", "划分后的训练集大小: 562, 验证集大小: 112\n", "最大日期: 2022-11-02\n", "划分后的训练集大小: 586, 验证集大小: 114\n", "最大日期: 2022-11-03\n", "划分后的训练集大小: 522, 验证集大小: 109\n", "最大日期: 2022-11-04\n", "划分后的训练集大小: 518, 验证集大小: 88\n", "最大日期: 2022-11-07\n", "划分后的训练集大小: 590, 验证集大小: 167\n", "最大日期: 2022-11-08\n", "划分后的训练集大小: 594, 验证集大小: 116\n", "最大日期: 2022-11-09\n", "划分后的训练集大小: 564, 验证集大小: 84\n", "最大日期: 2022-11-10\n", "划分后的训练集大小: 659, 验证集大小: 204\n", "最大日期: 2022-11-11\n", "划分后的训练集大小: 725, 验证集大小: 154\n", "最大日期: 2022-11-14\n", "划分后的训练集大小: 703, 验证集大小: 145\n", "最大日期: 2022-11-15\n", "划分后的训练集大小: 846, 验证集大小: 259\n", "最大日期: 2022-11-16\n", "划分后的训练集大小: 888, 验证集大小: 126\n", "最大日期: 2022-11-17\n", "划分后的训练集大小: 807, 验证集大小: 123\n", "最大日期: 2022-11-18\n", "划分后的训练集大小: 818, 验证集大小: 165\n", "最大日期: 2022-11-21\n", "划分后的训练集大小: 841, 验证集大小: 168\n", "最大日期: 2022-11-22\n", "划分后的训练集大小: 769, 验证集大小: 187\n", "最大日期: 2022-11-23\n", "划分后的训练集大小: 793, 验证集大小: 150\n", "最大日期: 2022-11-24\n", "划分后的训练集大小: 890, 验证集大小: 220\n", "最大日期: 2022-11-25\n", "划分后的训练集大小: 829, 验证集大小: 104\n", "最大日期: 2022-11-28\n", "划分后的训练集大小: 827, 验证集大小: 166\n", "最大日期: 2022-11-29\n", "划分后的训练集大小: 797, 验证集大小: 157\n", "最大日期: 2022-11-30\n", "划分后的训练集大小: 800, 验证集大小: 153\n", "最大日期: 2022-12-01\n", "划分后的训练集大小: 691, 验证集大小: 111\n", "最大日期: 2022-12-02\n", "划分后的训练集大小: 679, 验证集大小: 92\n", "最大日期: 2022-12-05\n", "划分后的训练集大小: 621, 验证集大小: 108\n", "最大日期: 2022-12-06\n", "划分后的训练集大小: 555, 验证集大小: 91\n", "最大日期: 2022-12-07\n", "划分后的训练集大小: 502, 验证集大小: 100\n", "最大日期: 2022-12-08\n", "划分后的训练集大小: 482, 验证集大小: 91\n", "最大日期: 2022-12-09\n", "划分后的训练集大小: 492, 验证集大小: 102\n", "最大日期: 2022-12-12\n", "划分后的训练集大小: 476, 验证集大小: 92\n", "最大日期: 2022-12-13\n", "划分后的训练集大小: 488, 验证集大小: 103\n", "最大日期: 2022-12-14\n", "划分后的训练集大小: 566, 验证集大小: 178\n", "最大日期: 2022-12-15\n", "划分后的训练集大小: 572, 验证集大小: 97\n", "最大日期: 2022-12-16\n", "划分后的训练集大小: 639, 验证集大小: 169\n", "最大日期: 2022-12-19\n", "划分后的训练集大小: 638, 验证集大小: 91\n", "最大日期: 2022-12-20\n", "划分后的训练集大小: 669, 验证集大小: 134\n", "最大日期: 2022-12-21\n", "划分后的训练集大小: 627, 验证集大小: 136\n", "最大日期: 2022-12-22\n", "划分后的训练集大小: 623, 验证集大小: 93\n", "最大日期: 2022-12-23\n", "划分后的训练集大小: 572, 验证集大小: 118\n", "最大日期: 2022-12-26\n", "划分后的训练集大小: 571, 验证集大小: 90\n", "最大日期: 2022-12-27\n", "划分后的训练集大小: 529, 验证集大小: 92\n", "最大日期: 2022-12-28\n", "划分后的训练集大小: 480, 验证集大小: 87\n", "最大日期: 2022-12-29\n", "划分后的训练集大小: 476, 验证集大小: 89\n", "最大日期: 2022-12-30\n", "划分后的训练集大小: 512, 验证集大小: 154\n", "最大日期: 2023-01-03\n", "划分后的训练集大小: 549, 验证集大小: 127\n", "最大日期: 2023-01-04\n", "划分后的训练集大小: 612, 验证集大小: 155\n", "最大日期: 2023-01-05\n", "划分后的训练集大小: 725, 验证集大小: 200\n", "最大日期: 2023-01-06\n", "划分后的训练集大小: 918, 验证集大小: 282\n", "最大日期: 2023-01-09\n", "划分后的训练集大小: 860, 验证集大小: 96\n", "最大日期: 2023-01-10\n", "划分后的训练集大小: 826, 验证集大小: 93\n", "最大日期: 2023-01-11\n", "划分后的训练集大小: 771, 验证集大小: 100\n", "最大日期: 2023-01-12\n", "划分后的训练集大小: 658, 验证集大小: 87\n", "最大日期: 2023-01-13\n", "划分后的训练集大小: 524, 验证集大小: 148\n", "最大日期: 2023-01-16\n", "划分后的训练集大小: 526, 验证集大小: 98\n", "最大日期: 2023-01-17\n", "划分后的训练集大小: 541, 验证集大小: 108\n", "最大日期: 2023-01-18\n", "划分后的训练集大小: 564, 验证集大小: 123\n", "最大日期: 2023-01-19\n", "划分后的训练集大小: 590, 验证集大小: 113\n", "最大日期: 2023-01-20\n", "划分后的训练集大小: 572, 验证集大小: 130\n", "最大日期: 2023-01-30\n", "划分后的训练集大小: 569, 验证集大小: 95\n", "最大日期: 2023-01-31\n", "划分后的训练集大小: 583, 验证集大小: 122\n", "最大日期: 2023-02-01\n", "划分后的训练集大小: 550, 验证集大小: 90\n", "最大日期: 2023-02-02\n", "划分后的训练集大小: 539, 验证集大小: 102\n", "最大日期: 2023-02-03\n", "划分后的训练集大小: 505, 验证集大小: 96\n", "最大日期: 2023-02-06\n", "划分后的训练集大小: 527, 验证集大小: 117\n", "最大日期: 2023-02-07\n", "划分后的训练集大小: 519, 验证集大小: 114\n", "最大日期: 2023-02-08\n", "划分后的训练集大小: 580, 验证集大小: 151\n", "最大日期: 2023-02-09\n", "划分后的训练集大小: 622, 验证集大小: 144\n", "最大日期: 2023-02-10\n", "划分后的训练集大小: 737, 验证集大小: 211\n", "最大日期: 2023-02-13\n", "划分后的训练集大小: 744, 验证集大小: 124\n", "最大日期: 2023-02-14\n", "划分后的训练集大小: 762, 验证集大小: 132\n", "最大日期: 2023-02-15\n", "划分后的训练集大小: 774, 验证集大小: 163\n", "最大日期: 2023-02-16\n", "划分后的训练集大小: 817, 验证集大小: 187\n", "最大日期: 2023-02-17\n", "划分后的训练集大小: 763, 验证集大小: 157\n", "最大日期: 2023-02-20\n", "划分后的训练集大小: 732, 验证集大小: 93\n", "最大日期: 2023-02-21\n", "划分后的训练集大小: 728, 验证集大小: 128\n", "最大日期: 2023-02-22\n", "划分后的训练集大小: 659, 验证集大小: 94\n", "最大日期: 2023-02-23\n", "划分后的训练集大小: 599, 验证集大小: 127\n", "最大日期: 2023-02-24\n", "划分后的训练集大小: 539, 验证集大小: 97\n", "最大日期: 2023-02-27\n", "划分后的训练集大小: 701, 验证集大小: 255\n", "最大日期: 2023-02-28\n", "划分后的训练集大小: 758, 验证集大小: 185\n", "最大日期: 2023-03-01\n", "划分后的训练集大小: 754, 验证集大小: 90\n", "最大日期: 2023-03-02\n", "划分后的训练集大小: 717, 验证集大小: 90\n", "最大日期: 2023-03-03\n", "划分后的训练集大小: 713, 验证集大小: 93\n", "最大日期: 2023-03-06\n", "划分后的训练集大小: 598, 验证集大小: 140\n", "最大日期: 2023-03-07\n", "划分后的训练集大小: 509, 验证集大小: 96\n", "最大日期: 2023-03-08\n", "划分后的训练集大小: 531, 验证集大小: 112\n", "最大日期: 2023-03-09\n", "划分后的训练集大小: 552, 验证集大小: 111\n", "最大日期: 2023-03-10\n", "划分后的训练集大小: 601, 验证集大小: 142\n", "最大日期: 2023-03-13\n", "划分后的训练集大小: 622, 验证集大小: 161\n", "最大日期: 2023-03-14\n", "划分后的训练集大小: 629, 验证集大小: 103\n", "最大日期: 2023-03-15\n", "划分后的训练集大小: 697, 验证集大小: 180\n", "最大日期: 2023-03-16\n", "划分后的训练集大小: 709, 验证集大小: 123\n", "最大日期: 2023-03-17\n", "划分后的训练集大小: 797, 验证集大小: 230\n", "最大日期: 2023-03-20\n", "划分后的训练集大小: 741, 验证集大小: 105\n", "最大日期: 2023-03-21\n", "划分后的训练集大小: 764, 验证集大小: 126\n", "最大日期: 2023-03-22\n", "划分后的训练集大小: 679, 验证集大小: 95\n", "最大日期: 2023-03-23\n", "划分后的训练集大小: 705, 验证集大小: 149\n", "最大日期: 2023-03-24\n", "划分后的训练集大小: 615, 验证集大小: 140\n", "最大日期: 2023-03-27\n", "划分后的训练集大小: 636, 验证集大小: 126\n", "最大日期: 2023-03-28\n", "划分后的训练集大小: 601, 验证集大小: 91\n", "最大日期: 2023-03-29\n", "划分后的训练集大小: 598, 验证集大小: 92\n", "最大日期: 2023-03-30\n", "划分后的训练集大小: 574, 验证集大小: 125\n", "最大日期: 2023-03-31\n", "划分后的训练集大小: 528, 验证集大小: 94\n", "最大日期: 2023-04-03\n", "划分后的训练集大小: 563, 验证集大小: 161\n", "最大日期: 2023-04-04\n", "划分后的训练集大小: 700, 验证集大小: 228\n", "最大日期: 2023-04-06\n", "划分后的训练集大小: 719, 验证集大小: 111\n", "最大日期: 2023-04-07\n", "划分后的训练集大小: 761, 验证集大小: 167\n", "最大日期: 2023-04-10\n", "划分后的训练集大小: 812, 验证集大小: 145\n", "最大日期: 2023-04-11\n", "划分后的训练集大小: 787, 验证集大小: 136\n", "最大日期: 2023-04-12\n", "划分后的训练集大小: 668, 验证集大小: 109\n", "最大日期: 2023-04-13\n", "划分后的训练集大小: 685, 验证集大小: 128\n", "最大日期: 2023-04-14\n", "划分后的训练集大小: 731, 验证集大小: 213\n", "最大日期: 2023-04-17\n", "划分后的训练集大小: 716, 验证集大小: 130\n", "最大日期: 2023-04-18\n", "划分后的训练集大小: 676, 验证集大小: 96\n", "最大日期: 2023-04-19\n", "划分后的训练集大小: 702, 验证集大小: 135\n", "最大日期: 2023-04-20\n", "划分后的训练集大小: 668, 验证集大小: 94\n", "最大日期: 2023-04-21\n", "划分后的训练集大小: 598, 验证集大小: 143\n", "最大日期: 2023-04-24\n", "划分后的训练集大小: 597, 验证集大小: 129\n", "最大日期: 2023-04-25\n", "划分后的训练集大小: 652, 验证集大小: 151\n", "最大日期: 2023-04-26\n", "划分后的训练集大小: 652, 验证集大小: 135\n", "最大日期: 2023-04-27\n", "划分后的训练集大小: 651, 验证集大小: 93\n", "最大日期: 2023-04-28\n", "划分后的训练集大小: 624, 验证集大小: 116\n", "最大日期: 2023-05-04\n", "划分后的训练集大小: 633, 验证集大小: 138\n", "最大日期: 2023-05-05\n", "划分后的训练集大小: 596, 验证集大小: 114\n", "最大日期: 2023-05-08\n", "划分后的训练集大小: 610, 验证集大小: 149\n", "最大日期: 2023-05-09\n", "划分后的训练集大小: 746, 验证集大小: 229\n", "最大日期: 2023-05-10\n", "划分后的训练集大小: 722, 验证集大小: 92\n", "最大日期: 2023-05-11\n", "划分后的训练集大小: 704, 验证集大小: 120\n", "最大日期: 2023-05-12\n", "划分后的训练集大小: 777, 验证集大小: 187\n", "最大日期: 2023-05-15\n", "划分后的训练集大小: 788, 验证集大小: 160\n", "最大日期: 2023-05-16\n", "划分后的训练集大小: 648, 验证集大小: 89\n", "最大日期: 2023-05-17\n", "划分后的训练集大小: 652, 验证集大小: 96\n", "最大日期: 2023-05-18\n", "划分后的训练集大小: 632, 验证集大小: 100\n", "最大日期: 2023-05-19\n", "划分后的训练集大小: 558, 验证集大小: 113\n", "最大日期: 2023-05-22\n", "划分后的训练集大小: 502, 验证集大小: 104\n", "最大日期: 2023-05-23\n", "划分后的训练集大小: 505, 验证集大小: 92\n", "最大日期: 2023-05-24\n", "划分后的训练集大小: 527, 验证集大小: 118\n", "最大日期: 2023-05-25\n", "划分后的训练集大小: 522, 验证集大小: 95\n", "最大日期: 2023-05-26\n", "划分后的训练集大小: 496, 验证集大小: 87\n", "最大日期: 2023-05-29\n", "划分后的训练集大小: 482, 验证集大小: 90\n", "最大日期: 2023-05-30\n", "划分后的训练集大小: 498, 验证集大小: 108\n", "最大日期: 2023-05-31\n", "划分后的训练集大小: 515, 验证集大小: 135\n", "最大日期: 2023-06-01\n", "划分后的训练集大小: 554, 验证集大小: 134\n", "最大日期: 2023-06-02\n", "划分后的训练集大小: 577, 验证集大小: 110\n", "最大日期: 2023-06-05\n", "划分后的训练集大小: 627, 验证集大小: 140\n", "最大日期: 2023-06-06\n", "划分后的训练集大小: 628, 验证集大小: 109\n", "最大日期: 2023-06-07\n", "划分后的训练集大小: 583, 验证集大小: 90\n", "最大日期: 2023-06-08\n", "划分后的训练集大小: 587, 验证集大小: 138\n", "最大日期: 2023-06-09\n", "划分后的训练集大小: 570, 验证集大小: 93\n", "最大日期: 2023-06-12\n", "划分后的训练集大小: 525, 验证集大小: 95\n", "最大日期: 2023-06-13\n", "划分后的训练集大小: 548, 验证集大小: 132\n", "最大日期: 2023-06-14\n", "划分后的训练集大小: 632, 验证集大小: 174\n", "最大日期: 2023-06-15\n", "划分后的训练集大小: 653, 验证集大小: 159\n", "最大日期: 2023-06-16\n", "划分后的训练集大小: 728, 验证集大小: 168\n", "最大日期: 2023-06-19\n", "划分后的训练集大小: 763, 验证集大小: 130\n", "最大日期: 2023-06-20\n", "划分后的训练集大小: 800, 验证集大小: 169\n", "最大日期: 2023-06-21\n", "划分后的训练集大小: 867, 验证集大小: 241\n", "最大日期: 2023-06-26\n", "划分后的训练集大小: 860, 验证集大小: 152\n", "最大日期: 2023-06-27\n", "划分后的训练集大小: 935, 验证集大小: 243\n", "最大日期: 2023-06-28\n", "划分后的训练集大小: 907, 验证集大小: 102\n", "最大日期: 2023-06-29\n", "划分后的训练集大小: 830, 验证集大小: 92\n", "最大日期: 2023-06-30\n", "划分后的训练集大小: 824, 验证集大小: 235\n", "最大日期: 2023-07-03\n", "划分后的训练集大小: 793, 验证集大小: 121\n", "最大日期: 2023-07-04\n", "划分后的训练集大小: 760, 验证集大小: 210\n", "最大日期: 2023-07-05\n", "划分后的训练集大小: 794, 验证集大小: 136\n", "最大日期: 2023-07-06\n", "划分后的训练集大小: 841, 验证集大小: 139\n", "最大日期: 2023-07-07\n", "划分后的训练集大小: 767, 验证集大小: 161\n", "最大日期: 2023-07-10\n", "划分后的训练集大小: 803, 验证集大小: 157\n", "最大日期: 2023-07-11\n", "划分后的训练集大小: 699, 验证集大小: 106\n", "最大日期: 2023-07-12\n", "划分后的训练集大小: 730, 验证集大小: 167\n", "最大日期: 2023-07-13\n", "划分后的训练集大小: 684, 验证集大小: 93\n", "最大日期: 2023-07-14\n", "划分后的训练集大小: 644, 验证集大小: 121\n", "最大日期: 2023-07-17\n", "划分后的训练集大小: 599, 验证集大小: 112\n", "最大日期: 2023-07-18\n", "划分后的训练集大小: 644, 验证集大小: 151\n", "最大日期: 2023-07-19\n", "划分后的训练集大小: 616, 验证集大小: 139\n", "最大日期: 2023-07-20\n", "划分后的训练集大小: 641, 验证集大小: 118\n", "最大日期: 2023-07-21\n", "划分后的训练集大小: 620, 验证集大小: 100\n", "最大日期: 2023-07-24\n", "划分后的训练集大小: 597, 验证集大小: 89\n", "最大日期: 2023-07-25\n", "划分后的训练集大小: 546, 验证集大小: 100\n", "最大日期: 2023-07-26\n", "划分后的训练集大小: 535, 验证集大小: 128\n", "最大日期: 2023-07-27\n", "划分后的训练集大小: 508, 验证集大小: 91\n", "最大日期: 2023-07-28\n", "划分后的训练集大小: 517, 验证集大小: 109\n", "最大日期: 2023-07-31\n", "划分后的训练集大小: 524, 验证集大小: 96\n", "最大日期: 2023-08-01\n", "划分后的训练集大小: 558, 验证集大小: 134\n", "最大日期: 2023-08-02\n", "划分后的训练集大小: 630, 验证集大小: 200\n", "最大日期: 2023-08-03\n", "划分后的训练集大小: 634, 验证集大小: 95\n", "最大日期: 2023-08-04\n", "划分后的训练集大小: 646, 验证集大小: 121\n", "最大日期: 2023-08-07\n", "划分后的训练集大小: 666, 验证集大小: 116\n", "最大日期: 2023-08-08\n", "划分后的训练集大小: 634, 验证集大小: 102\n", "最大日期: 2023-08-09\n", "划分后的训练集大小: 555, 验证集大小: 121\n", "最大日期: 2023-08-10\n", "划分后的训练集大小: 576, 验证集大小: 116\n", "最大日期: 2023-08-11\n", "划分后的训练集大小: 642, 验证集大小: 187\n", "最大日期: 2023-08-14\n", "划分后的训练集大小: 613, 验证集大小: 87\n", "最大日期: 2023-08-15\n", "划分后的训练集大小: 611, 验证集大小: 100\n", "最大日期: 2023-08-16\n", "划分后的训练集大小: 623, 验证集大小: 133\n", "最大日期: 2023-08-17\n", "划分后的训练集大小: 653, 验证集大小: 146\n", "最大日期: 2023-08-18\n", "划分后的训练集大小: 585, 验证集大小: 119\n", "最大日期: 2023-08-21\n", "划分后的训练集大小: 590, 验证集大小: 92\n", "最大日期: 2023-08-22\n", "划分后的训练集大小: 619, 验证集大小: 129\n", "最大日期: 2023-08-23\n", "划分后的训练集大小: 574, 验证集大小: 88\n", "最大日期: 2023-08-24\n", "划分后的训练集大小: 594, 验证集大小: 166\n", "最大日期: 2023-08-25\n", "划分后的训练集大小: 575, 验证集大小: 100\n", "最大日期: 2023-08-28\n", "划分后的训练集大小: 588, 验证集大小: 105\n", "最大日期: 2023-08-29\n", "划分后的训练集大小: 600, 验证集大小: 141\n", "最大日期: 2023-08-30\n", "划分后的训练集大小: 600, 验证集大小: 88\n", "最大日期: 2023-08-31\n", "划分后的训练集大小: 543, 验证集大小: 109\n", "最大日期: 2023-09-01\n", "划分后的训练集大小: 536, 验证集大小: 93\n", "最大日期: 2023-09-04\n", "划分后的训练集大小: 577, 验证集大小: 146\n", "最大日期: 2023-09-05\n", "划分后的训练集大小: 538, 验证集大小: 102\n", "最大日期: 2023-09-06\n", "划分后的训练集大小: 571, 验证集大小: 121\n", "最大日期: 2023-09-07\n", "划分后的训练集大小: 614, 验证集大小: 152\n", "最大日期: 2023-09-08\n", "划分后的训练集大小: 625, 验证集大小: 104\n", "最大日期: 2023-09-11\n", "划分后的训练集大小: 599, 验证集大小: 120\n", "最大日期: 2023-09-12\n", "划分后的训练集大小: 608, 验证集大小: 111\n", "最大日期: 2023-09-13\n", "划分后的训练集大小: 581, 验证集大小: 94\n", "最大日期: 2023-09-14\n", "划分后的训练集大小: 546, 验证集大小: 117\n", "最大日期: 2023-09-15\n", "划分后的训练集大小: 537, 验证集大小: 95\n", "最大日期: 2023-09-18\n", "划分后的训练集大小: 552, 验证集大小: 135\n", "最大日期: 2023-09-19\n", "划分后的训练集大小: 551, 验证集大小: 110\n", "最大日期: 2023-09-20\n", "划分后的训练集大小: 553, 验证集大小: 96\n", "最大日期: 2023-09-21\n", "划分后的训练集大小: 562, 验证集大小: 126\n", "最大日期: 2023-09-22\n", "划分后的训练集大小: 590, 验证集大小: 123\n", "最大日期: 2023-09-25\n", "划分后的训练集大小: 558, 验证集大小: 103\n", "最大日期: 2023-09-26\n", "划分后的训练集大小: 623, 验证集大小: 175\n", "最大日期: 2023-09-27\n", "划分后的训练集大小: 714, 验证集大小: 187\n", "最大日期: 2023-09-28\n", "划分后的训练集大小: 703, 验证集大小: 115\n", "最大日期: 2023-10-09\n", "划分后的训练集大小: 674, 验证集大小: 94\n", "最大日期: 2023-10-10\n", "划分后的训练集大小: 668, 验证集大小: 97\n", "最大日期: 2023-10-11\n", "划分后的训练集大小: 586, 验证集大小: 93\n", "最大日期: 2023-10-12\n", "划分后的训练集大小: 518, 验证集大小: 119\n", "最大日期: 2023-10-13\n", "划分后的训练集大小: 497, 验证集大小: 94\n", "最大日期: 2023-10-16\n", "划分后的训练集大小: 532, 验证集大小: 129\n", "最大日期: 2023-10-17\n", "划分后的训练集大小: 584, 验证集大小: 149\n", "最大日期: 2023-10-18\n", "划分后的训练集大小: 772, 验证集大小: 281\n", "最大日期: 2023-10-19\n", "划分后的训练集大小: 844, 验证集大小: 191\n", "最大日期: 2023-10-20\n", "划分后的训练集大小: 845, 验证集大小: 95\n", "最大日期: 2023-10-23\n", "划分后的训练集大小: 819, 验证集大小: 103\n", "最大日期: 2023-10-24\n", "划分后的训练集大小: 810, 验证集大小: 140\n", "最大日期: 2023-10-25\n", "划分后的训练集大小: 665, 验证集大小: 136\n", "最大日期: 2023-10-26\n", "划分后的训练集大小: 579, 验证集大小: 105\n", "最大日期: 2023-10-27\n", "划分后的训练集大小: 578, 验证集大小: 94\n", "最大日期: 2023-10-30\n", "划分后的训练集大小: 623, 验证集大小: 148\n", "最大日期: 2023-10-31\n", "划分后的训练集大小: 647, 验证集大小: 164\n", "最大日期: 2023-11-01\n", "划分后的训练集大小: 700, 验证集大小: 189\n", "最大日期: 2023-11-02\n", "划分后的训练集大小: 717, 验证集大小: 122\n", "最大日期: 2023-11-03\n", "划分后的训练集大小: 732, 验证集大小: 109\n", "最大日期: 2023-11-06\n", "划分后的训练集大小: 741, 验证集大小: 157\n", "最大日期: 2023-11-07\n", "划分后的训练集大小: 695, 验证集大小: 118\n", "最大日期: 2023-11-08\n", "划分后的训练集大小: 727, 验证集大小: 221\n", "最大日期: 2023-11-09\n", "划分后的训练集大小: 857, 验证集大小: 252\n", "最大日期: 2023-11-10\n", "划分后的训练集大小: 869, 验证集大小: 121\n", "最大日期: 2023-11-13\n", "划分后的训练集大小: 839, 验证集大小: 127\n", "最大日期: 2023-11-14\n", "划分后的训练集大小: 872, 验证集大小: 151\n", "最大日期: 2023-11-15\n", "划分后的训练集大小: 742, 验证集大小: 91\n", "最大日期: 2023-11-16\n", "划分后的训练集大小: 607, 验证集大小: 117\n", "最大日期: 2023-11-17\n", "划分后的训练集大小: 583, 验证集大小: 97\n", "最大日期: 2023-11-20\n", "划分后的训练集大小: 615, 验证集大小: 159\n", "最大日期: 2023-11-21\n", "划分后的训练集大小: 638, 验证集大小: 174\n", "最大日期: 2023-11-22\n", "划分后的训练集大小: 694, 验证集大小: 147\n", "最大日期: 2023-11-23\n", "划分后的训练集大小: 684, 验证集大小: 107\n", "最大日期: 2023-11-24\n", "划分后的训练集大小: 794, 验证集大小: 207\n", "最大日期: 2023-11-27\n", "划分后的训练集大小: 824, 验证集大小: 189\n", "最大日期: 2023-11-28\n", "划分后的训练集大小: 816, 验证集大小: 166\n", "最大日期: 2023-11-29\n", "划分后的训练集大小: 794, 验证集大小: 125\n", "最大日期: 2023-11-30\n", "划分后的训练集大小: 873, 验证集大小: 186\n", "最大日期: 2023-12-01\n", "划分后的训练集大小: 806, 验证集大小: 140\n", "最大日期: 2023-12-04\n", "划分后的训练集大小: 754, 验证集大小: 137\n", "最大日期: 2023-12-05\n", "划分后的训练集大小: 710, 验证集大小: 122\n", "最大日期: 2023-12-06\n", "划分后的训练集大小: 753, 验证集大小: 168\n", "最大日期: 2023-12-07\n", "划分后的训练集大小: 701, 验证集大小: 134\n", "最大日期: 2023-12-08\n", "划分后的训练集大小: 655, 验证集大小: 94\n", "最大日期: 2023-12-11\n", "划分后的训练集大小: 688, 验证集大小: 170\n", "最大日期: 2023-12-12\n", "划分后的训练集大小: 678, 验证集大小: 112\n", "最大日期: 2023-12-13\n", "划分后的训练集大小: 609, 验证集大小: 99\n", "最大日期: 2023-12-14\n", "划分后的训练集大小: 674, 验证集大小: 199\n", "最大日期: 2023-12-15\n", "划分后的训练集大小: 705, 验证集大小: 125\n", "最大日期: 2023-12-18\n", "划分后的训练集大小: 723, 验证集大小: 188\n", "最大日期: 2023-12-19\n", "划分后的训练集大小: 713, 验证集大小: 102\n", "最大日期: 2023-12-20\n", "划分后的训练集大小: 765, 验证集大小: 151\n", "最大日期: 2023-12-21\n", "划分后的训练集大小: 748, 验证集大小: 182\n", "最大日期: 2023-12-22\n", "划分后的训练集大小: 760, 验证集大小: 137\n", "最大日期: 2023-12-25\n", "划分后的训练集大小: 704, 验证集大小: 132\n", "最大日期: 2023-12-26\n", "划分后的训练集大小: 875, 验证集大小: 273\n", "最大日期: 2023-12-27\n", "划分后的训练集大小: 862, 验证集大小: 138\n", "最大日期: 2023-12-28\n", "划分后的训练集大小: 772, 验证集大小: 92\n", "最大日期: 2023-12-29\n", "划分后的训练集大小: 768, 验证集大小: 133\n", "最大日期: 2024-01-02\n", "划分后的训练集大小: 800, 验证集大小: 164\n", "最大日期: 2024-01-03\n", "划分后的训练集大小: 683, 验证集大小: 156\n", "最大日期: 2024-01-04\n", "划分后的训练集大小: 728, 验证集大小: 183\n", "最大日期: 2024-01-05\n", "划分后的训练集大小: 812, 验证集大小: 176\n", "最大日期: 2024-01-08\n", "划分后的训练集大小: 829, 验证集大小: 150\n", "最大日期: 2024-01-09\n", "划分后的训练集大小: 763, 验证集大小: 98\n", "最大日期: 2024-01-10\n", "划分后的训练集大小: 769, 验证集大小: 162\n", "最大日期: 2024-01-11\n", "划分后的训练集大小: 801, 验证集大小: 215\n", "最大日期: 2024-01-12\n", "划分后的训练集大小: 716, 验证集大小: 91\n", "最大日期: 2024-01-15\n", "划分后的训练集大小: 726, 验证集大小: 160\n", "最大日期: 2024-01-16\n", "划分后的训练集大小: 726, 验证集大小: 98\n", "最大日期: 2024-01-17\n", "划分后的训练集大小: 683, 验证集大小: 119\n", "最大日期: 2024-01-18\n", "划分后的训练集大小: 647, 验证集大小: 179\n", "最大日期: 2024-01-19\n", "划分后的训练集大小: 689, 验证集大小: 133\n", "最大日期: 2024-01-22\n", "划分后的训练集大小: 619, 验证集大小: 90\n", "最大日期: 2024-01-23\n", "划分后的训练集大小: 642, 验证集大小: 121\n", "最大日期: 2024-01-24\n", "划分后的训练集大小: 659, 验证集大小: 136\n", "最大日期: 2024-01-25\n", "划分后的训练集大小: 636, 验证集大小: 156\n", "最大日期: 2024-01-26\n", "划分后的训练集大小: 705, 验证集大小: 202\n", "最大日期: 2024-01-29\n", "划分后的训练集大小: 744, 验证集大小: 129\n", "最大日期: 2024-01-30\n", "划分后的训练集大小: 821, 验证集大小: 198\n", "最大日期: 2024-01-31\n", "划分后的训练集大小: 772, 验证集大小: 87\n", "最大日期: 2024-02-01\n", "划分后的训练集大小: 795, 验证集大小: 179\n", "最大日期: 2024-02-02\n", "划分后的训练集大小: 730, 验证集大小: 137\n", "最大日期: 2024-02-05\n", "划分后的训练集大小: 856, 验证集大小: 255\n", "最大日期: 2024-02-06\n", "划分后的训练集大小: 813, 验证集大小: 155\n", "最大日期: 2024-02-07\n", "划分后的训练集大小: 898, 验证集大小: 172\n", "最大日期: 2024-02-08\n", "划分后的训练集大小: 1535, 验证集大小: 816\n", "最大日期: 2024-02-19\n", "划分后的训练集大小: 2275, 验证集大小: 877\n", "最大日期: 2024-02-20\n", "划分后的训练集大小: 2114, 验证集大小: 94\n", "最大日期: 2024-02-21\n", "划分后的训练集大小: 2056, 验证集大小: 97\n", "最大日期: 2024-02-22\n", "划分后的训练集大小: 2119, 验证集大小: 235\n", "最大日期: 2024-02-23\n", "划分后的训练集大小: 1564, 验证集大小: 261\n", "最大日期: 2024-02-26\n", "划分后的训练集大小: 886, 验证集大小: 199\n", "最大日期: 2024-02-27\n", "划分后的训练集大小: 884, 验证集大小: 92\n", "最大日期: 2024-02-28\n", "划分后的训练集大小: 973, 验证集大小: 186\n", "最大日期: 2024-02-29\n", "划分后的训练集大小: 829, 验证集大小: 91\n", "最大日期: 2024-03-01\n", "划分后的训练集大小: 672, 验证集大小: 104\n", "最大日期: 2024-03-04\n", "划分后的训练集大小: 564, 验证集大小: 91\n", "最大日期: 2024-03-05\n", "划分后的训练集大小: 565, 验证集大小: 93\n", "最大日期: 2024-03-06\n", "划分后的训练集大小: 470, 验证集大小: 91\n", "最大日期: 2024-03-07\n", "划分后的训练集大小: 472, 验证集大小: 93\n", "最大日期: 2024-03-08\n", "划分后的训练集大小: 525, 验证集大小: 157\n", "最大日期: 2024-03-11\n", "划分后的训练集大小: 528, 验证集大小: 94\n", "最大日期: 2024-03-12\n", "划分后的训练集大小: 530, 验证集大小: 95\n", "最大日期: 2024-03-13\n", "划分后的训练集大小: 585, 验证集大小: 146\n", "最大日期: 2024-03-14\n", "划分后的训练集大小: 612, 验证集大小: 120\n", "最大日期: 2024-03-15\n", "划分后的训练集大小: 546, 验证集大小: 91\n", "最大日期: 2024-03-18\n", "划分后的训练集大小: 543, 验证集大小: 91\n", "最大日期: 2024-03-19\n", "划分后的训练集大小: 629, 验证集大小: 181\n", "最大日期: 2024-03-20\n", "划分后的训练集大小: 568, 验证集大小: 85\n", "最大日期: 2024-03-21\n", "划分后的训练集大小: 654, 验证集大小: 206\n", "最大日期: 2024-03-22\n", "划分后的训练集大小: 761, 验证集大小: 198\n", "最大日期: 2024-03-25\n", "划分后的训练集大小: 853, 验证集大小: 183\n", "最大日期: 2024-03-26\n", "划分后的训练集大小: 801, 验证集大小: 129\n", "最大日期: 2024-03-27\n", "划分后的训练集大小: 836, 验证集大小: 120\n", "最大日期: 2024-03-28\n", "划分后的训练集大小: 739, 验证集大小: 109\n", "最大日期: 2024-03-29\n", "划分后的训练集大小: 709, 验证集大小: 168\n", "最大日期: 2024-04-01\n", "划分后的训练集大小: 629, 验证集大小: 103\n", "最大日期: 2024-04-02\n", "划分后的训练集大小: 646, 验证集大小: 146\n", "最大日期: 2024-04-03\n", "划分后的训练集大小: 636, 验证集大小: 110\n", "最大日期: 2024-04-08\n", "划分后的训练集大小: 686, 验证集大小: 159\n", "最大日期: 2024-04-09\n", "划分后的训练集大小: 621, 验证集大小: 103\n", "最大日期: 2024-04-10\n", "划分后的训练集大小: 612, 验证集大小: 94\n", "最大日期: 2024-04-11\n", "划分后的训练集大小: 605, 验证集大小: 139\n", "最大日期: 2024-04-12\n", "划分后的训练集大小: 586, 验证集大小: 91\n", "最大日期: 2024-04-15\n", "划分后的训练集大小: 509, 验证集大小: 82\n", "最大日期: 2024-04-16\n", "划分后的训练集大小: 530, 验证集大小: 124\n", "最大日期: 2024-04-17\n", "划分后的训练集大小: 525, 验证集大小: 89\n", "最大日期: 2024-04-18\n", "划分后的训练集大小: 539, 验证集大小: 153\n", "最大日期: 2024-04-19\n", "划分后的训练集大小: 539, 验证集大小: 91\n", "最大日期: 2024-04-22\n", "划分后的训练集大小: 607, 验证集大小: 150\n", "最大日期: 2024-04-23\n", "划分后的训练集大小: 646, 验证集大小: 163\n", "最大日期: 2024-04-24\n", "划分后的训练集大小: 648, 验证集大小: 91\n", "最大日期: 2024-04-25\n", "划分后的训练集大小: 589, 验证集大小: 94\n", "最大日期: 2024-04-26\n", "划分后的训练集大小: 602, 验证集大小: 104\n", "最大日期: 2024-04-29\n", "划分后的训练集大小: 608, 验证集大小: 156\n", "最大日期: 2024-04-30\n", "划分后的训练集大小: 576, 验证集大小: 131\n", "最大日期: 2024-05-06\n", "划分后的训练集大小: 577, 验证集大小: 92\n", "最大日期: 2024-05-07\n", "划分后的训练集大小: 595, 验证集大小: 112\n", "最大日期: 2024-05-08\n", "划分后的训练集大小: 583, 验证集大小: 92\n", "最大日期: 2024-05-09\n", "划分后的训练集大小: 519, 验证集大小: 92\n", "最大日期: 2024-05-10\n", "划分后的训练集大小: 477, 验证集大小: 89\n", "最大日期: 2024-05-13\n", "划分后的训练集大小: 542, 验证集大小: 157\n", "最大日期: 2024-05-14\n", "划分后的训练集大小: 613, 验证集大小: 183\n", "最大日期: 2024-05-15\n", "划分后的训练集大小: 720, 验证集大小: 199\n", "最大日期: 2024-05-16\n", "划分后的训练集大小: 769, 验证集大小: 141\n", "最大日期: 2024-05-17\n", "划分后的训练集大小: 776, 验证集大小: 96\n", "最大日期: 2024-05-20\n", "划分后的训练集大小: 713, 验证集大小: 94\n", "最大日期: 2024-05-21\n", "划分后的训练集大小: 671, 验证集大小: 141\n", "最大日期: 2024-05-22\n", "划分后的训练集大小: 580, 验证集大小: 108\n", "最大日期: 2024-05-23\n", "划分后的训练集大小: 534, 验证集大小: 95\n", "最大日期: 2024-05-24\n", "划分后的训练集大小: 532, 验证集大小: 94\n", "最大日期: 2024-05-27\n", "划分后的训练集大小: 576, 验证集大小: 138\n", "最大日期: 2024-05-28\n", "划分后的训练集大小: 534, 验证集大小: 99\n", "最大日期: 2024-05-29\n", "划分后的训练集大小: 619, 验证集大小: 193\n", "最大日期: 2024-05-30\n", "划分后的训练集大小: 646, 验证集大小: 122\n", "最大日期: 2024-05-31\n", "划分后的训练集大小: 650, 验证集大小: 98\n", "最大日期: 2024-06-03\n", "划分后的训练集大小: 607, 验证集大小: 95\n", "最大日期: 2024-06-04\n", "划分后的训练集大小: 620, 验证集大小: 112\n", "最大日期: 2024-06-05\n", "划分后的训练集大小: 524, 验证集大小: 97\n", "最大日期: 2024-06-06\n", "划分后的训练集大小: 517, 验证集大小: 115\n", "最大日期: 2024-06-07\n", "划分后的训练集大小: 558, 验证集大小: 139\n", "最大日期: 2024-06-11\n", "划分后的训练集大小: 557, 验证集大小: 94\n", "最大日期: 2024-06-12\n", "划分后的训练集大小: 542, 验证集大小: 97\n", "最大日期: 2024-06-13\n", "划分后的训练集大小: 588, 验证集大小: 143\n", "最大日期: 2024-06-14\n", "划分后的训练集大小: 650, 验证集大小: 177\n", "最大日期: 2024-06-17\n", "划分后的训练集大小: 636, 验证集大小: 125\n", "最大日期: 2024-06-18\n", "划分后的训练集大小: 636, 验证集大小: 94\n", "最大日期: 2024-06-19\n", "划分后的训练集大小: 676, 验证集大小: 137\n", "最大日期: 2024-06-20\n", "划分后的训练集大小: 630, 验证集大小: 97\n", "最大日期: 2024-06-21\n", "划分后的训练集大小: 551, 验证集大小: 98\n", "最大日期: 2024-06-24\n", "划分后的训练集大小: 550, 验证集大小: 124\n", "最大日期: 2024-06-25\n", "划分后的训练集大小: 618, 验证集大小: 162\n", "最大日期: 2024-06-26\n", "划分后的训练集大小: 572, 验证集大小: 91\n", "最大日期: 2024-06-27\n", "划分后的训练集大小: 571, 验证集大小: 96\n", "最大日期: 2024-06-28\n", "划分后的训练集大小: 587, 验证集大小: 114\n", "最大日期: 2024-07-01\n", "划分后的训练集大小: 560, 验证集大小: 97\n", "最大日期: 2024-07-02\n", "划分后的训练集大小: 494, 验证集大小: 96\n", "最大日期: 2024-07-03\n", "划分后的训练集大小: 573, 验证集大小: 170\n", "最大日期: 2024-07-04\n", "划分后的训练集大小: 583, 验证集大小: 106\n", "最大日期: 2024-07-05\n", "划分后的训练集大小: 749, 验证集大小: 280\n", "最大日期: 2024-07-08\n", "划分后的训练集大小: 904, 验证集大小: 252\n", "最大日期: 2024-07-09\n", "划分后的训练集大小: 906, 验证集大小: 98\n", "最大日期: 2024-07-10\n", "划分后的训练集大小: 829, 验证集大小: 93\n", "最大日期: 2024-07-11\n", "划分后的训练集大小: 895, 验证集大小: 172\n", "最大日期: 2024-07-12\n", "划分后的训练集大小: 708, 验证集大小: 93\n", "最大日期: 2024-07-15\n", "划分后的训练集大小: 574, 验证集大小: 118\n", "最大日期: 2024-07-16\n", "划分后的训练集大小: 564, 验证集大小: 88\n", "最大日期: 2024-07-17\n", "划分后的训练集大小: 605, 验证集大小: 134\n", "最大日期: 2024-07-18\n", "划分后的训练集大小: 525, 验证集大小: 92\n", "最大日期: 2024-07-19\n", "划分后的训练集大小: 529, 验证集大小: 97\n", "最大日期: 2024-07-22\n", "划分后的训练集大小: 544, 验证集大小: 133\n", "最大日期: 2024-07-23\n", "划分后的训练集大小: 553, 验证集大小: 97\n", "最大日期: 2024-07-24\n", "划分后的训练集大小: 529, 验证集大小: 110\n", "最大日期: 2024-07-25\n", "划分后的训练集大小: 624, 验证集大小: 187\n", "最大日期: 2024-07-26\n", "划分后的训练集大小: 622, 验证集大小: 95\n", "最大日期: 2024-07-29\n", "划分后的训练集大小: 610, 验证集大小: 121\n", "最大日期: 2024-07-30\n", "划分后的训练集大小: 654, 验证集大小: 141\n", "最大日期: 2024-07-31\n", "划分后的训练集大小: 675, 验证集大小: 131\n", "最大日期: 2024-08-01\n", "划分后的训练集大小: 643, 验证集大小: 155\n", "最大日期: 2024-08-02\n", "划分后的训练集大小: 703, 验证集大小: 155\n", "最大日期: 2024-08-05\n", "划分后的训练集大小: 684, 验证集大小: 102\n", "最大日期: 2024-08-06\n", "划分后的训练集大小: 664, 验证集大小: 121\n", "最大日期: 2024-08-07\n", "划分后的训练集大小: 716, 验证集大小: 183\n", "最大日期: 2024-08-08\n", "划分后的训练集大小: 782, 验证集大小: 221\n", "最大日期: 2024-08-09\n", "划分后的训练集大小: 826, 验证集大小: 199\n", "最大日期: 2024-08-12\n", "划分后的训练集大小: 840, 验证集大小: 116\n", "最大日期: 2024-08-13\n", "划分后的训练集大小: 811, 验证集大小: 92\n", "最大日期: 2024-08-14\n", "划分后的训练集大小: 742, 验证集大小: 114\n", "最大日期: 2024-08-15\n", "划分后的训练集大小: 621, 验证集大小: 100\n", "最大日期: 2024-08-16\n", "划分后的训练集大小: 558, 验证集大小: 136\n", "最大日期: 2024-08-19\n", "划分后的训练集大小: 585, 验证集大小: 143\n", "最大日期: 2024-08-20\n", "划分后的训练集大小: 666, 验证集大小: 173\n", "最大日期: 2024-08-21\n", "划分后的训练集大小: 719, 验证集大小: 167\n", "最大日期: 2024-08-22\n", "划分后的训练集大小: 732, 验证集大小: 113\n", "最大日期: 2024-08-23\n", "划分后的训练集大小: 737, 验证集大小: 141\n", "最大日期: 2024-08-26\n", "划分后的训练集大小: 748, 验证集大小: 154\n", "最大日期: 2024-08-27\n", "划分后的训练集大小: 733, 验证集大小: 158\n", "最大日期: 2024-08-28\n", "划分后的训练集大小: 661, 验证集大小: 95\n", "最大日期: 2024-08-29\n", "划分后的训练集大小: 650, 验证集大小: 102\n", "最大日期: 2024-08-30\n", "划分后的训练集大小: 601, 验证集大小: 92\n", "最大日期: 2024-09-02\n", "划分后的训练集大小: 551, 验证集大小: 104\n", "最大日期: 2024-09-03\n", "划分后的训练集大小: 496, 验证集大小: 103\n", "最大日期: 2024-09-04\n", "划分后的训练集大小: 499, 验证集大小: 98\n", "最大日期: 2024-09-05\n", "划分后的训练集大小: 489, 验证集大小: 92\n", "最大日期: 2024-09-06\n", "划分后的训练集大小: 491, 验证集大小: 94\n", "最大日期: 2024-09-09\n", "划分后的训练集大小: 536, 验证集大小: 149\n", "最大日期: 2024-09-10\n", "划分后的训练集大小: 550, 验证集大小: 117\n", "最大日期: 2024-09-11\n", "划分后的训练集大小: 546, 验证集大小: 94\n", "最大日期: 2024-09-12\n", "划分后的训练集大小: 576, 验证集大小: 122\n", "最大日期: 2024-09-13\n", "划分后的训练集大小: 579, 验证集大小: 97\n", "最大日期: 2024-09-18\n", "划分后的训练集大小: 527, 验证集大小: 97\n", "最大日期: 2024-09-19\n", "划分后的训练集大小: 512, 验证集大小: 102\n", "最大日期: 2024-09-20\n", "划分后的训练集大小: 547, 验证集大小: 129\n", "最大日期: 2024-09-23\n", "划分后的训练集大小: 572, 验证集大小: 147\n", "最大日期: 2024-09-24\n", "划分后的训练集大小: 558, 验证集大小: 83\n", "最大日期: 2024-09-25\n", "划分后的训练集大小: 590, 验证集大小: 129\n", "最大日期: 2024-09-26\n", "划分后的训练集大小: 577, 验证集大小: 89\n", "最大日期: 2024-09-27\n", "划分后的训练集大小: 553, 验证集大小: 105\n", "最大日期: 2024-09-30\n", "划分后的训练集大小: 489, 验证集大小: 83\n", "最大日期: 2024-10-08\n", "划分后的训练集大小: 510, 验证集大小: 104\n", "最大日期: 2024-10-09\n", "划分后的训练集大小: 471, 验证集大小: 90\n", "最大日期: 2024-10-10\n", "划分后的训练集大小: 583, 验证集大小: 201\n", "最大日期: 2024-10-11\n", "划分后的训练集大小: 627, 验证集大小: 149\n", "最大日期: 2024-10-14\n", "划分后的训练集大小: 752, 验证集大小: 208\n", "最大日期: 2024-10-15\n", "划分后的训练集大小: 751, 验证集大小: 103\n", "最大日期: 2024-10-16\n", "划分后的训练集大小: 833, 验证集大小: 172\n", "最大日期: 2024-10-17\n", "划分后的训练集大小: 829, 验证集大小: 197\n", "最大日期: 2024-10-18\n", "划分后的训练集大小: 794, 验证集大小: 114\n", "最大日期: 2024-10-21\n", "划分后的训练集大小: 744, 验证集大小: 158\n", "最大日期: 2024-10-22\n", "划分后的训练集大小: 862, 验证集大小: 221\n", "最大日期: 2024-10-23\n", "划分后的训练集大小: 799, 验证集大小: 109\n", "最大日期: 2024-10-24\n", "划分后的训练集大小: 685, 验证集大小: 83\n", "最大日期: 2024-10-25\n", "划分后的训练集大小: 691, 验证集大小: 120\n", "最大日期: 2024-10-28\n", "划分后的训练集大小: 643, 验证集大小: 110\n", "最大日期: 2024-10-29\n", "划分后的训练集大小: 527, 验证集大小: 105\n", "最大日期: 2024-10-30\n", "划分后的训练集大小: 523, 验证集大小: 105\n", "最大日期: 2024-10-31\n", "划分后的训练集大小: 643, 验证集大小: 203\n", "最大日期: 2024-11-01\n", "划分后的训练集大小: 728, 验证集大小: 205\n", "最大日期: 2024-11-04\n", "划分后的训练集大小: 815, 验证集大小: 197\n", "最大日期: 2024-11-05\n", "划分后的训练集大小: 876, 验证集大小: 166\n", "最大日期: 2024-11-06\n", "划分后的训练集大小: 959, 验证集大小: 188\n", "最大日期: 2024-11-07\n", "划分后的训练集大小: 855, 验证集大小: 99\n", "最大日期: 2024-11-08\n", "划分后的训练集大小: 733, 验证集大小: 83\n", "最大日期: 2024-11-11\n", "划分后的训练集大小: 717, 验证集大小: 181\n", "最大日期: 2024-11-12\n", "划分后的训练集大小: 682, 验证集大小: 131\n", "最大日期: 2024-11-13\n", "划分后的训练集大小: 692, 验证集大小: 198\n", "最大日期: 2024-11-14\n", "划分后的训练集大小: 780, 验证集大小: 187\n", "最大日期: 2024-11-15\n", "划分后的训练集大小: 841, 验证集大小: 144\n", "最大日期: 2024-11-18\n", "划分后的训练集大小: 800, 验证集大小: 140\n", "最大日期: 2024-11-19\n", "划分后的训练集大小: 767, 验证集大小: 98\n", "最大日期: 2024-11-20\n", "划分后的训练集大小: 687, 验证集大小: 118\n", "最大日期: 2024-11-21\n", "划分后的训练集大小: 643, 验证集大小: 143\n", "最大日期: 2024-11-22\n", "划分后的训练集大小: 645, 验证集大小: 146\n", "最大日期: 2024-11-25\n", "划分后的训练集大小: 621, 验证集大小: 116\n", "最大日期: 2024-11-26\n", "划分后的训练集大小: 761, 验证集大小: 238\n", "最大日期: 2024-11-27\n", "划分后的训练集大小: 781, 验证集大小: 138\n", "最大日期: 2024-11-28\n", "划分后的训练集大小: 798, 验证集大小: 160\n", "最大日期: 2024-11-29\n", "划分后的训练集大小: 797, 验证集大小: 145\n", "最大日期: 2024-12-02\n", "划分后的训练集大小: 826, 验证集大小: 145\n", "最大日期: 2024-12-03\n", "划分后的训练集大小: 778, 验证集大小: 190\n", "最大日期: 2024-12-04\n", "划分后的训练集大小: 761, 验证集大小: 121\n", "最大日期: 2024-12-05\n", "划分后的训练集大小: 812, 验证集大小: 211\n", "最大日期: 2024-12-06\n", "划分后的训练集大小: 770, 验证集大小: 103\n", "最大日期: 2024-12-09\n", "划分后的训练集大小: 738, 验证集大小: 113\n", "最大日期: 2024-12-10\n", "划分后的训练集大小: 735, 验证集大小: 187\n", "最大日期: 2024-12-11\n", "划分后的训练集大小: 726, 验证集大小: 112\n", "最大日期: 2024-12-12\n", "划分后的训练集大小: 689, 验证集大小: 174\n", "最大日期: 2024-12-13\n", "划分后的训练集大小: 718, 验证集大小: 132\n", "最大日期: 2024-12-16\n", "划分后的训练集大小: 701, 验证集大小: 96\n", "最大日期: 2024-12-17\n", "划分后的训练集大小: 796, 验证集大小: 282\n", "最大日期: 2024-12-18\n", "划分后的训练集大小: 791, 验证集大小: 107\n", "最大日期: 2024-12-19\n", "划分后的训练集大小: 941, 验证集大小: 324\n", "最大日期: 2024-12-20\n", "划分后的训练集大小: 1044, 验证集大小: 235\n", "最大日期: 2024-12-23\n", "划分后的训练集大小: 1047, 验证集大小: 99\n", "最大日期: 2024-12-24\n", "划分后的训练集大小: 876, 验证集大小: 111\n", "最大日期: 2024-12-25\n", "划分后的训练集大小: 984, 验证集大小: 215\n", "最大日期: 2024-12-26\n", "划分后的训练集大小: 800, 验证集大小: 140\n", "最大日期: 2024-12-27\n", "划分后的训练集大小: 697, 验证集大小: 132\n", "最大日期: 2024-12-30\n", "划分后的训练集大小: 720, 验证集大小: 122\n", "最大日期: 2024-12-31\n", "划分后的训练集大小: 710, 验证集大小: 101\n", "最大日期: 2025-01-02\n", "划分后的训练集大小: 586, 验证集大小: 91\n", "最大日期: 2025-01-03\n", "划分后的训练集大小: 538, 验证集大小: 92\n", "最大日期: 2025-01-06\n", "划分后的训练集大小: 525, 验证集大小: 119\n", "最大日期: 2025-01-07\n", "划分后的训练集大小: 495, 验证集大小: 92\n", "最大日期: 2025-01-08\n", "划分后的训练集大小: 540, 验证集大小: 146\n", "最大日期: 2025-01-09\n", "划分后的训练集大小: 557, 验证集大小: 108\n", "最大日期: 2025-01-10\n", "划分后的训练集大小: 720, 验证集大小: 255\n", "最大日期: 2025-01-13\n", "划分后的训练集大小: 733, 验证集大小: 132\n", "最大日期: 2025-01-14\n", "划分后的训练集大小: 738, 验证集大小: 97\n", "最大日期: 2025-01-15\n", "划分后的训练集大小: 734, 验证集大小: 142\n", "最大日期: 2025-01-16\n", "划分后的训练集大小: 769, 验证集大小: 143\n", "最大日期: 2025-01-17\n", "划分后的训练集大小: 603, 验证集大小: 89\n", "最大日期: 2025-01-20\n", "划分后的训练集大小: 594, 验证集大小: 123\n", "最大日期: 2025-01-21\n", "划分后的训练集大小: 758, 验证集大小: 261\n", "最大日期: 2025-01-22\n", "划分后的训练集大小: 801, 验证集大小: 185\n", "最大日期: 2025-01-23\n", "划分后的训练集大小: 836, 验证集大小: 178\n", "最大日期: 2025-01-24\n", "划分后的训练集大小: 942, 验证集大小: 195\n", "最大日期: 2025-01-27\n", "划分后的训练集大小: 954, 验证集大小: 135\n", "最大日期: 2025-02-05\n", "划分后的训练集大小: 787, 验证集大小: 94\n", "最大日期: 2025-02-06\n", "划分后的训练集大小: 733, 验证集大小: 131\n", "最大日期: 2025-02-07\n", "划分后的训练集大小: 647, 验证集大小: 92\n", "最大日期: 2025-02-10\n", "划分后的训练集大小: 567, 验证集大小: 115\n", "最大日期: 2025-02-11\n", "划分后的训练集大小: 564, 验证集大小: 132\n", "最大日期: 2025-02-12\n", "划分后的训练集大小: 594, 验证集大小: 124\n", "最大日期: 2025-02-13\n", "划分后的训练集大小: 687, 验证集大小: 224\n", "最大日期: 2025-02-14\n", "划分后的训练集大小: 699, 验证集大小: 104\n", "最大日期: 2025-02-17\n", "划分后的训练集大小: 780, 验证集大小: 196\n", "最大日期: 2025-02-18\n", "划分后的训练集大小: 798, 验证集大小: 150\n", "最大日期: 2025-02-19\n", "划分后的训练集大小: 787, 验证集大小: 113\n", "最大日期: 2025-02-20\n", "划分后的训练集大小: 655, 验证集大小: 92\n", "最大日期: 2025-02-21\n", "划分后的训练集大小: 724, 验证集大小: 173\n", "最大日期: 2025-02-24\n", "划分后的训练集大小: 718, 验证集大小: 190\n", "最大日期: 2025-02-25\n", "划分后的训练集大小: 701, 验证集大小: 133\n", "最大日期: 2025-02-26\n", "划分后的训练集大小: 828, 验证集大小: 240\n", "最大日期: 2025-02-27\n", "划分后的训练集大小: 862, 验证集大小: 126\n", "最大日期: 2025-02-28\n", "划分后的训练集大小: 913, 验证集大小: 224\n", "最大日期: 2025-03-03\n", "划分后的训练集大小: 841, 验证集大小: 118\n", "最大日期: 2025-03-04\n", "划分后的训练集大小: 841, 验证集大小: 133\n", "最大日期: 2025-03-05\n", "划分后的训练集大小: 750, 验证集大小: 149\n", "最大日期: 2025-03-06\n", "划分后的训练集大小: 727, 验证集大小: 103\n", "最大日期: 2025-03-07\n", "划分后的训练集大小: 682, 验证集大小: 179\n", "最大日期: 2025-03-10\n", "划分后的训练集大小: 723, 验证集大小: 159\n", "最大日期: 2025-03-11\n", "划分后的训练集大小: 682, 验证集大小: 92\n", "最大日期: 2025-03-12\n", "划分后的训练集大小: 684, 验证集大小: 151\n", "最大日期: 2025-03-13\n", "划分后的训练集大小: 796, 验证集大小: 215\n", "最大日期: 2025-03-14\n", "划分后的训练集大小: 820, 验证集大小: 203\n", "最大日期: 2025-03-17\n", "划分后的训练集大小: 767, 验证集大小: 106\n", "最大日期: 2025-03-18\n", "划分后的训练集大小: 822, 验证集大小: 147\n", "最大日期: 2025-03-19\n", "划分后的训练集大小: 758, 验证集大小: 87\n", "最大日期: 2025-03-20\n", "划分后的训练集大小: 656, 验证集大小: 113\n", "最大日期: 2025-03-21\n", "划分后的训练集大小: 595, 验证集大小: 142\n", "最大日期: 2025-03-24\n", "划分后的训练集大小: 645, 验证集大小: 156\n", "最大日期: 2025-03-25\n", "划分后的训练集大小: 603, 验证集大小: 105\n", "最大日期: 2025-03-26\n", "划分后的训练集大小: 665, 验证集大小: 149\n" ] } ], "execution_count": 64 }, { "cell_type": "code", "id": "10f15e935aa02a34", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T18:01:46.449474Z", "start_time": "2025-04-05T18:01:46.441493Z" } }, "source": "print(pdf[pdf['trade_date'] == '2024-10-30'][['ts_code', 'label']])", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " ts_code label\n", "236729 603268.SH 19.0\n", "236728 002685.SZ 1.0\n", "236727 002693.SZ 1.0\n", "236726 600839.SH 0.0\n", "236725 002272.SZ 1.0\n", "... ... ...\n", "236954 002843.SZ 2.0\n", "236953 600570.SH 10.0\n", "236952 002988.SZ 1.0\n", "236951 002708.SZ 0.0\n", "236955 002760.SZ 9.0\n", "\n", "[240 rows x 2 columns]\n" ] } ], "execution_count": 61 }, { "cell_type": "code", "id": "0dc75517-c857-4f1d-8815-e807400a6d33", "metadata": { "ExecuteTime": { "end_time": "2025-04-05T18:01:46.512618Z", "start_time": "2025-04-05T18:01:46.449474Z" } }, "source": [ "train_data = pdf[filter_index & (pdf['trade_date'] == '2023-01-03')]\n", "# train_data = train_data.reset_index(drop=True)\n", "print(len(train_data))" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "167\n" ] } ], "execution_count": 62 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-05T18:01:46.579788Z", "start_time": "2025-04-05T18:01:46.566533Z" } }, "cell_type": "code", "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "\n", "def analyze_nan_factors(df, factor_columns):\n", " \"\"\"\n", " 分析 DataFrame 中指定因子列的 NaN 值情况。\n", "\n", " Args:\n", " df (pd.DataFrame): 包含因子数据的 DataFrame。\n", " factor_columns (list): 包含因子列名的列表。\n", " \"\"\"\n", "\n", " print(\"### 各因子 NaN 值占比 ###\")\n", " nan_percentage = df[factor_columns].isnull().sum() / len(df) * 100\n", " print(nan_percentage.sort_values(ascending=False))\n", " print(\"\\n\")\n", "\n", " print(\"### 包含 NaN 值最多的前 5 个因子 ###\")\n", " top_nan_factors = nan_percentage[nan_percentage > 0].sort_values(ascending=False).head(5)\n", " print(top_nan_factors)\n", " print(\"\\n\")\n", "\n", " if not top_nan_factors.empty:\n", " for factor in top_nan_factors.index:\n", " print(f\"### 因子 '{factor}' 的 NaN 值分析 ###\")\n", "\n", " # 按交易日期分组,计算每日 NaN 值数量\n", " nan_by_date = df.groupby('trade_date')[factor].apply(lambda x: x.isnull().sum())\n", " print(\"\\n每日 NaN 值数量:\")\n", " print(nan_by_date.sort_values(ascending=False).head())\n", "\n", " # # 按股票代码分组,计算每只股票 NaN 值数量\n", " # nan_by_stock = df.groupby('ts_code')[factor].apply(lambda x: x.isnull().sum())\n", " # print(\"\\n每只股票 NaN 值数量:\")\n", " # print(nan_by_stock.sort_values(ascending=False).head())\n", "\n", " print(\"-\" * 30)\n", " print(\"\\n\")\n", "\n", "\n", "# 假设您的 DataFrame 名称是 pdf,并且您已经定义了 feature_columns\n", "if 'pdf' in locals() and 'feature_columns' in locals():\n", " analyze_nan_factors(train_data.copy(), feature_columns)\n", "else:\n", " print(\"请确保您的 DataFrame 名称为 'pdf' 且因子列名列表为 'feature_columns' 后再运行代码。\")" ], "id": "53ce0624ed8e3123", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "### 各因子 NaN 值占比 ###\n", "mv_growth 2.395210\n", "arbr 1.796407\n", "industry_rank_act_factor3 1.197605\n", "industry_act_factor1 1.197605\n", "std_return_90 - std_return_90_2 1.197605\n", " ... \n", "obv 0.000000\n", "momentum_factor 0.000000\n", "resonance_factor 0.000000\n", "cat_vol_spike 0.000000\n", "vol 0.000000\n", "Length: 94, dtype: float64\n", "\n", "\n", "### 包含 NaN 值最多的前 5 个因子 ###\n", "mv_growth 2.395210\n", "arbr 1.796407\n", "industry_rank_act_factor3 1.197605\n", "industry_act_factor3 1.197605\n", "industry_return_5 1.197605\n", "dtype: float64\n", "\n", "\n", "### 因子 'mv_growth' 的 NaN 值分析 ###\n", "\n", "每日 NaN 值数量:\n", "trade_date\n", "2023-01-03 4\n", "Name: mv_growth, dtype: int64\n", "------------------------------\n", "\n", "\n", "### 因子 'arbr' 的 NaN 值分析 ###\n", "\n", "每日 NaN 值数量:\n", "trade_date\n", "2023-01-03 3\n", "Name: arbr, dtype: int64\n", "------------------------------\n", "\n", "\n", "### 因子 'industry_rank_act_factor3' 的 NaN 值分析 ###\n", "\n", "每日 NaN 值数量:\n", "trade_date\n", "2023-01-03 2\n", "Name: industry_rank_act_factor3, dtype: int64\n", "------------------------------\n", "\n", "\n", "### 因子 'industry_act_factor3' 的 NaN 值分析 ###\n", "\n", "每日 NaN 值数量:\n", "trade_date\n", "2023-01-03 2\n", "Name: industry_act_factor3, dtype: int64\n", "------------------------------\n", "\n", "\n", "### 因子 'industry_return_5' 的 NaN 值分析 ###\n", "\n", "每日 NaN 值数量:\n", "trade_date\n", "2023-01-03 2\n", "Name: industry_return_5, dtype: int64\n", "------------------------------\n", "\n", "\n" ] } ], "execution_count": 63 } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }