RollingRank赚钱- Sharp-1.43

2025-04-28 11:02:52 +08:00
parent 94cd9aa6c8
commit 9e598d4ed0
93 changed files with 18134 additions and 4342 deletions
--- a/main/train/AnalyzeData.ipynb
+++ b/main/train/AnalyzeData.ipynb
--- a/main/train/Classify.ipynb
+++ b/main/train/Classify.ipynb
--- a/main/train/ClassifyLR.ipynb
+++ b/main/train/ClassifyLR.ipynb
--- a/main/train/DoubleQuntile.ipynb
+++ b/main/train/DoubleQuntile.ipynb
--- a/main/train/DoubleRank.ipynb
+++ b/main/train/DoubleRank.ipynb
--- a/main/train/MultiClassify.ipynb
+++ b/main/train/MultiClassify.ipynb
--- a/main/train/PlUpdateClassify.ipynb
+++ b/main/train/PlUpdateClassify.ipynb
--- a/main/train/Rank.ipynb
+++ b/main/train/Rank.ipynb
--- a/main/train/Regression.ipynb
+++ b/main/train/Regression.ipynb
--- a/main/train/RollingRank.ipynb
+++ b/main/train/RollingRank.ipynb
--- a/main/train/RollingRank.txt
+++ b/main/train/RollingRank.txt
@@ -0,0 +1,918 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+# %load_ext autoreload
+# %autoreload 2
+
+import pandas as pd
+import warnings
+
+# Silence every warning category for the rest of the session.
+warnings.filterwarnings("ignore")
+
+# Print all columns of a DataFrame instead of eliding the middle ones.
+pd.set_option('display.max_columns', None)
+
+
+# In[2]:
+
+
+from utils.utils import read_and_merge_h5_data
+
+# Build the per-stock, per-day base table `df` by reading several HDF5 stores and
+# chaining them through read_and_merge_h5_data (project helper; each call receives
+# the frame built so far via `df=` and returns the new one).
+# NOTE(review): the helper's default join type and join keys are not visible here —
+# presumably it joins on ts_code/trade_date; confirm in utils.utils.
+
+# Daily OHLC prices, volume and percentage change; starts the table (df=None).
+print('daily data')
+df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',
+                            columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],
+                            df=None)
+
+# Daily valuation/turnover fields plus the is_st flag; the only step that asks for an inner join.
+print('daily basic')
+df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',
+                            columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',
+                                     'is_st'], df=df, join='inner')
+
+# Previous close and the daily upper/lower price limits.
+print('stk limit')
+df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',
+                            columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],
+                            df=df)
+# Buy/sell volumes by order-size bucket and the net money-flow volume.
+print('money flow')
+df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',
+                            columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',
+                                     'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],
+                            df=df)
+# Cost-distribution fields (cost percentiles, weighted average cost, winner rate).
+print('cyq perf')
+df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',
+                            columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',
+                                     'cost_50pct',
+                                     'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],
+                            df=df)
+# DataFrame.info() prints its summary itself and returns None, so this also prints "None".
+print(df.info())
+
+
+# In[3]:
+
+
+# Industry membership records: one row per (ts_code, l2_code) with the date the
+# stock entered that industry (in_date). Loaded standalone (df=None).
+print('industry')
+industry_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',
+                                     columns=['ts_code', 'l2_code', 'in_date'],
+                                     df=None, on=['ts_code'], join='left')
+
+
+def merge_with_industry_data(df, industry_df):
+    """Attach to every quote row the industry record in force on its trade date.
+
+    For each row of ``df`` the latest ``industry_df`` row of the same ``ts_code``
+    whose ``in_date`` is on or before ``trade_date`` is joined in (as-of merge,
+    backward direction). Rows that pre-date every known ``in_date`` of their
+    stock get the ``l2_code`` of that stock's earliest membership record.
+
+    Neither input frame is modified; the date columns are converted on copies.
+
+    Args:
+        df: quotes with at least ``ts_code`` and ``trade_date``.
+        industry_df: membership records with ``ts_code``, ``l2_code``, ``in_date``.
+
+    Returns:
+        A new DataFrame sorted by (trade_date, ts_code) with a fresh RangeIndex,
+        ``trade_date`` as datetime, plus the ``l2_code`` and ``in_date`` columns.
+    """
+    # Convert the date columns on copies so the caller's frames keep their dtypes.
+    quotes = df.assign(trade_date=pd.to_datetime(df['trade_date']))
+    industry = industry_df.assign(in_date=pd.to_datetime(industry_df['in_date']))
+
+    # merge_asof requires both sides to be sorted by their "on" key, so the date
+    # comes first in the sort order; ts_code only breaks ties.
+    industry_sorted = industry.sort_values(['in_date', 'ts_code'])
+    quotes_sorted = quotes.sort_values(['trade_date', 'ts_code'])
+
+    merged = pd.merge_asof(
+        quotes_sorted,
+        industry_sorted,
+        by='ts_code',  # match only within the same stock
+        left_on='trade_date',
+        right_on='in_date',
+        direction='backward'
+    )
+
+    # Earliest membership per stock (first non-null l2_code in in_date order),
+    # used to back-fill rows whose trade_date precedes every in_date.
+    earliest_l2_code = industry_sorted.groupby('ts_code')['l2_code'].first()
+    merged['l2_code'] = merged['l2_code'].fillna(merged['ts_code'].map(earliest_l2_code))
+
+    return merged.reset_index(drop=True)
+
+
+# Attach the industry code (l2_code) and its in_date to every row of the base table.
+df = merge_with_industry_data(df, industry_df)
+# print(mdf[mdf['ts_code'] == '600751.SH'][['ts_code', 'trade_date', 'l2_code']])
+
+
+# In[4]:
+
+
+def calculate_indicators(df):
+    """Add momentum and sentiment indicator columns to one instrument's history.
+
+    The frame is sorted by ``trade_date`` first; every rolling/EMA statistic is
+    then computed over the rows in that order. Requires the columns ``close``,
+    ``pre_close``, ``vol`` and ``amount``.
+
+    Columns added, in this order: daily_return (percent), RSI (14-row simple
+    averages), MACD (EMA12 - EMA26), Signal_line (EMA9 of MACD), MACD_hist,
+    up_ratio (1 when daily_return > 0), up_ratio_20d, volume_mean,
+    volume_change_rate, volatility (20-row std of daily_return), amount_mean,
+    amount_change_rate.
+
+    Returns:
+        The sorted frame with the columns above appended.
+    """
+    out = df.sort_values('trade_date')
+    close = out['close']
+    lookback = 20  # window shared by all sentiment statistics below
+
+    # Percentage change of the close against the previous close.
+    out['daily_return'] = (close - out['pre_close']) / out['pre_close'] * 100
+
+    # RSI from simple 14-row averages of the up-moves and down-moves.
+    move = close.diff()
+    up_move = move.where(move > 0, 0)
+    down_move = -move.where(move < 0, 0)
+    relative_strength = up_move.rolling(window=14).mean() / down_move.rolling(window=14).mean()
+    out['RSI'] = 100 - (100 / (1 + relative_strength))
+
+    # MACD line, its signal line and the histogram between the two.
+    fast_ema = close.ewm(span=12, adjust=False).mean()
+    slow_ema = close.ewm(span=26, adjust=False).mean()
+    out['MACD'] = fast_ema - slow_ema
+    out['Signal_line'] = out['MACD'].ewm(span=9, adjust=False).mean()
+    out['MACD_hist'] = out['MACD'] - out['Signal_line']
+
+    # Sentiment 1: share of up days over the last 20 rows.
+    out['up_ratio'] = out['daily_return'].apply(lambda ret: 1 if ret > 0 else 0)
+    out['up_ratio_20d'] = out['up_ratio'].rolling(window=lookback).mean()
+
+    # Sentiment 2: volume relative to its 20-row mean, in percent.
+    out['volume_mean'] = out['vol'].rolling(window=lookback).mean()
+    out['volume_change_rate'] = (out['vol'] - out['volume_mean']) / out['volume_mean'] * 100
+
+    # Sentiment 3: 20-row standard deviation of the daily return.
+    out['volatility'] = out['daily_return'].rolling(window=lookback).std()
+
+    # Sentiment 4: turnover amount relative to its 20-row mean, in percent.
+    out['amount_mean'] = out['amount'].rolling(window=lookback).mean()
+    out['amount_change_rate'] = (out['amount'] - out['amount_mean']) / out['amount_mean'] * 100
+
+    return out
+
+
+def generate_index_indicators(h5_filename):
+    """Build one wide row of index indicators per trade date.
+
+    Reads the ``index_data`` table from ``h5_filename``, runs
+    ``calculate_indicators`` separately on each ``ts_code``, and pivots the
+    result so that every (index, indicator) pair becomes a column named
+    ``"<ts_code>_<indicator>"``.
+
+    Returns:
+        DataFrame with a ``trade_date`` column followed by the pivoted columns.
+    """
+    raw = pd.read_hdf(h5_filename, key='index_data')
+    raw['trade_date'] = pd.to_datetime(raw['trade_date'], format='%Y%m%d')
+    raw = raw.sort_values('trade_date')
+
+    # Indicators are time-series statistics, so each index is processed on its own.
+    per_index_frames = [
+        calculate_indicators(raw[raw['ts_code'] == code].copy())
+        for code in raw['ts_code'].unique()
+    ]
+    stacked = pd.concat(per_index_frames, ignore_index=True)
+
+    indicator_names = ['daily_return', 'RSI', 'MACD', 'Signal_line',
+                       'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',
+                       'amount_change_rate', 'amount_mean']
+    # One row per date, one column per (indicator, ts_code) pair.
+    wide = stacked.pivot_table(
+        index='trade_date',
+        columns='ts_code',
+        values=indicator_names,
+        aggfunc='last'
+    )
+
+    # Flatten the (indicator, ts_code) column MultiIndex into "<ts_code>_<indicator>".
+    wide.columns = [f"{code}_{indicator}" for indicator, code in wide.columns]
+    return wide.reset_index()
+
+
+# Build the wide index-indicator table and keep only dates on which every
+# indicator of every index is available (rolling warm-up rows are dropped).
+h5_filename = '../../data/index_data.h5'
+index_data = generate_index_indicators(h5_filename)
+index_data = index_data.dropna()
+
+
+
+# In[6]:
+
+
+from utils.factor import get_act_factor
+
+
+def read_industry_data(h5_filename):
+    """Load daily industry-index quotes and derive industry-level factor columns.
+
+    Reads the ``sw_daily`` table, computes OBV and 5/20-row returns per industry
+    code, passes the frame through ``get_act_factor`` and adds the per-date
+    percentile rank of both returns. The raw price/valuation columns are then
+    dropped, every remaining column except ``ts_code``/``trade_date`` gets an
+    ``industry_`` prefix, and ``ts_code`` is renamed to ``cat_l2_code``.
+
+    NOTE(review): ``talib`` is not imported anywhere in the visible part of this
+    script — confirm it is in scope before this function is called.
+    """
+    # Read the industry quotes from the H5 file
+    industry_data = pd.read_hdf(h5_filename, key='sw_daily', columns=[
+        'ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'pe', 'pb', 'vol'
+    ])  # NOTE(review): the original remark assumed the key 'industry_data'; the call reads 'sw_daily'
+    industry_data = industry_data.sort_values(by=['ts_code', 'trade_date'])
+    # NOTE(review): reindex() without arguments changes nothing; reset_index() may have been intended.
+    industry_data = industry_data.reindex()
+    industry_data['trade_date'] = pd.to_datetime(industry_data['trade_date'], format='%Y%m%d')
+
+    # The groupby is created before the derived columns are added to the frame.
+    grouped = industry_data.groupby('ts_code', group_keys=False)
+    # On-balance volume per industry, re-indexed so it aligns with the source rows.
+    industry_data['obv'] = grouped.apply(
+        lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)
+    )
+    # Close-to-close return over 5 and 20 rows within each industry.
+    industry_data['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)
+    industry_data['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)
+
+    # Project helper; presumably adds act_factor* columns — TODO confirm in utils.factor.
+    industry_data = get_act_factor(industry_data, cat=False)
+    industry_data = industry_data.sort_values(by=['trade_date', 'ts_code'])
+
+    # # Deviation of each ts_code's factor from the same-day cross-sectional value
+    # factor_columns = ['obv', 'return_5', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4']  # factor columns
+    # 
+    # for factor in factor_columns:
+    #     if factor in industry_data.columns:
+    #         # Deviation of each ts_code's factor from the same-day value across all ts_codes
+    #         industry_data[f'{factor}_deviation'] = industry_data.groupby('trade_date')[factor].transform(
+    #             lambda x: x - x.mean())
+
+    # Percentile rank (0-1] of each industry's return among all industries on the same date.
+    industry_data['return_5_percentile'] = industry_data.groupby('trade_date')['return_5'].transform(
+        lambda x: x.rank(pct=True))
+    industry_data['return_20_percentile'] = industry_data.groupby('trade_date')['return_20'].transform(
+        lambda x: x.rank(pct=True))
+    # The raw quote columns are only inputs; drop them so they do not clash with stock columns.
+    industry_data = industry_data.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])
+
+    # Prefix every derived column with "industry_"; the two key columns keep their names for now.
+    industry_data = industry_data.rename(
+        columns={col: f'industry_{col}' for col in industry_data.columns if col not in ['ts_code', 'trade_date']})
+
+    # The industry code becomes the merge key used on the stock side.
+    industry_data = industry_data.rename(columns={'ts_code': 'cat_l2_code'})
+    return industry_data
+
+
+# Rebinds industry_df: from here on it holds the industry factor table keyed by
+# (cat_l2_code, trade_date), no longer the membership records loaded earlier.
+industry_df = read_industry_data('../../data/sw_daily.h5')
+
+
+# In[7]:
+
+
+# Names of the raw input columns. They are later removed from the feature list,
+# so only derived factors (and the raw fields listed in the exclusion below)
+# can become model features.
+origin_columns = df.columns.tolist()
+# These raw fields are deliberately left out of origin_columns, i.e. they stay eligible as features.
+origin_columns = [col for col in origin_columns if
+                  col not in ['turnover_rate', 'pe_ttm', 'volume_ratio', 'vol', 'pct_chg', 'l2_code', 'winner_rate']]
+# Skip names that also exist in the index indicator table.
+origin_columns = [col for col in origin_columns if col not in index_data.columns]
+# Skip names containing 'cyq'.
+origin_columns = [col for col in origin_columns if 'cyq' not in col]
+print(origin_columns)
+
+
+# In[8]:
+
+
+def filter_data(df):
+    """Restrict the universe and drop the helper ``in_date`` column.
+
+    Rows are kept only when the stock is not flagged ``is_st``, its ``ts_code``
+    neither ends with ``BJ`` nor starts with ``30``, ``68`` or ``8``, and
+    ``trade_date`` is on or after 2018-01-01.
+
+    Returns:
+        The filtered rows without ``in_date``, re-indexed from 0.
+    """
+    code = df['ts_code'].str
+    excluded_code = (
+        code.endswith('BJ')
+        | code.startswith('30')
+        | code.startswith('68')
+        | code.startswith('8')
+    )
+    recent_enough = df['trade_date'] >= '20180101'
+
+    selected = df[~df['is_st'] & ~excluded_code & recent_enough]
+    return selected.drop(columns=['in_date']).reset_index(drop=True)
+
+
+# Restrict the universe, then compute the factor columns used as model features.
+df = filter_data(df)
+# df = get_technical_factor(df)
+# df = get_act_factor(df)
+# df = get_money_flow_factor(df)
+# df = get_alpha_factor(df)
+# df = get_limit_factor(df)
+# df = get_cyp_perf_factor(df)
+# df = get_mv_factors(df)
+# NOTE(review): get_rolling_factor and get_simple_factor are not imported anywhere
+# in the visible part of this script — confirm they are in scope. Both return a
+# pair whose second element is discarded here.
+df, _ = get_rolling_factor(df)
+df, _ = get_simple_factor(df)
+# df = df.merge(industry_df, on=['l2_code', 'trade_date'], how='left')
+# The 'cat' prefix is what later code uses to recognise categorical columns.
+df = df.rename(columns={'l2_code': 'cat_l2_code'})
+# df = df.merge(index_data, on='trade_date', how='left')
+
+# DataFrame.info() prints its summary itself and returns None, so this also prints "None".
+print(df.info())
+
+
+# In[9]:
+
+
+def create_deviation_within_dates(df, feature_columns):
+    """Add, for each eligible feature, its deviation from the same-day industry mean.
+
+    A feature is eligible when its name is not ``trade_date`` and contains none of
+    the substrings ``cat``, ``index``, ``industry``, ``limit`` or ``cyq``. For each
+    such feature a column ``deviation_mean_<feature>`` is added holding the value
+    minus the mean over rows sharing the same (trade_date, cat_l2_code).
+
+    Returns:
+        (widened_df, columns): the input with the new columns appended, and a copy
+        of ``feature_columns`` extended with the new column names.
+    """
+    skipped_substrings = ('cat', 'index', 'industry', 'limit', 'cyq')
+    peer_keys = ['trade_date', 'cat_l2_code']
+
+    extended_columns = feature_columns[:]
+    deviations = {}
+    for name in feature_columns:
+        if name == 'trade_date' or any(part in name for part in skipped_substrings):
+            continue
+        deviation_name = f'deviation_mean_{name}'
+        peer_mean = df.groupby(peer_keys)[name].transform('mean')
+        deviations[deviation_name] = df[name] - peer_mean
+        extended_columns.append(deviation_name)
+
+    # Append all new columns in one concat rather than inserting them one by one.
+    widened = pd.concat([df, pd.DataFrame(deviations)], axis=1)
+
+    # for feature in ['obv', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4']:
+    #     df[f'deviation_industry_{feature}'] = df[feature] - df[f'industry_{feature}']
+
+    return widened, extended_columns
+
+
+# In[10]:
+
+
+import pandas as pd
+
+from scipy.stats import ks_2samp, wasserstein_distance
+from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+
+def remove_shifted_features(train_data, feature_columns, ks_threshold=0.05, wasserstein_threshold=0.1, size=0.8):
+    """Drop numeric features whose distribution differs between early and late dates.
+
+    The distinct trade dates are sorted and split at the ``size`` fraction: rows
+    before the split date form the reference sample, rows on or after it the
+    comparison sample. A float64/int64 feature is dropped when the two-sample KS
+    test p-value is below ``ks_threshold`` or the Wasserstein distance between the
+    samples exceeds ``wasserstein_threshold``.
+
+    Args:
+        train_data: frame with ``trade_date`` and the feature columns.
+        feature_columns: candidate feature names.
+        ks_threshold: p-value below which a feature counts as shifted.
+        wasserstein_threshold: distance above which a feature counts as shifted.
+        size: fraction of the distinct dates used as the reference period.
+
+    Returns:
+        (filtered_features, dropped_features), both lists of column names.
+
+    Raises:
+        ValueError: if ``train_data`` contains no trade date.
+    """
+    # Sort the distinct dates explicitly: unique() keeps order of appearance, so
+    # on unsorted input the split date would otherwise be arbitrary.
+    all_dates = train_data['trade_date'].dropna().drop_duplicates().sort_values().tolist()
+    if not all_dates:
+        raise ValueError("train_data contains no trade_date values.")
+
+    # Clamp so that size=1.0 selects the last date instead of indexing past the end.
+    split_position = min(int(len(all_dates) * size), len(all_dates) - 1)
+    split_date = all_dates[split_position]
+    train_data_split = train_data[train_data['trade_date'] < split_date]  # reference period
+    val_data_split = train_data[train_data['trade_date'] >= split_date]  # comparison period
+
+    # Only float64/int64 columns that are also requested features are tested.
+    numeric_columns = train_data_split.select_dtypes(include=['float64', 'int64']).columns
+    numeric_columns = [col for col in numeric_columns if col in feature_columns]
+
+    dropped_features = []
+    for feature in numeric_columns:
+        ks_stat, p_value = ks_2samp(train_data_split[feature], val_data_split[feature])
+        wasserstein_dist = wasserstein_distance(train_data_split[feature], val_data_split[feature])
+
+        if p_value < ks_threshold or wasserstein_dist > wasserstein_threshold:
+            dropped_features.append(feature)
+
+    print(f"检测到 {len(dropped_features)} 个可能漂移的特征: {dropped_features}")
+
+    filtered_features = [f for f in feature_columns if f not in dropped_features]
+
+    return filtered_features, dropped_features
+
+
+def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,
+                                     log=True):
+    """Keep only the label values lying between two quantiles (both inclusive).
+
+    Args:
+        label: values to filter.
+        lower_percentile: lower quantile in [0, 1).
+        upper_percentile: upper quantile in (0, 1], greater than the lower one.
+        log: when true, print how many values were removed.
+
+    Returns:
+        The subset of ``label`` within the quantile bounds; its index is preserved.
+
+    Raises:
+        ValueError: if the percentiles are out of range or not ordered.
+    """
+    percentiles_valid = 0 <= lower_percentile < upper_percentile <= 1
+    if not percentiles_valid:
+        raise ValueError("Percentile values must satisfy 0 <= lower_percentile < upper_percentile <= 1.")
+
+    low, high = label.quantile(lower_percentile), label.quantile(upper_percentile)
+    inside = (label >= low) & (label <= high)
+    kept = label[inside]
+
+    if log:
+        print(f"Removed {len(label) - len(kept)} outliers.")
+    return kept
+
+
+def calculate_risk_adjusted_target(df, days=5):
+    """Return a forward-looking target combining future return and its rolling std.
+
+    Per stock (rows ordered by trade_date): the future return is the close
+    ``days`` rows ahead relative to the next row's open; the volatility term is
+    the rolling standard deviation (window ``days``, min_periods=1) of that
+    future return. The result is their product with +/-inf replaced by NaN.
+
+    NOTE(review): the value is return * volatility although it is treated as a
+    Sharpe-like ratio — confirm whether a division was intended.
+
+    Returns:
+        Series indexed like ``df`` sorted by (ts_code, trade_date).
+    """
+    ordered = df.sort_values(by=['ts_code', 'trade_date'])
+    stock_key = ordered['ts_code']
+
+    exit_close = ordered['close'].groupby(stock_key).shift(-days)
+    entry_open = ordered['open'].groupby(stock_key).shift(-1)
+    forward_return = (exit_close - entry_open) / entry_open
+
+    # groupby().rolling() prepends the group key as an index level; drop it and
+    # realign to the sorted row order.
+    forward_volatility = (
+        forward_return.groupby(stock_key)
+        .rolling(days, min_periods=1)
+        .std()
+        .reset_index(level=0, drop=True)
+        .reindex(ordered.index)
+    )
+
+    target = forward_return * forward_volatility
+    return target.replace([np.inf, -np.inf], np.nan)
+
+
+def calculate_score(df, days=5, lambda_param=1.0):
+    """Score each row as future return minus a penalty for the coming drawdown.
+
+    Per stock (rows ordered by trade_date):
+    ``score = future_return - lambda_param * max_drawdown`` where ``max_drawdown``
+    is the maximum peak-to-trough drop of ``close`` over the next ``days`` rows.
+
+    Requires the columns ``ts_code``, ``trade_date``, ``close`` and
+    ``future_return``. As a side effect a ``score`` column is written into ``df``.
+
+    Args:
+        df: per-stock daily frame.
+        days: length of the forward window.
+        lambda_param: weight of the drawdown penalty.
+
+    Returns:
+        The ``score`` column of ``df``.
+    """
+
+    def calculate_max_drawdown(prices):
+        """Largest relative drop from a running peak within ``prices``."""
+        peak = prices.iloc[0]
+        max_drawdown = 0
+
+        for price in prices:
+            if price > peak:
+                peak = price  # new running peak
+            else:
+                drawdown = (peak - price) / peak
+                max_drawdown = max(max_drawdown, drawdown)
+
+        return max_drawdown
+
+    def compute_stock_score(stock_df):
+        stock_df = stock_df.sort_values(by=['trade_date'])
+        future_return = stock_df['future_return']
+        # rolling(days) ends at the current row; shifting by -days moves the value
+        # computed at row t+days back to row t, i.e. the window covers t+1 .. t+days.
+        max_drawdown = stock_df['close'].rolling(days).apply(calculate_max_drawdown, raw=False).shift(-days)
+        return future_return - lambda_param * max_drawdown
+
+    # The extra index level produced by the groupby is dropped so the result
+    # aligns with df's own index.
+    df['score'] = df.groupby('ts_code').apply(compute_stock_score).reset_index(level=0, drop=True)
+
+    return df['score']
+
+
+def remove_highly_correlated_features(df, feature_columns, threshold=0.9):
+    """Drop features that are highly correlated with an earlier feature.
+
+    Absolute pairwise correlations are computed over the numeric features. A
+    feature is marked for removal when its correlation with any feature that
+    precedes it exceeds ``threshold``. Marked features whose name contains
+    ``act`` or ``af`` are kept anyway.
+
+    Returns:
+        The surviving names, in the order of ``feature_columns``.
+
+    Raises:
+        ValueError: if none of the features is numeric.
+    """
+    numeric_names = list(df[feature_columns].select_dtypes(include=[np.number]).columns)
+    if len(numeric_names) == 0:
+        raise ValueError("No numeric features found in the provided data.")
+
+    abs_corr = df[numeric_names].corr().abs()
+    # Keep only the strict upper triangle so each pair is looked at once and a
+    # feature is compared only with those listed before it.
+    above_diagonal = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
+    pairwise = abs_corr.where(above_diagonal)
+    too_similar = (pairwise > threshold).any()
+    redundant = [name for name in pairwise.columns if too_similar[name]]
+
+    survivors = []
+    for name in feature_columns:
+        protected = 'act' in name or 'af' in name
+        if protected or name not in redundant:
+            survivors.append(name)
+    return survivors
+
+
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+
+
+def cross_sectional_standardization(df, features):
+    """Z-score the given features within each trade date.
+
+    For every ``trade_date`` each feature is centred on that day's mean and
+    divided by that day's population standard deviation (ddof=0). Missing values
+    are ignored when computing the statistics and stay missing. A feature that is
+    (numerically) constant on a day is only centred, i.e. its scale is 1.
+
+    The statistics are computed with one grouped transform instead of masking the
+    whole frame once per date, so the cost no longer grows with dates x rows.
+
+    Args:
+        df: frame with ``trade_date`` and the feature columns; not modified.
+        features: names of the columns to standardise.
+
+    Returns:
+        A new frame sorted by ``trade_date`` with the features standardised.
+    """
+    features = list(features)
+    standardized = df.sort_values(by='trade_date')  # sort_values returns a new frame
+    if not features or standardized.empty:
+        return standardized
+
+    per_date = standardized.groupby('trade_date')[features]
+    day_mean = per_date.transform('mean')
+    day_std = per_date.transform('std', ddof=0)
+
+    # Constant columns have a zero (or rounding-noise) spread; dividing by it would
+    # produce inf or amplified noise, so use a unit scale for them.
+    negligible = day_std <= 1e-12 * day_mean.abs().clip(lower=1.0)
+    day_std = day_std.mask(negligible, 1.0)
+
+    standardized[features] = (standardized[features] - day_mean) / day_std
+    return standardized
+
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+
+from concurrent.futures import ProcessPoolExecutor
+
+
+def neutralize_manual(df, features, industry_col, mkt_cap_col):
+    """Replace each feature by its residual after regressing on log market cap.
+
+    Within every ``industry_col`` group a one-variable least-squares fit
+    ``feature ~ alpha + beta * log(mkt_cap_col)`` is computed and the feature is
+    overwritten with the residual. Groups with a single row, and rows whose
+    industry is missing, keep their original values. When log market cap is
+    constant inside a group the slope is taken as 0, so the feature is only
+    de-meaned.
+
+    Residuals are written back by row position, so the result is correct for any
+    row order of ``df``. Covariance and variance use the same normalisation.
+    Log market cap is computed once, before any feature is overwritten.
+
+    Args:
+        df: frame to neutralise; modified in place.
+        features: feature column names.
+        industry_col: grouping column.
+        mkt_cap_col: market-cap column (must be positive for the log).
+
+    Returns:
+        ``df`` itself, with the feature columns overwritten.
+    """
+    log_cap = np.log(df[mkt_cap_col].to_numpy(dtype=float))
+    # Positional row numbers of each industry group, computed once for all features.
+    group_positions = list(df.groupby(industry_col).indices.values())
+
+    for col in features:
+        values = df[col].to_numpy(dtype=float)
+        residuals = values.copy()  # rows not covered by a fitted group keep their value
+
+        for positions in group_positions:
+            if len(positions) < 2:
+                continue  # not enough samples to fit a line
+            x = log_cap[positions]
+            y = values[positions]
+            x_var = np.var(x)
+            # ddof=0 on both sides so the slope is the ordinary least-squares slope.
+            beta = np.cov(y, x, ddof=0)[0, 1] / x_var if x_var > 0 else 0.0
+            alpha = np.mean(y) - beta * np.mean(x)
+            residuals[positions] = y - (alpha + beta * x)
+
+        df[col] = residuals
+
+    return df
+
+
+import gc
+
+# Run a full garbage collection now to release frames that are no longer referenced.
+gc.collect()
+
+
+def mad_filter(df, features, n=3):
+    """Clip each feature to median +/- n * MAD, computed over the whole column.
+
+    MAD is the median absolute deviation from the column median. Both medians
+    skip missing values, so a column containing NaN is still clipped (NaN entries
+    stay NaN). A column that is entirely NaN is left unchanged.
+
+    Args:
+        df: frame to winsorise; modified in place.
+        features: column names to clip.
+        n: half-width of the allowed band in units of MAD.
+
+    Returns:
+        ``df`` itself.
+    """
+    for col in features:
+        center = df[col].median()
+        # Series.median ignores NaN; np.median would return NaN and thereby
+        # silently disable the clipping for the whole column.
+        mad = (df[col] - center).abs().median()
+        df[col] = df[col].clip(lower=center - n * mad, upper=center + n * mad)
+    return df
+
+
+def percentile_filter(df, features, lower_percentile=0.01, upper_percentile=0.99):
+    """Clip each feature to its per-date quantile band.
+
+    For every ``trade_date`` the lower and upper quantiles of the feature are
+    computed over that day's rows, and values outside the band are set to the
+    nearest bound. ``df`` is modified in place and returned.
+    """
+
+    def _day_quantile(level):
+        # Broadcast one day's quantile to every row of that day.
+        return lambda day_values: day_values.quantile(level)
+
+    for name in features:
+        floor = df.groupby('trade_date')[name].transform(_day_quantile(lower_percentile))
+        ceiling = df.groupby('trade_date')[name].transform(_day_quantile(upper_percentile))
+        df[name] = np.clip(df[name], floor, ceiling)
+    return df
+
+
+from scipy.stats import iqr
+
+
+def iqr_filter(df, features):
+    """Scale each feature per date as (value - median) / interquartile range.
+
+    Despite its name this rescales rather than clips. When a day's interquartile
+    range is exactly 0 that day's values are left untouched. ``df`` is modified
+    in place and returned.
+    """
+
+    def _robust_scale(day_values):
+        spread = iqr(day_values)
+        if spread != 0:
+            return (day_values - day_values.median()) / spread
+        return day_values
+
+    for name in features:
+        df[name] = df.groupby('trade_date')[name].transform(_robust_scale)
+    return df
+
+
+def quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99, window=60):
+    """Clip each feature to rolling quantile bounds computed inside each trade date.
+
+    Rows are grouped by ``trade_date``; within a group a rolling window of
+    ``min(group size, window)`` rows (in the group's row order) yields a lower
+    and an upper quantile per row, and the value is clipped to that band. Rows
+    whose window is not yet full have no bounds and are left as they are.
+
+    NOTE(review): the window rolls over the rows of one day, not over time, so
+    the bounds depend on row order within a date — confirm this is intended.
+
+    Returns:
+        A clipped copy of ``df``; the input is not modified.
+    """
+    clipped = df.copy()
+
+    def _rolling_quantile(level):
+        def _apply(day_values):
+            span = min(len(day_values), window)
+            return day_values.rolling(window=span).quantile(level)
+        return _apply
+
+    for name in features:
+        floor = clipped.groupby('trade_date')[name].transform(_rolling_quantile(lower_quantile))
+        ceiling = clipped.groupby('trade_date')[name].transform(_rolling_quantile(upper_quantile))
+        clipped[name] = np.clip(clipped[name], floor, ceiling)
+
+    return clipped
+
+
+# In[11]:
+
+
+# print(test_data.head()[['act_factor1', 'act_factor2', 'ts_code', 'trade_date']])
+
+
+# In[12]:
+
+
+from sklearn.preprocessing import StandardScaler
+import lightgbm as lgb
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA
+
+
+def train_light_model(train_data_df, params, feature_columns, callbacks, evals,
+                      print_feature_importance=True, num_boost_round=100,
+                      validation_days=180, use_pca=False, split_date=None):
+    """Train a LightGBM model with query groups on a time-ordered train/validation split.
+
+    Rows are sorted by ``trade_date`` and rows without ``label`` are dropped. The
+    data is split at ``split_date`` (rows on or after it form the validation
+    set); when it is None the split date is the ``validation_days``-th distinct
+    date counted from the end. Each trade date is one query group. Training rows
+    are weighted by date: the weight grows from 1 (oldest date) to 100 (newest)
+    as the square of a linear ramp from 1 to 10.
+
+    The weights are passed to the training ``lgb.Dataset``. They must not be put
+    into ``params``: there ``weight`` is an alias of ``weight_column``, which
+    only applies when LightGBM loads data from a file, and writing to ``params``
+    would also modify the caller's dictionary.
+
+    Args:
+        train_data_df: frame with ``trade_date``, ``label`` and the features.
+        params: LightGBM parameters; not modified.
+        feature_columns: feature names; names containing ``cat`` are categorical.
+        callbacks: LightGBM callbacks passed to ``lgb.train``.
+        evals: evaluation-history dict, used only for plotting.
+        print_feature_importance: plot metric history and feature importance.
+        num_boost_round: maximum number of boosting rounds.
+        validation_days: number of trailing distinct dates used for validation.
+        use_pca: replace the non-categorical features by PCA components (95% variance).
+        split_date: explicit first validation date; overrides ``validation_days``.
+
+    Returns:
+        (model, scaler, pca). ``scaler`` is an unfitted StandardScaler kept for
+        interface compatibility; ``pca`` is None unless ``use_pca`` is set.
+
+    Raises:
+        ValueError: if ``split_date`` is None and ``validation_days`` does not
+            leave at least one training date and one validation date.
+    """
+    # Sort by date so the rows of one trade_date are contiguous and in the same
+    # order as the group sizes computed below.
+    train_data_df = train_data_df.sort_values(by='trade_date')
+
+    # Drop samples without a label
+    train_data_df = train_data_df.dropna(subset=['label'])
+    print('原始训练集大小: ', len(train_data_df))
+
+    # Chronological train/validation split
+    if split_date is None:
+        all_dates = train_data_df['trade_date'].unique()
+        if not 0 < validation_days < len(all_dates):
+            raise ValueError(
+                f"validation_days={validation_days} must be between 1 and "
+                f"{len(all_dates) - 1} for {len(all_dates)} labelled trade dates.")
+        split_date = all_dates[-validation_days]
+    train_data_split = train_data_df[train_data_df['trade_date'] < split_date]
+    val_data_split = train_data_df[train_data_df['trade_date'] >= split_date]
+
+    print(f"划分后的训练集大小: {len(train_data_split)}, 验证集大小: {len(val_data_split)}")
+
+    X_train = train_data_split[feature_columns]
+    y_train = train_data_split['label']
+
+    X_val = val_data_split[feature_columns]
+    y_val = val_data_split['label']
+
+    # Returned unfitted; no scaling is applied in this function.
+    scaler = StandardScaler()
+
+    # Number of rows per trade_date, in date order (the query groups)
+    train_groups = train_data_split.groupby('trade_date').size().tolist()
+    val_groups = val_data_split.groupby('trade_date').size().tolist()
+
+    # Columns whose name contains 'cat' are treated as categorical
+    categorical_feature = [col for col in feature_columns if 'cat' in col]
+
+    pca = None
+    if use_pca:
+        pca = PCA(n_components=0.95)  # keep enough components for 95% of the variance
+        numeric_features = [col for col in feature_columns if col not in categorical_feature]
+        numeric_pca = pca.fit_transform(X_train[numeric_features])
+        X_train = pd.concat([pd.DataFrame(numeric_pca, index=X_train.index), X_train[categorical_feature]], axis=1)
+
+        numeric_pca = pca.transform(X_val[numeric_features])
+        X_val = pd.concat([pd.DataFrame(numeric_pca, index=X_val.index), X_val[categorical_feature]], axis=1)
+
+    # Time-decay sample weights: rank each row's date among the sorted distinct
+    # training dates and look up the squared linear ramp.
+    date_codes, distinct_dates = pd.factorize(train_data_split['trade_date'], sort=True)
+    ramp = np.linspace(1, 10, len(distinct_dates))
+    sample_weight = ramp[date_codes] ** 2
+
+    train_dataset = lgb.Dataset(
+        X_train, label=y_train, group=train_groups, weight=sample_weight,
+        categorical_feature=categorical_feature
+    )
+
+    val_dataset = lgb.Dataset(
+        X_val, label=y_val, group=val_groups,
+        categorical_feature=categorical_feature
+    )
+
+    model = lgb.train(
+        params, train_dataset, num_boost_round=num_boost_round,
+        valid_sets=[train_dataset, val_dataset], valid_names=['train', 'valid'],
+        callbacks=callbacks
+    )
+
+    if print_feature_importance:
+        lgb.plot_metric(evals)
+        lgb.plot_importance(model, importance_type='split', max_num_features=20)
+        plt.show()
+
+    return model, scaler, pca
+
+
+# In[13]:
+
+
+# Build the training target on the full table.
+days = 2
+df = df.sort_values(by=['ts_code', 'trade_date'])
+# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)
+# Return from the next row's open to the close `days` rows ahead, per stock.
+df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \
+                      df.groupby('ts_code')['open'].shift(-1)
+# Std of pct_chg over the next `days` rows (rolling window shifted back by `days`), per stock.
+df['future_volatility'] = (
+    df.groupby('ts_code')['pct_chg']
+    .transform(lambda x: x.rolling(days).std().shift(-days))
+)
+# Future return penalised by 0.3 x the coming max drawdown.
+# NOTE(review): the window is the literal 2 here rather than `days`; the two agree only while days == 2.
+df['future_score'] = calculate_score(df, days=2, lambda_param=0.3)
+# Per-date rank label: up to 20 quantile bins of future_score (duplicate bin edges are dropped).
+df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(
+    lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')
+)
+# df['future_score'] = (
+#         0.7 * df['future_return']
+#         * 0.3 * df['future_volatility']
+# )
+
+
+# In[30]:
+
+
+def select_pre_zt_stocks_dynamic(
+        stock_df,
+        top_n=1000,
+        rank_col='return_20',
+):
+    """Keep, for every trade date, the rows with the largest ``rank_col`` values.
+
+    Args:
+        stock_df: frame with ``trade_date`` and ``rank_col``.
+        top_n: number of rows kept per date (default 1000).
+        rank_col: column used for ranking (default ``'return_20'``).
+
+    Returns:
+        The selected rows with their original index labels; within each date the
+        rows are ordered by ``rank_col`` descending.
+    """
+    return stock_df.groupby('trade_date', group_keys=False).apply(
+        lambda day_rows: day_rows.nlargest(top_n, rank_col)
+    )
+
+
+# Candidate pool: per date, the 1000 rows with the highest return_20.
+pdf = select_pre_zt_stocks_dynamic(df)
+# Boolean mask marking rows whose future_return lies within the global 1%-99% quantile band.
+# NOTE(review): the mask carries pdf's index at this point; pdf is re-indexed by the
+# merge that follows, so confirm the mask still aligns wherever it is applied.
+filter_index = pdf['future_return'].between(pdf['future_return'].quantile(0.01), pdf['future_return'].quantile(0.99))
+
+# filter_index = pdf['future_volatility'].between(pdf['future_volatility'].quantile(0.01),
+#                                                 pdf['future_volatility'].quantile(0.99)) | filter_index
+
+
+# In[ ]:
+
+
+# Join the industry factors, order by date and turn infinities into NaN.
+pdf = pdf.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')
+pdf = pdf.sort_values(['trade_date'])
+pdf = pdf.replace([np.inf, -np.inf], np.nan)
+
+
+# Start from every column of pdf (the condition below is always true) and strip
+# identifiers, targets and raw inputs step by step.
+feature_columns = [col for col in pdf.columns if col in pdf.columns]
+feature_columns = [col for col in feature_columns if col not in ['trade_date',
+                                                                 'ts_code',
+                                                                 'label']]
+# Anything named after the forward-looking targets must not leak into the features.
+feature_columns = [col for col in feature_columns if 'future' not in col]
+feature_columns = [col for col in feature_columns if 'label' not in col]
+feature_columns = [col for col in feature_columns if 'score' not in col]
+feature_columns = [col for col in feature_columns if 'gen' not in col]
+feature_columns = [col for col in feature_columns if 'cat_l2_code' not in col]
+# Raw input columns are excluded; only derived factors remain.
+feature_columns = [col for col in feature_columns if col not in origin_columns]
+feature_columns = [col for col in feature_columns if not col.startswith('_')]
+
+# float64/int64 features only; these are the ones clipped and standardised below.
+numeric_columns = pdf.select_dtypes(include=['float64', 'int64']).columns
+numeric_columns = [col for col in numeric_columns if col in feature_columns]
+
+# feature_columns, _ = remove_shifted_features(pdf, feature_columns, size=0.8)
+
+# Clip extremes, then z-score every numeric feature within each trade date.
+pdf = quantile_filter(pdf, numeric_columns)
+
+pdf = cross_sectional_standardization(pdf, numeric_columns)
+
+
+# print('去极值')
+# train_data = quantile_filter(train_data, numeric_columns)  # clip extremes
+# # print('中性化')
+# # train_data = neutralize_manual(train_data, numeric_columns, industry_col='cat_l2_code', mkt_cap_col='log(circ_mv)')  # neutralise
+# print('去极值')
+# test_data = quantile_filter(test_data, numeric_columns)  # clip extremes
+
+# Drop features that are more than 0.9 correlated with an earlier one (names containing 'act'/'af' are kept).
+feature_columns = remove_highly_correlated_features(pdf,
+                                                    feature_columns)
+print(len(pdf))
+
+
+# In[123]:
+
+
+# print('train data size: ', len(train_data))
+
+# Gain per label value for the ranking objective: the square of the label index.
+# NOTE(review): unique() also counts NaN labels, so the list may be one entry
+# longer than the number of real label values — confirm this is harmless.
+label_gain = list(range(len(df['label'].unique())))
+label_gain = [gain * gain for gain in label_gain]
+# Module-level LightGBM parameters (lambdarank objective, NDCG metric).
+light_params = {
+    'label_gain': label_gain,
+    'objective': 'lambdarank',
+    'metric': 'ndcg',
+    'learning_rate': 0.03,
+    'num_leaves': 32,
+    # 'min_data_in_leaf': 128,
+    'max_depth': 8,
+    'max_bin': 32,
+    'feature_fraction': 0.7,
+    # 'bagging_fraction': 0.7,
+    # NOTE(review): bagging_freq is set while bagging_fraction is commented out — confirm bagging is meant to be off.
+    'bagging_freq': 5,
+    'lambda_l1': 0.1,
+    'lambda_l2': 0.1,
+    'boosting': 'gbdt',
+    'verbosity': -1,
+    'extra_trees': True,
+    'max_position': 5,
+    'ndcg_at': 1,
+    'quant_train_renew_leaf': True,
+    'lambdarank_truncation_level': 3,
+    # 'lambdarank_position_bias_regularization': 1,
+    'seed': 7
+}
+# Filled with the per-iteration metric history when a record_evaluation callback is given this dict.
+evals = {}
+
+gc.collect()
+
+
+# In[128]:
+
+
+# Run a full garbage collection before the rolling training loop is defined and used.
+gc.collect()
+
+
+def rolling_train_predict(df, train_days, test_days, feature_columns_origin, days=5, use_pca=False, validation_days=60,
+                          filter_index=None):
+    # 1. 按照交易日期排序
+    unique_dates = df[df['trade_date'] >= '2020-01-01']['trade_date'].unique().tolist()
+    unique_dates = sorted(unique_dates)
+    n = len(unique_dates)
+
+    # 2. 计算需要跳过的天数，使后续窗口对齐
+    extra_days = (n - train_days) % test_days
+    start_index = extra_days  # 从此索引开始滚动
+
+    predictions_list = []
+
+    for start in range(start_index, n - train_days - test_days + 1, test_days):
+
+        train_dates = unique_dates[start: start + train_days]
+        test_dates = unique_dates[start + train_days: start + train_days + test_days]
+
+        # 根据日期筛选数据
+        train_data = df[filter_index & df['trade_date'].isin(train_dates)]
+        test_data = df[df['trade_date'].isin(test_dates)]
+
+        train_data = train_data.sort_values('trade_date')
+        test_data = test_data.sort_values('trade_date')
+
+        # feature_columns, _ = remove_shifted_features(train_data, feature_columns_origin, size=0.8)
+
+        train_data = train_data.dropna(subset=feature_columns)
+        train_data = train_data.dropna(subset=['label'])
+        train_data = train_data.reset_index(drop=True)
+
+        # print(test_data.tail())
+        test_data = test_data.dropna(subset=feature_columns)
+        # test_data = test_data.dropna(subset=['label'])
+        test_data = test_data.reset_index(drop=True)
+
+        # print(len(train_data))
+        print(f"最小日期: {train_data['trade_date'].min().strftime('%Y-%m-%d')}")
+        print(f"最大日期: {train_data['trade_date'].max().strftime('%Y-%m-%d')}")
+        # print(len(test_data))
+        print(f"最小日期: {test_data['trade_date'].min().strftime('%Y-%m-%d')}")
+        print(f"最大日期: {test_data['trade_date'].max().strftime('%Y-%m-%d')}")
+
+        cat_columns = [col for col in df.columns if col.startswith('cat')]
+        for col in cat_columns:
+            train_data[col] = train_data[col].astype('category')
+            test_data[col] = test_data[col].astype('category')
+
+        label_gain = list(range(len(train_data['label'].unique())))
+        label_gain = [(gain + 1) * (gain + 1) for gain in label_gain]
+        light_params['label_gain'] = label_gain
+
+        # ud = train_data["trade_date"].unique()
+        # date_weights = {date: weight for date, weight in zip(ud, np.linspace(1, 2, len(unique_dates)))}
+        # light_params['weight'] = train_data["trade_date"].map(date_weights).tolist()
+
+        # print(f'feature_columns: {feature_columns}')
+        # feature_contri = [2 if feat.startswith('act_factor') else 1 for feat in feature_columns]
+        # light_params['feature_contri'] = feature_contri
+        model, _, _ = train_light_model(train_data.dropna(subset=['label']),
+                                        light_params, feature_columns,
+                                        [lgb.log_evaluation(period=100),
+                                         lgb.callback.record_evaluation(evals),
+                                         lgb.early_stopping(100, first_metric_only=True)
+                                         ], evals,
+                                        num_boost_round=3000, validation_days=validation_days,
+                                        print_feature_importance=False, use_pca=False)
+
+        score_df = test_data.copy()
+        score_df['score'] = model.predict(score_df[feature_columns])
+        score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]
+        score_df = score_df[['trade_date', 'score', 'ts_code']]
+        predictions_list.append(score_df)
+
+        # m = 5
+        # all_data = []
+        # for i, trade_date in enumerate(sorted(score_df['trade_date'].unique().tolist())):
+        #     # 提取当前日期的数据
+        #     current_data = score_df[score_df['trade_date'] == trade_date]
+        #     all_data.append(current_data)
+        #
+        #     numeric_columns = [col for col in feature_columns if col in current_data.select_dtypes(include=['float64', 'int64']).columns]
+        #     current_data = cross_sectional_standardization(current_data, numeric_columns)
+        #     current_data['score'] = model.predict(current_data[feature_columns])
+        #     daily_top_score = current_data.loc[[current_data['score'].idxmax()]]
+        #     predictions_list.append(daily_top_score[['trade_date', 'score', 'ts_code']])
+        #
+        #     if i % m == 0:
+        #         train_data_split = pd.concat(all_data)
+        #         train_data_split = train_data_split.dropna(subset=['label'])
+        #
+        #         X_train = train_data_split[feature_columns]
+        #         y_train = train_data_split['label']
+        #
+        #         train_groups = train_data_split.groupby('trade_date').size().tolist()
+        #         categorical_feature = [col for col in feature_columns if 'cat' in col]
+        #
+        #         train_dataset = lgb.Dataset(
+        #             X_train, label=y_train, group=train_groups,
+        #             categorical_feature=categorical_feature
+        #         )
+        #
+        #         model = lgb.train(
+        #             light_params, train_dataset, num_boost_round=36,
+        #             init_model=model
+        #         )
+        #         all_data = []
+
+    final_predictions = pd.concat(predictions_list, ignore_index=True)
+    return final_predictions
+
+
+# In[129]:
+
+
+gc.collect()  # run a garbage-collection pass before the training run below
+
+print(df[df['ts_code'] == '000001.SZ'].tail(1)[['act_factor1', 'act_factor2']])  # last 000001.SZ row of df, two factor columns; NOTE(review): this reads df while training below uses pdf
+print('finish')
+# qdf = qdf[qdf['trade_date'] >= '2022-01-01']
+
+final_predictions = rolling_train_predict(pdf[pdf['trade_date'] >= '2020-01-01'], 500, 20, feature_columns,  # only rows dated 2020-01-01 or later are passed in; 500 and 20 are positional args defined by the function signature
+                                          days=days, validation_days=60, filter_index=filter_index)
+final_predictions.to_csv('predictions_test.tsv', index=False)  # NOTE(review): to_csv defaults to a comma separator although the file is named .tsv - confirm what readers expect
+
+
+# In[126]:
+
+
+print(df[df['ts_code'] == '000001.SZ'].tail(1)[['act_factor1', 'act_factor2']])  # same printout as in the training cell: last 000001.SZ row of df
+print('finish')
+
+
+# In[29]:
+
+
+train_data = pdf[filter_index & (pdf['trade_date'] == '2023-01-03')]  # rows dated 2023-01-03 that also pass filter_index
+train_data = train_data.dropna(subset=['label'])  # keep only rows that have a label
+train_data = train_data.reset_index(drop=True)
+print(len(train_data))  # number of labelled rows left for that day
+
+
+# In[34]:
+
+
+# filter_index = pdf['future_return'].between(pdf['future_return'].quantile(0.01), pdf['future_return'].quantile(0.99))
+
+train_data = pdf[filter_index & (pdf['trade_date'] == '2023-01-03')]  # rows dated 2023-01-03 that pass filter_index; rows without a label are not dropped in this cell
+print(len(train_data))  # row count for that day
+
--- a/main/train/RollingRankCopy.ipynb
+++ b/main/train/RollingRankCopy.ipynb
--- a/main/train/TRank.ipynb
+++ b/main/train/TRank.ipynb
--- a/main/train/Transformer.ipynb
+++ b/main/train/Transformer.ipynb
--- a/main/train/UpdateClassify.ipynb
+++ b/main/train/UpdateClassify.ipynb
--- a/main/train/UpdateRank.ipynb
+++ b/main/train/UpdateRank.ipynb
--- a/main/train/UpdateRegression.ipynb
+++ b/main/train/UpdateRegression.ipynb
--- a/main/train/UpdateSGD.ipynb
+++ b/main/train/UpdateSGD.ipynb
--- a/main/train/V1-copy.ipynb
+++ b/main/train/V1-copy.ipynb
@@ -0,0 +1,896 @@
+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:52:54.170824Z",
+     "start_time": "2025-02-09T14:52:53.544850Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "from code.utils.utils import read_and_merge_h5_data"
+   ],
+   "id": "79a7758178bafdd3",
+   "outputs": [],
+   "execution_count": 1
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:53:36.873700Z",
+     "start_time": "2025-02-09T14:52:54.170824Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "print('daily data')\n",
+    "df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
+    "                            columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol'],\n",
+    "                            df=None)\n",
+    "\n",
+    "print('daily basic')\n",
+    "df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic_with_st',\n",
+    "                            columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
+    "                                     'is_st'], df=df)\n",
+    "\n",
+    "print('stk limit')\n",
+    "df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
+    "                            columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
+    "                            df=df)\n",
+    "print('money flow')\n",
+    "df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
+    "                            columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
+    "                                     'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
+    "                            df=df)"
+   ],
+   "id": "a79cafb06a7e0e43",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "daily data\n",
+      "daily basic\n",
+      "stk limit\n",
+      "money flow\n"
+     ]
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:53:37.426404Z",
+     "start_time": "2025-02-09T14:53:36.955552Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "origin_columns = df.columns.tolist()",
+   "id": "c4e9e1d31da6dba6",
+   "outputs": [],
+   "execution_count": 3
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:53:38.164112Z",
+     "start_time": "2025-02-09T14:53:38.070007Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import numpy as np\n",
+    "import talib\n",
+    "\n",
+    "\n",
+    "def get_technical_factor(df):\n",
+    "    df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
+    "    df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
+    "\n",
+    "    df['atr_14'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=14)\n",
+    "    df['atr_6'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=6)\n",
+    "\n",
+    "    df['obv'] = talib.OBV(df['close'], df['vol'])\n",
+    "    df['maobv_6'] = talib.SMA(df['obv'], timeperiod=6)\n",
+    "    df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
+    "\n",
+    "    df['rsi_3'] = talib.RSI(df['close'], timeperiod=3)\n",
+    "    df['rsi_6'] = talib.RSI(df['close'], timeperiod=6)\n",
+    "    df['rsi_9'] = talib.RSI(df['close'], timeperiod=9)\n",
+    "\n",
+    "    df['return_10'] = df['close'] / df['close'].shift(10) - 1\n",
+    "    df['return_20'] = df['close'] / df['close'].shift(20) - 1\n",
+    "\n",
+    "    # # 计算 _rank_return_10 和 _rank_return_20\n",
+    "    # df['_rank_return_10'] = df['return_10'].rank(pct=True)\n",
+    "    # df['_rank_return_20'] = df['return_20'].rank(pct=True)\n",
+    "\n",
+    "    # avg_close_5: 5-row rolling mean of close divided by the current close\n",
+    "    df['avg_close_5'] = df['close'].rolling(window=5).mean() / df['close']\n",
+    "\n",
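+    "    # Rolling std of close.pct_change() over 5/15/25/90 rows: std_return_5, std_return_15, std_return_25, std_return_90, std_return_90_2 (the last uses close lagged by 10 rows). NOTE(review): shift(-1) pulls the next row's return into each window (look-ahead) - confirm this is intended\n",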
+    "    df['std_return_5'] = df['close'].pct_change().shift(-1).rolling(window=5).std()\n",
+    "    df['std_return_15'] = df['close'].pct_change().shift(-1).rolling(window=15).std()\n",
+    "    df['std_return_25'] = df['close'].pct_change().shift(-1).rolling(window=25).std()\n",
+    "    df['std_return_90'] = df['close'].pct_change().shift(-1).rolling(window=90).std()\n",
+    "    df['std_return_90_2'] = df['close'].shift(10).pct_change().shift(-1).rolling(window=90).std()\n",
+    "\n",
+    "    # Ratios std_return_5 / std_return_90 and std_return_5 / std_return_25\n",
+    "    df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
+    "    df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
+    "\n",
+    "    # Difference std_return_90 - std_return_90_2\n",
+    "    df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_act_factor(df):\n",
+    "    # 计算 m_ta_ema(close, 5), m_ta_ema(close, 13), m_ta_ema(close, 20), m_ta_ema(close, 60)\n",
+    "    df['ema_5'] = talib.EMA(df['close'], timeperiod=5)\n",
+    "    df['ema_13'] = talib.EMA(df['close'], timeperiod=13)\n",
+    "    df['ema_20'] = talib.EMA(df['close'], timeperiod=20)\n",
+    "    df['ema_60'] = talib.EMA(df['close'], timeperiod=60)\n",
+    "\n",
+    "    # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
+    "    df['act_factor1'] = np.arctan((df['ema_5'] / df['ema_5'].shift(1) - 1) * 100) * 57.3 / 50\n",
+    "    df['act_factor2'] = np.arctan((df['ema_13'] / df['ema_13'].shift(1) - 1) * 100) * 57.3 / 40\n",
+    "    df['act_factor3'] = np.arctan((df['ema_20'] / df['ema_20'].shift(1) - 1) * 100) * 57.3 / 21\n",
+    "    df['act_factor4'] = np.arctan((df['ema_60'] / df['ema_60'].shift(1) - 1) * 100) * 57.3 / 10\n",
+    "\n",
+    "    # 计算 act_factor5 和 act_factor6\n",
+    "    df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
+    "    df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
+    "        df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
+    "\n",
+    "    # 根据 'trade_date' 进行分组，在每个组内分别计算 'act_factor1', 'act_factor2', 'act_factor3' 的排名\n",
+    "    df['rank_act_factor1'] = df.groupby('trade_date')['act_factor1'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor2'] = df.groupby('trade_date')['act_factor2'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor3'] = df.groupby('trade_date')['act_factor3'].rank(ascending=False, pct=True)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_money_flow_factor(df):\n",
+    "    df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
+    "\n",
+    "    df['buy_lg_vol - sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
+    "    df['buy_elg_vol - sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
+    "\n",
+    "    # # Other candidate money-flow fields (currently disabled):\n",
+    "    # df['net_active_buy_volume_main'] = df['net_mf_vol'] / df['buy_sm_vol']\n",
+    "    # df['netflow_amount_main'] = df['net_mf_vol'] / df['buy_sm_vol']  # 这里假设 'net_mf_vol' 是主流资金流\n",
+    "\n",
+    "    # df['active_sell_volume_large'] = df['sell_lg_vol'] / df['sell_sm_vol']\n",
+    "    # df['active_sell_volume_big'] = df['sell_elg_vol'] / df['sell_sm_vol']\n",
+    "    # df['active_sell_volume_small'] = df['sell_sm_vol'] / df['sell_sm_vol']\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_alpha_factor(df):\n",
+    "    df['alpha_022'] = df['close'] - df['close'].shift(5)\n",
+    "\n",
+    "    # alpha_003: (close - open) / (high - low)\n",
+    "    df['alpha_003'] = (df['close'] - df['open']) / (df['high'] - df['low'])\n",
+    "\n",
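+    "    # alpha_007: rank(correlation(close, volume, 5)). NOTE(review): the rolling corr is a Series, and Series.rank(axis=1) has no axis 1 (expected to raise ValueError); a rank within each trade_date was probably intended - confirm\n",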
+    "    df['alpha_007'] = df['close'].rolling(5).corr(df['vol']).rank(axis=1)\n",
+    "\n",
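+    "    # alpha_013: rank(sum(close, 5) - sum(close, 20)). NOTE(review): the difference is a Series, and Series.rank(axis=1) has no axis 1 (expected to raise ValueError); a rank within each trade_date was probably intended - confirm\n",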
+    "    df['alpha_013'] = (df['close'].rolling(5).sum() - df['close'].rolling(20).sum()).rank(axis=1)\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_future_data(df):\n",
+    "    df['future_return1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
+    "    df['future_return2'] = (df['open'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return3'] = (df['close'].shift(-2) - df['close'].shift(-1)) / df['close'].shift(-1)\n",
+    "    df['future_return4'] = (df['close'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return5'] = (df['close'].shift(-5) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return6'] = (df['close'].shift(-10) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return7'] = (df['close'].shift(-20) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_close1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
+    "    df['future_close2'] = (df['close'].shift(-2) - df['close']) / df['close']\n",
+    "    df['future_close3'] = (df['close'].shift(-3) - df['close']) / df['close']\n",
+    "    df['future_close4'] = (df['close'].shift(-4) - df['close']) / df['close']\n",
+    "    df['future_close5'] = (df['close'].shift(-5) - df['close']) / df['close']\n",
+    "    df['future_af11'] = df['act_factor1'].shift(-1)\n",
+    "    df['future_af12'] = df['act_factor1'].shift(-2)\n",
+    "    df['future_af13'] = df['act_factor1'].shift(-3)\n",
+    "    df['future_af14'] = df['act_factor1'].shift(-4)\n",
+    "    df['future_af15'] = df['act_factor1'].shift(-5)\n",
+    "    df['future_af21'] = df['act_factor2'].shift(-1)\n",
+    "    df['future_af22'] = df['act_factor2'].shift(-2)\n",
+    "    df['future_af23'] = df['act_factor2'].shift(-3)\n",
+    "    df['future_af24'] = df['act_factor2'].shift(-4)\n",
+    "    df['future_af25'] = df['act_factor2'].shift(-5)\n",
+    "    df['future_af31'] = df['act_factor3'].shift(-1)\n",
+    "    df['future_af32'] = df['act_factor3'].shift(-2)\n",
+    "    df['future_af33'] = df['act_factor3'].shift(-3)\n",
+    "    df['future_af34'] = df['act_factor3'].shift(-4)\n",
+    "    df['future_af35'] = df['act_factor3'].shift(-5)\n",
+    "\n",
+    "    return df\n"
+   ],
+   "id": "a735bc02ceb4d872",
+   "outputs": [],
+   "execution_count": 4
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:53:49.153376Z",
+     "start_time": "2025-02-09T14:53:38.164112Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "df = get_technical_factor(df)\n",
+    "df = get_act_factor(df)\n",
+    "df = get_money_flow_factor(df)\n",
+    "df = get_future_data(df)\n",
+    "# df = df.drop(columns=origin_columns)\n",
+    "\n",
+    "print(df.info())"
+   ],
+   "id": "53f86ddc0677a6d7",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 8364308 entries, 0 to 8364307\n",
+      "Data columns (total 83 columns):\n",
+      " #   Column                           Dtype         \n",
+      "---  ------                           -----         \n",
+      " 0   ts_code                          object        \n",
+      " 1   trade_date                       datetime64[ns]\n",
+      " 2   open                             float64       \n",
+      " 3   close                            float64       \n",
+      " 4   high                             float64       \n",
+      " 5   low                              float64       \n",
+      " 6   vol                              float64       \n",
+      " 7   is_st                            object        \n",
+      " 8   up_limit                         float64       \n",
+      " 9   down_limit                       float64       \n",
+      " 10  buy_sm_vol                       float64       \n",
+      " 11  sell_sm_vol                      float64       \n",
+      " 12  buy_lg_vol                       float64       \n",
+      " 13  sell_lg_vol                      float64       \n",
+      " 14  buy_elg_vol                      float64       \n",
+      " 15  sell_elg_vol                     float64       \n",
+      " 16  net_mf_vol                       float64       \n",
+      " 17  up                               float64       \n",
+      " 18  down                             float64       \n",
+      " 19  atr_14                           float64       \n",
+      " 20  atr_6                            float64       \n",
+      " 21  obv                              float64       \n",
+      " 22  maobv_6                          float64       \n",
+      " 23  obv-maobv_6                      float64       \n",
+      " 24  rsi_3                            float64       \n",
+      " 25  rsi_6                            float64       \n",
+      " 26  rsi_9                            float64       \n",
+      " 27  return_10                        float64       \n",
+      " 28  return_20                        float64       \n",
+      " 29  avg_close_5                      float64       \n",
+      " 30  std_return_5                     float64       \n",
+      " 31  std_return_15                    float64       \n",
+      " 32  std_return_25                    float64       \n",
+      " 33  std_return_90                    float64       \n",
+      " 34  std_return_90_2                  float64       \n",
+      " 35  std_return_5 / std_return_90     float64       \n",
+      " 36  std_return_5 / std_return_25     float64       \n",
+      " 37  std_return_90 - std_return_90_2  float64       \n",
+      " 38  ema_5                            float64       \n",
+      " 39  ema_13                           float64       \n",
+      " 40  ema_20                           float64       \n",
+      " 41  ema_60                           float64       \n",
+      " 42  act_factor1                      float64       \n",
+      " 43  act_factor2                      float64       \n",
+      " 44  act_factor3                      float64       \n",
+      " 45  act_factor4                      float64       \n",
+      " 46  act_factor5                      float64       \n",
+      " 47  act_factor6                      float64       \n",
+      " 48  rank_act_factor1                 float64       \n",
+      " 49  rank_act_factor2                 float64       \n",
+      " 50  rank_act_factor3                 float64       \n",
+      " 51  active_buy_volume_large          float64       \n",
+      " 52  active_buy_volume_big            float64       \n",
+      " 53  active_buy_volume_small          float64       \n",
+      " 54  buy_lg_vol - sell_lg_vol         float64       \n",
+      " 55  buy_elg_vol - sell_elg_vol       float64       \n",
+      " 56  future_return1                   float64       \n",
+      " 57  future_return2                   float64       \n",
+      " 58  future_return3                   float64       \n",
+      " 59  future_return4                   float64       \n",
+      " 60  future_return5                   float64       \n",
+      " 61  future_return6                   float64       \n",
+      " 62  future_return7                   float64       \n",
+      " 63  future_close1                    float64       \n",
+      " 64  future_close2                    float64       \n",
+      " 65  future_close3                    float64       \n",
+      " 66  future_close4                    float64       \n",
+      " 67  future_close5                    float64       \n",
+      " 68  future_af11                      float64       \n",
+      " 69  future_af12                      float64       \n",
+      " 70  future_af13                      float64       \n",
+      " 71  future_af14                      float64       \n",
+      " 72  future_af15                      float64       \n",
+      " 73  future_af21                      float64       \n",
+      " 74  future_af22                      float64       \n",
+      " 75  future_af23                      float64       \n",
+      " 76  future_af24                      float64       \n",
+      " 77  future_af25                      float64       \n",
+      " 78  future_af31                      float64       \n",
+      " 79  future_af32                      float64       \n",
+      " 80  future_af33                      float64       \n",
+      " 81  future_af34                      float64       \n",
+      " 82  future_af35                      float64       \n",
+      "dtypes: datetime64[ns](1), float64(80), object(2)\n",
+      "memory usage: 5.2+ GB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "execution_count": 5
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:55:28.712343Z",
+     "start_time": "2025-02-09T14:53:49.279168Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def filter_data(df):\n",
+    "    df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor3'))\n",
+    "    df = df[df['is_st'] == False]\n",
+    "    df = df[df['is_st'] == False]\n",
+    "    df = df[~df['ts_code'].str.startswith('30')]\n",
+    "    df = df[~df['ts_code'].str.startswith('68')]\n",
+    "    df = df[~df['ts_code'].str.startswith('8')]\n",
+    "    df = df.reset_index(drop=True)\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "df = filter_data(df)\n",
+    "print(df.info())"
+   ],
+   "id": "dbe2fd8021b9417f",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 1136157 entries, 0 to 1136156\n",
+      "Data columns (total 83 columns):\n",
+      " #   Column                           Non-Null Count    Dtype         \n",
+      "---  ------                           --------------    -----         \n",
+      " 0   ts_code                          1136157 non-null  object        \n",
+      " 1   trade_date                       1136157 non-null  datetime64[ns]\n",
+      " 2   open                             1136157 non-null  float64       \n",
+      " 3   close                            1136157 non-null  float64       \n",
+      " 4   high                             1136157 non-null  float64       \n",
+      " 5   low                              1136157 non-null  float64       \n",
+      " 6   vol                              1136157 non-null  float64       \n",
+      " 7   is_st                            1136157 non-null  object        \n",
+      " 8   up_limit                         1135878 non-null  float64       \n",
+      " 9   down_limit                       1135878 non-null  float64       \n",
+      " 10  buy_sm_vol                       1135663 non-null  float64       \n",
+      " 11  sell_sm_vol                      1135663 non-null  float64       \n",
+      " 12  buy_lg_vol                       1135663 non-null  float64       \n",
+      " 13  sell_lg_vol                      1135663 non-null  float64       \n",
+      " 14  buy_elg_vol                      1135663 non-null  float64       \n",
+      " 15  sell_elg_vol                     1135663 non-null  float64       \n",
+      " 16  net_mf_vol                       1135663 non-null  float64       \n",
+      " 17  up                               1136157 non-null  float64       \n",
+      " 18  down                             1136157 non-null  float64       \n",
+      " 19  atr_14                           1136157 non-null  float64       \n",
+      " 20  atr_6                            1136157 non-null  float64       \n",
+      " 21  obv                              1136157 non-null  float64       \n",
+      " 22  maobv_6                          1136157 non-null  float64       \n",
+      " 23  obv-maobv_6                      1136157 non-null  float64       \n",
+      " 24  rsi_3                            1136157 non-null  float64       \n",
+      " 25  rsi_6                            1136157 non-null  float64       \n",
+      " 26  rsi_9                            1136157 non-null  float64       \n",
+      " 27  return_10                        1136157 non-null  float64       \n",
+      " 28  return_20                        1136157 non-null  float64       \n",
+      " 29  avg_close_5                      1136157 non-null  float64       \n",
+      " 30  std_return_5                     1136157 non-null  float64       \n",
+      " 31  std_return_15                    1136157 non-null  float64       \n",
+      " 32  std_return_25                    1136157 non-null  float64       \n",
+      " 33  std_return_90                    1136131 non-null  float64       \n",
+      " 34  std_return_90_2                  1136129 non-null  float64       \n",
+      " 35  std_return_5 / std_return_90     1136131 non-null  float64       \n",
+      " 36  std_return_5 / std_return_25     1136157 non-null  float64       \n",
+      " 37  std_return_90 - std_return_90_2  1136129 non-null  float64       \n",
+      " 38  ema_5                            1136157 non-null  float64       \n",
+      " 39  ema_13                           1136157 non-null  float64       \n",
+      " 40  ema_20                           1136157 non-null  float64       \n",
+      " 41  ema_60                           1136153 non-null  float64       \n",
+      " 42  act_factor1                      1136157 non-null  float64       \n",
+      " 43  act_factor2                      1136157 non-null  float64       \n",
+      " 44  act_factor3                      1136157 non-null  float64       \n",
+      " 45  act_factor4                      1136152 non-null  float64       \n",
+      " 46  act_factor5                      1136152 non-null  float64       \n",
+      " 47  act_factor6                      1136157 non-null  float64       \n",
+      " 48  rank_act_factor1                 1136157 non-null  float64       \n",
+      " 49  rank_act_factor2                 1136157 non-null  float64       \n",
+      " 50  rank_act_factor3                 1136157 non-null  float64       \n",
+      " 51  active_buy_volume_large          1135659 non-null  float64       \n",
+      " 52  active_buy_volume_big            1135636 non-null  float64       \n",
+      " 53  active_buy_volume_small          1135663 non-null  float64       \n",
+      " 54  buy_lg_vol - sell_lg_vol         1135660 non-null  float64       \n",
+      " 55  buy_elg_vol - sell_elg_vol       1135640 non-null  float64       \n",
+      " 56  future_return1                   1136157 non-null  float64       \n",
+      " 57  future_return2                   1136157 non-null  float64       \n",
+      " 58  future_return3                   1136157 non-null  float64       \n",
+      " 59  future_return4                   1136157 non-null  float64       \n",
+      " 60  future_return5                   1136157 non-null  float64       \n",
+      " 61  future_return6                   1136157 non-null  float64       \n",
+      " 62  future_return7                   1136157 non-null  float64       \n",
+      " 63  future_close1                    1136157 non-null  float64       \n",
+      " 64  future_close2                    1136157 non-null  float64       \n",
+      " 65  future_close3                    1136157 non-null  float64       \n",
+      " 66  future_close4                    1136157 non-null  float64       \n",
+      " 67  future_close5                    1136157 non-null  float64       \n",
+      " 68  future_af11                      1136157 non-null  float64       \n",
+      " 69  future_af12                      1136157 non-null  float64       \n",
+      " 70  future_af13                      1136157 non-null  float64       \n",
+      " 71  future_af14                      1136157 non-null  float64       \n",
+      " 72  future_af15                      1136157 non-null  float64       \n",
+      " 73  future_af21                      1136157 non-null  float64       \n",
+      " 74  future_af22                      1136157 non-null  float64       \n",
+      " 75  future_af23                      1136157 non-null  float64       \n",
+      " 76  future_af24                      1136157 non-null  float64       \n",
+      " 77  future_af25                      1136157 non-null  float64       \n",
+      " 78  future_af31                      1136157 non-null  float64       \n",
+      " 79  future_af32                      1136157 non-null  float64       \n",
+      " 80  future_af33                      1136157 non-null  float64       \n",
+      " 81  future_af34                      1136157 non-null  float64       \n",
+      " 82  future_af35                      1136157 non-null  float64       \n",
+      "dtypes: datetime64[ns](1), float64(80), object(2)\n",
+      "memory usage: 719.5+ MB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "execution_count": 6
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T15:00:45.828404Z",
+     "start_time": "2025-02-09T15:00:45.294830Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def remove_outliers_iqr(series, lower_quantile=0.05, upper_quantile=0.95, threshold=1.5):\n",
+    "    Q1 = series.quantile(lower_quantile)\n",
+    "    Q3 = series.quantile(upper_quantile)\n",
+    "    IQR = Q3 - Q1\n",
+    "    lower_bound = Q1 - threshold * IQR\n",
+    "    upper_bound = Q3 + threshold * IQR\n",
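+    "    # Boolean mask: True where the value lies within [lower_bound, upper_bound]; nothing is dropped here, the caller applies the mask\n",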
+    "    return (series >= lower_bound) & (series <= upper_bound)\n",
+    "\n",
+    "\n",
+    "def neutralize_labels(labels, features, feature_columns, z_threshold=3, method='regression'):\n",
+    "    labels_no_outliers = remove_outliers_iqr(labels)\n",
+    "    return labels_no_outliers\n",
+    "\n",
+    "\n",
+    "train_data = df[df['trade_date'] <= '2023-01-01']\n",
+    "test_data = df[df['trade_date'] >= '2023-01-01']\n",
+    "\n",
+    "feature_columns = [col for col in df.columns if col not in ['trade_date',\n",
+    "                                                            'ts_code',\n",
+    "                                                            'label']]\n",
+    "feature_columns = [col for col in feature_columns if 'future' not in col]\n",
+    "feature_columns = [col for col in feature_columns if 'score' not in col]\n",
+    "feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
+    "\n",
+    "# for column in [column for column in train_data.columns if 'future' in column]:\n",
+    "#     label_index = neutralize_labels(train_data[column], train_data, feature_columns, z_threshold=3, method='regression')\n",
+    "#     train_data = train_data[label_index]\n",
+    "#     label_index = neutralize_labels(test_data[column], test_data, feature_columns, z_threshold=3, method='regression')\n",
+    "#     test_data = test_data[label_index]\n",
+    "\n",
+    "print(len(train_data))\n",
+    "print(len(test_data))"
+   ],
+   "id": "5f3d9aece75318cd",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['up', 'down', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'obv-maobv_6', 'rsi_3', 'rsi_6', 'rsi_9', 'return_10', 'return_20', 'avg_close_5', 'std_return_5', 'std_return_15', 'std_return_25', 'std_return_90', 'std_return_90_2', 'std_return_5 / std_return_90', 'std_return_5 / std_return_25', 'std_return_90 - std_return_90_2', 'ema_5', 'ema_13', 'ema_20', 'ema_60', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol - sell_lg_vol', 'buy_elg_vol - sell_elg_vol']\n"
+     ]
+    }
+   ],
+   "execution_count": 19
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:56:05.319915Z",
+     "start_time": "2025-02-09T14:56:03.355725Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def get_qcuts(series, quantiles):\n",
+    "    q = pd.qcut(series, q=quantiles, labels=False, duplicates='drop')\n",
+    "    return q[-1]  # 返回窗口最后一个元素的分位数标签\n",
+    "\n",
+    "\n",
+    "window = 5\n",
+    "quantiles = 20\n",
+    "\n",
+    "\n",
+    "def get_label(df):\n",
+    "    labels = df['future_af13'] - df['act_factor1']\n",
+    "    # labels = df['future_close3']\n",
+    "    return labels\n",
+    "\n",
+    "\n",
+    "train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
+    "\n",
+    "train_data, test_data = train_data.dropna(subset=['label']), test_data.dropna(subset=['label'])\n",
+    "train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan).dropna(), test_data.replace([np.inf, -np.inf],\n",
+    "                                                                                                  np.nan).dropna()\n",
+    "train_data, test_data = train_data.reset_index(drop=True), test_data.reset_index(drop=True)\n",
+    "\n",
+    "print(len(train_data))\n",
+    "print(len(test_data))"
+   ],
+   "id": "f4f16d63ad18d1bc",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "875004\n",
+      "最小日期: 2017-01-03\n",
+      "最大日期: 2022-12-30\n",
+      "260581\n",
+      "最小日期: 2023-01-03\n",
+      "最大日期: 2025-01-27\n"
+     ]
+    }
+   ],
+   "execution_count": 13
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:56:05.480695Z",
+     "start_time": "2025-02-09T14:56:05.367238Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import lightgbm as lgb\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import optuna\n",
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "import os\n",
+    "import json\n",
+    "import pickle\n",
+    "import hashlib\n",
+    "\n",
+    "\n",
+    "def objective(trial, X, y, num_boost_round, params):\n",
+    "    # 参数网格\n",
+    "    X, y = X.reset_index(drop=True), y.reset_index(drop=True)\n",
+    "    param_grid = {\n",
+    "        \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [10000]),\n",
+    "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3),\n",
+    "        \"num_leaves\": trial.suggest_int(\"num_leaves\", 20, 3000, step=25),\n",
+    "        \"max_depth\": trial.suggest_int(\"max_depth\", 3, 16),\n",
+    "        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 200, 10000, step=100),\n",
+    "        \"lambda_l1\": trial.suggest_int(\"lambda_l1\", 0, 100, step=5),\n",
+    "        \"lambda_l2\": trial.suggest_int(\"lambda_l2\", 0, 100, step=5),\n",
+    "        \"min_gain_to_split\": trial.suggest_float(\"min_gain_to_split\", 0, 15),\n",
+    "        \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.2, 0.95, step=0.1),\n",
+    "        \"bagging_freq\": trial.suggest_categorical(\"bagging_freq\", [1]),\n",
+    "        \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.2, 0.95, step=0.1),\n",
+    "        \"random_state\": 1,\n",
+    "        \"objective\": 'regression',\n",
+    "        'verbosity': -1\n",
+    "    }\n",
+    "    # 5折交叉验证\n",
+    "    cv = KFold(n_splits=5, shuffle=False)\n",
+    "\n",
+    "    cv_scores = np.empty(5)\n",
+    "    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):\n",
+    "        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
+    "        y_train, y_test = y[train_idx], y[test_idx]\n",
+    "\n",
+    "        # LGBM建模\n",
+    "        model = lgb.LGBMRegressor(**param_grid, num_boost_round=num_boost_round)\n",
+    "        model.fit(\n",
+    "            X_train,\n",
+    "            y_train,\n",
+    "            eval_set=[(X_test, y_test)],\n",
+    "            eval_metric=\"l2\",\n",
+    "            callbacks=[\n",
+    "                # LightGBMPruningCallback(trial, \"l2\"),\n",
+    "                lgb.early_stopping(50, first_metric_only=True),\n",
+    "                lgb.log_evaluation(period=-1)\n",
+    "            ],\n",
+    "        )\n",
+    "        # 模型预测\n",
+    "        preds = model.predict(X_test)\n",
+    "        # Fold score: mean absolute error on the held-out fold (lower is better)\n",
+    "        cv_scores[idx] = mean_absolute_error(y_test, preds)\n",
+    "\n",
+    "    return np.mean(cv_scores)\n",
+    "\n",
+    "def generate_key(params, feature_columns, num_boost_round):\n",
+    "    key_data = {\n",
+    "        \"params\": params,\n",
+    "        \"feature_columns\": feature_columns,\n",
+    "        \"num_boost_round\": num_boost_round\n",
+    "    }\n",
+    "    # 转换成排序后的 JSON 字符串，再生成 md5 hash\n",
+    "    key_str = json.dumps(key_data, sort_keys=True)\n",
+    "    return hashlib.md5(key_str.encode('utf-8')).hexdigest()\n",
+    "\n",
+    "def train_light_model(df, params, feature_columns, callbacks, evals,\n",
+    "                      print_feature_importance=True, num_boost_round=100,\n",
+    "                      use_optuna=False):\n",
+    "    cache_file = 'light_model.pkl'\n",
+    "    cache_key = generate_key(params, feature_columns, num_boost_round)\n",
+    "\n",
+    "    # 检查缓存文件是否存在\n",
+    "    if os.path.exists(cache_file):\n",
+    "        try:\n",
+    "            with open(cache_file, 'rb') as f:\n",
+    "                cache_data = pickle.load(f)\n",
+    "            if cache_data.get('key') == cache_key:\n",
+    "                print(\"加载缓存模型...\")\n",
+    "                return cache_data.get('model')\n",
+    "            else:\n",
+    "                print(\"缓存模型的参数与当前参数不匹配，重新训练模型。\")\n",
+    "        except Exception as e:\n",
+    "            print(f\"加载缓存失败: {e}，重新训练模型。\")\n",
+    "    else:\n",
+    "        print(\"未发现缓存模型，开始训练新模型。\")\n",
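+    "    # Order rows by trade_date ascending, then label descending.\n",
+    "    # NOTE(review): the second sort below re-orders by trade_date only, so the within-date label order may not survive it; confirm whether that second sort is needed.\n",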
+    "    df_sorted = df.sort_values(by=['trade_date', 'label'], ascending=[True, False])  # 按日期升序、标签降序排序\n",
+    "    df_sorted = df_sorted.sort_values(by='trade_date')\n",
+    "    unique_dates = df_sorted['trade_date'].unique()\n",
+    "    val_date_count = int(len(unique_dates) * 0.1)\n",
+    "    val_dates = unique_dates[-val_date_count:]\n",
+    "    val_indices = df_sorted[df_sorted['trade_date'].isin(val_dates)].index\n",
+    "    train_indices = df_sorted[~df_sorted['trade_date'].isin(val_dates)].index\n",
+    "\n",
+    "    # 获取训练集和验证集的样本\n",
+    "    train_df = df_sorted.iloc[train_indices]\n",
+    "    val_df = df_sorted.iloc[val_indices]\n",
+    "\n",
+    "    X_train = train_df[feature_columns]\n",
+    "    y_train = train_df['label']\n",
+    "\n",
+    "    X_val = val_df[feature_columns]\n",
+    "    y_val = val_df['label']\n",
+    "\n",
+    "    train_data = lgb.Dataset(X_train, label=y_train)\n",
+    "    val_data = lgb.Dataset(X_val, label=y_val)\n",
+    "    if use_optuna:\n",
+    "        # study = optuna.create_study(direction='minimize' if classify else 'maximize')\n",
+    "        study = optuna.create_study(direction='minimize')\n",
+    "        study.optimize(lambda trial: objective(trial, X_train, y_train, num_boost_round, params), n_trials=20)\n",
+    "\n",
+    "        print(f\"Best parameters: {study.best_trial.params}\")\n",
+    "        print(f\"Best score: {study.best_trial.value}\")\n",
+    "\n",
+    "        params.update(study.best_trial.params)\n",
+    "    model = lgb.train(\n",
+    "        params, train_data, num_boost_round=num_boost_round,\n",
+    "        valid_sets=[train_data, val_data], valid_names=['train', 'valid'],\n",
+    "        callbacks=callbacks\n",
+    "    )\n",
+    "\n",
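+    "    # Optionally plot the evaluation metric held in evals, a tree diagram and the top-20 split-count feature importances\n",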
+    "    if print_feature_importance:\n",
+    "        lgb.plot_metric(evals)\n",
+    "        lgb.plot_tree(model, figsize=(20, 8))\n",
+    "        lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
+    "        plt.show()\n",
+    "    # with open(cache_file, 'wb') as f:\n",
+    "    #     pickle.dump({'key': cache_key,\n",
+    "    #                  'model': model,\n",
+    "    #                  'feature_columns': feature_columns}, f)\n",
+    "    #     print(\"模型训练完成并已保存缓存。\")\n",
+    "    return model\n",
+    "\n",
+    "\n",
+    "from catboost import CatBoostRegressor\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def train_catboost(df, num_boost_round, params=None):\n",
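+    "    \"\"\"\n",
+    "    Train a CatBoost regression model (CatBoostRegressor).\n",
+    "    - df: DataFrame holding the factor columns plus 'date' and 'label'; the columns 'date',\n",
+    "      'instrument', 'label' and any column whose name contains 'future' or 'score' are not used as features\n",
+    "    - num_boost_round: number of boosting iterations passed to CatBoostRegressor\n",
+    "    - params: dict of keyword arguments forwarded to CatBoostRegressor; the default None is not\n",
+    "      handled, so a dict must be supplied\n",
+    "\n",
+    "    The last int(0.1 * number_of_unique_dates) dates are selected as the evaluation period.\n",
+    "\n",
+    "    Returns the fitted model.\n",
+    "    \"\"\"\n",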
+    "    df_sorted = df.sort_values(by=['date', 'label'], ascending=[True, False])\n",
+    "\n",
+    "    # 提取特征和标签\n",
+    "    feature_columns = [col for col in df.columns if col not in ['date',\n",
+    "                                                                'instrument',\n",
+    "                                                                'label']]\n",
+    "    feature_columns = [col for col in feature_columns if 'future' not in col]\n",
+    "    feature_columns = [col for col in feature_columns if 'score' not in col]\n",
+    "\n",
+    "    df_sorted = df_sorted.sort_values(by='date')\n",
+    "    unique_dates = df_sorted['date'].unique()\n",
+    "    val_date_count = int(len(unique_dates) * 0.1)\n",
+    "    val_dates = unique_dates[-val_date_count:]\n",
+    "    val_indices = df_sorted[df_sorted['date'].isin(val_dates)].index\n",
+    "    train_indices = df_sorted[~df_sorted['date'].isin(val_dates)].index\n",
+    "\n",
+    "    # 获取训练集和验证集的样本\n",
+    "    train_df = df_sorted.iloc[train_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
+    "    val_df = df_sorted.iloc[val_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
+    "\n",
+    "    X_train = train_df[feature_columns]\n",
+    "    y_train = train_df['label']\n",
+    "\n",
+    "    X_val = val_df[feature_columns]\n",
+    "    y_val = val_df['label']\n",
+    "\n",
+    "    model = CatBoostRegressor(**params, iterations=num_boost_round)\n",
+    "    model.fit(X_train,\n",
+    "              y_train,\n",
+    "              eval_set=(X_val, y_val))\n",
+    "\n",
+    "    return model"
+   ],
+   "id": "8f134d435f71e9e2",
+   "outputs": [],
+   "execution_count": 14
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:56:05.576927Z",
+     "start_time": "2025-02-09T14:56:05.480695Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "light_params = {\n",
+    "    'objective': 'regression',\n",
+    "    'metric': 'l2',\n",
+    "    'learning_rate': 0.05,\n",
+    "    'is_unbalance': True,\n",
+    "    'num_leaves': 2048,\n",
+    "    'min_data_in_leaf': 16,\n",
+    "    'max_depth': 32,\n",
+    "    'max_bin': 1024,\n",
+    "    'nthread': 2,\n",
+    "    'feature_fraction': 0.7,\n",
+    "    'bagging_fraction': 0.7,\n",
+    "    'bagging_freq': 5,\n",
+    "    'lambda_l1': 80,\n",
+    "    'lambda_l2': 65,\n",
+    "    'verbosity': -1\n",
+    "}"
+   ],
+   "id": "4a4542e1ed6afe7d",
+   "outputs": [],
+   "execution_count": 15
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:57:25.341222Z",
+     "start_time": "2025-02-09T14:56:05.640256Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "print('train data size: ', len(train_data))\n",
+    "df = train_data\n",
+    "\n",
+    "evals = {}\n",
+    "light_model = train_light_model(train_data, light_params, feature_columns,\n",
+    "                                 [lgb.log_evaluation(period=500),\n",
+    "                                  lgb.callback.record_evaluation(evals),\n",
+    "                                  lgb.early_stopping(50, first_metric_only=True)\n",
+    "                                  ], evals,\n",
+    "                                 num_boost_round=1000, use_optuna=False,\n",
+    "                                 print_feature_importance=False)"
+   ],
+   "id": "beeb098799ecfa6a",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data size:  875004\n",
+      "未发现缓存模型，开始训练新模型。\n",
+      "Training until validation scores don't improve for 50 rounds\n",
+      "Early stopping, best iteration is:\n",
+      "[378]\ttrain's l2: 0.435049\tvalid's l2: 0.589178\n",
+      "Evaluated only: l2\n"
+     ]
+    }
+   ],
+   "execution_count": 16
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:57:27.394697Z",
+     "start_time": "2025-02-09T14:57:25.373274Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "test_data['score'] = light_model.predict(test_data[feature_columns])\n",
+    "predictions = test_data.loc[test_data.groupby('trade_date')['score'].idxmax()]"
+   ],
+   "id": "5bb96ca8492e74d",
+   "outputs": [],
+   "execution_count": 17
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-09T14:57:27.489570Z",
+     "start_time": "2025-02-09T14:57:27.397368Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions.csv', index=False)",
+   "id": "5d1522a7538db91b",
+   "outputs": [],
+   "execution_count": 18
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/main/train/V1.1.ipynb
+++ b/main/train/V1.1.ipynb
--- a/main/train/V1.ipynb
+++ b/main/train/V1.ipynb
@@ -0,0 +1,929 @@
+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:39:38.576665Z",
+     "start_time": "2025-02-11T16:39:38.019824Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "from code.utils.utils import read_and_merge_h5_data\n"
+   ],
+   "id": "79a7758178bafdd3",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "execution_count": 8
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:40:45.842510Z",
+     "start_time": "2025-02-11T16:39:54.757326Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "\n",
+    "print('daily data')\n",
+    "df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
+    "                            columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol'],\n",
+    "                            df=None)\n",
+    "\n",
+    "print('daily basic')\n",
+    "df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
+    "                            columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
+    "                                     'is_st'], df=df)\n",
+    "\n",
+    "print('stk limit')\n",
+    "df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
+    "                            columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
+    "                            df=df)\n",
+    "print('money flow')\n",
+    "df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
+    "                            columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
+    "                                     'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
+    "                            df=df)"
+   ],
+   "id": "a79cafb06a7e0e43",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "daily data\n",
+      "daily basic\n",
+      "stk limit\n",
+      "money flow\n"
+     ]
+    }
+   ],
+   "execution_count": 10
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:40:45.905077Z",
+     "start_time": "2025-02-11T16:40:45.848510Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "origin_columns = df.columns.tolist()",
+   "id": "c4e9e1d31da6dba6",
+   "outputs": [],
+   "execution_count": 11
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:40:46.016229Z",
+     "start_time": "2025-02-11T16:40:45.938587Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import numpy as np\n",
+    "import talib\n",
+    "\n",
+    "\n",
+    "def get_technical_factor(df):\n",
+    "    df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
+    "    df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
+    "\n",
+    "    df['atr_14'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=14)\n",
+    "    df['atr_6'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=6)\n",
+    "\n",
+    "    df['obv'] = talib.OBV(df['close'], df['vol'])\n",
+    "    df['maobv_6'] = talib.SMA(df['obv'], timeperiod=6)\n",
+    "    df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
+    "\n",
+    "    df['rsi_3'] = talib.RSI(df['close'], timeperiod=3)\n",
+    "    df['rsi_6'] = talib.RSI(df['close'], timeperiod=6)\n",
+    "    df['rsi_9'] = talib.RSI(df['close'], timeperiod=9)\n",
+    "\n",
+    "    df['return_10'] = df['close'] / df['close'].shift(10) - 1\n",
+    "    df['return_20'] = df['close'] / df['close'].shift(20) - 1\n",
+    "\n",
+    "    # # 计算 _rank_return_10 和 _rank_return_20\n",
+    "    # df['_rank_return_10'] = df['return_10'].rank(pct=True)\n",
+    "    # df['_rank_return_20'] = df['return_20'].rank(pct=True)\n",
+    "\n",
+    "    # 计算 avg_close_5\n",
+    "    df['avg_close_5'] = df['close'].rolling(window=5).mean() / df['close']\n",
+    "\n",
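+    "    # Compute std_return_5, std_return_15, std_return_25, std_return_90 and std_return_90_2\n",
+    "    # NOTE(review): pct_change().shift(-1) moves the next row's return onto the current row; confirm this look-ahead is intended\n",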
+    "    df['std_return_5'] = df['close'].pct_change().shift(-1).rolling(window=5).std()\n",
+    "    df['std_return_15'] = df['close'].pct_change().shift(-1).rolling(window=15).std()\n",
+    "    df['std_return_25'] = df['close'].pct_change().shift(-1).rolling(window=25).std()\n",
+    "    df['std_return_90'] = df['close'].pct_change().shift(-1).rolling(window=90).std()\n",
+    "    df['std_return_90_2'] = df['close'].shift(10).pct_change().shift(-1).rolling(window=90).std()\n",
+    "\n",
+    "    # Compute std_return_5 / std_return_90 and std_return_5 / std_return_25\n",
+    "    df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
+    "    df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
+    "\n",
+    "    # Compute std_return_90 - std_return_90_2\n",
+    "    df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_act_factor(df):\n",
+    "    # 计算 m_ta_ema(close, 5), m_ta_ema(close, 13), m_ta_ema(close, 20), m_ta_ema(close, 60)\n",
+    "    df['ema_5'] = talib.EMA(df['close'], timeperiod=5)\n",
+    "    df['ema_13'] = talib.EMA(df['close'], timeperiod=13)\n",
+    "    df['ema_20'] = talib.EMA(df['close'], timeperiod=20)\n",
+    "    df['ema_60'] = talib.EMA(df['close'], timeperiod=60)\n",
+    "\n",
+    "    # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
+    "    df['act_factor1'] = np.arctan((df['ema_5'] / df['ema_5'].shift(1) - 1) * 100) * 57.3 / 50\n",
+    "    df['act_factor2'] = np.arctan((df['ema_13'] / df['ema_13'].shift(1) - 1) * 100) * 57.3 / 40\n",
+    "    df['act_factor3'] = np.arctan((df['ema_20'] / df['ema_20'].shift(1) - 1) * 100) * 57.3 / 21\n",
+    "    df['act_factor4'] = np.arctan((df['ema_60'] / df['ema_60'].shift(1) - 1) * 100) * 57.3 / 10\n",
+    "\n",
+    "    # 计算 act_factor5 和 act_factor6\n",
+    "    df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
+    "    df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
+    "        df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
+    "\n",
+    "    # 根据 'trade_date' 进行分组，在每个组内分别计算 'act_factor1', 'act_factor2', 'act_factor3' 的排名\n",
+    "    df['rank_act_factor1'] = df.groupby('trade_date')['act_factor1'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor2'] = df.groupby('trade_date')['act_factor2'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor3'] = df.groupby('trade_date')['act_factor3'].rank(ascending=False, pct=True)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_money_flow_factor(df):\n",
+    "    df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
+    "\n",
+    "    df['buy_lg_vol - sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
+    "    df['buy_elg_vol - sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
+    "\n",
+    "    # # 你还提到了一些其他字段:\n",
+    "    # df['net_active_buy_volume_main'] = df['net_mf_vol'] / df['buy_sm_vol']\n",
+    "    # df['netflow_amount_main'] = df['net_mf_vol'] / df['buy_sm_vol']  # 这里假设 'net_mf_vol' 是主流资金流\n",
+    "\n",
+    "    # df['active_sell_volume_large'] = df['sell_lg_vol'] / df['sell_sm_vol']\n",
+    "    # df['active_sell_volume_big'] = df['sell_elg_vol'] / df['sell_sm_vol']\n",
+    "    # df['active_sell_volume_small'] = df['sell_sm_vol'] / df['sell_sm_vol']\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_alpha_factor(df):\n",
+    "    df['alpha_022'] = df['close'] - df['close'].shift(5)\n",
+    "\n",
+    "    # alpha_003: (close - open) / (high - low)\n",
+    "    df['alpha_003'] = (df['close'] - df['open']) / (df['high'] - df['low'])\n",
+    "\n",
+    "    # alpha_007: rank(correlation(close, volume, 5))\n",
+    "    df['alpha_007'] = df['close'].rolling(5).corr(df['vol']).rank(axis=1)\n",
+    "\n",
+    "    # alpha_013: rank(sum(close, 5) - sum(close, 20))\n",
+    "    df['alpha_013'] = (df['close'].rolling(5).sum() - df['close'].rolling(20).sum()).rank(axis=1)\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_future_data(df):\n",
+    "    df['future_return1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
+    "    df['future_return2'] = (df['open'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return3'] = (df['close'].shift(-2) - df['close'].shift(-1)) / df['close'].shift(-1)\n",
+    "    df['future_return4'] = (df['close'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return5'] = (df['close'].shift(-5) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return6'] = (df['close'].shift(-10) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_return7'] = (df['close'].shift(-20) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
+    "    df['future_close1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
+    "    df['future_close2'] = (df['close'].shift(-2) - df['close']) / df['close']\n",
+    "    df['future_close3'] = (df['close'].shift(-3) - df['close']) / df['close']\n",
+    "    df['future_close4'] = (df['close'].shift(-4) - df['close']) / df['close']\n",
+    "    df['future_close5'] = (df['close'].shift(-5) - df['close']) / df['close']\n",
+    "    df['future_af11'] = df['act_factor1'].shift(-1)\n",
+    "    df['future_af12'] = df['act_factor1'].shift(-2)\n",
+    "    df['future_af13'] = df['act_factor1'].shift(-3)\n",
+    "    df['future_af14'] = df['act_factor1'].shift(-4)\n",
+    "    df['future_af15'] = df['act_factor1'].shift(-5)\n",
+    "    df['future_af21'] = df['act_factor2'].shift(-1)\n",
+    "    df['future_af22'] = df['act_factor2'].shift(-2)\n",
+    "    df['future_af23'] = df['act_factor2'].shift(-3)\n",
+    "    df['future_af24'] = df['act_factor2'].shift(-4)\n",
+    "    df['future_af25'] = df['act_factor2'].shift(-5)\n",
+    "    df['future_af31'] = df['act_factor3'].shift(-1)\n",
+    "    df['future_af32'] = df['act_factor3'].shift(-2)\n",
+    "    df['future_af33'] = df['act_factor3'].shift(-3)\n",
+    "    df['future_af34'] = df['act_factor3'].shift(-4)\n",
+    "    df['future_af35'] = df['act_factor3'].shift(-5)\n",
+    "\n",
+    "    return df\n"
+   ],
+   "id": "a735bc02ceb4d872",
+   "outputs": [],
+   "execution_count": 12
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:40:56.805530Z",
+     "start_time": "2025-02-11T16:40:46.048312Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "df = get_technical_factor(df)\n",
+    "df = get_act_factor(df)\n",
+    "df = get_money_flow_factor(df)\n",
+    "df = get_future_data(df)\n",
+    "# df = df.drop(columns=origin_columns)\n",
+    "\n",
+    "print(df.info())"
+   ],
+   "id": "53f86ddc0677a6d7",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 8375079 entries, 0 to 8375078\n",
+      "Data columns (total 87 columns):\n",
+      " #   Column                           Dtype         \n",
+      "---  ------                           -----         \n",
+      " 0   ts_code                          object        \n",
+      " 1   trade_date                       datetime64[ns]\n",
+      " 2   open                             float64       \n",
+      " 3   close                            float64       \n",
+      " 4   high                             float64       \n",
+      " 5   low                              float64       \n",
+      " 6   vol                              float64       \n",
+      " 7   turnover_rate                    float64       \n",
+      " 8   pe_ttm                           float64       \n",
+      " 9   circ_mv                          float64       \n",
+      " 10  volume_ratio                     float64       \n",
+      " 11  is_st                            object        \n",
+      " 12  up_limit                         float64       \n",
+      " 13  down_limit                       float64       \n",
+      " 14  buy_sm_vol                       float64       \n",
+      " 15  sell_sm_vol                      float64       \n",
+      " 16  buy_lg_vol                       float64       \n",
+      " 17  sell_lg_vol                      float64       \n",
+      " 18  buy_elg_vol                      float64       \n",
+      " 19  sell_elg_vol                     float64       \n",
+      " 20  net_mf_vol                       float64       \n",
+      " 21  up                               float64       \n",
+      " 22  down                             float64       \n",
+      " 23  atr_14                           float64       \n",
+      " 24  atr_6                            float64       \n",
+      " 25  obv                              float64       \n",
+      " 26  maobv_6                          float64       \n",
+      " 27  obv-maobv_6                      float64       \n",
+      " 28  rsi_3                            float64       \n",
+      " 29  rsi_6                            float64       \n",
+      " 30  rsi_9                            float64       \n",
+      " 31  return_10                        float64       \n",
+      " 32  return_20                        float64       \n",
+      " 33  avg_close_5                      float64       \n",
+      " 34  std_return_5                     float64       \n",
+      " 35  std_return_15                    float64       \n",
+      " 36  std_return_25                    float64       \n",
+      " 37  std_return_90                    float64       \n",
+      " 38  std_return_90_2                  float64       \n",
+      " 39  std_return_5 / std_return_90     float64       \n",
+      " 40  std_return_5 / std_return_25     float64       \n",
+      " 41  std_return_90 - std_return_90_2  float64       \n",
+      " 42  ema_5                            float64       \n",
+      " 43  ema_13                           float64       \n",
+      " 44  ema_20                           float64       \n",
+      " 45  ema_60                           float64       \n",
+      " 46  act_factor1                      float64       \n",
+      " 47  act_factor2                      float64       \n",
+      " 48  act_factor3                      float64       \n",
+      " 49  act_factor4                      float64       \n",
+      " 50  act_factor5                      float64       \n",
+      " 51  act_factor6                      float64       \n",
+      " 52  rank_act_factor1                 float64       \n",
+      " 53  rank_act_factor2                 float64       \n",
+      " 54  rank_act_factor3                 float64       \n",
+      " 55  active_buy_volume_large          float64       \n",
+      " 56  active_buy_volume_big            float64       \n",
+      " 57  active_buy_volume_small          float64       \n",
+      " 58  buy_lg_vol - sell_lg_vol         float64       \n",
+      " 59  buy_elg_vol - sell_elg_vol       float64       \n",
+      " 60  future_return1                   float64       \n",
+      " 61  future_return2                   float64       \n",
+      " 62  future_return3                   float64       \n",
+      " 63  future_return4                   float64       \n",
+      " 64  future_return5                   float64       \n",
+      " 65  future_return6                   float64       \n",
+      " 66  future_return7                   float64       \n",
+      " 67  future_close1                    float64       \n",
+      " 68  future_close2                    float64       \n",
+      " 69  future_close3                    float64       \n",
+      " 70  future_close4                    float64       \n",
+      " 71  future_close5                    float64       \n",
+      " 72  future_af11                      float64       \n",
+      " 73  future_af12                      float64       \n",
+      " 74  future_af13                      float64       \n",
+      " 75  future_af14                      float64       \n",
+      " 76  future_af15                      float64       \n",
+      " 77  future_af21                      float64       \n",
+      " 78  future_af22                      float64       \n",
+      " 79  future_af23                      float64       \n",
+      " 80  future_af24                      float64       \n",
+      " 81  future_af25                      float64       \n",
+      " 82  future_af31                      float64       \n",
+      " 83  future_af32                      float64       \n",
+      " 84  future_af33                      float64       \n",
+      " 85  future_af34                      float64       \n",
+      " 86  future_af35                      float64       \n",
+      "dtypes: datetime64[ns](1), float64(84), object(2)\n",
+      "memory usage: 5.4+ GB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "execution_count": 13
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:42:03.707721Z",
+     "start_time": "2025-02-11T16:40:56.889317Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def filter_data(df, top_n=1000, rank_column='act_factor3'):\n",
+    "    \"\"\"Keep the daily top rows by a ranking column, then drop ST and excluded-prefix stocks.\n",
+    "\n",
+    "    Args:\n",
+    "        df: frame with trade_date, ts_code, is_st and the ranking column.\n",
+    "        top_n: rows kept per trade_date before the exclusions are applied.\n",
+    "        rank_column: column whose largest values are kept.\n",
+    "\n",
+    "    Returns:\n",
+    "        The filtered rows with a fresh 0..n-1 index.\n",
+    "    \"\"\"\n",
+    "    # Exclusions run after the top-N cut, so a day can end up with fewer than top_n rows.\n",
+    "    df = df.groupby('trade_date').apply(lambda day: day.nlargest(top_n, rank_column))\n",
+    "    # is_st has object dtype; cast it so that ~ is a logical NOT and not an\n",
+    "    # element-wise integer inversion of Python bools.\n",
+    "    is_st = df['is_st'].astype(bool)\n",
+    "    codes = df['ts_code']\n",
+    "    # Codes starting with 30, 68 or 8 are removed.\n",
+    "    excluded = codes.str.startswith('30') | codes.str.startswith('68') | codes.str.startswith('8')\n",
+    "    return df[~is_st & ~excluded].reset_index(drop=True)\n",
+    "\n",
+    "\n",
+    "df = filter_data(df)\n",
+    "print(df.info())"
+   ],
+   "id": "dbe2fd8021b9417f",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 1101560 entries, 0 to 1101559\n",
+      "Data columns (total 87 columns):\n",
+      " #   Column                           Non-Null Count    Dtype         \n",
+      "---  ------                           --------------    -----         \n",
+      " 0   ts_code                          1101560 non-null  object        \n",
+      " 1   trade_date                       1101560 non-null  datetime64[ns]\n",
+      " 2   open                             1101560 non-null  float64       \n",
+      " 3   close                            1101560 non-null  float64       \n",
+      " 4   high                             1101560 non-null  float64       \n",
+      " 5   low                              1101560 non-null  float64       \n",
+      " 6   vol                              1101560 non-null  float64       \n",
+      " 7   turnover_rate                    1101560 non-null  float64       \n",
+      " 8   pe_ttm                           932908 non-null   float64       \n",
+      " 9   circ_mv                          1101560 non-null  float64       \n",
+      " 10  volume_ratio                     1101096 non-null  float64       \n",
+      " 11  is_st                            1101560 non-null  object        \n",
+      " 12  up_limit                         1101282 non-null  float64       \n",
+      " 13  down_limit                       1101282 non-null  float64       \n",
+      " 14  buy_sm_vol                       1101069 non-null  float64       \n",
+      " 15  sell_sm_vol                      1101069 non-null  float64       \n",
+      " 16  buy_lg_vol                       1101069 non-null  float64       \n",
+      " 17  sell_lg_vol                      1101069 non-null  float64       \n",
+      " 18  buy_elg_vol                      1101069 non-null  float64       \n",
+      " 19  sell_elg_vol                     1101069 non-null  float64       \n",
+      " 20  net_mf_vol                       1101069 non-null  float64       \n",
+      " 21  up                               1101560 non-null  float64       \n",
+      " 22  down                             1101560 non-null  float64       \n",
+      " 23  atr_14                           1100687 non-null  float64       \n",
+      " 24  atr_6                            1100687 non-null  float64       \n",
+      " 25  obv                              1101560 non-null  float64       \n",
+      " 26  maobv_6                          1101560 non-null  float64       \n",
+      " 27  obv-maobv_6                      1101560 non-null  float64       \n",
+      " 28  rsi_3                            1100687 non-null  float64       \n",
+      " 29  rsi_6                            1100687 non-null  float64       \n",
+      " 30  rsi_9                            1100687 non-null  float64       \n",
+      " 31  return_10                        1101560 non-null  float64       \n",
+      " 32  return_20                        1101560 non-null  float64       \n",
+      " 33  avg_close_5                      1101560 non-null  float64       \n",
+      " 34  std_return_5                     1101560 non-null  float64       \n",
+      " 35  std_return_15                    1101560 non-null  float64       \n",
+      " 36  std_return_25                    1101559 non-null  float64       \n",
+      " 37  std_return_90                    1101533 non-null  float64       \n",
+      " 38  std_return_90_2                  1101531 non-null  float64       \n",
+      " 39  std_return_5 / std_return_90     1101533 non-null  float64       \n",
+      " 40  std_return_5 / std_return_25     1101559 non-null  float64       \n",
+      " 41  std_return_90 - std_return_90_2  1101531 non-null  float64       \n",
+      " 42  ema_5                            1100687 non-null  float64       \n",
+      " 43  ema_13                           1100687 non-null  float64       \n",
+      " 44  ema_20                           1100687 non-null  float64       \n",
+      " 45  ema_60                           1100682 non-null  float64       \n",
+      " 46  act_factor1                      1100687 non-null  float64       \n",
+      " 47  act_factor2                      1100687 non-null  float64       \n",
+      " 48  act_factor3                      1100687 non-null  float64       \n",
+      " 49  act_factor4                      1100682 non-null  float64       \n",
+      " 50  act_factor5                      1100682 non-null  float64       \n",
+      " 51  act_factor6                      1100687 non-null  float64       \n",
+      " 52  rank_act_factor1                 1100687 non-null  float64       \n",
+      " 53  rank_act_factor2                 1100687 non-null  float64       \n",
+      " 54  rank_act_factor3                 1100687 non-null  float64       \n",
+      " 55  active_buy_volume_large          1101065 non-null  float64       \n",
+      " 56  active_buy_volume_big            1101042 non-null  float64       \n",
+      " 57  active_buy_volume_small          1101069 non-null  float64       \n",
+      " 58  buy_lg_vol - sell_lg_vol         1101066 non-null  float64       \n",
+      " 59  buy_elg_vol - sell_elg_vol       1101046 non-null  float64       \n",
+      " 60  future_return1                   1101560 non-null  float64       \n",
+      " 61  future_return2                   1101560 non-null  float64       \n",
+      " 62  future_return3                   1101560 non-null  float64       \n",
+      " 63  future_return4                   1101560 non-null  float64       \n",
+      " 64  future_return5                   1101560 non-null  float64       \n",
+      " 65  future_return6                   1101560 non-null  float64       \n",
+      " 66  future_return7                   1101560 non-null  float64       \n",
+      " 67  future_close1                    1101560 non-null  float64       \n",
+      " 68  future_close2                    1101560 non-null  float64       \n",
+      " 69  future_close3                    1101560 non-null  float64       \n",
+      " 70  future_close4                    1101560 non-null  float64       \n",
+      " 71  future_close5                    1101560 non-null  float64       \n",
+      " 72  future_af11                      1100687 non-null  float64       \n",
+      " 73  future_af12                      1100687 non-null  float64       \n",
+      " 74  future_af13                      1100687 non-null  float64       \n",
+      " 75  future_af14                      1100687 non-null  float64       \n",
+      " 76  future_af15                      1100687 non-null  float64       \n",
+      " 77  future_af21                      1100687 non-null  float64       \n",
+      " 78  future_af22                      1100687 non-null  float64       \n",
+      " 79  future_af23                      1100687 non-null  float64       \n",
+      " 80  future_af24                      1100687 non-null  float64       \n",
+      " 81  future_af25                      1100687 non-null  float64       \n",
+      " 82  future_af31                      1100687 non-null  float64       \n",
+      " 83  future_af32                      1100687 non-null  float64       \n",
+      " 84  future_af33                      1100687 non-null  float64       \n",
+      " 85  future_af34                      1100687 non-null  float64       \n",
+      " 86  future_af35                      1100687 non-null  float64       \n",
+      "dtypes: datetime64[ns](1), float64(84), object(2)\n",
+      "memory usage: 731.2+ MB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "execution_count": 14
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:42:04.317134Z",
+     "start_time": "2025-02-11T16:42:03.969288Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def remove_outliers_iqr(series, lower_quantile=0.05, upper_quantile=0.95, threshold=1.5):\n",
+    "    \"\"\"Return a boolean mask that is True for values inside the quantile fence.\n",
+    "\n",
+    "    The fence runs from `threshold` quantile-ranges below the lower quantile to\n",
+    "    `threshold` quantile-ranges above the upper quantile, both ends inclusive.\n",
+    "    \"\"\"\n",
+    "    low_q, high_q = series.quantile(lower_quantile), series.quantile(upper_quantile)\n",
+    "    margin = threshold * (high_q - low_q)\n",
+    "    # Values below the lower fence or above the upper fence are marked False.\n",
+    "    return series.between(low_q - margin, high_q + margin)\n",
+    "\n",
+    "\n",
+    "def neutralize_labels(labels, features, feature_columns, z_threshold=3, method='regression'):\n",
+    "    \"\"\"Return the outlier mask of `labels`.\n",
+    "\n",
+    "    Only `labels` is used; the other parameters are accepted but ignored.\n",
+    "    \"\"\"\n",
+    "    return remove_outliers_iqr(labels)\n",
+    "\n",
+    "\n",
+    "# Chronological split; the boundary date belongs to the test set only, so no row\n",
+    "# can appear in both sets.\n",
+    "train_data = df[df['trade_date'] < '2023-01-01']\n",
+    "test_data = df[df['trade_date'] >= '2023-01-01']\n",
+    "\n",
+    "# Model inputs: drop the identifiers, the label, every column whose name contains\n",
+    "# 'future' or 'score', and everything listed in origin_columns (defined outside this cell).\n",
+    "non_feature_columns = {'trade_date', 'ts_code', 'label'}\n",
+    "feature_columns = [\n",
+    "    col for col in df.columns\n",
+    "    if col not in non_feature_columns\n",
+    "    and 'future' not in col\n",
+    "    and 'score' not in col\n",
+    "    and col not in origin_columns\n",
+    "]\n",
+    "\n",
+    "# for column in [column for column in train_data.columns if 'future' in column]:\n",
+    "#     label_index = neutralize_labels(train_data[column], train_data, feature_columns, z_threshold=3, method='regression')\n",
+    "#     train_data = train_data[label_index]\n",
+    "#     label_index = neutralize_labels(test_data[column], test_data, feature_columns, z_threshold=3, method='regression')\n",
+    "#     test_data = test_data[label_index]\n",
+    "\n",
+    "print(len(train_data))\n",
+    "print(len(test_data))"
+   ],
+   "id": "5f3d9aece75318cd",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "860933\n",
+      "240627\n"
+     ]
+    }
+   ],
+   "execution_count": 15
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:42:06.417436Z",
+     "start_time": "2025-02-11T16:42:04.322121Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def get_qcuts(series, quantiles):\n",
+    "    \"\"\"Return the quantile-bin code of the last element of `series`.\n",
+    "\n",
+    "    Args:\n",
+    "        series: window of values (Series or array-like) binned with pd.qcut.\n",
+    "        quantiles: value passed to pd.qcut as `q`.\n",
+    "\n",
+    "    Returns:\n",
+    "        The bin code pd.qcut assigned to the final element of the window.\n",
+    "    \"\"\"\n",
+    "    bins = pd.qcut(series, q=quantiles, labels=False, duplicates='drop')\n",
+    "    # Select by position: bins[-1] is a label lookup when bins is a Series, which\n",
+    "    # fails or picks the wrong row unless the index happens to contain -1.\n",
+    "    return pd.Series(bins).iloc[-1]\n",
+    "\n",
+    "\n",
+    "window = 5\n",
+    "quantiles = 20\n",
+    "\n",
+    "\n",
+    "def get_label(df):\n",
+    "    \"\"\"Build the training target: future_af13 minus act_factor1, row by row.\"\"\"\n",
+    "    # Alternative target kept for reference: df['future_close3']\n",
+    "    return df['future_af13'] - df['act_factor1']\n",
+    "\n",
+    "\n",
+    "# assign() builds new frames, so the label is never written onto a slice of df\n",
+    "# (direct column assignment here raised SettingWithCopyWarning).\n",
+    "train_data = train_data.assign(label=get_label(train_data))\n",
+    "test_data = test_data.assign(label=get_label(test_data))\n",
+    "\n",
+    "\n",
+    "def _drop_incomplete_rows(frame):\n",
+    "    # Treat +/-inf as missing, drop every row with a missing value in any column\n",
+    "    # (this includes a missing label), then renumber the rows from 0.\n",
+    "    return frame.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True)\n",
+    "\n",
+    "\n",
+    "train_data, test_data = _drop_incomplete_rows(train_data), _drop_incomplete_rows(test_data)\n",
+    "\n",
+    "print(len(train_data))\n",
+    "print(len(test_data))"
+   ],
+   "id": "f4f16d63ad18d1bc",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_88940\\2181928612.py:16: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
+      "C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_88940\\2181928612.py:16: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "747134\n",
+      "184095\n"
+     ]
+    }
+   ],
+   "execution_count": 16
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:42:08.032246Z",
+     "start_time": "2025-02-11T16:42:06.481439Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import lightgbm as lgb\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import optuna\n",
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "import os\n",
+    "import json\n",
+    "import pickle\n",
+    "import hashlib\n",
+    "\n",
+    "\n",
+    "def objective(trial, X, y, num_boost_round, params):\n",
+    "    \"\"\"Optuna objective: mean validation MAE of an LGBMRegressor over 5 unshuffled folds.\n",
+    "\n",
+    "    Each fold trains with early stopping on the l2 metric of its held-out part; the\n",
+    "    returned score is the mean absolute error averaged over the folds.\n",
+    "    `params` is accepted but not used inside this function.\n",
+    "    \"\"\"\n",
+    "    X = X.reset_index(drop=True)\n",
+    "    y = y.reset_index(drop=True)\n",
+    "    # Search space sampled from the trial.\n",
+    "    search_space = {\n",
+    "        \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [10000]),\n",
+    "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3),\n",
+    "        \"num_leaves\": trial.suggest_int(\"num_leaves\", 20, 3000, step=25),\n",
+    "        \"max_depth\": trial.suggest_int(\"max_depth\", 3, 16),\n",
+    "        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 200, 10000, step=100),\n",
+    "        \"lambda_l1\": trial.suggest_int(\"lambda_l1\", 0, 100, step=5),\n",
+    "        \"lambda_l2\": trial.suggest_int(\"lambda_l2\", 0, 100, step=5),\n",
+    "        \"min_gain_to_split\": trial.suggest_float(\"min_gain_to_split\", 0, 15),\n",
+    "        \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.2, 0.95, step=0.1),\n",
+    "        \"bagging_freq\": trial.suggest_categorical(\"bagging_freq\", [1]),\n",
+    "        \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.2, 0.95, step=0.1),\n",
+    "        \"random_state\": 1,\n",
+    "        \"objective\": 'regression',\n",
+    "        'verbosity': -1\n",
+    "    }\n",
+    "    n_folds = 5\n",
+    "    splitter = KFold(n_splits=n_folds, shuffle=False)\n",
+    "    fold_mae = np.empty(n_folds)\n",
+    "\n",
+    "    for fold, (fit_rows, eval_rows) in enumerate(splitter.split(X, y)):\n",
+    "        X_fit, y_fit = X.iloc[fit_rows], y.iloc[fit_rows]\n",
+    "        X_eval, y_eval = X.iloc[eval_rows], y.iloc[eval_rows]\n",
+    "\n",
+    "        regressor = lgb.LGBMRegressor(**search_space, num_boost_round=num_boost_round)\n",
+    "        regressor.fit(\n",
+    "            X_fit,\n",
+    "            y_fit,\n",
+    "            eval_set=[(X_eval, y_eval)],\n",
+    "            eval_metric=\"l2\",\n",
+    "            callbacks=[\n",
+    "                lgb.early_stopping(50, first_metric_only=True),\n",
+    "                lgb.log_evaluation(period=-1),\n",
+    "            ],\n",
+    "        )\n",
+    "        # Score the fold with MAE on its held-out rows.\n",
+    "        fold_mae[fold] = mean_absolute_error(y_eval, regressor.predict(X_eval))\n",
+    "\n",
+    "    return np.mean(fold_mae)\n",
+    "\n",
+    "def generate_key(params, feature_columns, num_boost_round):\n",
+    "    \"\"\"Return an MD5 hex digest identifying a (params, feature list, round count) combination.\"\"\"\n",
+    "    # Keys are sorted during serialization so dict insertion order does not change the digest.\n",
+    "    payload = json.dumps(\n",
+    "        {\n",
+    "            \"params\": params,\n",
+    "            \"feature_columns\": feature_columns,\n",
+    "            \"num_boost_round\": num_boost_round,\n",
+    "        },\n",
+    "        sort_keys=True,\n",
+    "    )\n",
+    "    return hashlib.md5(payload.encode('utf-8')).hexdigest()\n",
+    "\n",
+    "def train_light_model(df, params, feature_columns, callbacks, evals,\n",
+    "                      print_feature_importance=True, num_boost_round=100,\n",
+    "                      use_optuna=False):\n",
+    "    \"\"\"Train a LightGBM model, holding out the most recent 10% of trade dates for validation.\n",
+    "\n",
+    "    Args:\n",
+    "        df: frame with trade_date, label and every column in feature_columns.\n",
+    "        params: LightGBM parameters; updated in place with the tuned values when\n",
+    "            use_optuna is True.\n",
+    "        feature_columns: names of the model input columns.\n",
+    "        callbacks: LightGBM callbacks passed to lgb.train.\n",
+    "        evals: evaluation-history dict handed to lgb.plot_metric.\n",
+    "        print_feature_importance: when True, plot the metric history, a tree and the\n",
+    "            split importances.\n",
+    "        num_boost_round: maximum number of boosting rounds.\n",
+    "        use_optuna: when True, run a 20-trial Optuna search on the training split first.\n",
+    "\n",
+    "    Returns:\n",
+    "        The trained booster, or the cached model when light_model.pkl holds a matching key.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if df has fewer than two distinct trade dates.\n",
+    "    \"\"\"\n",
+    "    cache_file = 'light_model.pkl'\n",
+    "    cache_key = generate_key(params, feature_columns, num_boost_round)\n",
+    "\n",
+    "    # Reuse a cached model only when its key matches the current params, features and rounds.\n",
+    "    if os.path.exists(cache_file):\n",
+    "        try:\n",
+    "            # NOTE: pickle.load can execute arbitrary code; only load cache files written locally.\n",
+    "            with open(cache_file, 'rb') as f:\n",
+    "                cache_data = pickle.load(f)\n",
+    "            if cache_data.get('key') == cache_key:\n",
+    "                print(\"加载缓存模型...\")\n",
+    "                return cache_data.get('model')\n",
+    "            else:\n",
+    "                print(\"缓存模型的参数与当前参数不匹配，重新训练模型。\")\n",
+    "        except Exception as e:\n",
+    "            print(f\"加载缓存失败: {e}，重新训练模型。\")\n",
+    "    else:\n",
+    "        print(\"未发现缓存模型，开始训练新模型。\")\n",
+    "\n",
+    "    # Order rows by date ascending and, within a date, by label descending.\n",
+    "    df_sorted = df.sort_values(by=['trade_date', 'label'], ascending=[True, False])\n",
+    "    unique_dates = df_sorted['trade_date'].unique()\n",
+    "    if len(unique_dates) < 2:\n",
+    "        raise ValueError('need at least two distinct trade dates to split train/validation')\n",
+    "    # Hold out at least one date: a count of 0 would make the [-0:] slice select every date.\n",
+    "    val_date_count = max(1, int(len(unique_dates) * 0.1))\n",
+    "    val_dates = unique_dates[-val_date_count:]\n",
+    "\n",
+    "    # Split with a boolean mask. Index labels must not be fed to iloc: that is only\n",
+    "    # correct when df arrives date-sorted with a 0..n-1 index, otherwise iloc picks\n",
+    "    # the wrong rows.\n",
+    "    is_val = df_sorted['trade_date'].isin(val_dates)\n",
+    "    train_df = df_sorted[~is_val]\n",
+    "    val_df = df_sorted[is_val]\n",
+    "\n",
+    "    X_train, y_train = train_df[feature_columns], train_df['label']\n",
+    "    X_val, y_val = val_df[feature_columns], val_df['label']\n",
+    "\n",
+    "    train_data = lgb.Dataset(X_train, label=y_train)\n",
+    "    val_data = lgb.Dataset(X_val, label=y_val)\n",
+    "    if use_optuna:\n",
+    "        study = optuna.create_study(direction='minimize')\n",
+    "        study.optimize(lambda trial: objective(trial, X_train, y_train, num_boost_round, params), n_trials=20)\n",
+    "\n",
+    "        print(f\"Best parameters: {study.best_trial.params}\")\n",
+    "        print(f\"Best score: {study.best_trial.value}\")\n",
+    "\n",
+    "        # The tuned values overwrite the caller's params dict in place.\n",
+    "        params.update(study.best_trial.params)\n",
+    "    model = lgb.train(\n",
+    "        params, train_data, num_boost_round=num_boost_round,\n",
+    "        valid_sets=[train_data, val_data], valid_names=['train', 'valid'],\n",
+    "        callbacks=callbacks\n",
+    "    )\n",
+    "\n",
+    "    # Optional diagnostics: metric history, one tree and the top-20 split importances.\n",
+    "    if print_feature_importance:\n",
+    "        lgb.plot_metric(evals)\n",
+    "        lgb.plot_tree(model, figsize=(20, 8))\n",
+    "        lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
+    "        plt.show()\n",
+    "    # Cache persistence is currently disabled, so the cache branch above only hits\n",
+    "    # when light_model.pkl was written by other means.\n",
+    "    # with open(cache_file, 'wb') as f:\n",
+    "    #     pickle.dump({'key': cache_key,\n",
+    "    #                  'model': model,\n",
+    "    #                  'feature_columns': feature_columns}, f)\n",
+    "    #     print(\"模型训练完成并已保存缓存。\")\n",
+    "    return model\n",
+    "\n",
+    "\n",
+    "from catboost import CatBoostRegressor\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def train_catboost(df, num_boost_round, params=None):\n",
+    "    \"\"\"Train a CatBoost regressor, holding out the most recent 10% of dates for evaluation.\n",
+    "\n",
+    "    Args:\n",
+    "        df: frame with date, instrument and label columns plus the factor columns.\n",
+    "            Columns whose name contains 'future' or 'score' are not used as features.\n",
+    "        num_boost_round: number of boosting iterations.\n",
+    "        params: optional extra keyword arguments for CatBoostRegressor.\n",
+    "\n",
+    "    Returns:\n",
+    "        The fitted model.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if df has fewer than two distinct dates.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): the other cells name these columns trade_date / ts_code - confirm\n",
+    "    # that callers rename them before calling this function.\n",
+    "    # A missing params argument means no extra keyword arguments (** cannot unpack None).\n",
+    "    params = {} if params is None else params\n",
+    "\n",
+    "    non_feature_columns = {'date', 'instrument', 'label'}\n",
+    "    feature_columns = [\n",
+    "        col for col in df.columns\n",
+    "        if col not in non_feature_columns\n",
+    "        and 'future' not in col\n",
+    "        and 'score' not in col\n",
+    "    ]\n",
+    "\n",
+    "    # Order rows by date ascending and, within a date, by label descending.\n",
+    "    df_sorted = df.sort_values(by=['date', 'label'], ascending=[True, False])\n",
+    "    unique_dates = df_sorted['date'].unique()\n",
+    "    if len(unique_dates) < 2:\n",
+    "        raise ValueError('need at least two distinct dates to split train/validation')\n",
+    "    # Hold out at least one date: a count of 0 would make the [-0:] slice select every date.\n",
+    "    val_date_count = max(1, int(len(unique_dates) * 0.1))\n",
+    "    val_dates = unique_dates[-val_date_count:]\n",
+    "\n",
+    "    # Split with a boolean mask; feeding index labels to iloc picks the wrong rows\n",
+    "    # whenever labels and positions differ after sorting.\n",
+    "    is_val = df_sorted['date'].isin(val_dates)\n",
+    "    train_df = df_sorted[~is_val]\n",
+    "    val_df = df_sorted[is_val]\n",
+    "\n",
+    "    X_train, y_train = train_df[feature_columns], train_df['label']\n",
+    "    X_val, y_val = val_df[feature_columns], val_df['label']\n",
+    "\n",
+    "    model = CatBoostRegressor(**params, iterations=num_boost_round)\n",
+    "    model.fit(X_train,\n",
+    "              y_train,\n",
+    "              eval_set=(X_val, y_val))\n",
+    "\n",
+    "    return model"
+   ],
+   "id": "8f134d435f71e9e2",
+   "outputs": [],
+   "execution_count": 17
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:42:08.126033Z",
+     "start_time": "2025-02-11T16:42:08.047878Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# LightGBM settings for the regression model.\n",
+    "# is_unbalance was removed: it applies only to binary / multiclassova objectives and\n",
+    "# has no effect with objective='regression'.\n",
+    "light_params = {\n",
+    "    'objective': 'regression',\n",
+    "    'metric': 'l2',\n",
+    "    'learning_rate': 0.05,\n",
+    "    'num_leaves': 2048,\n",
+    "    'min_data_in_leaf': 16,\n",
+    "    'max_depth': 32,\n",
+    "    'max_bin': 1024,\n",
+    "    'nthread': 2,\n",
+    "    'feature_fraction': 0.7,\n",
+    "    'bagging_fraction': 0.7,\n",
+    "    'bagging_freq': 5,\n",
+    "    'lambda_l1': 80,\n",
+    "    'lambda_l2': 65,\n",
+    "    'verbosity': -1\n",
+    "}"
+   ],
+   "id": "4a4542e1ed6afe7d",
+   "outputs": [],
+   "execution_count": 18
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:43:30.267422Z",
+     "start_time": "2025-02-11T16:42:08.363141Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "print('train data size: ', len(train_data))\n",
+    "df = train_data\n",
+    "\n",
+    "# The same dict is given to record_evaluation and to train_light_model.\n",
+    "evals = {}\n",
+    "training_callbacks = [\n",
+    "    lgb.log_evaluation(period=500),\n",
+    "    lgb.callback.record_evaluation(evals),\n",
+    "    lgb.early_stopping(50, first_metric_only=True),\n",
+    "]\n",
+    "light_model = train_light_model(\n",
+    "    train_data, light_params, feature_columns, training_callbacks, evals,\n",
+    "    num_boost_round=1000, use_optuna=False, print_feature_importance=False,\n",
+    ")"
+   ],
+   "id": "beeb098799ecfa6a",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "train data size:  747134\n",
+      "未发现缓存模型，开始训练新模型。\n",
+      "Training until validation scores don't improve for 50 rounds\n",
+      "[500]\ttrain's l2: 0.415226\tvalid's l2: 0.57674\n",
+      "Early stopping, best iteration is:\n",
+      "[477]\ttrain's l2: 0.417575\tvalid's l2: 0.576627\n",
+      "Evaluated only: l2\n"
+     ]
+    }
+   ],
+   "execution_count": 19
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:43:32.102494Z",
+     "start_time": "2025-02-11T16:43:30.315429Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# Score every test row, then keep the highest-scoring row of each trade date.\n",
+    "test_data['score'] = light_model.predict(test_data[feature_columns])\n",
+    "top_row_per_day = test_data.groupby('trade_date')['score'].idxmax()\n",
+    "predictions = test_data.loc[top_row_per_day]"
+   ],
+   "id": "5bb96ca8492e74d",
+   "outputs": [],
+   "execution_count": 20
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-02-11T16:43:32.197091Z",
+     "start_time": "2025-02-11T16:43:32.107477Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions.csv', index=False)",
+   "id": "5d1522a7538db91b",
+   "outputs": [],
+   "execution_count": 21
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/main/train/init.py
+++ b/main/train/init.py
--- a/main/train/best_model.pth
+++ b/main/train/best_model.pth
--- a/main/train/catboost_info/catboost_training.json
+++ b/main/train/catboost_info/catboost_training.json
--- a/main/train/catboost_info/learn/events.out.tfevents
+++ b/main/train/catboost_info/learn/events.out.tfevents
--- a/main/train/catboost_info/learn_error.tsv
+++ b/main/train/catboost_info/learn_error.tsv
--- a/main/train/catboost_info/test/events.out.tfevents
+++ b/main/train/catboost_info/test/events.out.tfevents
--- a/main/train/catboost_info/test1/events.out.tfevents
+++ b/main/train/catboost_info/test1/events.out.tfevents
--- a/main/train/catboost_info/test_error.tsv
+++ b/main/train/catboost_info/test_error.tsv
--- a/main/train/catboost_info/time_left.tsv
+++ b/main/train/catboost_info/time_left.tsv
--- a/main/train/catboost_info/tmp/cat_feature_index.7083db21-2a602535-fc74a793-f24f9ca1.tmp
+++ b/main/train/catboost_info/tmp/cat_feature_index.7083db21-2a602535-fc74a793-f24f9ca1.tmp
--- a/main/train/catboost_info/tmp/cat_feature_index.c8182d04-ba2f7d40-17be2993-7ebf9fac.tmp
+++ b/main/train/catboost_info/tmp/cat_feature_index.c8182d04-ba2f7d40-17be2993-7ebf9fac.tmp
--- a/main/train/catboost_info/tmp/cat_feature_index.d44701fd-1e864ae1-30a8fb3f-c689cc7f.tmp
+++ b/main/train/catboost_info/tmp/cat_feature_index.d44701fd-1e864ae1-30a8fb3f-c689cc7f.tmp
--- a/main/train/catboost_info/tmp/cat_feature_index.ea11afac-3656bfa6-7640c1ea-9df76981.tmp
+++ b/main/train/catboost_info/tmp/cat_feature_index.ea11afac-3656bfa6-7640c1ea-9df76981.tmp
--- a/main/train/code.ipynb
+++ b/main/train/code.ipynb
@@ -0,0 +1,384 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "\n",
+    "\n",
+    "def get_technical_factor(df):\n",
+    "    # 按股票和日期排序\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "    grouped = df.groupby('ts_code', group_keys=False)\n",
+    "\n",
+    "    df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)\n",
+    "    df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)\n",
+    "\n",
+    "    # 因子 1：短期成交量变化率\n",
+    "    df['volume_change_rate'] = (\n",
+    "            grouped['vol'].rolling(window=2).mean() /\n",
+    "            grouped['vol'].rolling(window=5).mean() - 1\n",
+    "    ).reset_index(level=0, drop=True)  # 确保索引对齐\n",
+    "\n",
+    "    # 因子 2：成交量突破信号\n",
+    "    max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True)  # 确保索引对齐\n",
+    "    df['cat_volume_breakout'] = (df['vol'] > max_volume)\n",
+    "\n",
+    "    # 因子 3：换手率均线偏离度\n",
+    "    mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
+    "    std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)\n",
+    "    df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover\n",
+    "\n",
+    "    # 因子 4：换手率激增信号\n",
+    "    df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)\n",
+    "\n",
+    "    # 因子 5：量比均值\n",
+    "    df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
+    "\n",
+    "    # 因子 6：量比突破信号\n",
+    "    max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)\n",
+    "    df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)\n",
+    "\n",
+    "    # 因子 7：成交量与换手率的综合动量因子\n",
+    "    alpha = 0.5\n",
+    "    df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']\n",
+    "\n",
+    "    # 因子 8：量价共振因子\n",
+    "    df['price_change_rate'] = grouped['close'].pct_change()\n",
+    "    df['resonance_factor'] = df['volume_ratio'] * df['price_change_rate']\n",
+    "\n",
+    "    # 计算 log_close、成交量放量信号（vol_spike）以及 up 和 down（上、下影线相对收盘价的占比）\n",
+    "    df['log_close'] = np.log(df['close'])\n",
+    "\n",
+    "    df['vol_spike'] = grouped.apply(\n",
+    "        lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)\n",
+    "    )\n",
+    "    df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']\n",
+    "    df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()\n",
+    "\n",
+    "    df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
+    "    df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
+    "\n",
+    "    # 计算 ATR\n",
+    "    df['atr_14'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),\n",
+    "                            index=x.index)\n",
+    "    )\n",
+    "    df['atr_6'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),\n",
+    "                            index=x.index)\n",
+    "    )\n",
+    "\n",
+    "    # 计算 OBV 及其均线\n",
+    "    df['obv'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
+    "    )\n",
+    "    df['maobv_6'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)\n",
+    "    )\n",
+    "    df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
+    "\n",
+    "    # 计算 RSI\n",
+    "    df['rsi_3'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)\n",
+    "    )\n",
+    "    df['rsi_6'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)\n",
+    "    )\n",
+    "    df['rsi_9'] = grouped.apply(\n",
+    "        lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)\n",
+    "    )\n",
+    "\n",
+    "    # 计算 return_5、return_10 和 return_20\n",
+    "    df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
+    "    df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)\n",
+    "    df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
+    "\n",
+    "    # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)\n",
+    "\n",
+    "    # 计算标准差指标\n",
+    "    df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())\n",
+    "    df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())\n",
+    "    df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())\n",
+    "    df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())\n",
+    "    df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())\n",
+    "\n",
+    "    # 计算比值指标\n",
+    "    df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
+    "    df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
+    "\n",
+    "    # 计算标准差差值\n",
+    "    df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_act_factor(df, cat=True):\n",
+    "    # 按股票和日期排序\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "    grouped = df.groupby('ts_code', group_keys=False)\n",
+    "    # 计算 EMA 指标\n",
+    "    df['_ema_5'] = grouped['close'].apply(\n",
+    "        lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)\n",
+    "    )\n",
+    "    df['_ema_13'] = grouped['close'].apply(\n",
+    "        lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)\n",
+    "    )\n",
+    "    df['_ema_20'] = grouped['close'].apply(\n",
+    "        lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)\n",
+    "    )\n",
+    "    df['_ema_60'] = grouped['close'].apply(\n",
+    "        lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)\n",
+    "    )\n",
+    "\n",
+    "    # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
+    "    df['act_factor1'] = grouped['_ema_5'].apply(\n",
+    "        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50\n",
+    "    )\n",
+    "    df['act_factor2'] = grouped['_ema_13'].apply(\n",
+    "        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40\n",
+    "    )\n",
+    "    df['act_factor3'] = grouped['_ema_20'].apply(\n",
+    "        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21\n",
+    "    )\n",
+    "    df['act_factor4'] = grouped['_ema_60'].apply(\n",
+    "        lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10\n",
+    "    )\n",
+    "\n",
+    "    if cat:\n",
+    "        df['cat_af1'] = df['act_factor1'] > 0\n",
+    "        df['cat_af2'] = df['act_factor2'] > df['act_factor1']\n",
+    "        df['cat_af3'] = df['act_factor3'] > df['act_factor2']\n",
+    "        df['cat_af4'] = df['act_factor4'] > df['act_factor3']\n",
+    "\n",
+    "    # 计算 act_factor5 和 act_factor6\n",
+    "    df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
+    "    df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
+    "        df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
+    "\n",
+    "    # 根据 trade_date 截面计算排名\n",
+    "    df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)\n",
+    "    df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_money_flow_factor(df):\n",
+    "    # 计算资金流相关因子（字段名称见 tushare 数据说明）\n",
+    "    df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
+    "    df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
+    "\n",
+    "    df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
+    "    df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
+    "\n",
+    "    df['log(circ_mv)'] = np.log(df['circ_mv'])\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_alpha_factor(df):\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "    grouped = df.groupby('ts_code')\n",
+    "\n",
+    "    # alpha_022: 当前 close 与 5 日前 close 差值\n",
+    "    df['alpha_022'] = grouped['close'].transform(lambda x: x - x.shift(5))\n",
+    "\n",
+    "    # alpha_003: (close - open) / (high - low)\n",
+    "    df['alpha_003'] = np.where(df['high'] != df['low'],\n",
+    "                               (df['close'] - df['open']) / (df['high'] - df['low']),\n",
+    "                               0)\n",
+    "\n",
+    "    # alpha_007: 计算过去5日 close 与 vol 的相关性，并按 trade_date 排名\n",
+    "    df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)\n",
+    "    df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)\n",
+    "\n",
+    "    # alpha_013: 计算过去5日 close 之和 - 20日 close 之和，并按 trade_date 排名\n",
+    "    df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())\n",
+    "    df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_limit_factor(df):\n",
+    "    # 按股票和日期排序\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "\n",
+    "    # 分组处理\n",
+    "    grouped = df.groupby('ts_code', group_keys=False)\n",
+    "\n",
+    "    # 1. 今日是否涨停/跌停\n",
+    "    df['cat_up_limit'] = (df['close'] == df['up_limit']).astype(int)  # 是否涨停（1表示涨停，0表示未涨停）\n",
+    "    df['cat_down_limit'] = (df['close'] == df['down_limit']).astype(int)  # 是否跌停（1表示跌停，0表示未跌停）\n",
+    "\n",
+    "    # 2. 最近涨跌停次数（过去10个交易日）\n",
+    "    df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
+    "                                                                                                           drop=True)\n",
+    "    df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
+    "                                                                                                               drop=True)\n",
+    "\n",
+    "    # 3. 最近连续涨跌停天数\n",
+    "    def calculate_consecutive_limits(series):\n",
+    "        \"\"\"\n",
+    "        计算 0/1 序列中连续为 1 的天数（连续涨停/跌停天数）；返回的两个值计算方式完全相同。\n",
+    "        \"\"\"\n",
+    "        consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
+    "        consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
+    "        return consecutive_up, consecutive_down\n",
+    "\n",
+    "    # 连续涨停天数\n",
+    "    df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(\n",
+    "        lambda x: calculate_consecutive_limits(x)[0]\n",
+    "    ).reset_index(level=0, drop=True)\n",
+    "\n",
+    "    # 连续跌停天数\n",
+    "    # df['consecutive_down_limit'] = grouped['cat_down_limit'].apply(\n",
+    "    #     lambda x: calculate_consecutive_limits(x)[1]\n",
+    "    # ).reset_index(level=0, drop=True)\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_cyp_perf_factor(df):\n",
+    "    # 预处理：按股票代码和时间排序\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "\n",
+    "    # 按股票代码分组处理\n",
+    "    grouped = df.groupby('ts_code', group_keys=False)\n",
+    "\n",
+    "    df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])\n",
+    "\n",
+    "    df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
+    "\n",
+    "    df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
+    "\n",
+    "    df['lock_factor'] = df['turnover_rate'] * (\n",
+    "            1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))\n",
+    "\n",
+    "    df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)\n",
+    "\n",
+    "    df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))\n",
+    "\n",
+    "    def rolling_corr(group):\n",
+    "        roc_close = group['close'].pct_change()\n",
+    "        roc_weight = group['weight_avg'].pct_change()\n",
+    "        return roc_close.rolling(10).corr(roc_weight)\n",
+    "\n",
+    "    df['price_cost_divergence'] = grouped.apply(rolling_corr)\n",
+    "\n",
+    "    def calc_atr(group):\n",
+    "        high, low, close = group['high'], group['low'], group['close']\n",
+    "        tr = np.maximum(high - low,\n",
+    "                        np.maximum(abs(high - close.shift()),\n",
+    "                                   abs(low - close.shift())))\n",
+    "        return tr.rolling(14).mean()\n",
+    "\n",
+    "    df['atr_14'] = grouped.apply(calc_atr)\n",
+    "    df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']\n",
+    "\n",
+    "    # 12. 小盘股筹码集中度\n",
+    "    df['smallcap_concentration'] = (1 / df['circ_mv']) * (df['cost_85pct'] - df['cost_15pct'])\n",
+    "\n",
+    "    # 16. 筹码稳定性指数 (20日波动率)\n",
+    "    df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())\n",
+    "    df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())\n",
+    "\n",
+    "    # 17. 成本区间突破标记\n",
+    "    df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())\n",
+    "\n",
+    "    # 18. 黄金筹码共振 (复合事件)\n",
+    "    df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &\n",
+    "                                  (df['volume_ratio'] > 1.5) &\n",
+    "                                  (df['winner_rate'] > 0.7))\n",
+    "\n",
+    "    # 20. 筹码-流动性风险\n",
+    "    df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (\n",
+    "            1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))\n",
+    "\n",
+    "    df.drop(columns=['weight_std20'], inplace=True, errors='ignore')\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_mv_factors(df):\n",
+    "    \"\"\"\n",
+    "    计算多个市值相关因子（标准化与加权合成最终综合因子的部分目前已被注释掉，不会生成）。\n",
+    "\n",
+    "    参数:\n",
+    "        df (pd.DataFrame): 需包含 ts_code, trade_date, turnover_rate, circ_mv, volume_ratio, vol 列的数据框（pe_ttm, pb, ps 在当前实现中未被使用）。\n",
+    "\n",
+    "    返回:\n",
+    "        pd.DataFrame: 包含新增因子的数据框（当前不包含最终综合因子）。\n",
+    "    \"\"\"\n",
+    "    # 按 ts_code 和 trade_date 排序\n",
+    "    df = df.sort_values(by=['ts_code', 'trade_date'])\n",
+    "\n",
+    "    # 按 ts_code 分组\n",
+    "    grouped = df.groupby('ts_code', group_keys=False)\n",
+    "\n",
+    "    # 1. 市值流动比因子\n",
+    "    df['mv_turnover_ratio'] = df['turnover_rate'] / df['circ_mv']\n",
+    "\n",
+    "    # 2. 市值调整成交量因子\n",
+    "    df['mv_adjusted_volume'] = df['vol'] / df['circ_mv']\n",
+    "\n",
+    "    # 3. 市值加权换手率因子（注意：当前公式在数值上等同于因子 1）\n",
+    "    df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['circ_mv'])\n",
+    "\n",
+    "    # 4. 非线性市值成交量因子（注意：当前公式与因子 2 完全相同，尚未做非线性变换）\n",
+    "    df['nonlinear_mv_volume'] = df['vol'] / df['circ_mv']\n",
+    "\n",
+    "    # 5. 市值量比因子\n",
+    "    df['mv_volume_ratio'] = df['volume_ratio'] / df['circ_mv']\n",
+    "\n",
+    "    # 6. 市值动量因子\n",
+    "    df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['circ_mv']\n",
+    "\n",
+    "    # 7. 市值波动率因子\n",
+    "    df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)\n",
+    "    df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
+    "\n",
+    "    # 8. 市值成长性因子\n",
+    "    df['volume_growth'] = grouped['vol'].pct_change(periods=20).reset_index(level=0, drop=True)\n",
+    "    df['mv_growth'] = grouped.apply(lambda x: x['volume_growth'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
+    "\n",
+    "    # # 标准化因子\n",
+    "    # factor_columns = [\n",
+    "    #     'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover',\n",
+    "    #     'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum',\n",
+    "    #     'mv_volatility', 'mv_growth'\n",
+    "    # ]\n",
+    "    # scaler = StandardScaler()\n",
+    "    # df[factor_columns] = scaler.fit_transform(df[factor_columns])\n",
+    "    #\n",
+    "    # # 加权合成因子\n",
+    "    # weights = [0.2, 0.15, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1]  # 各因子权重\n",
+    "    # df['final_combined_factor'] = df[factor_columns].dot(weights)\n",
+    "\n",
+    "    return df"
+   ],
+   "id": "505e825945e4b8cf"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/main/train/predictions.tsv
+++ b/main/train/predictions.tsv
--- a/main/train/predictions_test.tsv
+++ b/main/train/predictions_test.tsv
@@ -0,0 +1,565 @@
+trade_date,score,ts_code
+2022-12-08,0.27431420966080605,600778.SH
+2022-12-09,0.6150539465999814,002995.SZ
+2022-12-12,0.32582588516973016,001219.SZ
+2022-12-13,0.449772253615743,603183.SH
+2022-12-14,0.6769511128551923,001219.SZ
+2022-12-15,0.5930979713048357,001219.SZ
+2022-12-16,0.43211109874606424,603183.SH
+2022-12-19,0.5066203384263489,000892.SZ
+2022-12-20,0.2882618462700443,000691.SZ
+2022-12-21,0.40494380930765467,001219.SZ
+2022-12-22,0.7379517535413331,002762.SZ
+2022-12-23,0.5775898117404806,002566.SZ
+2022-12-26,0.3292293609625978,002719.SZ
+2022-12-27,0.580738686242899,000679.SZ
+2022-12-28,0.5180122078878033,605289.SH
+2022-12-29,0.643325626734685,002103.SZ
+2022-12-30,0.5378362015974298,603209.SH
+2023-01-03,0.36814451293952416,000985.SZ
+2023-01-04,0.4506419163930136,605133.SH
+2023-01-05,-0.08745711573292192,605167.SH
+2023-01-06,0.3958417326952953,605289.SH
+2023-01-09,0.16620697664167175,600778.SH
+2023-01-10,0.25992110313636035,000985.SZ
+2023-01-11,0.5095437644681087,002771.SZ
+2023-01-12,0.4397750442288285,605258.SH
+2023-01-13,0.6102622318789971,003043.SZ
+2023-01-16,0.31204276505440004,002808.SZ
+2023-01-17,0.4972787924897241,002975.SZ
+2023-01-18,0.026553404105244968,002975.SZ
+2023-01-19,0.29558268580158115,603860.SH
+2023-01-20,0.2510349420297213,002849.SZ
+2023-01-30,0.21942028551157527,003039.SZ
+2023-01-31,0.3575069234093295,605081.SH
+2023-02-01,0.4427957172082794,002893.SZ
+2023-02-02,0.6212207641739337,600817.SH
+2023-02-03,0.6202750689624308,002993.SZ
+2023-02-06,0.20845430964837489,000010.SZ
+2023-02-07,0.3667829939094325,600593.SH
+2023-02-08,0.32215761217132205,000820.SZ
+2023-02-09,0.1516026707537734,002021.SZ
+2023-02-10,0.5453734923733047,003016.SZ
+2023-02-13,0.7491169288183265,003037.SZ
+2023-02-14,0.32476512974212635,002828.SZ
+2023-02-15,0.6984519009806621,605128.SH
+2023-02-16,0.2078494458450699,605378.SH
+2023-02-17,0.20087261579967608,000668.SZ
+2023-02-20,0.6724819126277912,002715.SZ
+2023-02-21,0.7209181859866042,605028.SH
+2023-02-22,0.42474501256326314,002900.SZ
+2023-02-23,0.43124729325039124,001236.SZ
+2023-02-24,0.6008854884810912,603102.SH
+2023-02-27,0.5702542696831331,605259.SH
+2023-02-28,0.24318268223778186,002857.SZ
+2023-03-01,0.5388577927345274,603950.SH
+2023-03-02,0.6815724852841429,001236.SZ
+2023-03-03,0.6064483180272962,002098.SZ
+2023-03-06,0.5180664638865109,605178.SH
+2023-03-07,0.7291442722387731,001339.SZ
+2023-03-08,0.3240206100047592,603268.SH
+2023-03-09,0.5619204909224714,603030.SH
+2023-03-10,0.6055962888677536,003027.SZ
+2023-03-13,0.10471064296768949,605296.SH
+2023-03-14,0.5148688231123284,603176.SH
+2023-03-15,0.41425644779572274,605287.SH
+2023-03-16,0.3858205191834723,605303.SH
+2023-03-17,0.38210649704563177,002899.SZ
+2023-03-20,0.20755090351337924,002778.SZ
+2023-03-21,0.2184477420463366,603155.SH
+2023-03-22,0.07842488490864312,002836.SZ
+2023-03-23,0.26327386834675565,002899.SZ
+2023-03-24,0.21281930224537013,605086.SH
+2023-03-27,0.19455767073518335,603729.SH
+2023-03-28,0.18440479662298903,603324.SH
+2023-03-29,0.5577394899737692,002995.SZ
+2023-03-30,0.28537485170922117,603679.SH
+2023-03-31,0.30705863202777134,603615.SH
+2023-04-03,0.43719928717137047,603321.SH
+2023-04-04,0.7949399014212187,603139.SH
+2023-04-06,0.5079656399994698,002715.SZ
+2023-04-07,0.701235747536229,605299.SH
+2023-04-10,0.5142089175897191,001316.SZ
+2023-04-11,0.6097058153625001,002835.SZ
+2023-04-12,0.42821688099056865,003043.SZ
+2023-04-13,0.6086458195457266,605296.SH
+2023-04-14,0.40520429106061684,001316.SZ
+2023-04-17,0.7332476184295339,002862.SZ
+2023-04-18,0.24978196798538302,600768.SH
+2023-04-19,0.5235224445388739,603657.SH
+2023-04-20,0.5073410973887871,000702.SZ
+2023-04-21,0.25827344858110657,002848.SZ
+2023-04-24,0.441433820804789,603685.SH
+2023-04-25,0.45710917638850534,603230.SH
+2023-04-26,0.28288056233393655,002725.SZ
+2023-04-27,0.13616135413238703,002972.SZ
+2023-04-28,0.26068199992734814,603178.SH
+2023-05-04,0.5654404518697154,600107.SH
+2023-05-05,0.26758125911217795,603021.SH
+2023-05-08,0.23558429168600836,002778.SZ
+2023-05-09,0.2707962779077066,603213.SH
+2023-05-10,0.33701828135159717,600778.SH
+2023-05-11,0.5467076847749692,603958.SH
+2023-05-12,0.6956005090125644,603958.SH
+2023-05-15,0.28587355864974423,000679.SZ
+2023-05-16,0.6092507418432053,600796.SH
+2023-05-17,0.4723632871528185,002633.SZ
+2023-05-18,0.44171920333992315,605089.SH
+2023-05-19,0.15743942037394715,001317.SZ
+2023-05-22,0.47338926108587503,603151.SH
+2023-05-23,0.7537765588258426,603721.SH
+2023-05-24,0.35894033254239865,003007.SZ
+2023-05-25,0.6230303733419829,003005.SZ
+2023-05-26,0.5243725213664181,003005.SZ
+2023-05-29,0.5460639613578377,001288.SZ
+2023-05-30,-0.14324964018444036,605151.SH
+2023-05-31,0.1321851497388741,003041.SZ
+2023-06-01,0.488265280236323,603170.SH
+2023-06-02,0.2725329302903607,002875.SZ
+2023-06-05,0.4445215836414108,001316.SZ
+2023-06-06,0.233866225393599,600753.SH
+2023-06-07,0.1512953839015877,603097.SH
+2023-06-08,0.5303933339784708,002780.SZ
+2023-06-09,0.595474766855165,002893.SZ
+2023-06-12,0.7044220035173576,002820.SZ
+2023-06-13,0.46792362066084003,002702.SZ
+2023-06-14,0.5917956764629129,000880.SZ
+2023-06-15,0.3231002542961875,002981.SZ
+2023-06-16,0.3426911954075076,600847.SH
+2023-06-19,-0.02150391139369695,603132.SH
+2023-06-20,0.6704208966606625,002949.SZ
+2023-06-21,0.7415606269689047,002806.SZ
+2023-06-26,0.2389199769543643,600847.SH
+2023-06-27,0.2003853580878301,605169.SH
+2023-06-28,0.46623595119888966,605218.SH
+2023-06-29,0.5589108980336046,603958.SH
+2023-06-30,0.6290675381060588,603286.SH
+2023-07-03,0.30433310431106353,600778.SH
+2023-07-04,0.41651276650561014,002513.SZ
+2023-07-05,0.3473548650199746,603132.SH
+2023-07-06,0.40969750497772167,002591.SZ
+2023-07-07,0.2430362735691786,001231.SZ
+2023-07-10,0.4983750803303532,001267.SZ
+2023-07-11,0.733386176985722,002551.SZ
+2023-07-12,0.7936049551065578,000004.SZ
+2023-07-13,0.40916765144188155,000638.SZ
+2023-07-14,0.21996055437116258,605580.SH
+2023-07-17,0.22071234127281886,605369.SH
+2023-07-18,0.32992331418284704,002802.SZ
+2023-07-19,0.3337178034533016,001222.SZ
+2023-07-20,0.44391528952121656,600234.SH
+2023-07-21,0.5703993630872055,600448.SH
+2023-07-24,0.13840705878806345,002753.SZ
+2023-07-25,0.14047801960398054,002377.SZ
+2023-07-26,0.44664932418756537,003032.SZ
+2023-07-27,0.5452266508240136,603838.SH
+2023-07-28,0.6501197606840003,002397.SZ
+2023-07-31,0.8161881604231447,002397.SZ
+2023-08-01,0.8054314713785248,002397.SZ
+2023-08-02,0.5699471212343736,600119.SH
+2023-08-03,0.3961899673469923,002787.SZ
+2023-08-04,0.747143154431229,600082.SH
+2023-08-07,0.5542807418220157,605162.SH
+2023-08-08,0.4660499541690493,605369.SH
+2023-08-09,0.1846960748819621,605060.SH
+2023-08-10,0.6657079567366654,003020.SZ
+2023-08-11,0.657079341742516,000953.SZ
+2023-08-14,0.42201036027484534,002495.SZ
+2023-08-15,0.4783974585467736,002495.SZ
+2023-08-16,0.5756999736912221,003030.SZ
+2023-08-17,0.7578964013923504,002052.SZ
+2023-08-18,0.004897979672684783,603151.SH
+2023-08-21,0.11103177103375994,605339.SH
+2023-08-22,0.5482563310657345,603021.SH
+2023-08-23,0.7223546665888397,000669.SZ
+2023-08-24,0.750140979575826,600235.SH
+2023-08-25,0.34893747282432125,001318.SZ
+2023-08-28,0.17097259367409923,603329.SH
+2023-08-29,0.290639411928478,001267.SZ
+2023-08-30,0.07811174210597455,603021.SH
+2023-08-31,0.2910924076064356,603838.SH
+2023-09-01,0.6435370857973789,002696.SZ
+2023-09-04,0.04554737468797225,605259.SH
+2023-09-05,0.18999529865866976,001231.SZ
+2023-09-06,0.47343827547785233,002982.SZ
+2023-09-07,0.5685183560937441,001231.SZ
+2023-09-08,0.4682919982486746,003025.SZ
+2023-09-11,0.511414318533627,002535.SZ
+2023-09-12,0.34217637355801866,003020.SZ
+2023-09-13,0.4524964916922371,001269.SZ
+2023-09-14,0.6440683894231696,002856.SZ
+2023-09-15,0.6265975964127983,001269.SZ
+2023-09-18,0.6451154901817582,002857.SZ
+2023-09-19,0.41416994363886955,605151.SH
+2023-09-20,0.4097659657161061,600615.SH
+2023-09-21,0.3308468663518861,603616.SH
+2023-09-22,0.7278118492027132,600608.SH
+2023-09-25,0.3087058065638187,002963.SZ
+2023-09-26,0.44957535540535354,000638.SZ
+2023-09-27,0.6529354742977974,000609.SZ
+2023-09-28,0.5608546287364546,605080.SH
+2023-10-09,0.2684689324603092,000004.SZ
+2023-10-10,0.7514903867910352,001337.SZ
+2023-10-11,0.6833204831817536,000010.SZ
+2023-10-12,0.6849345854259707,001288.SZ
+2023-10-13,0.45213040270359944,001223.SZ
+2023-10-16,0.44367713319364266,001311.SZ
+2023-10-17,0.4358062265247695,001266.SZ
+2023-10-18,0.7896227965981543,002535.SZ
+2023-10-19,0.8091287635227896,000609.SZ
+2023-10-20,0.7497841605463051,000705.SZ
+2023-10-23,0.5162013866354915,600615.SH
+2023-10-24,0.5346626404470584,000554.SZ
+2023-10-25,0.31017050910898813,002836.SZ
+2023-10-26,0.41928742617604475,002798.SZ
+2023-10-27,0.6206681321070086,600791.SH
+2023-10-30,0.2809249638133884,600697.SH
+2023-10-31,0.41380204486883465,605299.SH
+2023-11-01,0.3913649017002345,002952.SZ
+2023-11-02,0.25190883932779223,603272.SH
+2023-11-03,0.25483193696737405,600697.SH
+2023-11-06,0.6843951349633363,603900.SH
+2023-11-07,0.6794539224187386,002005.SZ
+2023-11-08,0.2787637201989255,605337.SH
+2023-11-09,0.43869442213023335,603307.SH
+2023-11-10,0.2901012944614997,002615.SZ
+2023-11-13,0.48928158926409887,003020.SZ
+2023-11-14,0.46232236500040824,603268.SH
+2023-11-15,0.6895995906987776,000010.SZ
+2023-11-16,0.45286066066734804,001298.SZ
+2023-11-17,0.7301876489705413,000010.SZ
+2023-11-20,0.7343688038104235,000004.SZ
+2023-11-21,0.23914531702237296,600361.SH
+2023-11-22,0.6007850824537518,002735.SZ
+2023-11-23,0.6504458118708949,603655.SH
+2023-11-24,0.46094310596129545,002842.SZ
+2023-11-27,0.6273014444813882,603729.SH
+2023-11-28,0.44076850931480105,002188.SZ
+2023-11-29,0.2215431212240851,605598.SH
+2023-11-30,0.47752407474308556,002247.SZ
+2023-12-01,0.5451043441108514,603045.SH
+2023-12-04,0.37633081988016603,603183.SH
+2023-12-05,0.7161351255511346,000929.SZ
+2023-12-06,0.467262040140511,002848.SZ
+2023-12-07,0.39095280707015256,600883.SH
+2023-12-08,0.4253618928722024,001373.SZ
+2023-12-11,0.2828511933586843,002753.SZ
+2023-12-12,0.5178019880022604,600099.SH
+2023-12-13,0.5539680447662736,000702.SZ
+2023-12-14,0.7341038153763678,000609.SZ
+2023-12-15,0.5186263801346903,002495.SZ
+2023-12-18,0.45857609424669377,002835.SZ
+2023-12-19,0.4688848440159626,000004.SZ
+2023-12-20,0.528943977776945,002571.SZ
+2023-12-21,0.22718655814787117,000020.SZ
+2023-12-22,0.5928968748610841,002052.SZ
+2023-12-25,0.3483050510250432,001201.SZ
+2023-12-26,0.32710963694268524,600778.SH
+2023-12-27,0.8023420246227895,603061.SH
+2023-12-28,0.6117050022740004,001223.SZ
+2023-12-29,0.30978967753335185,600791.SH
+2024-01-02,0.7213136293380755,001339.SZ
+2024-01-03,0.27966735262244463,603255.SH
+2024-01-04,0.4113020595403535,002569.SZ
+2024-01-05,0.5862299814842546,002397.SZ
+2024-01-08,0.2751799131522872,002207.SZ
+2024-01-09,0.3983758716199103,600462.SH
+2024-01-10,0.27583418641572577,002381.SZ
+2024-01-11,0.5163369671207251,001308.SZ
+2024-01-12,0.1869457075250673,001333.SZ
+2024-01-15,0.38890973684559677,600778.SH
+2024-01-16,0.628350379976437,002973.SZ
+2024-01-17,0.567643592779436,001259.SZ
+2024-01-18,0.45358303557672786,605003.SH
+2024-01-19,0.17143291074790643,605003.SH
+2024-01-22,0.38033202178552034,600137.SH
+2024-01-23,0.2682996358307573,603307.SH
+2024-01-24,0.09762885957278981,002200.SZ
+2024-01-25,0.2791248936457759,600322.SH
+2024-01-26,0.6698373552722515,001300.SZ
+2024-01-29,0.3453399213641141,001212.SZ
+2024-01-30,0.2018197040316142,000609.SZ
+2024-01-31,0.13839592742162668,002862.SZ
+2024-02-01,0.304731410465905,002397.SZ
+2024-02-02,0.4968354158495188,600791.SH
+2024-02-05,0.6076128945914563,002883.SZ
+2024-02-06,0.27529438199416406,001313.SZ
+2024-02-07,0.7032777773473656,601279.SH
+2024-02-08,0.5124649392628607,001339.SZ
+2024-02-19,0.47259882754683735,600608.SH
+2024-02-20,-0.14666017874686663,605289.SH
+2024-02-21,0.658199830069353,001317.SZ
+2024-02-22,-0.03512517816725689,603895.SH
+2024-02-23,0.49070407040956543,603286.SH
+2024-02-26,0.5874761268473985,603192.SH
+2024-02-27,0.6988377838547319,002856.SZ
+2024-02-28,0.2914956876374956,002513.SZ
+2024-02-29,0.39780267057003393,002760.SZ
+2024-03-01,0.270122240597905,000908.SZ
+2024-03-04,0.6403053454891889,000622.SZ
+2024-03-05,0.7898552633062592,002848.SZ
+2024-03-06,0.3809277227308279,002888.SZ
+2024-03-07,0.853455303753637,002199.SZ
+2024-03-08,0.2596463171676263,603206.SH
+2024-03-11,0.7555631824344116,002888.SZ
+2024-03-12,0.22682284013007994,001298.SZ
+2024-03-13,0.5422854277369126,001298.SZ
+2024-03-14,0.45779191251005164,600444.SH
+2024-03-15,0.6383376746111096,603023.SH
+2024-03-18,0.3201696636960583,001217.SZ
+2024-03-19,0.5996501433774017,001300.SZ
+2024-03-20,0.679915283803281,603273.SH
+2024-03-21,0.33698105904573383,000820.SZ
+2024-03-22,0.5218294653991029,002272.SZ
+2024-03-25,0.6420731490646061,002272.SZ
+2024-03-26,0.2833072190123176,000985.SZ
+2024-03-27,0.47781267803027716,002094.SZ
+2024-03-28,0.3495920493283059,001368.SZ
+2024-03-29,0.6814619283051855,605167.SH
+2024-04-01,0.13340757834476227,002787.SZ
+2024-04-02,0.6485933245807073,002629.SZ
+2024-04-03,0.5005374249650347,600889.SH
+2024-04-08,0.3458211793083884,002998.SZ
+2024-04-09,0.4543106999676991,603268.SH
+2024-04-10,0.3782795993313912,600289.SH
+2024-04-11,0.5682306510607521,002999.SZ
+2024-04-12,0.7563586796123376,001333.SZ
+2024-04-15,0.6820371060072895,605259.SH
+2024-04-16,0.20646370209051096,603177.SH
+2024-04-17,0.397823887165538,001367.SZ
+2024-04-18,0.5284023840181794,001288.SZ
+2024-04-19,0.5531779115633008,603137.SH
+2024-04-22,0.5285690780224108,603270.SH
+2024-04-23,0.4948891344668796,605003.SH
+2024-04-24,0.18745324505195465,002899.SZ
+2024-04-25,0.5561527695473475,605287.SH
+2024-04-26,0.3910800271321991,603813.SH
+2024-04-29,0.22800522777162466,600083.SH
+2024-04-30,0.47575239303431954,603991.SH
+2024-05-06,0.24135147235517762,603991.SH
+2024-05-07,0.6505337985284155,603955.SH
+2024-05-08,-0.008011195615733824,000820.SZ
+2024-05-09,0.3612607813981246,002848.SZ
+2024-05-10,0.5729784871026853,002295.SZ
+2024-05-13,0.42642459942636673,001228.SZ
+2024-05-14,0.616654660412696,603150.SH
+2024-05-15,0.14069160886717613,000995.SZ
+2024-05-16,0.2893164763341796,002893.SZ
+2024-05-17,0.26559138928183623,000593.SZ
+2024-05-20,0.6202830453921107,605318.SH
+2024-05-21,0.5962739970033031,002231.SZ
+2024-05-22,0.24212148187955357,600408.SH
+2024-05-23,0.5866973439001457,000056.SZ
+2024-05-24,0.4950026505575876,002620.SZ
+2024-05-27,0.2716568170957144,001367.SZ
+2024-05-28,0.614584069552624,603062.SH
+2024-05-29,0.5190445551469995,605567.SH
+2024-05-30,0.5650446836882232,001299.SZ
+2024-05-31,0.05127480727491687,002811.SZ
+2024-06-03,0.34948424376750514,603276.SH
+2024-06-04,0.2962107247005024,605365.SH
+2024-06-05,0.4265710639424621,600793.SH
+2024-06-06,0.2352508879607737,603193.SH
+2024-06-07,0.48735816467729426,001306.SZ
+2024-06-11,0.46798640763968125,002569.SZ
+2024-06-12,0.47153387530090407,001215.SZ
+2024-06-13,0.5520110912359264,603280.SH
+2024-06-14,0.19454715507705864,605598.SH
+2024-06-17,0.0963230849661045,002888.SZ
+2024-06-18,-0.07804798333558113,002615.SZ
+2024-06-19,0.610634439198648,002848.SZ
+2024-06-20,0.528548137823535,002888.SZ
+2024-06-21,0.5548914160403448,603097.SH
+2024-06-24,0.39920928373259934,001336.SZ
+2024-06-25,0.3187385965144802,603255.SH
+2024-06-26,0.32449821843842575,002582.SZ
+2024-06-27,0.5820092583390628,001387.SZ
+2024-06-28,0.3390536217231177,600493.SH
+2024-07-01,0.6778726275046811,002875.SZ
+2024-07-02,0.20520989636689582,605255.SH
+2024-07-03,0.5743008555845837,002495.SZ
+2024-07-04,0.9067074210889693,002247.SZ
+2024-07-05,0.7460402549612926,002094.SZ
+2024-07-08,0.6484051297498691,002012.SZ
+2024-07-09,0.4656464752321579,000056.SZ
+2024-07-10,0.5651250099764935,600421.SH
+2024-07-11,0.3919914680384934,000010.SZ
+2024-07-12,0.6138163480575665,002861.SZ
+2024-07-15,0.7779874584252403,605318.SH
+2024-07-16,0.4480483662651257,001217.SZ
+2024-07-17,0.4172203427639832,002094.SZ
+2024-07-18,0.7704811874850669,600561.SH
+2024-07-19,0.47075926007399854,002883.SZ
+2024-07-22,0.7111332900169166,002094.SZ
+2024-07-23,0.2511920883015322,002094.SZ
+2024-07-24,0.4172115642303865,000017.SZ
+2024-07-25,0.3128542596085397,002485.SZ
+2024-07-26,0.7143056769547913,002800.SZ
+2024-07-29,0.7348554188058531,001212.SZ
+2024-07-30,0.6544735763131749,600678.SH
+2024-07-31,0.33170353517328943,000619.SZ
+2024-08-01,0.5622688198785722,603021.SH
+2024-08-02,0.553901968970459,600355.SH
+2024-08-05,0.7203888768112711,002652.SZ
+2024-08-06,0.06845913509306308,002388.SZ
+2024-08-07,0.32607442357451333,002647.SZ
+2024-08-08,0.16487864247139689,600538.SH
+2024-08-09,0.541184606366378,603657.SH
+2024-08-12,0.26199426116748675,605180.SH
+2024-08-13,0.3717346916082516,600281.SH
+2024-08-14,0.6248284781260683,603151.SH
+2024-08-15,0.4701587394310862,001288.SZ
+2024-08-16,0.4980961856837102,600538.SH
+2024-08-19,0.6280757532182177,002622.SZ
+2024-08-20,0.4216952108346084,002094.SZ
+2024-08-21,0.8872335373988718,001317.SZ
+2024-08-22,0.5745604057145399,000056.SZ
+2024-08-23,0.505744305740485,603122.SH
+2024-08-26,0.8419619283484038,002072.SZ
+2024-08-27,0.6810091806857562,002084.SZ
+2024-08-28,0.6875657067302675,002072.SZ
+2024-08-29,0.44229937751556,600692.SH
+2024-08-30,0.5551556278148088,605100.SH
+2024-09-02,0.6992787682915002,000004.SZ
+2024-09-03,0.29976910325882256,001218.SZ
+2024-09-04,0.6768429298090569,002717.SZ
+2024-09-05,0.6206771676543172,002622.SZ
+2024-09-06,0.7271087976454396,002861.SZ
+2024-09-09,0.5051345343079485,002199.SZ
+2024-09-10,0.606941618095118,600533.SH
+2024-09-11,0.44563057902498,000632.SZ
+2024-09-12,0.544127097473236,600889.SH
+2024-09-13,0.8023872948409544,000702.SZ
+2024-09-18,0.27102315041971126,002729.SZ
+2024-09-19,0.8222201895966966,002629.SZ
+2024-09-20,0.7270786760641249,001296.SZ
+2024-09-23,0.6662260924500418,603778.SH
+2024-09-24,0.45201298931080247,600322.SH
+2024-09-25,0.7020112241768041,000573.SZ
+2024-09-26,0.9042164267563502,600322.SH
+2024-09-27,0.6660782992616279,600156.SH
+2024-09-30,0.6863709879538685,600156.SH
+2024-10-08,0.6127578713627717,600791.SH
+2024-10-09,0.42399006878436024,002486.SZ
+2024-10-10,0.19353810507130897,001379.SZ
+2024-10-11,0.32072111647856577,001306.SZ
+2024-10-14,0.3485052899457631,605169.SH
+2024-10-15,0.23682051553226138,603130.SH
+2024-10-16,0.06953864745342996,000695.SZ
+2024-10-17,0.6514983088909282,600599.SH
+2024-10-18,0.4389209760325074,002977.SZ
+2024-10-21,0.7197768607464181,603261.SH
+2024-10-22,0.6635882450692222,002806.SZ
+2024-10-23,0.45400588695990063,603172.SH
+2024-10-24,0.38830884380759845,600107.SH
+2024-10-25,0.8934238346702872,600243.SH
+2024-10-28,0.2797309154889543,600539.SH
+2024-10-29,0.2969987104969114,600768.SH
+2024-10-30,0.40287164661797453,002551.SZ
+2024-10-31,0.3564256188428194,002086.SZ
+2024-11-01,0.6321036634810505,002551.SZ
+2024-11-04,0.5456367520507691,603682.SH
+2024-11-05,0.4395577653608702,600802.SH
+2024-11-06,0.6650198272206568,600243.SH
+2024-11-07,0.43235770657243566,000430.SZ
+2024-11-08,0.5643849119436214,002076.SZ
+2024-11-11,0.7137448249635079,000007.SZ
+2024-11-12,0.5201716819040841,000679.SZ
+2024-11-13,0.6337096887315559,603214.SH
+2024-11-14,0.4979383369165651,603201.SH
+2024-11-15,0.45240021627553617,603201.SH
+2024-11-18,0.22669552732089976,605177.SH
+2024-11-19,0.38573714473203297,603331.SH
+2024-11-20,0.2979426663257885,001378.SZ
+2024-11-21,0.42041152689995204,002980.SZ
+2024-11-22,0.4998877526755884,003003.SZ
+2024-11-25,0.3166022033285757,002381.SZ
+2024-11-26,0.5105217556899093,001300.SZ
+2024-11-27,0.4581772650911419,603183.SH
+2024-11-28,0.2582863137914294,600302.SH
+2024-11-29,0.6516611156931627,600202.SH
+2024-12-02,0.6264742314126309,603021.SH
+2024-12-03,0.17876631396560913,605287.SH
+2024-12-04,0.571815529773696,603637.SH
+2024-12-05,0.5009836942607793,002615.SZ
+2024-12-06,0.5935816089479097,002615.SZ
+2024-12-09,0.2732740647491455,000880.SZ
+2024-12-10,0.8403246240655503,002211.SZ
+2024-12-11,0.6063814254598854,000952.SZ
+2024-12-12,0.65530104057359,002213.SZ
+2024-12-13,0.4326422618589393,002193.SZ
+2024-12-16,0.6951945851895344,002582.SZ
+2024-12-17,0.4367668554492269,002846.SZ
+2024-12-18,0.6034186500026795,600844.SH
+2024-12-19,0.6478095239545749,000695.SZ
+2024-12-20,0.12678291780050743,002687.SZ
+2024-12-23,0.0005977226174160126,603176.SH
+2024-12-24,0.6931664421535906,000790.SZ
+2024-12-25,0.9280754228431425,000004.SZ
+2024-12-26,0.5025149234980124,603082.SH
+2024-12-27,0.5619791111821815,603325.SH
+2024-12-30,0.5970375646444621,603291.SH
+2024-12-31,0.6210268787938896,603798.SH
+2025-01-02,0.5150847228562943,603255.SH
+2025-01-03,0.3936455223882481,001238.SZ
+2025-01-06,0.42512989288467096,001256.SZ
+2025-01-07,0.20046282642128765,002763.SZ
+2025-01-08,0.3594789763099251,603137.SH
+2025-01-09,0.17148562512671917,603150.SH
+2025-01-10,0.8141461510449396,603909.SH
+2025-01-13,0.7278259334554208,002365.SZ
+2025-01-14,0.5332116728293693,000955.SZ
+2025-01-15,0.26395540555061114,001223.SZ
+2025-01-16,0.20321325349056088,603637.SH
+2025-01-17,0.5155710238940482,000586.SZ
+2025-01-20,0.7692783436325927,002072.SZ
+2025-01-21,0.3743531875901297,601798.SH
+2025-01-22,0.5609509357408301,603059.SH
+2025-01-23,0.20207700522454125,001378.SZ
+2025-01-24,0.3138610824997807,002760.SZ
+2025-01-27,0.3887191549307029,603192.SH
+2025-02-05,0.5131470103792286,600599.SH
+2025-02-06,0.42133133196663924,603255.SH
+2025-02-07,0.2981033776962701,605003.SH
+2025-02-10,0.78495727017451,600225.SH
+2025-02-11,0.9081192688195034,600225.SH
+2025-02-12,0.8027995368952746,600225.SH
+2025-02-13,0.6500142590724168,603789.SH
+2025-02-14,0.7725392087365835,002058.SZ
+2025-02-17,0.49255867173448825,600228.SH
+2025-02-18,0.5576519868864848,600243.SH
+2025-02-19,0.2592919075461544,002496.SZ
+2025-02-20,0.421298468924212,002848.SZ
+2025-02-21,0.29697775540100313,001316.SZ
+2025-02-24,0.7638868267339545,603211.SH
+2025-02-25,0.5526784340520452,003028.SZ
+2025-02-26,0.5065861650146529,603716.SH
+2025-02-27,0.7407038446632749,603211.SH
+2025-02-28,0.4668521688585335,003043.SZ
+2025-03-03,0.2680721667617631,600753.SH
+2025-03-04,0.34955816615272756,002977.SZ
+2025-03-05,0.7482244194415444,603057.SH
+2025-03-06,0.6305254140888802,603280.SH
+2025-03-07,0.19351037427994797,600241.SH
+2025-03-10,0.38766031239447357,603325.SH
+2025-03-11,0.4303913500153944,002872.SZ
+2025-03-12,0.18459289202598228,002898.SZ
+2025-03-13,0.8576596052682522,001319.SZ
+2025-03-14,0.6230146680130096,000757.SZ
+2025-03-17,0.7328875682123387,603843.SH
+2025-03-18,0.47499472013228067,000669.SZ
+2025-03-19,0.3268919044509167,002305.SZ
+2025-03-20,0.4569272632462979,600356.SH
+2025-03-21,0.07591851931376978,000586.SZ
+2025-03-24,0.49072061900133407,603335.SH
+2025-03-25,0.4306632631450777,603381.SH
+2025-03-26,0.46006642069249487,001299.SZ
+2025-03-27,0.41362062710862235,002394.SZ
+2025-03-28,0.564157006795436,001332.SZ
+2025-03-31,0.5981134959932276,001238.SZ
+2025-04-01,0.6363729449100586,603102.SH
+2025-04-02,0.2865246522723796,002872.SZ
+2025-04-03,0.29802040504689753,000633.SZ
+2025-04-07,0.554762051627518,002872.SZ
+2025-04-08,0.34687738661031947,603682.SH
+2025-04-09,0.13896921728258024,001331.SZ
--- a/main/train/predictions_train.tsv
+++ b/main/train/predictions_train.tsv
--- a/main/train/test.py
+++ b/main/train/test.py
@@ -0,0 +1,14 @@
+from operator import index
+
+import tushare as ts
+import pandas as pd
+import time
+
+ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')
+pro = ts.pro_api()
+
+df = pro.index_member_all(ts_code='603579.SH')
+print(df)
+
+df = pro.sw_daily(trade_date='20250305', fields='ts_code,name,open,close,vol,pe,pb')
+print(df[df['ts_code'] == '851171.SI'])
--- a/main/train/test1.tsv
+++ b/main/train/test1.tsv
@@ -0,0 +1,565 @@
+trade_date,score,ts_code
+2022-12-08,1.2708337806641494,603816.SH
+2022-12-09,1.4207120834806832,603567.SH
+2022-12-12,1.0198883623815167,002305.SZ
+2022-12-13,1.7022732146012465,002910.SZ
+2022-12-14,0.4115956442621504,600493.SH
+2022-12-15,1.2308250306434583,601858.SH
+2022-12-16,0.5214964254452716,601677.SH
+2022-12-19,1.5635207796349075,000721.SZ
+2022-12-20,0.9950031675966513,002314.SZ
+2022-12-21,1.867139344678808,603238.SH
+2022-12-22,0.11397346668733664,002095.SZ
+2022-12-23,0.7020503260530933,600706.SH
+2022-12-26,1.064077090528082,002707.SZ
+2022-12-27,0.5487905008977592,000978.SZ
+2022-12-28,0.9795388321537417,600225.SH
+2022-12-29,0.6402559056339422,600056.SH
+2022-12-30,0.9466308655445547,002357.SZ
+2023-01-03,0.6849950582517478,002031.SZ
+2023-01-04,0.8958700703884613,003010.SZ
+2023-01-05,0.9901544872773684,002357.SZ
+2023-01-06,0.7029762528454185,000929.SZ
+2023-01-09,1.2070723183050875,002279.SZ
+2023-01-10,0.28632510343867906,002933.SZ
+2023-01-11,0.7059503351778397,002576.SZ
+2023-01-12,1.700028635026902,002576.SZ
+2023-01-13,1.4228228373146723,002043.SZ
+2023-01-16,0.24930703006686591,600958.SH
+2023-01-17,1.0616927130654037,603882.SH
+2023-01-18,0.6166412038694548,000739.SZ
+2023-01-19,0.5967697229641841,603806.SH
+2023-01-20,0.8290879039003781,600705.SH
+2023-01-30,1.0826864888349266,000972.SZ
+2023-01-31,1.7476350470413293,605133.SH
+2023-02-01,1.0698795326344217,002297.SZ
+2023-02-02,1.168956058233029,002762.SZ
+2023-02-03,0.6068761459217956,002474.SZ
+2023-02-06,1.3603267774479497,002855.SZ
+2023-02-07,1.3722562072579707,002167.SZ
+2023-02-08,1.444800461687164,002117.SZ
+2023-02-09,0.6478721098934555,600501.SH
+2023-02-10,1.7330712792214502,002122.SZ
+2023-02-13,1.0751336841418047,603711.SH
+2023-02-14,0.858121706097957,002354.SZ
+2023-02-15,1.0628443879922715,600817.SH
+2023-02-16,1.0941227999628862,002660.SZ
+2023-02-17,0.5452970336991657,002792.SZ
+2023-02-20,0.7452925786277558,600817.SH
+2023-02-21,1.2263444506836183,601360.SH
+2023-02-22,0.8498400500947443,002882.SZ
+2023-02-23,1.3778772059701936,002942.SZ
+2023-02-24,0.8116211264751758,002942.SZ
+2023-02-27,1.369491951000112,600118.SH
+2023-02-28,1.7437044662527195,600325.SH
+2023-03-01,0.6172338223208104,002350.SZ
+2023-03-02,0.9753294078191806,002261.SZ
+2023-03-03,0.9460072368251595,605389.SH
+2023-03-06,0.7661730237898733,000977.SZ
+2023-03-07,1.5306012129925908,601728.SH
+2023-03-08,1.7347243229852956,603042.SH
+2023-03-09,1.7785688963407722,601698.SH
+2023-03-10,1.794639030708944,002808.SZ
+2023-03-13,2.2765957078169055,601728.SH
+2023-03-14,1.5770232731123273,002236.SZ
+2023-03-15,1.9886076279595977,601698.SH
+2023-03-16,1.7538871949426555,601138.SH
+2023-03-17,1.2850616649676168,000506.SZ
+2023-03-20,0.6617355633181617,601117.SH
+2023-03-21,1.2834165832572753,600633.SH
+2023-03-22,1.286625601927238,002803.SZ
+2023-03-23,1.2442366849499193,601138.SH
+2023-03-24,1.7385288121049993,601138.SH
+2023-03-27,0.5271836596864287,600633.SH
+2023-03-28,0.9233261884964775,000890.SZ
+2023-03-29,1.0388156797328032,600633.SH
+2023-03-30,0.880222808466912,600975.SH
+2023-03-31,1.7723670660012394,002153.SZ
+2023-04-03,1.4447814388081068,600633.SH
+2023-04-04,0.9805981968002965,000988.SZ
+2023-04-06,1.2735568908129031,002558.SZ
+2023-04-07,0.5977729773368881,002222.SZ
+2023-04-10,0.36120306701232185,000032.SZ
+2023-04-11,2.0134197062348904,603258.SH
+2023-04-12,0.6807091195842823,603888.SH
+2023-04-13,1.5510435282176684,600415.SH
+2023-04-14,1.6158618609191548,603258.SH
+2023-04-17,0.5935406330588169,603918.SH
+2023-04-18,1.438798944751228,603258.SH
+2023-04-19,0.4851330354034662,002975.SZ
+2023-04-20,0.17004215747506052,600415.SH
+2023-04-21,1.3733089702528274,601595.SH
+2023-04-24,2.3249160418531685,603258.SH
+2023-04-25,2.4887955829326054,601858.SH
+2023-04-26,1.9420082198135482,601019.SH
+2023-04-27,2.3040109178691113,601811.SH
+2023-04-28,1.0754625899722956,601811.SH
+2023-05-04,1.6688121146522907,601336.SH
+2023-05-05,1.1037723664352612,601989.SH
+2023-05-08,1.6994199603704685,601288.SH
+2023-05-09,1.2636377329259567,002354.SZ
+2023-05-10,1.2628967915122853,601949.SH
+2023-05-11,0.8020741700988911,603083.SH
+2023-05-12,0.22312816960298115,600629.SH
+2023-05-15,0.7341052846591558,002229.SZ
+2023-05-16,0.6350705971737554,603268.SH
+2023-05-17,1.0396627856239795,603958.SH
+2023-05-18,1.4091099521269763,601858.SH
+2023-05-19,0.6341161328902458,600239.SH
+2023-05-22,0.4664478043150085,603798.SH
+2023-05-23,0.3950180406443093,002864.SZ
+2023-05-24,0.9532057286987137,002366.SZ
+2023-05-25,0.661525047825837,605011.SH
+2023-05-26,0.873646794491419,600088.SH
+2023-05-29,1.0161343809163572,600636.SH
+2023-05-30,1.8522924730896868,603918.SH
+2023-05-31,0.14065827549083917,002315.SZ
+2023-06-01,1.0647192154325815,002229.SZ
+2023-06-02,1.0897714474656055,605028.SH
+2023-06-05,0.818149194152834,002995.SZ
+2023-06-06,1.1559913886165554,002229.SZ
+2023-06-07,0.9730919792856488,603933.SH
+2023-06-08,1.1740853193005574,003010.SZ
+2023-06-09,0.7055820145524615,002395.SZ
+2023-06-12,0.8768369889703852,000977.SZ
+2023-06-13,0.5333934871843615,600839.SH
+2023-06-14,1.1828705214010444,002229.SZ
+2023-06-15,1.9054644381740913,600602.SH
+2023-06-16,1.6671793256997451,002920.SZ
+2023-06-19,0.4424093682681172,002194.SZ
+2023-06-20,0.7166566485622967,600100.SH
+2023-06-21,1.185368125310508,600592.SH
+2023-06-26,0.49477817284107434,605016.SH
+2023-06-27,0.6467017315354233,002865.SZ
+2023-06-28,1.4462997720570885,600310.SH
+2023-06-29,0.9079748876905797,000809.SZ
+2023-06-30,1.1417365323043627,002920.SZ
+2023-07-03,1.0292231512798002,600105.SH
+2023-07-04,0.9764499369108617,002355.SZ
+2023-07-05,1.1950967963313073,603489.SH
+2023-07-06,0.8067305519266362,603809.SH
+2023-07-07,-0.11113958569144997,603786.SH
+2023-07-10,1.4365223354022805,002835.SZ
+2023-07-11,0.9055036034028278,603767.SH
+2023-07-12,0.662603535490377,002265.SZ
+2023-07-13,0.6580169744401991,605005.SH
+2023-07-14,0.7806145283148259,002284.SZ
+2023-07-17,0.8928179964563782,002616.SZ
+2023-07-18,1.0102033286200784,603709.SH
+2023-07-19,0.28926601683884473,603429.SH
+2023-07-20,1.0778442223423874,603709.SH
+2023-07-21,0.49418969039024113,000068.SZ
+2023-07-24,1.152792861172028,002172.SZ
+2023-07-25,-0.48175589465657037,000656.SZ
+2023-07-26,0.9348834119551785,000608.SZ
+2023-07-27,1.5360069738010982,600231.SH
+2023-07-28,1.5514157372959714,600782.SH
+2023-07-31,0.31996521869248884,000750.SZ
+2023-08-01,0.5474246928163892,601860.SH
+2023-08-02,1.277218052964389,600162.SH
+2023-08-03,0.784739789704388,002400.SZ
+2023-08-04,1.5311245596423297,002310.SZ
+2023-08-07,1.3664999923727883,002377.SZ
+2023-08-08,0.6775719496805408,000004.SZ
+2023-08-09,1.0473202246778897,003005.SZ
+2023-08-10,0.7095500932241833,600610.SH
+2023-08-11,0.9061227536071346,000656.SZ
+2023-08-14,1.712926297349448,600272.SH
+2023-08-15,0.7173624078005385,000656.SZ
+2023-08-16,1.0612842867590515,600155.SH
+2023-08-17,-0.11242980180119416,002898.SZ
+2023-08-18,0.7958378602153298,002221.SZ
+2023-08-21,1.2818341414622505,000006.SZ
+2023-08-22,1.4925485997390293,000006.SZ
+2023-08-23,1.0205726179128791,002178.SZ
+2023-08-24,0.5123818439671383,603000.SH
+2023-08-25,1.1561261254770698,600684.SH
+2023-08-28,1.0243035845865234,600602.SH
+2023-08-29,0.48711445602297804,002279.SZ
+2023-08-30,1.5892134655969756,002279.SZ
+2023-08-31,0.8832038176770832,002235.SZ
+2023-09-01,0.7778267621876187,002527.SZ
+2023-09-04,0.9715794190221472,000766.SZ
+2023-09-05,0.6363690681522609,000701.SZ
+2023-09-06,1.1626954331771984,603078.SH
+2023-09-07,0.8950438021027404,601001.SH
+2023-09-08,0.4907685411177441,603722.SH
+2023-09-11,1.4880263364860575,000851.SZ
+2023-09-12,0.7192761393042792,603329.SH
+2023-09-13,0.7091837012523252,000158.SZ
+2023-09-14,0.7548676419597591,601699.SH
+2023-09-15,0.9847401608369946,600257.SH
+2023-09-18,1.7523740102808545,600814.SH
+2023-09-19,0.9311114338734422,000536.SZ
+2023-09-20,1.5094699709720083,002902.SZ
+2023-09-21,1.10032325281936,603890.SH
+2023-09-22,0.5455145772639567,001268.SZ
+2023-09-25,1.5392432178391338,600895.SH
+2023-09-26,1.139513204589203,002642.SZ
+2023-09-27,1.0155696225867201,002827.SZ
+2023-09-28,0.7631131516304462,001298.SZ
+2023-10-09,1.3121583124085132,002513.SZ
+2023-10-10,1.192726847273738,600895.SH
+2023-10-11,1.378300878017007,600895.SH
+2023-10-12,1.3374848728915072,000010.SZ
+2023-10-13,1.247352917522287,000536.SZ
+2023-10-16,1.0205919288199263,002456.SZ
+2023-10-17,1.8124967402525871,601127.SH
+2023-10-18,1.6545503123646252,002456.SZ
+2023-10-19,1.403565591711023,603009.SH
+2023-10-20,1.479674671497312,002176.SZ
+2023-10-23,1.1364881895480998,002337.SZ
+2023-10-24,1.603597993080332,002122.SZ
+2023-10-25,1.2135837368610372,603123.SH
+2023-10-26,1.0507834567170797,002671.SZ
+2023-10-27,1.2313059730365872,603266.SH
+2023-10-30,1.4721372697796935,603353.SH
+2023-10-31,0.8329626816487755,600599.SH
+2023-11-01,0.7167096735509632,603186.SH
+2023-11-02,1.2802635893255563,601633.SH
+2023-11-03,1.2590421473918265,002640.SZ
+2023-11-06,0.7531355226612281,003040.SZ
+2023-11-07,0.6577034595225779,603768.SH
+2023-11-08,1.8511623446368441,002889.SZ
+2023-11-09,1.4717991539794943,603266.SH
+2023-11-10,0.46890167407485095,002584.SZ
+2023-11-13,1.1394805956388967,603220.SH
+2023-11-14,1.0217982309018654,603883.SH
+2023-11-15,0.6661740992600602,600106.SH
+2023-11-16,1.3719183134079516,002584.SZ
+2023-11-17,1.5131093025579179,603266.SH
+2023-11-20,1.6229596862404188,002103.SZ
+2023-11-21,0.9595377845746105,002291.SZ
+2023-11-22,1.1541727944363123,600506.SH
+2023-11-23,0.6172428975445288,000029.SZ
+2023-11-24,0.3581002329377547,000710.SZ
+2023-11-27,1.0865084227106108,002691.SZ
+2023-11-28,0.36551005528336356,603721.SH
+2023-11-29,0.6325689777455243,600780.SH
+2023-11-30,0.9673540111120547,600148.SH
+2023-12-01,1.6031005266953247,002238.SZ
+2023-12-04,0.3139675086881397,601156.SH
+2023-12-05,0.4471787298728605,002238.SZ
+2023-12-06,1.519722809770461,600128.SH
+2023-12-07,0.9306959636346697,601599.SH
+2023-12-08,1.11955592447384,600678.SH
+2023-12-11,0.920549654717692,600981.SH
+2023-12-12,1.1192972102468268,603999.SH
+2023-12-13,1.059999047897406,601595.SH
+2023-12-14,1.3135891954923238,605577.SH
+2023-12-15,1.0121314183460337,603358.SH
+2023-12-18,1.2923561000198138,601595.SH
+2023-12-19,0.4727877781694898,002856.SZ
+2023-12-20,-0.01336745195249621,002937.SZ
+2023-12-21,0.9432845860894691,603825.SH
+2023-12-22,1.5927408932377043,603167.SH
+2023-12-25,1.1489781197830256,001314.SZ
+2023-12-26,2.075405615830143,605117.SH
+2023-12-27,1.1634401553385165,002952.SZ
+2023-12-28,1.9276471606617283,600610.SH
+2023-12-29,1.1664703290672742,600621.SH
+2024-01-02,1.4036288516782467,603396.SH
+2024-01-03,0.6064916104024441,603052.SH
+2024-01-04,0.27194078032727403,000810.SZ
+2024-01-05,0.7248236406349026,002884.SZ
+2024-01-08,1.917000409535365,600983.SH
+2024-01-09,1.3670026723187405,601225.SH
+2024-01-10,0.6428111801429847,002419.SZ
+2024-01-11,0.8603468507895841,603097.SH
+2024-01-12,0.8616154280062541,001269.SZ
+2024-01-15,1.6162131927011785,002140.SZ
+2024-01-16,1.6278064283944136,605366.SH
+2024-01-17,1.2234956634332685,603556.SH
+2024-01-18,0.2930411130350266,601865.SH
+2024-01-19,0.5463003069500746,002033.SZ
+2024-01-22,1.4568410090998594,600438.SH
+2024-01-23,0.4796543547584619,002056.SZ
+2024-01-24,1.1933657317735877,000921.SZ
+2024-01-25,1.1196073236619992,000070.SZ
+2024-01-26,0.22116166302548493,601225.SH
+2024-01-29,1.0247482544629951,000717.SZ
+2024-01-30,1.0239528607343813,000651.SZ
+2024-01-31,1.371059822546121,600188.SH
+2024-02-01,0.9952706097104108,601288.SH
+2024-02-02,1.1495734266360917,601658.SH
+2024-02-05,0.29949274220153294,600018.SH
+2024-02-06,1.299845239071017,600004.SH
+2024-02-07,1.000836675958177,600350.SH
+2024-02-08,0.6401652900537433,600131.SH
+2024-02-19,0.08475967168317824,002749.SZ
+2024-02-20,0.5804821425310229,002085.SZ
+2024-02-21,0.5362126107341831,600053.SH
+2024-02-22,1.206204153162725,600639.SH
+2024-02-23,1.6848572441872742,603958.SH
+2024-02-26,-0.22984388407558198,000017.SZ
+2024-02-27,-0.02096017812243845,603011.SH
+2024-02-28,1.1590591855669556,603933.SH
+2024-02-29,0.1819266114797644,002085.SZ
+2024-03-01,0.6727454630028324,600571.SH
+2024-03-04,1.113038537485005,601058.SH
+2024-03-05,0.8578265704409984,600160.SH
+2024-03-06,1.0085170866918751,600331.SH
+2024-03-07,1.0605223005794961,603220.SH
+2024-03-08,0.9054891550652612,601001.SH
+2024-03-11,1.3538575989659345,002085.SZ
+2024-03-12,0.9889456011580702,603580.SH
+2024-03-13,1.212835645663162,603499.SH
+2024-03-14,0.648581745020845,603773.SH
+2024-03-15,1.2772879612108654,002902.SZ
+2024-03-18,0.8179927345023094,002591.SZ
+2024-03-19,1.6214555790374034,000908.SZ
+2024-03-20,2.2115174455952333,603580.SH
+2024-03-21,1.545209963736103,603499.SH
+2024-03-22,0.6838446792203144,605180.SH
+2024-03-25,0.31578511420724414,603985.SH
+2024-03-26,1.781904626165001,605198.SH
+2024-03-27,-0.16938824239950148,603006.SH
+2024-03-28,0.9649663433118687,002739.SZ
+2024-03-29,0.3320943922901912,603499.SH
+2024-04-01,0.9952388185722638,603878.SH
+2024-04-02,0.9364155422135148,000603.SZ
+2024-04-03,0.7982619216871609,603878.SH
+2024-04-08,1.358471943036704,605198.SH
+2024-04-09,0.18053926667744466,002574.SZ
+2024-04-10,1.3153422390391303,000528.SZ
+2024-04-11,1.3657881440950266,002716.SZ
+2024-04-12,1.5676274828749224,000975.SZ
+2024-04-15,-0.13567914644119722,000157.SZ
+2024-04-16,0.93425932274213,600690.SH
+2024-04-17,1.327024106545604,001696.SZ
+2024-04-18,1.8296222086183656,600210.SH
+2024-04-19,0.578844593874061,002716.SZ
+2024-04-22,1.0676680912492613,603050.SH
+2024-04-23,1.5669359888338785,002167.SZ
+2024-04-24,0.7255550236691839,603017.SH
+2024-04-25,1.997033122105935,000737.SZ
+2024-04-26,1.1631716891035941,001696.SZ
+2024-04-29,0.5416930775042976,002715.SZ
+2024-04-30,0.4865144780512015,002167.SZ
+2024-05-06,1.6985755115133785,002125.SZ
+2024-05-07,1.5949401523876496,600789.SH
+2024-05-08,0.978355142201031,605177.SH
+2024-05-09,0.1605421067543249,603018.SH
+2024-05-10,0.6676316772066117,002264.SZ
+2024-05-13,0.733933392442504,603222.SH
+2024-05-14,0.5547173325807716,603025.SH
+2024-05-15,1.3155422143795996,000913.SZ
+2024-05-16,1.4221204991742524,001696.SZ
+2024-05-17,1.1562985723597323,600383.SH
+2024-05-20,0.4695922603130449,002264.SZ
+2024-05-21,1.8689264604736717,000002.SZ
+2024-05-22,1.3121218875647056,600843.SH
+2024-05-23,0.7664908704814034,002225.SZ
+2024-05-24,0.7589930057344444,600530.SH
+2024-05-27,1.19159766228358,601919.SH
+2024-05-28,1.0019105122452157,600726.SH
+2024-05-29,0.4881622770453212,601019.SH
+2024-05-30,1.4287668616222775,002938.SZ
+2024-05-31,1.0528834774374836,601019.SH
+2024-06-03,0.7762962067944593,603530.SH
+2024-06-04,1.7857240618535462,601117.SH
+2024-06-05,1.3040177518621878,603530.SH
+2024-06-06,1.0260169367235976,600900.SH
+2024-06-07,0.8588448524419576,603172.SH
+2024-06-11,1.1812779982242239,600584.SH
+2024-06-12,1.3882505145638304,003026.SZ
+2024-06-13,1.4302228321179788,001339.SZ
+2024-06-14,0.9306175962728962,002889.SZ
+2024-06-17,0.6869594774433183,600530.SH
+2024-06-18,1.1904711061862112,001298.SZ
+2024-06-19,1.697703035579816,605258.SH
+2024-06-20,0.8952978126779231,600733.SH
+2024-06-21,1.6755370442001838,001298.SZ
+2024-06-24,0.7658870375300119,002252.SZ
+2024-06-25,0.5860602429129975,002485.SZ
+2024-06-26,0.3448818899550934,003031.SZ
+2024-06-27,0.5265851943756428,000793.SZ
+2024-06-28,1.9979062957915383,603838.SH
+2024-07-01,1.0825164001234038,002485.SZ
+2024-07-02,0.9075039211419761,601985.SH
+2024-07-03,1.409183048681464,600025.SH
+2024-07-04,0.904029442040085,600025.SH
+2024-07-05,1.0426545217304006,001286.SZ
+2024-07-08,1.698642218338944,600584.SH
+2024-07-09,0.6470181687793882,002463.SZ
+2024-07-10,2.06122700428312,002947.SZ
+2024-07-11,0.7410903122804028,600601.SH
+2024-07-12,1.340083218079972,600686.SH
+2024-07-15,1.5618937384484992,600733.SH
+2024-07-16,1.758580945058497,600733.SH
+2024-07-17,1.5157121004090266,000625.SZ
+2024-07-18,0.3083377315759717,603898.SH
+2024-07-19,0.8944208661047769,000078.SZ
+2024-07-22,1.0429143541686636,002829.SZ
+2024-07-23,0.8234894660266799,002005.SZ
+2024-07-24,1.0749929990189133,600626.SH
+2024-07-25,1.880220214501819,600611.SH
+2024-07-26,1.8509059338376548,600650.SH
+2024-07-29,2.076506303850798,600817.SH
+2024-07-30,2.1084511316806602,000712.SZ
+2024-07-31,1.638115197432452,000421.SZ
+2024-08-01,1.040980502901104,600811.SH
+2024-08-02,1.7885572982299731,001696.SZ
+2024-08-05,0.9537751859233105,001379.SZ
+2024-08-06,0.3092017392193045,002461.SZ
+2024-08-07,1.466643911628964,002488.SZ
+2024-08-08,0.7882843739164674,603863.SH
+2024-08-09,0.23494689741035102,603488.SH
+2024-08-12,2.131668395298663,002488.SZ
+2024-08-13,2.0937880721661606,603978.SH
+2024-08-14,1.131943220068558,002232.SZ
+2024-08-15,0.5873737710201326,002611.SZ
+2024-08-16,1.199835048451392,000062.SZ
+2024-08-19,1.1805751411608383,600811.SH
+2024-08-20,1.5503271129180245,600661.SH
+2024-08-21,0.578554281440386,000880.SZ
+2024-08-22,1.2853820450612699,600105.SH
+2024-08-23,0.6423692277445572,002403.SZ
+2024-08-26,0.9239367963781929,002986.SZ
+2024-08-27,0.46409556994532736,605183.SH
+2024-08-28,1.0631407155165504,002647.SZ
+2024-08-29,1.1245501698512903,603639.SH
+2024-08-30,0.6361599235773499,002246.SZ
+2024-09-02,1.2492364629991852,002072.SZ
+2024-09-03,1.1926755374832714,002199.SZ
+2024-09-04,1.6204729565651932,600599.SH
+2024-09-05,1.0957778793973632,002595.SZ
+2024-09-06,1.6168023970816094,002005.SZ
+2024-09-09,0.8605482386029011,002456.SZ
+2024-09-10,1.3546410789459808,002717.SZ
+2024-09-11,0.5980254226205031,603196.SH
+2024-09-12,0.9187303745519808,002640.SZ
+2024-09-13,0.6801121207893971,600358.SH
+2024-09-18,1.079264299860147,600550.SH
+2024-09-19,1.9477947178464494,002786.SZ
+2024-09-20,1.1194340099294373,002123.SZ
+2024-09-23,1.6714675557693415,002453.SZ
+2024-09-24,1.3675826070587052,002453.SZ
+2024-09-25,0.7076301816428471,000627.SZ
+2024-09-26,0.8882412710980511,603398.SH
+2024-09-27,0.7521975260737696,000002.SZ
+2024-09-30,0.7230331038260748,600570.SH
+2024-10-08,1.0593517928482987,600550.SH
+2024-10-09,1.0220239311477122,000560.SZ
+2024-10-10,1.234368197028218,600606.SH
+2024-10-11,0.8664109594444052,000750.SZ
+2024-10-14,1.8450296800922745,603822.SH
+2024-10-15,1.8353472785641625,002583.SZ
+2024-10-16,0.9543901161454763,000536.SZ
+2024-10-17,1.0015738096430133,600619.SH
+2024-10-18,1.2591757462030437,600622.SH
+2024-10-21,1.364327760137209,002583.SZ
+2024-10-22,2.0756244365506276,002685.SZ
+2024-10-23,0.38458758527962894,000004.SZ
+2024-10-24,1.5124132416681377,002094.SZ
+2024-10-25,1.4347033681199353,600839.SH
+2024-10-28,1.5745059258009038,001696.SZ
+2024-10-29,1.8529251428567266,002628.SZ
+2024-10-30,1.7713369405635928,002717.SZ
+2024-10-31,1.1151492405672683,001696.SZ
+2024-11-01,0.7164870376829126,600246.SH
+2024-11-04,1.769839917258226,002384.SZ
+2024-11-05,1.092728971983151,600212.SH
+2024-11-06,0.6671879019120053,603336.SH
+2024-11-07,0.5602821558966659,002416.SZ
+2024-11-08,1.1858491806130256,001696.SZ
+2024-11-11,1.3270787455846025,002456.SZ
+2024-11-12,1.4104327679900899,603398.SH
+2024-11-13,1.2197272384042277,600839.SH
+2024-11-14,0.6314176029145626,603697.SH
+2024-11-15,-0.13737144527610326,603268.SH
+2024-11-18,0.7120357386859574,000605.SZ
+2024-11-19,0.42252100937945863,002469.SZ
+2024-11-20,1.007664521961136,600619.SH
+2024-11-21,0.9709988043471746,002694.SZ
+2024-11-22,-0.01837433439991007,000415.SZ
+2024-11-25,0.7354373323734968,000973.SZ
+2024-11-26,1.4832316580516,002348.SZ
+2024-11-27,1.0092630330488712,002899.SZ
+2024-11-28,1.6528548440755675,600327.SH
+2024-11-29,1.5310967942763765,003010.SZ
+2024-12-02,0.9985829805322318,000981.SZ
+2024-12-03,1.1009990341574898,600628.SH
+2024-12-04,1.4406720428536548,000679.SZ
+2024-12-05,1.5166810165916147,002640.SZ
+2024-12-06,1.5480151614841313,003021.SZ
+2024-12-09,0.7658546815142482,600593.SH
+2024-12-10,1.6164052159572586,000981.SZ
+2024-12-11,1.0871733525157767,000882.SZ
+2024-12-12,1.3618111397645727,002265.SZ
+2024-12-13,1.1378238320491278,605006.SH
+2024-12-16,0.5942319336561258,603533.SH
+2024-12-17,0.8922367248452927,600503.SH
+2024-12-18,0.29961260322010785,600796.SH
+2024-12-19,0.5126832965650956,002031.SZ
+2024-12-20,0.4049032161823233,600172.SH
+2024-12-23,0.378293448285725,600172.SH
+2024-12-24,0.7270156370860105,600673.SH
+2024-12-25,0.8938311286037234,603610.SH
+2024-12-26,0.8824820051198142,603893.SH
+2024-12-27,1.3362649834557183,002068.SZ
+2024-12-30,0.35138947858631225,600183.SH
+2024-12-31,1.1948708599448123,600506.SH
+2025-01-02,1.4558889081434663,002730.SZ
+2025-01-03,0.7128978848132725,600707.SH
+2025-01-06,0.7650844601290192,600803.SH
+2025-01-07,1.0635380792047187,600405.SH
+2025-01-08,0.7951289392847068,002837.SZ
+2025-01-09,1.0252099583039425,603306.SH
+2025-01-10,0.30155175957668673,002916.SZ
+2025-01-13,0.9708056750766493,603986.SH
+2025-01-14,0.7870312186305877,000063.SZ
+2025-01-15,1.6878038525240218,002917.SZ
+2025-01-16,0.8478674953917144,002449.SZ
+2025-01-17,1.0235316084839805,600693.SH
+2025-01-20,0.8827244215589688,002730.SZ
+2025-01-21,0.9142145000698546,002379.SZ
+2025-01-22,1.632439883586815,603228.SH
+2025-01-23,0.5950475737248152,603121.SH
+2025-01-24,1.512800316493977,000880.SZ
+2025-01-27,0.280247948525134,002532.SZ
+2025-02-05,0.7525772298409897,600722.SH
+2025-02-06,-0.059951823103750426,601869.SH
+2025-02-07,0.45215019046862537,000977.SZ
+2025-02-10,1.2658737579405763,003007.SZ
+2025-02-11,1.0408324160127955,603106.SH
+2025-02-12,1.6119030499830551,000856.SZ
+2025-02-13,1.59310517514652,002929.SZ
+2025-02-14,1.5077797651403821,002410.SZ
+2025-02-17,1.3860825096123015,002044.SZ
+2025-02-18,1.1270921559091267,002779.SZ
+2025-02-19,1.187211682113541,603286.SH
+2025-02-20,1.2669618354736996,605488.SH
+2025-02-21,1.025703816755235,600588.SH
+2025-02-24,1.5648046166656084,600602.SH
+2025-02-25,0.8745657353291226,002896.SZ
+2025-02-26,0.9230669528117501,000032.SZ
+2025-02-27,0.9367812438709472,603200.SH
+2025-02-28,1.0306691811926367,002276.SZ
+2025-03-03,0.6479828536311146,002044.SZ
+2025-03-04,0.9022328614595625,001306.SZ
+2025-03-05,0.13297506000529996,002036.SZ
+2025-03-06,1.0473094549911606,001309.SZ
+2025-03-07,0.8908293616489041,002580.SZ
+2025-03-10,0.9714032058498194,600126.SH
+2025-03-11,1.5066583997940508,000678.SZ
+2025-03-12,1.2822828774552384,603059.SH
+2025-03-13,1.1088751386616387,003038.SZ
+2025-03-14,1.2806594439606795,002713.SZ
+2025-03-17,0.9239379376977839,001256.SZ
+2025-03-18,1.155518645532713,600610.SH
+2025-03-19,1.566452580640379,605008.SH
+2025-03-20,1.6658038834776503,603949.SH
+2025-03-21,0.3002046262826852,603112.SH
+2025-03-24,0.969025415982965,001256.SZ
+2025-03-25,1.0089862742297053,002300.SZ
+2025-03-26,0.9563316876479583,600468.SH
+2025-03-27,0.7596417124134163,000633.SZ
+2025-03-28,0.8823950983342874,000006.SZ
+2025-03-31,1.5478113985550597,002851.SZ
+2025-04-01,0.5961759062409911,002847.SZ
+2025-04-02,0.1297394192678443,002093.SZ
+2025-04-03,1.6113567681708816,603353.SH
+2025-04-07,1.6243924524047828,601008.SH
+2025-04-08,1.5465757662622548,600598.SH
+2025-04-09,1.2262057864670963,601952.SH
--- a/main/train/test2.tsv
+++ b/main/train/test2.tsv
@@ -0,0 +1,72 @@
+trade_date,score,ts_code
+2024-12-09,1.1968650846005326,600593.SH
+2024-12-10,0.21490252296809745,002611.SZ
+2024-12-11,0.5721914798956016,002321.SZ
+2024-12-12,0.6509338263544048,600628.SH
+2024-12-13,2.1288113028385376,600628.SH
+2024-12-16,1.378346480524284,002086.SZ
+2024-12-17,1.45986967550941,002741.SZ
+2024-12-18,1.3436778254529067,600579.SH
+2024-12-19,0.41218776805787716,600796.SH
+2024-12-20,1.0840917563770454,603421.SH
+2024-12-23,1.00141172278312,600889.SH
+2024-12-24,1.0354156548919864,600725.SH
+2024-12-25,0.9562524807100355,600066.SH
+2024-12-26,1.1279048294352958,002916.SZ
+2024-12-27,0.6532174116474766,002068.SZ
+2024-12-30,-0.1308794790538431,002918.SZ
+2024-12-31,0.7160474599127873,600857.SH
+2025-01-02,1.5067649520721882,002449.SZ
+2025-01-03,0.9282246137432282,603379.SH
+2025-01-06,0.6797051204009213,603893.SH
+2025-01-07,0.9376184079476354,603236.SH
+2025-01-08,0.9064516934700023,603308.SH
+2025-01-09,0.9314493554789942,000880.SZ
+2025-01-10,0.5025761501709369,600584.SH
+2025-01-13,0.7483210862212708,000063.SZ
+2025-01-14,1.2632673941368837,000063.SZ
+2025-01-15,1.8580661802761587,002917.SZ
+2025-01-16,1.1918089652002073,600693.SH
+2025-01-17,0.8288939941365315,600693.SH
+2025-01-20,0.677726091977902,002577.SZ
+2025-01-21,1.8336548268410158,603893.SH
+2025-01-22,1.0395051538956546,000573.SZ
+2025-01-23,0.4308220427423068,003021.SZ
+2025-01-24,1.8057941775723685,002862.SZ
+2025-01-27,1.216662909774701,002779.SZ
+2025-02-05,0.8484867753831473,603990.SH
+2025-02-06,0.5038824073142949,001380.SZ
+2025-02-07,0.7672133571524726,002031.SZ
+2025-02-10,0.5417223016033719,000681.SZ
+2025-02-11,0.9399374716518157,000034.SZ
+2025-02-12,1.8742056631297925,000856.SZ
+2025-02-13,1.4837670146272484,600633.SH
+2025-02-14,1.2043600916692372,605488.SH
+2025-02-17,1.1259104542173328,603918.SH
+2025-02-18,1.1806931791732853,600126.SH
+2025-02-19,1.020437698817749,603956.SH
+2025-02-20,0.5818349669113919,003021.SZ
+2025-02-21,1.0941497070930342,603950.SH
+2025-02-24,1.867258980329339,600602.SH
+2025-02-25,0.8646726218943293,002691.SZ
+2025-02-26,1.2878484406363957,002245.SZ
+2025-02-27,1.3013902577988068,600173.SH
+2025-02-28,0.7804376426721501,603040.SH
+2025-03-03,0.45593268249434266,002345.SZ
+2025-03-04,0.9265705061587579,600589.SH
+2025-03-05,0.766962270753268,002575.SZ
+2025-03-06,0.7030260458187082,601100.SH
+2025-03-07,0.924011193171594,002896.SZ
+2025-03-10,1.0811487252993004,600126.SH
+2025-03-11,1.272392599656189,002896.SZ
+2025-03-12,1.0905437448562905,002276.SZ
+2025-03-13,1.0688995313878895,003038.SZ
+2025-03-14,1.2418913857438587,001256.SZ
+2025-03-17,1.004550155323,001256.SZ
+2025-03-18,0.7517848278576412,600403.SH
+2025-03-19,1.5106246878723002,605008.SH
+2025-03-20,1.1509811695536982,600610.SH
+2025-03-21,0.6033998331536018,603196.SH
+2025-03-24,0.3456173948047773,002345.SZ
+2025-03-25,1.470314131581159,600320.SH
+2025-03-26,0.745243100558546,603325.SH