RollingRank 赚钱 - Sharpe 1.43
This commit is contained in:
2179
main/train/AnalyzeData.ipynb
Normal file
2179
main/train/AnalyzeData.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1245
main/train/Classify.ipynb
Normal file
1245
main/train/Classify.ipynb
Normal file
File diff suppressed because one or more lines are too long
1317
main/train/ClassifyLR.ipynb
Normal file
1317
main/train/ClassifyLR.ipynb
Normal file
File diff suppressed because one or more lines are too long
967
main/train/DoubleQuntile.ipynb
Normal file
967
main/train/DoubleQuntile.ipynb
Normal file
File diff suppressed because one or more lines are too long
1381
main/train/DoubleRank.ipynb
Normal file
1381
main/train/DoubleRank.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1158
main/train/MultiClassify.ipynb
Normal file
1158
main/train/MultiClassify.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1010
main/train/PlUpdateClassify.ipynb
Normal file
1010
main/train/PlUpdateClassify.ipynb
Normal file
File diff suppressed because one or more lines are too long
1984
main/train/Rank.ipynb
Normal file
1984
main/train/Rank.ipynb
Normal file
File diff suppressed because one or more lines are too long
1210
main/train/Regression.ipynb
Normal file
1210
main/train/Regression.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
4641
main/train/RollingRank.ipynb
Normal file
4641
main/train/RollingRank.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
918
main/train/RollingRank.txt
Normal file
918
main/train/RollingRank.txt
Normal file
@@ -0,0 +1,918 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[1]:
|
||||
|
||||
|
||||
# %load_ext autoreload
|
||||
# %autoreload 2
|
||||
|
||||
import pandas as pd
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
pd.set_option('display.max_columns', None)
|
||||
|
||||
|
||||
# In[2]:
|
||||
|
||||
|
||||
from utils.utils import read_and_merge_h5_data
|
||||
|
||||
# Each entry: (log label, HDF5 path, HDF5 key, columns to read, extra keyword
# arguments).  The sources are merged in this order; the first call starts
# from df=None and every later call receives the frame built so far.
_h5_sources = [
    ('daily data', '../../data/daily_data.h5', 'daily_data',
     ['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],
     {}),
    ('daily basic', '../../data/daily_basic.h5', 'daily_basic',
     ['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',
      'is_st'],
     {'join': 'inner'}),
    ('stk limit', '../../data/stk_limit.h5', 'stk_limit',
     ['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],
     {}),
    ('money flow', '../../data/money_flow.h5', 'money_flow',
     ['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',
      'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],
     {}),
    ('cyq perf', '../../data/cyq_perf.h5', 'cyq_perf',
     ['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',
      'cost_50pct',
      'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],
     {}),
]

df = None
for _label, _path, _key, _columns, _extra in _h5_sources:
    print(_label)
    df = read_and_merge_h5_data(_path, key=_key, columns=_columns, df=df, **_extra)

print(df.info())
|
||||
|
||||
|
||||
# In[3]:
|
||||
|
||||
|
||||
# Read the stock -> industry membership table on its own (df=None), so it can
# be joined to the daily rows as of each trade date in the next step.
print('industry')
industry_df = read_and_merge_h5_data(
    '../../data/industry_data.h5',
    key='industry_data',
    columns=['ts_code', 'l2_code', 'in_date'],
    df=None,
    on=['ts_code'],
    join='left',
)
|
||||
|
||||
|
||||
def merge_with_industry_data(df, industry_df):
    """Attach the industry code (``l2_code``) in force on each row's trade date.

    For every ``ts_code`` the most recent ``industry_df`` row whose ``in_date``
    is on or before ``trade_date`` is joined (backward as-of merge).  Rows that
    pre-date every ``in_date`` of their stock fall back to that stock's
    earliest ``l2_code``.

    Note: ``df['trade_date']`` and ``industry_df['in_date']`` are converted to
    datetime in place on the frames passed in.

    Returns the merged frame ordered by ``trade_date`` then ``ts_code`` with a
    fresh 0..n-1 index.
    """
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    industry_df['in_date'] = pd.to_datetime(industry_df['in_date'])

    # merge_asof requires both sides to be ordered on their date key.
    quotes = df.sort_values(['trade_date', 'ts_code'])
    memberships = industry_df.sort_values(['in_date', 'ts_code'])

    merged = pd.merge_asof(
        quotes,
        memberships,
        left_on='trade_date',
        right_on='in_date',
        by='ts_code',
        direction='backward',
    )

    # memberships is date-ordered, so the first l2_code per stock is the
    # earliest known one; use it where the as-of lookup found nothing.
    earliest_code = memberships.groupby('ts_code')['l2_code'].first()
    fallback = merged['ts_code'].map(earliest_code)
    merged['l2_code'] = merged['l2_code'].fillna(fallback)

    return merged.reset_index(drop=True)
|
||||
|
||||
|
||||
# 使用示例
|
||||
# Attach the as-of industry code to every daily row.
df = merge_with_industry_data(df, industry_df)
# Spot check for a single stock:
# print(df[df['ts_code'] == '600751.SH'][['ts_code', 'trade_date', 'l2_code']])
|
||||
|
||||
|
||||
# In[4]:
|
||||
|
||||
|
||||
def calculate_indicators(df):
    """Add return, RSI, MACD and 20-day sentiment columns to one price series.

    Expects the columns ``trade_date``, ``close``, ``pre_close``, ``vol`` and
    ``amount``.  Works on a date-sorted copy, so the frame passed in is not
    modified.

    Columns added: ``daily_return`` (percent), ``RSI`` (14-row simple-average
    variant), ``MACD`` / ``Signal_line`` / ``MACD_hist`` (12/26/9 EMAs),
    ``up_ratio`` and ``up_ratio_20d``, ``volume_mean`` and
    ``volume_change_rate``, ``volatility``, ``amount_mean`` and
    ``amount_change_rate``.
    """
    out = df.sort_values('trade_date')
    close = out['close']

    out['daily_return'] = (close - out['pre_close']) / out['pre_close'] * 100

    # RSI from plain rolling means of the up-moves and down-moves.
    move = close.diff()
    up_move = move.where(move > 0, 0)
    down_move = -move.where(move < 0, 0)
    rs = up_move.rolling(window=14).mean() / down_move.rolling(window=14).mean()
    out['RSI'] = 100 - (100 / (1 + rs))

    # MACD: fast EMA minus slow EMA, plus its 9-span signal line.
    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    out['MACD'] = fast_ema - slow_ema
    out['Signal_line'] = out['MACD'].ewm(span=9, adjust=False).mean()
    out['MACD_hist'] = out['MACD'] - out['Signal_line']

    # Share of the last 20 rows with a positive daily return.
    out['up_ratio'] = out['daily_return'].gt(0).astype('int64')
    out['up_ratio_20d'] = out['up_ratio'].rolling(window=20).mean()

    # Volume relative to its own 20-row mean, in percent.
    out['volume_mean'] = out['vol'].rolling(window=20).mean()
    out['volume_change_rate'] = (out['vol'] - out['volume_mean']) / out['volume_mean'] * 100

    # 20-row standard deviation of the daily return.
    out['volatility'] = out['daily_return'].rolling(window=20).std()

    # Turnover amount relative to its own 20-row mean, in percent.
    out['amount_mean'] = out['amount'].rolling(window=20).mean()
    out['amount_change_rate'] = (out['amount'] - out['amount_mean']) / out['amount_mean'] * 100

    return out
|
||||
|
||||
|
||||
def generate_index_indicators(h5_filename):
    """Build one wide row of index indicators per trade date.

    Reads the ``index_data`` key from ``h5_filename``, runs
    ``calculate_indicators`` on each ``ts_code`` separately, and pivots the
    result so each date has one row with columns named
    ``"<ts_code>_<indicator>"``.
    """
    raw = pd.read_hdf(h5_filename, key='index_data')
    raw['trade_date'] = pd.to_datetime(raw['trade_date'], format='%Y%m%d')
    raw = raw.sort_values('trade_date')

    # Indicators are rolling statistics, so each index is processed alone.
    per_index = [
        calculate_indicators(raw[raw['ts_code'] == code].copy())
        for code in raw['ts_code'].unique()
    ]
    stacked = pd.concat(per_index, ignore_index=True)

    wide = stacked.pivot_table(
        index='trade_date',
        columns='ts_code',
        values=['daily_return', 'RSI', 'MACD', 'Signal_line',
                'MACD_hist', 'up_ratio_20d', 'volume_change_rate', 'volatility',
                'amount_change_rate', 'amount_mean'],
        aggfunc='last'
    )

    # Flatten the (indicator, ts_code) column pairs into "<ts_code>_<indicator>".
    wide.columns = [f"{code}_{indicator}" for indicator, code in wide.columns]
    return wide.reset_index()
|
||||
|
||||
|
||||
# 使用函数
|
||||
# Wide per-date index indicator table; dates with any missing indicator
# (e.g. the rolling warm-up rows) are dropped.
h5_filename = '../../data/index_data.h5'
index_data = generate_index_indicators(h5_filename).dropna()
|
||||
|
||||
|
||||
|
||||
# In[6]:
|
||||
|
||||
|
||||
from utils.factor import get_act_factor
|
||||
|
||||
|
||||
def read_industry_data(h5_filename):
    """Load industry-level daily bars and derive industry factor columns.

    Reads the ``sw_daily`` key from ``h5_filename``, adds OBV, 5- and 20-row
    returns and the ``get_act_factor`` columns per industry code, then the
    per-date percentile rank of both returns.  The raw price/valuation columns
    are dropped, every remaining factor column is prefixed with ``industry_``
    and ``ts_code`` is renamed to ``cat_l2_code``.

    NOTE(review): ``talib`` is used here but no import of it is visible in
    this file - confirm it is in scope at run time.
    """
    bars = pd.read_hdf(h5_filename, key='sw_daily', columns=[
        'ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'pe', 'pb', 'vol'
    ])
    bars = bars.sort_values(by=['ts_code', 'trade_date'])
    # NOTE(review): reindex() without arguments does not renumber the rows;
    # reset_index(drop=True) may have been intended - confirm.
    bars = bars.reindex()
    bars['trade_date'] = pd.to_datetime(bars['trade_date'], format='%Y%m%d')

    by_industry = bars.groupby('ts_code', group_keys=False)
    bars['obv'] = by_industry.apply(
        lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)
    )
    for horizon in (5, 20):
        bars[f'return_{horizon}'] = by_industry['close'].apply(
            lambda x: x / x.shift(horizon) - 1)

    bars = get_act_factor(bars, cat=False)
    bars = bars.sort_values(by=['trade_date', 'ts_code'])

    # Cross-sectional percentile of each return among all industries that day.
    for horizon in (5, 20):
        bars[f'return_{horizon}_percentile'] = bars.groupby('trade_date')[f'return_{horizon}'].transform(
            lambda x: x.rank(pct=True))

    bars = bars.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])

    # Prefix the factor columns and rename the key so it lines up with the
    # stock frame's industry column.
    renames = {col: f'industry_{col}' for col in bars.columns if col not in ['ts_code', 'trade_date']}
    renames['ts_code'] = 'cat_l2_code'
    return bars.rename(columns=renames)
|
||||
|
||||
|
||||
# From here on industry_df holds the industry-level factor table; it replaces
# the membership table that was bound to the same name earlier in the script.
industry_df = read_industry_data('../../data/sw_daily.h5')
|
||||
|
||||
|
||||
# In[7]:
|
||||
|
||||
|
||||
# Raw input columns that must not be used as model features later.  A few raw
# columns are deliberately left out of this list, as are index-indicator
# columns and anything containing 'cyq'.
origin_columns = [
    col for col in df.columns.tolist()
    if col not in ('turnover_rate', 'pe_ttm', 'volume_ratio', 'vol', 'pct_chg', 'l2_code', 'winner_rate')
    and col not in index_data.columns
    and 'cyq' not in col
]
print(origin_columns)
|
||||
|
||||
|
||||
# In[8]:
|
||||
|
||||
|
||||
def filter_data(df):
    """Restrict the universe and drop the helper ``in_date`` column.

    Rows are kept only when ``is_st`` is false, ``ts_code`` does not end with
    ``BJ`` and does not start with ``30``, ``68`` or ``8``, and ``trade_date``
    is on or after 2018-01-01.  The result has a fresh 0..n-1 index; the input
    frame is not modified.
    """
    codes = df['ts_code'].str

    keep = ~df['is_st']
    keep = keep & ~codes.endswith('BJ')
    for prefix in ('30', '68', '8'):
        keep = keep & ~codes.startswith(prefix)
    keep = keep & (df['trade_date'] >= '20180101')

    return df[keep].drop(columns=['in_date']).reset_index(drop=True)
|
||||
|
||||
|
||||
df = filter_data(df)

# Only the rolling and simple factor families are active.  Disabled in this
# run: get_technical_factor, get_act_factor, get_money_flow_factor,
# get_alpha_factor, get_limit_factor, get_cyp_perf_factor, get_mv_factors.
df, _ = get_rolling_factor(df)
df, _ = get_simple_factor(df)

# The industry-factor merge on ['l2_code', 'trade_date'] and the index_data
# merge on 'trade_date' are both disabled at this point.
df = df.rename(columns={'l2_code': 'cat_l2_code'})

print(df.info())
|
||||
|
||||
|
||||
# In[9]:
|
||||
|
||||
|
||||
def create_deviation_within_dates(df, feature_columns):
    """Add each feature's deviation from its same-day, same-industry mean.

    A feature qualifies when its name is not ``trade_date`` and contains none
    of ``cat``, ``index``, ``industry``, ``limit`` or ``cyq``.  For each one a
    ``deviation_mean_<feature>`` column is added holding the value minus the
    mean of its ``(trade_date, cat_l2_code)`` group.

    Returns ``(new_frame, feature_list)``: the frame with the extra columns
    appended and the original feature list followed by the new column names.
    The input frame is not modified.
    """
    groupby_col = 'cat_l2_code'
    excluded_tags = ('cat', 'index', 'industry', 'limit', 'cyq')

    eligible = [
        col for col in feature_columns
        if col != 'trade_date' and not any(tag in col for tag in excluded_tags)
    ]

    deviations = {
        f'deviation_mean_{feature}':
            df[feature] - df.groupby(['trade_date', groupby_col])[feature].transform('mean')
        for feature in eligible
    }
    ret_feature_columns = feature_columns[:] + [f'deviation_mean_{feature}' for feature in eligible]

    # Append all new columns in one concat rather than one at a time.
    df = pd.concat([df, pd.DataFrame(deviations)], axis=1)
    return df, ret_feature_columns
|
||||
|
||||
|
||||
# In[10]:
|
||||
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from scipy.stats import ks_2samp, wasserstein_distance
|
||||
from sklearn.metrics import roc_auc_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
|
||||
def remove_shifted_features(train_data, feature_columns, ks_threshold=0.05, wasserstein_threshold=0.1, size=0.8):
    """Drop numeric features whose distribution differs between early and late dates.

    The distinct ``trade_date`` values are sorted and split chronologically:
    the first ``size`` fraction of dates is the reference sample and the rest
    is the comparison sample.  A float64/int64 feature is flagged when the
    two-sample KS p-value is below ``ks_threshold`` or the Wasserstein
    distance exceeds ``wasserstein_threshold``.  Missing values are ignored,
    and a feature with no observations on either side is left alone.

    Returns ``(filtered_features, dropped_features)``; ``filtered_features``
    keeps the order of ``feature_columns``.
    """
    dropped_features = []

    # Sort the distinct dates explicitly: unique() follows row order, which
    # would make the split point arbitrary for frames not sorted by date.
    all_dates = pd.Index(train_data['trade_date'].dropna().unique()).sort_values()
    if len(all_dates) == 0:
        print(f"检测到 {len(dropped_features)} 个可能漂移的特征: {dropped_features}")
        return list(feature_columns), dropped_features

    # Clamp so that size=1.0 still yields a (one-date) comparison sample.
    split_pos = min(int(len(all_dates) * size), len(all_dates) - 1)
    split_date = all_dates[split_pos]
    train_data_split = train_data[train_data['trade_date'] < split_date]
    val_data_split = train_data[train_data['trade_date'] >= split_date]

    numeric_columns = train_data_split.select_dtypes(include=['float64', 'int64']).columns
    numeric_columns = [col for col in numeric_columns if col in feature_columns]
    for feature in numeric_columns:
        early = train_data_split[feature].dropna()
        late = val_data_split[feature].dropna()
        if early.empty or late.empty:
            # Nothing to compare; neither test is defined on an empty sample.
            continue

        ks_stat, p_value = ks_2samp(early, late)
        wasserstein_dist = wasserstein_distance(early, late)

        if p_value < ks_threshold or wasserstein_dist > wasserstein_threshold:
            dropped_features.append(feature)

    print(f"检测到 {len(dropped_features)} 个可能漂移的特征: {dropped_features}")

    filtered_features = [f for f in feature_columns if f not in dropped_features]

    return filtered_features, dropped_features
|
||||
|
||||
|
||||
def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,
                                     log=True):
    """Return the entries of ``label`` lying within two of its own quantiles.

    Values below the ``lower_percentile`` quantile or above the
    ``upper_percentile`` quantile are removed (both bounds inclusive); the
    surviving entries keep their original index.

    Raises:
        ValueError: unless ``0 <= lower_percentile < upper_percentile <= 1``.
    """
    percentiles_ok = 0 <= lower_percentile < upper_percentile <= 1
    if not percentiles_ok:
        raise ValueError("Percentile values must satisfy 0 <= lower_percentile < upper_percentile <= 1.")

    lower_bound = label.quantile(lower_percentile)
    upper_bound = label.quantile(upper_percentile)
    filtered_label = label[label.between(lower_bound, upper_bound)]

    if log:
        print(f"Removed {len(label) - len(filtered_label)} outliers.")
    return filtered_label
|
||||
|
||||
|
||||
def calculate_risk_adjusted_target(df, days=5):
    """Return a Sharpe-style target: forward return divided by its volatility.

    Per ``ts_code`` the forward return runs from the next row's ``open`` to
    the ``close`` ``days`` rows ahead.  Volatility is the rolling standard
    deviation of that forward return over ``days`` rows.  Infinite ratios
    (zero volatility) are replaced by NaN.

    The result is indexed like ``df`` sorted by ``ts_code`` and
    ``trade_date``; the frame passed in is not modified.
    """
    df = df.sort_values(by=['ts_code', 'trade_date'])

    df['future_close'] = df.groupby('ts_code')['close'].shift(-days)
    df['future_open'] = df.groupby('ts_code')['open'].shift(-1)
    df['future_return'] = (df['future_close'] - df['future_open']) / df['future_open']

    df['future_volatility'] = df.groupby('ts_code')['future_return'].rolling(days, min_periods=1).std().reset_index(
        level=0, drop=True)

    # A risk-adjusted return divides by volatility; the original multiplied,
    # which rewarded volatile names instead of penalising them.
    sharpe_ratio = df['future_return'] / df['future_volatility']
    sharpe_ratio = sharpe_ratio.replace([np.inf, -np.inf], np.nan)

    return sharpe_ratio
|
||||
|
||||
|
||||
def calculate_score(df, days=5, lambda_param=1.0):
    """Score each row as forward return minus a drawdown penalty.

    For every ``ts_code`` (rows ordered by ``trade_date``) the score is
    ``future_return - lambda_param * max_drawdown``, where ``max_drawdown`` is
    the largest peak-to-trough drop of ``close`` inside the ``days``-row
    window that ends ``days`` rows ahead.

    Requires the columns ``ts_code``, ``trade_date``, ``close`` and
    ``future_return``.  As before, the result is also stored in
    ``df['score']`` on the frame passed in, and that column is returned.
    """

    def _window_max_drawdown(window):
        # window is a float ndarray (raw=True): largest drop from running peak.
        running_peak = np.maximum.accumulate(window)
        return np.max((running_peak - window) / running_peak)

    def _score_one_stock(stock_df):
        stock_df = stock_df.sort_values(by=['trade_date'])
        max_drawdown = (
            stock_df['close']
            .rolling(days)
            .apply(_window_max_drawdown, raw=True)
            .shift(-days)
        )
        return stock_df['future_return'] - lambda_param * max_drawdown

    # Score stock by stock and concatenate.  groupby.apply would collapse the
    # result into a wide DataFrame for a single-stock frame, which then cannot
    # be assigned to one column.
    per_stock = [_score_one_stock(group) for _, group in df.groupby('ts_code')]
    if per_stock:
        df['score'] = pd.concat(per_stock)
    else:
        df['score'] = pd.Series(index=df.index, dtype='float64')

    return df['score']
|
||||
|
||||
|
||||
def remove_highly_correlated_features(df, feature_columns, threshold=0.9):
    """Drop features that are highly correlated with an earlier feature.

    Works on the absolute Pearson correlation of the numeric features.  A
    feature is marked redundant when its correlation with any feature listed
    before it exceeds ``threshold``.  Features whose name contains ``act`` or
    ``af`` are always kept, and so are non-numeric features.

    Returns the surviving feature names in their original order.

    Raises:
        ValueError: if none of ``feature_columns`` is numeric.
    """
    numeric_features = list(df[feature_columns].select_dtypes(include=[np.number]).columns)
    if len(numeric_features) == 0:
        raise ValueError("No numeric features found in the provided data.")

    abs_corr = df[numeric_features].corr().abs()

    # Keep only the strict upper triangle so every pair is looked at once and
    # the later feature of a pair is the one flagged.
    strictly_upper = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    pairwise = abs_corr.where(strictly_upper)
    redundant = set(pairwise.columns[(pairwise > threshold).any(axis=0)])

    return [
        col for col in feature_columns
        if col not in redundant or 'act' in col or 'af' in col
    ]
|
||||
|
||||
|
||||
import pandas as pd
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
|
||||
def cross_sectional_standardization(df, features):
    """Standardize ``features`` separately within every trade date.

    Returns a copy of ``df`` sorted by ``trade_date`` in which, for each date,
    the ``features`` columns are replaced by the output of a ``StandardScaler``
    fitted on that date's rows only.  The input frame is not modified.
    """
    df_standardized = df.sort_values(by='trade_date').copy()

    for date in df_standardized['trade_date'].unique():
        # One boolean mask per date, used for both the read and the write.
        on_date = df_standardized['trade_date'] == date
        day_values = df_standardized.loc[on_date, features]
        df_standardized.loc[on_date, features] = StandardScaler().fit_transform(day_values)

    return df_standardized
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
|
||||
|
||||
def neutralize_manual(df, features, industry_col, mkt_cap_col):
    """Replace each feature by its residual after regressing on log market cap.

    Within every ``industry_col`` group a one-variable least-squares fit
    ``feature ~ alpha + beta * log(mkt_cap_col)`` is computed by hand, and the
    feature is overwritten with the residual.  Groups with a single row, and
    rows whose industry is missing, keep their original value.  If log market
    cap is constant within a group the slope is taken as 0, so the residual is
    the deviation from the group mean.

    ``df`` is modified in place and also returned.
    """
    # Row positions per industry, so residuals go back to the rows they came
    # from.  The original appended them in group order and assigned the list
    # positionally, scrambling rows unless the frame was sorted by industry.
    group_positions = list(df.groupby(industry_col).indices.values())
    log_cap = np.log(df[mkt_cap_col].to_numpy(dtype=float))

    for col in features:
        values = df[col].to_numpy(dtype=float)
        residuals = values.copy()

        for positions in group_positions:
            if len(positions) < 2:
                continue  # not enough rows to fit a line; keep the raw value

            x = log_cap[positions]
            y = values[positions]
            x_centered = x - x.mean()
            y_centered = y - y.mean()

            # Same normalisation in numerator and denominator (the original
            # mixed a ddof=1 covariance with a ddof=0 variance).
            sxx = np.sum(x_centered * x_centered)
            beta = np.sum(x_centered * y_centered) / sxx if sxx > 0 else 0.0
            alpha = y.mean() - beta * x.mean()
            residuals[positions] = y - (alpha + beta * x)

        df[col] = residuals

    return df
|
||||
|
||||
|
||||
import gc
|
||||
|
||||
gc.collect()
|
||||
|
||||
|
||||
def mad_filter(df, features, n=3):
    """Clip each feature to ``median +/- n * MAD`` over the whole column.

    MAD is the median absolute deviation from the column median.  Missing
    values are ignored when computing the median and the MAD, and stay missing
    in the output.  ``df`` is modified in place and also returned.
    """
    for col in features:
        median = df[col].median()
        # Use the pandas median here as well: np.median returns NaN as soon as
        # one value is missing, which used to blank out the whole column.
        mad = (df[col] - median).abs().median()
        df[col] = df[col].clip(lower=median - n * mad, upper=median + n * mad)
    return df
|
||||
|
||||
|
||||
def percentile_filter(df, features, lower_percentile=0.01, upper_percentile=0.99):
    """Clip each feature to its per-date quantile band.

    For every ``trade_date`` the ``lower_percentile`` and ``upper_percentile``
    quantiles of the feature are computed, and values outside that band are
    clipped to it.  ``df`` is modified in place and also returned.
    """

    def _daily_quantile(col, q):
        # Quantile of `col` within each trade date, broadcast back to the rows.
        return df.groupby('trade_date')[col].transform(lambda values: values.quantile(q))

    for col in features:
        floor = _daily_quantile(col, lower_percentile)
        ceiling = _daily_quantile(col, upper_percentile)
        df[col] = np.clip(df[col], floor, ceiling)
    return df
|
||||
|
||||
|
||||
from scipy.stats import iqr
|
||||
|
||||
|
||||
def iqr_filter(df, features):
    """Scale each feature per trade date as ``(x - median) / IQR``.

    Dates whose interquartile range is exactly 0 are left unchanged.  ``df``
    is modified in place and also returned.
    """

    def _robust_scale(values):
        spread = iqr(values)
        if spread != 0:
            return (values - values.median()) / spread
        return values

    for col in features:
        df[col] = df.groupby('trade_date')[col].transform(_robust_scale)
    return df
|
||||
|
||||
|
||||
def quantile_filter(df, features, lower_quantile=0.01, upper_quantile=0.99, window=60):
    """Clip each feature to rolling quantile bounds computed inside each date.

    Within every ``trade_date`` group a rolling window of
    ``min(group size, window)`` rows, in the frame's row order, supplies the
    lower and upper quantile for each row; the feature is clipped to those
    bounds.  Works on a copy, so the input frame is not modified.

    NOTE(review): the window rolls over the rows of a single date, not over
    time per stock - confirm this is the intended behaviour.
    """
    clipped = df.copy()

    def _rolling_bound(col, q):
        return clipped.groupby('trade_date')[col].transform(
            lambda values: values.rolling(window=min(len(values), window)).quantile(q))

    for col in features:
        lower_bound = _rolling_bound(col, lower_quantile)
        upper_bound = _rolling_bound(col, upper_quantile)
        clipped[col] = np.clip(clipped[col], lower_bound, upper_bound)

    return clipped
|
||||
|
||||
|
||||
# In[11]:
|
||||
|
||||
|
||||
# print(test_data.head()[['act_factor1', 'act_factor2', 'ts_code', 'trade_date']])
|
||||
|
||||
|
||||
# In[12]:
|
||||
|
||||
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import lightgbm as lgb
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.decomposition import PCA
|
||||
|
||||
|
||||
def train_light_model(train_data_df, params, feature_columns, callbacks, evals,
                      print_feature_importance=True, num_boost_round=100,
                      validation_days=180, use_pca=False, split_date=None):
    """Train a LightGBM ranking model with a chronological validation split.

    Args:
        train_data_df: frame with ``trade_date``, ``label`` and all
            ``feature_columns``; rows without a label are dropped.
        params: LightGBM parameters, passed to ``lgb.train`` unchanged (the
            caller's dict is no longer modified).
        feature_columns: model inputs; names containing ``cat`` are treated
            as categorical.
        callbacks: LightGBM callbacks forwarded to ``lgb.train``.
        evals: evaluation-history dict passed to ``lgb.plot_metric``;
            presumably filled by a recording callback - confirm with caller.
        print_feature_importance: plot the metric history and the top-20
            split importances after training.
        num_boost_round: number of boosting rounds.
        validation_days: when ``split_date`` is None, the last
            ``validation_days`` distinct dates form the validation set.
        use_pca: replace the non-categorical features by PCA components
            covering 95% of the variance (fitted on the training split).
        split_date: first date of the validation set, if given.

    Returns:
        ``(model, scaler, pca)``.  ``scaler`` is an unfitted
        ``StandardScaler`` kept for interface compatibility; ``pca`` is None
        unless ``use_pca`` is set.
    """
    # Query groups below are derived per date, so rows must be date-ordered.
    train_data_df = train_data_df.sort_values(by='trade_date')

    train_data_df = train_data_df.dropna(subset=['label'])
    print('原始训练集大小: ', len(train_data_df))

    # Chronological split: everything from split_date onwards is validation.
    if split_date is None:
        all_dates = train_data_df['trade_date'].unique()
        split_date = all_dates[-validation_days]
    train_data_split = train_data_df[train_data_df['trade_date'] < split_date]
    val_data_split = train_data_df[train_data_df['trade_date'] >= split_date]

    print(f"划分后的训练集大小: {len(train_data_split)}, 验证集大小: {len(val_data_split)}")

    X_train = train_data_split[feature_columns]
    y_train = train_data_split['label']

    X_val = val_data_split[feature_columns]
    y_val = val_data_split['label']

    # Not fitted or applied here; returned so existing callers can still
    # unpack three values.
    scaler = StandardScaler()

    # One query group per trade date (row counts in date order).
    train_groups = train_data_split.groupby('trade_date').size().tolist()
    val_groups = val_data_split.groupby('trade_date').size().tolist()

    categorical_feature = [col for col in feature_columns if 'cat' in col]

    pca = None
    if use_pca:
        pca = PCA(n_components=0.95)
        numeric_features = [col for col in feature_columns if col not in categorical_feature]
        numeric_pca = pca.fit_transform(X_train[numeric_features])
        X_train = pd.concat([pd.DataFrame(numeric_pca, index=X_train.index), X_train[categorical_feature]], axis=1)

        numeric_pca = pca.transform(X_val[numeric_features])
        X_val = pd.concat([pd.DataFrame(numeric_pca, index=X_val.index), X_val[categorical_feature]], axis=1)

    # Time-based sample weights: a linear ramp from 1 to 10 over the training
    # dates, squared, so recent dates count more.
    ud = sorted(train_data_split["trade_date"].unique().tolist())
    date_weights = {date: weight * weight for date, weight in zip(ud, np.linspace(1, 10, len(ud)))}
    sample_weight = train_data_split["trade_date"].map(date_weights).tolist()

    # The weights go on the Dataset.  The original wrote them into
    # params['weight'], which changed the caller's dict and was never applied
    # to the in-memory training data.
    train_dataset = lgb.Dataset(
        X_train, label=y_train, group=train_groups,
        weight=sample_weight,
        categorical_feature=categorical_feature
    )

    val_dataset = lgb.Dataset(
        X_val, label=y_val, group=val_groups,
        categorical_feature=categorical_feature
    )

    model = lgb.train(
        params, train_dataset, num_boost_round=num_boost_round,
        valid_sets=[train_dataset, val_dataset], valid_names=['train', 'valid'],
        callbacks=callbacks
    )

    if print_feature_importance:
        lgb.plot_metric(evals)
        lgb.plot_importance(model, importance_type='split', max_num_features=20)
        plt.show()

    return model, scaler, pca
|
||||
|
||||
|
||||
# In[13]:
|
||||
|
||||
|
||||
days = 2
df = df.sort_values(by=['ts_code', 'trade_date'])

# Forward return per stock: buy at the next row's open, sell at the close
# `days` rows ahead.
_entry_open = df.groupby('ts_code')['open'].shift(-1)
_exit_close = df.groupby('ts_code')['close'].shift(-days)
df['future_return'] = (_exit_close - _entry_open) / _entry_open

# Standard deviation of pct_chg over the `days` rows that follow each row.
df['future_volatility'] = df.groupby('ts_code')['pct_chg'].transform(
    lambda x: x.rolling(days).std().shift(-days)
)

# Drawdown-penalised forward return, then bucketed into up to 20 quantile
# bins per trade date to form the ranking label.
df['future_score'] = calculate_score(df, days=days, lambda_param=0.3)
df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(
    lambda x: pd.qcut(x, q=20, labels=False, duplicates='drop')
)
|
||||
# df['future_score'] = (
|
||||
# 0.7 * df['future_return']
|
||||
# * 0.3 * df['future_volatility']
|
||||
# )
|
||||
|
||||
|
||||
# In[30]:
|
||||
|
||||
|
||||
def select_pre_zt_stocks_dynamic(
        stock_df,
        top_n=1000,
        rank_col='return_20',
):
    """Keep, for every trade date, the ``top_n`` rows with the largest ``rank_col``.

    Rows come back grouped by ``trade_date`` (ascending) and, within a date,
    ordered by ``rank_col`` descending with missing values last.  The original
    index labels and all columns, including ``trade_date``, are preserved.

    Args:
        stock_df: frame with ``trade_date`` and ``rank_col`` columns.
        top_n: rows to keep per date (default 1000, the former fixed value).
        rank_col: column to rank by (default ``'return_20'``, as before).
    """
    # Sort once and take the head of each date.  This avoids groupby.apply,
    # whose treatment of the grouping column differs between pandas versions.
    ranked = stock_df.sort_values([ 'trade_date', rank_col], ascending=[True, False])
    return ranked.groupby('trade_date').head(top_n)
|
||||
|
||||
|
||||
# Candidate universe: the per-date top rows by return_20.
pdf = select_pre_zt_stocks_dynamic(df)

# Boolean mask over pdf marking rows whose forward return lies inside the
# overall 1%-99% quantile band; used to select training rows later.
filter_index = pdf['future_return'].between(
    pdf['future_return'].quantile(0.01),
    pdf['future_return'].quantile(0.99),
)
|
||||
|
||||
# filter_index = pdf['future_volatility'].between(pdf['future_volatility'].quantile(0.01),
|
||||
# pdf['future_volatility'].quantile(0.99)) | filter_index
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Join the industry factors, order by date and turn infinities into NaN.
pdf = (
    pdf.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')
    .sort_values(['trade_date'])
    .replace([np.inf, -np.inf], np.nan)
)

# A column is a model feature unless it is an identifier, a target or other
# look-ahead column, the industry key, a raw input column, or a private
# ('_'-prefixed) column.
feature_columns = [
    col for col in pdf.columns
    if col not in ('trade_date', 'ts_code', 'label')
    and not any(tag in col for tag in ('future', 'label', 'score', 'gen', 'cat_l2_code'))
    and col not in origin_columns
    and not col.startswith('_')
]

numeric_columns = [
    col for col in pdf.select_dtypes(include=['float64', 'int64']).columns
    if col in feature_columns
]

# Drift-based feature removal (remove_shifted_features) is disabled here.

# Clip extremes, then standardize within each trade date.
pdf = quantile_filter(pdf, numeric_columns)
pdf = cross_sectional_standardization(pdf, numeric_columns)

# Neutralisation (neutralize_manual) and the separate train/test clipping
# steps are disabled in this run.

feature_columns = remove_highly_correlated_features(pdf, feature_columns)
print(len(pdf))
|
||||
|
||||
|
||||
# In[123]:
|
||||
|
||||
|
||||
# print('train data size: ', len(train_data))
|
||||
|
||||
# Relevance gain per label level: level squared, so the top buckets dominate
# NDCG.  NOTE(review): unique() also counts NaN as a level - confirm the extra
# gain entry is intended.
label_gain = [level * level for level in range(len(df['label'].unique()))]

light_params = {
    'label_gain': label_gain,
    'objective': 'lambdarank',
    'metric': 'ndcg',
    'learning_rate': 0.03,
    'num_leaves': 32,
    # 'min_data_in_leaf': 128,
    'max_depth': 8,
    'max_bin': 32,
    'feature_fraction': 0.7,
    # 'bagging_fraction': 0.7,
    'bagging_freq': 5,
    'lambda_l1': 0.1,
    'lambda_l2': 0.1,
    'boosting': 'gbdt',
    'verbosity': -1,
    'extra_trees': True,
    'max_position': 5,
    'ndcg_at': 1,
    'quant_train_renew_leaf': True,
    'lambdarank_truncation_level': 3,
    # 'lambdarank_position_bias_regularization': 1,
    'seed': 7,
}

# Evaluation-history container handed to the training helper.
evals = {}

gc.collect()
|
||||
|
||||
|
||||
# In[128]:
|
||||
|
||||
|
||||
gc.collect()
|
||||
|
||||
|
||||
def rolling_train_predict(df, train_days, test_days, feature_columns_origin, days=5, use_pca=False, validation_days=60,
                          filter_index=None, start_date='2020-01-01'):
    """Walk-forward train/predict: refit on a sliding window, keep each test day's top-scored row.

    The distinct trade dates on or after ``start_date`` are sorted and cut into
    consecutive windows of ``train_days`` training dates followed by
    ``test_days`` test dates, advancing by ``test_days`` each step. The oldest
    few dates are skipped so that the last test window ends on the latest date.

    Args:
        df: Frame with at least 'trade_date', 'ts_code', 'label' and the
            feature columns. 'trade_date' must support ``.strftime`` on its
            min/max (i.e. be datetime-like).
        train_days: Number of distinct trade dates per training window.
        test_days: Number of distinct trade dates per test window (also the step).
        feature_columns_origin: Names of the feature columns used for training
            and scoring.
        days: Unused; kept so existing callers keep working.
        use_pca: Accepted for interface compatibility but not forwarded:
            training is always invoked with ``use_pca=False`` because the
            scoring step feeds the raw feature columns to the model.
        validation_days: Forwarded to ``train_light_model``.
        filter_index: Optional boolean mask applied to the TRAINING rows only
            (test rows are never filtered). ``None`` means no extra filter.
        start_date: Earliest trade date taken into account.

    Returns:
        DataFrame with columns 'trade_date', 'score', 'ts_code', one row per
        scored test date (the row with the highest predicted score). Empty
        when there is not enough history for a single window.

    Side effects:
        Overwrites the module-level ``light_params['label_gain']`` on every
        window, records metrics into the module-level ``evals`` and prints the
        date range of every window.
    """
    # Use the columns the caller passed in instead of silently depending on a
    # module-level `feature_columns` that happens to exist.
    feature_columns = list(feature_columns_origin)
    result_columns = ['trade_date', 'score', 'ts_code']

    # 1. Sorted list of the distinct trade dates to roll over.
    unique_dates = sorted(df[df['trade_date'] >= start_date]['trade_date'].unique().tolist())
    n = len(unique_dates)

    # 2. Number of leading dates to skip so that the windows line up with the
    #    most recent date.
    start_index = (n - train_days) % test_days

    # Loop-invariant: columns to be handed to LightGBM as pandas categoricals.
    cat_columns = [col for col in df.columns if col.startswith('cat')]

    predictions_list = []

    for start in range(start_index, n - train_days - test_days + 1, test_days):
        train_end = start + train_days
        train_dates = unique_dates[start:train_end]
        test_dates = unique_dates[train_end:train_end + test_days]

        # Select rows by date; the optional filter only restricts training rows.
        in_train = df['trade_date'].isin(train_dates)
        train_mask = in_train if filter_index is None else filter_index & in_train
        train_data = df[train_mask].sort_values('trade_date')
        test_data = df[df['trade_date'].isin(test_dates)].sort_values('trade_date')

        # feature_columns, _ = remove_shifted_features(train_data, feature_columns_origin, size=0.8)

        # Training needs complete features and a label; test rows only need
        # features (their labels may not exist yet).
        train_data = train_data.dropna(subset=feature_columns + ['label']).reset_index(drop=True)
        test_data = test_data.dropna(subset=feature_columns).reset_index(drop=True)

        if train_data.empty or test_data.empty:
            # Nothing to fit or score; min()/max() below would be NaT and
            # crash on strftime, so skip the window explicitly.
            print(f'skip window at date index {start}: '
                  f'{len(train_data)} train rows / {len(test_data)} test rows after dropna')
            continue

        print(f"最小日期: {train_data['trade_date'].min().strftime('%Y-%m-%d')}")
        print(f"最大日期: {train_data['trade_date'].max().strftime('%Y-%m-%d')}")
        print(f"最小日期: {test_data['trade_date'].min().strftime('%Y-%m-%d')}")
        print(f"最大日期: {test_data['trade_date'].max().strftime('%Y-%m-%d')}")

        for col in cat_columns:
            train_data[col] = train_data[col].astype('category')
            test_data[col] = test_data[col].astype('category')

        # Gain of label level k is (k + 1) ** 2. The table has to reach the
        # largest label value, so size it by the max label as well as by the
        # distinct count (the two differ when a level is missing in a window).
        n_levels = max(train_data['label'].nunique(), int(train_data['label'].max()) + 1)
        light_params['label_gain'] = [(level + 1) ** 2 for level in range(n_levels)]

        model, _, _ = train_light_model(
            train_data,
            light_params,
            feature_columns,
            [
                lgb.log_evaluation(period=100),
                lgb.callback.record_evaluation(evals),
                lgb.early_stopping(100, first_metric_only=True),
            ],
            evals,
            num_boost_round=3000,
            validation_days=validation_days,
            print_feature_importance=False,
            use_pca=False,
        )

        # Score the test window and keep the best-scored row of each date.
        score_df = test_data.copy()
        score_df['score'] = model.predict(score_df[feature_columns])
        score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]
        predictions_list.append(score_df[result_columns])

        # NOTE: an experimental variant that re-standardised each test day and
        # incrementally refitted the model every few days (lgb.train with
        # init_model) used to be sketched here as commented-out code.

    if not predictions_list:
        # pd.concat([]) raises ValueError; hand back an empty, well-formed frame.
        return pd.DataFrame(columns=result_columns)

    return pd.concat(predictions_list, ignore_index=True)
|
||||
|
||||
|
||||
# In[129]:
|
||||
|
||||
|
||||
gc.collect()

# Debug output: latest act_factor1 / act_factor2 row of one reference stock.
print(df[df['ts_code'] == '000001.SZ'][['act_factor1', 'act_factor2']].tail(1))
print('finish')
# qdf = qdf[qdf['trade_date'] >= '2022-01-01']

# Roll over the data from 2020 onwards: 500 training dates, 20 test dates per step.
final_predictions = rolling_train_predict(
    pdf[pdf['trade_date'] >= '2020-01-01'],
    500,
    20,
    feature_columns,
    days=days,
    validation_days=60,
    filter_index=filter_index,
)

# NOTE(review): the file carries a .tsv name but to_csv's default separator
# is a comma -- confirm what the downstream reader expects before changing it.
final_predictions.to_csv('predictions_test.tsv', index=False)
|
||||
|
||||
|
||||
# In[126]:
|
||||
|
||||
|
||||
# Debug output: latest act_factor1 / act_factor2 row of one reference stock.
print(df[df['ts_code'] == '000001.SZ'][['act_factor1', 'act_factor2']].tail(1))
print('finish')
|
||||
|
||||
|
||||
# In[29]:
|
||||
|
||||
|
||||
# Sanity check: how many filtered rows with a label exist on one trading day.
train_data = (
    pdf[filter_index & (pdf['trade_date'] == '2023-01-03')]
    .dropna(subset=['label'])
    .reset_index(drop=True)
)
print(len(train_data))
|
||||
|
||||
|
||||
# In[34]:
|
||||
|
||||
|
||||
# filter_index = pdf['future_return'].between(pdf['future_return'].quantile(0.01), pdf['future_return'].quantile(0.99))
|
||||
|
||||
# Same single-day check, this time counting rows without a label as well.
single_day_mask = filter_index & (pdf['trade_date'] == '2023-01-03')
train_data = pdf[single_day_mask]
print(len(train_data))
|
||||
|
||||
3543
main/train/RollingRankCopy.ipynb
Normal file
3543
main/train/RollingRankCopy.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1717
main/train/TRank.ipynb
Normal file
1717
main/train/TRank.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1362
main/train/Transformer.ipynb
Normal file
1362
main/train/Transformer.ipynb
Normal file
File diff suppressed because one or more lines are too long
1406
main/train/UpdateClassify.ipynb
Normal file
1406
main/train/UpdateClassify.ipynb
Normal file
File diff suppressed because one or more lines are too long
1566
main/train/UpdateRank.ipynb
Normal file
1566
main/train/UpdateRank.ipynb
Normal file
File diff suppressed because one or more lines are too long
1138
main/train/UpdateRegression.ipynb
Normal file
1138
main/train/UpdateRegression.ipynb
Normal file
File diff suppressed because one or more lines are too long
1387
main/train/UpdateSGD.ipynb
Normal file
1387
main/train/UpdateSGD.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
896
main/train/V1-copy.ipynb
Normal file
896
main/train/V1-copy.ipynb
Normal file
@@ -0,0 +1,896 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:52:54.170824Z",
|
||||
"start_time": "2025-02-09T14:52:53.544850Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"\n",
|
||||
"from code.utils.utils import read_and_merge_h5_data"
|
||||
],
|
||||
"id": "79a7758178bafdd3",
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:36.873700Z",
|
||||
"start_time": "2025-02-09T14:52:54.170824Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic_with_st',\n",
|
||||
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
|
||||
" 'is_st'], df=df)\n",
|
||||
"\n",
|
||||
"print('stk limit')\n",
|
||||
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
|
||||
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
|
||||
" df=df)\n",
|
||||
"print('money flow')\n",
|
||||
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
|
||||
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
|
||||
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
|
||||
" df=df)"
|
||||
],
|
||||
"id": "a79cafb06a7e0e43",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"daily data\n",
|
||||
"daily basic\n",
|
||||
"stk limit\n",
|
||||
"money flow\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:37.426404Z",
|
||||
"start_time": "2025-02-09T14:53:36.955552Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "origin_columns = df.columns.tolist()",
|
||||
"id": "c4e9e1d31da6dba6",
|
||||
"outputs": [],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:38.164112Z",
|
||||
"start_time": "2025-02-09T14:53:38.070007Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import talib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_technical_factor(df):\n",
|
||||
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
|
||||
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
|
||||
"\n",
|
||||
" df['atr_14'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=14)\n",
|
||||
" df['atr_6'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=6)\n",
|
||||
"\n",
|
||||
" df['obv'] = talib.OBV(df['close'], df['vol'])\n",
|
||||
" df['maobv_6'] = talib.SMA(df['obv'], timeperiod=6)\n",
|
||||
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
|
||||
"\n",
|
||||
" df['rsi_3'] = talib.RSI(df['close'], timeperiod=3)\n",
|
||||
" df['rsi_6'] = talib.RSI(df['close'], timeperiod=6)\n",
|
||||
" df['rsi_9'] = talib.RSI(df['close'], timeperiod=9)\n",
|
||||
"\n",
|
||||
" df['return_10'] = df['close'] / df['close'].shift(10) - 1\n",
|
||||
" df['return_20'] = df['close'] / df['close'].shift(20) - 1\n",
|
||||
"\n",
|
||||
" # # 计算 _rank_return_10 和 _rank_return_20\n",
|
||||
" # df['_rank_return_10'] = df['return_10'].rank(pct=True)\n",
|
||||
" # df['_rank_return_20'] = df['return_20'].rank(pct=True)\n",
|
||||
"\n",
|
||||
" # 计算 avg_close_5\n",
|
||||
" df['avg_close_5'] = df['close'].rolling(window=5).mean() / df['close']\n",
|
||||
"\n",
|
||||
" # 计算 std_return_5, std_return_15, std_return_25, std_return_252, std_return_2522\n",
|
||||
" df['std_return_5'] = df['close'].pct_change().shift(-1).rolling(window=5).std()\n",
|
||||
" df['std_return_15'] = df['close'].pct_change().shift(-1).rolling(window=15).std()\n",
|
||||
" df['std_return_25'] = df['close'].pct_change().shift(-1).rolling(window=25).std()\n",
|
||||
" df['std_return_90'] = df['close'].pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
" df['std_return_90_2'] = df['close'].shift(10).pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
"\n",
|
||||
" # 计算 std_return_5 / std_return_252 和 std_return_5 / std_return_25\n",
|
||||
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
|
||||
" df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
|
||||
"\n",
|
||||
" # 计算 std_return_252 - std_return_2522\n",
|
||||
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_act_factor(df):\n",
|
||||
" # 计算 m_ta_ema(close, 5), m_ta_ema(close, 13), m_ta_ema(close, 20), m_ta_ema(close, 60)\n",
|
||||
" df['ema_5'] = talib.EMA(df['close'], timeperiod=5)\n",
|
||||
" df['ema_13'] = talib.EMA(df['close'], timeperiod=13)\n",
|
||||
" df['ema_20'] = talib.EMA(df['close'], timeperiod=20)\n",
|
||||
" df['ema_60'] = talib.EMA(df['close'], timeperiod=60)\n",
|
||||
"\n",
|
||||
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
|
||||
" df['act_factor1'] = np.arctan((df['ema_5'] / df['ema_5'].shift(1) - 1) * 100) * 57.3 / 50\n",
|
||||
" df['act_factor2'] = np.arctan((df['ema_13'] / df['ema_13'].shift(1) - 1) * 100) * 57.3 / 40\n",
|
||||
" df['act_factor3'] = np.arctan((df['ema_20'] / df['ema_20'].shift(1) - 1) * 100) * 57.3 / 21\n",
|
||||
" df['act_factor4'] = np.arctan((df['ema_60'] / df['ema_60'].shift(1) - 1) * 100) * 57.3 / 10\n",
|
||||
"\n",
|
||||
" # 计算 act_factor5 和 act_factor6\n",
|
||||
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
|
||||
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
|
||||
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
|
||||
"\n",
|
||||
" # 根据 'trade_date' 进行分组,在每个组内分别计算 'act_factor1', 'act_factor2', 'act_factor3' 的排名\n",
|
||||
" df['rank_act_factor1'] = df.groupby('trade_date')['act_factor1'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor2'] = df.groupby('trade_date')['act_factor2'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor3'] = df.groupby('trade_date')['act_factor3'].rank(ascending=False, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_money_flow_factor(df):\n",
|
||||
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['buy_lg_vol - sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
|
||||
" df['buy_elg_vol - sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" # # 你还提到了一些其他字段:\n",
|
||||
" # df['net_active_buy_volume_main'] = df['net_mf_vol'] / df['buy_sm_vol']\n",
|
||||
" # df['netflow_amount_main'] = df['net_mf_vol'] / df['buy_sm_vol'] # 这里假设 'net_mf_vol' 是主流资金流\n",
|
||||
"\n",
|
||||
" # df['active_sell_volume_large'] = df['sell_lg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_big'] = df['sell_elg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_small'] = df['sell_sm_vol'] / df['sell_sm_vol']\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_alpha_factor(df):\n",
|
||||
" df['alpha_022'] = df['close'] - df['close'].shift(5)\n",
|
||||
"\n",
|
||||
" # alpha_003: (close - open) / (high - low)\n",
|
||||
" df['alpha_003'] = (df['close'] - df['open']) / (df['high'] - df['low'])\n",
|
||||
"\n",
|
||||
" # alpha_007: rank(correlation(close, volume, 5))\n",
|
||||
" df['alpha_007'] = df['close'].rolling(5).corr(df['vol']).rank(axis=1)\n",
|
||||
"\n",
|
||||
" # alpha_013: rank(sum(close, 5) - sum(close, 20))\n",
|
||||
" df['alpha_013'] = (df['close'].rolling(5).sum() - df['close'].rolling(20).sum()).rank(axis=1)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_future_data(df):\n",
|
||||
" df['future_return1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_return2'] = (df['open'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return3'] = (df['close'].shift(-2) - df['close'].shift(-1)) / df['close'].shift(-1)\n",
|
||||
" df['future_return4'] = (df['close'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return5'] = (df['close'].shift(-5) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return6'] = (df['close'].shift(-10) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return7'] = (df['close'].shift(-20) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_close1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_close2'] = (df['close'].shift(-2) - df['close']) / df['close']\n",
|
||||
" df['future_close3'] = (df['close'].shift(-3) - df['close']) / df['close']\n",
|
||||
" df['future_close4'] = (df['close'].shift(-4) - df['close']) / df['close']\n",
|
||||
" df['future_close5'] = (df['close'].shift(-5) - df['close']) / df['close']\n",
|
||||
" df['future_af11'] = df['act_factor1'].shift(-1)\n",
|
||||
" df['future_af12'] = df['act_factor1'].shift(-2)\n",
|
||||
" df['future_af13'] = df['act_factor1'].shift(-3)\n",
|
||||
" df['future_af14'] = df['act_factor1'].shift(-4)\n",
|
||||
" df['future_af15'] = df['act_factor1'].shift(-5)\n",
|
||||
" df['future_af21'] = df['act_factor2'].shift(-1)\n",
|
||||
" df['future_af22'] = df['act_factor2'].shift(-2)\n",
|
||||
" df['future_af23'] = df['act_factor2'].shift(-3)\n",
|
||||
" df['future_af24'] = df['act_factor2'].shift(-4)\n",
|
||||
" df['future_af25'] = df['act_factor2'].shift(-5)\n",
|
||||
" df['future_af31'] = df['act_factor3'].shift(-1)\n",
|
||||
" df['future_af32'] = df['act_factor3'].shift(-2)\n",
|
||||
" df['future_af33'] = df['act_factor3'].shift(-3)\n",
|
||||
" df['future_af34'] = df['act_factor3'].shift(-4)\n",
|
||||
" df['future_af35'] = df['act_factor3'].shift(-5)\n",
|
||||
"\n",
|
||||
" return df\n"
|
||||
],
|
||||
"id": "a735bc02ceb4d872",
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:53:49.153376Z",
|
||||
"start_time": "2025-02-09T14:53:38.164112Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"df = get_technical_factor(df)\n",
|
||||
"df = get_act_factor(df)\n",
|
||||
"df = get_money_flow_factor(df)\n",
|
||||
"df = get_future_data(df)\n",
|
||||
"# df = df.drop(columns=origin_columns)\n",
|
||||
"\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "53f86ddc0677a6d7",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 8364308 entries, 0 to 8364307\n",
|
||||
"Data columns (total 83 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object \n",
|
||||
" 1 trade_date datetime64[ns]\n",
|
||||
" 2 open float64 \n",
|
||||
" 3 close float64 \n",
|
||||
" 4 high float64 \n",
|
||||
" 5 low float64 \n",
|
||||
" 6 vol float64 \n",
|
||||
" 7 is_st object \n",
|
||||
" 8 up_limit float64 \n",
|
||||
" 9 down_limit float64 \n",
|
||||
" 10 buy_sm_vol float64 \n",
|
||||
" 11 sell_sm_vol float64 \n",
|
||||
" 12 buy_lg_vol float64 \n",
|
||||
" 13 sell_lg_vol float64 \n",
|
||||
" 14 buy_elg_vol float64 \n",
|
||||
" 15 sell_elg_vol float64 \n",
|
||||
" 16 net_mf_vol float64 \n",
|
||||
" 17 up float64 \n",
|
||||
" 18 down float64 \n",
|
||||
" 19 atr_14 float64 \n",
|
||||
" 20 atr_6 float64 \n",
|
||||
" 21 obv float64 \n",
|
||||
" 22 maobv_6 float64 \n",
|
||||
" 23 obv-maobv_6 float64 \n",
|
||||
" 24 rsi_3 float64 \n",
|
||||
" 25 rsi_6 float64 \n",
|
||||
" 26 rsi_9 float64 \n",
|
||||
" 27 return_10 float64 \n",
|
||||
" 28 return_20 float64 \n",
|
||||
" 29 avg_close_5 float64 \n",
|
||||
" 30 std_return_5 float64 \n",
|
||||
" 31 std_return_15 float64 \n",
|
||||
" 32 std_return_25 float64 \n",
|
||||
" 33 std_return_90 float64 \n",
|
||||
" 34 std_return_90_2 float64 \n",
|
||||
" 35 std_return_5 / std_return_90 float64 \n",
|
||||
" 36 std_return_5 / std_return_25 float64 \n",
|
||||
" 37 std_return_90 - std_return_90_2 float64 \n",
|
||||
" 38 ema_5 float64 \n",
|
||||
" 39 ema_13 float64 \n",
|
||||
" 40 ema_20 float64 \n",
|
||||
" 41 ema_60 float64 \n",
|
||||
" 42 act_factor1 float64 \n",
|
||||
" 43 act_factor2 float64 \n",
|
||||
" 44 act_factor3 float64 \n",
|
||||
" 45 act_factor4 float64 \n",
|
||||
" 46 act_factor5 float64 \n",
|
||||
" 47 act_factor6 float64 \n",
|
||||
" 48 rank_act_factor1 float64 \n",
|
||||
" 49 rank_act_factor2 float64 \n",
|
||||
" 50 rank_act_factor3 float64 \n",
|
||||
" 51 active_buy_volume_large float64 \n",
|
||||
" 52 active_buy_volume_big float64 \n",
|
||||
" 53 active_buy_volume_small float64 \n",
|
||||
" 54 buy_lg_vol - sell_lg_vol float64 \n",
|
||||
" 55 buy_elg_vol - sell_elg_vol float64 \n",
|
||||
" 56 future_return1 float64 \n",
|
||||
" 57 future_return2 float64 \n",
|
||||
" 58 future_return3 float64 \n",
|
||||
" 59 future_return4 float64 \n",
|
||||
" 60 future_return5 float64 \n",
|
||||
" 61 future_return6 float64 \n",
|
||||
" 62 future_return7 float64 \n",
|
||||
" 63 future_close1 float64 \n",
|
||||
" 64 future_close2 float64 \n",
|
||||
" 65 future_close3 float64 \n",
|
||||
" 66 future_close4 float64 \n",
|
||||
" 67 future_close5 float64 \n",
|
||||
" 68 future_af11 float64 \n",
|
||||
" 69 future_af12 float64 \n",
|
||||
" 70 future_af13 float64 \n",
|
||||
" 71 future_af14 float64 \n",
|
||||
" 72 future_af15 float64 \n",
|
||||
" 73 future_af21 float64 \n",
|
||||
" 74 future_af22 float64 \n",
|
||||
" 75 future_af23 float64 \n",
|
||||
" 76 future_af24 float64 \n",
|
||||
" 77 future_af25 float64 \n",
|
||||
" 78 future_af31 float64 \n",
|
||||
" 79 future_af32 float64 \n",
|
||||
" 80 future_af33 float64 \n",
|
||||
" 81 future_af34 float64 \n",
|
||||
" 82 future_af35 float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(80), object(2)\n",
|
||||
"memory usage: 5.2+ GB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:55:28.712343Z",
|
||||
"start_time": "2025-02-09T14:53:49.279168Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def filter_data(df):\n",
|
||||
" df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor3'))\n",
|
||||
" df = df[df['is_st'] == False]\n",
|
||||
" df = df[df['is_st'] == False]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('30')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('68')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('8')]\n",
|
||||
" df = df.reset_index(drop=True)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = filter_data(df)\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "dbe2fd8021b9417f",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 1136157 entries, 0 to 1136156\n",
|
||||
"Data columns (total 83 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 ts_code 1136157 non-null object \n",
|
||||
" 1 trade_date 1136157 non-null datetime64[ns]\n",
|
||||
" 2 open 1136157 non-null float64 \n",
|
||||
" 3 close 1136157 non-null float64 \n",
|
||||
" 4 high 1136157 non-null float64 \n",
|
||||
" 5 low 1136157 non-null float64 \n",
|
||||
" 6 vol 1136157 non-null float64 \n",
|
||||
" 7 is_st 1136157 non-null object \n",
|
||||
" 8 up_limit 1135878 non-null float64 \n",
|
||||
" 9 down_limit 1135878 non-null float64 \n",
|
||||
" 10 buy_sm_vol 1135663 non-null float64 \n",
|
||||
" 11 sell_sm_vol 1135663 non-null float64 \n",
|
||||
" 12 buy_lg_vol 1135663 non-null float64 \n",
|
||||
" 13 sell_lg_vol 1135663 non-null float64 \n",
|
||||
" 14 buy_elg_vol 1135663 non-null float64 \n",
|
||||
" 15 sell_elg_vol 1135663 non-null float64 \n",
|
||||
" 16 net_mf_vol 1135663 non-null float64 \n",
|
||||
" 17 up 1136157 non-null float64 \n",
|
||||
" 18 down 1136157 non-null float64 \n",
|
||||
" 19 atr_14 1136157 non-null float64 \n",
|
||||
" 20 atr_6 1136157 non-null float64 \n",
|
||||
" 21 obv 1136157 non-null float64 \n",
|
||||
" 22 maobv_6 1136157 non-null float64 \n",
|
||||
" 23 obv-maobv_6 1136157 non-null float64 \n",
|
||||
" 24 rsi_3 1136157 non-null float64 \n",
|
||||
" 25 rsi_6 1136157 non-null float64 \n",
|
||||
" 26 rsi_9 1136157 non-null float64 \n",
|
||||
" 27 return_10 1136157 non-null float64 \n",
|
||||
" 28 return_20 1136157 non-null float64 \n",
|
||||
" 29 avg_close_5 1136157 non-null float64 \n",
|
||||
" 30 std_return_5 1136157 non-null float64 \n",
|
||||
" 31 std_return_15 1136157 non-null float64 \n",
|
||||
" 32 std_return_25 1136157 non-null float64 \n",
|
||||
" 33 std_return_90 1136131 non-null float64 \n",
|
||||
" 34 std_return_90_2 1136129 non-null float64 \n",
|
||||
" 35 std_return_5 / std_return_90 1136131 non-null float64 \n",
|
||||
" 36 std_return_5 / std_return_25 1136157 non-null float64 \n",
|
||||
" 37 std_return_90 - std_return_90_2 1136129 non-null float64 \n",
|
||||
" 38 ema_5 1136157 non-null float64 \n",
|
||||
" 39 ema_13 1136157 non-null float64 \n",
|
||||
" 40 ema_20 1136157 non-null float64 \n",
|
||||
" 41 ema_60 1136153 non-null float64 \n",
|
||||
" 42 act_factor1 1136157 non-null float64 \n",
|
||||
" 43 act_factor2 1136157 non-null float64 \n",
|
||||
" 44 act_factor3 1136157 non-null float64 \n",
|
||||
" 45 act_factor4 1136152 non-null float64 \n",
|
||||
" 46 act_factor5 1136152 non-null float64 \n",
|
||||
" 47 act_factor6 1136157 non-null float64 \n",
|
||||
" 48 rank_act_factor1 1136157 non-null float64 \n",
|
||||
" 49 rank_act_factor2 1136157 non-null float64 \n",
|
||||
" 50 rank_act_factor3 1136157 non-null float64 \n",
|
||||
" 51 active_buy_volume_large 1135659 non-null float64 \n",
|
||||
" 52 active_buy_volume_big 1135636 non-null float64 \n",
|
||||
" 53 active_buy_volume_small 1135663 non-null float64 \n",
|
||||
" 54 buy_lg_vol - sell_lg_vol 1135660 non-null float64 \n",
|
||||
" 55 buy_elg_vol - sell_elg_vol 1135640 non-null float64 \n",
|
||||
" 56 future_return1 1136157 non-null float64 \n",
|
||||
" 57 future_return2 1136157 non-null float64 \n",
|
||||
" 58 future_return3 1136157 non-null float64 \n",
|
||||
" 59 future_return4 1136157 non-null float64 \n",
|
||||
" 60 future_return5 1136157 non-null float64 \n",
|
||||
" 61 future_return6 1136157 non-null float64 \n",
|
||||
" 62 future_return7 1136157 non-null float64 \n",
|
||||
" 63 future_close1 1136157 non-null float64 \n",
|
||||
" 64 future_close2 1136157 non-null float64 \n",
|
||||
" 65 future_close3 1136157 non-null float64 \n",
|
||||
" 66 future_close4 1136157 non-null float64 \n",
|
||||
" 67 future_close5 1136157 non-null float64 \n",
|
||||
" 68 future_af11 1136157 non-null float64 \n",
|
||||
" 69 future_af12 1136157 non-null float64 \n",
|
||||
" 70 future_af13 1136157 non-null float64 \n",
|
||||
" 71 future_af14 1136157 non-null float64 \n",
|
||||
" 72 future_af15 1136157 non-null float64 \n",
|
||||
" 73 future_af21 1136157 non-null float64 \n",
|
||||
" 74 future_af22 1136157 non-null float64 \n",
|
||||
" 75 future_af23 1136157 non-null float64 \n",
|
||||
" 76 future_af24 1136157 non-null float64 \n",
|
||||
" 77 future_af25 1136157 non-null float64 \n",
|
||||
" 78 future_af31 1136157 non-null float64 \n",
|
||||
" 79 future_af32 1136157 non-null float64 \n",
|
||||
" 80 future_af33 1136157 non-null float64 \n",
|
||||
" 81 future_af34 1136157 non-null float64 \n",
|
||||
" 82 future_af35 1136157 non-null float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(80), object(2)\n",
|
||||
"memory usage: 719.5+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T15:00:45.828404Z",
|
||||
"start_time": "2025-02-09T15:00:45.294830Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def remove_outliers_iqr(series, lower_quantile=0.05, upper_quantile=0.95, threshold=1.5):\n",
|
||||
" Q1 = series.quantile(lower_quantile)\n",
|
||||
" Q3 = series.quantile(upper_quantile)\n",
|
||||
" IQR = Q3 - Q1\n",
|
||||
" lower_bound = Q1 - threshold * IQR\n",
|
||||
" upper_bound = Q3 + threshold * IQR\n",
|
||||
" # 过滤掉低于下边界或高于上边界的极值\n",
|
||||
" return (series >= lower_bound) & (series <= upper_bound)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def neutralize_labels(labels, features, feature_columns, z_threshold=3, method='regression'):\n",
|
||||
" labels_no_outliers = remove_outliers_iqr(labels)\n",
|
||||
" return labels_no_outliers\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data = df[df['trade_date'] <= '2023-01-01']\n",
|
||||
"test_data = df[df['trade_date'] >= '2023-01-01']\n",
|
||||
"\n",
|
||||
"feature_columns = [col for col in df.columns if col not in ['trade_date',\n",
|
||||
" 'ts_code',\n",
|
||||
" 'label']]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
|
||||
"\n",
|
||||
"# for column in [column for column in train_data.columns if 'future' in column]:\n",
|
||||
"# label_index = neutralize_labels(train_data[column], train_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# train_data = train_data[label_index]\n",
|
||||
"# label_index = neutralize_labels(test_data[column], test_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# test_data = test_data[label_index]\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "5f3d9aece75318cd",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['up', 'down', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'obv-maobv_6', 'rsi_3', 'rsi_6', 'rsi_9', 'return_10', 'return_20', 'avg_close_5', 'std_return_5', 'std_return_15', 'std_return_25', 'std_return_90', 'std_return_90_2', 'std_return_5 / std_return_90', 'std_return_5 / std_return_25', 'std_return_90 - std_return_90_2', 'ema_5', 'ema_13', 'ema_20', 'ema_60', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'act_factor5', 'act_factor6', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol - sell_lg_vol', 'buy_elg_vol - sell_elg_vol']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 19
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.319915Z",
|
||||
"start_time": "2025-02-09T14:56:03.355725Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def get_qcuts(series, quantiles):\n",
|
||||
" q = pd.qcut(series, q=quantiles, labels=False, duplicates='drop')\n",
|
||||
" return q[-1] # 返回窗口最后一个元素的分位数标签\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"window = 5\n",
|
||||
"quantiles = 20\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_label(df):\n",
|
||||
" labels = df['future_af13'] - df['act_factor1']\n",
|
||||
" # labels = df['future_close3']\n",
|
||||
" return labels\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
|
||||
"\n",
|
||||
"train_data, test_data = train_data.dropna(subset=['label']), test_data.dropna(subset=['label'])\n",
|
||||
"train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan).dropna(), test_data.replace([np.inf, -np.inf],\n",
|
||||
" np.nan).dropna()\n",
|
||||
"train_data, test_data = train_data.reset_index(drop=True), test_data.reset_index(drop=True)\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "f4f16d63ad18d1bc",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"875004\n",
|
||||
"最小日期: 2017-01-03\n",
|
||||
"最大日期: 2022-12-30\n",
|
||||
"260581\n",
|
||||
"最小日期: 2023-01-03\n",
|
||||
"最大日期: 2025-01-27\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 13
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.480695Z",
|
||||
"start_time": "2025-02-09T14:56:05.367238Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import lightgbm as lgb\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import optuna\n",
|
||||
"from sklearn.model_selection import KFold\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import pickle\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def objective(trial, X, y, num_boost_round, params):\n",
|
||||
" # 参数网格\n",
|
||||
" X, y = X.reset_index(drop=True), y.reset_index(drop=True)\n",
|
||||
" param_grid = {\n",
|
||||
" \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [10000]),\n",
|
||||
" \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3),\n",
|
||||
" \"num_leaves\": trial.suggest_int(\"num_leaves\", 20, 3000, step=25),\n",
|
||||
" \"max_depth\": trial.suggest_int(\"max_depth\", 3, 16),\n",
|
||||
" \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 200, 10000, step=100),\n",
|
||||
" \"lambda_l1\": trial.suggest_int(\"lambda_l1\", 0, 100, step=5),\n",
|
||||
" \"lambda_l2\": trial.suggest_int(\"lambda_l2\", 0, 100, step=5),\n",
|
||||
" \"min_gain_to_split\": trial.suggest_float(\"min_gain_to_split\", 0, 15),\n",
|
||||
" \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"bagging_freq\": trial.suggest_categorical(\"bagging_freq\", [1]),\n",
|
||||
" \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"random_state\": 1,\n",
|
||||
" \"objective\": 'regression',\n",
|
||||
" 'verbosity': -1\n",
|
||||
" }\n",
|
||||
" # 5折交叉验证\n",
|
||||
" cv = KFold(n_splits=5, shuffle=False)\n",
|
||||
"\n",
|
||||
" cv_scores = np.empty(5)\n",
|
||||
" for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):\n",
|
||||
" X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
|
||||
" y_train, y_test = y[train_idx], y[test_idx]\n",
|
||||
"\n",
|
||||
" # LGBM建模\n",
|
||||
" model = lgb.LGBMRegressor(**param_grid, num_boost_round=num_boost_round)\n",
|
||||
" model.fit(\n",
|
||||
" X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=[(X_test, y_test)],\n",
|
||||
" eval_metric=\"l2\",\n",
|
||||
" callbacks=[\n",
|
||||
" # LightGBMPruningCallback(trial, \"l2\"),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True),\n",
|
||||
" lgb.log_evaluation(period=-1)\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" # 模型预测\n",
|
||||
" preds = model.predict(X_test)\n",
|
||||
"        # Fold score: mean absolute error on the held-out fold (lower is better)\n",
|
||||
" cv_scores[idx] = mean_absolute_error(y_test, preds)\n",
|
||||
"\n",
|
||||
" return np.mean(cv_scores)\n",
|
||||
"\n",
|
||||
"def generate_key(params, feature_columns, num_boost_round):\n",
|
||||
" key_data = {\n",
|
||||
" \"params\": params,\n",
|
||||
" \"feature_columns\": feature_columns,\n",
|
||||
" \"num_boost_round\": num_boost_round\n",
|
||||
" }\n",
|
||||
" # 转换成排序后的 JSON 字符串,再生成 md5 hash\n",
|
||||
" key_str = json.dumps(key_data, sort_keys=True)\n",
|
||||
" return hashlib.md5(key_str.encode('utf-8')).hexdigest()\n",
|
||||
"\n",
|
||||
"def train_light_model(df, params, feature_columns, callbacks, evals,\n",
|
||||
" print_feature_importance=True, num_boost_round=100,\n",
|
||||
" use_optuna=False):\n",
|
||||
" cache_file = 'light_model.pkl'\n",
|
||||
" cache_key = generate_key(params, feature_columns, num_boost_round)\n",
|
||||
"\n",
|
||||
" # 检查缓存文件是否存在\n",
|
||||
" if os.path.exists(cache_file):\n",
|
||||
" try:\n",
|
||||
" with open(cache_file, 'rb') as f:\n",
|
||||
" cache_data = pickle.load(f)\n",
|
||||
" if cache_data.get('key') == cache_key:\n",
|
||||
" print(\"加载缓存模型...\")\n",
|
||||
" return cache_data.get('model')\n",
|
||||
" else:\n",
|
||||
" print(\"缓存模型的参数与当前参数不匹配,重新训练模型。\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"加载缓存失败: {e},重新训练模型。\")\n",
|
||||
" else:\n",
|
||||
" print(\"未发现缓存模型,开始训练新模型。\")\n",
|
||||
"    # Order rows by trade_date ascending, then label descending\n",
|
||||
" df_sorted = df.sort_values(by=['trade_date', 'label'], ascending=[True, False]) # 按日期升序、标签降序排序\n",
|
||||
" df_sorted = df_sorted.sort_values(by='trade_date')\n",
|
||||
" unique_dates = df_sorted['trade_date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices]\n",
|
||||
" val_df = df_sorted.iloc[val_indices]\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" train_data = lgb.Dataset(X_train, label=y_train)\n",
|
||||
" val_data = lgb.Dataset(X_val, label=y_val)\n",
|
||||
" if use_optuna:\n",
|
||||
" # study = optuna.create_study(direction='minimize' if classify else 'maximize')\n",
|
||||
" study = optuna.create_study(direction='minimize')\n",
|
||||
" study.optimize(lambda trial: objective(trial, X_train, y_train, num_boost_round, params), n_trials=20)\n",
|
||||
"\n",
|
||||
" print(f\"Best parameters: {study.best_trial.params}\")\n",
|
||||
" print(f\"Best score: {study.best_trial.value}\")\n",
|
||||
"\n",
|
||||
" params.update(study.best_trial.params)\n",
|
||||
" model = lgb.train(\n",
|
||||
" params, train_data, num_boost_round=num_boost_round,\n",
|
||||
" valid_sets=[train_data, val_data], valid_names=['train', 'valid'],\n",
|
||||
" callbacks=callbacks\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 打印特征重要性(如果需要)\n",
|
||||
" if print_feature_importance:\n",
|
||||
" lgb.plot_metric(evals)\n",
|
||||
" lgb.plot_tree(model, figsize=(20, 8))\n",
|
||||
" lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
|
||||
" plt.show()\n",
|
||||
" # with open(cache_file, 'wb') as f:\n",
|
||||
" # pickle.dump({'key': cache_key,\n",
|
||||
" # 'model': model,\n",
|
||||
" # 'feature_columns': feature_columns}, f)\n",
|
||||
" # print(\"模型训练完成并已保存缓存。\")\n",
|
||||
" return model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from catboost import CatBoostRegressor\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def train_catboost(df, num_boost_round, params=None):\n",
|
||||
" \"\"\"\n",
|
||||
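"    Train a CatBoost regression model (CatBoostRegressor).\n",
|
||||
"    - df: DataFrame containing the factor columns plus date, instrument and label\n",
|
||||
"    - num_boost_round: number of boosting iterations (passed as iterations)\n",
|
||||
"    - params: keyword arguments unpacked into CatBoostRegressor; must be a dict, the None default cannot be unpacked\n",
|
||||
"    - Features: every column except date, instrument and label, and excluding names containing 'future' or 'score'\n",
|
||||
"    - Validation: the last 10% of unique dates are held out as the eval_set; earlier dates are the training set\n",
|
||||
"\n",
|
||||
"    Returns the fitted model\n",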
|
||||
" \"\"\"\n",
|
||||
" df_sorted = df.sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" # 提取特征和标签\n",
|
||||
" feature_columns = [col for col in df.columns if col not in ['date',\n",
|
||||
" 'instrument',\n",
|
||||
" 'label']]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"\n",
|
||||
" df_sorted = df_sorted.sort_values(by='date')\n",
|
||||
" unique_dates = df_sorted['date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
" val_df = df_sorted.iloc[val_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" model = CatBoostRegressor(**params, iterations=num_boost_round)\n",
|
||||
" model.fit(X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=(X_val, y_val))\n",
|
||||
"\n",
|
||||
" return model"
|
||||
],
|
||||
"id": "8f134d435f71e9e2",
|
||||
"outputs": [],
|
||||
"execution_count": 14
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:56:05.576927Z",
|
||||
"start_time": "2025-02-09T14:56:05.480695Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"light_params = {\n",
|
||||
" 'objective': 'regression',\n",
|
||||
" 'metric': 'l2',\n",
|
||||
" 'learning_rate': 0.05,\n",
|
||||
" 'is_unbalance': True,\n",
|
||||
" 'num_leaves': 2048,\n",
|
||||
" 'min_data_in_leaf': 16,\n",
|
||||
" 'max_depth': 32,\n",
|
||||
" 'max_bin': 1024,\n",
|
||||
" 'nthread': 2,\n",
|
||||
" 'feature_fraction': 0.7,\n",
|
||||
" 'bagging_fraction': 0.7,\n",
|
||||
" 'bagging_freq': 5,\n",
|
||||
" 'lambda_l1': 80,\n",
|
||||
" 'lambda_l2': 65,\n",
|
||||
" 'verbosity': -1\n",
|
||||
"}"
|
||||
],
|
||||
"id": "4a4542e1ed6afe7d",
|
||||
"outputs": [],
|
||||
"execution_count": 15
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:25.341222Z",
|
||||
"start_time": "2025-02-09T14:56:05.640256Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"print('train data size: ', len(train_data))\n",
|
||||
"df = train_data\n",
|
||||
"\n",
|
||||
"evals = {}\n",
|
||||
"light_model = train_light_model(train_data, light_params, feature_columns,\n",
|
||||
" [lgb.log_evaluation(period=500),\n",
|
||||
" lgb.callback.record_evaluation(evals),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True)\n",
|
||||
" ], evals,\n",
|
||||
" num_boost_round=1000, use_optuna=False,\n",
|
||||
" print_feature_importance=False)"
|
||||
],
|
||||
"id": "beeb098799ecfa6a",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"train data size: 875004\n",
|
||||
"未发现缓存模型,开始训练新模型。\n",
|
||||
"Training until validation scores don't improve for 50 rounds\n",
|
||||
"Early stopping, best iteration is:\n",
|
||||
"[378]\ttrain's l2: 0.435049\tvalid's l2: 0.589178\n",
|
||||
"Evaluated only: l2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 16
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:27.394697Z",
|
||||
"start_time": "2025-02-09T14:57:25.373274Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"test_data['score'] = light_model.predict(test_data[feature_columns])\n",
|
||||
"predictions = test_data.loc[test_data.groupby('trade_date')['score'].idxmax()]"
|
||||
],
|
||||
"id": "5bb96ca8492e74d",
|
||||
"outputs": [],
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-09T14:57:27.489570Z",
|
||||
"start_time": "2025-02-09T14:57:27.397368Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions.csv', index=False)",
|
||||
"id": "5d1522a7538db91b",
|
||||
"outputs": [],
|
||||
"execution_count": 18
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
1117
main/train/V1.1.ipynb
Normal file
1117
main/train/V1.1.ipynb
Normal file
File diff suppressed because one or more lines are too long
929
main/train/V1.ipynb
Normal file
929
main/train/V1.ipynb
Normal file
@@ -0,0 +1,929 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:39:38.576665Z",
|
||||
"start_time": "2025-02-11T16:39:38.019824Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"\n",
|
||||
"from code.utils.utils import read_and_merge_h5_data\n"
|
||||
],
|
||||
"id": "79a7758178bafdd3",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The autoreload extension is already loaded. To reload it, use:\n",
|
||||
" %reload_ext autoreload\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 8
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:40:45.842510Z",
|
||||
"start_time": "2025-02-11T16:39:54.757326Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"\n",
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
|
||||
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
|
||||
" 'is_st'], df=df)\n",
|
||||
"\n",
|
||||
"print('stk limit')\n",
|
||||
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
|
||||
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
|
||||
" df=df)\n",
|
||||
"print('money flow')\n",
|
||||
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
|
||||
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
|
||||
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
|
||||
" df=df)"
|
||||
],
|
||||
"id": "a79cafb06a7e0e43",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"daily data\n",
|
||||
"daily basic\n",
|
||||
"stk limit\n",
|
||||
"money flow\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 10
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:40:45.905077Z",
|
||||
"start_time": "2025-02-11T16:40:45.848510Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "origin_columns = df.columns.tolist()",
|
||||
"id": "c4e9e1d31da6dba6",
|
||||
"outputs": [],
|
||||
"execution_count": 11
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:40:46.016229Z",
|
||||
"start_time": "2025-02-11T16:40:45.938587Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import talib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_technical_factor(df):\n",
|
||||
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
|
||||
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
|
||||
"\n",
|
||||
" df['atr_14'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=14)\n",
|
||||
" df['atr_6'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=6)\n",
|
||||
"\n",
|
||||
" df['obv'] = talib.OBV(df['close'], df['vol'])\n",
|
||||
" df['maobv_6'] = talib.SMA(df['obv'], timeperiod=6)\n",
|
||||
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
|
||||
"\n",
|
||||
" df['rsi_3'] = talib.RSI(df['close'], timeperiod=3)\n",
|
||||
" df['rsi_6'] = talib.RSI(df['close'], timeperiod=6)\n",
|
||||
" df['rsi_9'] = talib.RSI(df['close'], timeperiod=9)\n",
|
||||
"\n",
|
||||
" df['return_10'] = df['close'] / df['close'].shift(10) - 1\n",
|
||||
" df['return_20'] = df['close'] / df['close'].shift(20) - 1\n",
|
||||
"\n",
|
||||
" # # 计算 _rank_return_10 和 _rank_return_20\n",
|
||||
" # df['_rank_return_10'] = df['return_10'].rank(pct=True)\n",
|
||||
" # df['_rank_return_20'] = df['return_20'].rank(pct=True)\n",
|
||||
"\n",
|
||||
" # 计算 avg_close_5\n",
|
||||
" df['avg_close_5'] = df['close'].rolling(window=5).mean() / df['close']\n",
|
||||
"\n",
|
||||
"    # Compute std_return_5, std_return_15, std_return_25, std_return_90, std_return_90_2\n",
|
||||
" df['std_return_5'] = df['close'].pct_change().shift(-1).rolling(window=5).std()\n",
|
||||
" df['std_return_15'] = df['close'].pct_change().shift(-1).rolling(window=15).std()\n",
|
||||
" df['std_return_25'] = df['close'].pct_change().shift(-1).rolling(window=25).std()\n",
|
||||
" df['std_return_90'] = df['close'].pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
" df['std_return_90_2'] = df['close'].shift(10).pct_change().shift(-1).rolling(window=90).std()\n",
|
||||
"\n",
|
||||
"    # Compute std_return_5 / std_return_90 and std_return_5 / std_return_25\n",
|
||||
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
|
||||
" df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
|
||||
"\n",
|
||||
"    # Compute std_return_90 - std_return_90_2\n",
|
||||
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_act_factor(df):\n",
|
||||
" # 计算 m_ta_ema(close, 5), m_ta_ema(close, 13), m_ta_ema(close, 20), m_ta_ema(close, 60)\n",
|
||||
" df['ema_5'] = talib.EMA(df['close'], timeperiod=5)\n",
|
||||
" df['ema_13'] = talib.EMA(df['close'], timeperiod=13)\n",
|
||||
" df['ema_20'] = talib.EMA(df['close'], timeperiod=20)\n",
|
||||
" df['ema_60'] = talib.EMA(df['close'], timeperiod=60)\n",
|
||||
"\n",
|
||||
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
|
||||
" df['act_factor1'] = np.arctan((df['ema_5'] / df['ema_5'].shift(1) - 1) * 100) * 57.3 / 50\n",
|
||||
" df['act_factor2'] = np.arctan((df['ema_13'] / df['ema_13'].shift(1) - 1) * 100) * 57.3 / 40\n",
|
||||
" df['act_factor3'] = np.arctan((df['ema_20'] / df['ema_20'].shift(1) - 1) * 100) * 57.3 / 21\n",
|
||||
" df['act_factor4'] = np.arctan((df['ema_60'] / df['ema_60'].shift(1) - 1) * 100) * 57.3 / 10\n",
|
||||
"\n",
|
||||
" # 计算 act_factor5 和 act_factor6\n",
|
||||
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
|
||||
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
|
||||
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
|
||||
"\n",
|
||||
" # 根据 'trade_date' 进行分组,在每个组内分别计算 'act_factor1', 'act_factor2', 'act_factor3' 的排名\n",
|
||||
" df['rank_act_factor1'] = df.groupby('trade_date')['act_factor1'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor2'] = df.groupby('trade_date')['act_factor2'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor3'] = df.groupby('trade_date')['act_factor3'].rank(ascending=False, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_money_flow_factor(df):\n",
|
||||
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['buy_lg_vol - sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
|
||||
" df['buy_elg_vol - sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" # # 你还提到了一些其他字段:\n",
|
||||
" # df['net_active_buy_volume_main'] = df['net_mf_vol'] / df['buy_sm_vol']\n",
|
||||
" # df['netflow_amount_main'] = df['net_mf_vol'] / df['buy_sm_vol'] # 这里假设 'net_mf_vol' 是主流资金流\n",
|
||||
"\n",
|
||||
" # df['active_sell_volume_large'] = df['sell_lg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_big'] = df['sell_elg_vol'] / df['sell_sm_vol']\n",
|
||||
" # df['active_sell_volume_small'] = df['sell_sm_vol'] / df['sell_sm_vol']\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_alpha_factor(df):\n",
|
||||
" df['alpha_022'] = df['close'] - df['close'].shift(5)\n",
|
||||
"\n",
|
||||
" # alpha_003: (close - open) / (high - low)\n",
|
||||
" df['alpha_003'] = (df['close'] - df['open']) / (df['high'] - df['low'])\n",
|
||||
"\n",
|
||||
" # alpha_007: rank(correlation(close, volume, 5))\n",
|
||||
" df['alpha_007'] = df['close'].rolling(5).corr(df['vol']).rank(axis=1)\n",
|
||||
"\n",
|
||||
" # alpha_013: rank(sum(close, 5) - sum(close, 20))\n",
|
||||
" df['alpha_013'] = (df['close'].rolling(5).sum() - df['close'].rolling(20).sum()).rank(axis=1)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_future_data(df):\n",
|
||||
" df['future_return1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_return2'] = (df['open'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return3'] = (df['close'].shift(-2) - df['close'].shift(-1)) / df['close'].shift(-1)\n",
|
||||
" df['future_return4'] = (df['close'].shift(-2) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return5'] = (df['close'].shift(-5) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return6'] = (df['close'].shift(-10) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_return7'] = (df['close'].shift(-20) - df['open'].shift(-1)) / df['open'].shift(-1)\n",
|
||||
" df['future_close1'] = (df['close'].shift(-1) - df['close']) / df['close']\n",
|
||||
" df['future_close2'] = (df['close'].shift(-2) - df['close']) / df['close']\n",
|
||||
" df['future_close3'] = (df['close'].shift(-3) - df['close']) / df['close']\n",
|
||||
" df['future_close4'] = (df['close'].shift(-4) - df['close']) / df['close']\n",
|
||||
" df['future_close5'] = (df['close'].shift(-5) - df['close']) / df['close']\n",
|
||||
" df['future_af11'] = df['act_factor1'].shift(-1)\n",
|
||||
" df['future_af12'] = df['act_factor1'].shift(-2)\n",
|
||||
" df['future_af13'] = df['act_factor1'].shift(-3)\n",
|
||||
" df['future_af14'] = df['act_factor1'].shift(-4)\n",
|
||||
" df['future_af15'] = df['act_factor1'].shift(-5)\n",
|
||||
" df['future_af21'] = df['act_factor2'].shift(-1)\n",
|
||||
" df['future_af22'] = df['act_factor2'].shift(-2)\n",
|
||||
" df['future_af23'] = df['act_factor2'].shift(-3)\n",
|
||||
" df['future_af24'] = df['act_factor2'].shift(-4)\n",
|
||||
" df['future_af25'] = df['act_factor2'].shift(-5)\n",
|
||||
" df['future_af31'] = df['act_factor3'].shift(-1)\n",
|
||||
" df['future_af32'] = df['act_factor3'].shift(-2)\n",
|
||||
" df['future_af33'] = df['act_factor3'].shift(-3)\n",
|
||||
" df['future_af34'] = df['act_factor3'].shift(-4)\n",
|
||||
" df['future_af35'] = df['act_factor3'].shift(-5)\n",
|
||||
"\n",
|
||||
" return df\n"
|
||||
],
|
||||
"id": "a735bc02ceb4d872",
|
||||
"outputs": [],
|
||||
"execution_count": 12
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:40:56.805530Z",
|
||||
"start_time": "2025-02-11T16:40:46.048312Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"df = get_technical_factor(df)\n",
|
||||
"df = get_act_factor(df)\n",
|
||||
"df = get_money_flow_factor(df)\n",
|
||||
"df = get_future_data(df)\n",
|
||||
"# df = df.drop(columns=origin_columns)\n",
|
||||
"\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "53f86ddc0677a6d7",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 8375079 entries, 0 to 8375078\n",
|
||||
"Data columns (total 87 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object \n",
|
||||
" 1 trade_date datetime64[ns]\n",
|
||||
" 2 open float64 \n",
|
||||
" 3 close float64 \n",
|
||||
" 4 high float64 \n",
|
||||
" 5 low float64 \n",
|
||||
" 6 vol float64 \n",
|
||||
" 7 turnover_rate float64 \n",
|
||||
" 8 pe_ttm float64 \n",
|
||||
" 9 circ_mv float64 \n",
|
||||
" 10 volume_ratio float64 \n",
|
||||
" 11 is_st object \n",
|
||||
" 12 up_limit float64 \n",
|
||||
" 13 down_limit float64 \n",
|
||||
" 14 buy_sm_vol float64 \n",
|
||||
" 15 sell_sm_vol float64 \n",
|
||||
" 16 buy_lg_vol float64 \n",
|
||||
" 17 sell_lg_vol float64 \n",
|
||||
" 18 buy_elg_vol float64 \n",
|
||||
" 19 sell_elg_vol float64 \n",
|
||||
" 20 net_mf_vol float64 \n",
|
||||
" 21 up float64 \n",
|
||||
" 22 down float64 \n",
|
||||
" 23 atr_14 float64 \n",
|
||||
" 24 atr_6 float64 \n",
|
||||
" 25 obv float64 \n",
|
||||
" 26 maobv_6 float64 \n",
|
||||
" 27 obv-maobv_6 float64 \n",
|
||||
" 28 rsi_3 float64 \n",
|
||||
" 29 rsi_6 float64 \n",
|
||||
" 30 rsi_9 float64 \n",
|
||||
" 31 return_10 float64 \n",
|
||||
" 32 return_20 float64 \n",
|
||||
" 33 avg_close_5 float64 \n",
|
||||
" 34 std_return_5 float64 \n",
|
||||
" 35 std_return_15 float64 \n",
|
||||
" 36 std_return_25 float64 \n",
|
||||
" 37 std_return_90 float64 \n",
|
||||
" 38 std_return_90_2 float64 \n",
|
||||
" 39 std_return_5 / std_return_90 float64 \n",
|
||||
" 40 std_return_5 / std_return_25 float64 \n",
|
||||
" 41 std_return_90 - std_return_90_2 float64 \n",
|
||||
" 42 ema_5 float64 \n",
|
||||
" 43 ema_13 float64 \n",
|
||||
" 44 ema_20 float64 \n",
|
||||
" 45 ema_60 float64 \n",
|
||||
" 46 act_factor1 float64 \n",
|
||||
" 47 act_factor2 float64 \n",
|
||||
" 48 act_factor3 float64 \n",
|
||||
" 49 act_factor4 float64 \n",
|
||||
" 50 act_factor5 float64 \n",
|
||||
" 51 act_factor6 float64 \n",
|
||||
" 52 rank_act_factor1 float64 \n",
|
||||
" 53 rank_act_factor2 float64 \n",
|
||||
" 54 rank_act_factor3 float64 \n",
|
||||
" 55 active_buy_volume_large float64 \n",
|
||||
" 56 active_buy_volume_big float64 \n",
|
||||
" 57 active_buy_volume_small float64 \n",
|
||||
" 58 buy_lg_vol - sell_lg_vol float64 \n",
|
||||
" 59 buy_elg_vol - sell_elg_vol float64 \n",
|
||||
" 60 future_return1 float64 \n",
|
||||
" 61 future_return2 float64 \n",
|
||||
" 62 future_return3 float64 \n",
|
||||
" 63 future_return4 float64 \n",
|
||||
" 64 future_return5 float64 \n",
|
||||
" 65 future_return6 float64 \n",
|
||||
" 66 future_return7 float64 \n",
|
||||
" 67 future_close1 float64 \n",
|
||||
" 68 future_close2 float64 \n",
|
||||
" 69 future_close3 float64 \n",
|
||||
" 70 future_close4 float64 \n",
|
||||
" 71 future_close5 float64 \n",
|
||||
" 72 future_af11 float64 \n",
|
||||
" 73 future_af12 float64 \n",
|
||||
" 74 future_af13 float64 \n",
|
||||
" 75 future_af14 float64 \n",
|
||||
" 76 future_af15 float64 \n",
|
||||
" 77 future_af21 float64 \n",
|
||||
" 78 future_af22 float64 \n",
|
||||
" 79 future_af23 float64 \n",
|
||||
" 80 future_af24 float64 \n",
|
||||
" 81 future_af25 float64 \n",
|
||||
" 82 future_af31 float64 \n",
|
||||
" 83 future_af32 float64 \n",
|
||||
" 84 future_af33 float64 \n",
|
||||
" 85 future_af34 float64 \n",
|
||||
" 86 future_af35 float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(84), object(2)\n",
|
||||
"memory usage: 5.4+ GB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 13
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:42:03.707721Z",
|
||||
"start_time": "2025-02-11T16:40:56.889317Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def filter_data(df):\n",
|
||||
" df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor3'))\n",
|
||||
" df = df[~df['is_st']]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('30')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('68')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('8')]\n",
|
||||
" df = df.reset_index(drop=True)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = filter_data(df)\n",
|
||||
"print(df.info())"
|
||||
],
|
||||
"id": "dbe2fd8021b9417f",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 1101560 entries, 0 to 1101559\n",
|
||||
"Data columns (total 87 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 ts_code 1101560 non-null object \n",
|
||||
" 1 trade_date 1101560 non-null datetime64[ns]\n",
|
||||
" 2 open 1101560 non-null float64 \n",
|
||||
" 3 close 1101560 non-null float64 \n",
|
||||
" 4 high 1101560 non-null float64 \n",
|
||||
" 5 low 1101560 non-null float64 \n",
|
||||
" 6 vol 1101560 non-null float64 \n",
|
||||
" 7 turnover_rate 1101560 non-null float64 \n",
|
||||
" 8 pe_ttm 932908 non-null float64 \n",
|
||||
" 9 circ_mv 1101560 non-null float64 \n",
|
||||
" 10 volume_ratio 1101096 non-null float64 \n",
|
||||
" 11 is_st 1101560 non-null object \n",
|
||||
" 12 up_limit 1101282 non-null float64 \n",
|
||||
" 13 down_limit 1101282 non-null float64 \n",
|
||||
" 14 buy_sm_vol 1101069 non-null float64 \n",
|
||||
" 15 sell_sm_vol 1101069 non-null float64 \n",
|
||||
" 16 buy_lg_vol 1101069 non-null float64 \n",
|
||||
" 17 sell_lg_vol 1101069 non-null float64 \n",
|
||||
" 18 buy_elg_vol 1101069 non-null float64 \n",
|
||||
" 19 sell_elg_vol 1101069 non-null float64 \n",
|
||||
" 20 net_mf_vol 1101069 non-null float64 \n",
|
||||
" 21 up 1101560 non-null float64 \n",
|
||||
" 22 down 1101560 non-null float64 \n",
|
||||
" 23 atr_14 1100687 non-null float64 \n",
|
||||
" 24 atr_6 1100687 non-null float64 \n",
|
||||
" 25 obv 1101560 non-null float64 \n",
|
||||
" 26 maobv_6 1101560 non-null float64 \n",
|
||||
" 27 obv-maobv_6 1101560 non-null float64 \n",
|
||||
" 28 rsi_3 1100687 non-null float64 \n",
|
||||
" 29 rsi_6 1100687 non-null float64 \n",
|
||||
" 30 rsi_9 1100687 non-null float64 \n",
|
||||
" 31 return_10 1101560 non-null float64 \n",
|
||||
" 32 return_20 1101560 non-null float64 \n",
|
||||
" 33 avg_close_5 1101560 non-null float64 \n",
|
||||
" 34 std_return_5 1101560 non-null float64 \n",
|
||||
" 35 std_return_15 1101560 non-null float64 \n",
|
||||
" 36 std_return_25 1101559 non-null float64 \n",
|
||||
" 37 std_return_90 1101533 non-null float64 \n",
|
||||
" 38 std_return_90_2 1101531 non-null float64 \n",
|
||||
" 39 std_return_5 / std_return_90 1101533 non-null float64 \n",
|
||||
" 40 std_return_5 / std_return_25 1101559 non-null float64 \n",
|
||||
" 41 std_return_90 - std_return_90_2 1101531 non-null float64 \n",
|
||||
" 42 ema_5 1100687 non-null float64 \n",
|
||||
" 43 ema_13 1100687 non-null float64 \n",
|
||||
" 44 ema_20 1100687 non-null float64 \n",
|
||||
" 45 ema_60 1100682 non-null float64 \n",
|
||||
" 46 act_factor1 1100687 non-null float64 \n",
|
||||
" 47 act_factor2 1100687 non-null float64 \n",
|
||||
" 48 act_factor3 1100687 non-null float64 \n",
|
||||
" 49 act_factor4 1100682 non-null float64 \n",
|
||||
" 50 act_factor5 1100682 non-null float64 \n",
|
||||
" 51 act_factor6 1100687 non-null float64 \n",
|
||||
" 52 rank_act_factor1 1100687 non-null float64 \n",
|
||||
" 53 rank_act_factor2 1100687 non-null float64 \n",
|
||||
" 54 rank_act_factor3 1100687 non-null float64 \n",
|
||||
" 55 active_buy_volume_large 1101065 non-null float64 \n",
|
||||
" 56 active_buy_volume_big 1101042 non-null float64 \n",
|
||||
" 57 active_buy_volume_small 1101069 non-null float64 \n",
|
||||
" 58 buy_lg_vol - sell_lg_vol 1101066 non-null float64 \n",
|
||||
" 59 buy_elg_vol - sell_elg_vol 1101046 non-null float64 \n",
|
||||
" 60 future_return1 1101560 non-null float64 \n",
|
||||
" 61 future_return2 1101560 non-null float64 \n",
|
||||
" 62 future_return3 1101560 non-null float64 \n",
|
||||
" 63 future_return4 1101560 non-null float64 \n",
|
||||
" 64 future_return5 1101560 non-null float64 \n",
|
||||
" 65 future_return6 1101560 non-null float64 \n",
|
||||
" 66 future_return7 1101560 non-null float64 \n",
|
||||
" 67 future_close1 1101560 non-null float64 \n",
|
||||
" 68 future_close2 1101560 non-null float64 \n",
|
||||
" 69 future_close3 1101560 non-null float64 \n",
|
||||
" 70 future_close4 1101560 non-null float64 \n",
|
||||
" 71 future_close5 1101560 non-null float64 \n",
|
||||
" 72 future_af11 1100687 non-null float64 \n",
|
||||
" 73 future_af12 1100687 non-null float64 \n",
|
||||
" 74 future_af13 1100687 non-null float64 \n",
|
||||
" 75 future_af14 1100687 non-null float64 \n",
|
||||
" 76 future_af15 1100687 non-null float64 \n",
|
||||
" 77 future_af21 1100687 non-null float64 \n",
|
||||
" 78 future_af22 1100687 non-null float64 \n",
|
||||
" 79 future_af23 1100687 non-null float64 \n",
|
||||
" 80 future_af24 1100687 non-null float64 \n",
|
||||
" 81 future_af25 1100687 non-null float64 \n",
|
||||
" 82 future_af31 1100687 non-null float64 \n",
|
||||
" 83 future_af32 1100687 non-null float64 \n",
|
||||
" 84 future_af33 1100687 non-null float64 \n",
|
||||
" 85 future_af34 1100687 non-null float64 \n",
|
||||
" 86 future_af35 1100687 non-null float64 \n",
|
||||
"dtypes: datetime64[ns](1), float64(84), object(2)\n",
|
||||
"memory usage: 731.2+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 14
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:42:04.317134Z",
|
||||
"start_time": "2025-02-11T16:42:03.969288Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def remove_outliers_iqr(series, lower_quantile=0.05, upper_quantile=0.95, threshold=1.5):\n",
|
||||
" Q1 = series.quantile(lower_quantile)\n",
|
||||
" Q3 = series.quantile(upper_quantile)\n",
|
||||
" IQR = Q3 - Q1\n",
|
||||
" lower_bound = Q1 - threshold * IQR\n",
|
||||
" upper_bound = Q3 + threshold * IQR\n",
|
||||
" # 过滤掉低于下边界或高于上边界的极值\n",
|
||||
" return (series >= lower_bound) & (series <= upper_bound)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def neutralize_labels(labels, features, feature_columns, z_threshold=3, method='regression'):\n",
|
||||
" labels_no_outliers = remove_outliers_iqr(labels)\n",
|
||||
" return labels_no_outliers\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data = df[df['trade_date'] <= '2023-01-01']\n",
|
||||
"test_data = df[df['trade_date'] >= '2023-01-01']\n",
|
||||
"\n",
|
||||
"feature_columns = [col for col in df.columns if col not in ['trade_date',\n",
|
||||
" 'ts_code',\n",
|
||||
" 'label']]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
|
||||
"\n",
|
||||
"# for column in [column for column in train_data.columns if 'future' in column]:\n",
|
||||
"# label_index = neutralize_labels(train_data[column], train_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# train_data = train_data[label_index]\n",
|
||||
"# label_index = neutralize_labels(test_data[column], test_data, feature_columns, z_threshold=3, method='regression')\n",
|
||||
"# test_data = test_data[label_index]\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "5f3d9aece75318cd",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"860933\n",
|
||||
"240627\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 15
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:42:06.417436Z",
|
||||
"start_time": "2025-02-11T16:42:04.322121Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def get_qcuts(series, quantiles):\n",
|
||||
" q = pd.qcut(series, q=quantiles, labels=False, duplicates='drop')\n",
|
||||
" return q[-1] # 返回窗口最后一个元素的分位数标签\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"window = 5\n",
|
||||
"quantiles = 20\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_label(df):\n",
|
||||
" labels = df['future_af13'] - df['act_factor1']\n",
|
||||
" # labels = df['future_close3']\n",
|
||||
" return labels\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
|
||||
"\n",
|
||||
"train_data, test_data = train_data.dropna(subset=['label']), test_data.dropna(subset=['label'])\n",
|
||||
"train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan).dropna(), test_data.replace([np.inf, -np.inf],\n",
|
||||
" np.nan).dropna()\n",
|
||||
"train_data, test_data = train_data.reset_index(drop=True), test_data.reset_index(drop=True)\n",
|
||||
"\n",
|
||||
"print(len(train_data))\n",
|
||||
"print(len(test_data))"
|
||||
],
|
||||
"id": "f4f16d63ad18d1bc",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_88940\\2181928612.py:16: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n",
|
||||
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_88940\\2181928612.py:16: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" train_data['label'], test_data['label'] = get_label(train_data), get_label(test_data)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"747134\n",
|
||||
"184095\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 16
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:42:08.032246Z",
|
||||
"start_time": "2025-02-11T16:42:06.481439Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import lightgbm as lgb\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import optuna\n",
|
||||
"from sklearn.model_selection import KFold\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import pickle\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def objective(trial, X, y, num_boost_round, params):\n",
|
||||
" # 参数网格\n",
|
||||
" X, y = X.reset_index(drop=True), y.reset_index(drop=True)\n",
|
||||
" param_grid = {\n",
|
||||
" \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [10000]),\n",
|
||||
" \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3),\n",
|
||||
" \"num_leaves\": trial.suggest_int(\"num_leaves\", 20, 3000, step=25),\n",
|
||||
" \"max_depth\": trial.suggest_int(\"max_depth\", 3, 16),\n",
|
||||
" \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 200, 10000, step=100),\n",
|
||||
" \"lambda_l1\": trial.suggest_int(\"lambda_l1\", 0, 100, step=5),\n",
|
||||
" \"lambda_l2\": trial.suggest_int(\"lambda_l2\", 0, 100, step=5),\n",
|
||||
" \"min_gain_to_split\": trial.suggest_float(\"min_gain_to_split\", 0, 15),\n",
|
||||
" \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"bagging_freq\": trial.suggest_categorical(\"bagging_freq\", [1]),\n",
|
||||
" \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.2, 0.95, step=0.1),\n",
|
||||
" \"random_state\": 1,\n",
|
||||
" \"objective\": 'regression',\n",
|
||||
" 'verbosity': -1\n",
|
||||
" }\n",
|
||||
" # 5折交叉验证\n",
|
||||
" cv = KFold(n_splits=5, shuffle=False)\n",
|
||||
"\n",
|
||||
" cv_scores = np.empty(5)\n",
|
||||
" for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):\n",
|
||||
" X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]\n",
|
||||
" y_train, y_test = y[train_idx], y[test_idx]\n",
|
||||
"\n",
|
||||
" # LGBM建模\n",
|
||||
" model = lgb.LGBMRegressor(**param_grid, num_boost_round=num_boost_round)\n",
|
||||
" model.fit(\n",
|
||||
" X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=[(X_test, y_test)],\n",
|
||||
" eval_metric=\"l2\",\n",
|
||||
" callbacks=[\n",
|
||||
" # LightGBMPruningCallback(trial, \"l2\"),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True),\n",
|
||||
" lgb.log_evaluation(period=-1)\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" # 模型预测\n",
|
||||
" preds = model.predict(X_test)\n",
|
||||
" # 优化指标:验证折上的 MAE(mean_absolute_error),越小越好\n",
|
||||
" cv_scores[idx] = mean_absolute_error(y_test, preds)\n",
|
||||
"\n",
|
||||
" return np.mean(cv_scores)\n",
|
||||
"\n",
|
||||
"def generate_key(params, feature_columns, num_boost_round):\n",
|
||||
" key_data = {\n",
|
||||
" \"params\": params,\n",
|
||||
" \"feature_columns\": feature_columns,\n",
|
||||
" \"num_boost_round\": num_boost_round\n",
|
||||
" }\n",
|
||||
" # 转换成排序后的 JSON 字符串,再生成 md5 hash\n",
|
||||
" key_str = json.dumps(key_data, sort_keys=True)\n",
|
||||
" return hashlib.md5(key_str.encode('utf-8')).hexdigest()\n",
|
||||
"\n",
|
||||
"def train_light_model(df, params, feature_columns, callbacks, evals,\n",
|
||||
" print_feature_importance=True, num_boost_round=100,\n",
|
||||
" use_optuna=False):\n",
|
||||
" cache_file = 'light_model.pkl'\n",
|
||||
" cache_key = generate_key(params, feature_columns, num_boost_round)\n",
|
||||
"\n",
|
||||
" # 检查缓存文件是否存在\n",
|
||||
" if os.path.exists(cache_file):\n",
|
||||
" try:\n",
|
||||
" with open(cache_file, 'rb') as f:\n",
|
||||
" cache_data = pickle.load(f)\n",
|
||||
" if cache_data.get('key') == cache_key:\n",
|
||||
" print(\"加载缓存模型...\")\n",
|
||||
" return cache_data.get('model')\n",
|
||||
" else:\n",
|
||||
" print(\"缓存模型的参数与当前参数不匹配,重新训练模型。\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"加载缓存失败: {e},重新训练模型。\")\n",
|
||||
" else:\n",
|
||||
" print(\"未发现缓存模型,开始训练新模型。\")\n",
|
||||
" # 确保数据按照 date 和 label 排序\n",
|
||||
" df_sorted = df.sort_values(by=['trade_date', 'label'], ascending=[True, False]) # 按日期升序、标签降序排序\n",
|
||||
" df_sorted = df_sorted.sort_values(by='trade_date')\n",
|
||||
" unique_dates = df_sorted['trade_date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['trade_date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices]\n",
|
||||
" val_df = df_sorted.iloc[val_indices]\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" train_data = lgb.Dataset(X_train, label=y_train)\n",
|
||||
" val_data = lgb.Dataset(X_val, label=y_val)\n",
|
||||
" if use_optuna:\n",
|
||||
" # study = optuna.create_study(direction='minimize' if classify else 'maximize')\n",
|
||||
" study = optuna.create_study(direction='minimize')\n",
|
||||
" study.optimize(lambda trial: objective(trial, X_train, y_train, num_boost_round, params), n_trials=20)\n",
|
||||
"\n",
|
||||
" print(f\"Best parameters: {study.best_trial.params}\")\n",
|
||||
" print(f\"Best score: {study.best_trial.value}\")\n",
|
||||
"\n",
|
||||
" params.update(study.best_trial.params)\n",
|
||||
" model = lgb.train(\n",
|
||||
" params, train_data, num_boost_round=num_boost_round,\n",
|
||||
" valid_sets=[train_data, val_data], valid_names=['train', 'valid'],\n",
|
||||
" callbacks=callbacks\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 打印特征重要性(如果需要)\n",
|
||||
" if print_feature_importance:\n",
|
||||
" lgb.plot_metric(evals)\n",
|
||||
" lgb.plot_tree(model, figsize=(20, 8))\n",
|
||||
" lgb.plot_importance(model, importance_type='split', max_num_features=20)\n",
|
||||
" plt.show()\n",
|
||||
" # with open(cache_file, 'wb') as f:\n",
|
||||
" # pickle.dump({'key': cache_key,\n",
|
||||
" # 'model': model,\n",
|
||||
" # 'feature_columns': feature_columns}, f)\n",
|
||||
" # print(\"模型训练完成并已保存缓存。\")\n",
|
||||
" return model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from catboost import CatBoostRegressor\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def train_catboost(df, num_boost_round, params=None):\n",
|
||||
" \"\"\"\n",
|
||||
" 训练 CatBoost 回归模型(CatBoostRegressor)\n",
|
||||
" - df: 包含因子、date、instrument 和 label 的 DataFrame\n",
|
||||
" - num_boost_round: 训练的轮数(作为 iterations 传入)\n",
|
||||
" - params: 传给 CatBoostRegressor 的参数字典(以 **params 展开,不能为 None)\n",
|
||||
" - 特征列: 除 date、instrument、label 以及名称中包含 'future' 或 'score' 的列之外的所有列\n",
|
||||
" - 验证集: 取最后 10% 的交易日(按 date 排序)作为验证集,其余作为训练集\n",
|
||||
"\n",
|
||||
" 返回训练好的模型\n",
|
||||
" \"\"\"\n",
|
||||
" df_sorted = df.sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" # 提取特征和标签\n",
|
||||
" feature_columns = [col for col in df.columns if col not in ['date',\n",
|
||||
" 'instrument',\n",
|
||||
" 'label']]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'future' not in col]\n",
|
||||
" feature_columns = [col for col in feature_columns if 'score' not in col]\n",
|
||||
"\n",
|
||||
" df_sorted = df_sorted.sort_values(by='date')\n",
|
||||
" unique_dates = df_sorted['date'].unique()\n",
|
||||
" val_date_count = int(len(unique_dates) * 0.1)\n",
|
||||
" val_dates = unique_dates[-val_date_count:]\n",
|
||||
" val_indices = df_sorted[df_sorted['date'].isin(val_dates)].index\n",
|
||||
" train_indices = df_sorted[~df_sorted['date'].isin(val_dates)].index\n",
|
||||
"\n",
|
||||
" # 获取训练集和验证集的样本\n",
|
||||
" train_df = df_sorted.iloc[train_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
" val_df = df_sorted.iloc[val_indices].sort_values(by=['date', 'label'], ascending=[True, False])\n",
|
||||
"\n",
|
||||
" X_train = train_df[feature_columns]\n",
|
||||
" y_train = train_df['label']\n",
|
||||
"\n",
|
||||
" X_val = val_df[feature_columns]\n",
|
||||
" y_val = val_df['label']\n",
|
||||
"\n",
|
||||
" model = CatBoostRegressor(**params, iterations=num_boost_round)\n",
|
||||
" model.fit(X_train,\n",
|
||||
" y_train,\n",
|
||||
" eval_set=(X_val, y_val))\n",
|
||||
"\n",
|
||||
" return model"
|
||||
],
|
||||
"id": "8f134d435f71e9e2",
|
||||
"outputs": [],
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:42:08.126033Z",
|
||||
"start_time": "2025-02-11T16:42:08.047878Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"light_params = {\n",
|
||||
" 'objective': 'regression',\n",
|
||||
" 'metric': 'l2',\n",
|
||||
" 'learning_rate': 0.05,\n",
|
||||
" 'is_unbalance': True,\n",
|
||||
" 'num_leaves': 2048,\n",
|
||||
" 'min_data_in_leaf': 16,\n",
|
||||
" 'max_depth': 32,\n",
|
||||
" 'max_bin': 1024,\n",
|
||||
" 'nthread': 2,\n",
|
||||
" 'feature_fraction': 0.7,\n",
|
||||
" 'bagging_fraction': 0.7,\n",
|
||||
" 'bagging_freq': 5,\n",
|
||||
" 'lambda_l1': 80,\n",
|
||||
" 'lambda_l2': 65,\n",
|
||||
" 'verbosity': -1\n",
|
||||
"}"
|
||||
],
|
||||
"id": "4a4542e1ed6afe7d",
|
||||
"outputs": [],
|
||||
"execution_count": 18
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:43:30.267422Z",
|
||||
"start_time": "2025-02-11T16:42:08.363141Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"print('train data size: ', len(train_data))\n",
|
||||
"df = train_data\n",
|
||||
"\n",
|
||||
"evals = {}\n",
|
||||
"light_model = train_light_model(train_data, light_params, feature_columns,\n",
|
||||
" [lgb.log_evaluation(period=500),\n",
|
||||
" lgb.callback.record_evaluation(evals),\n",
|
||||
" lgb.early_stopping(50, first_metric_only=True)\n",
|
||||
" ], evals,\n",
|
||||
" num_boost_round=1000, use_optuna=False,\n",
|
||||
" print_feature_importance=False)"
|
||||
],
|
||||
"id": "beeb098799ecfa6a",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"train data size: 747134\n",
|
||||
"未发现缓存模型,开始训练新模型。\n",
|
||||
"Training until validation scores don't improve for 50 rounds\n",
|
||||
"[500]\ttrain's l2: 0.415226\tvalid's l2: 0.57674\n",
|
||||
"Early stopping, best iteration is:\n",
|
||||
"[477]\ttrain's l2: 0.417575\tvalid's l2: 0.576627\n",
|
||||
"Evaluated only: l2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 19
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:43:32.102494Z",
|
||||
"start_time": "2025-02-11T16:43:30.315429Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"test_data['score'] = light_model.predict(test_data[feature_columns])\n",
|
||||
"predictions = test_data.loc[test_data.groupby('trade_date')['score'].idxmax()]"
|
||||
],
|
||||
"id": "5bb96ca8492e74d",
|
||||
"outputs": [],
|
||||
"execution_count": 20
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-11T16:43:32.197091Z",
|
||||
"start_time": "2025-02-11T16:43:32.107477Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "predictions[['trade_date', 'score', 'ts_code']].to_csv('predictions.csv', index=False)",
|
||||
"id": "5d1522a7538db91b",
|
||||
"outputs": [],
|
||||
"execution_count": 21
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
0
main/train/__init__.py
Normal file
0
main/train/__init__.py
Normal file
BIN
main/train/best_model.pth
Normal file
BIN
main/train/best_model.pth
Normal file
Binary file not shown.
5004
main/train/catboost_info/catboost_training.json
Normal file
5004
main/train/catboost_info/catboost_training.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
main/train/catboost_info/learn/events.out.tfevents
Normal file
BIN
main/train/catboost_info/learn/events.out.tfevents
Normal file
Binary file not shown.
5001
main/train/catboost_info/learn_error.tsv
Normal file
5001
main/train/catboost_info/learn_error.tsv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
main/train/catboost_info/test/events.out.tfevents
Normal file
BIN
main/train/catboost_info/test/events.out.tfevents
Normal file
Binary file not shown.
BIN
main/train/catboost_info/test1/events.out.tfevents
Normal file
BIN
main/train/catboost_info/test1/events.out.tfevents
Normal file
Binary file not shown.
5001
main/train/catboost_info/test_error.tsv
Normal file
5001
main/train/catboost_info/test_error.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5001
main/train/catboost_info/time_left.tsv
Normal file
5001
main/train/catboost_info/time_left.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
384
main/train/code.ipynb
Normal file
384
main/train/code.ipynb
Normal file
@@ -0,0 +1,384 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_technical_factor(df):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" df['return_skew'] = grouped['pct_chg'].rolling(window=5).skew().reset_index(0, drop=True)\n",
|
||||
" df['return_kurtosis'] = grouped['pct_chg'].rolling(window=5).kurt().reset_index(0, drop=True)\n",
|
||||
"\n",
|
||||
" # 因子 1:短期成交量变化率\n",
|
||||
" df['volume_change_rate'] = (\n",
|
||||
" grouped['vol'].rolling(window=2).mean() /\n",
|
||||
" grouped['vol'].rolling(window=5).mean() - 1\n",
|
||||
" ).reset_index(level=0, drop=True) # 确保索引对齐\n",
|
||||
"\n",
|
||||
" # 因子 2:成交量突破信号\n",
|
||||
" max_volume = grouped['vol'].rolling(window=5).max().reset_index(level=0, drop=True) # 确保索引对齐\n",
|
||||
" df['cat_volume_breakout'] = (df['vol'] > max_volume)\n",
|
||||
"\n",
|
||||
" # 因子 3:换手率均线偏离度\n",
|
||||
" mean_turnover = grouped['turnover_rate'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
|
||||
" std_turnover = grouped['turnover_rate'].rolling(window=3).std().reset_index(level=0, drop=True)\n",
|
||||
" df['turnover_deviation'] = (df['turnover_rate'] - mean_turnover) / std_turnover\n",
|
||||
"\n",
|
||||
" # 因子 4:换手率激增信号\n",
|
||||
" df['cat_turnover_spike'] = (df['turnover_rate'] > mean_turnover + 2 * std_turnover)\n",
|
||||
"\n",
|
||||
" # 因子 5:量比均值\n",
|
||||
" df['avg_volume_ratio'] = grouped['volume_ratio'].rolling(window=3).mean().reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 因子 6:量比突破信号\n",
|
||||
" max_volume_ratio = grouped['volume_ratio'].rolling(window=5).max().reset_index(level=0, drop=True)\n",
|
||||
" df['cat_volume_ratio_breakout'] = (df['volume_ratio'] > max_volume_ratio)\n",
|
||||
"\n",
|
||||
" # 因子 7:成交量与换手率的综合动量因子\n",
|
||||
" alpha = 0.5\n",
|
||||
" df['momentum_factor'] = df['volume_change_rate'] + alpha * df['turnover_deviation']\n",
|
||||
"\n",
|
||||
" # 因子 8:量价共振因子\n",
|
||||
" df['price_change_rate'] = grouped['close'].pct_change()\n",
|
||||
" df['resonance_factor'] = df['volume_ratio'] * df['price_change_rate']\n",
|
||||
"\n",
|
||||
" # 计算对数收盘价、成交量放大信号,以及上下影线 up 和 down\n",
|
||||
" df['log_close'] = np.log(df['close'])\n",
|
||||
"\n",
|
||||
" df['vol_spike'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(x['vol'].rolling(20).mean(), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['cat_vol_spike'] = df['vol'] > 2 * df['vol_spike']\n",
|
||||
" df['vol_std_5'] = df['vol'].pct_change().rolling(5).std()\n",
|
||||
"\n",
|
||||
" df['up'] = (df['high'] - df[['close', 'open']].max(axis=1)) / df['close']\n",
|
||||
" df['down'] = (df[['close', 'open']].min(axis=1) - df['low']) / df['close']\n",
|
||||
"\n",
|
||||
" # 计算 ATR\n",
|
||||
" df['atr_14'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=14),\n",
|
||||
" index=x.index)\n",
|
||||
" )\n",
|
||||
" df['atr_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.ATR(x['high'].values, x['low'].values, x['close'].values, timeperiod=6),\n",
|
||||
" index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 OBV 及其均线\n",
|
||||
" df['obv'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['maobv_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.SMA(x['obv'].values, timeperiod=6), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['obv-maobv_6'] = df['obv'] - df['maobv_6']\n",
|
||||
"\n",
|
||||
" # 计算 RSI\n",
|
||||
" df['rsi_3'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=3), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['rsi_6'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=6), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['rsi_9'] = grouped.apply(\n",
|
||||
" lambda x: pd.Series(talib.RSI(x['close'].values, timeperiod=9), index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 return_5、return_10 和 return_20\n",
|
||||
" df['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
|
||||
" df['return_10'] = grouped['close'].apply(lambda x: x / x.shift(10) - 1)\n",
|
||||
" df['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
|
||||
"\n",
|
||||
" # df['avg_close_5'] = grouped['close'].apply(lambda x: x.rolling(window=5).mean() / x)\n",
|
||||
"\n",
|
||||
" # 计算标准差指标\n",
|
||||
" df['std_return_5'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=5).std())\n",
|
||||
" df['std_return_15'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=15).std())\n",
|
||||
" df['std_return_25'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=25).std())\n",
|
||||
" df['std_return_90'] = grouped['close'].apply(lambda x: x.pct_change().rolling(window=90).std())\n",
|
||||
" df['std_return_90_2'] = grouped['close'].apply(lambda x: x.shift(10).pct_change().rolling(window=90).std())\n",
|
||||
"\n",
|
||||
" # 计算比值指标\n",
|
||||
" df['std_return_5 / std_return_90'] = df['std_return_5'] / df['std_return_90']\n",
|
||||
" df['std_return_5 / std_return_25'] = df['std_return_5'] / df['std_return_25']\n",
|
||||
"\n",
|
||||
" # 计算标准差差值\n",
|
||||
" df['std_return_90 - std_return_90_2'] = df['std_return_90'] - df['std_return_90_2']\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_act_factor(df, cat=True):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
" # 计算 EMA 指标\n",
|
||||
" df['_ema_5'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=5), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_13'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=13), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_20'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=20), index=x.index)\n",
|
||||
" )\n",
|
||||
" df['_ema_60'] = grouped['close'].apply(\n",
|
||||
" lambda x: pd.Series(talib.EMA(x.values, timeperiod=60), index=x.index)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # 计算 act_factor1, act_factor2, act_factor3, act_factor4\n",
|
||||
" df['act_factor1'] = grouped['_ema_5'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 50\n",
|
||||
" )\n",
|
||||
" df['act_factor2'] = grouped['_ema_13'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 40\n",
|
||||
" )\n",
|
||||
" df['act_factor3'] = grouped['_ema_20'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 21\n",
|
||||
" )\n",
|
||||
" df['act_factor4'] = grouped['_ema_60'].apply(\n",
|
||||
" lambda x: np.arctan((x / x.shift(1) - 1) * 100) * 57.3 / 10\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if cat:\n",
|
||||
" df['cat_af1'] = df['act_factor1'] > 0\n",
|
||||
" df['cat_af2'] = df['act_factor2'] > df['act_factor1']\n",
|
||||
" df['cat_af3'] = df['act_factor3'] > df['act_factor2']\n",
|
||||
" df['cat_af4'] = df['act_factor4'] > df['act_factor3']\n",
|
||||
"\n",
|
||||
" # 计算 act_factor5 和 act_factor6\n",
|
||||
" df['act_factor5'] = df['act_factor1'] + df['act_factor2'] + df['act_factor3'] + df['act_factor4']\n",
|
||||
" df['act_factor6'] = (df['act_factor1'] - df['act_factor2']) / np.sqrt(\n",
|
||||
" df['act_factor1'] ** 2 + df['act_factor2'] ** 2)\n",
|
||||
"\n",
|
||||
" # 根据 trade_date 截面计算排名\n",
|
||||
" df['rank_act_factor1'] = df.groupby('trade_date', group_keys=False)['act_factor1'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor2'] = df.groupby('trade_date', group_keys=False)['act_factor2'].rank(ascending=False, pct=True)\n",
|
||||
" df['rank_act_factor3'] = df.groupby('trade_date', group_keys=False)['act_factor3'].rank(ascending=False, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_money_flow_factor(df):\n",
|
||||
" # 计算资金流相关因子(字段名称见 tushare 数据说明)\n",
|
||||
" df['active_buy_volume_large'] = df['buy_lg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_big'] = df['buy_elg_vol'] / df['net_mf_vol']\n",
|
||||
" df['active_buy_volume_small'] = df['buy_sm_vol'] / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['buy_lg_vol_minus_sell_lg_vol'] = (df['buy_lg_vol'] - df['sell_lg_vol']) / df['net_mf_vol']\n",
|
||||
" df['buy_elg_vol_minus_sell_elg_vol'] = (df['buy_elg_vol'] - df['sell_elg_vol']) / df['net_mf_vol']\n",
|
||||
"\n",
|
||||
" df['log(circ_mv)'] = np.log(df['circ_mv'])\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_alpha_factor(df):\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
" grouped = df.groupby('ts_code')\n",
|
||||
"\n",
|
||||
" # alpha_022: 当前 close 与 5 日前 close 差值\n",
|
||||
" df['alpha_022'] = grouped['close'].transform(lambda x: x - x.shift(5))\n",
|
||||
"\n",
|
||||
" # alpha_003: (close - open) / (high - low)\n",
|
||||
" df['alpha_003'] = np.where(df['high'] != df['low'],\n",
|
||||
" (df['close'] - df['open']) / (df['high'] - df['low']),\n",
|
||||
" 0)\n",
|
||||
"\n",
|
||||
" # alpha_007: 计算过去5日 close 与 vol 的相关性,并按 trade_date 排名\n",
|
||||
" df['alpha_007'] = grouped.apply(lambda x: x['close'].rolling(5).corr(x['vol'])).reset_index(level=0, drop=True)\n",
|
||||
" df['alpha_007'] = df.groupby('trade_date', group_keys=False)['alpha_007'].rank(ascending=True, pct=True)\n",
|
||||
"\n",
|
||||
" # alpha_013: 计算过去5日 close 之和 - 20日 close 之和,并按 trade_date 排名\n",
|
||||
" df['alpha_013'] = grouped['close'].transform(lambda x: x.rolling(5).sum() - x.rolling(20).sum())\n",
|
||||
" df['alpha_013'] = df.groupby('trade_date', group_keys=False)['alpha_013'].rank(ascending=True, pct=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_limit_factor(df):\n",
|
||||
" # 按股票和日期排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 分组处理\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" # 1. 今日是否涨停/跌停\n",
|
||||
" df['cat_up_limit'] = (df['close'] == df['up_limit']).astype(int) # 是否涨停(1表示涨停,0表示未涨停)\n",
|
||||
" df['cat_down_limit'] = (df['close'] == df['down_limit']).astype(int) # 是否跌停(1表示跌停,0表示未跌停)\n",
|
||||
"\n",
|
||||
" # 2. 最近涨跌停次数(过去10个交易日)\n",
|
||||
" df['up_limit_count_10d'] = grouped['cat_up_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
|
||||
" drop=True)\n",
|
||||
" df['down_limit_count_10d'] = grouped['cat_down_limit'].rolling(window=10, min_periods=1).sum().reset_index(level=0,\n",
|
||||
" drop=True)\n",
|
||||
"\n",
|
||||
" # 3. 最近连续涨跌停天数\n",
|
||||
" def calculate_consecutive_limits(series):\n",
|
||||
" \"\"\"\n",
|
||||
" 计算连续涨停/跌停天数。\n",
|
||||
" \"\"\"\n",
|
||||
" consecutive_up = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
|
||||
" consecutive_down = series * (series.groupby((series != series.shift()).cumsum()).cumcount() + 1)\n",
|
||||
" return consecutive_up, consecutive_down\n",
|
||||
"\n",
|
||||
" # 连续涨停天数\n",
|
||||
" df['consecutive_up_limit'] = grouped['cat_up_limit'].apply(\n",
|
||||
" lambda x: calculate_consecutive_limits(x)[0]\n",
|
||||
" ).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 连续跌停天数\n",
|
||||
" # df['consecutive_down_limit'] = grouped['cat_down_limit'].apply(\n",
|
||||
" # lambda x: calculate_consecutive_limits(x)[1]\n",
|
||||
" # ).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_cyp_perf_factor(df):\n",
|
||||
" # 预处理:按股票代码和时间排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 按股票代码分组处理\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" df['ctrl_strength'] = (df['cost_85pct'] - df['cost_15pct']) / (df['his_high'] - df['his_low'])\n",
|
||||
"\n",
|
||||
" df['low_cost_dev'] = (df['close'] - df['cost_5pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
|
||||
"\n",
|
||||
" df['asymmetry'] = (df['cost_95pct'] - df['cost_50pct']) / (df['cost_50pct'] - df['cost_5pct'])\n",
|
||||
"\n",
|
||||
" df['lock_factor'] = df['turnover_rate'] * (\n",
|
||||
" 1 - (df['cost_95pct'] - df['cost_5pct']) / (df['his_high'] - df['his_low']))\n",
|
||||
"\n",
|
||||
" df['vol_break'] = np.where((df['close'] > df['cost_85pct']) & (df['volume_ratio'] > 2), 1, 0)\n",
|
||||
"\n",
|
||||
" df['weight_roc5'] = grouped['weight_avg'].apply(lambda x: x.pct_change(5))\n",
|
||||
"\n",
|
||||
" def rolling_corr(group):\n",
|
||||
" roc_close = group['close'].pct_change()\n",
|
||||
" roc_weight = group['weight_avg'].pct_change()\n",
|
||||
" return roc_close.rolling(10).corr(roc_weight)\n",
|
||||
"\n",
|
||||
" df['price_cost_divergence'] = grouped.apply(rolling_corr)\n",
|
||||
"\n",
|
||||
" def calc_atr(group):\n",
|
||||
" high, low, close = group['high'], group['low'], group['close']\n",
|
||||
" tr = np.maximum(high - low,\n",
|
||||
" np.maximum(abs(high - close.shift()),\n",
|
||||
" abs(low - close.shift())))\n",
|
||||
" return tr.rolling(14).mean()\n",
|
||||
"\n",
|
||||
" df['atr_14'] = grouped.apply(calc_atr)\n",
|
||||
" df['cost_atr_adj'] = (df['cost_95pct'] - df['cost_5pct']) / df['atr_14']\n",
|
||||
"\n",
|
||||
" # 12. 小盘股筹码集中度\n",
|
||||
" df['smallcap_concentration'] = (1 / df['circ_mv']) * (df['cost_85pct'] - df['cost_15pct'])\n",
|
||||
"\n",
|
||||
" # 16. 筹码稳定性指数 (20日波动率)\n",
|
||||
" df['weight_std20'] = grouped['weight_avg'].apply(lambda x: x.rolling(20).std())\n",
|
||||
" df['cost_stability'] = df['weight_std20'] / grouped['weight_avg'].transform(lambda x: x.rolling(20).mean())\n",
|
||||
"\n",
|
||||
" # 17. 成本区间突破标记\n",
|
||||
" df['high_cost_break_days'] = grouped.apply(lambda g: g['close'].gt(g['cost_95pct']).rolling(5).sum())\n",
|
||||
"\n",
|
||||
" # 18. 黄金筹码共振 (复合事件)\n",
|
||||
" df['cat_golden_resonance'] = ((df['close'] > df['weight_avg']) &\n",
|
||||
" (df['volume_ratio'] > 1.5) &\n",
|
||||
" (df['winner_rate'] > 0.7))\n",
|
||||
"\n",
|
||||
" # 20. 筹码-流动性风险\n",
|
||||
" df['liquidity_risk'] = (df['cost_95pct'] - df['cost_5pct']) * (\n",
|
||||
" 1 / grouped['vol'].transform(lambda x: x.rolling(10).mean()))\n",
|
||||
"\n",
|
||||
" df.drop(columns=['weight_std20'], inplace=True, errors='ignore')\n",
|
||||
"\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_mv_factors(df):\n",
|
||||
" \"\"\"\n",
|
||||
" 计算多个市值相关因子(综合因子的标准化与加权合成目前已被注释,未启用)。\n",
|
||||
"\n",
|
||||
" 参数:\n",
|
||||
" df (pd.DataFrame): 包含 ts_code, trade_date, turnover_rate, pe_ttm, pb, ps, circ_mv, volume_ratio, vol 等列的数据框。\n",
|
||||
"\n",
|
||||
" 返回:\n",
|
||||
" pd.DataFrame: 包含新增因子列的数据框(当前不包含综合因子 final_combined_factor)。\n",
|
||||
" \"\"\"\n",
|
||||
" # 按 ts_code 和 trade_date 排序\n",
|
||||
" df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"\n",
|
||||
" # 按 ts_code 分组\n",
|
||||
" grouped = df.groupby('ts_code', group_keys=False)\n",
|
||||
"\n",
|
||||
" # 1. 市值流动比因子\n",
|
||||
" df['mv_turnover_ratio'] = df['turnover_rate'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 2. 市值调整成交量因子\n",
|
||||
" df['mv_adjusted_volume'] = df['vol'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 3. 市值加权换手率因子\n",
|
||||
" df['mv_weighted_turnover'] = df['turnover_rate'] * (1 / df['circ_mv'])\n",
|
||||
"\n",
|
||||
" # 4. 非线性市值成交量因子(注意:当前公式与因子2 mv_adjusted_volume 完全相同,尚未做非线性变换)\n",
|
||||
" df['nonlinear_mv_volume'] = df['vol'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 5. 市值量比因子\n",
|
||||
" df['mv_volume_ratio'] = df['volume_ratio'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 6. 市值动量因子\n",
|
||||
" df['mv_momentum'] = df['turnover_rate'] * df['volume_ratio'] / df['circ_mv']\n",
|
||||
"\n",
|
||||
" # 7. 市值波动率因子\n",
|
||||
" df['turnover_std'] = grouped['turnover_rate'].rolling(window=20).std().reset_index(level=0, drop=True)\n",
|
||||
" df['mv_volatility'] = grouped.apply(lambda x: x['turnover_std'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # 8. 市值成长性因子\n",
|
||||
" df['volume_growth'] = grouped['vol'].pct_change(periods=20).reset_index(level=0, drop=True)\n",
|
||||
" df['mv_growth'] = grouped.apply(lambda x: x['volume_growth'] / x['circ_mv']).reset_index(level=0, drop=True)\n",
|
||||
"\n",
|
||||
" # # 标准化因子\n",
|
||||
" # factor_columns = [\n",
|
||||
" # 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover',\n",
|
||||
" # 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum',\n",
|
||||
" # 'mv_volatility', 'mv_growth'\n",
|
||||
" # ]\n",
|
||||
" # scaler = StandardScaler()\n",
|
||||
" # df[factor_columns] = scaler.fit_transform(df[factor_columns])\n",
|
||||
" #\n",
|
||||
" # # 加权合成因子\n",
|
||||
" # weights = [0.2, 0.15, 0.15, 0.1, 0.1, 0.1, 0.1, 0.1] # 各因子权重\n",
|
||||
" # df['final_combined_factor'] = df[factor_columns].dot(weights)\n",
|
||||
"\n",
|
||||
" return df"
|
||||
],
|
||||
"id": "505e825945e4b8cf"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
1359
main/train/predictions.tsv
Normal file
1359
main/train/predictions.tsv
Normal file
File diff suppressed because it is too large
Load Diff
565
main/train/predictions_test.tsv
Normal file
565
main/train/predictions_test.tsv
Normal file
@@ -0,0 +1,565 @@
|
||||
trade_date,score,ts_code
|
||||
2022-12-08,0.27431420966080605,600778.SH
|
||||
2022-12-09,0.6150539465999814,002995.SZ
|
||||
2022-12-12,0.32582588516973016,001219.SZ
|
||||
2022-12-13,0.449772253615743,603183.SH
|
||||
2022-12-14,0.6769511128551923,001219.SZ
|
||||
2022-12-15,0.5930979713048357,001219.SZ
|
||||
2022-12-16,0.43211109874606424,603183.SH
|
||||
2022-12-19,0.5066203384263489,000892.SZ
|
||||
2022-12-20,0.2882618462700443,000691.SZ
|
||||
2022-12-21,0.40494380930765467,001219.SZ
|
||||
2022-12-22,0.7379517535413331,002762.SZ
|
||||
2022-12-23,0.5775898117404806,002566.SZ
|
||||
2022-12-26,0.3292293609625978,002719.SZ
|
||||
2022-12-27,0.580738686242899,000679.SZ
|
||||
2022-12-28,0.5180122078878033,605289.SH
|
||||
2022-12-29,0.643325626734685,002103.SZ
|
||||
2022-12-30,0.5378362015974298,603209.SH
|
||||
2023-01-03,0.36814451293952416,000985.SZ
|
||||
2023-01-04,0.4506419163930136,605133.SH
|
||||
2023-01-05,-0.08745711573292192,605167.SH
|
||||
2023-01-06,0.3958417326952953,605289.SH
|
||||
2023-01-09,0.16620697664167175,600778.SH
|
||||
2023-01-10,0.25992110313636035,000985.SZ
|
||||
2023-01-11,0.5095437644681087,002771.SZ
|
||||
2023-01-12,0.4397750442288285,605258.SH
|
||||
2023-01-13,0.6102622318789971,003043.SZ
|
||||
2023-01-16,0.31204276505440004,002808.SZ
|
||||
2023-01-17,0.4972787924897241,002975.SZ
|
||||
2023-01-18,0.026553404105244968,002975.SZ
|
||||
2023-01-19,0.29558268580158115,603860.SH
|
||||
2023-01-20,0.2510349420297213,002849.SZ
|
||||
2023-01-30,0.21942028551157527,003039.SZ
|
||||
2023-01-31,0.3575069234093295,605081.SH
|
||||
2023-02-01,0.4427957172082794,002893.SZ
|
||||
2023-02-02,0.6212207641739337,600817.SH
|
||||
2023-02-03,0.6202750689624308,002993.SZ
|
||||
2023-02-06,0.20845430964837489,000010.SZ
|
||||
2023-02-07,0.3667829939094325,600593.SH
|
||||
2023-02-08,0.32215761217132205,000820.SZ
|
||||
2023-02-09,0.1516026707537734,002021.SZ
|
||||
2023-02-10,0.5453734923733047,003016.SZ
|
||||
2023-02-13,0.7491169288183265,003037.SZ
|
||||
2023-02-14,0.32476512974212635,002828.SZ
|
||||
2023-02-15,0.6984519009806621,605128.SH
|
||||
2023-02-16,0.2078494458450699,605378.SH
|
||||
2023-02-17,0.20087261579967608,000668.SZ
|
||||
2023-02-20,0.6724819126277912,002715.SZ
|
||||
2023-02-21,0.7209181859866042,605028.SH
|
||||
2023-02-22,0.42474501256326314,002900.SZ
|
||||
2023-02-23,0.43124729325039124,001236.SZ
|
||||
2023-02-24,0.6008854884810912,603102.SH
|
||||
2023-02-27,0.5702542696831331,605259.SH
|
||||
2023-02-28,0.24318268223778186,002857.SZ
|
||||
2023-03-01,0.5388577927345274,603950.SH
|
||||
2023-03-02,0.6815724852841429,001236.SZ
|
||||
2023-03-03,0.6064483180272962,002098.SZ
|
||||
2023-03-06,0.5180664638865109,605178.SH
|
||||
2023-03-07,0.7291442722387731,001339.SZ
|
||||
2023-03-08,0.3240206100047592,603268.SH
|
||||
2023-03-09,0.5619204909224714,603030.SH
|
||||
2023-03-10,0.6055962888677536,003027.SZ
|
||||
2023-03-13,0.10471064296768949,605296.SH
|
||||
2023-03-14,0.5148688231123284,603176.SH
|
||||
2023-03-15,0.41425644779572274,605287.SH
|
||||
2023-03-16,0.3858205191834723,605303.SH
|
||||
2023-03-17,0.38210649704563177,002899.SZ
|
||||
2023-03-20,0.20755090351337924,002778.SZ
|
||||
2023-03-21,0.2184477420463366,603155.SH
|
||||
2023-03-22,0.07842488490864312,002836.SZ
|
||||
2023-03-23,0.26327386834675565,002899.SZ
|
||||
2023-03-24,0.21281930224537013,605086.SH
|
||||
2023-03-27,0.19455767073518335,603729.SH
|
||||
2023-03-28,0.18440479662298903,603324.SH
|
||||
2023-03-29,0.5577394899737692,002995.SZ
|
||||
2023-03-30,0.28537485170922117,603679.SH
|
||||
2023-03-31,0.30705863202777134,603615.SH
|
||||
2023-04-03,0.43719928717137047,603321.SH
|
||||
2023-04-04,0.7949399014212187,603139.SH
|
||||
2023-04-06,0.5079656399994698,002715.SZ
|
||||
2023-04-07,0.701235747536229,605299.SH
|
||||
2023-04-10,0.5142089175897191,001316.SZ
|
||||
2023-04-11,0.6097058153625001,002835.SZ
|
||||
2023-04-12,0.42821688099056865,003043.SZ
|
||||
2023-04-13,0.6086458195457266,605296.SH
|
||||
2023-04-14,0.40520429106061684,001316.SZ
|
||||
2023-04-17,0.7332476184295339,002862.SZ
|
||||
2023-04-18,0.24978196798538302,600768.SH
|
||||
2023-04-19,0.5235224445388739,603657.SH
|
||||
2023-04-20,0.5073410973887871,000702.SZ
|
||||
2023-04-21,0.25827344858110657,002848.SZ
|
||||
2023-04-24,0.441433820804789,603685.SH
|
||||
2023-04-25,0.45710917638850534,603230.SH
|
||||
2023-04-26,0.28288056233393655,002725.SZ
|
||||
2023-04-27,0.13616135413238703,002972.SZ
|
||||
2023-04-28,0.26068199992734814,603178.SH
|
||||
2023-05-04,0.5654404518697154,600107.SH
|
||||
2023-05-05,0.26758125911217795,603021.SH
|
||||
2023-05-08,0.23558429168600836,002778.SZ
|
||||
2023-05-09,0.2707962779077066,603213.SH
|
||||
2023-05-10,0.33701828135159717,600778.SH
|
||||
2023-05-11,0.5467076847749692,603958.SH
|
||||
2023-05-12,0.6956005090125644,603958.SH
|
||||
2023-05-15,0.28587355864974423,000679.SZ
|
||||
2023-05-16,0.6092507418432053,600796.SH
|
||||
2023-05-17,0.4723632871528185,002633.SZ
|
||||
2023-05-18,0.44171920333992315,605089.SH
|
||||
2023-05-19,0.15743942037394715,001317.SZ
|
||||
2023-05-22,0.47338926108587503,603151.SH
|
||||
2023-05-23,0.7537765588258426,603721.SH
|
||||
2023-05-24,0.35894033254239865,003007.SZ
|
||||
2023-05-25,0.6230303733419829,003005.SZ
|
||||
2023-05-26,0.5243725213664181,003005.SZ
|
||||
2023-05-29,0.5460639613578377,001288.SZ
|
||||
2023-05-30,-0.14324964018444036,605151.SH
|
||||
2023-05-31,0.1321851497388741,003041.SZ
|
||||
2023-06-01,0.488265280236323,603170.SH
|
||||
2023-06-02,0.2725329302903607,002875.SZ
|
||||
2023-06-05,0.4445215836414108,001316.SZ
|
||||
2023-06-06,0.233866225393599,600753.SH
|
||||
2023-06-07,0.1512953839015877,603097.SH
|
||||
2023-06-08,0.5303933339784708,002780.SZ
|
||||
2023-06-09,0.595474766855165,002893.SZ
|
||||
2023-06-12,0.7044220035173576,002820.SZ
|
||||
2023-06-13,0.46792362066084003,002702.SZ
|
||||
2023-06-14,0.5917956764629129,000880.SZ
|
||||
2023-06-15,0.3231002542961875,002981.SZ
|
||||
2023-06-16,0.3426911954075076,600847.SH
|
||||
2023-06-19,-0.02150391139369695,603132.SH
|
||||
2023-06-20,0.6704208966606625,002949.SZ
|
||||
2023-06-21,0.7415606269689047,002806.SZ
|
||||
2023-06-26,0.2389199769543643,600847.SH
|
||||
2023-06-27,0.2003853580878301,605169.SH
|
||||
2023-06-28,0.46623595119888966,605218.SH
|
||||
2023-06-29,0.5589108980336046,603958.SH
|
||||
2023-06-30,0.6290675381060588,603286.SH
|
||||
2023-07-03,0.30433310431106353,600778.SH
|
||||
2023-07-04,0.41651276650561014,002513.SZ
|
||||
2023-07-05,0.3473548650199746,603132.SH
|
||||
2023-07-06,0.40969750497772167,002591.SZ
|
||||
2023-07-07,0.2430362735691786,001231.SZ
|
||||
2023-07-10,0.4983750803303532,001267.SZ
|
||||
2023-07-11,0.733386176985722,002551.SZ
|
||||
2023-07-12,0.7936049551065578,000004.SZ
|
||||
2023-07-13,0.40916765144188155,000638.SZ
|
||||
2023-07-14,0.21996055437116258,605580.SH
|
||||
2023-07-17,0.22071234127281886,605369.SH
|
||||
2023-07-18,0.32992331418284704,002802.SZ
|
||||
2023-07-19,0.3337178034533016,001222.SZ
|
||||
2023-07-20,0.44391528952121656,600234.SH
|
||||
2023-07-21,0.5703993630872055,600448.SH
|
||||
2023-07-24,0.13840705878806345,002753.SZ
|
||||
2023-07-25,0.14047801960398054,002377.SZ
|
||||
2023-07-26,0.44664932418756537,003032.SZ
|
||||
2023-07-27,0.5452266508240136,603838.SH
|
||||
2023-07-28,0.6501197606840003,002397.SZ
|
||||
2023-07-31,0.8161881604231447,002397.SZ
|
||||
2023-08-01,0.8054314713785248,002397.SZ
|
||||
2023-08-02,0.5699471212343736,600119.SH
|
||||
2023-08-03,0.3961899673469923,002787.SZ
|
||||
2023-08-04,0.747143154431229,600082.SH
|
||||
2023-08-07,0.5542807418220157,605162.SH
|
||||
2023-08-08,0.4660499541690493,605369.SH
|
||||
2023-08-09,0.1846960748819621,605060.SH
|
||||
2023-08-10,0.6657079567366654,003020.SZ
|
||||
2023-08-11,0.657079341742516,000953.SZ
|
||||
2023-08-14,0.42201036027484534,002495.SZ
|
||||
2023-08-15,0.4783974585467736,002495.SZ
|
||||
2023-08-16,0.5756999736912221,003030.SZ
|
||||
2023-08-17,0.7578964013923504,002052.SZ
|
||||
2023-08-18,0.004897979672684783,603151.SH
|
||||
2023-08-21,0.11103177103375994,605339.SH
|
||||
2023-08-22,0.5482563310657345,603021.SH
|
||||
2023-08-23,0.7223546665888397,000669.SZ
|
||||
2023-08-24,0.750140979575826,600235.SH
|
||||
2023-08-25,0.34893747282432125,001318.SZ
|
||||
2023-08-28,0.17097259367409923,603329.SH
|
||||
2023-08-29,0.290639411928478,001267.SZ
|
||||
2023-08-30,0.07811174210597455,603021.SH
|
||||
2023-08-31,0.2910924076064356,603838.SH
|
||||
2023-09-01,0.6435370857973789,002696.SZ
|
||||
2023-09-04,0.04554737468797225,605259.SH
|
||||
2023-09-05,0.18999529865866976,001231.SZ
|
||||
2023-09-06,0.47343827547785233,002982.SZ
|
||||
2023-09-07,0.5685183560937441,001231.SZ
|
||||
2023-09-08,0.4682919982486746,003025.SZ
|
||||
2023-09-11,0.511414318533627,002535.SZ
|
||||
2023-09-12,0.34217637355801866,003020.SZ
|
||||
2023-09-13,0.4524964916922371,001269.SZ
|
||||
2023-09-14,0.6440683894231696,002856.SZ
|
||||
2023-09-15,0.6265975964127983,001269.SZ
|
||||
2023-09-18,0.6451154901817582,002857.SZ
|
||||
2023-09-19,0.41416994363886955,605151.SH
|
||||
2023-09-20,0.4097659657161061,600615.SH
|
||||
2023-09-21,0.3308468663518861,603616.SH
|
||||
2023-09-22,0.7278118492027132,600608.SH
|
||||
2023-09-25,0.3087058065638187,002963.SZ
|
||||
2023-09-26,0.44957535540535354,000638.SZ
|
||||
2023-09-27,0.6529354742977974,000609.SZ
|
||||
2023-09-28,0.5608546287364546,605080.SH
|
||||
2023-10-09,0.2684689324603092,000004.SZ
|
||||
2023-10-10,0.7514903867910352,001337.SZ
|
||||
2023-10-11,0.6833204831817536,000010.SZ
|
||||
2023-10-12,0.6849345854259707,001288.SZ
|
||||
2023-10-13,0.45213040270359944,001223.SZ
|
||||
2023-10-16,0.44367713319364266,001311.SZ
|
||||
2023-10-17,0.4358062265247695,001266.SZ
|
||||
2023-10-18,0.7896227965981543,002535.SZ
|
||||
2023-10-19,0.8091287635227896,000609.SZ
|
||||
2023-10-20,0.7497841605463051,000705.SZ
|
||||
2023-10-23,0.5162013866354915,600615.SH
|
||||
2023-10-24,0.5346626404470584,000554.SZ
|
||||
2023-10-25,0.31017050910898813,002836.SZ
|
||||
2023-10-26,0.41928742617604475,002798.SZ
|
||||
2023-10-27,0.6206681321070086,600791.SH
|
||||
2023-10-30,0.2809249638133884,600697.SH
|
||||
2023-10-31,0.41380204486883465,605299.SH
|
||||
2023-11-01,0.3913649017002345,002952.SZ
|
||||
2023-11-02,0.25190883932779223,603272.SH
|
||||
2023-11-03,0.25483193696737405,600697.SH
|
||||
2023-11-06,0.6843951349633363,603900.SH
|
||||
2023-11-07,0.6794539224187386,002005.SZ
|
||||
2023-11-08,0.2787637201989255,605337.SH
|
||||
2023-11-09,0.43869442213023335,603307.SH
|
||||
2023-11-10,0.2901012944614997,002615.SZ
|
||||
2023-11-13,0.48928158926409887,003020.SZ
|
||||
2023-11-14,0.46232236500040824,603268.SH
|
||||
2023-11-15,0.6895995906987776,000010.SZ
|
||||
2023-11-16,0.45286066066734804,001298.SZ
|
||||
2023-11-17,0.7301876489705413,000010.SZ
|
||||
2023-11-20,0.7343688038104235,000004.SZ
|
||||
2023-11-21,0.23914531702237296,600361.SH
|
||||
2023-11-22,0.6007850824537518,002735.SZ
|
||||
2023-11-23,0.6504458118708949,603655.SH
|
||||
2023-11-24,0.46094310596129545,002842.SZ
|
||||
2023-11-27,0.6273014444813882,603729.SH
|
||||
2023-11-28,0.44076850931480105,002188.SZ
|
||||
2023-11-29,0.2215431212240851,605598.SH
|
||||
2023-11-30,0.47752407474308556,002247.SZ
|
||||
2023-12-01,0.5451043441108514,603045.SH
|
||||
2023-12-04,0.37633081988016603,603183.SH
|
||||
2023-12-05,0.7161351255511346,000929.SZ
|
||||
2023-12-06,0.467262040140511,002848.SZ
|
||||
2023-12-07,0.39095280707015256,600883.SH
|
||||
2023-12-08,0.4253618928722024,001373.SZ
|
||||
2023-12-11,0.2828511933586843,002753.SZ
|
||||
2023-12-12,0.5178019880022604,600099.SH
|
||||
2023-12-13,0.5539680447662736,000702.SZ
|
||||
2023-12-14,0.7341038153763678,000609.SZ
|
||||
2023-12-15,0.5186263801346903,002495.SZ
|
||||
2023-12-18,0.45857609424669377,002835.SZ
|
||||
2023-12-19,0.4688848440159626,000004.SZ
|
||||
2023-12-20,0.528943977776945,002571.SZ
|
||||
2023-12-21,0.22718655814787117,000020.SZ
|
||||
2023-12-22,0.5928968748610841,002052.SZ
|
||||
2023-12-25,0.3483050510250432,001201.SZ
|
||||
2023-12-26,0.32710963694268524,600778.SH
|
||||
2023-12-27,0.8023420246227895,603061.SH
|
||||
2023-12-28,0.6117050022740004,001223.SZ
|
||||
2023-12-29,0.30978967753335185,600791.SH
|
||||
2024-01-02,0.7213136293380755,001339.SZ
|
||||
2024-01-03,0.27966735262244463,603255.SH
|
||||
2024-01-04,0.4113020595403535,002569.SZ
|
||||
2024-01-05,0.5862299814842546,002397.SZ
|
||||
2024-01-08,0.2751799131522872,002207.SZ
|
||||
2024-01-09,0.3983758716199103,600462.SH
|
||||
2024-01-10,0.27583418641572577,002381.SZ
|
||||
2024-01-11,0.5163369671207251,001308.SZ
|
||||
2024-01-12,0.1869457075250673,001333.SZ
|
||||
2024-01-15,0.38890973684559677,600778.SH
|
||||
2024-01-16,0.628350379976437,002973.SZ
|
||||
2024-01-17,0.567643592779436,001259.SZ
|
||||
2024-01-18,0.45358303557672786,605003.SH
|
||||
2024-01-19,0.17143291074790643,605003.SH
|
||||
2024-01-22,0.38033202178552034,600137.SH
|
||||
2024-01-23,0.2682996358307573,603307.SH
|
||||
2024-01-24,0.09762885957278981,002200.SZ
|
||||
2024-01-25,0.2791248936457759,600322.SH
|
||||
2024-01-26,0.6698373552722515,001300.SZ
|
||||
2024-01-29,0.3453399213641141,001212.SZ
|
||||
2024-01-30,0.2018197040316142,000609.SZ
|
||||
2024-01-31,0.13839592742162668,002862.SZ
|
||||
2024-02-01,0.304731410465905,002397.SZ
|
||||
2024-02-02,0.4968354158495188,600791.SH
|
||||
2024-02-05,0.6076128945914563,002883.SZ
|
||||
2024-02-06,0.27529438199416406,001313.SZ
|
||||
2024-02-07,0.7032777773473656,601279.SH
|
||||
2024-02-08,0.5124649392628607,001339.SZ
|
||||
2024-02-19,0.47259882754683735,600608.SH
|
||||
2024-02-20,-0.14666017874686663,605289.SH
|
||||
2024-02-21,0.658199830069353,001317.SZ
|
||||
2024-02-22,-0.03512517816725689,603895.SH
|
||||
2024-02-23,0.49070407040956543,603286.SH
|
||||
2024-02-26,0.5874761268473985,603192.SH
|
||||
2024-02-27,0.6988377838547319,002856.SZ
|
||||
2024-02-28,0.2914956876374956,002513.SZ
|
||||
2024-02-29,0.39780267057003393,002760.SZ
|
||||
2024-03-01,0.270122240597905,000908.SZ
|
||||
2024-03-04,0.6403053454891889,000622.SZ
|
||||
2024-03-05,0.7898552633062592,002848.SZ
|
||||
2024-03-06,0.3809277227308279,002888.SZ
|
||||
2024-03-07,0.853455303753637,002199.SZ
|
||||
2024-03-08,0.2596463171676263,603206.SH
|
||||
2024-03-11,0.7555631824344116,002888.SZ
|
||||
2024-03-12,0.22682284013007994,001298.SZ
|
||||
2024-03-13,0.5422854277369126,001298.SZ
|
||||
2024-03-14,0.45779191251005164,600444.SH
|
||||
2024-03-15,0.6383376746111096,603023.SH
|
||||
2024-03-18,0.3201696636960583,001217.SZ
|
||||
2024-03-19,0.5996501433774017,001300.SZ
|
||||
2024-03-20,0.679915283803281,603273.SH
|
||||
2024-03-21,0.33698105904573383,000820.SZ
|
||||
2024-03-22,0.5218294653991029,002272.SZ
|
||||
2024-03-25,0.6420731490646061,002272.SZ
|
||||
2024-03-26,0.2833072190123176,000985.SZ
|
||||
2024-03-27,0.47781267803027716,002094.SZ
|
||||
2024-03-28,0.3495920493283059,001368.SZ
|
||||
2024-03-29,0.6814619283051855,605167.SH
|
||||
2024-04-01,0.13340757834476227,002787.SZ
|
||||
2024-04-02,0.6485933245807073,002629.SZ
|
||||
2024-04-03,0.5005374249650347,600889.SH
|
||||
2024-04-08,0.3458211793083884,002998.SZ
|
||||
2024-04-09,0.4543106999676991,603268.SH
|
||||
2024-04-10,0.3782795993313912,600289.SH
|
||||
2024-04-11,0.5682306510607521,002999.SZ
|
||||
2024-04-12,0.7563586796123376,001333.SZ
|
||||
2024-04-15,0.6820371060072895,605259.SH
|
||||
2024-04-16,0.20646370209051096,603177.SH
|
||||
2024-04-17,0.397823887165538,001367.SZ
|
||||
2024-04-18,0.5284023840181794,001288.SZ
|
||||
2024-04-19,0.5531779115633008,603137.SH
|
||||
2024-04-22,0.5285690780224108,603270.SH
|
||||
2024-04-23,0.4948891344668796,605003.SH
|
||||
2024-04-24,0.18745324505195465,002899.SZ
|
||||
2024-04-25,0.5561527695473475,605287.SH
|
||||
2024-04-26,0.3910800271321991,603813.SH
|
||||
2024-04-29,0.22800522777162466,600083.SH
|
||||
2024-04-30,0.47575239303431954,603991.SH
|
||||
2024-05-06,0.24135147235517762,603991.SH
|
||||
2024-05-07,0.6505337985284155,603955.SH
|
||||
2024-05-08,-0.008011195615733824,000820.SZ
|
||||
2024-05-09,0.3612607813981246,002848.SZ
|
||||
2024-05-10,0.5729784871026853,002295.SZ
|
||||
2024-05-13,0.42642459942636673,001228.SZ
|
||||
2024-05-14,0.616654660412696,603150.SH
|
||||
2024-05-15,0.14069160886717613,000995.SZ
|
||||
2024-05-16,0.2893164763341796,002893.SZ
|
||||
2024-05-17,0.26559138928183623,000593.SZ
|
||||
2024-05-20,0.6202830453921107,605318.SH
|
||||
2024-05-21,0.5962739970033031,002231.SZ
|
||||
2024-05-22,0.24212148187955357,600408.SH
|
||||
2024-05-23,0.5866973439001457,000056.SZ
|
||||
2024-05-24,0.4950026505575876,002620.SZ
|
||||
2024-05-27,0.2716568170957144,001367.SZ
|
||||
2024-05-28,0.614584069552624,603062.SH
|
||||
2024-05-29,0.5190445551469995,605567.SH
|
||||
2024-05-30,0.5650446836882232,001299.SZ
|
||||
2024-05-31,0.05127480727491687,002811.SZ
|
||||
2024-06-03,0.34948424376750514,603276.SH
|
||||
2024-06-04,0.2962107247005024,605365.SH
|
||||
2024-06-05,0.4265710639424621,600793.SH
|
||||
2024-06-06,0.2352508879607737,603193.SH
|
||||
2024-06-07,0.48735816467729426,001306.SZ
|
||||
2024-06-11,0.46798640763968125,002569.SZ
|
||||
2024-06-12,0.47153387530090407,001215.SZ
|
||||
2024-06-13,0.5520110912359264,603280.SH
|
||||
2024-06-14,0.19454715507705864,605598.SH
|
||||
2024-06-17,0.0963230849661045,002888.SZ
|
||||
2024-06-18,-0.07804798333558113,002615.SZ
|
||||
2024-06-19,0.610634439198648,002848.SZ
|
||||
2024-06-20,0.528548137823535,002888.SZ
|
||||
2024-06-21,0.5548914160403448,603097.SH
|
||||
2024-06-24,0.39920928373259934,001336.SZ
|
||||
2024-06-25,0.3187385965144802,603255.SH
|
||||
2024-06-26,0.32449821843842575,002582.SZ
|
||||
2024-06-27,0.5820092583390628,001387.SZ
|
||||
2024-06-28,0.3390536217231177,600493.SH
|
||||
2024-07-01,0.6778726275046811,002875.SZ
|
||||
2024-07-02,0.20520989636689582,605255.SH
|
||||
2024-07-03,0.5743008555845837,002495.SZ
|
||||
2024-07-04,0.9067074210889693,002247.SZ
|
||||
2024-07-05,0.7460402549612926,002094.SZ
|
||||
2024-07-08,0.6484051297498691,002012.SZ
|
||||
2024-07-09,0.4656464752321579,000056.SZ
|
||||
2024-07-10,0.5651250099764935,600421.SH
|
||||
2024-07-11,0.3919914680384934,000010.SZ
|
||||
2024-07-12,0.6138163480575665,002861.SZ
|
||||
2024-07-15,0.7779874584252403,605318.SH
|
||||
2024-07-16,0.4480483662651257,001217.SZ
|
||||
2024-07-17,0.4172203427639832,002094.SZ
|
||||
2024-07-18,0.7704811874850669,600561.SH
|
||||
2024-07-19,0.47075926007399854,002883.SZ
|
||||
2024-07-22,0.7111332900169166,002094.SZ
|
||||
2024-07-23,0.2511920883015322,002094.SZ
|
||||
2024-07-24,0.4172115642303865,000017.SZ
|
||||
2024-07-25,0.3128542596085397,002485.SZ
|
||||
2024-07-26,0.7143056769547913,002800.SZ
|
||||
2024-07-29,0.7348554188058531,001212.SZ
|
||||
2024-07-30,0.6544735763131749,600678.SH
|
||||
2024-07-31,0.33170353517328943,000619.SZ
|
||||
2024-08-01,0.5622688198785722,603021.SH
|
||||
2024-08-02,0.553901968970459,600355.SH
|
||||
2024-08-05,0.7203888768112711,002652.SZ
|
||||
2024-08-06,0.06845913509306308,002388.SZ
|
||||
2024-08-07,0.32607442357451333,002647.SZ
|
||||
2024-08-08,0.16487864247139689,600538.SH
|
||||
2024-08-09,0.541184606366378,603657.SH
|
||||
2024-08-12,0.26199426116748675,605180.SH
|
||||
2024-08-13,0.3717346916082516,600281.SH
|
||||
2024-08-14,0.6248284781260683,603151.SH
|
||||
2024-08-15,0.4701587394310862,001288.SZ
|
||||
2024-08-16,0.4980961856837102,600538.SH
|
||||
2024-08-19,0.6280757532182177,002622.SZ
|
||||
2024-08-20,0.4216952108346084,002094.SZ
|
||||
2024-08-21,0.8872335373988718,001317.SZ
|
||||
2024-08-22,0.5745604057145399,000056.SZ
|
||||
2024-08-23,0.505744305740485,603122.SH
|
||||
2024-08-26,0.8419619283484038,002072.SZ
|
||||
2024-08-27,0.6810091806857562,002084.SZ
|
||||
2024-08-28,0.6875657067302675,002072.SZ
|
||||
2024-08-29,0.44229937751556,600692.SH
|
||||
2024-08-30,0.5551556278148088,605100.SH
|
||||
2024-09-02,0.6992787682915002,000004.SZ
|
||||
2024-09-03,0.29976910325882256,001218.SZ
|
||||
2024-09-04,0.6768429298090569,002717.SZ
|
||||
2024-09-05,0.6206771676543172,002622.SZ
|
||||
2024-09-06,0.7271087976454396,002861.SZ
|
||||
2024-09-09,0.5051345343079485,002199.SZ
|
||||
2024-09-10,0.606941618095118,600533.SH
|
||||
2024-09-11,0.44563057902498,000632.SZ
|
||||
2024-09-12,0.544127097473236,600889.SH
|
||||
2024-09-13,0.8023872948409544,000702.SZ
|
||||
2024-09-18,0.27102315041971126,002729.SZ
|
||||
2024-09-19,0.8222201895966966,002629.SZ
|
||||
2024-09-20,0.7270786760641249,001296.SZ
|
||||
2024-09-23,0.6662260924500418,603778.SH
|
||||
2024-09-24,0.45201298931080247,600322.SH
|
||||
2024-09-25,0.7020112241768041,000573.SZ
|
||||
2024-09-26,0.9042164267563502,600322.SH
|
||||
2024-09-27,0.6660782992616279,600156.SH
|
||||
2024-09-30,0.6863709879538685,600156.SH
|
||||
2024-10-08,0.6127578713627717,600791.SH
|
||||
2024-10-09,0.42399006878436024,002486.SZ
|
||||
2024-10-10,0.19353810507130897,001379.SZ
|
||||
2024-10-11,0.32072111647856577,001306.SZ
|
||||
2024-10-14,0.3485052899457631,605169.SH
|
||||
2024-10-15,0.23682051553226138,603130.SH
|
||||
2024-10-16,0.06953864745342996,000695.SZ
|
||||
2024-10-17,0.6514983088909282,600599.SH
|
||||
2024-10-18,0.4389209760325074,002977.SZ
|
||||
2024-10-21,0.7197768607464181,603261.SH
|
||||
2024-10-22,0.6635882450692222,002806.SZ
|
||||
2024-10-23,0.45400588695990063,603172.SH
|
||||
2024-10-24,0.38830884380759845,600107.SH
|
||||
2024-10-25,0.8934238346702872,600243.SH
|
||||
2024-10-28,0.2797309154889543,600539.SH
|
||||
2024-10-29,0.2969987104969114,600768.SH
|
||||
2024-10-30,0.40287164661797453,002551.SZ
|
||||
2024-10-31,0.3564256188428194,002086.SZ
|
||||
2024-11-01,0.6321036634810505,002551.SZ
|
||||
2024-11-04,0.5456367520507691,603682.SH
|
||||
2024-11-05,0.4395577653608702,600802.SH
|
||||
2024-11-06,0.6650198272206568,600243.SH
|
||||
2024-11-07,0.43235770657243566,000430.SZ
|
||||
2024-11-08,0.5643849119436214,002076.SZ
|
||||
2024-11-11,0.7137448249635079,000007.SZ
|
||||
2024-11-12,0.5201716819040841,000679.SZ
|
||||
2024-11-13,0.6337096887315559,603214.SH
|
||||
2024-11-14,0.4979383369165651,603201.SH
|
||||
2024-11-15,0.45240021627553617,603201.SH
|
||||
2024-11-18,0.22669552732089976,605177.SH
|
||||
2024-11-19,0.38573714473203297,603331.SH
|
||||
2024-11-20,0.2979426663257885,001378.SZ
|
||||
2024-11-21,0.42041152689995204,002980.SZ
|
||||
2024-11-22,0.4998877526755884,003003.SZ
|
||||
2024-11-25,0.3166022033285757,002381.SZ
|
||||
2024-11-26,0.5105217556899093,001300.SZ
|
||||
2024-11-27,0.4581772650911419,603183.SH
|
||||
2024-11-28,0.2582863137914294,600302.SH
|
||||
2024-11-29,0.6516611156931627,600202.SH
|
||||
2024-12-02,0.6264742314126309,603021.SH
|
||||
2024-12-03,0.17876631396560913,605287.SH
|
||||
2024-12-04,0.571815529773696,603637.SH
|
||||
2024-12-05,0.5009836942607793,002615.SZ
|
||||
2024-12-06,0.5935816089479097,002615.SZ
|
||||
2024-12-09,0.2732740647491455,000880.SZ
|
||||
2024-12-10,0.8403246240655503,002211.SZ
|
||||
2024-12-11,0.6063814254598854,000952.SZ
|
||||
2024-12-12,0.65530104057359,002213.SZ
|
||||
2024-12-13,0.4326422618589393,002193.SZ
|
||||
2024-12-16,0.6951945851895344,002582.SZ
|
||||
2024-12-17,0.4367668554492269,002846.SZ
|
||||
2024-12-18,0.6034186500026795,600844.SH
|
||||
2024-12-19,0.6478095239545749,000695.SZ
|
||||
2024-12-20,0.12678291780050743,002687.SZ
|
||||
2024-12-23,0.0005977226174160126,603176.SH
|
||||
2024-12-24,0.6931664421535906,000790.SZ
|
||||
2024-12-25,0.9280754228431425,000004.SZ
|
||||
2024-12-26,0.5025149234980124,603082.SH
|
||||
2024-12-27,0.5619791111821815,603325.SH
|
||||
2024-12-30,0.5970375646444621,603291.SH
|
||||
2024-12-31,0.6210268787938896,603798.SH
|
||||
2025-01-02,0.5150847228562943,603255.SH
|
||||
2025-01-03,0.3936455223882481,001238.SZ
|
||||
2025-01-06,0.42512989288467096,001256.SZ
|
||||
2025-01-07,0.20046282642128765,002763.SZ
|
||||
2025-01-08,0.3594789763099251,603137.SH
|
||||
2025-01-09,0.17148562512671917,603150.SH
|
||||
2025-01-10,0.8141461510449396,603909.SH
|
||||
2025-01-13,0.7278259334554208,002365.SZ
|
||||
2025-01-14,0.5332116728293693,000955.SZ
|
||||
2025-01-15,0.26395540555061114,001223.SZ
|
||||
2025-01-16,0.20321325349056088,603637.SH
|
||||
2025-01-17,0.5155710238940482,000586.SZ
|
||||
2025-01-20,0.7692783436325927,002072.SZ
|
||||
2025-01-21,0.3743531875901297,601798.SH
|
||||
2025-01-22,0.5609509357408301,603059.SH
|
||||
2025-01-23,0.20207700522454125,001378.SZ
|
||||
2025-01-24,0.3138610824997807,002760.SZ
|
||||
2025-01-27,0.3887191549307029,603192.SH
|
||||
2025-02-05,0.5131470103792286,600599.SH
|
||||
2025-02-06,0.42133133196663924,603255.SH
|
||||
2025-02-07,0.2981033776962701,605003.SH
|
||||
2025-02-10,0.78495727017451,600225.SH
|
||||
2025-02-11,0.9081192688195034,600225.SH
|
||||
2025-02-12,0.8027995368952746,600225.SH
|
||||
2025-02-13,0.6500142590724168,603789.SH
|
||||
2025-02-14,0.7725392087365835,002058.SZ
|
||||
2025-02-17,0.49255867173448825,600228.SH
|
||||
2025-02-18,0.5576519868864848,600243.SH
|
||||
2025-02-19,0.2592919075461544,002496.SZ
|
||||
2025-02-20,0.421298468924212,002848.SZ
|
||||
2025-02-21,0.29697775540100313,001316.SZ
|
||||
2025-02-24,0.7638868267339545,603211.SH
|
||||
2025-02-25,0.5526784340520452,003028.SZ
|
||||
2025-02-26,0.5065861650146529,603716.SH
|
||||
2025-02-27,0.7407038446632749,603211.SH
|
||||
2025-02-28,0.4668521688585335,003043.SZ
|
||||
2025-03-03,0.2680721667617631,600753.SH
|
||||
2025-03-04,0.34955816615272756,002977.SZ
|
||||
2025-03-05,0.7482244194415444,603057.SH
|
||||
2025-03-06,0.6305254140888802,603280.SH
|
||||
2025-03-07,0.19351037427994797,600241.SH
|
||||
2025-03-10,0.38766031239447357,603325.SH
|
||||
2025-03-11,0.4303913500153944,002872.SZ
|
||||
2025-03-12,0.18459289202598228,002898.SZ
|
||||
2025-03-13,0.8576596052682522,001319.SZ
|
||||
2025-03-14,0.6230146680130096,000757.SZ
|
||||
2025-03-17,0.7328875682123387,603843.SH
|
||||
2025-03-18,0.47499472013228067,000669.SZ
|
||||
2025-03-19,0.3268919044509167,002305.SZ
|
||||
2025-03-20,0.4569272632462979,600356.SH
|
||||
2025-03-21,0.07591851931376978,000586.SZ
|
||||
2025-03-24,0.49072061900133407,603335.SH
|
||||
2025-03-25,0.4306632631450777,603381.SH
|
||||
2025-03-26,0.46006642069249487,001299.SZ
|
||||
2025-03-27,0.41362062710862235,002394.SZ
|
||||
2025-03-28,0.564157006795436,001332.SZ
|
||||
2025-03-31,0.5981134959932276,001238.SZ
|
||||
2025-04-01,0.6363729449100586,603102.SH
|
||||
2025-04-02,0.2865246522723796,002872.SZ
|
||||
2025-04-03,0.29802040504689753,000633.SZ
|
||||
2025-04-07,0.554762051627518,002872.SZ
|
||||
2025-04-08,0.34687738661031947,603682.SH
|
||||
2025-04-09,0.13896921728258024,001331.SZ
|
||||
|
1116
main/train/predictions_train.tsv
Normal file
1116
main/train/predictions_train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
14
main/train/test.py
Normal file
14
main/train/test.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import os
from operator import index

import tushare as ts
import pandas as pd
import time

# Ad-hoc script: issues two Tushare Pro queries and prints the results.
#
# SECURITY: the API token used to be hard-coded on this line. It is now read
# from the TUSHARE_TOKEN environment variable so the credential never lives
# in version control. The token that was previously committed should be
# treated as leaked and revoked/regenerated in the Tushare account.
token = os.environ.get('TUSHARE_TOKEN')
if not token:
    raise SystemExit('TUSHARE_TOKEN environment variable is not set')

ts.set_token(token)
pro = ts.pro_api()

# Query index_member_all for a single stock code and dump the whole frame.
# NOTE(review): presumably this returns the industry/index membership
# records for the stock — confirm against the Tushare docs.
df = pro.index_member_all(ts_code='603579.SH')
print(df)

# Query sw_daily for one trade date, restricted to the listed fields, then
# print only the rows whose ts_code equals '851171.SI'.
df = pro.sw_daily(trade_date='20250305', fields='ts_code,name,open,close,vol,pe,pb')
print(df[df['ts_code'] == '851171.SI'])
|
||||
565
main/train/test1.tsv
Normal file
565
main/train/test1.tsv
Normal file
@@ -0,0 +1,565 @@
|
||||
trade_date,score,ts_code
|
||||
2022-12-08,1.2708337806641494,603816.SH
|
||||
2022-12-09,1.4207120834806832,603567.SH
|
||||
2022-12-12,1.0198883623815167,002305.SZ
|
||||
2022-12-13,1.7022732146012465,002910.SZ
|
||||
2022-12-14,0.4115956442621504,600493.SH
|
||||
2022-12-15,1.2308250306434583,601858.SH
|
||||
2022-12-16,0.5214964254452716,601677.SH
|
||||
2022-12-19,1.5635207796349075,000721.SZ
|
||||
2022-12-20,0.9950031675966513,002314.SZ
|
||||
2022-12-21,1.867139344678808,603238.SH
|
||||
2022-12-22,0.11397346668733664,002095.SZ
|
||||
2022-12-23,0.7020503260530933,600706.SH
|
||||
2022-12-26,1.064077090528082,002707.SZ
|
||||
2022-12-27,0.5487905008977592,000978.SZ
|
||||
2022-12-28,0.9795388321537417,600225.SH
|
||||
2022-12-29,0.6402559056339422,600056.SH
|
||||
2022-12-30,0.9466308655445547,002357.SZ
|
||||
2023-01-03,0.6849950582517478,002031.SZ
|
||||
2023-01-04,0.8958700703884613,003010.SZ
|
||||
2023-01-05,0.9901544872773684,002357.SZ
|
||||
2023-01-06,0.7029762528454185,000929.SZ
|
||||
2023-01-09,1.2070723183050875,002279.SZ
|
||||
2023-01-10,0.28632510343867906,002933.SZ
|
||||
2023-01-11,0.7059503351778397,002576.SZ
|
||||
2023-01-12,1.700028635026902,002576.SZ
|
||||
2023-01-13,1.4228228373146723,002043.SZ
|
||||
2023-01-16,0.24930703006686591,600958.SH
|
||||
2023-01-17,1.0616927130654037,603882.SH
|
||||
2023-01-18,0.6166412038694548,000739.SZ
|
||||
2023-01-19,0.5967697229641841,603806.SH
|
||||
2023-01-20,0.8290879039003781,600705.SH
|
||||
2023-01-30,1.0826864888349266,000972.SZ
|
||||
2023-01-31,1.7476350470413293,605133.SH
|
||||
2023-02-01,1.0698795326344217,002297.SZ
|
||||
2023-02-02,1.168956058233029,002762.SZ
|
||||
2023-02-03,0.6068761459217956,002474.SZ
|
||||
2023-02-06,1.3603267774479497,002855.SZ
|
||||
2023-02-07,1.3722562072579707,002167.SZ
|
||||
2023-02-08,1.444800461687164,002117.SZ
|
||||
2023-02-09,0.6478721098934555,600501.SH
|
||||
2023-02-10,1.7330712792214502,002122.SZ
|
||||
2023-02-13,1.0751336841418047,603711.SH
|
||||
2023-02-14,0.858121706097957,002354.SZ
|
||||
2023-02-15,1.0628443879922715,600817.SH
|
||||
2023-02-16,1.0941227999628862,002660.SZ
|
||||
2023-02-17,0.5452970336991657,002792.SZ
|
||||
2023-02-20,0.7452925786277558,600817.SH
|
||||
2023-02-21,1.2263444506836183,601360.SH
|
||||
2023-02-22,0.8498400500947443,002882.SZ
|
||||
2023-02-23,1.3778772059701936,002942.SZ
|
||||
2023-02-24,0.8116211264751758,002942.SZ
|
||||
2023-02-27,1.369491951000112,600118.SH
|
||||
2023-02-28,1.7437044662527195,600325.SH
|
||||
2023-03-01,0.6172338223208104,002350.SZ
|
||||
2023-03-02,0.9753294078191806,002261.SZ
|
||||
2023-03-03,0.9460072368251595,605389.SH
|
||||
2023-03-06,0.7661730237898733,000977.SZ
|
||||
2023-03-07,1.5306012129925908,601728.SH
|
||||
2023-03-08,1.7347243229852956,603042.SH
|
||||
2023-03-09,1.7785688963407722,601698.SH
|
||||
2023-03-10,1.794639030708944,002808.SZ
|
||||
2023-03-13,2.2765957078169055,601728.SH
|
||||
2023-03-14,1.5770232731123273,002236.SZ
|
||||
2023-03-15,1.9886076279595977,601698.SH
|
||||
2023-03-16,1.7538871949426555,601138.SH
|
||||
2023-03-17,1.2850616649676168,000506.SZ
|
||||
2023-03-20,0.6617355633181617,601117.SH
|
||||
2023-03-21,1.2834165832572753,600633.SH
|
||||
2023-03-22,1.286625601927238,002803.SZ
|
||||
2023-03-23,1.2442366849499193,601138.SH
|
||||
2023-03-24,1.7385288121049993,601138.SH
|
||||
2023-03-27,0.5271836596864287,600633.SH
|
||||
2023-03-28,0.9233261884964775,000890.SZ
|
||||
2023-03-29,1.0388156797328032,600633.SH
|
||||
2023-03-30,0.880222808466912,600975.SH
|
||||
2023-03-31,1.7723670660012394,002153.SZ
|
||||
2023-04-03,1.4447814388081068,600633.SH
|
||||
2023-04-04,0.9805981968002965,000988.SZ
|
||||
2023-04-06,1.2735568908129031,002558.SZ
|
||||
2023-04-07,0.5977729773368881,002222.SZ
|
||||
2023-04-10,0.36120306701232185,000032.SZ
|
||||
2023-04-11,2.0134197062348904,603258.SH
|
||||
2023-04-12,0.6807091195842823,603888.SH
|
||||
2023-04-13,1.5510435282176684,600415.SH
|
||||
2023-04-14,1.6158618609191548,603258.SH
|
||||
2023-04-17,0.5935406330588169,603918.SH
|
||||
2023-04-18,1.438798944751228,603258.SH
|
||||
2023-04-19,0.4851330354034662,002975.SZ
|
||||
2023-04-20,0.17004215747506052,600415.SH
|
||||
2023-04-21,1.3733089702528274,601595.SH
|
||||
2023-04-24,2.3249160418531685,603258.SH
|
||||
2023-04-25,2.4887955829326054,601858.SH
|
||||
2023-04-26,1.9420082198135482,601019.SH
|
||||
2023-04-27,2.3040109178691113,601811.SH
|
||||
2023-04-28,1.0754625899722956,601811.SH
|
||||
2023-05-04,1.6688121146522907,601336.SH
|
||||
2023-05-05,1.1037723664352612,601989.SH
|
||||
2023-05-08,1.6994199603704685,601288.SH
|
||||
2023-05-09,1.2636377329259567,002354.SZ
|
||||
2023-05-10,1.2628967915122853,601949.SH
|
||||
2023-05-11,0.8020741700988911,603083.SH
|
||||
2023-05-12,0.22312816960298115,600629.SH
|
||||
2023-05-15,0.7341052846591558,002229.SZ
|
||||
2023-05-16,0.6350705971737554,603268.SH
|
||||
2023-05-17,1.0396627856239795,603958.SH
|
||||
2023-05-18,1.4091099521269763,601858.SH
|
||||
2023-05-19,0.6341161328902458,600239.SH
|
||||
2023-05-22,0.4664478043150085,603798.SH
|
||||
2023-05-23,0.3950180406443093,002864.SZ
|
||||
2023-05-24,0.9532057286987137,002366.SZ
|
||||
2023-05-25,0.661525047825837,605011.SH
|
||||
2023-05-26,0.873646794491419,600088.SH
|
||||
2023-05-29,1.0161343809163572,600636.SH
|
||||
2023-05-30,1.8522924730896868,603918.SH
|
||||
2023-05-31,0.14065827549083917,002315.SZ
|
||||
2023-06-01,1.0647192154325815,002229.SZ
|
||||
2023-06-02,1.0897714474656055,605028.SH
|
||||
2023-06-05,0.818149194152834,002995.SZ
|
||||
2023-06-06,1.1559913886165554,002229.SZ
|
||||
2023-06-07,0.9730919792856488,603933.SH
|
||||
2023-06-08,1.1740853193005574,003010.SZ
|
||||
2023-06-09,0.7055820145524615,002395.SZ
|
||||
2023-06-12,0.8768369889703852,000977.SZ
|
||||
2023-06-13,0.5333934871843615,600839.SH
|
||||
2023-06-14,1.1828705214010444,002229.SZ
|
||||
2023-06-15,1.9054644381740913,600602.SH
|
||||
2023-06-16,1.6671793256997451,002920.SZ
|
||||
2023-06-19,0.4424093682681172,002194.SZ
|
||||
2023-06-20,0.7166566485622967,600100.SH
|
||||
2023-06-21,1.185368125310508,600592.SH
|
||||
2023-06-26,0.49477817284107434,605016.SH
|
||||
2023-06-27,0.6467017315354233,002865.SZ
|
||||
2023-06-28,1.4462997720570885,600310.SH
|
||||
2023-06-29,0.9079748876905797,000809.SZ
|
||||
2023-06-30,1.1417365323043627,002920.SZ
|
||||
2023-07-03,1.0292231512798002,600105.SH
|
||||
2023-07-04,0.9764499369108617,002355.SZ
|
||||
2023-07-05,1.1950967963313073,603489.SH
|
||||
2023-07-06,0.8067305519266362,603809.SH
|
||||
2023-07-07,-0.11113958569144997,603786.SH
|
||||
2023-07-10,1.4365223354022805,002835.SZ
|
||||
2023-07-11,0.9055036034028278,603767.SH
|
||||
2023-07-12,0.662603535490377,002265.SZ
|
||||
2023-07-13,0.6580169744401991,605005.SH
|
||||
2023-07-14,0.7806145283148259,002284.SZ
|
||||
2023-07-17,0.8928179964563782,002616.SZ
|
||||
2023-07-18,1.0102033286200784,603709.SH
|
||||
2023-07-19,0.28926601683884473,603429.SH
|
||||
2023-07-20,1.0778442223423874,603709.SH
|
||||
2023-07-21,0.49418969039024113,000068.SZ
|
||||
2023-07-24,1.152792861172028,002172.SZ
|
||||
2023-07-25,-0.48175589465657037,000656.SZ
|
||||
2023-07-26,0.9348834119551785,000608.SZ
|
||||
2023-07-27,1.5360069738010982,600231.SH
|
||||
2023-07-28,1.5514157372959714,600782.SH
|
||||
2023-07-31,0.31996521869248884,000750.SZ
|
||||
2023-08-01,0.5474246928163892,601860.SH
|
||||
2023-08-02,1.277218052964389,600162.SH
|
||||
2023-08-03,0.784739789704388,002400.SZ
|
||||
2023-08-04,1.5311245596423297,002310.SZ
|
||||
2023-08-07,1.3664999923727883,002377.SZ
|
||||
2023-08-08,0.6775719496805408,000004.SZ
|
||||
2023-08-09,1.0473202246778897,003005.SZ
|
||||
2023-08-10,0.7095500932241833,600610.SH
|
||||
2023-08-11,0.9061227536071346,000656.SZ
|
||||
2023-08-14,1.712926297349448,600272.SH
|
||||
2023-08-15,0.7173624078005385,000656.SZ
|
||||
2023-08-16,1.0612842867590515,600155.SH
|
||||
2023-08-17,-0.11242980180119416,002898.SZ
|
||||
2023-08-18,0.7958378602153298,002221.SZ
|
||||
2023-08-21,1.2818341414622505,000006.SZ
|
||||
2023-08-22,1.4925485997390293,000006.SZ
|
||||
2023-08-23,1.0205726179128791,002178.SZ
|
||||
2023-08-24,0.5123818439671383,603000.SH
|
||||
2023-08-25,1.1561261254770698,600684.SH
|
||||
2023-08-28,1.0243035845865234,600602.SH
|
||||
2023-08-29,0.48711445602297804,002279.SZ
|
||||
2023-08-30,1.5892134655969756,002279.SZ
|
||||
2023-08-31,0.8832038176770832,002235.SZ
|
||||
2023-09-01,0.7778267621876187,002527.SZ
|
||||
2023-09-04,0.9715794190221472,000766.SZ
|
||||
2023-09-05,0.6363690681522609,000701.SZ
|
||||
2023-09-06,1.1626954331771984,603078.SH
|
||||
2023-09-07,0.8950438021027404,601001.SH
|
||||
2023-09-08,0.4907685411177441,603722.SH
|
||||
2023-09-11,1.4880263364860575,000851.SZ
|
||||
2023-09-12,0.7192761393042792,603329.SH
|
||||
2023-09-13,0.7091837012523252,000158.SZ
|
||||
2023-09-14,0.7548676419597591,601699.SH
|
||||
2023-09-15,0.9847401608369946,600257.SH
|
||||
2023-09-18,1.7523740102808545,600814.SH
|
||||
2023-09-19,0.9311114338734422,000536.SZ
|
||||
2023-09-20,1.5094699709720083,002902.SZ
|
||||
2023-09-21,1.10032325281936,603890.SH
|
||||
2023-09-22,0.5455145772639567,001268.SZ
|
||||
2023-09-25,1.5392432178391338,600895.SH
|
||||
2023-09-26,1.139513204589203,002642.SZ
|
||||
2023-09-27,1.0155696225867201,002827.SZ
|
||||
2023-09-28,0.7631131516304462,001298.SZ
|
||||
2023-10-09,1.3121583124085132,002513.SZ
|
||||
2023-10-10,1.192726847273738,600895.SH
|
||||
2023-10-11,1.378300878017007,600895.SH
|
||||
2023-10-12,1.3374848728915072,000010.SZ
|
||||
2023-10-13,1.247352917522287,000536.SZ
|
||||
2023-10-16,1.0205919288199263,002456.SZ
|
||||
2023-10-17,1.8124967402525871,601127.SH
|
||||
2023-10-18,1.6545503123646252,002456.SZ
|
||||
2023-10-19,1.403565591711023,603009.SH
|
||||
2023-10-20,1.479674671497312,002176.SZ
|
||||
2023-10-23,1.1364881895480998,002337.SZ
|
||||
2023-10-24,1.603597993080332,002122.SZ
|
||||
2023-10-25,1.2135837368610372,603123.SH
|
||||
2023-10-26,1.0507834567170797,002671.SZ
|
||||
2023-10-27,1.2313059730365872,603266.SH
|
||||
2023-10-30,1.4721372697796935,603353.SH
|
||||
2023-10-31,0.8329626816487755,600599.SH
|
||||
2023-11-01,0.7167096735509632,603186.SH
|
||||
2023-11-02,1.2802635893255563,601633.SH
|
||||
2023-11-03,1.2590421473918265,002640.SZ
|
||||
2023-11-06,0.7531355226612281,003040.SZ
|
||||
2023-11-07,0.6577034595225779,603768.SH
|
||||
2023-11-08,1.8511623446368441,002889.SZ
|
||||
2023-11-09,1.4717991539794943,603266.SH
|
||||
2023-11-10,0.46890167407485095,002584.SZ
|
||||
2023-11-13,1.1394805956388967,603220.SH
|
||||
2023-11-14,1.0217982309018654,603883.SH
|
||||
2023-11-15,0.6661740992600602,600106.SH
|
||||
2023-11-16,1.3719183134079516,002584.SZ
|
||||
2023-11-17,1.5131093025579179,603266.SH
|
||||
2023-11-20,1.6229596862404188,002103.SZ
|
||||
2023-11-21,0.9595377845746105,002291.SZ
|
||||
2023-11-22,1.1541727944363123,600506.SH
|
||||
2023-11-23,0.6172428975445288,000029.SZ
|
||||
2023-11-24,0.3581002329377547,000710.SZ
|
||||
2023-11-27,1.0865084227106108,002691.SZ
|
||||
2023-11-28,0.36551005528336356,603721.SH
|
||||
2023-11-29,0.6325689777455243,600780.SH
|
||||
2023-11-30,0.9673540111120547,600148.SH
|
||||
2023-12-01,1.6031005266953247,002238.SZ
|
||||
2023-12-04,0.3139675086881397,601156.SH
|
||||
2023-12-05,0.4471787298728605,002238.SZ
|
||||
2023-12-06,1.519722809770461,600128.SH
|
||||
2023-12-07,0.9306959636346697,601599.SH
|
||||
2023-12-08,1.11955592447384,600678.SH
|
||||
2023-12-11,0.920549654717692,600981.SH
|
||||
2023-12-12,1.1192972102468268,603999.SH
|
||||
2023-12-13,1.059999047897406,601595.SH
|
||||
2023-12-14,1.3135891954923238,605577.SH
|
||||
2023-12-15,1.0121314183460337,603358.SH
|
||||
2023-12-18,1.2923561000198138,601595.SH
|
||||
2023-12-19,0.4727877781694898,002856.SZ
|
||||
2023-12-20,-0.01336745195249621,002937.SZ
|
||||
2023-12-21,0.9432845860894691,603825.SH
|
||||
2023-12-22,1.5927408932377043,603167.SH
|
||||
2023-12-25,1.1489781197830256,001314.SZ
|
||||
2023-12-26,2.075405615830143,605117.SH
|
||||
2023-12-27,1.1634401553385165,002952.SZ
|
||||
2023-12-28,1.9276471606617283,600610.SH
|
||||
2023-12-29,1.1664703290672742,600621.SH
|
||||
2024-01-02,1.4036288516782467,603396.SH
|
||||
2024-01-03,0.6064916104024441,603052.SH
|
||||
2024-01-04,0.27194078032727403,000810.SZ
|
||||
2024-01-05,0.7248236406349026,002884.SZ
|
||||
2024-01-08,1.917000409535365,600983.SH
|
||||
2024-01-09,1.3670026723187405,601225.SH
|
||||
2024-01-10,0.6428111801429847,002419.SZ
|
||||
2024-01-11,0.8603468507895841,603097.SH
|
||||
2024-01-12,0.8616154280062541,001269.SZ
|
||||
2024-01-15,1.6162131927011785,002140.SZ
|
||||
2024-01-16,1.6278064283944136,605366.SH
|
||||
2024-01-17,1.2234956634332685,603556.SH
|
||||
2024-01-18,0.2930411130350266,601865.SH
|
||||
2024-01-19,0.5463003069500746,002033.SZ
|
||||
2024-01-22,1.4568410090998594,600438.SH
|
||||
2024-01-23,0.4796543547584619,002056.SZ
|
||||
2024-01-24,1.1933657317735877,000921.SZ
|
||||
2024-01-25,1.1196073236619992,000070.SZ
|
||||
2024-01-26,0.22116166302548493,601225.SH
|
||||
2024-01-29,1.0247482544629951,000717.SZ
|
||||
2024-01-30,1.0239528607343813,000651.SZ
|
||||
2024-01-31,1.371059822546121,600188.SH
|
||||
2024-02-01,0.9952706097104108,601288.SH
|
||||
2024-02-02,1.1495734266360917,601658.SH
|
||||
2024-02-05,0.29949274220153294,600018.SH
|
||||
2024-02-06,1.299845239071017,600004.SH
|
||||
2024-02-07,1.000836675958177,600350.SH
|
||||
2024-02-08,0.6401652900537433,600131.SH
|
||||
2024-02-19,0.08475967168317824,002749.SZ
|
||||
2024-02-20,0.5804821425310229,002085.SZ
|
||||
2024-02-21,0.5362126107341831,600053.SH
|
||||
2024-02-22,1.206204153162725,600639.SH
|
||||
2024-02-23,1.6848572441872742,603958.SH
|
||||
2024-02-26,-0.22984388407558198,000017.SZ
|
||||
2024-02-27,-0.02096017812243845,603011.SH
|
||||
2024-02-28,1.1590591855669556,603933.SH
|
||||
2024-02-29,0.1819266114797644,002085.SZ
|
||||
2024-03-01,0.6727454630028324,600571.SH
|
||||
2024-03-04,1.113038537485005,601058.SH
|
||||
2024-03-05,0.8578265704409984,600160.SH
|
||||
2024-03-06,1.0085170866918751,600331.SH
|
||||
2024-03-07,1.0605223005794961,603220.SH
|
||||
2024-03-08,0.9054891550652612,601001.SH
|
||||
2024-03-11,1.3538575989659345,002085.SZ
|
||||
2024-03-12,0.9889456011580702,603580.SH
|
||||
2024-03-13,1.212835645663162,603499.SH
|
||||
2024-03-14,0.648581745020845,603773.SH
|
||||
2024-03-15,1.2772879612108654,002902.SZ
|
||||
2024-03-18,0.8179927345023094,002591.SZ
|
||||
2024-03-19,1.6214555790374034,000908.SZ
|
||||
2024-03-20,2.2115174455952333,603580.SH
|
||||
2024-03-21,1.545209963736103,603499.SH
|
||||
2024-03-22,0.6838446792203144,605180.SH
|
||||
2024-03-25,0.31578511420724414,603985.SH
|
||||
2024-03-26,1.781904626165001,605198.SH
|
||||
2024-03-27,-0.16938824239950148,603006.SH
|
||||
2024-03-28,0.9649663433118687,002739.SZ
|
||||
2024-03-29,0.3320943922901912,603499.SH
|
||||
2024-04-01,0.9952388185722638,603878.SH
|
||||
2024-04-02,0.9364155422135148,000603.SZ
|
||||
2024-04-03,0.7982619216871609,603878.SH
|
||||
2024-04-08,1.358471943036704,605198.SH
|
||||
2024-04-09,0.18053926667744466,002574.SZ
|
||||
2024-04-10,1.3153422390391303,000528.SZ
|
||||
2024-04-11,1.3657881440950266,002716.SZ
|
||||
2024-04-12,1.5676274828749224,000975.SZ
|
||||
2024-04-15,-0.13567914644119722,000157.SZ
|
||||
2024-04-16,0.93425932274213,600690.SH
|
||||
2024-04-17,1.327024106545604,001696.SZ
|
||||
2024-04-18,1.8296222086183656,600210.SH
|
||||
2024-04-19,0.578844593874061,002716.SZ
|
||||
2024-04-22,1.0676680912492613,603050.SH
|
||||
2024-04-23,1.5669359888338785,002167.SZ
|
||||
2024-04-24,0.7255550236691839,603017.SH
|
||||
2024-04-25,1.997033122105935,000737.SZ
|
||||
2024-04-26,1.1631716891035941,001696.SZ
|
||||
2024-04-29,0.5416930775042976,002715.SZ
|
||||
2024-04-30,0.4865144780512015,002167.SZ
|
||||
2024-05-06,1.6985755115133785,002125.SZ
|
||||
2024-05-07,1.5949401523876496,600789.SH
|
||||
2024-05-08,0.978355142201031,605177.SH
|
||||
2024-05-09,0.1605421067543249,603018.SH
|
||||
2024-05-10,0.6676316772066117,002264.SZ
|
||||
2024-05-13,0.733933392442504,603222.SH
|
||||
2024-05-14,0.5547173325807716,603025.SH
|
||||
2024-05-15,1.3155422143795996,000913.SZ
|
||||
2024-05-16,1.4221204991742524,001696.SZ
|
||||
2024-05-17,1.1562985723597323,600383.SH
|
||||
2024-05-20,0.4695922603130449,002264.SZ
|
||||
2024-05-21,1.8689264604736717,000002.SZ
|
||||
2024-05-22,1.3121218875647056,600843.SH
|
||||
2024-05-23,0.7664908704814034,002225.SZ
|
||||
2024-05-24,0.7589930057344444,600530.SH
|
||||
2024-05-27,1.19159766228358,601919.SH
|
||||
2024-05-28,1.0019105122452157,600726.SH
|
||||
2024-05-29,0.4881622770453212,601019.SH
|
||||
2024-05-30,1.4287668616222775,002938.SZ
|
||||
2024-05-31,1.0528834774374836,601019.SH
|
||||
2024-06-03,0.7762962067944593,603530.SH
|
||||
2024-06-04,1.7857240618535462,601117.SH
|
||||
2024-06-05,1.3040177518621878,603530.SH
|
||||
2024-06-06,1.0260169367235976,600900.SH
|
||||
2024-06-07,0.8588448524419576,603172.SH
|
||||
2024-06-11,1.1812779982242239,600584.SH
|
||||
2024-06-12,1.3882505145638304,003026.SZ
|
||||
2024-06-13,1.4302228321179788,001339.SZ
|
||||
2024-06-14,0.9306175962728962,002889.SZ
|
||||
2024-06-17,0.6869594774433183,600530.SH
|
||||
2024-06-18,1.1904711061862112,001298.SZ
|
||||
2024-06-19,1.697703035579816,605258.SH
|
||||
2024-06-20,0.8952978126779231,600733.SH
|
||||
2024-06-21,1.6755370442001838,001298.SZ
|
||||
2024-06-24,0.7658870375300119,002252.SZ
|
||||
2024-06-25,0.5860602429129975,002485.SZ
|
||||
2024-06-26,0.3448818899550934,003031.SZ
|
||||
2024-06-27,0.5265851943756428,000793.SZ
|
||||
2024-06-28,1.9979062957915383,603838.SH
|
||||
2024-07-01,1.0825164001234038,002485.SZ
|
||||
2024-07-02,0.9075039211419761,601985.SH
|
||||
2024-07-03,1.409183048681464,600025.SH
|
||||
2024-07-04,0.904029442040085,600025.SH
|
||||
2024-07-05,1.0426545217304006,001286.SZ
|
||||
2024-07-08,1.698642218338944,600584.SH
|
||||
2024-07-09,0.6470181687793882,002463.SZ
|
||||
2024-07-10,2.06122700428312,002947.SZ
|
||||
2024-07-11,0.7410903122804028,600601.SH
|
||||
2024-07-12,1.340083218079972,600686.SH
|
||||
2024-07-15,1.5618937384484992,600733.SH
|
||||
2024-07-16,1.758580945058497,600733.SH
|
||||
2024-07-17,1.5157121004090266,000625.SZ
|
||||
2024-07-18,0.3083377315759717,603898.SH
|
||||
2024-07-19,0.8944208661047769,000078.SZ
|
||||
2024-07-22,1.0429143541686636,002829.SZ
|
||||
2024-07-23,0.8234894660266799,002005.SZ
|
||||
2024-07-24,1.0749929990189133,600626.SH
|
||||
2024-07-25,1.880220214501819,600611.SH
|
||||
2024-07-26,1.8509059338376548,600650.SH
|
||||
2024-07-29,2.076506303850798,600817.SH
|
||||
2024-07-30,2.1084511316806602,000712.SZ
|
||||
2024-07-31,1.638115197432452,000421.SZ
|
||||
2024-08-01,1.040980502901104,600811.SH
|
||||
2024-08-02,1.7885572982299731,001696.SZ
|
||||
2024-08-05,0.9537751859233105,001379.SZ
|
||||
2024-08-06,0.3092017392193045,002461.SZ
|
||||
2024-08-07,1.466643911628964,002488.SZ
|
||||
2024-08-08,0.7882843739164674,603863.SH
|
||||
2024-08-09,0.23494689741035102,603488.SH
|
||||
2024-08-12,2.131668395298663,002488.SZ
|
||||
2024-08-13,2.0937880721661606,603978.SH
|
||||
2024-08-14,1.131943220068558,002232.SZ
|
||||
2024-08-15,0.5873737710201326,002611.SZ
|
||||
2024-08-16,1.199835048451392,000062.SZ
|
||||
2024-08-19,1.1805751411608383,600811.SH
|
||||
2024-08-20,1.5503271129180245,600661.SH
|
||||
2024-08-21,0.578554281440386,000880.SZ
|
||||
2024-08-22,1.2853820450612699,600105.SH
|
||||
2024-08-23,0.6423692277445572,002403.SZ
|
||||
2024-08-26,0.9239367963781929,002986.SZ
|
||||
2024-08-27,0.46409556994532736,605183.SH
|
||||
2024-08-28,1.0631407155165504,002647.SZ
|
||||
2024-08-29,1.1245501698512903,603639.SH
|
||||
2024-08-30,0.6361599235773499,002246.SZ
|
||||
2024-09-02,1.2492364629991852,002072.SZ
|
||||
2024-09-03,1.1926755374832714,002199.SZ
|
||||
2024-09-04,1.6204729565651932,600599.SH
|
||||
2024-09-05,1.0957778793973632,002595.SZ
|
||||
2024-09-06,1.6168023970816094,002005.SZ
|
||||
2024-09-09,0.8605482386029011,002456.SZ
|
||||
2024-09-10,1.3546410789459808,002717.SZ
|
||||
2024-09-11,0.5980254226205031,603196.SH
|
||||
2024-09-12,0.9187303745519808,002640.SZ
|
||||
2024-09-13,0.6801121207893971,600358.SH
|
||||
2024-09-18,1.079264299860147,600550.SH
|
||||
2024-09-19,1.9477947178464494,002786.SZ
|
||||
2024-09-20,1.1194340099294373,002123.SZ
|
||||
2024-09-23,1.6714675557693415,002453.SZ
|
||||
2024-09-24,1.3675826070587052,002453.SZ
|
||||
2024-09-25,0.7076301816428471,000627.SZ
|
||||
2024-09-26,0.8882412710980511,603398.SH
|
||||
2024-09-27,0.7521975260737696,000002.SZ
|
||||
2024-09-30,0.7230331038260748,600570.SH
|
||||
2024-10-08,1.0593517928482987,600550.SH
|
||||
2024-10-09,1.0220239311477122,000560.SZ
|
||||
2024-10-10,1.234368197028218,600606.SH
|
||||
2024-10-11,0.8664109594444052,000750.SZ
|
||||
2024-10-14,1.8450296800922745,603822.SH
|
||||
2024-10-15,1.8353472785641625,002583.SZ
|
||||
2024-10-16,0.9543901161454763,000536.SZ
|
||||
2024-10-17,1.0015738096430133,600619.SH
|
||||
2024-10-18,1.2591757462030437,600622.SH
|
||||
2024-10-21,1.364327760137209,002583.SZ
|
||||
2024-10-22,2.0756244365506276,002685.SZ
|
||||
2024-10-23,0.38458758527962894,000004.SZ
|
||||
2024-10-24,1.5124132416681377,002094.SZ
|
||||
2024-10-25,1.4347033681199353,600839.SH
|
||||
2024-10-28,1.5745059258009038,001696.SZ
|
||||
2024-10-29,1.8529251428567266,002628.SZ
|
||||
2024-10-30,1.7713369405635928,002717.SZ
|
||||
2024-10-31,1.1151492405672683,001696.SZ
|
||||
2024-11-01,0.7164870376829126,600246.SH
|
||||
2024-11-04,1.769839917258226,002384.SZ
|
||||
2024-11-05,1.092728971983151,600212.SH
|
||||
2024-11-06,0.6671879019120053,603336.SH
|
||||
2024-11-07,0.5602821558966659,002416.SZ
|
||||
2024-11-08,1.1858491806130256,001696.SZ
|
||||
2024-11-11,1.3270787455846025,002456.SZ
|
||||
2024-11-12,1.4104327679900899,603398.SH
|
||||
2024-11-13,1.2197272384042277,600839.SH
|
||||
2024-11-14,0.6314176029145626,603697.SH
|
||||
2024-11-15,-0.13737144527610326,603268.SH
|
||||
2024-11-18,0.7120357386859574,000605.SZ
|
||||
2024-11-19,0.42252100937945863,002469.SZ
|
||||
2024-11-20,1.007664521961136,600619.SH
|
||||
2024-11-21,0.9709988043471746,002694.SZ
|
||||
2024-11-22,-0.01837433439991007,000415.SZ
|
||||
2024-11-25,0.7354373323734968,000973.SZ
|
||||
2024-11-26,1.4832316580516,002348.SZ
|
||||
2024-11-27,1.0092630330488712,002899.SZ
|
||||
2024-11-28,1.6528548440755675,600327.SH
|
||||
2024-11-29,1.5310967942763765,003010.SZ
|
||||
2024-12-02,0.9985829805322318,000981.SZ
|
||||
2024-12-03,1.1009990341574898,600628.SH
|
||||
2024-12-04,1.4406720428536548,000679.SZ
|
||||
2024-12-05,1.5166810165916147,002640.SZ
|
||||
2024-12-06,1.5480151614841313,003021.SZ
|
||||
2024-12-09,0.7658546815142482,600593.SH
|
||||
2024-12-10,1.6164052159572586,000981.SZ
|
||||
2024-12-11,1.0871733525157767,000882.SZ
|
||||
2024-12-12,1.3618111397645727,002265.SZ
|
||||
2024-12-13,1.1378238320491278,605006.SH
|
||||
2024-12-16,0.5942319336561258,603533.SH
|
||||
2024-12-17,0.8922367248452927,600503.SH
|
||||
2024-12-18,0.29961260322010785,600796.SH
|
||||
2024-12-19,0.5126832965650956,002031.SZ
|
||||
2024-12-20,0.4049032161823233,600172.SH
|
||||
2024-12-23,0.378293448285725,600172.SH
|
||||
2024-12-24,0.7270156370860105,600673.SH
|
||||
2024-12-25,0.8938311286037234,603610.SH
|
||||
2024-12-26,0.8824820051198142,603893.SH
|
||||
2024-12-27,1.3362649834557183,002068.SZ
|
||||
2024-12-30,0.35138947858631225,600183.SH
|
||||
2024-12-31,1.1948708599448123,600506.SH
|
||||
2025-01-02,1.4558889081434663,002730.SZ
|
||||
2025-01-03,0.7128978848132725,600707.SH
|
||||
2025-01-06,0.7650844601290192,600803.SH
|
||||
2025-01-07,1.0635380792047187,600405.SH
|
||||
2025-01-08,0.7951289392847068,002837.SZ
|
||||
2025-01-09,1.0252099583039425,603306.SH
|
||||
2025-01-10,0.30155175957668673,002916.SZ
|
||||
2025-01-13,0.9708056750766493,603986.SH
|
||||
2025-01-14,0.7870312186305877,000063.SZ
|
||||
2025-01-15,1.6878038525240218,002917.SZ
|
||||
2025-01-16,0.8478674953917144,002449.SZ
|
||||
2025-01-17,1.0235316084839805,600693.SH
|
||||
2025-01-20,0.8827244215589688,002730.SZ
|
||||
2025-01-21,0.9142145000698546,002379.SZ
|
||||
2025-01-22,1.632439883586815,603228.SH
|
||||
2025-01-23,0.5950475737248152,603121.SH
|
||||
2025-01-24,1.512800316493977,000880.SZ
|
||||
2025-01-27,0.280247948525134,002532.SZ
|
||||
2025-02-05,0.7525772298409897,600722.SH
|
||||
2025-02-06,-0.059951823103750426,601869.SH
|
||||
2025-02-07,0.45215019046862537,000977.SZ
|
||||
2025-02-10,1.2658737579405763,003007.SZ
|
||||
2025-02-11,1.0408324160127955,603106.SH
|
||||
2025-02-12,1.6119030499830551,000856.SZ
|
||||
2025-02-13,1.59310517514652,002929.SZ
|
||||
2025-02-14,1.5077797651403821,002410.SZ
|
||||
2025-02-17,1.3860825096123015,002044.SZ
|
||||
2025-02-18,1.1270921559091267,002779.SZ
|
||||
2025-02-19,1.187211682113541,603286.SH
|
||||
2025-02-20,1.2669618354736996,605488.SH
|
||||
2025-02-21,1.025703816755235,600588.SH
|
||||
2025-02-24,1.5648046166656084,600602.SH
|
||||
2025-02-25,0.8745657353291226,002896.SZ
|
||||
2025-02-26,0.9230669528117501,000032.SZ
|
||||
2025-02-27,0.9367812438709472,603200.SH
|
||||
2025-02-28,1.0306691811926367,002276.SZ
|
||||
2025-03-03,0.6479828536311146,002044.SZ
|
||||
2025-03-04,0.9022328614595625,001306.SZ
|
||||
2025-03-05,0.13297506000529996,002036.SZ
|
||||
2025-03-06,1.0473094549911606,001309.SZ
|
||||
2025-03-07,0.8908293616489041,002580.SZ
|
||||
2025-03-10,0.9714032058498194,600126.SH
|
||||
2025-03-11,1.5066583997940508,000678.SZ
|
||||
2025-03-12,1.2822828774552384,603059.SH
|
||||
2025-03-13,1.1088751386616387,003038.SZ
|
||||
2025-03-14,1.2806594439606795,002713.SZ
|
||||
2025-03-17,0.9239379376977839,001256.SZ
|
||||
2025-03-18,1.155518645532713,600610.SH
|
||||
2025-03-19,1.566452580640379,605008.SH
|
||||
2025-03-20,1.6658038834776503,603949.SH
|
||||
2025-03-21,0.3002046262826852,603112.SH
|
||||
2025-03-24,0.969025415982965,001256.SZ
|
||||
2025-03-25,1.0089862742297053,002300.SZ
|
||||
2025-03-26,0.9563316876479583,600468.SH
|
||||
2025-03-27,0.7596417124134163,000633.SZ
|
||||
2025-03-28,0.8823950983342874,000006.SZ
|
||||
2025-03-31,1.5478113985550597,002851.SZ
|
||||
2025-04-01,0.5961759062409911,002847.SZ
|
||||
2025-04-02,0.1297394192678443,002093.SZ
|
||||
2025-04-03,1.6113567681708816,603353.SH
|
||||
2025-04-07,1.6243924524047828,601008.SH
|
||||
2025-04-08,1.5465757662622548,600598.SH
|
||||
2025-04-09,1.2262057864670963,601952.SH
|
||||
|
72
main/train/test2.tsv
Normal file
72
main/train/test2.tsv
Normal file
@@ -0,0 +1,72 @@
|
||||
trade_date,score,ts_code
|
||||
2024-12-09,1.1968650846005326,600593.SH
|
||||
2024-12-10,0.21490252296809745,002611.SZ
|
||||
2024-12-11,0.5721914798956016,002321.SZ
|
||||
2024-12-12,0.6509338263544048,600628.SH
|
||||
2024-12-13,2.1288113028385376,600628.SH
|
||||
2024-12-16,1.378346480524284,002086.SZ
|
||||
2024-12-17,1.45986967550941,002741.SZ
|
||||
2024-12-18,1.3436778254529067,600579.SH
|
||||
2024-12-19,0.41218776805787716,600796.SH
|
||||
2024-12-20,1.0840917563770454,603421.SH
|
||||
2024-12-23,1.00141172278312,600889.SH
|
||||
2024-12-24,1.0354156548919864,600725.SH
|
||||
2024-12-25,0.9562524807100355,600066.SH
|
||||
2024-12-26,1.1279048294352958,002916.SZ
|
||||
2024-12-27,0.6532174116474766,002068.SZ
|
||||
2024-12-30,-0.1308794790538431,002918.SZ
|
||||
2024-12-31,0.7160474599127873,600857.SH
|
||||
2025-01-02,1.5067649520721882,002449.SZ
|
||||
2025-01-03,0.9282246137432282,603379.SH
|
||||
2025-01-06,0.6797051204009213,603893.SH
|
||||
2025-01-07,0.9376184079476354,603236.SH
|
||||
2025-01-08,0.9064516934700023,603308.SH
|
||||
2025-01-09,0.9314493554789942,000880.SZ
|
||||
2025-01-10,0.5025761501709369,600584.SH
|
||||
2025-01-13,0.7483210862212708,000063.SZ
|
||||
2025-01-14,1.2632673941368837,000063.SZ
|
||||
2025-01-15,1.8580661802761587,002917.SZ
|
||||
2025-01-16,1.1918089652002073,600693.SH
|
||||
2025-01-17,0.8288939941365315,600693.SH
|
||||
2025-01-20,0.677726091977902,002577.SZ
|
||||
2025-01-21,1.8336548268410158,603893.SH
|
||||
2025-01-22,1.0395051538956546,000573.SZ
|
||||
2025-01-23,0.4308220427423068,003021.SZ
|
||||
2025-01-24,1.8057941775723685,002862.SZ
|
||||
2025-01-27,1.216662909774701,002779.SZ
|
||||
2025-02-05,0.8484867753831473,603990.SH
|
||||
2025-02-06,0.5038824073142949,001380.SZ
|
||||
2025-02-07,0.7672133571524726,002031.SZ
|
||||
2025-02-10,0.5417223016033719,000681.SZ
|
||||
2025-02-11,0.9399374716518157,000034.SZ
|
||||
2025-02-12,1.8742056631297925,000856.SZ
|
||||
2025-02-13,1.4837670146272484,600633.SH
|
||||
2025-02-14,1.2043600916692372,605488.SH
|
||||
2025-02-17,1.1259104542173328,603918.SH
|
||||
2025-02-18,1.1806931791732853,600126.SH
|
||||
2025-02-19,1.020437698817749,603956.SH
|
||||
2025-02-20,0.5818349669113919,003021.SZ
|
||||
2025-02-21,1.0941497070930342,603950.SH
|
||||
2025-02-24,1.867258980329339,600602.SH
|
||||
2025-02-25,0.8646726218943293,002691.SZ
|
||||
2025-02-26,1.2878484406363957,002245.SZ
|
||||
2025-02-27,1.3013902577988068,600173.SH
|
||||
2025-02-28,0.7804376426721501,603040.SH
|
||||
2025-03-03,0.45593268249434266,002345.SZ
|
||||
2025-03-04,0.9265705061587579,600589.SH
|
||||
2025-03-05,0.766962270753268,002575.SZ
|
||||
2025-03-06,0.7030260458187082,601100.SH
|
||||
2025-03-07,0.924011193171594,002896.SZ
|
||||
2025-03-10,1.0811487252993004,600126.SH
|
||||
2025-03-11,1.272392599656189,002896.SZ
|
||||
2025-03-12,1.0905437448562905,002276.SZ
|
||||
2025-03-13,1.0688995313878895,003038.SZ
|
||||
2025-03-14,1.2418913857438587,001256.SZ
|
||||
2025-03-17,1.004550155323,001256.SZ
|
||||
2025-03-18,0.7517848278576412,600403.SH
|
||||
2025-03-19,1.5106246878723002,605008.SH
|
||||
2025-03-20,1.1509811695536982,600610.SH
|
||||
2025-03-21,0.6033998331536018,603196.SH
|
||||
2025-03-24,0.3456173948047773,002345.SZ
|
||||
2025-03-25,1.470314131581159,600320.SH
|
||||
2025-03-26,0.745243100558546,603325.SH
|
||||
|
Reference in New Issue
Block a user