Merge branch 'dev'
# Conflicts: # .gitignore # main/train/Classify2.ipynb
This commit is contained in:
8505
main/train/Classify/Classify2.ipynb
Normal file
8505
main/train/Classify/Classify2.ipynb
Normal file
File diff suppressed because one or more lines are too long
66
main/train/Classify/Classify2.py
Normal file
66
main/train/Classify/Classify2.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Minimal demo: feed an in-memory DataFrame into Qlib via StaticDataLoader.

The script builds a tiny synthetic (datetime, instrument) panel, wraps it in a
``DataHandlerLP`` whose loader is a ``StaticDataLoader``, splits it into
segments with ``DatasetH`` and prints the "learn" view of the train segment.
"""
import numpy as np
import pandas as pd
from qlib.data.dataset import DatasetH
from qlib.utils import init_instance_by_config

# 1. Build a small synthetic panel indexed by (datetime, instrument).
#    `pd.date_range` already returns a DatetimeIndex, so no extra conversion
#    is required.
dates = pd.date_range("2020-01-01", "2020-01-10")
instruments = ["SH600000", "SH600001"]
index = pd.MultiIndex.from_product(
    [dates, instruments], names=["datetime", "instrument"]
)

data = {
    "feature_1": np.random.randn(len(index)),
    "feature_2": np.random.randn(len(index)),
    "label": np.random.randn(len(index)) * 0.01,
}
my_df = pd.DataFrame(data, index=index)
my_df.iloc[1, 0] = np.nan  # deliberately inject a missing feature value
my_df.iloc[5, 2] = np.nan  # deliberately inject a missing label value

print("----------- 原始 DataFrame -----------")
print(my_df.head())

# 2. Full handler configuration: a StaticDataLoader plus the processor lists.
data_handler_config = {
    "class": "DataHandlerLP",
    "module_path": "qlib.data.dataset.handler",
    "kwargs": {
        # Core part: the data loader is handed the DataFrame directly.
        "data_loader": {
            "class": "StaticDataLoader",
            "module_path": "qlib.data.dataset.loader",
            "kwargs": {
                "config": my_df,  # <--- the in-memory DataFrame goes in here
            },
        },
        "shared_processors": [],
        "infer_processors": [
            # Disabled in this demo:
            # {"class": "DropnaLabel", "module_path": "qlib.data.dataset.processor"},
        ],
        "learn_processors": [
            {
                "class": "Fillna",
                "module_path": "qlib.data.dataset.processor",
                "kwargs": {"fill_value": 0},
            },
        ],
    },
}

# 3. Instantiate the DataHandler from the config. The configured loader and
#    processors are set up as part of this step.
dh = init_instance_by_config(data_handler_config)
ds = DatasetH(
    dh,
    segments={
        "train": ("20190101", "20221231"),
        # Starts after the train segment ends so the two do not overlap
        # (the original started at 20220101, sharing a year with train).
        "valid": ("20230101", "20231231"),
        "test": ("20240101", "20250101"),
    },
)

# 4. Inspect the result.
#    With the configuration above, the only processor applied to the learn
#    data is Fillna(fill_value=0); DropnaLabel is commented out and no
#    normalisation processor is configured.
#    `segments` is the first positional parameter of `DatasetH.prepare`, so it
#    must not be passed a second time by keyword; `col_set` is left at its
#    default.
learn_data = ds.prepare("train", data_key="learn")
print("----------- train DataFrame -----------")

print(learn_data)
|
||||
1211
main/train/Classify/predictions_test.tsv
Normal file
1211
main/train/Classify/predictions_test.tsv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -99,7 +99,7 @@
|
||||
"cyq perf\n",
|
||||
"left merge on ['ts_code', 'trade_date']\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 9162612 entries, 0 to 9162611\n",
|
||||
"RangeIndex: 9315967 entries, 0 to 9315966\n",
|
||||
"Data columns (total 33 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
@@ -688,10 +688,10 @@
|
||||
"Calculating cs_rank_size...\n",
|
||||
"Finished cs_rank_size.\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 4819708 entries, 0 to 4819707\n",
|
||||
"RangeIndex: 4910010 entries, 0 to 4910009\n",
|
||||
"Columns: 181 entries, ts_code to cs_rank_size\n",
|
||||
"dtypes: bool(10), datetime64[ns](1), float64(165), int64(3), object(2)\n",
|
||||
"memory usage: 6.2+ GB\n",
|
||||
"memory usage: 6.3+ GB\n",
|
||||
"None\n",
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 
'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
|
||||
]
|
||||
@@ -1583,7 +1583,14 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.77it/s]\n"
|
||||
"MAD Filtering: 62%|██████▏ | 81/131 [00:08<00:05, 9.28it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.63it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1598,14 +1605,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 82%|████████▏ | 107/131 [00:12<00:02, 9.41it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.60it/s]\n"
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.97it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1645,13 +1645,13 @@
|
||||
"截面 MAD 去极值处理完成。\n",
|
||||
"feature_columns: ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', 
'399006.SZ_volume_change_rate']\n",
|
||||
"df最小日期: 2019-01-02\n",
|
||||
"df最大日期: 2025-10-10\n",
|
||||
"2056336\n",
|
||||
"df最大日期: 2025-11-21\n",
|
||||
"2056030\n",
|
||||
"train_data最小日期: 2020-01-02\n",
|
||||
"train_data最大日期: 2022-12-30\n",
|
||||
"2045675\n",
|
||||
"2135782\n",
|
||||
"test_data最小日期: 2023-01-03\n",
|
||||
"test_data最大日期: 2025-10-10\n",
|
||||
"test_data最大日期: 2025-11-21\n",
|
||||
" ts_code trade_date log_circ_mv\n",
|
||||
"0 000001.SZ 2019-01-02 16.574219\n",
|
||||
"1 000001.SZ 2019-01-03 16.583965\n",
|
||||
@@ -1954,7 +1954,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<catboost.core.CatBoostClassifier at 0x707ccc5ac1a0>"
|
||||
"<catboost.core.CatBoostClassifier at 0x7602293f6030>"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
@@ -2068,7 +2068,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"5588 2056336\n",
|
||||
"5587 2056030\n",
|
||||
" ts_code trade_date turnover_rate\n",
|
||||
"0 000001.SZ 2023-01-03 1.1307\n",
|
||||
"1 000001.SZ 2023-01-04 1.1284\n",
|
||||
@@ -2076,13 +2076,13 @@
|
||||
"3 000001.SZ 2023-01-06 0.6162\n",
|
||||
"4 000001.SZ 2023-01-09 0.5450\n",
|
||||
"... ... ... ...\n",
|
||||
"2045670 605599.SH 2025-09-26 0.3434\n",
|
||||
"2045671 605599.SH 2025-09-29 0.3943\n",
|
||||
"2045672 605599.SH 2025-09-30 0.4982\n",
|
||||
"2045673 605599.SH 2025-10-09 1.0319\n",
|
||||
"2045674 605599.SH 2025-10-10 0.8859\n",
|
||||
"2135777 605599.SH 2025-11-17 0.3820\n",
|
||||
"2135778 605599.SH 2025-11-18 0.3565\n",
|
||||
"2135779 605599.SH 2025-11-19 0.3748\n",
|
||||
"2135780 605599.SH 2025-11-20 0.3132\n",
|
||||
"2135781 605599.SH 2025-11-21 0.4580\n",
|
||||
"\n",
|
||||
"[2045675 rows x 3 columns]\n"
|
||||
"[2135782 rows x 3 columns]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2117,7 +2117,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user