Classify2-2025-5-28
This commit is contained in:
@@ -39,12 +39,24 @@
|
||||
"\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings(\"ignore\")"
|
||||
"warnings.filterwarnings(\"ignore\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4a481c60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 设置使用核心\n",
|
||||
"import os\n",
|
||||
"os.environ[\"MODIN_CPUS\"] = \"4\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a79cafb06a7e0e43",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -58,13 +70,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"daily data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"daily data\n",
|
||||
"daily basic\n",
|
||||
"inner merge on ['ts_code', 'trade_date']\n",
|
||||
"stk limit\n",
|
||||
@@ -75,7 +81,7 @@
|
||||
"left merge on ['ts_code', 'trade_date']\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 8665405 entries, 0 to 8665404\n",
|
||||
"Data columns (total 32 columns):\n",
|
||||
"Data columns (total 33 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object \n",
|
||||
@@ -85,33 +91,34 @@
|
||||
" 4 high float64 \n",
|
||||
" 5 low float64 \n",
|
||||
" 6 vol float64 \n",
|
||||
" 7 pct_chg float64 \n",
|
||||
" 8 turnover_rate float64 \n",
|
||||
" 9 pe_ttm float64 \n",
|
||||
" 10 circ_mv float64 \n",
|
||||
" 11 total_mv float64 \n",
|
||||
" 12 volume_ratio float64 \n",
|
||||
" 13 is_st bool \n",
|
||||
" 14 up_limit float64 \n",
|
||||
" 15 down_limit float64 \n",
|
||||
" 16 buy_sm_vol float64 \n",
|
||||
" 17 sell_sm_vol float64 \n",
|
||||
" 18 buy_lg_vol float64 \n",
|
||||
" 19 sell_lg_vol float64 \n",
|
||||
" 20 buy_elg_vol float64 \n",
|
||||
" 21 sell_elg_vol float64 \n",
|
||||
" 22 net_mf_vol float64 \n",
|
||||
" 23 his_low float64 \n",
|
||||
" 24 his_high float64 \n",
|
||||
" 25 cost_5pct float64 \n",
|
||||
" 26 cost_15pct float64 \n",
|
||||
" 27 cost_50pct float64 \n",
|
||||
" 28 cost_85pct float64 \n",
|
||||
" 29 cost_95pct float64 \n",
|
||||
" 30 weight_avg float64 \n",
|
||||
" 31 winner_rate float64 \n",
|
||||
"dtypes: bool(1), datetime64[ns](1), float64(29), object(1)\n",
|
||||
"memory usage: 2.0+ GB\n",
|
||||
" 7 amount float64 \n",
|
||||
" 8 pct_chg float64 \n",
|
||||
" 9 turnover_rate float64 \n",
|
||||
" 10 pe_ttm float64 \n",
|
||||
" 11 circ_mv float64 \n",
|
||||
" 12 total_mv float64 \n",
|
||||
" 13 volume_ratio float64 \n",
|
||||
" 14 is_st bool \n",
|
||||
" 15 up_limit float64 \n",
|
||||
" 16 down_limit float64 \n",
|
||||
" 17 buy_sm_vol float64 \n",
|
||||
" 18 sell_sm_vol float64 \n",
|
||||
" 19 buy_lg_vol float64 \n",
|
||||
" 20 sell_lg_vol float64 \n",
|
||||
" 21 buy_elg_vol float64 \n",
|
||||
" 22 sell_elg_vol float64 \n",
|
||||
" 23 net_mf_vol float64 \n",
|
||||
" 24 his_low float64 \n",
|
||||
" 25 his_high float64 \n",
|
||||
" 26 cost_5pct float64 \n",
|
||||
" 27 cost_15pct float64 \n",
|
||||
" 28 cost_50pct float64 \n",
|
||||
" 29 cost_85pct float64 \n",
|
||||
" 30 cost_95pct float64 \n",
|
||||
" 31 weight_avg float64 \n",
|
||||
" 32 winner_rate float64 \n",
|
||||
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
|
||||
"memory usage: 2.1+ GB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
@@ -121,7 +128,7 @@
|
||||
"\n",
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
@@ -149,7 +156,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "cac01788dac10678",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -217,7 +224,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "c4e9e1d31da6dba6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -317,7 +324,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a735bc02ceb4d872",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -333,7 +340,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "53f86ddc0677a6d7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -400,7 +407,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "dbe2fd8021b9417f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -413,7 +420,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['ts_code', 'open', 'close', 'high', 'low', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
|
||||
"['ts_code', 'open', 'close', 'high', 'low', 'amount', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -428,7 +435,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "85c3e3d0235ffffa",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -456,7 +463,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"id": "92d84ce15a562ec6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -476,7 +483,13 @@
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"开始计算因子: AR, BR (原地修改)...\n",
|
||||
"因子 AR, BR 计算成功。\n",
|
||||
"因子 AR, BR 计算流程结束。\n",
|
||||
@@ -495,7 +508,7 @@
|
||||
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
|
||||
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
|
||||
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
|
||||
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
|
||||
" 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
|
||||
" 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol',\n",
|
||||
" 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol',\n",
|
||||
" 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct',\n",
|
||||
@@ -592,11 +605,11 @@
|
||||
"Finished cs_rank_size.\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 4539678 entries, 0 to 4539677\n",
|
||||
"Columns: 180 entries, ts_code to cs_rank_size\n",
|
||||
"dtypes: bool(10), datetime64[ns](1), float64(164), int32(3), object(2)\n",
|
||||
"memory usage: 5.7+ GB\n",
|
||||
"Columns: 181 entries, ts_code to cs_rank_size\n",
|
||||
"dtypes: bool(10), datetime64[ns](1), float64(165), int32(3), object(2)\n",
|
||||
"memory usage: 5.8+ GB\n",
|
||||
"None\n",
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -629,6 +642,11 @@
|
||||
"# df = cat_reason(df, top_list_df)\n",
|
||||
"# df = cat_is_on_top_list(df, top_list_df)\n",
|
||||
"\n",
|
||||
"# df = ts_turnover_rate_acceleration_5_20(df)\n",
|
||||
"# df = ts_vol_sustain_10_30(df)\n",
|
||||
"# df = cs_turnover_rate_relative_strength_20(df)\n",
|
||||
"# df = cs_amount_outlier_10(df)\n",
|
||||
"\n",
|
||||
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='undist_profit_ps')\n",
|
||||
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='ocfps')\n",
|
||||
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='roa')\n",
|
||||
@@ -697,7 +715,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "b87b938028afa206",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -735,7 +753,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "f4f16d63ad18d1bc",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -745,7 +763,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import statsmodels.api as sm # 用于中性化回归\n",
|
||||
"from tqdm import tqdm # 可选,用于显示进度条\n",
|
||||
@@ -962,7 +979,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "40e6b68a91b30c79",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -972,9 +989,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,\n",
|
||||
" log=True):\n",
|
||||
" if not (0 <= lower_percentile < upper_percentile <= 1):\n",
|
||||
@@ -1071,10 +1085,6 @@
|
||||
" return df_standardized\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def neutralize_manual_revised(df: pd.DataFrame, features: list, industry_col: str, mkt_cap_col: str) -> pd.DataFrame:\n",
|
||||
" \"\"\"\n",
|
||||
" 手动实现简单回归以提升速度,通过构建 Series 确保索引对齐。\n",
|
||||
@@ -1289,7 +1299,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 14,
|
||||
"id": "47c12bb34062ae7a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -1323,7 +1333,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 15,
|
||||
"id": "29221dde",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1366,7 +1376,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 16,
|
||||
"id": "03ee5daf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1379,7 +1389,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 17,
|
||||
"id": "b76ea08a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1400,7 +1410,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.67it/s]\n"
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.59it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1415,7 +1425,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:24<00:00, 5.43it/s]\n"
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:23<00:00, 5.52it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1597,7 +1607,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 18,
|
||||
"id": "3ff2d1c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -1606,8 +1616,6 @@
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"import matplotlib.pyplot as plt # 保持 matplotlib 导入,尽管LightGBM的绘图功能已移除\n",
|
||||
"from sklearn.decomposition import PCA\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import datetime # 用于日期计算\n",
|
||||
"from catboost import CatBoostClassifier\n",
|
||||
"from catboost import Pool\n",
|
||||
@@ -1740,7 +1748,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 19,
|
||||
"id": "c6eb5cd4-e714-420a-ac48-39af3e11ee81",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -1774,7 +1782,7 @@
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b132fdab733b4d4e856d4688f2997dac",
|
||||
"model_id": "5119f1526efe46b58c2c2c978ebdf1f1",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
@@ -1789,8 +1797,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0:\tlearn: 0.6887138\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 328ms\tremaining: 8m 12s\n",
|
||||
"bestTest = 0.5217666894\n",
|
||||
"0:\tlearn: 0.6887139\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 271ms\tremaining: 6m 46s\n",
|
||||
"bestTest = 0.5217666495\n",
|
||||
"bestIteration = 487\n",
|
||||
"Shrink model to first 488 iterations.\n"
|
||||
]
|
||||
@@ -1814,7 +1822,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 20,
|
||||
"id": "5d1522a7538db91b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -1852,7 +1860,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 21,
|
||||
"id": "09b1799e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1874,7 +1882,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 22,
|
||||
"id": "e53b209a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -1907,12 +1915,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 23,
|
||||
"id": "364e821a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
@@ -1992,7 +1999,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 24,
|
||||
"id": "1f6e6336",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -2061,7 +2068,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 25,
|
||||
"id": "7e9023cc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -2261,7 +2268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 26,
|
||||
"id": "a0000d75",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -2320,8 +2327,6 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from scipy.stats import spearmanr\n",
|
||||
@@ -2509,7 +2514,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 27,
|
||||
"id": "a436dba4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
||||
Reference in New Issue
Block a user