Classify2-2025-5-28

This commit is contained in:
liaozhaorun
2025-05-28 14:16:04 +08:00
parent bf86fd9415
commit cecbef02f6
19 changed files with 4335 additions and 4159 deletions

View File

@@ -39,12 +39,24 @@
"\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
"warnings.filterwarnings(\"ignore\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4a481c60",
"metadata": {},
"outputs": [],
"source": [
"# 设置使用核心\n",
"import os\n",
"os.environ[\"MODIN_CPUS\"] = \"4\"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a79cafb06a7e0e43",
"metadata": {
"ExecuteTime": {
@@ -58,13 +70,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n",
"daily basic\n",
"inner merge on ['ts_code', 'trade_date']\n",
"stk limit\n",
@@ -75,7 +81,7 @@
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 8665405 entries, 0 to 8665404\n",
"Data columns (total 32 columns):\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
@@ -85,33 +91,34 @@
" 4 high float64 \n",
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 pct_chg float64 \n",
" 8 turnover_rate float64 \n",
" 9 pe_ttm float64 \n",
" 10 circ_mv float64 \n",
" 11 total_mv float64 \n",
" 12 volume_ratio float64 \n",
" 13 is_st bool \n",
" 14 up_limit float64 \n",
" 15 down_limit float64 \n",
" 16 buy_sm_vol float64 \n",
" 17 sell_sm_vol float64 \n",
" 18 buy_lg_vol float64 \n",
" 19 sell_lg_vol float64 \n",
" 20 buy_elg_vol float64 \n",
" 21 sell_elg_vol float64 \n",
" 22 net_mf_vol float64 \n",
" 23 his_low float64 \n",
" 24 his_high float64 \n",
" 25 cost_5pct float64 \n",
" 26 cost_15pct float64 \n",
" 27 cost_50pct float64 \n",
" 28 cost_85pct float64 \n",
" 29 cost_95pct float64 \n",
" 30 weight_avg float64 \n",
" 31 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(29), object(1)\n",
"memory usage: 2.0+ GB\n",
" 7 amount float64 \n",
" 8 pct_chg float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.1+ GB\n",
"None\n"
]
}
@@ -121,7 +128,7 @@
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
@@ -149,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "cac01788dac10678",
"metadata": {
"ExecuteTime": {
@@ -217,7 +224,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "c4e9e1d31da6dba6",
"metadata": {
"ExecuteTime": {
@@ -317,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "a735bc02ceb4d872",
"metadata": {
"ExecuteTime": {
@@ -333,7 +340,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "53f86ddc0677a6d7",
"metadata": {
"ExecuteTime": {
@@ -400,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "dbe2fd8021b9417f",
"metadata": {
"ExecuteTime": {
@@ -413,7 +420,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'open', 'close', 'high', 'low', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
"['ts_code', 'open', 'close', 'high', 'low', 'amount', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
]
}
],
@@ -428,7 +435,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "85c3e3d0235ffffa",
"metadata": {
"ExecuteTime": {
@@ -456,7 +463,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "92d84ce15a562ec6",
"metadata": {
"ExecuteTime": {
@@ -476,7 +483,13 @@
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始计算因子: AR, BR (原地修改)...\n",
"因子 AR, BR 计算成功。\n",
"因子 AR, BR 计算流程结束。\n",
@@ -495,7 +508,7 @@
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
" 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
" 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol',\n",
" 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol',\n",
" 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct',\n",
@@ -592,11 +605,11 @@
"Finished cs_rank_size.\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 4539678 entries, 0 to 4539677\n",
"Columns: 180 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(164), int32(3), object(2)\n",
"memory usage: 5.7+ GB\n",
"Columns: 181 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(165), int32(3), object(2)\n",
"memory usage: 5.8+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
]
}
],
@@ -629,6 +642,11 @@
"# df = cat_reason(df, top_list_df)\n",
"# df = cat_is_on_top_list(df, top_list_df)\n",
"\n",
"# df = ts_turnover_rate_acceleration_5_20(df)\n",
"# df = ts_vol_sustain_10_30(df)\n",
"# df = cs_turnover_rate_relative_strength_20(df)\n",
"# df = cs_amount_outlier_10(df)\n",
"\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='undist_profit_ps')\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='ocfps')\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='roa')\n",
@@ -697,7 +715,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "b87b938028afa206",
"metadata": {
"ExecuteTime": {
@@ -735,7 +753,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "f4f16d63ad18d1bc",
"metadata": {
"ExecuteTime": {
@@ -745,7 +763,6 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.api as sm # 用于中性化回归\n",
"from tqdm import tqdm # 可选,用于显示进度条\n",
@@ -962,7 +979,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "40e6b68a91b30c79",
"metadata": {
"ExecuteTime": {
@@ -972,9 +989,6 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,\n",
" log=True):\n",
" if not (0 <= lower_percentile < upper_percentile <= 1):\n",
@@ -1071,10 +1085,6 @@
" return df_standardized\n",
"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"def neutralize_manual_revised(df: pd.DataFrame, features: list, industry_col: str, mkt_cap_col: str) -> pd.DataFrame:\n",
" \"\"\"\n",
" 手动实现简单回归以提升速度,通过构建 Series 确保索引对齐。\n",
@@ -1289,7 +1299,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"id": "47c12bb34062ae7a",
"metadata": {
"ExecuteTime": {
@@ -1323,7 +1333,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"id": "29221dde",
"metadata": {},
"outputs": [
@@ -1366,7 +1376,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"id": "03ee5daf",
"metadata": {},
"outputs": [],
@@ -1379,7 +1389,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"id": "b76ea08a",
"metadata": {},
"outputs": [
@@ -1400,7 +1410,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.67it/s]\n"
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.59it/s]\n"
]
},
{
@@ -1415,7 +1425,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:24<00:00, 5.43it/s]\n"
"MAD Filtering: 100%|██████████| 131/131 [00:23<00:00, 5.52it/s]\n"
]
},
{
@@ -1597,7 +1607,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"id": "3ff2d1c5",
"metadata": {},
"outputs": [],
@@ -1606,8 +1616,6 @@
"from sklearn.linear_model import LogisticRegression\n",
"import matplotlib.pyplot as plt # 保持 matplotlib 导入尽管LightGBM的绘图功能已移除\n",
"from sklearn.decomposition import PCA\n",
"import pandas as pd\n",
"import numpy as np\n",
"import datetime # 用于日期计算\n",
"from catboost import CatBoostClassifier\n",
"from catboost import Pool\n",
@@ -1740,7 +1748,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"id": "c6eb5cd4-e714-420a-ac48-39af3e11ee81",
"metadata": {
"ExecuteTime": {
@@ -1774,7 +1782,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b132fdab733b4d4e856d4688f2997dac",
"model_id": "5119f1526efe46b58c2c2c978ebdf1f1",
"version_major": 2,
"version_minor": 0
},
@@ -1789,8 +1797,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 0.6887138\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 328ms\tremaining: 8m 12s\n",
"bestTest = 0.5217666894\n",
"0:\tlearn: 0.6887139\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 271ms\tremaining: 6m 46s\n",
"bestTest = 0.5217666495\n",
"bestIteration = 487\n",
"Shrink model to first 488 iterations.\n"
]
@@ -1814,7 +1822,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"id": "5d1522a7538db91b",
"metadata": {
"ExecuteTime": {
@@ -1852,7 +1860,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"id": "09b1799e",
"metadata": {},
"outputs": [
@@ -1874,7 +1882,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"id": "e53b209a",
"metadata": {},
"outputs": [
@@ -1907,12 +1915,11 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"id": "364e821a",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
@@ -1992,7 +1999,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"id": "1f6e6336",
"metadata": {},
"outputs": [
@@ -2061,7 +2068,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"id": "7e9023cc",
"metadata": {},
"outputs": [],
@@ -2261,7 +2268,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"id": "a0000d75",
"metadata": {},
"outputs": [
@@ -2320,8 +2327,6 @@
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import spearmanr\n",
@@ -2509,7 +2514,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"id": "a436dba4",
"metadata": {},
"outputs": [