Classify2-2025-5-28

This commit is contained in:
liaozhaorun
2025-05-28 14:16:04 +08:00
parent bf86fd9415
commit cecbef02f6
19 changed files with 4335 additions and 4159 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -39,12 +39,24 @@
"\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
"warnings.filterwarnings(\"ignore\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4a481c60",
"metadata": {},
"outputs": [],
"source": [
"# 设置使用核心\n",
"import os\n",
"os.environ[\"MODIN_CPUS\"] = \"4\"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a79cafb06a7e0e43",
"metadata": {
"ExecuteTime": {
@@ -58,13 +70,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n",
"daily basic\n",
"inner merge on ['ts_code', 'trade_date']\n",
"stk limit\n",
@@ -75,7 +81,7 @@
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 8665405 entries, 0 to 8665404\n",
"Data columns (total 32 columns):\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
@@ -85,33 +91,34 @@
" 4 high float64 \n",
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 pct_chg float64 \n",
" 8 turnover_rate float64 \n",
" 9 pe_ttm float64 \n",
" 10 circ_mv float64 \n",
" 11 total_mv float64 \n",
" 12 volume_ratio float64 \n",
" 13 is_st bool \n",
" 14 up_limit float64 \n",
" 15 down_limit float64 \n",
" 16 buy_sm_vol float64 \n",
" 17 sell_sm_vol float64 \n",
" 18 buy_lg_vol float64 \n",
" 19 sell_lg_vol float64 \n",
" 20 buy_elg_vol float64 \n",
" 21 sell_elg_vol float64 \n",
" 22 net_mf_vol float64 \n",
" 23 his_low float64 \n",
" 24 his_high float64 \n",
" 25 cost_5pct float64 \n",
" 26 cost_15pct float64 \n",
" 27 cost_50pct float64 \n",
" 28 cost_85pct float64 \n",
" 29 cost_95pct float64 \n",
" 30 weight_avg float64 \n",
" 31 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(29), object(1)\n",
"memory usage: 2.0+ GB\n",
" 7 amount float64 \n",
" 8 pct_chg float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.1+ GB\n",
"None\n"
]
}
@@ -121,7 +128,7 @@
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
@@ -149,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "cac01788dac10678",
"metadata": {
"ExecuteTime": {
@@ -217,7 +224,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "c4e9e1d31da6dba6",
"metadata": {
"ExecuteTime": {
@@ -317,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "a735bc02ceb4d872",
"metadata": {
"ExecuteTime": {
@@ -333,7 +340,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "53f86ddc0677a6d7",
"metadata": {
"ExecuteTime": {
@@ -400,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "dbe2fd8021b9417f",
"metadata": {
"ExecuteTime": {
@@ -413,7 +420,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'open', 'close', 'high', 'low', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
"['ts_code', 'open', 'close', 'high', 'low', 'amount', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
]
}
],
@@ -428,7 +435,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "85c3e3d0235ffffa",
"metadata": {
"ExecuteTime": {
@@ -456,7 +463,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "92d84ce15a562ec6",
"metadata": {
"ExecuteTime": {
@@ -476,7 +483,13 @@
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始计算因子: AR, BR (原地修改)...\n",
"因子 AR, BR 计算成功。\n",
"因子 AR, BR 计算流程结束。\n",
@@ -495,7 +508,7 @@
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
" 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv',\n",
" 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol',\n",
" 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol',\n",
" 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct',\n",
@@ -592,11 +605,11 @@
"Finished cs_rank_size.\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 4539678 entries, 0 to 4539677\n",
"Columns: 180 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(164), int32(3), object(2)\n",
"memory usage: 5.7+ GB\n",
"Columns: 181 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(165), int32(3), object(2)\n",
"memory usage: 5.8+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
]
}
],
@@ -629,6 +642,11 @@
"# df = cat_reason(df, top_list_df)\n",
"# df = cat_is_on_top_list(df, top_list_df)\n",
"\n",
"# df = ts_turnover_rate_acceleration_5_20(df)\n",
"# df = ts_vol_sustain_10_30(df)\n",
"# df = cs_turnover_rate_relative_strength_20(df)\n",
"# df = cs_amount_outlier_10(df)\n",
"\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='undist_profit_ps')\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='ocfps')\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col='roa')\n",
@@ -697,7 +715,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "b87b938028afa206",
"metadata": {
"ExecuteTime": {
@@ -735,7 +753,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "f4f16d63ad18d1bc",
"metadata": {
"ExecuteTime": {
@@ -745,7 +763,6 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.api as sm # 用于中性化回归\n",
"from tqdm import tqdm # 可选,用于显示进度条\n",
@@ -962,7 +979,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "40e6b68a91b30c79",
"metadata": {
"ExecuteTime": {
@@ -972,9 +989,6 @@
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"def remove_outliers_label_percentile(label: pd.Series, lower_percentile: float = 0.01, upper_percentile: float = 0.99,\n",
" log=True):\n",
" if not (0 <= lower_percentile < upper_percentile <= 1):\n",
@@ -1071,10 +1085,6 @@
" return df_standardized\n",
"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"def neutralize_manual_revised(df: pd.DataFrame, features: list, industry_col: str, mkt_cap_col: str) -> pd.DataFrame:\n",
" \"\"\"\n",
" 手动实现简单回归以提升速度,通过构建 Series 确保索引对齐。\n",
@@ -1289,7 +1299,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"id": "47c12bb34062ae7a",
"metadata": {
"ExecuteTime": {
@@ -1323,7 +1333,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"id": "29221dde",
"metadata": {},
"outputs": [
@@ -1366,7 +1376,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"id": "03ee5daf",
"metadata": {},
"outputs": [],
@@ -1379,7 +1389,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"id": "b76ea08a",
"metadata": {},
"outputs": [
@@ -1400,7 +1410,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.67it/s]\n"
"MAD Filtering: 100%|██████████| 131/131 [00:28<00:00, 4.59it/s]\n"
]
},
{
@@ -1415,7 +1425,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:24<00:00, 5.43it/s]\n"
"MAD Filtering: 100%|██████████| 131/131 [00:23<00:00, 5.52it/s]\n"
]
},
{
@@ -1597,7 +1607,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"id": "3ff2d1c5",
"metadata": {},
"outputs": [],
@@ -1606,8 +1616,6 @@
"from sklearn.linear_model import LogisticRegression\n",
"import matplotlib.pyplot as plt # 保持 matplotlib 导入尽管LightGBM的绘图功能已移除\n",
"from sklearn.decomposition import PCA\n",
"import pandas as pd\n",
"import numpy as np\n",
"import datetime # 用于日期计算\n",
"from catboost import CatBoostClassifier\n",
"from catboost import Pool\n",
@@ -1740,7 +1748,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"id": "c6eb5cd4-e714-420a-ac48-39af3e11ee81",
"metadata": {
"ExecuteTime": {
@@ -1774,7 +1782,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b132fdab733b4d4e856d4688f2997dac",
"model_id": "5119f1526efe46b58c2c2c978ebdf1f1",
"version_major": 2,
"version_minor": 0
},
@@ -1789,8 +1797,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 0.6887138\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 328ms\tremaining: 8m 12s\n",
"bestTest = 0.5217666894\n",
"0:\tlearn: 0.6887139\ttest: 0.6894516\tbest: 0.6894516 (0)\ttotal: 271ms\tremaining: 6m 46s\n",
"bestTest = 0.5217666495\n",
"bestIteration = 487\n",
"Shrink model to first 488 iterations.\n"
]
@@ -1814,7 +1822,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"id": "5d1522a7538db91b",
"metadata": {
"ExecuteTime": {
@@ -1852,7 +1860,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"id": "09b1799e",
"metadata": {},
"outputs": [
@@ -1874,7 +1882,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"id": "e53b209a",
"metadata": {},
"outputs": [
@@ -1907,12 +1915,11 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"id": "364e821a",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
@@ -1992,7 +1999,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"id": "1f6e6336",
"metadata": {},
"outputs": [
@@ -2061,7 +2068,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"id": "7e9023cc",
"metadata": {},
"outputs": [],
@@ -2261,7 +2268,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"id": "a0000d75",
"metadata": {},
"outputs": [
@@ -2320,8 +2327,6 @@
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import spearmanr\n",
@@ -2509,7 +2514,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"id": "a436dba4",
"metadata": {},
"outputs": [

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff