feat(factors): 新增筹码集中度相关因子并优化训练框架
- 添加 19 个筹码分布和胜率相关因子(包括 chip_dispersion、winner_rate 等系列)
- LightGBM 模型添加早停和训练指标记录功能
- 统一 Label 配置到 common.py 模块
- 新增 list_factors.py 因子列表脚本
This commit is contained in:
@@ -48,6 +48,7 @@ class LightGBMModel(BaseModel):
|
||||
self.params = dict(params) if params is not None else {}
|
||||
self.model = None
|
||||
self.feature_names_: Optional[list] = None
|
||||
self.evals_result_: Optional[dict] = None
|
||||
|
||||
def fit(
|
||||
self,
|
||||
@@ -90,14 +91,23 @@ class LightGBMModel(BaseModel):
|
||||
y_val_np = y_val.to_numpy()
|
||||
valid_sets = lgb.Dataset(X_val_np, label=y_val_np, reference=train_data)
|
||||
|
||||
# 从 params 中提取 num_boost_round,默认 100
|
||||
num_boost_round = self.params.pop("n_estimators", 100)
|
||||
# 从 params 中提取训练控制参数
|
||||
params_copy = dict(self.params)
|
||||
num_boost_round = params_copy.pop("n_estimators", 100)
|
||||
early_stopping_round = params_copy.pop("early_stopping_round", 50)
|
||||
|
||||
self.evals_result_ = {}
|
||||
callbacks = [
|
||||
lgb.early_stopping(stopping_rounds=early_stopping_round),
|
||||
lgb.record_evaluation(self.evals_result_),
|
||||
]
|
||||
|
||||
self.model = lgb.train(
|
||||
self.params,
|
||||
params_copy,
|
||||
train_data,
|
||||
num_boost_round=num_boost_round,
|
||||
valid_sets=[valid_sets] if valid_sets else None,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
|
||||
return self
|
||||
@@ -121,6 +131,34 @@ class LightGBMModel(BaseModel):
|
||||
result = self.model.predict(X_np)
|
||||
return np.asarray(result)
|
||||
|
||||
def get_evals_result(self) -> Optional[dict]:
    """Return the evaluation metrics recorded during training.

    Returns:
        The evals-result dict populated by ``lgb.record_evaluation``
        during ``fit``, or ``None`` if the model has not been trained.
    """
    return self.evals_result_
|
||||
|
||||
def get_best_iteration(self) -> Optional[int]:
    """Return the best boosting round (early-stopping aware).

    Returns:
        The booster's ``best_iteration``, or ``None`` if the model
        has not been trained yet.
    """
    # Ternary guard instead of an early return: untrained -> None.
    return None if self.model is None else self.model.best_iteration
|
||||
|
||||
def get_best_score(self) -> Optional[dict]:
    """Return the best validation scores of the trained booster.

    Returns:
        The booster's ``best_score`` mapping, or ``None`` if the
        model has not been trained yet.
    """
    # Untrained model has nothing to report.
    return None if self.model is None else self.model.best_score
|
||||
|
||||
def feature_importance(self) -> Optional[pd.Series]:
|
||||
"""返回特征重要性
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ class ResultAnalyzer:
|
||||
print("\n" + "-" * 80)
|
||||
print(f"[警告] 贡献为0的特征(共 {len(zero_importance_features)} 个):")
|
||||
for i, feature in enumerate(zero_importance_features, 1):
|
||||
print(f" {i}. {feature}")
|
||||
print(f"'{feature}',")
|
||||
|
||||
# 统计摘要
|
||||
print("\n" + "=" * 80)
|
||||
|
||||
@@ -189,10 +189,14 @@ class RankTask(BaseTask):
|
||||
|
||||
def plot_training_metrics(self) -> None:
    """Plot the training metric curves (NDCG) recorded during fit.

    Reads the evals-result dict exposed by the model wrapper via
    ``get_evals_result()`` and hands it to ``lightgbm.plot_metric``.
    Plotting is best-effort: any failure (missing lightgbm/matplotlib,
    bad data) is reported as a warning instead of raising, so it can
    never break the task run.
    """
    # Defect fixed: the block contained both the pre-change lines
    # (old condition + plot_metric(self.model.model)) and the
    # post-change lines as duplicate statements; keep only the
    # evals_result-based version.
    if self.model and hasattr(self.model, "get_evals_result"):
        try:
            import lightgbm as lgb

            evals_result = self.model.get_evals_result()
            if evals_result:
                # plot_metric accepts a record_evaluation dict directly.
                lgb.plot_metric(evals_result)
            else:
                print("[警告] 没有训练指标数据可供绘制")
        except Exception as e:
            print(f"[警告] 无法绘制训练曲线: {e}")
|
||||
|
||||
@@ -77,10 +77,14 @@ class RegressionTask(BaseTask):
|
||||
|
||||
def plot_training_metrics(self) -> None:
    """Plot the training metric curves recorded during fit.

    Reads the evals-result dict exposed by the model wrapper via
    ``get_evals_result()`` and hands it to ``lightgbm.plot_metric``.
    Plotting is best-effort: any failure (missing lightgbm/matplotlib,
    bad data) is reported as a warning instead of raising, so it can
    never break the task run.
    """
    # Defect fixed: the block contained both the pre-change lines
    # (old condition + plot_metric(self.model.model)) and the
    # post-change lines as duplicate statements; keep only the
    # evals_result-based version.
    if self.model and hasattr(self.model, "get_evals_result"):
        try:
            import lightgbm as lgb

            evals_result = self.model.get_evals_result()
            if evals_result:
                # plot_metric accepts a record_evaluation dict directly.
                lgb.plot_metric(evals_result)
            else:
                print("[警告] 没有训练指标数据可供绘制")
        except Exception as e:
            print(f"[警告] 无法绘制训练曲线: {e}")
|
||||
|
||||
Reference in New Issue
Block a user