feat(factors): 新增筹码集中度相关因子并优化训练框架

- 添加 19 个筹码分布和胜率相关因子(包括chip_dispersion、winner_rate等系列)
- LightGBM模型添加早停和训练指标记录功能
- 统一Label配置到common.py模块
- 新增list_factors.py因子列表脚本
This commit is contained in:
2026-03-29 01:34:58 +08:00
parent d4e0e2a0b6
commit c3d1b157e9
9 changed files with 373 additions and 246 deletions

View File

@@ -48,6 +48,7 @@ class LightGBMModel(BaseModel):
self.params = dict(params) if params is not None else {}
self.model = None
self.feature_names_: Optional[list] = None
self.evals_result_: Optional[dict] = None
def fit(
self,
@@ -90,14 +91,23 @@ class LightGBMModel(BaseModel):
y_val_np = y_val.to_numpy()
valid_sets = lgb.Dataset(X_val_np, label=y_val_np, reference=train_data)
# 从 params 中提取 num_boost_round默认 100
num_boost_round = self.params.pop("n_estimators", 100)
# 从 params 中提取训练控制参数
params_copy = dict(self.params)
num_boost_round = params_copy.pop("n_estimators", 100)
early_stopping_round = params_copy.pop("early_stopping_round", 50)
self.evals_result_ = {}
callbacks = [
lgb.early_stopping(stopping_rounds=early_stopping_round),
lgb.record_evaluation(self.evals_result_),
]
self.model = lgb.train(
self.params,
params_copy,
train_data,
num_boost_round=num_boost_round,
valid_sets=[valid_sets] if valid_sets else None,
callbacks=callbacks,
)
return self
@@ -121,6 +131,34 @@ class LightGBMModel(BaseModel):
result = self.model.predict(X_np)
return np.asarray(result)
def get_evals_result(self) -> Optional[dict]:
    """Return the recorded training-evaluation history.

    Returns:
        The dict populated by ``lgb.record_evaluation`` during ``fit``,
        or ``None`` if the model has not been trained yet.
    """
    history = self.evals_result_
    return history
def get_best_iteration(self) -> Optional[int]:
    """Return the best boosting round found during training.

    Reflects early stopping: LightGBM's ``Booster.best_iteration`` is the
    round with the best validation score.

    Returns:
        The best iteration number, or ``None`` if the model is untrained.
    """
    return None if self.model is None else self.model.best_iteration
def get_best_score(self) -> Optional[dict]:
    """Return the best validation score(s) of the trained booster.

    Delegates to LightGBM's ``Booster.best_score`` (a nested dict keyed
    by dataset name, then metric name).

    Returns:
        The best-score mapping, or ``None`` if the model is untrained.
    """
    if self.model is None:
        return None
    return self.model.best_score
def feature_importance(self) -> Optional[pd.Series]:
"""返回特征重要性

View File

@@ -84,7 +84,7 @@ class ResultAnalyzer:
print("\n" + "-" * 80)
print(f"[警告] 贡献为0的特征{len(zero_importance_features)} 个):")
for i, feature in enumerate(zero_importance_features, 1):
print(f" {i}. {feature}")
print(f"'{feature}',")
# 统计摘要
print("\n" + "=" * 80)

View File

@@ -189,10 +189,14 @@ class RankTask(BaseTask):
def plot_training_metrics(self) -> None:
"""绘制训练指标曲线NDCG"""
if self.model and hasattr(self.model, "model") and self.model.model:
if self.model and hasattr(self.model, "get_evals_result"):
try:
import lightgbm as lgb
lgb.plot_metric(self.model.model)
evals_result = self.model.get_evals_result()
if evals_result:
lgb.plot_metric(evals_result)
else:
print("[警告] 没有训练指标数据可供绘制")
except Exception as e:
print(f"[警告] 无法绘制训练曲线: {e}")

View File

@@ -77,10 +77,14 @@ class RegressionTask(BaseTask):
def plot_training_metrics(self) -> None:
"""绘制训练指标曲线"""
if self.model and hasattr(self.model, "model") and self.model.model:
if self.model and hasattr(self.model, "get_evals_result"):
try:
import lightgbm as lgb
lgb.plot_metric(self.model.model)
evals_result = self.model.get_evals_result()
if evals_result:
lgb.plot_metric(evals_result)
else:
print("[警告] 没有训练指标数据可供绘制")
except Exception as e:
print(f"[警告] 无法绘制训练曲线: {e}")