docs(AGENTS): 新增AI行为准则规范
- 添加代码存放位置规则,强制代码存放于 src/ 或 tests/ 目录
- 添加 Tests 目录代码运行规则,强制使用 pytest 运行测试代码
- 更新 learn_to_rank 实验代码:调整因子列表和处理器配置
- 修复 schema_cache 表结构缓存逻辑
This commit is contained in:
@@ -30,6 +30,7 @@
|
||||
" Trainer,\n",
|
||||
" Winsorizer,\n",
|
||||
" NullFiller,\n",
|
||||
" check_data_quality,\n",
|
||||
")\n",
|
||||
"from src.training.config import TrainingConfig\n",
|
||||
"\n"
|
||||
@@ -46,13 +47,13 @@
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"def create_factors_with_metadata(\n",
|
||||
"def register_factors(\n",
|
||||
" engine: FactorEngine,\n",
|
||||
" selected_factors: List[str],\n",
|
||||
" factor_definitions: dict,\n",
|
||||
" label_factor: dict,\n",
|
||||
") -> List[str]:\n",
|
||||
" \"\"\"注册因子(SELECTED_FACTORS 从 metadata 查询,FACTOR_DEFINITIONS 用表达式注册)\"\"\"\n",
|
||||
" \"\"\"注册因子(selected_factors 从 metadata 查询,factor_definitions 用 DSL 表达式注册)\"\"\"\n",
|
||||
" print(\"=\" * 80)\n",
|
||||
" print(\"注册因子\")\n",
|
||||
" print(\"=\" * 80)\n",
|
||||
@@ -327,9 +328,6 @@
|
||||
" \"random_state\": 42,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# 数据处理器配置(新 API:需要传入 feature_cols)\n",
|
||||
"# 注意:processor 现在需要显式指定要处理的特征列\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 股票池筛选函数\n",
|
||||
"# 使用新的 StockPoolManager API:传入自定义筛选函数和所需列/因子\n",
|
||||
@@ -409,7 +407,7 @@
|
||||
"\n",
|
||||
"# 2. 使用 metadata 定义因子\n",
|
||||
"print(\"\\n[2] 定义因子(从 metadata 注册)\")\n",
|
||||
"feature_cols = create_factors_with_metadata(\n",
|
||||
"feature_cols = register_factors(\n",
|
||||
" engine, SELECTED_FACTORS, FACTOR_DEFINITIONS, LABEL_FACTOR\n",
|
||||
")\n",
|
||||
"target_col = LABEL_NAME\n",
|
||||
@@ -434,7 +432,7 @@
|
||||
"# 5. 创建模型\n",
|
||||
"model = LightGBMModel(params=MODEL_PARAMS)\n",
|
||||
"\n",
|
||||
"# 6. 创建数据处理器(新 API:需要传入 feature_cols)\n",
|
||||
"# 6. 创建数据处理器(使用函数返回的完整特征列表)\n",
|
||||
"processors = [\n",
|
||||
" NullFiller(feature_cols=feature_cols, strategy=\"mean\"),\n",
|
||||
" Winsorizer(feature_cols=feature_cols, lower=0.01, upper=0.99),\n",
|
||||
@@ -560,8 +558,32 @@
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"# 步骤 3: 训练集数据处理\n",
|
||||
"print(\"\\n[步骤 3/6] 训练集数据处理\")\n",
|
||||
"# 步骤 3: 数据质量检查(必须在预处理之前)\n",
|
||||
"print(\"\\n[步骤 3/7] 数据质量检查\")\n",
|
||||
"print(\"-\" * 60)\n",
|
||||
"print(\" [说明] 此检查在 fillna 等处理之前执行,用于发现数据问题\")\n",
|
||||
"\n",
|
||||
"print(\"\\n 检查训练集...\")\n",
|
||||
"check_data_quality(train_data, feature_cols, raise_on_error=True)\n",
|
||||
"\n",
|
||||
"if \"val_data\" in locals() and val_data is not None:\n",
|
||||
" print(\"\\n 检查验证集...\")\n",
|
||||
" check_data_quality(val_data, feature_cols, raise_on_error=True)\n",
|
||||
"\n",
|
||||
"print(\"\\n 检查测试集...\")\n",
|
||||
"check_data_quality(test_data, feature_cols, raise_on_error=True)\n",
|
||||
"\n",
|
||||
"print(\" [成功] 数据质量检查通过,未发现异常\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"# 步骤 4: 训练集数据处理\n",
|
||||
"print(\"\\n[步骤 4/7] 训练集数据处理\")\n",
|
||||
"print(\"-\" * 60)\n",
|
||||
"fitted_processors = []\n",
|
||||
"if processors:\n",
|
||||
@@ -595,7 +617,7 @@
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"# 步骤 4: 训练模型\n",
|
||||
"print(\"\\n[步骤 4/6] 训练模型\")\n",
|
||||
"print(\"\\n[步骤 5/7] 训练模型\")\n",
|
||||
"print(\"-\" * 60)\n",
|
||||
"print(f\" 模型类型: LightGBM\")\n",
|
||||
"print(f\" 训练样本数: {len(train_data)}\")\n",
|
||||
@@ -624,7 +646,7 @@
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"# 步骤 5: 测试集数据处理\n",
|
||||
"print(\"\\n[步骤 5/6] 测试集数据处理\")\n",
|
||||
"print(\"\\n[步骤 6/7] 测试集数据处理\")\n",
|
||||
"print(\"-\" * 60)\n",
|
||||
"if processors and test_data is not train_data:\n",
|
||||
" for i, processor in enumerate(fitted_processors, 1):\n",
|
||||
@@ -647,7 +669,7 @@
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"# 步骤 6: 生成预测\n",
|
||||
"print(\"\\n[步骤 6/6] 生成预测\")\n",
|
||||
"print(\"\\n[步骤 7/7] 生成预测\")\n",
|
||||
"print(\"-\" * 60)\n",
|
||||
"X_test = test_data.select(feature_cols)\n",
|
||||
"print(f\" 测试样本数: {len(X_test)}\")\n",
|
||||
|
||||
Reference in New Issue
Block a user