refactor(factors): 简化 add_factor API 并默认启用 metadata

- 合并 add_factor_by_name 到 add_factor,支持三种调用方式
- FactorManager 构造函数改为可选参数,使用默认路径
- FactorEngine 默认启用 metadata,无需手动配置路径
This commit is contained in:
2026-03-12 22:34:25 +08:00
parent 2bb7718dd1
commit ced7a929c3
7 changed files with 496 additions and 254 deletions

View File

@@ -47,29 +47,40 @@
"execution_count": null,
"source": [
"def create_factors_with_metadata(\n",
" engine: FactorEngine, factor_definitions: dict, label_factor: dict\n",
" engine: FactorEngine,\n",
" selected_factors: List[str],\n",
" factor_definitions: dict,\n",
" label_factor: dict,\n",
") -> List[str]:\n",
" \"\"\"使用 metadata 注册因子特征因子通过名称注册label 因子通过表达式注册)\"\"\"\n",
" \"\"\"注册因子SELECTED_FACTORS 从 metadata 查询FACTOR_DEFINITIONS 用表达式注册)\"\"\"\n",
" print(\"=\" * 80)\n",
" print(\"使用 metadata 注册因子\")\n",
" print(\"注册因子\")\n",
" print(\"=\" * 80)\n",
"\n",
" # 注册所有特征因子(通过 metadata 名称\n",
" # 注册 SELECTED_FACTORS 中的因子(已在 metadata \n",
" print(\"\\n注册特征因子从 metadata:\")\n",
" for name in factor_definitions.keys():\n",
" engine.add_factor_by_name(name)\n",
" for name in selected_factors:\n",
" engine.add_factor(name)\n",
" print(f\" - {name}\")\n",
"\n",
" # 注册 label 因子(通过表达式,因为 label 不在 metadata 中)\n",
" # 注册 FACTOR_DEFINITIONS 中的因子(通过表达式,尚未在 metadata 中)\n",
" print(\"\\n注册特征因子表达式:\")\n",
" for name, expr in factor_definitions.items():\n",
" engine.add_factor(name, expr)\n",
" print(f\" - {name}: {expr}\")\n",
"\n",
" # 注册 label 因子(通过表达式)\n",
" print(\"\\n注册 Label 因子(表达式):\")\n",
" for name, expr in label_factor.items():\n",
" engine.add_factor(name, expr)\n",
" print(f\" - {name}: {expr}\")\n",
"\n",
" # 从字典自动获取特征列\n",
" feature_cols = list(factor_definitions.keys())\n",
" # 特征列 = SELECTED_FACTORS + FACTOR_DEFINITIONS 的 keys\n",
" feature_cols = selected_factors + list(factor_definitions.keys())\n",
"\n",
" print(f\"\\n特征因子数: {len(feature_cols)}\")\n",
" print(f\" - 来自 metadata: {len(selected_factors)}\")\n",
" print(f\" - 来自表达式: {len(factor_definitions)}\")\n",
" print(f\"Label: {list(label_factor.keys())[0]}\")\n",
" print(f\"已注册因子总数: {len(engine.list_registered())}\")\n",
"\n",
@@ -123,7 +134,67 @@
"# 特征因子定义字典:新增因子只需在此处添加一行\n",
"LABEL_NAME = \"future_return_5\"\n",
"\n",
"FACTOR_DEFINITIONS = FACTOR_DICT = {\n",
"# 当前选择的因子列表(从 FACTOR_DEFINITIONS 中选择要使用的因子)\n",
"SELECTED_FACTORS = [\n",
" # ================= 1. 价格、趋势与路径依赖 =================\n",
" \"ma_5\",\n",
" \"ma_20\",\n",
" \"ma_ratio_5_20\",\n",
" \"bias_10\",\n",
" \"high_low_ratio\",\n",
" \"bbi_ratio\",\n",
" \"return_5\",\n",
" \"return_20\",\n",
" \"kaufman_ER_20\",\n",
" \"mom_acceleration_10_20\",\n",
" \"drawdown_from_high_60\",\n",
" \"up_days_ratio_20\",\n",
" # ================= 2. 波动率、风险调整与高阶矩 =================\n",
" \"volatility_5\",\n",
" \"volatility_20\",\n",
" \"volatility_ratio\",\n",
" \"std_return_20\",\n",
" \"sharpe_ratio_20\",\n",
" \"min_ret_20\",\n",
" \"volatility_squeeze_5_60\",\n",
" # ================= 3. 日内微观结构与异象 =================\n",
" \"overnight_intraday_diff\",\n",
" \"upper_shadow_ratio\",\n",
" \"capital_retention_20\",\n",
" \"max_ret_20\",\n",
" # ================= 4. 量能、流动性与量价背离 =================\n",
" \"volume_ratio_5_20\",\n",
" \"turnover_rate_mean_5\",\n",
" \"turnover_deviation\",\n",
" \"amihud_illiq_20\",\n",
" \"turnover_cv_20\",\n",
" \"pv_corr_20\",\n",
" \"close_vwap_deviation\",\n",
" # ================= 5. 基本面财务特征 =================\n",
" \"roe\",\n",
" \"roa\",\n",
" \"profit_margin\",\n",
" \"debt_to_equity\",\n",
" \"current_ratio\",\n",
" \"net_profit_yoy\",\n",
" \"revenue_yoy\",\n",
" \"healthy_expansion_velocity\",\n",
" # ================= 6. 基本面估值与截面动量共振 =================\n",
" \"EP\",\n",
" \"BP\",\n",
" \"CP\",\n",
" \"market_cap_rank\",\n",
" \"turnover_rank\",\n",
" \"return_5_rank\",\n",
" \"EP_rank\",\n",
" \"pe_expansion_trend\",\n",
" \"value_price_divergence\",\n",
" \"active_market_cap\",\n",
" \"ebit_rank\",\n",
"]\n",
"\n",
"# 因子定义字典(完整因子库)\n",
"FACTOR_DEFINITIONS = {\n",
" # ================= 1. 价格、趋势与路径依赖 (Trend, Momentum & Path Dependency) =================\n",
" \"ma_5\": \"ts_mean(close, 5)\",\n",
" \"ma_20\": \"ts_mean(close, 20)\",\n",
@@ -338,7 +409,9 @@
"\n",
"# 2. 使用 metadata 定义因子\n",
"print(\"\\n[2] 定义因子(从 metadata 注册)\")\n",
"feature_cols = create_factors_with_metadata(engine, FACTOR_DEFINITIONS, LABEL_FACTOR)\n",
"feature_cols = create_factors_with_metadata(\n",
" engine, SELECTED_FACTORS, FACTOR_DEFINITIONS, LABEL_FACTOR\n",
")\n",
"target_col = LABEL_NAME\n",
"\n",
"# 3. 准备数据(使用模块级别的日期配置)\n",