refactor(factors): 简化 add_factor API 并默认启用 metadata

- 合并 add_factor_by_name 到 add_factor,支持三种调用方式
- FactorManager 构造函数改为可选参数,使用默认路径
- FactorEngine 默认启用 metadata,无需手动配置路径
This commit is contained in:
2026-03-12 22:34:25 +08:00
parent 2bb7718dd1
commit ced7a929c3
7 changed files with 496 additions and 254 deletions

View File

@@ -26,29 +26,40 @@ from src.training.config import TrainingConfig
# ## 2. 定义辅助函数
# %%
def create_factors_with_metadata(
engine: FactorEngine, factor_definitions: dict, label_factor: dict
engine: FactorEngine,
selected_factors: List[str],
factor_definitions: dict,
label_factor: dict,
) -> List[str]:
"""使用 metadata 注册因子特征因子通过名称注册label 因子通过表达式注册)"""
"""注册因子SELECTED_FACTORS 从 metadata 查询FACTOR_DEFINITIONS 用表达式注册)"""
print("=" * 80)
print("使用 metadata 注册因子")
print("注册因子")
print("=" * 80)
# 注册所有特征因子(通过 metadata 名称)
# 注册 SELECTED_FACTORS 中的因子(已在 metadata 中)
print("\n注册特征因子(从 metadata:")
for name in factor_definitions.keys():
engine.add_factor_by_name(name)
for name in selected_factors:
engine.add_factor(name)
print(f" - {name}")
# 注册 label 因子(通过表达式,因为 label 不在 metadata 中)
# 注册 FACTOR_DEFINITIONS 中的因子(通过表达式,尚未在 metadata 中)
print("\n注册特征因子(表达式):")
for name, expr in factor_definitions.items():
engine.add_factor(name, expr)
print(f" - {name}: {expr}")
# 注册 label 因子(通过表达式)
print("\n注册 Label 因子(表达式):")
for name, expr in label_factor.items():
engine.add_factor(name, expr)
print(f" - {name}: {expr}")
# 从字典自动获取特征列
feature_cols = list(factor_definitions.keys())
# 特征列 = SELECTED_FACTORS + FACTOR_DEFINITIONS 的 keys
feature_cols = selected_factors + list(factor_definitions.keys())
print(f"\n特征因子数: {len(feature_cols)}")
print(f" - 来自 metadata: {len(selected_factors)}")
print(f" - 来自表达式: {len(factor_definitions)}")
print(f"Label: {list(label_factor.keys())[0]}")
print(f"已注册因子总数: {len(engine.list_registered())}")
@@ -91,7 +102,67 @@ def prepare_data(
# 特征因子定义字典:新增因子只需在此处添加一行
LABEL_NAME = "future_return_5"
FACTOR_DEFINITIONS = FACTOR_DICT = {
# Currently selected factors (the factors chosen for use from FACTOR_DEFINITIONS).
# NOTE(review): create_factors_with_metadata registers each of these by name
# only (engine.add_factor(name)) and puts them first in feature_cols, so each
# name presumably must already exist in the factor metadata — confirm.
SELECTED_FACTORS = [
# ================= 1. Price, trend and path dependency =================
"ma_5",
"ma_20",
"ma_ratio_5_20",
"bias_10",
"high_low_ratio",
"bbi_ratio",
"return_5",
"return_20",
"kaufman_ER_20",
"mom_acceleration_10_20",
"drawdown_from_high_60",
"up_days_ratio_20",
# ================= 2. Volatility, risk adjustment and higher moments =================
"volatility_5",
"volatility_20",
"volatility_ratio",
"std_return_20",
"sharpe_ratio_20",
"min_ret_20",
"volatility_squeeze_5_60",
# ================= 3. Intraday microstructure and anomalies =================
"overnight_intraday_diff",
"upper_shadow_ratio",
"capital_retention_20",
"max_ret_20",
# ================= 4. Volume, liquidity and price-volume divergence =================
"volume_ratio_5_20",
"turnover_rate_mean_5",
"turnover_deviation",
"amihud_illiq_20",
"turnover_cv_20",
"pv_corr_20",
"close_vwap_deviation",
# ================= 5. Fundamental financial characteristics =================
"roe",
"roa",
"profit_margin",
"debt_to_equity",
"current_ratio",
"net_profit_yoy",
"revenue_yoy",
"healthy_expansion_velocity",
# ================= 6. Fundamental valuation and cross-sectional momentum resonance =================
"EP",
"BP",
"CP",
"market_cap_rank",
"turnover_rank",
"return_5_rank",
"EP_rank",
"pe_expansion_trend",
"value_price_divergence",
"active_market_cap",
"ebit_rank",
]
# 因子定义字典(完整因子库)
FACTOR_DEFINITIONS = {
# ================= 1. 价格、趋势与路径依赖 (Trend, Momentum & Path Dependency) =================
"ma_5": "ts_mean(close, 5)",
"ma_20": "ts_mean(close, 20)",
@@ -284,7 +355,9 @@ engine = FactorEngine(metadata_path="data/factors.jsonl")
# 2. 使用 metadata 定义因子
print("\n[2] 定义因子(从 metadata 注册)")
feature_cols = create_factors_with_metadata(engine, FACTOR_DEFINITIONS, LABEL_FACTOR)
feature_cols = create_factors_with_metadata(
engine, SELECTED_FACTORS, FACTOR_DEFINITIONS, LABEL_FACTOR
)
target_col = LABEL_NAME
# 3. 准备数据(使用模块级别的日期配置)