refactor(factors): 简化 add_factor API 并默认启用 metadata
- 合并 add_factor_by_name 到 add_factor,支持三种调用方式
- FactorManager 构造函数改为可选参数,使用默认路径
- FactorEngine 默认启用 metadata,无需手动配置路径
This commit is contained in:
@@ -40,29 +40,40 @@ from src.training.config import TrainingConfig
|
||||
# ## 2. 辅助函数
|
||||
# %%
|
||||
def create_factors_with_metadata(
|
||||
engine: FactorEngine, factor_definitions: dict, label_factor: dict
|
||||
engine: FactorEngine,
|
||||
selected_factors: List[str],
|
||||
factor_definitions: dict,
|
||||
label_factor: dict,
|
||||
) -> List[str]:
|
||||
"""使用 metadata 注册因子(特征因子通过名称注册,label 因子通过表达式注册)"""
|
||||
"""注册因子(SELECTED_FACTORS 从 metadata 查询,FACTOR_DEFINITIONS 用表达式注册)"""
|
||||
print("=" * 80)
|
||||
print("使用 metadata 注册因子")
|
||||
print("注册因子")
|
||||
print("=" * 80)
|
||||
|
||||
# 注册所有特征因子(通过 metadata 名称)
|
||||
# 注册 SELECTED_FACTORS 中的因子(已在 metadata 中)
|
||||
print("\n注册特征因子(从 metadata):")
|
||||
for name in factor_definitions.keys():
|
||||
engine.add_factor_by_name(name)
|
||||
for name in selected_factors:
|
||||
engine.add_factor(name)
|
||||
print(f" - {name}")
|
||||
|
||||
# 注册 label 因子(通过表达式,因为 label 不在 metadata 中)
|
||||
# 注册 FACTOR_DEFINITIONS 中的因子(通过表达式,尚未在 metadata 中)
|
||||
print("\n注册特征因子(表达式):")
|
||||
for name, expr in factor_definitions.items():
|
||||
engine.add_factor(name, expr)
|
||||
print(f" - {name}: {expr}")
|
||||
|
||||
# 注册 label 因子(通过表达式)
|
||||
print("\n注册 Label 因子(表达式):")
|
||||
for name, expr in label_factor.items():
|
||||
engine.add_factor(name, expr)
|
||||
print(f" - {name}: {expr}")
|
||||
|
||||
# 从字典自动获取特征列
|
||||
feature_cols = list(factor_definitions.keys())
|
||||
# 特征列 = SELECTED_FACTORS + FACTOR_DEFINITIONS 的 keys
|
||||
feature_cols = selected_factors + list(factor_definitions.keys())
|
||||
|
||||
print(f"\n特征因子数: {len(feature_cols)}")
|
||||
print(f" - 来自 metadata: {len(selected_factors)}")
|
||||
print(f" - 来自表达式: {len(factor_definitions)}")
|
||||
print(f"Label: {list(label_factor.keys())[0]}")
|
||||
print(f"已注册因子总数: {len(engine.list_registered())}")
|
||||
|
||||
@@ -236,62 +247,68 @@ def evaluate_ndcg_at_k(
|
||||
# 特征因子定义字典(复用 regression.ipynb 的因子定义)
|
||||
LABEL_NAME = "future_return_5_rank"
|
||||
|
||||
FACTOR_DEFINITIONS = {
|
||||
# ================= 1. 价格、趋势与路径依赖 (Trend, Momentum & Path Dependency) =================
|
||||
"ma_5": "ts_mean(close, 5)",
|
||||
"ma_20": "ts_mean(close, 20)",
|
||||
"ma_ratio_5_20": "ts_mean(close, 5) / (ts_mean(close, 20) + 1e-8) - 1",
|
||||
"bias_10": "close / (ts_mean(close, 10) + 1e-8) - 1",
|
||||
"high_low_ratio": "(close - ts_min(low, 20)) / (ts_max(high, 20) - ts_min(low, 20) + 1e-8)",
|
||||
"bbi_ratio": "(ts_mean(close, 3) + ts_mean(close, 6) + ts_mean(close, 12) + ts_mean(close, 24)) / (4 * close + 1e-8)",
|
||||
"return_5": "(close / (ts_delay(close, 5) + 1e-8)) - 1",
|
||||
"return_20": "(close / (ts_delay(close, 20) + 1e-8)) - 1",
|
||||
"kaufman_ER_20": "abs(close - ts_delay(close, 20)) / (ts_sum(abs(close - ts_delay(close, 1)), 20) + 1e-8)",
|
||||
"mom_acceleration_10_20": "(close / (ts_delay(close, 10) + 1e-8) - 1) - (ts_delay(close, 10) / (ts_delay(close, 20) + 1e-8) - 1)",
|
||||
"drawdown_from_high_60": "close / (ts_max(high, 60) + 1e-8) - 1",
|
||||
"up_days_ratio_20": "ts_sum(close > ts_delay(close, 1), 20) / 20",
|
||||
# 当前选择的因子列表(这些因子已在 metadata 中,按名称注册)
|
||||
SELECTED_FACTORS = [
|
||||
# ================= 1. 价格、趋势与路径依赖 =================
|
||||
"ma_5",
|
||||
"ma_20",
|
||||
"ma_ratio_5_20",
|
||||
"bias_10",
|
||||
"high_low_ratio",
|
||||
"bbi_ratio",
|
||||
"return_5",
|
||||
"return_20",
|
||||
"kaufman_ER_20",
|
||||
"mom_acceleration_10_20",
|
||||
"drawdown_from_high_60",
|
||||
"up_days_ratio_20",
|
||||
# ================= 2. 波动率、风险调整与高阶矩 =================
|
||||
"volatility_5": "ts_std(close, 5)",
|
||||
"volatility_20": "ts_std(close, 20)",
|
||||
"volatility_ratio": "ts_std(close, 5) / (ts_std(close, 20) + 1e-8)",
|
||||
"std_return_20": "ts_std((close / (ts_delay(close, 1) + 1e-8)) - 1, 20)",
|
||||
"sharpe_ratio_20": "ts_mean(close / (ts_delay(close, 1) + 1e-8) - 1, 20) / (ts_std(close / (ts_delay(close, 1) + 1e-8) - 1, 20) + 1e-8)",
|
||||
"min_ret_20": "ts_min(close / (ts_delay(close, 1) + 1e-8) - 1, 20)",
|
||||
"volatility_squeeze_5_60": "ts_std(close, 5) / (ts_std(close, 60) + 1e-8)",
|
||||
"volatility_5",
|
||||
"volatility_20",
|
||||
"volatility_ratio",
|
||||
"std_return_20",
|
||||
"sharpe_ratio_20",
|
||||
"min_ret_20",
|
||||
"volatility_squeeze_5_60",
|
||||
# ================= 3. 日内微观结构与异象 =================
|
||||
"overnight_intraday_diff": "(open / (ts_delay(close, 1) + 1e-8) - 1) - (close / (open + 1e-8) - 1)",
|
||||
"upper_shadow_ratio": "(high - ((open + close + abs(open - close)) / 2)) / (high - low + 1e-8)",
|
||||
"capital_retention_20": "ts_sum(abs(close - open), 20) / (ts_sum(high - low, 20) + 1e-8)",
|
||||
"max_ret_20": "ts_max(close / (ts_delay(close, 1) + 1e-8) - 1, 20)",
|
||||
"overnight_intraday_diff",
|
||||
"upper_shadow_ratio",
|
||||
"capital_retention_20",
|
||||
"max_ret_20",
|
||||
# ================= 4. 量能、流动性与量价背离 =================
|
||||
"volume_ratio_5_20": "ts_mean(vol, 5) / (ts_mean(vol, 20) + 1e-8)",
|
||||
"turnover_rate_mean_5": "ts_mean(turnover_rate, 5)",
|
||||
"turnover_deviation": "(turnover_rate - ts_mean(turnover_rate, 10)) / (ts_std(turnover_rate, 10) + 1e-8)",
|
||||
"amihud_illiq_20": "ts_mean(abs(close / (ts_delay(close, 1) + 1e-8) - 1) / (amount + 1e-8), 20)",
|
||||
"turnover_cv_20": "ts_std(turnover_rate, 20) / (ts_mean(turnover_rate, 20) + 1e-8)",
|
||||
"pv_corr_20": "ts_corr(close / (ts_delay(close, 1) + 1e-8) - 1, vol, 20)",
|
||||
"close_vwap_deviation": "close / (amount / (vol * 100 + 1e-8) + 1e-8) - 1",
|
||||
"volume_ratio_5_20",
|
||||
"turnover_rate_mean_5",
|
||||
"turnover_deviation",
|
||||
"amihud_illiq_20",
|
||||
"turnover_cv_20",
|
||||
"pv_corr_20",
|
||||
"close_vwap_deviation",
|
||||
# ================= 5. 基本面财务特征 =================
|
||||
"roe": "n_income / (total_hldr_eqy_exc_min_int + 1e-8)",
|
||||
"roa": "n_income / (total_assets + 1e-8)",
|
||||
"profit_margin": "n_income / (revenue + 1e-8)",
|
||||
"debt_to_equity": "total_liab / (total_hldr_eqy_exc_min_int + 1e-8)",
|
||||
"current_ratio": "total_cur_assets / (total_cur_liab + 1e-8)",
|
||||
"net_profit_yoy": "(n_income / (ts_delay(n_income, 252) + 1e-8)) - 1",
|
||||
"revenue_yoy": "(revenue / (ts_delay(revenue, 252) + 1e-8)) - 1",
|
||||
"healthy_expansion_velocity": "(total_assets / (ts_delay(total_assets, 252) + 1e-8) - 1) - (total_liab / (ts_delay(total_liab, 252) + 1e-8) - 1)",
|
||||
"roe",
|
||||
"roa",
|
||||
"profit_margin",
|
||||
"debt_to_equity",
|
||||
"current_ratio",
|
||||
"net_profit_yoy",
|
||||
"revenue_yoy",
|
||||
"healthy_expansion_velocity",
|
||||
# ================= 6. 基本面估值与截面动量共振 =================
|
||||
"EP": "n_income / (total_mv * 10000 + 1e-8)",
|
||||
"BP": "total_hldr_eqy_exc_min_int / (total_mv * 10000 + 1e-8)",
|
||||
"CP": "n_cashflow_act / (total_mv * 10000 + 1e-8)",
|
||||
"market_cap_rank": "cs_rank(total_mv)",
|
||||
"turnover_rank": "cs_rank(turnover_rate)",
|
||||
"return_5_rank": "cs_rank((close / (ts_delay(close, 5) + 1e-8)) - 1)",
|
||||
"EP_rank": "cs_rank(n_income / (total_mv + 1e-8))",
|
||||
"pe_expansion_trend": "(total_mv / (n_income + 1e-8)) / (ts_delay(total_mv, 60) / (ts_delay(n_income, 60) + 1e-8) + 1e-8) - 1",
|
||||
"value_price_divergence": "cs_rank((n_income - ts_delay(n_income, 252)) / (abs(ts_delay(n_income, 252)) + 1e-8)) - cs_rank(close / (ts_delay(close, 20) + 1e-8))",
|
||||
"active_market_cap": "total_mv * ts_mean(turnover_rate, 20)",
|
||||
"ebit_rank": "cs_rank(ebit)",
|
||||
"EP",
|
||||
"BP",
|
||||
"CP",
|
||||
"market_cap_rank",
|
||||
"turnover_rank",
|
||||
"return_5_rank",
|
||||
"EP_rank",
|
||||
"pe_expansion_trend",
|
||||
"value_price_divergence",
|
||||
"active_market_cap",
|
||||
"ebit_rank",
|
||||
]
|
||||
|
||||
# 因子定义字典(尚未在 metadata 中、需通过表达式注册的因子)
|
||||
FACTOR_DEFINITIONS = {
|
||||
# "turnover_volatility_ratio": "log(ts_std(turnover_rate, 20))"
|
||||
}
|
||||
|
||||
# Label 因子定义(不参与训练,用于计算目标)
|
||||
@@ -332,7 +349,7 @@ MODEL_PARAMS = {
|
||||
N_QUANTILES = 20 # 将 label 分为 20 组
|
||||
|
||||
# 特征列(用于数据处理器)
|
||||
FEATURE_COLS = list(FACTOR_DEFINITIONS.keys())
|
||||
# Build a new list instead of aliasing SELECTED_FACTORS, and include the
# expression-defined factors so this matches the feature list assembled in
# create_factors_with_metadata (selected factors first, then
# FACTOR_DEFINITIONS keys).
FEATURE_COLS = SELECTED_FACTORS + list(FACTOR_DEFINITIONS.keys())
|
||||
|
||||
# 数据处理器配置
|
||||
PROCESSORS = [
|
||||
@@ -385,11 +402,13 @@ print("=" * 80)
|
||||
|
||||
# 1. 创建 FactorEngine(启用 metadata 功能)
|
||||
print("\n[1] 创建 FactorEngine")
|
||||
engine = FactorEngine(metadata_path="data/factors.jsonl")
|
||||
engine = FactorEngine()
|
||||
|
||||
# 2. 使用 metadata 定义因子
|
||||
print("\n[2] 定义因子(从 metadata 注册)")
|
||||
feature_cols = create_factors_with_metadata(engine, FACTOR_DEFINITIONS, LABEL_FACTOR)
|
||||
feature_cols = create_factors_with_metadata(
|
||||
engine, SELECTED_FACTORS, FACTOR_DEFINITIONS, LABEL_FACTOR
|
||||
)
|
||||
|
||||
# 3. 准备数据
|
||||
print("\n[3] 准备数据")
|
||||
|
||||
Reference in New Issue
Block a user