fix(factor-engine): 修复多因子计算时数据规格字段合并的 bug

- 修复 FactorEngine.compute() 中相同表的字段未正确合并的问题
- 将按表名的简单去重(每张表只保留第一个 DataSpec,其余字段被丢弃)改为按表合并字段集合,确保所有因子依赖的字段都被获取
- 解决 high_low_ratio 等需要 high/low 字段的因子计算失败问题
This commit is contained in:
2026-03-14 02:12:20 +08:00
parent ca27cb297a
commit ecb22b826c
4 changed files with 141 additions and 128 deletions

View File

@@ -335,13 +335,28 @@ class FactorEngine:
for plan in plans:
all_specs.extend(plan.data_specs)
# 去重数据规格(基于表名)
seen_tables: set = set()
unique_specs: List[DataSpec] = []
# 合并相同表的字段(而不是简单地去重)
table_to_columns: Dict[str, Set[str]] = {}
table_to_spec: Dict[str, DataSpec] = {}
for spec in all_specs:
if spec.table not in seen_tables:
seen_tables.add(spec.table)
unique_specs.append(spec)
if spec.table not in table_to_columns:
table_to_columns[spec.table] = set()
table_to_spec[spec.table] = spec
table_to_columns[spec.table].update(spec.columns)
# 创建合并后的数据规格
unique_specs: List[DataSpec] = []
for table_name, columns in table_to_columns.items():
original_spec = table_to_spec[table_name]
unique_specs.append(
DataSpec(
table=table_name,
columns=list(columns),
join_type=original_spec.join_type,
left_on=original_spec.left_on,
right_on=original_spec.right_on,
)
)
# 4. 从路由器获取核心宽表
core_data = self.router.fetch_data(