2026-03-02 22:29:18 +08:00
|
|
|
|
"""执行计划生成器。
|
|
|
|
|
|
|
|
|
|
|
|
整合编译器和翻译器,生成完整的执行计划。
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Any, Dict, List, Optional, Set, Union
|
|
|
|
|
|
|
|
|
|
|
|
from src.factors.dsl import (
|
|
|
|
|
|
Node,
|
|
|
|
|
|
Symbol,
|
|
|
|
|
|
FunctionNode,
|
|
|
|
|
|
BinaryOpNode,
|
|
|
|
|
|
UnaryOpNode,
|
|
|
|
|
|
Constant,
|
|
|
|
|
|
)
|
|
|
|
|
|
from src.factors.compiler import DependencyExtractor
|
|
|
|
|
|
from src.factors.translator import PolarsTranslator
|
|
|
|
|
|
from src.factors.engine.data_spec import DataSpec, ExecutionPlan
|
2026-03-03 17:32:58 +08:00
|
|
|
|
from src.factors.engine.schema_cache import get_schema_cache
|
2026-03-02 22:29:18 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ExecutionPlanner:
    """Build execution plans from DSL expressions.

    Ties together the dependency extractor and the Polars translator so a
    single call yields a complete ``ExecutionPlan``.

    Attributes:
        compiler: ``DependencyExtractor`` used to collect the fields an
            expression depends on.
        translator: ``PolarsTranslator`` used to turn the expression into a
            Polars expression.
    """

    def __init__(self) -> None:
        """Create the extractor and translator used by this planner."""
        self.compiler = DependencyExtractor()
        self.translator = PolarsTranslator()

    def create_plan(
        self,
        expression: Node,
        output_name: str = "factor",
        data_specs: Optional[List[DataSpec]] = None,
    ) -> ExecutionPlan:
        """Create an execution plan for a DSL expression.

        Args:
            expression: Root node of the DSL expression.
            output_name: Name given to the resulting factor.
            data_specs: Pre-built data specs. When ``None`` they are derived
                from the expression's dependencies; otherwise the given list
                is passed through unchanged (no validation happens here).

        Returns:
            The assembled ``ExecutionPlan``.
        """
        # Step 1: collect the fields the expression depends on.
        required_fields = self.compiler.extract_dependencies(expression)

        # Step 2: translate the expression into its Polars form.
        translated = self.translator.translate(expression)

        # Step 3: fall back to inferred specs only when the caller gave none.
        resolved_specs = (
            data_specs
            if data_specs is not None
            else self._infer_data_specs(required_fields, expression)
        )

        return ExecutionPlan(
            data_specs=resolved_specs,
            polars_expr=translated,
            dependencies=required_fields,
            output_name=output_name,
        )

    def _infer_data_specs(
        self,
        dependencies: Set[str],
        expression: Node,
    ) -> List[DataSpec]:
        """Derive data specs from the set of required fields.

        Asks the shared schema cache to map each required field to a table
        and emits one ``DataSpec`` per matched table.

        NOTE(review): the original notes say the schema cache scans the
        database table structure once and keeps it in memory; that behavior
        lives in ``schema_cache`` and is not visible here -- confirm there.

        Args:
            dependencies: Names of the fields the expression depends on.
            expression: The expression node. Currently unused by this method.

        Returns:
            One ``DataSpec`` per table returned by the schema cache, in the
            order the cache's mapping yields them.
        """
        # Let the schema cache decide which table owns which field.
        fields_by_table = get_schema_cache().match_fields_to_tables(dependencies)

        return [
            DataSpec(table=table, columns=table_columns)
            for table, table_columns in fields_by_table.items()
        ]
|