feat(factors): 新增公式解析基础组件
新增公式解析相关模块,支持将字符串表达式解析为 DSL 节点树:

- exceptions.py: 定义公式解析异常体系
  - FormulaParseError 基类,提供位置指示的错误信息
  - UnknownFunctionError 支持模糊匹配建议
  - InvalidSyntaxError、EmptyExpressionError 等具体异常
- parser.py: 基于 Python ast 的公式解析器
  - 支持符号引用、数值常量、二元/一元运算
  - 支持函数调用和比较运算
  - 常量折叠优化
- registry.py: 函数注册表
  - 支持动态注册和查询公式函数
  - 提供可用函数列表和重复注册检查
This commit is contained in:
@@ -10,10 +10,13 @@
|
||||
5. 返回包含因子结果的数据表
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING
|
||||
|
||||
import polars as pl
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.factors.registry import FunctionRegistry
|
||||
|
||||
from src.factors.dsl import (
|
||||
Node,
|
||||
Symbol,
|
||||
@@ -45,25 +48,36 @@ class FactorEngine:
|
||||
planner: 执行计划生成器
|
||||
compute_engine: 计算引擎
|
||||
registered_expressions: 注册的表达式字典
|
||||
_registry: 函数注册表
|
||||
_parser: 公式解析器
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    data_source: Optional[Dict[str, pl.DataFrame]] = None,
    max_workers: int = 4,
    registry: Optional["FunctionRegistry"] = None,
) -> None:
    """Initialise the factor engine.

    Args:
        data_source: In-memory data source handed to ``DataRouter``;
            when None the database connection is used instead.
        max_workers: Maximum number of worker threads for parallel
            computation, forwarded to ``ComputeEngine``.
        registry: Function registry to share with this engine. When
            None, the engine creates its own independent
            ``FunctionRegistry`` instance.
    """
    # Imported at call time; the module level only imports
    # FunctionRegistry under TYPE_CHECKING.
    # NOTE(review): presumably this avoids a circular import — confirm.
    from src.factors.registry import FunctionRegistry
    from src.factors.parser import FormulaParser

    self.router = DataRouter(data_source)
    self.planner = ExecutionPlanner()
    self.compute_engine = ComputeEngine(max_workers=max_workers)
    self.registered_expressions: Dict[str, Node] = {}
    self._plans: Dict[str, ExecutionPlan] = {}

    # An injected registry lets several engines share one set of
    # functions; otherwise fall back to a private one.
    if registry is None:
        registry = FunctionRegistry()
    self._registry = registry
    self._parser = FormulaParser(self._registry)
|
||||
|
||||
def register(
|
||||
self,
|
||||
name: str,
|
||||
@@ -104,6 +118,63 @@ class FactorEngine:
|
||||
|
||||
return self
|
||||
|
||||
def add_factor(
    self,
    name: str,
    expression: Union[str, Node],
    data_specs: Optional[List[DataSpec]] = None,
) -> "FactorEngine":
    """Register a factor given either a formula string or a Node.

    This is an extended form of ``register``: string expressions are
    parsed into a Node first, then registration is delegated to
    ``register``. ``register`` itself is unchanged and still accepts
    only Node objects, so existing callers keep working.

    Parsing is fail-fast: a string expression is parsed immediately,
    so a malformed formula raises here rather than at compute time.

    Args:
        name: Factor name.
        expression: Formula string or Node object.
        data_specs: Optional data specifications, passed through to
            ``register`` unchanged.

    Returns:
        Whatever ``register`` returns (``self``), enabling chaining.

    Raises:
        TypeError: If ``expression`` is neither ``str`` nor ``Node``.
        FormulaParseError: If parsing a string expression fails.

    Example:
        >>> engine = FactorEngine()
        >>>
        >>> # From a string (new capability)
        >>> engine.add_factor("ma20", "ts_mean(close, 20)")
        >>>
        >>> # From a Node (same as register)
        >>> from src.factors.api import close, ts_mean
        >>> engine.add_factor("ma20_node", ts_mean(close, 20))
        >>>
        >>> # A more complex expression
        >>> engine.add_factor("alpha1", "cs_rank(close / open)")
        >>>
        >>> # Chained calls
        >>> (engine
        ...     .add_factor("ma5", "ts_mean(close, 5)")
        ...     .add_factor("ma10", "ts_mean(close, 10)")
        ...     .add_factor("golden_cross", "ma5 > ma10"))
    """
    if isinstance(expression, str):
        # Fail-fast: parse right away so errors surface at registration.
        parsed = self._parser.parse(expression)
        return self.register(name, parsed, data_specs)

    if isinstance(expression, Node):
        # Already a DSL node: hand it straight to register.
        return self.register(name, expression, data_specs)

    raise TypeError(
        f"表达式必须是 str 或 Node 类型,收到 {type(expression).__name__}"
    )
|
||||
|
||||
def compute(
|
||||
self,
|
||||
factor_names: Union[str, List[str]],
|
||||
|
||||
Reference in New Issue
Block a user