perf(factors/engine): 重构计算引擎使用 Polars 原生并行
- 移除 Python 多进程/多线程池,消除 DataFrame 序列化开销
- 采用 BFS 分层执行策略,每层表达式通过单次 with_columns 提交
- 利用 Polars Rust 引擎实现零拷贝并行计算
- 添加死锁检测机制处理依赖环
This commit is contained in:
@@ -57,7 +57,6 @@ class FactorEngine:
|
||||
def __init__(
|
||||
self,
|
||||
data_source: Optional[Dict[str, pl.DataFrame]] = None,
|
||||
max_workers: int = 4,
|
||||
registry: Optional["FunctionRegistry"] = None,
|
||||
metadata_path: Optional[str] = None,
|
||||
) -> None:
|
||||
@@ -65,16 +64,15 @@ class FactorEngine:
|
||||
|
||||
Args:
|
||||
data_source: 内存数据源,为 None 时使用数据库连接
|
||||
max_workers: 并行计算的最大工作线程数
|
||||
registry: 函数注册表,None 时创建独立实例
|
||||
metadata_path: 因子元数据文件路径,为 None 时不启用 metadata 功能
|
||||
metadata_path: 因子元数据文件路径,为 None 时启用默认 metadata 功能
|
||||
"""
|
||||
from src.factors.registry import FunctionRegistry
|
||||
from src.factors.parser import FormulaParser
|
||||
|
||||
self.router = DataRouter(data_source)
|
||||
self.planner = ExecutionPlanner()
|
||||
self.compute_engine = ComputeEngine(max_workers=max_workers)
|
||||
self.compute_engine = ComputeEngine()
|
||||
self.registered_expressions: Dict[str, Node] = {}
|
||||
self._plans: Dict[str, ExecutionPlan] = {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user