From e5e636d6cdfde0845f1999d6a3c6e5f53228a11b Mon Sep 17 00:00:00 2001 From: liaozhaorun <1300336796@qq.com> Date: Tue, 7 Apr 2026 22:49:33 +0800 Subject: [PATCH] =?UTF-8?q?refactor(factorminer):=20=E7=BB=9F=E4=B8=80?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=E5=BC=95=E7=94=A8=E8=B7=AF=E5=BE=84=E5=B9=B6?= =?UTF-8?q?=E7=A7=BB=E9=99=A4=E7=8B=AC=E7=AB=8B=E5=8C=85=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=20-=20=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8=E6=96=87=E4=BB=B6?= =?UTF-8?q?=20-=20=E6=96=B0=E5=A2=9E=E6=9C=AC=E5=9C=B0=E6=A1=86=E6=9E=B6?= =?UTF-8?q?=E6=95=B4=E5=90=88=E5=AE=9E=E6=96=BD=E8=AE=A1=E5=88=92=E6=96=87?= =?UTF-8?q?=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...026-04-07-factorminer-local-integration.md | 363 +++ src/factorminer/factorminer/__init__.py | 42 - src/factorminer/factorminer/agent/__init__.py | 78 - src/factorminer/factorminer/agent/critic.py | 837 ------- src/factorminer/factorminer/agent/debate.py | 949 ------- .../factorminer/agent/factor_generator.py | 236 -- .../factorminer/agent/llm_interface.py | 365 --- .../factorminer/agent/output_parser.py | 259 -- .../factorminer/agent/prompt_builder.py | 682 ------ .../factorminer/agent/specialists.py | 596 ----- .../factorminer/benchmark/__init__.py | 73 - .../factorminer/benchmark/ablation.py | 798 ------ .../factorminer/benchmark/catalogs.py | 236 -- .../factorminer/benchmark/helix_benchmark.py | 2172 ----------------- .../factorminer/benchmark/runtime.py | 1498 ------------ src/factorminer/factorminer/cli.py | 1566 ------------ .../factorminer/configs/__init__.py | 25 - .../factorminer/configs/benchmark_full.yaml | 45 - .../factorminer/configs/default.yaml | 307 --- .../factorminer/configs/demo_local.yaml | 57 - .../factorminer/configs/helix_research.yaml | 82 - .../factorminer/configs/paper_repro.yaml | 36 - src/factorminer/factorminer/core/__init__.py | 67 - .../factorminer/core/canonicalizer.py | 206 -- src/factorminer/factorminer/core/config.py | 61 - 
.../factorminer/core/expression_tree.py | 736 ------ .../factorminer/core/factor_library.py | 602 ----- .../factorminer/core/helix_loop.py | 1576 ------------ .../factorminer/core/library_io.py | 921 ------- src/factorminer/factorminer/core/parser.py | 374 --- .../factorminer/core/provenance.py | 241 -- .../factorminer/core/ralph_loop.py | 1598 ------------ src/factorminer/factorminer/core/session.py | 187 -- src/factorminer/factorminer/core/types.py | 269 -- src/factorminer/factorminer/data/__init__.py | 75 - src/factorminer/factorminer/data/loader.py | 244 -- src/factorminer/factorminer/data/mock_data.py | 323 --- .../factorminer/data/preprocessor.py | 364 --- .../factorminer/data/tensor_builder.py | 505 ---- .../factorminer/evaluation/__init__.py | 169 -- .../factorminer/evaluation/admission.py | 221 -- .../factorminer/evaluation/backtest.py | 397 --- .../factorminer/evaluation/capacity.py | 449 ---- .../factorminer/evaluation/causal.py | 580 ----- .../factorminer/evaluation/combination.py | 195 -- .../factorminer/evaluation/correlation.py | 374 --- .../factorminer/evaluation/metrics.py | 377 --- .../factorminer/evaluation/pipeline.py | 736 ------ .../factorminer/evaluation/portfolio.py | 266 -- .../factorminer/evaluation/regime.py | 623 ----- .../factorminer/evaluation/research.py | 518 ---- .../factorminer/evaluation/runtime.py | 480 ---- .../factorminer/evaluation/selection.py | 280 --- .../factorminer/evaluation/significance.py | 495 ---- .../evaluation/transaction_costs.py | 539 ---- .../factorminer/memory/__init__.py | 84 - .../factorminer/memory/embeddings.py | 392 --- .../factorminer/memory/evolution.py | 482 ---- .../factorminer/memory/experience_memory.py | 594 ----- .../factorminer/memory/formation.py | 446 ---- .../factorminer/memory/kg_retrieval.py | 336 --- .../factorminer/memory/knowledge_graph.py | 418 ---- .../factorminer/memory/memory_store.py | 165 -- .../memory/online_regime_memory.py | 1625 ------------ .../factorminer/memory/retrieval.py | 
288 --- .../factorminer/operators/__init__.py | 54 - .../factorminer/operators/arithmetic.py | 223 -- .../factorminer/operators/auto_inventor.py | 547 ----- .../factorminer/operators/crosssectional.py | 151 -- .../factorminer/operators/custom.py | 251 -- .../factorminer/operators/gpu_backend.py | 110 - .../factorminer/operators/logical.py | 185 -- .../factorminer/operators/neuro_symbolic.py | 1614 ------------ .../factorminer/operators/registry.py | 142 -- .../factorminer/operators/regression.py | 167 -- .../factorminer/operators/smoothing.py | 173 -- .../factorminer/operators/statistical.py | 452 ---- .../factorminer/operators/timeseries.py | 395 --- src/factorminer/factorminer/tests/__init__.py | 1 - src/factorminer/factorminer/tests/conftest.py | 163 -- .../factorminer/tests/test_auto_inventor.py | 130 - .../factorminer/tests/test_benchmark.py | 484 ---- .../factorminer/tests/test_canonicalizer.py | 79 - .../factorminer/tests/test_capacity.py | 118 - .../factorminer/tests/test_causal.py | 147 -- .../factorminer/tests/test_cli_analysis.py | 312 --- .../factorminer/tests/test_cli_helix.py | 142 -- .../factorminer/tests/test_combination.py | 531 ---- .../factorminer/tests/test_data.py | 258 -- .../factorminer/tests/test_debate.py | 229 -- .../factorminer/tests/test_evaluation.py | 287 --- .../factorminer/tests/test_expression_tree.py | 307 --- .../factorminer/tests/test_helix_loop.py | 251 -- .../factorminer/tests/test_knowledge_graph.py | 166 -- .../factorminer/tests/test_library.py | 356 --- .../factorminer/tests/test_memory.py | 405 --- .../factorminer/tests/test_operators.py | 500 ---- .../factorminer/tests/test_provenance.py | 131 - .../factorminer/tests/test_ralph_loop.py | 1076 -------- .../factorminer/tests/test_regime.py | 118 - .../factorminer/tests/test_research.py | 237 -- .../tests/test_runtime_analysis.py | 196 -- .../factorminer/tests/test_significance.py | 174 -- src/factorminer/factorminer/utils/__init__.py | 26 - 
src/factorminer/factorminer/utils/config.py | 741 ------ src/factorminer/factorminer/utils/logging.py | 297 --- .../factorminer/utils/reporting.py | 499 ---- .../factorminer/utils/tearsheet.py | 399 --- .../factorminer/utils/visualization.py | 564 ----- 109 files changed, 363 insertions(+), 44605 deletions(-) create mode 100644 docs/plans/2026-04-07-factorminer-local-integration.md delete mode 100644 src/factorminer/factorminer/__init__.py delete mode 100644 src/factorminer/factorminer/agent/__init__.py delete mode 100644 src/factorminer/factorminer/agent/critic.py delete mode 100644 src/factorminer/factorminer/agent/debate.py delete mode 100644 src/factorminer/factorminer/agent/factor_generator.py delete mode 100644 src/factorminer/factorminer/agent/llm_interface.py delete mode 100644 src/factorminer/factorminer/agent/output_parser.py delete mode 100644 src/factorminer/factorminer/agent/prompt_builder.py delete mode 100644 src/factorminer/factorminer/agent/specialists.py delete mode 100644 src/factorminer/factorminer/benchmark/__init__.py delete mode 100644 src/factorminer/factorminer/benchmark/ablation.py delete mode 100644 src/factorminer/factorminer/benchmark/catalogs.py delete mode 100644 src/factorminer/factorminer/benchmark/helix_benchmark.py delete mode 100644 src/factorminer/factorminer/benchmark/runtime.py delete mode 100644 src/factorminer/factorminer/cli.py delete mode 100644 src/factorminer/factorminer/configs/__init__.py delete mode 100644 src/factorminer/factorminer/configs/benchmark_full.yaml delete mode 100644 src/factorminer/factorminer/configs/default.yaml delete mode 100644 src/factorminer/factorminer/configs/demo_local.yaml delete mode 100644 src/factorminer/factorminer/configs/helix_research.yaml delete mode 100644 src/factorminer/factorminer/configs/paper_repro.yaml delete mode 100644 src/factorminer/factorminer/core/__init__.py delete mode 100644 src/factorminer/factorminer/core/canonicalizer.py delete mode 100644 
src/factorminer/factorminer/core/config.py delete mode 100644 src/factorminer/factorminer/core/expression_tree.py delete mode 100644 src/factorminer/factorminer/core/factor_library.py delete mode 100644 src/factorminer/factorminer/core/helix_loop.py delete mode 100644 src/factorminer/factorminer/core/library_io.py delete mode 100644 src/factorminer/factorminer/core/parser.py delete mode 100644 src/factorminer/factorminer/core/provenance.py delete mode 100644 src/factorminer/factorminer/core/ralph_loop.py delete mode 100644 src/factorminer/factorminer/core/session.py delete mode 100644 src/factorminer/factorminer/core/types.py delete mode 100644 src/factorminer/factorminer/data/__init__.py delete mode 100644 src/factorminer/factorminer/data/loader.py delete mode 100644 src/factorminer/factorminer/data/mock_data.py delete mode 100644 src/factorminer/factorminer/data/preprocessor.py delete mode 100644 src/factorminer/factorminer/data/tensor_builder.py delete mode 100644 src/factorminer/factorminer/evaluation/__init__.py delete mode 100644 src/factorminer/factorminer/evaluation/admission.py delete mode 100644 src/factorminer/factorminer/evaluation/backtest.py delete mode 100644 src/factorminer/factorminer/evaluation/capacity.py delete mode 100644 src/factorminer/factorminer/evaluation/causal.py delete mode 100644 src/factorminer/factorminer/evaluation/combination.py delete mode 100644 src/factorminer/factorminer/evaluation/correlation.py delete mode 100644 src/factorminer/factorminer/evaluation/metrics.py delete mode 100644 src/factorminer/factorminer/evaluation/pipeline.py delete mode 100644 src/factorminer/factorminer/evaluation/portfolio.py delete mode 100644 src/factorminer/factorminer/evaluation/regime.py delete mode 100644 src/factorminer/factorminer/evaluation/research.py delete mode 100644 src/factorminer/factorminer/evaluation/runtime.py delete mode 100644 src/factorminer/factorminer/evaluation/selection.py delete mode 100644 
src/factorminer/factorminer/evaluation/significance.py delete mode 100644 src/factorminer/factorminer/evaluation/transaction_costs.py delete mode 100644 src/factorminer/factorminer/memory/__init__.py delete mode 100644 src/factorminer/factorminer/memory/embeddings.py delete mode 100644 src/factorminer/factorminer/memory/evolution.py delete mode 100644 src/factorminer/factorminer/memory/experience_memory.py delete mode 100644 src/factorminer/factorminer/memory/formation.py delete mode 100644 src/factorminer/factorminer/memory/kg_retrieval.py delete mode 100644 src/factorminer/factorminer/memory/knowledge_graph.py delete mode 100644 src/factorminer/factorminer/memory/memory_store.py delete mode 100644 src/factorminer/factorminer/memory/online_regime_memory.py delete mode 100644 src/factorminer/factorminer/memory/retrieval.py delete mode 100644 src/factorminer/factorminer/operators/__init__.py delete mode 100644 src/factorminer/factorminer/operators/arithmetic.py delete mode 100644 src/factorminer/factorminer/operators/auto_inventor.py delete mode 100644 src/factorminer/factorminer/operators/crosssectional.py delete mode 100644 src/factorminer/factorminer/operators/custom.py delete mode 100644 src/factorminer/factorminer/operators/gpu_backend.py delete mode 100644 src/factorminer/factorminer/operators/logical.py delete mode 100644 src/factorminer/factorminer/operators/neuro_symbolic.py delete mode 100644 src/factorminer/factorminer/operators/registry.py delete mode 100644 src/factorminer/factorminer/operators/regression.py delete mode 100644 src/factorminer/factorminer/operators/smoothing.py delete mode 100644 src/factorminer/factorminer/operators/statistical.py delete mode 100644 src/factorminer/factorminer/operators/timeseries.py delete mode 100644 src/factorminer/factorminer/tests/__init__.py delete mode 100644 src/factorminer/factorminer/tests/conftest.py delete mode 100644 src/factorminer/factorminer/tests/test_auto_inventor.py delete mode 100644 
src/factorminer/factorminer/tests/test_benchmark.py delete mode 100644 src/factorminer/factorminer/tests/test_canonicalizer.py delete mode 100644 src/factorminer/factorminer/tests/test_capacity.py delete mode 100644 src/factorminer/factorminer/tests/test_causal.py delete mode 100644 src/factorminer/factorminer/tests/test_cli_analysis.py delete mode 100644 src/factorminer/factorminer/tests/test_cli_helix.py delete mode 100644 src/factorminer/factorminer/tests/test_combination.py delete mode 100644 src/factorminer/factorminer/tests/test_data.py delete mode 100644 src/factorminer/factorminer/tests/test_debate.py delete mode 100644 src/factorminer/factorminer/tests/test_evaluation.py delete mode 100644 src/factorminer/factorminer/tests/test_expression_tree.py delete mode 100644 src/factorminer/factorminer/tests/test_helix_loop.py delete mode 100644 src/factorminer/factorminer/tests/test_knowledge_graph.py delete mode 100644 src/factorminer/factorminer/tests/test_library.py delete mode 100644 src/factorminer/factorminer/tests/test_memory.py delete mode 100644 src/factorminer/factorminer/tests/test_operators.py delete mode 100644 src/factorminer/factorminer/tests/test_provenance.py delete mode 100644 src/factorminer/factorminer/tests/test_ralph_loop.py delete mode 100644 src/factorminer/factorminer/tests/test_regime.py delete mode 100644 src/factorminer/factorminer/tests/test_research.py delete mode 100644 src/factorminer/factorminer/tests/test_runtime_analysis.py delete mode 100644 src/factorminer/factorminer/tests/test_significance.py delete mode 100644 src/factorminer/factorminer/utils/__init__.py delete mode 100644 src/factorminer/factorminer/utils/config.py delete mode 100644 src/factorminer/factorminer/utils/logging.py delete mode 100644 src/factorminer/factorminer/utils/reporting.py delete mode 100644 src/factorminer/factorminer/utils/tearsheet.py delete mode 100644 src/factorminer/factorminer/utils/visualization.py diff --git 
a/docs/plans/2026-04-07-factorminer-local-integration.md b/docs/plans/2026-04-07-factorminer-local-integration.md new file mode 100644 index 0000000..574d1c2 --- /dev/null +++ b/docs/plans/2026-04-07-factorminer-local-integration.md @@ -0,0 +1,363 @@ +# FactorMiner 本地框架整合实施计划 + +> 目标:将 `src/factorminer` 完全整合进 ProStock 项目,数据加载、因子计算全部使用本地框架,仅在因子生成、落库、指标分析时保留 FactorMiner 代码。 + +--- + +## 代码风格与本地框架融合规范(全局约束) + +所有新增/修改代码必须遵循 ProStock 代码风格,严禁出现 FactorMiner 原生的松散风格或外部项目风格。 + +1. **命名规范** + - 函数/方法/变量:`snake_case` + - 类名:`PascalCase` + - 常量:`UPPER_CASE` + - 私有方法/属性:`_leading_underscore` + +2. **类型提示** + - 所有公共函数必须标注参数类型和返回类型 + - 可空类型使用 `Optional[X]` 或 `X | None`(Python 3.10+) + - 复杂类型从 `typing` 导入:`Dict`, `List`, `Callable`, `Tuple`, `Any` + +3. **文档字符串** + - **中文** Google 风格 + - 第一行为简短摘要 + - 必须包含 `Args:` 和 `Returns:` 段落 + +4. **导入顺序** + ```python + # 1. 标准库 + import os + from typing import Optional, Dict, List + + # 2. 第三方包 + import numpy as np + import polars as pl + + # 3. 本地模块(绝对导入) + from src.data.storage import Storage + from src.factors import FactorEngine + ``` + +5. **错误处理** + - 禁止裸 `except:` + - 错误信息格式:`print(f"[ERROR] 上下文: {e}")` + - 记录上下文后重新抛出 `raise` + +6. **日志与输出** + - 使用带前缀的 `print`:`print("[模块名] 消息")` + - 循环进度使用 `tqdm` + - **禁止 emoji** + +7. **数据加载** + - 查询模式必须使用 `Storage(read_only=True)` + - 因子计算统一通过 `FactorEngine` + +8. 
**测试** + - 所有新模块必须配套 `tests/test_*.py` + - 运行命令:`uv run pytest tests/test_xxx.py -v` + +--- + +## Step 0: 统一模块引用风格为 `src.*`(已完成) + +**状态:** [x] 已完成(通过脚本批量替换) + +- 所有 `from factorminer.xxx` 已替换为 `from src.factorminer.factorminer.xxx` +- 所有字符串形式的模块引用(如 `"factorminer.xxx"`)已同步更新 + +--- + +## Step 1: 本地数据加载层(`LocalDataLoader`) + +**文件** +- 新建:`src/factorminer/factorminer/data/local_data_loader.py` +- 测试:`tests/test_factorminer_local_data_loader.py` + +**目标** +- 弃用 `loader.py` + `preprocessor.py`,改为从本地 DuckDB `pro_bar` 表读取数据 +- 统一日期范围:`20190101` ~ `20231231` +- 支持股票池筛选(与 `experiment/common.py` 的 `stock_pool_filter` 对齐) +- 生成 `$vwap` 等价字段(`amount / vol`),并提供统一的 `asset_ids` / `timestamps` 索引 + +**实现要点** +- 使用 `Storage(read_only=True).load_polars("pro_bar", ...)` 读取数据 +- 日期格式统一为字符串 `YYYYMMDD` +- 股票池筛选通过注入的 `filter_func` 完成(默认使用 `experiment/common.py` 的筛选逻辑) +- 返回封装对象 `LocalPanel`,包含: + - `df: pl.DataFrame`(原始长表) + - `asset_ids: np.ndarray` + - `timestamps: np.ndarray` + +**代码风格检查点** +- 类名 `LocalDataLoader` / `LocalPanel` +- 所有公共方法带类型提示和中文 docstring +- 导入顺序正确 + +--- + +## Step 2: DSL 翻译器(`FmToLocalTranslator`) + +**文件** +- 新建:`src/factorminer/factorminer/core/formula_translator.py` +- 测试:`tests/test_factorminer_formula_translator.py` + +**目标** +- 将 FactorMiner 论文中的 110 个 CamelCase DSL 公式翻译成本地 snake_case DSL +- 覆盖全部算子,未覆盖的算子翻译结果前加 `# TODO` 标记 +- 翻译器**仅用于** paper factors 导入和向后兼容,不用于 LLM 生成路径 + +**映射规则示例** +| FactorMiner | 本地 DSL | +|-------------|----------| +| `Neg(X)` | `-X` | +| `Sub(A, B)` | `A - B` | +| `Div(A, B)` | `A / B` | +| `CsRank(X)` | `cs_rank(X)` | +| `TsMean(X, 20)` | `ts_mean(X, 20)` | +| `$close` | `close` | +| `$volume` | `vol` | +| `$amt` | `amount` | +| `$vwap` | `amount / vol` | + +**实现要点** +- 使用递归下降直接翻译 `ExpressionTree` 节点,不依赖字符串替换(避免括号歧义) +- `LeafNode` 处理字段映射;`OperatorNode` 处理算子映射 +- 对二元算术算子输出中缀表达式并合理加括号 +- 未实现的算子返回 `# TODO: <原始算子名>(...)` + +**代码风格检查点** +- 翻译器为一个纯函数类,无状态 +- 单元测试覆盖 paper factors 中的高频算子和至少 5 个完整公式 + +--- + +## Step 3: 禁用 npz 并将翻译器集成到库 
I/O + +**文件** +- 修改:`src/factorminer/factorminer/core/library_io.py` +- 修改:`src/factorminer/factorminer/cli.py`(如有 `save_signals` 参数则改为始终 False) +- 测试:`tests/test_factorminer_library_io.py` + +**目标** +- 彻底禁止 `.npz` 信号缓存落盘 +- `load_library` 加载内置 110 个 paper factors 时,自动调用翻译器将其转换为本地的 snake_case DSL +- 如果翻译结果是 `# TODO`,则在 factor metadata 中标记 `unsupported=True` + +**修改要点** +- `save_library(..., save_signals)`:无论传入什么,均忽略 `save_signals`,且不写 `.npz` +- `load_library(path)`:恢复 JSON 后,将每个 `factor.formula` 通过翻译器转换 +- `import_from_paper()`:在构建 FactorLibrary 时直接翻译所有公式 + +**代码风格检查点** +- 修改点尽量少,废弃参数保留以兼容旧签名,但内部忽略 +- 打印日志说明 npz 已禁用:`print("[library_io] 信号缓存已禁用,仅保存 JSON 元数据")` + +--- + +## Step 4: LLM Prompt 改造(让 Agent 直接生成本地 DSL) + +**文件** +- 修改:`src/factorminer/factorminer/agent/prompt_builder.py` +- 修改:`src/factorminer/factorminer/agent/factor_generator.py`(如有必要) +- 测试:`tests/test_factorminer_prompt.py` + +**目标** +- 将 Prompt 中的 DSL 规范从 CamelCase + `$` 前缀改为本地 snake_case DSL +- 修改示例公式,使其全部为本地 DSL 格式(如 `cs_rank(close / ts_delay(close, 5) - 1)`) +- 明确可用字段:`open`, `high`, `low`, `close`, `vol`, `amount`, `vwap`(可用 `amount / vol` 计算) + +**修改要点** +- 重写 `SYSTEM_PROMPT` 中的 DSL 规则段落 +- 将所有 prompt 示例公式替换为本地 DSL +- `OutputParser` 中的公式清洗逻辑需同步适配(去掉 `$`,但保留中文描述) + +**代码风格检查点** +- Prompt 内容易读、无 emoji +- 通过单元测试验证 prompt 中生成本地 DSL 示例的正确性 + +--- + +## Step 5: `LocalFactorEvaluator`(FactorEngine 执行封装) + +**文件** +- 新建:`src/factorminer/factorminer/evaluation/local_engine.py` +- 测试:`tests/test_factorminer_local_engine.py` + +**目标** +- 封装 `FactorEngine`,提供与 FactorMiner `compute_tree_signals` 兼容的接口 +- 输入:候选因子 DSL 列表;输出:`(M, T)` numpy 信号矩阵字典 +- 支持批量计算 + 立即清理 engine 状态 + +**类签名设计** +```python +class LocalFactorEvaluator: + def __init__(self, data_loader: LocalDataLoader) -> None: + ... + + def evaluate( + self, + specs: List[Tuple[str, str]], + ) -> Dict[str, np.ndarray]: + """批量计算并返回 {name: (M, T) 矩阵}。""" + ... 
+ + def evaluate_single( + self, + name: str, + formula: str, + ) -> np.ndarray: + """计算单个因子。""" + ... +``` + +**实现要点** +- `evaluate` 中一次性注册所有 specs,调用 `engine.compute(...)` +- 使用 Polars 的 `DataFrame.pivot` 将返回的长表转换为 `(M, T)` numpy 矩阵 +- 缺失值填充 `np.nan` +- 计算结束后调用 `engine.clear()` + +**代码风格检查点** +- 严格的类型提示和中文 docstring +- 日志打印:`print(f"[local_engine] 开始批量计算 {n} 个因子...")` + +--- + +## Step 6: 替换计算管线(`pipeline.py` / `runtime.py`) + +**文件** +- 修改:`src/factorminer/factorminer/evaluation/pipeline.py` +- 修改:`src/factorminer/factorminer/evaluation/runtime.py` +- 测试:`tests/test_factorminer_pipeline_integration.py` + +**目标** +- 将 `compute_tree_signals(..., data_dict)` 替换为通过 `LocalFactorEvaluator` 计算 +- 保留原有 IC、stats、quintile 分析逻辑 + +**修改 `pipeline.py` 要点** +- `ValidationPipeline.__init__` 接收 `data_loader: LocalDataLoader` +- 构建内部 `LocalFactorEvaluator` +- `compute_tree_signals` 改为调用 `evaluator.evaluate_single(name, formula)` +- `evaluate` 方法中,一次性批量计算所有候选因子,再逐个进入 stats + +**修改 `runtime.py` 要点** +- `evaluate_factors` 中实例化 `LocalFactorEvaluator` +- 对每个 factor 调用 `evaluate_single`;若 formula 以 `# TODO` 开头,标记为 reject +- 保留 split-mask 和 stats 计算逻辑 + +**代码风格检查点** +- 修改点精确定位,不改变评估函数的返回数据结构 +- 兼容测试通过后再提交 + +--- + +## Step 7: 内存优化——库中因子按需重算 + +**文件** +- 修改:`src/factorminer/factorminer/core/factor_library.py` +- 测试:`tests/test_factorminer_library_memory.py` + +**目标** +- 库内因子对象不再长期持有 `(M, T)` numpy signals +- 相关性检查改为按需调用 `LocalFactorEvaluator` 重算 + +**修改要点** +- `admit()` 时不再保存 `signals` 到 `Factor` 对象 +- `compute_correlation` 签名改为接收 `evaluator: LocalFactorEvaluator` +- 内部遍历库中因子,临时调用 `evaluator.evaluate_single` 计算信号,再与候选信号求相关 +- 若 formula 以 `# TODO` 开头则跳过(返回 `0.0`) +- 删除 `_extend_correlation_matrix` / `_recompute_matrix_slot` 增量维护逻辑(改为动态求最大相关) + +**代码风格检查点** +- 废弃旧方法时保留空壳或私有方法,避免测试大面积报错 +- 中文注释说明为什么删除增量矩阵(本地引擎重算成本低,内存优先) + +--- + +## Step 8: 端到端集成测试(110 Paper Factors) + +**文件** +- 新建:`tests/test_factorminer_e2e.py` + +**目标** +- 验证翻译后的 110 个 paper factors 全部能在本地引擎上成功计算信号 +- 排除因未实现算子导致的 TODO
公式,统计成功率 + +**测试逻辑** +1. 调用 `import_from_paper()` 加载因子库 +2. 实例化 `LocalDataLoader` 读取 20200101 ~ 20201231 数据 +3. 实例化 `LocalFactorEvaluator` +4. 过滤掉 `unsupported=True` 的因子 +5. 批量计算剩余因子,断言输出形状为 `(M, T)` 且不全为 NaN +6. 打印统计:`print(f"[e2e] 成功 {x}/110,跳过 {y} 个未实现算子")` + +**代码风格检查点** +- 使用 `pytest.mark.slow` 标记(若运行时间 > 30 秒) +- 不依赖外部 API Key + +--- + +## Step 9: 清理所有 checkpoint 和 demo 中的 npz 保存逻辑 + +**文件** +- 修改:`src/factorminer/factorminer/core/ralph_loop.py` +- 修改:`src/factorminer/factorminer/core/helix_loop.py` +- 修改:`src/factorminer/run_demo.py` +- 修改:`src/factorminer/run_phase2_benchmark.py` +- 修改:`src/factorminer/factorminer/benchmark/*.py`(如有 `save_signals` 调用) + +**目标** +- 确保任何运行路径都不会意外触发 `.npz` 信号缓存落盘 +- 移除或注释掉所有 `library_io.save_library(..., save_signals=True)` 调用 + +**修改要点** +- 搜索 `save_signals=True` 和 `.npz` 关键字,逐一处理 +- 改为 `save_signals=False` 或直接调用不带该参数的 `save_library` + +--- + +## Step 10: 代码风格审查、测试全量回归与提交 + +**执行清单** +1. 运行 `uv run pytest tests/test_factorminer_* -v`,确保全部通过 +2. 运行 `uv run pytest tests/test_factor_engine.py tests/test_factor_integration.py -v`,确保本地框架未受影响 +3. 检查新增代码中是否混入 emoji +4. 检查新增代码的导入顺序和 docstring 完整性 +5.
提交前做一次 `git diff --stat`,确认没有误删或大规模重写无关文件 + +**提交建议** +- 按模块分几个 commit,而不是一个巨大的 commit +- 使用 Conventional Commits 风格(`feat:` / `refactor:` / `perf:` / `test:`) + +--- + +## 风险与 TODO + +| 风险 | 应对 | +|------|------| +| FactorMiner 某些算子本地框架没有实现 | 翻译时标记 `# TODO`,评估阶段 reject | +| `FactorEngine` 在极宽表(>1000 列)时内存激增 | 以 batch 为单位分批计算,并配合 `engine.clear()` | +| 本地 `pro_bar` 表数据不完整或缺少某些日期 | 在 `LocalDataLoader` 中加入 coverage check,缺失率过高时抛异常 | +| `OutputParser` 对本地 DSL 的括号/逗号解析不兼容 | 修改 `OutputParser` 的清洗正则,增加单元测试 | + +--- + +## 附:核心模块依赖关系 + +``` +┌────────────────────┐ +│ LocalDataLoader │ ← Storage(read_only=True) +└────────┬───────────┘ + │ + ▼ +┌────────────────────┐ +│ LocalFactorEvaluator│ ← FactorEngine (批量计算 -> pivot -> np.ndarray) +└────────┬───────────┘ + │ + ┌────┴────┐ + ▼ ▼ +pipeline.py runtime.py ← 保留 FactorMiner 的 stats / metrics / admission 逻辑 + │ + ▼ +factor_library.py ← 按需重算,不保存 signals +``` diff --git a/src/factorminer/factorminer/__init__.py b/src/factorminer/factorminer/__init__.py deleted file mode 100644 index 9673f47..0000000 --- a/src/factorminer/factorminer/__init__.py +++ /dev/null @@ -1,42 +0,0 @@ -"""FactorMiner: LLM-powered quantitative factor mining with evolutionary search.""" - -__version__ = "0.1.0" -__author__ = "FactorMiner Team" - -from src.factorminer.factorminer.utils.config import ( - Config, - MiningConfig, - EvaluationConfig, - DataConfig, - LLMConfig, - MemoryConfig, - Phase2Config, - CausalConfig, - RegimeConfig, - CapacityConfig, - SignificanceConfig, - DebateConfig, - AutoInventorConfig, - HelixConfig, - load_config, -) - -__all__ = [ - "__version__", - "Config", - "MiningConfig", - "EvaluationConfig", - "DataConfig", - "LLMConfig", - "MemoryConfig", - "load_config", - # Phase 2 configs - "Phase2Config", - "CausalConfig", - "RegimeConfig", - "CapacityConfig", - "SignificanceConfig", - "DebateConfig", - "AutoInventorConfig", - "HelixConfig", -] diff --git a/src/factorminer/factorminer/agent/__init__.py 
b/src/factorminer/factorminer/agent/__init__.py deleted file mode 100644 index 6427ea6..0000000 --- a/src/factorminer/factorminer/agent/__init__.py +++ /dev/null @@ -1,78 +0,0 @@ -"""LLM agent integration for factor generation.""" - -from src.factorminer.factorminer.agent.factor_generator import FactorGenerator -from src.factorminer.factorminer.agent.llm_interface import ( - AnthropicProvider, - GoogleProvider, - LLMProvider, - MockProvider, - OpenAIProvider, - create_provider, -) -from src.factorminer.factorminer.agent.output_parser import CandidateFactor, parse_llm_output -from src.factorminer.factorminer.agent.prompt_builder import ( - PromptBuilder, - build_critic_scoring_prompt, - build_debate_synthesis_prompt, - build_specialist_prompt, -) -from src.factorminer.factorminer.agent.specialists import ( - DEFAULT_SPECIALISTS, - LIQUIDITY_SPECIALIST, - MOMENTUM_SPECIALIST, - REGIME_SPECIALIST, - SPECIALIST_CONFIGS, - VOLATILITY_SPECIALIST, - SpecialistAgent, - SpecialistConfig, - SpecialistDomainMemory, - SpecialistPromptBuilder, -) -from src.factorminer.factorminer.agent.critic import CriticAgent, CriticScore -from src.factorminer.factorminer.agent.debate import ( - DebateConfig, - DebateGenerator, - DebateMemory, - DebateOrchestrator, - DebateResult, -) - -__all__ = [ - # Generator - "FactorGenerator", - # LLM providers - "LLMProvider", - "OpenAIProvider", - "AnthropicProvider", - "GoogleProvider", - "MockProvider", - "create_provider", - # Parsing - "CandidateFactor", - "parse_llm_output", - # Prompt - "PromptBuilder", - "build_specialist_prompt", - "build_critic_scoring_prompt", - "build_debate_synthesis_prompt", - # Specialists - "SpecialistConfig", - "SpecialistAgent", - "SpecialistDomainMemory", - "SpecialistPromptBuilder", - "MOMENTUM_SPECIALIST", - "VOLATILITY_SPECIALIST", - "LIQUIDITY_SPECIALIST", - "REGIME_SPECIALIST", - "DEFAULT_SPECIALISTS", - "SPECIALIST_CONFIGS", - # Critic - "CriticAgent", - "CriticScore", - # Debate - "DebateGenerator", - 
"DebateConfig", - "DebateOrchestrator", - "DebateResult", - "DebateMemory", -] diff --git a/src/factorminer/factorminer/agent/critic.py b/src/factorminer/factorminer/agent/critic.py deleted file mode 100644 index c991f44..0000000 --- a/src/factorminer/factorminer/agent/critic.py +++ /dev/null @@ -1,837 +0,0 @@ -"""Critic agent that multi-dimensionally scores candidate factors. - -The ``CriticAgent`` pre-filters proposals from specialist agents along six -dimensions before any expensive backtesting occurs. Only the top-scoring -fraction proceeds to IC evaluation, dramatically reducing wasted compute. - -Scoring pipeline: -1. Structural heuristics (complexity, operator diversity) -- O(1) per factor. -2. Novelty scoring via string-level edit-distance and token overlap -- O(n). -3. Pattern alignment against success memory -- keyword matching -- O(n). -4. LLM scoring of top candidates for economic intuition -- one API call. -5. Composite score computation and ranking. -""" - -from __future__ import annotations - -import json -import logging -import math -import re -from collections import Counter -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Set, Tuple - -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.agent.output_parser import CandidateFactor -from src.factorminer.factorminer.agent.prompt_builder import normalize_factor_references - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# CriticScore dataclass -- multi-dimensional -# --------------------------------------------------------------------------- - -@dataclass -class CriticScore: - """Multi-dimensional scored review of a single candidate factor. - - Attributes - ---------- - factor_name : str - Name of the candidate factor being reviewed. - formula : str - DSL formula string of the candidate. 
- source_specialist : str - Name of the specialist that proposed this candidate. - scores : dict - Per-dimension scores, each in [0, 1]: - - ``novelty``: structural distinctiveness from existing library. - - ``economic_intuition``: economic meaningfulness (LLM-assessed). - - ``complexity_penalty``: complexity fitness (1 = optimal depth/ops). - - ``operator_diversity``: uses diverse operator categories. - - ``pattern_alignment``: aligns with known success patterns in memory. - - ``regime_appropriateness``: appropriate for current market regime. - composite_score : float - Weighted average of dimension scores. - keep : bool - Whether this factor should proceed to expensive IC evaluation. - critique : str - Natural-language explanation from the critic. - """ - - factor_name: str - formula: str - source_specialist: str - scores: Dict[str, float] = field(default_factory=lambda: { - "novelty": 0.5, - "economic_intuition": 0.5, - "complexity_penalty": 0.5, - "operator_diversity": 0.5, - "pattern_alignment": 0.5, - "regime_appropriateness": 0.5, - }) - composite_score: float = 0.5 - keep: bool = True - critique: str = "" - - # --- Backward-compatible convenience properties --- - - @property - def novelty_score(self) -> float: - return self.scores.get("novelty", 0.5) - - @property - def quality_score(self) -> float: - return self.scores.get("economic_intuition", 0.5) - - @property - def diversity_bonus(self) -> float: - return self.scores.get("operator_diversity", 0.5) - - @property - def critic_rationale(self) -> str: - return self.critique - - @property - def final_score(self) -> float: - return self.composite_score - - -# --------------------------------------------------------------------------- -# Scoring weights -# --------------------------------------------------------------------------- - -_SCORE_WEIGHTS: Dict[str, float] = { - "novelty": 0.25, - "economic_intuition": 0.30, - "complexity_penalty": 0.15, - "operator_diversity": 0.10, - "pattern_alignment": 0.10, - 
"regime_appropriateness": 0.10, -} - -# Pre-filter: keep this fraction by composite score before expensive eval -_PREFILTER_FRACTION = 0.60 - -# LLM scoring: only send this many top candidates to the LLM for economic -# intuition scoring (reduces API cost while covering the promising ones) -_LLM_SCORING_TOP_K = 40 - - -# --------------------------------------------------------------------------- -# Formula-level feature extraction helpers -# --------------------------------------------------------------------------- - -# Operator categories for diversity measurement -_OP_CATEGORIES: Dict[str, str] = { - "Add": "arithmetic", "Sub": "arithmetic", "Mul": "arithmetic", - "Div": "arithmetic", "Neg": "arithmetic", "Abs": "arithmetic", - "Square": "arithmetic", "Sqrt": "arithmetic", "Log": "arithmetic", - "Pow": "arithmetic", "Sign": "arithmetic", - "Std": "statistical", "Var": "statistical", "Mean": "statistical", - "Sum": "statistical", "Skew": "statistical", "Kurt": "statistical", - "Median": "statistical", "Quantile": "statistical", "Max": "statistical", - "Min": "statistical", - "Delta": "timeseries", "Delay": "timeseries", "TsRank": "timeseries", - "TsMax": "timeseries", "TsMin": "timeseries", "TsArgMax": "timeseries", - "TsArgMin": "timeseries", "TsLinRegSlope": "timeseries", - "Return": "timeseries", "LogReturn": "timeseries", "CumSum": "timeseries", - "EMA": "smoothing", "SMA": "smoothing", "WMA": "smoothing", - "HMA": "smoothing", "DEMA": "smoothing", "KAMA": "smoothing", - "Decay": "smoothing", - "CsRank": "cross_sectional", "CsZScore": "cross_sectional", - "CsDemean": "cross_sectional", "CsScale": "cross_sectional", - "CsNeutralize": "cross_sectional", "CsQuantile": "cross_sectional", - "Corr": "regression", "Cov": "regression", "Beta": "regression", - "Resi": "regression", "Rsquare": "regression", "Resid": "regression", - "IfElse": "logical", "Greater": "logical", "Less": "logical", - "GreaterEqual": "logical", "LessEqual": "logical", "Equal": "logical", -} - 
-_OPERATOR_PATTERN = re.compile(r"([A-Z][a-zA-Z0-9]*)\s*\(") - - -def _extract_operators(formula: str) -> List[str]: - """Extract all operator names from a formula string.""" - return _OPERATOR_PATTERN.findall(formula) - - -def _formula_depth(formula: str) -> int: - """Estimate nesting depth by counting maximum parenthesis depth.""" - max_depth = 0 - depth = 0 - for ch in formula: - if ch == "(": - depth += 1 - max_depth = max(max_depth, depth) - elif ch == ")": - depth -= 1 - return max_depth - - -def _tokenize_formula(formula: str) -> Set[str]: - """Tokenize a formula into its operator and feature tokens.""" - tokens: Set[str] = set() - tokens.update(_OPERATOR_PATTERN.findall(formula)) - for feat in re.findall(r"\$[a-z]+", formula): - tokens.add(feat) - return tokens - - -def _edit_distance_normalized(a: str, b: str, max_len: int = 200) -> float: - """Compute normalized edit distance between two formula strings. - - Returns 0.0 for identical strings, 1.0 for completely different. - """ - a = a[:max_len] - b = b[:max_len] - if a == b: - return 0.0 - la, lb = len(a), len(b) - prev = list(range(lb + 1)) - for i, ca in enumerate(a): - curr = [i + 1] - for j, cb in enumerate(b): - cost = 0 if ca == cb else 1 - curr.append(min(curr[j] + 1, prev[j + 1] + 1, prev[j] + cost)) - prev = curr - return prev[lb] / max(la, lb) - - -def _token_idf_similarity(formula: str, existing: List[str]) -> float: - """Compute TF-IDF-inspired token overlap similarity. - - Returns a value in [0, 1] where 1 means very similar to existing, - 0 means completely novel. 
- """ - if not existing: - return 0.0 - - query_tokens = _tokenize_formula(formula) - if not query_tokens: - return 0.0 - - df: Counter = Counter() - for ex in existing: - for tok in _tokenize_formula(ex): - df[tok] += 1 - - n_docs = len(existing) - score = 0.0 - for tok in query_tokens: - if tok in df: - idf = math.log(n_docs / df[tok]) if df[tok] < n_docs else 0.0 - score += (1.0 + idf) - - max_score = sum(1.0 for _ in query_tokens) - if max_score == 0: - return 0.0 - return min(1.0, score / (max_score * math.log(n_docs + 1) + 1.0)) - - -# --------------------------------------------------------------------------- -# CriticAgent -# --------------------------------------------------------------------------- - -class CriticAgent: - """LLM-powered multi-dimensional critic for candidate factor pre-filtering. - - Evaluates candidates along 6 dimensions before any expensive IC evaluation. - Uses structural heuristics for fast pre-scoring, then sends top-K to the - LLM for economic intuition scoring. Only the top fraction by composite - score is marked as ``keep=True`` for downstream evaluation. - - Parameters - ---------- - llm_provider : LLMProvider - LLM backend for economic intuition scoring. - temperature : float - Sampling temperature for the critic's LLM calls. - max_tokens : int - Max response tokens for the critic review. - prefilter_fraction : float - Fraction of candidates to keep after scoring (0.0-1.0). - llm_scoring_top_k : int - How many top candidates (by heuristic score) to send to LLM - for economic intuition scoring. - """ - - _SYSTEM_PROMPT = ( - "You are a rigorous quantitative research critic specialising in " - "formulaic alpha factors. Your job is to evaluate candidate factor " - "expressions on their economic intuition: does the factor make sense " - "as a predictor of cross-sectional stock returns? Be rigorous, " - "concise, and return structured JSON only." 
- ) - - def __init__( - self, - llm_provider: LLMProvider, - temperature: float = 0.3, - max_tokens: int = 4096, - prefilter_fraction: float = _PREFILTER_FRACTION, - llm_scoring_top_k: int = _LLM_SCORING_TOP_K, - ) -> None: - self.llm_provider = llm_provider - self.temperature = temperature - self.max_tokens = max_tokens - self.prefilter_fraction = prefilter_fraction - self.llm_scoring_top_k = llm_scoring_top_k - - # ------------------------------------------------------------------ - # Primary public API - # ------------------------------------------------------------------ - - def score_batch( - self, - candidates: List[str], - existing_factors: Optional[List[str]] = None, - memory_signal: Optional[str] = None, - regime_context: str = "", - specialist_map: Optional[Dict[str, str]] = None, - ) -> List[CriticScore]: - """Score a flat list of candidate formula strings. - - Parameters - ---------- - candidates : list[str] - Formula strings to evaluate. - existing_factors : list[str] or None - Formulas already in the library (for novelty scoring). - memory_signal : str or None - Free-text memory context (success patterns, etc.). - regime_context : str - Current market regime description. - specialist_map : dict or None - Mapping formula -> specialist name for attribution. - - Returns - ------- - list[CriticScore] - Scores in the same order as ``candidates``. 
- """ - existing_factors = existing_factors or [] - specialist_map = specialist_map or {} - - from factorminer.agent.output_parser import _try_build_candidate - cf_list: List[CandidateFactor] = [] - for i, formula in enumerate(candidates): - cf = _try_build_candidate(f"candidate_{i}", formula) - cf_list.append(cf) - - proposals: Dict[str, List[CandidateFactor]] = {} - for cf in cf_list: - src = specialist_map.get(cf.formula, "unknown") - proposals.setdefault(src, []).append(cf) - - return self._score_proposals( - proposals=proposals, - existing_factors=existing_factors, - memory_signal=memory_signal or "", - regime_context=regime_context, - ) - - def review_candidates( - self, - proposals: Dict[str, List[CandidateFactor]], - library_state: Optional[Dict[str, Any]] = None, - memory_signal: Optional[Dict[str, Any]] = None, - top_k: int = 40, - ) -> List[CriticScore]: - """Review all specialist proposals and return ranked scores. - - This is the primary interface used by ``DebateGenerator``. - - Parameters - ---------- - proposals : dict[str, list[CandidateFactor]] - Mapping from specialist name to its list of candidates. - library_state : dict or None - Current factor library state for context. - memory_signal : dict or None - Memory priors for context. - top_k : int - Number of top-scoring candidates to return. - - Returns - ------- - list[CriticScore] - Top-K candidates sorted by ``composite_score`` descending. 
- """ - library_state = library_state or {} - memory_signal = memory_signal or {} - - existing_factors: List[str] = normalize_factor_references( - library_state.get("recent_admissions", []) - ) - mem_str = self._memory_signal_to_str(memory_signal) - regime_context = memory_signal.get("regime_context", "") - - scores = self._score_proposals( - proposals=proposals, - existing_factors=existing_factors, - memory_signal=mem_str, - regime_context=str(regime_context), - ) - - scores.sort(key=lambda s: s.composite_score, reverse=True) - return scores[:top_k] - - # ------------------------------------------------------------------ - # Internal scoring pipeline - # ------------------------------------------------------------------ - - def _score_proposals( - self, - proposals: Dict[str, List[CandidateFactor]], - existing_factors: List[str], - memory_signal: str, - regime_context: str, - ) -> List[CriticScore]: - """Full multi-dimensional scoring pipeline.""" - all_pairs: List[Tuple[str, CandidateFactor]] = [] - for spec_name, candidates in proposals.items(): - for c in candidates: - all_pairs.append((spec_name, c)) - - if not all_pairs: - return [] - - # Phase 1: Heuristic scoring - partial_scores: List[CriticScore] = [] - for spec_name, candidate in all_pairs: - scores_dict = self._heuristic_score( - formula=candidate.formula, - existing_factors=existing_factors, - memory_signal=memory_signal, - regime_context=regime_context, - ) - scores_dict["economic_intuition"] = 0.5 # LLM will fill in - - composite = self._compute_composite(scores_dict) - critique = self._brief_heuristic_critique(scores_dict, candidate.formula) - - partial_scores.append(CriticScore( - factor_name=candidate.name, - formula=candidate.formula, - source_specialist=spec_name, - scores=scores_dict, - composite_score=composite, - keep=True, - critique=critique, - )) - - # Phase 2: LLM economic intuition for top candidates - partial_scores.sort(key=lambda s: s.composite_score, reverse=True) - top_for_llm = 
partial_scores[:self.llm_scoring_top_k] - - llm_econ_scores = self._llm_economic_intuition( - candidates=top_for_llm, - existing_factors=existing_factors, - memory_signal=memory_signal, - ) - - # Phase 3: Recompute composite with LLM scores - for score_obj in partial_scores: - if score_obj.factor_name in llm_econ_scores: - econ, rationale = llm_econ_scores[score_obj.factor_name] - score_obj.scores["economic_intuition"] = econ - score_obj.composite_score = self._compute_composite(score_obj.scores) - if rationale: - score_obj.critique = rationale - - # Phase 4: Diversity-aware re-ranking - partial_scores.sort(key=lambda s: s.composite_score, reverse=True) - partial_scores = self._apply_diversity_adjustment(partial_scores) - - # Phase 5: Pre-filter -- mark keep/discard - n_keep = max(1, int(len(partial_scores) * self.prefilter_fraction)) - for i, score_obj in enumerate(partial_scores): - score_obj.keep = i < n_keep - - return partial_scores - - def _heuristic_score( - self, - formula: str, - existing_factors: List[str], - memory_signal: str, - regime_context: str, - ) -> Dict[str, float]: - """Compute heuristic dimension scores without LLM call.""" - operators = _extract_operators(formula) - depth = _formula_depth(formula) - unique_ops = list(dict.fromkeys(operators)) - n_unique = len(unique_ops) - - novelty = self._score_novelty(formula, existing_factors) - complexity = self._score_complexity(depth, n_unique) - op_diversity = self._score_operator_diversity(unique_ops) - pattern_align = self._score_pattern_alignment(formula, memory_signal) - regime_score = self._score_regime_appropriateness(formula, regime_context) - - return { - "novelty": novelty, - "economic_intuition": 0.5, - "complexity_penalty": complexity, - "operator_diversity": op_diversity, - "pattern_alignment": pattern_align, - "regime_appropriateness": regime_score, - } - - def _score_novelty(self, formula: str, existing_factors: List[str]) -> float: - """Novelty: 1.0 = completely novel, 0.0 = exact 
duplicate.""" - if not existing_factors: - return 0.8 - - token_sim = _token_idf_similarity(formula, existing_factors) - sample = existing_factors[-20:] - edit_dists = [_edit_distance_normalized(formula, ex) for ex in sample] - avg_edit = sum(edit_dists) / len(edit_dists) if edit_dists else 1.0 - - novelty = 0.5 * (1.0 - token_sim) + 0.5 * avg_edit - return float(max(0.0, min(1.0, novelty))) - - def _score_complexity(self, depth: int, n_unique_ops: int) -> float: - """Complexity fitness: 1.0 = optimal (depth 3-7, 3-5 unique ops).""" - if 3 <= depth <= 7: - depth_score = 1.0 - elif depth < 3: - depth_score = depth / 3.0 - else: - depth_score = max(0.0, 1.0 - 0.15 * (depth - 7)) - - if 3 <= n_unique_ops <= 5: - op_score = 1.0 - elif n_unique_ops < 3: - op_score = n_unique_ops / 3.0 - else: - op_score = max(0.0, 1.0 - 0.1 * (n_unique_ops - 5)) - - return float(0.6 * depth_score + 0.4 * op_score) - - def _score_operator_diversity(self, unique_ops: List[str]) -> float: - """Operator diversity: how many distinct operator categories appear?""" - categories = {_OP_CATEGORIES.get(op, "other") for op in unique_ops} - n_categories = len(categories) - diversity_map = {0: 0.0, 1: 0.2, 2: 0.5, 3: 0.8} - return float(diversity_map.get(n_categories, 1.0)) - - def _score_pattern_alignment(self, formula: str, memory_signal: str) -> float: - """Pattern alignment: do formula tokens appear in known success patterns?""" - if not memory_signal: - return 0.5 - - formula_lower = formula.lower() - signal_lower = memory_signal.lower() - - formula_tokens = set(re.findall(r"[a-z]+", formula_lower)) - signal_tokens = set(re.findall(r"[a-z]+", signal_lower)) - - stopwords = {"the", "a", "is", "in", "of", "to", "and", "or", "for", - "as", "by", "on", "it", "be", "at", "an", "up"} - formula_tokens -= stopwords - signal_tokens -= stopwords - - if not formula_tokens: - return 0.5 - - overlap = formula_tokens & signal_tokens - alignment = len(overlap) / len(formula_tokens) - return float(0.3 + 0.7 * 
min(1.0, alignment * 2)) - - def _score_regime_appropriateness( - self, formula: str, regime_context: str - ) -> float: - """Does this formula suit the stated regime context?""" - if not regime_context: - return 0.7 - - regime_lower = regime_context.lower() - - momentum_kw = {"momentum", "trend", "trending", "breakout"} - volatility_kw = {"volatile", "volatility", "risk-off", "vix"} - reversal_kw = {"reversal", "mean-reversion", "oversold", "overbought"} - liquidity_kw = {"illiquid", "liquidity", "volume"} - - regime_is_momentum = any(k in regime_lower for k in momentum_kw) - regime_is_volatile = any(k in regime_lower for k in volatility_kw) - regime_is_reversal = any(k in regime_lower for k in reversal_kw) - regime_is_illiquid = any(k in regime_lower for k in liquidity_kw) - - formula_has_momentum = any( - op in formula for op in ("Delta", "TsRank", "EMA", "Return", "TsLinReg") - ) - formula_has_vol = any(op in formula for op in ("Std", "Kurt", "Skew", "Var")) - formula_has_reversal = "Neg" in formula or any( - op in formula for op in ("Mean", "SMA", "TsRank") - ) - formula_has_volume = any(f in formula for f in ("$volume", "$amt")) - - matches = 0 - total_signals = 0 - if regime_is_momentum: - total_signals += 1 - matches += int(formula_has_momentum) - if regime_is_volatile: - total_signals += 1 - matches += int(formula_has_vol) - if regime_is_reversal: - total_signals += 1 - matches += int(formula_has_reversal) - if regime_is_illiquid: - total_signals += 1 - matches += int(formula_has_volume) - - if total_signals == 0: - return 0.7 - return float(0.4 + 0.6 * (matches / total_signals)) - - @staticmethod - def _compute_composite(scores: Dict[str, float]) -> float: - """Compute weighted composite score from dimension scores.""" - total = 0.0 - weight_sum = 0.0 - for dim, weight in _SCORE_WEIGHTS.items(): - val = scores.get(dim, 0.5) - total += weight * val - weight_sum += weight - if weight_sum == 0: - return 0.5 - return float(total / weight_sum) - - def 
_brief_heuristic_critique( - self, scores: Dict[str, float], formula: str - ) -> str: - """Generate a brief human-readable critique from heuristic scores.""" - parts = [] - depth = _formula_depth(formula) - ops = _extract_operators(formula) - n_unique = len(set(ops)) - - novelty = scores.get("novelty", 0.5) - if novelty < 0.3: - parts.append("closely resembles existing library factors") - elif novelty > 0.7: - parts.append("structurally novel") - - complexity = scores.get("complexity_penalty", 0.5) - if complexity < 0.4: - if depth < 3: - parts.append(f"too shallow (depth={depth})") - elif depth > 8: - parts.append(f"overly deep (depth={depth})") - if n_unique < 3: - parts.append(f"low operator diversity ({n_unique} unique ops)") - - op_div = scores.get("operator_diversity", 0.5) - if op_div >= 0.8: - cats = {_OP_CATEGORIES.get(op, "other") for op in set(ops)} - parts.append(f"good operator variety ({', '.join(sorted(cats))})") - - if not parts: - parts.append("passes heuristic checks") - - return "; ".join(parts) + "." - - # ------------------------------------------------------------------ - # LLM economic intuition scoring - # ------------------------------------------------------------------ - - def _llm_economic_intuition( - self, - candidates: List[CriticScore], - existing_factors: List[str], - memory_signal: str, - ) -> Dict[str, Tuple[float, str]]: - """Send top candidates to LLM for economic intuition scoring.""" - if not candidates: - return {} - - prompt = self._build_llm_scoring_prompt( - candidates=candidates, - existing_factors=existing_factors, - memory_signal=memory_signal, - ) - - try: - raw = self.llm_provider.generate( - system_prompt=self._SYSTEM_PROMPT, - user_prompt=prompt, - temperature=self.temperature, - max_tokens=self.max_tokens, - ) - return self._parse_llm_scoring_response(raw, candidates) - except Exception as exc: - logger.warning( - "Critic LLM economic intuition scoring failed: %s. 
" - "Keeping heuristic scores.", - exc, - ) - return {} - - def _build_llm_scoring_prompt( - self, - candidates: List[CriticScore], - existing_factors: List[str], - memory_signal: str, - ) -> str: - """Build the structured scoring prompt for LLM economic intuition.""" - sections: List[str] = [] - - if existing_factors: - sections.append("## EXISTING LIBRARY SAMPLE (last 10)") - for f in existing_factors[-10:]: - sections.append(f" - {f}") - - if memory_signal: - sections.append(f"\n## MEMORY CONTEXT\n{memory_signal[:800]}") - - sections.append("\n## CANDIDATES FOR ECONOMIC INTUITION SCORING") - sections.append( - "Score each on economic_intuition (0.0-1.0): does this formula " - "capture a plausible, economically meaningful cross-sectional " - "return predictor? Consider:\n" - " - Is there a coherent economic story?\n" - " - Is the complexity level appropriate (depth 3-7 is best)?\n" - " - Does it avoid trivial reformulations of simple momentum/reversal?\n" - " - Does it use features in a semantically coherent way?\n" - ) - - for cs in candidates: - sections.append( - f" Factor: {cs.factor_name} " - f"[Specialist: {cs.source_specialist}]\n" - f" Formula: {cs.formula}" - ) - - sections.append( - "\n## OUTPUT FORMAT\n" - "Return one JSON object per line, exactly:\n" - '{"name": "", "economic_intuition": <0.0-1.0>, ' - '"rationale": ""}\n' - "Output ONLY the JSON lines. No markdown, no explanations." 
- ) - - return "\n".join(sections) - - def _parse_llm_scoring_response( - self, - raw: str, - candidates: List[CriticScore], - ) -> Dict[str, Tuple[float, str]]: - """Parse LLM scoring response into economic intuition scores.""" - valid_names = {cs.factor_name for cs in candidates} - results: Dict[str, Tuple[float, str]] = {} - json_pattern = re.compile(r"\{[^{}]+\}") - - for match in json_pattern.findall(raw): - try: - obj = json.loads(match) - except json.JSONDecodeError: - continue - name = obj.get("name", "") - if name not in valid_names: - continue - econ = float(max(0.0, min(1.0, obj.get("economic_intuition", 0.5)))) - rationale = str(obj.get("rationale", "")) - results[name] = (econ, rationale) - - logger.debug( - "LLM economic intuition: scored %d/%d candidates", - len(results), - len(candidates), - ) - return results - - # ------------------------------------------------------------------ - # Diversity adjustment - # ------------------------------------------------------------------ - - def _apply_diversity_adjustment( - self, scores: List[CriticScore] - ) -> List[CriticScore]: - """Slightly boost underrepresented specialists to maintain balance.""" - if not scores: - return scores - - specialist_counts: Counter = Counter() - n_specialists = len({s.source_specialist for s in scores}) - ideal_frac = 1.0 / max(n_specialists, 1) - - adjusted = [] - for cs in scores: - specialist_counts[cs.source_specialist] += 1 - total_so_far = sum(specialist_counts.values()) - actual_frac = specialist_counts[cs.source_specialist] / total_so_far - diversity_adj = (ideal_frac - actual_frac) * 0.1 - diversity_adj = max(-0.05, min(0.05, diversity_adj)) - adjusted_score = float( - max(0.0, min(1.0, cs.composite_score + diversity_adj)) - ) - new_scores = dict(cs.scores) - new_scores["operator_diversity"] = float( - max(0.0, min(1.0, - cs.scores.get("operator_diversity", 0.5) + diversity_adj - )) - ) - adjusted.append(CriticScore( - factor_name=cs.factor_name, - 
formula=cs.formula, - source_specialist=cs.source_specialist, - scores=new_scores, - composite_score=adjusted_score, - keep=cs.keep, - critique=cs.critique, - )) - - adjusted.sort(key=lambda s: s.composite_score, reverse=True) - return adjusted - - # ------------------------------------------------------------------ - # Utility helpers - # ------------------------------------------------------------------ - - @staticmethod - def _memory_signal_to_str(memory_signal: Dict[str, Any]) -> str: - """Flatten a memory signal dict to a compact string for embedding.""" - parts: List[str] = [] - for key in ( - "recommended_directions", "strategic_insights", - "complementary_patterns", "prompt_text", - ): - val = memory_signal.get(key) - if isinstance(val, list): - parts.extend(str(v) for v in val) - elif isinstance(val, str) and val: - parts.append(val) - return " ".join(parts) - - @staticmethod - def _fallback_uniform_scores( - proposals: Dict[str, List[CandidateFactor]], - ) -> List[CriticScore]: - """Generate uniform scores when all scoring mechanisms fail.""" - default_composite = 0.5 - scores: List[CriticScore] = [] - for specialist_name, candidates in proposals.items(): - for c in candidates: - scores.append(CriticScore( - factor_name=c.name, - formula=c.formula, - source_specialist=specialist_name, - scores={ - "novelty": 0.5, - "economic_intuition": 0.5, - "complexity_penalty": 0.5, - "operator_diversity": 0.5, - "pattern_alignment": 0.5, - "regime_appropriateness": 0.5, - }, - composite_score=default_composite, - keep=True, - critique="Fallback uniform score (critic unavailable).", - )) - return scores diff --git a/src/factorminer/factorminer/agent/debate.py b/src/factorminer/factorminer/agent/debate.py deleted file mode 100644 index 9fb4d68..0000000 --- a/src/factorminer/factorminer/agent/debate.py +++ /dev/null @@ -1,949 +0,0 @@ -"""Multi-agent debate orchestrator for factor generation (FactorMAD). 
- -``DebateGenerator`` is a **drop-in replacement** for ``FactorGenerator``. -It runs multiple domain-specialist generators, collects their proposals, -passes them through a multi-dimensional ``CriticAgent`` for pre-filtering, -and returns a single ``List[CandidateFactor]`` with the same interface as -``FactorGenerator.generate_batch()``. - -The full pipeline (``DebateOrchestrator``) also supports: -- SymPy-based algebraic deduplication via ``FormulaCanonicalizer``. -- ``DebateMemory`` tracking: specialist leaderboards, blind spot detection. -- Parallel specialist generation (thread-pool). -- Structured ``DebateResult`` dataclass capturing the full debate state. -""" - -from __future__ import annotations - -import concurrent.futures -import logging -from collections import Counter -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.agent.critic import CriticAgent, CriticScore -from src.factorminer.factorminer.agent.factor_generator import FactorGenerator -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.agent.output_parser import CandidateFactor -from src.factorminer.factorminer.agent.prompt_builder import ( - PromptBuilder, - normalize_factor_references, -) -from src.factorminer.factorminer.agent.specialists import ( - DEFAULT_SPECIALISTS, - SpecialistAgent, - SpecialistConfig, - SpecialistPromptBuilder, -) - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# DebateConfig -# --------------------------------------------------------------------------- - -@dataclass -class DebateConfig: - """Configuration for the multi-agent FactorMAD pipeline. - - Attributes - ---------- - specialists : list[SpecialistConfig] - Specialist configurations to run. Defaults to the four - pre-defined specialists (momentum, volatility, liquidity, regime). 
- enable_critic : bool - Whether to run the CriticAgent for multi-dimensional scoring. - candidates_per_specialist : int - Number of candidates each specialist generates per round. - top_k_after_critic : int - How many candidates the critic retains after ranking. - critic_temperature : float - Sampling temperature for the critic LLM call. - enable_deduplication : bool - Whether to use SymPy canonicalization to remove algebraic duplicates. - enable_debate_memory : bool - Whether to track debate history across rounds for specialist feedback. - parallel_specialists : bool - Whether to run specialists in parallel (thread pool). - max_parallel_workers : int - Maximum number of parallel threads for specialist generation. - """ - - specialists: List[SpecialistConfig] = field( - default_factory=lambda: list(DEFAULT_SPECIALISTS) - ) - enable_critic: bool = True - candidates_per_specialist: int = 15 - top_k_after_critic: int = 40 - critic_temperature: float = 0.3 - enable_deduplication: bool = True - enable_debate_memory: bool = True - parallel_specialists: bool = True - max_parallel_workers: int = 4 - - -# --------------------------------------------------------------------------- -# DebateResult -# --------------------------------------------------------------------------- - -@dataclass -class DebateResult: - """Full structured result from one debate round. - - Attributes - ---------- - all_proposals : list[str] - Raw formula strings from all specialists. - after_dedup : list[str] - Formulas after SymPy algebraic deduplication. - after_critic : list[str] - Formulas that passed the critic pre-filter (``keep=True``). - critic_scores : list[CriticScore] - Full multi-dimensional scores for all candidates. - specialist_proposals : dict[str, list[str]] - Per-specialist formula strings before any filtering. - specialist_success_rates : dict[str, float] - Historical admission success rates per specialist. 
- debate_stats : dict - Summary statistics: n_proposals, n_after_dedup, n_after_critic, - n_duplicates_removed, specialist_counts. - """ - - all_proposals: List[str] = field(default_factory=list) - after_dedup: List[str] = field(default_factory=list) - after_critic: List[str] = field(default_factory=list) - critic_scores: List[CriticScore] = field(default_factory=list) - specialist_proposals: Dict[str, List[str]] = field(default_factory=dict) - specialist_success_rates: Dict[str, float] = field(default_factory=dict) - debate_stats: Dict[str, Any] = field(default_factory=dict) - - -# --------------------------------------------------------------------------- -# DebateMemory -- cross-round debate history tracking -# --------------------------------------------------------------------------- - -class DebateMemory: - """Tracks debate history across rounds: who proposed what, what got admitted. - - Used by ``DebateOrchestrator`` to maintain specialist leaderboards, - identify blind spots (operator families nobody proposes), and surface - patterns the critic consistently rewards. - - Parameters - ---------- - specialist_names : list[str] - Names of all participating specialists. - """ - - _ALL_OP_FAMILIES: List[str] = [ - "arithmetic", "statistical", "timeseries", "smoothing", - "cross_sectional", "regression", "logical", - ] - - def __init__(self, specialist_names: List[str]) -> None: - self._specialist_names = list(specialist_names) - self._proposal_history: Dict[str, List[tuple]] = { - name: [] for name in specialist_names - } - self._rounds: List[Dict[str, Any]] = [] - self._best_critic_patterns: List[str] = [] - - def record_round( - self, - debate_result: DebateResult, - admissions: Optional[List[str]] = None, - ) -> None: - """Record outcome of one debate round. - - Parameters - ---------- - debate_result : DebateResult - The result of the debate round. - admissions : list[str] or None - Formulas ultimately admitted to the library after IC evaluation. 
- """ - admissions = admissions or [] - admission_set = set(admissions) - - for spec_name, formulas in debate_result.specialist_proposals.items(): - for formula in formulas: - was_admitted = formula in admission_set - self._proposal_history.setdefault(spec_name, []).append( - (formula, was_admitted) - ) - - for score in debate_result.critic_scores: - if score.composite_score >= 0.7 and score.formula in admission_set: - self._best_critic_patterns.append(score.formula) - - self._rounds.append({ - "n_proposals": len(debate_result.all_proposals), - "n_after_dedup": len(debate_result.after_dedup), - "n_after_critic": len(debate_result.after_critic), - "n_admissions": len(admissions), - "specialist_counts": { - name: len(formulas) - for name, formulas in debate_result.specialist_proposals.items() - }, - }) - - def get_specialist_leaderboard(self) -> List[Dict[str, Any]]: - """Return specialist performance sorted by admission rate. - - Returns - ------- - list[dict] - Each dict has keys: ``name``, ``proposed``, ``admitted``, - ``admission_rate``. Sorted by ``admission_rate`` descending. - """ - rows: List[Dict[str, Any]] = [] - for name in self._specialist_names: - history = self._proposal_history.get(name, []) - proposed = len(history) - admitted = sum(1 for _, was_admitted in history if was_admitted) - rate = admitted / max(proposed, 1) - rows.append({ - "name": name, - "proposed": proposed, - "admitted": admitted, - "admission_rate": rate, - }) - rows.sort(key=lambda r: r["admission_rate"], reverse=True) - return rows - - def get_best_critic_patterns(self) -> List[str]: - """Return formula patterns the critic loved that were also admitted.""" - return list(self._best_critic_patterns[-20:]) - - def get_blind_spots(self) -> Dict[str, List[str]]: - """Detect operator families that no specialist is proposing. - - Returns - ------- - dict[str, list[str]] - ``"underused_families"``: operator families with low proposal count. 
- ``"overused_families"``: operator families with disproportionate use. - """ - from factorminer.agent.critic import _OP_CATEGORIES, _extract_operators - - family_counts: Counter = Counter() - total_proposals = 0 - - for history in self._proposal_history.values(): - for formula, _ in history: - ops = _extract_operators(formula) - for op in ops: - family = _OP_CATEGORIES.get(op, "other") - family_counts[family] += 1 - total_proposals += 1 - - if total_proposals == 0: - return { - "underused_families": self._ALL_OP_FAMILIES, - "overused_families": [], - } - - avg_count = total_proposals / len(self._ALL_OP_FAMILIES) - underused = [ - f for f in self._ALL_OP_FAMILIES - if family_counts.get(f, 0) < avg_count * 0.4 - ] - overused = [ - f for f in self._ALL_OP_FAMILIES - if family_counts.get(f, 0) > avg_count * 2.5 - ] - return {"underused_families": underused, "overused_families": overused} - - def get_memory_summary_for_specialist(self, specialist_name: str) -> str: - """Return a brief performance summary for a specific specialist.""" - history = self._proposal_history.get(specialist_name, []) - if not history: - return f"{specialist_name}: no history yet." - proposed = len(history) - admitted = sum(1 for _, a in history if a) - rate = admitted / proposed - return ( - f"{specialist_name}: {proposed} proposed, {admitted} admitted " - f"({rate:.1%} rate)." - ) - - @property - def total_rounds(self) -> int: - return len(self._rounds) - - -# --------------------------------------------------------------------------- -# DebateOrchestrator -- full pipeline -# --------------------------------------------------------------------------- - -class DebateOrchestrator: - """Orchestrates the full multi-agent FactorMAD debate cycle. - - Flow per round: - 1. All specialists generate proposals (optionally in parallel). - 2. Merge all proposals into a single pool. - 3. SymPy algebraic deduplication (optional). - 4. Critic multi-dimensional pre-scoring. - 5. 
Top-fraction selection for expensive IC evaluation. - 6. Return structured ``DebateResult``. - - Parameters - ---------- - specialists : list[SpecialistAgent] - Specialist agent instances. - critic : CriticAgent - Critic agent for pre-filtering. - canonicalizer : FormulaCanonicalizer or None - Optional SymPy canonicalizer for algebraic deduplication. - parallel_specialists : bool - Whether to run specialists concurrently. - max_workers : int - Max thread pool workers when parallel is enabled. - """ - - def __init__( - self, - specialists: List[SpecialistAgent], - critic: CriticAgent, - canonicalizer: Optional[Any] = None, - parallel_specialists: bool = True, - max_workers: int = 4, - ) -> None: - self.specialists = specialists - self.critic = critic - self.canonicalizer = canonicalizer - self.parallel_specialists = parallel_specialists - self.max_workers = max_workers - - def run_debate_round( - self, - n_per_specialist: int = 15, - memory_signal: Optional[Dict[str, Any]] = None, - library_diagnostics: Optional[Dict[str, Any]] = None, - regime_context: str = "", - forbidden_patterns: Optional[List[str]] = None, - existing_factors: Optional[List[str]] = None, - ) -> DebateResult: - """Run one full debate round and return structured results. - - Parameters - ---------- - n_per_specialist : int - Number of proposals to request from each specialist. - memory_signal : dict or None - Experience memory priors. - library_diagnostics : dict or None - Current library state. - regime_context : str - Current market regime description. - forbidden_patterns : list[str] or None - Global forbidden structural patterns. - existing_factors : list[str] or None - Formulas already in the library. - - Returns - ------- - DebateResult - Full structured result including all proposals, dedup, and - critic scores. 
- """ - memory_signal = memory_signal or {} - library_diagnostics = library_diagnostics or {} - forbidden_patterns = forbidden_patterns or [] - existing_factors = normalize_factor_references(existing_factors) - - # Step 1: Specialist generation - if self.parallel_specialists and len(self.specialists) > 1: - specialist_proposals = self._generate_parallel( - n_per_specialist=n_per_specialist, - memory_signal=memory_signal, - library_diagnostics=library_diagnostics, - regime_context=regime_context, - forbidden_patterns=forbidden_patterns, - existing_factors=existing_factors, - ) - else: - specialist_proposals: Dict[str, List[str]] = {} - for spec in self.specialists: - formulas = spec.generate_proposals( - n_proposals=n_per_specialist, - memory_signal=memory_signal, - library_diagnostics=library_diagnostics, - regime_context=regime_context, - forbidden_patterns=forbidden_patterns, - existing_factors=existing_factors, - ) - specialist_proposals[spec.name] = formulas - logger.info( - "Specialist %s: %d proposals", spec.name, len(formulas) - ) - - # Step 2: Merge all proposals - all_proposals: List[str] = [] - formula_to_specialist: Dict[str, str] = {} - for spec_name, formulas in specialist_proposals.items(): - for f in formulas: - if f not in formula_to_specialist: - all_proposals.append(f) - formula_to_specialist[f] = spec_name - - logger.info( - "Debate round: %d total proposals from %d specialists", - len(all_proposals), - len(self.specialists), - ) - - # Step 3: SymPy deduplication - after_dedup = self._deduplicate(all_proposals) - n_removed = len(all_proposals) - len(after_dedup) - logger.info( - "Deduplication: removed %d algebraic duplicates (%d remain)", - n_removed, - len(after_dedup), - ) - - # Step 4: Build CandidateFactor proposals for critic - proposals_cf: Dict[str, List[CandidateFactor]] = {} - for formula in after_dedup: - spec_name = formula_to_specialist.get(formula, "unknown") - from factorminer.agent.output_parser import _try_build_candidate - 
existing_count = len(proposals_cf.get(spec_name, [])) - cf = _try_build_candidate( - f"{spec_name.lower()}_factor_{existing_count + 1}", - formula, - ) - proposals_cf.setdefault(spec_name, []).append(cf) - - # Step 5: Critic scoring - mem_str = _flatten_memory_signal(memory_signal) - critic_scores = self.critic._score_proposals( - proposals=proposals_cf, - existing_factors=existing_factors, - memory_signal=mem_str, - regime_context=regime_context, - ) - - # Step 6: Collect kept formulas - after_critic = [cs.formula for cs in critic_scores if cs.keep] - logger.info( - "Critic pre-filter: %d/%d candidates kept (keep=True)", - len(after_critic), - len(after_dedup), - ) - - success_rates = {spec.name: spec.success_rate for spec in self.specialists} - debate_stats = { - "n_proposals": len(all_proposals), - "n_after_dedup": len(after_dedup), - "n_after_critic": len(after_critic), - "n_duplicates_removed": n_removed, - "specialist_counts": { - name: len(formulas) - for name, formulas in specialist_proposals.items() - }, - } - - return DebateResult( - all_proposals=all_proposals, - after_dedup=after_dedup, - after_critic=after_critic, - critic_scores=critic_scores, - specialist_proposals=specialist_proposals, - specialist_success_rates=success_rates, - debate_stats=debate_stats, - ) - - def _generate_parallel( - self, - n_per_specialist: int, - memory_signal: Dict[str, Any], - library_diagnostics: Dict[str, Any], - regime_context: str, - forbidden_patterns: List[str], - existing_factors: List[str], - ) -> Dict[str, List[str]]: - """Generate from all specialists concurrently using a thread pool.""" - results: Dict[str, List[str]] = {} - - def _run_specialist(spec: SpecialistAgent) -> tuple: - formulas = spec.generate_proposals( - n_proposals=n_per_specialist, - memory_signal=memory_signal, - library_diagnostics=library_diagnostics, - regime_context=regime_context, - forbidden_patterns=forbidden_patterns, - existing_factors=existing_factors, - ) - return spec.name, formulas 
- - n_workers = min(self.max_workers, len(self.specialists)) - with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor: - futures = { - executor.submit(_run_specialist, spec): spec.name - for spec in self.specialists - } - for future in concurrent.futures.as_completed(futures): - spec_name = futures[future] - try: - name, formulas = future.result() - results[name] = formulas - logger.info( - "Specialist %s (parallel): %d proposals", - name, - len(formulas), - ) - except Exception as exc: - logger.warning( - "Specialist %s parallel generation failed: %s", - spec_name, - exc, - ) - results[spec_name] = [] - - return results - - def _deduplicate(self, formulas: List[str]) -> List[str]: - """Remove algebraic duplicates using SymPy canonicalizer if available.""" - if self.canonicalizer is None: - seen: set = set() - unique: List[str] = [] - for f in formulas: - if f not in seen: - unique.append(f) - seen.add(f) - return unique - - from factorminer.core.parser import try_parse - seen_hashes: set = set() - unique: List[str] = [] - for formula in formulas: - tree = try_parse(formula) - if tree is None: - if formula not in {u for u in unique}: - unique.append(formula) - continue - try: - canon_hash = self.canonicalizer.canonicalize(tree) - except Exception: - canon_hash = formula - if canon_hash not in seen_hashes: - unique.append(formula) - seen_hashes.add(canon_hash) - - return unique - - -# --------------------------------------------------------------------------- -# DebateGenerator -- drop-in replacement for FactorGenerator -# --------------------------------------------------------------------------- - -class DebateGenerator: - """Multi-agent debate-based factor generator (drop-in for FactorGenerator). - - Uses the full FactorMAD pipeline: multiple specialist proposers, - algebraic deduplication, and multi-dimensional critic pre-filtering. 
- - Parameters - ---------- - llm_provider : LLMProvider - LLM backend shared across all specialists and the critic. - debate_config : DebateConfig or None - Pipeline configuration. Uses defaults if ``None``. - prompt_builder : PromptBuilder or None - Optional base prompt builder (its system prompt is used as the - base for specialist prompt builders). - """ - - def __init__( - self, - llm_provider: LLMProvider, - debate_config: Optional[DebateConfig] = None, - prompt_builder: Optional[PromptBuilder] = None, - ) -> None: - self.llm_provider = llm_provider - self.config = debate_config or DebateConfig() - - base_system_prompt = ( - prompt_builder.system_prompt if prompt_builder else None - ) - - # Build SpecialistAgent instances - self._specialist_agents: List[SpecialistAgent] = [] - self._specialist_generators: Dict[str, FactorGenerator] = {} - - for spec in self.config.specialists: - agent = SpecialistAgent( - config=spec, - llm=self.llm_provider, - base_system_prompt=base_system_prompt, - ) - self._specialist_agents.append(agent) - - specialist_pb = SpecialistPromptBuilder( - specialist_config=spec, - base_system_prompt=base_system_prompt, - ) - gen = FactorGenerator( - llm_provider=self.llm_provider, - prompt_builder=specialist_pb, - temperature=spec.temperature, - ) - self._specialist_generators[spec.name] = gen - - # Build critic - self._critic: Optional[CriticAgent] = None - if self.config.enable_critic: - self._critic = CriticAgent( - llm_provider=self.llm_provider, - temperature=self.config.critic_temperature, - ) - - # Canonicalizer for deduplication - self._canonicalizer = None - if self.config.enable_deduplication: - try: - from factorminer.core.canonicalizer import FormulaCanonicalizer - self._canonicalizer = FormulaCanonicalizer() - except Exception as exc: - logger.warning( - "Could not initialise FormulaCanonicalizer: %s. 
" - "Falling back to string dedup.", - exc, - ) - - # Debate orchestrator - if self._critic is not None: - self._orchestrator: Optional[DebateOrchestrator] = DebateOrchestrator( - specialists=self._specialist_agents, - critic=self._critic, - canonicalizer=self._canonicalizer, - parallel_specialists=self.config.parallel_specialists, - max_workers=self.config.max_parallel_workers, - ) - else: - self._orchestrator = None - - # Debate memory - self._debate_memory: Optional[DebateMemory] = None - if self.config.enable_debate_memory: - specialist_names = [s.name for s in self.config.specialists] - self._debate_memory = DebateMemory(specialist_names=specialist_names) - - self._last_debate_result: Optional[DebateResult] = None - self._generation_count = 0 - - def generate_batch( - self, - memory_signal: Optional[Dict[str, Any]] = None, - library_state: Optional[Dict[str, Any]] = None, - batch_size: int = 40, - ) -> List[CandidateFactor]: - """Generate a batch of candidate factors via multi-agent debate. - - Signature is identical to ``FactorGenerator.generate_batch`` - so this class is a true drop-in replacement. - - Parameters - ---------- - memory_signal : dict or None - Memory priors for prompt injection. - library_state : dict or None - Current factor library state. - batch_size : int - Target number of candidates to return. - - Returns - ------- - list[CandidateFactor] - Ranked candidate factors. 
- """ - memory_signal = memory_signal or {} - library_state = library_state or {} - - self._generation_count += 1 - batch_id = self._generation_count - - logger.info( - "Debate batch #%d: %d specialists, critic=%s, per_specialist=%d", - batch_id, - len(self._specialist_agents), - self.config.enable_critic, - self.config.candidates_per_specialist, - ) - - existing_factors = normalize_factor_references( - library_state.get("recent_admissions", []) - ) - regime_context = str(memory_signal.get("regime_context", "")) - - if self._orchestrator is not None: - debate_result = self._orchestrator.run_debate_round( - n_per_specialist=self.config.candidates_per_specialist, - memory_signal=memory_signal, - library_diagnostics=library_state, - regime_context=regime_context, - existing_factors=existing_factors, - ) - self._last_debate_result = debate_result - - if self._debate_memory is not None: - self._debate_memory.record_round(debate_result) - - result = self._debate_result_to_candidates( - debate_result=debate_result, - top_k=min(batch_size, self.config.top_k_after_critic), - ) - - else: - # No critic: run specialist generators and merge - proposals: Dict[str, List[CandidateFactor]] = {} - for spec_name, generator in self._specialist_generators.items(): - candidates = generator.generate_batch( - memory_signal=memory_signal, - library_state=library_state, - batch_size=self.config.candidates_per_specialist, - ) - proposals[spec_name] = candidates - logger.info( - "Specialist %s produced %d candidates", spec_name, len(candidates) - ) - - result = [] - seen_formulas: set = set() - for spec_name, candidates in proposals.items(): - for c in candidates: - if c.formula not in seen_formulas: - result.append(c) - seen_formulas.add(c.formula) - - result = result[:batch_size] - # Store a minimal DebateResult for consistency - specialist_proposals = { - name: [c.formula for c in cands] - for name, cands in proposals.items() - } - self._last_debate_result = DebateResult( - 
all_proposals=[f for fl in specialist_proposals.values() for f in fl], - after_dedup=[c.formula for c in result], - after_critic=[c.formula for c in result], - critic_scores=[], - specialist_proposals=specialist_proposals, - specialist_success_rates={}, - debate_stats={"n_proposals": len(result)}, - ) - - result = self._tag_specialist_source_from_agents(result) - - logger.info( - "Debate batch #%d complete: returning %d candidates", - batch_id, - len(result), - ) - return result - - # ------------------------------------------------------------------ - # Public inspection helpers - # ------------------------------------------------------------------ - - @property - def last_debate_result(self) -> Optional[DebateResult]: - """The ``DebateResult`` from the most recent ``generate_batch`` call.""" - return self._last_debate_result - - @property - def debate_memory(self) -> Optional[DebateMemory]: - """The ``DebateMemory`` tracking history across rounds.""" - return self._debate_memory - - def get_specialist_leaderboard(self) -> Optional[List[Dict[str, Any]]]: - """Return specialist admission leaderboard if memory is enabled.""" - if self._debate_memory is not None: - return self._debate_memory.get_specialist_leaderboard() - return None - - def get_blind_spots(self) -> Optional[Dict[str, List[str]]]: - """Return operator family blind spots if memory is enabled.""" - if self._debate_memory is not None: - return self._debate_memory.get_blind_spots() - return None - - def update_specialist_admissions( - self, - admitted_formulas: List[str], - rejected_formulas: Optional[List[str]] = None, - rejection_reasons: Optional[List[str]] = None, - ) -> None: - """Feed evaluation results back to specialist agents and debate memory. - - Should be called after IC evaluation to close the feedback loop. - - Parameters - ---------- - admitted_formulas : list[str] - Formula strings admitted to the library. 
- rejected_formulas : list[str] or None - Formula strings that failed IC evaluation. - rejection_reasons : list[str] or None - Reasons for rejection (parallel to ``rejected_formulas``). - """ - if self._last_debate_result is None: - return - - rejected_formulas = rejected_formulas or [] - rejection_reasons = rejection_reasons or [] - - for spec_agent in self._specialist_agents: - spec_admitted = [ - f for f in admitted_formulas - if f in self._last_debate_result.specialist_proposals.get( - spec_agent.name, [] - ) - ] - spec_rejected = [ - f for f in rejected_formulas - if f in self._last_debate_result.specialist_proposals.get( - spec_agent.name, [] - ) - ] - spec_reasons: List[str] = [] - for f in spec_rejected: - try: - idx = rejected_formulas.index(f) - spec_reasons.append( - rejection_reasons[idx] if idx < len(rejection_reasons) - else "unknown" - ) - except ValueError: - spec_reasons.append("unknown") - - spec_agent.update_domain_memory( - admitted=spec_admitted, - rejected=spec_rejected, - reasons=spec_reasons, - ) - - if self._debate_memory is not None and self._last_debate_result is not None: - self._debate_memory.record_round( - debate_result=self._last_debate_result, - admissions=admitted_formulas, - ) - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _debate_result_to_candidates( - self, - debate_result: DebateResult, - top_k: int, - ) -> List[CandidateFactor]: - """Convert DebateResult critic scores into CandidateFactor objects.""" - from factorminer.agent.output_parser import _try_build_candidate - - kept_scores = [cs for cs in debate_result.critic_scores if cs.keep] - kept_scores.sort(key=lambda cs: cs.composite_score, reverse=True) - kept_scores = kept_scores[:top_k] - - result: List[CandidateFactor] = [] - seen_formulas: set = set() - - for cs in kept_scores: - if cs.formula in seen_formulas: - continue - cf = 
_try_build_candidate(cs.factor_name, cs.formula) - if cf.is_valid: - cf.category = f"specialist:{cs.source_specialist}/{cf.category}" - result.append(cf) - seen_formulas.add(cs.formula) - - if not result: - for formula in debate_result.after_critic[:top_k]: - if formula in seen_formulas: - continue - cf = _try_build_candidate( - f"debate_factor_{len(result)+1}", formula - ) - if cf.is_valid: - result.append(cf) - seen_formulas.add(formula) - - return result - - def _tag_specialist_source_from_agents( - self, - candidates: List[CandidateFactor], - ) -> List[CandidateFactor]: - """Tag candidate source if not already embedded in category.""" - for c in candidates: - if not c.category.startswith("specialist:"): - if self._last_debate_result: - for spec_name, formulas in ( - self._last_debate_result.specialist_proposals.items() - ): - if c.formula in formulas: - c.category = f"specialist:{spec_name}/{c.category}" - break - return candidates - - # ------------------------------------------------------------------ - # Legacy static helpers (backward compatibility) - # ------------------------------------------------------------------ - - @staticmethod - def _scores_to_candidates( - scores: List[CriticScore], - proposals: Dict[str, List[CandidateFactor]], - ) -> List[CandidateFactor]: - """Map CriticScore objects back to CandidateFactor instances.""" - lookup: Dict[str, CandidateFactor] = {} - for candidates in proposals.values(): - for c in candidates: - lookup[c.name] = c - - result: List[CandidateFactor] = [] - seen: set = set() - for score in scores: - candidate = lookup.get(score.factor_name) - if candidate is not None and score.factor_name not in seen: - result.append(candidate) - seen.add(score.factor_name) - - return result - - @staticmethod - def _tag_specialist_source( - candidates: List[CandidateFactor], - proposals: Dict[str, List[CandidateFactor]], - ) -> List[CandidateFactor]: - """Add specialist source information to each candidate's category.""" - 
source_map: Dict[str, str] = {} - for spec_name, spec_candidates in proposals.items(): - for c in spec_candidates: - source_map[c.name] = spec_name - - for c in candidates: - spec_name = source_map.get(c.name, "unknown") - if not c.category.startswith("specialist:"): - c.category = f"specialist:{spec_name}/{c.category}" - - return candidates - - -# --------------------------------------------------------------------------- -# Utility -# --------------------------------------------------------------------------- - -def _flatten_memory_signal(memory_signal: Dict[str, Any]) -> str: - """Flatten a memory signal dict to a compact string.""" - parts: List[str] = [] - for key in ( - "recommended_directions", "strategic_insights", - "complementary_patterns", "prompt_text", - ): - val = memory_signal.get(key) - if isinstance(val, list): - parts.extend(str(v) for v in val) - elif isinstance(val, str) and val: - parts.append(val) - return " ".join(parts) diff --git a/src/factorminer/factorminer/agent/factor_generator.py b/src/factorminer/factorminer/agent/factor_generator.py deleted file mode 100644 index 950bd7d..0000000 --- a/src/factorminer/factorminer/agent/factor_generator.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Main factor generation agent using LLM guided by memory priors. - -Orchestrates the prompt construction, LLM invocation, output parsing, -and retry logic for a single batch of factor candidates. -""" - -from __future__ import annotations - -import logging -import time -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.agent.output_parser import CandidateFactor, parse_llm_output -from src.factorminer.factorminer.agent.prompt_builder import PromptBuilder - -logger = logging.getLogger(__name__) - - -class FactorGenerator: - """LLM-based factor generation agent. 
- - Generates batches of candidate factors by constructing prompts that - inject experience memory priors, calling an LLM provider, and parsing - the output into validated CandidateFactor objects. - - Parameters - ---------- - llm_provider : LLMProvider - The LLM backend to use for text generation. - prompt_builder : PromptBuilder - Builds system and user prompts. - temperature : float - Default sampling temperature. - max_tokens : int - Default max response tokens. - """ - - def __init__( - self, - llm_provider: LLMProvider, - prompt_builder: Optional[PromptBuilder] = None, - temperature: float = 0.8, - max_tokens: int = 4096, - ) -> None: - self.llm_provider = llm_provider - self.prompt_builder = prompt_builder or PromptBuilder() - self.temperature = temperature - self.max_tokens = max_tokens - self._generation_count = 0 - - def generate_batch( - self, - memory_signal: Optional[Dict[str, Any]] = None, - library_state: Optional[Dict[str, Any]] = None, - batch_size: int = 40, - ) -> List[CandidateFactor]: - """Generate a batch of candidate factors using LLM guided by memory priors. - - Steps: - 1. Build prompt with memory signal injection. - 2. Call LLM to generate candidates. - 3. Parse and validate each candidate. - 4. Retry failed parses if any. - 5. Return list of valid CandidateFactor objects. - - Parameters - ---------- - memory_signal : dict or None - Memory priors to inject into the prompt. Keys: - - ``"recommended_directions"`` : list[str] - - ``"forbidden_directions"`` : list[str] - - ``"strategic_insights"`` : list[str] - - ``"recent_rejections"`` : list[dict] - library_state : dict or None - Current library state. Keys: - - ``"size"`` : int - - ``"target_size"`` : int - - ``"recent_admissions"`` : list[str] - - ``"domain_saturation"`` : dict[str, float] - batch_size : int - Number of candidates to request per batch. - - Returns - ------- - list[CandidateFactor] - All valid candidate factors (those with successfully parsed - expression trees). 
- """ - memory_signal = memory_signal or {} - library_state = library_state or {} - - self._generation_count += 1 - batch_id = self._generation_count - - logger.info( - "Generating batch #%d: size=%d, provider=%s", - batch_id, - batch_size, - self.llm_provider.provider_name, - ) - - # 1. Build prompts - system_prompt = self.prompt_builder.system_prompt - user_prompt = self.prompt_builder.build_user_prompt( - memory_signal=memory_signal, - library_state=library_state, - batch_size=batch_size, - ) - - # 2. Call LLM - t0 = time.monotonic() - raw_output = self.llm_provider.generate( - system_prompt=system_prompt, - user_prompt=user_prompt, - temperature=self.temperature, - max_tokens=self.max_tokens, - ) - elapsed = time.monotonic() - t0 - logger.info("LLM response received in %.1fs (%d chars)", elapsed, len(raw_output)) - - # 3. Parse output - candidates, failed_lines = parse_llm_output(raw_output) - - valid = [c for c in candidates if c.is_valid] - invalid = [c for c in candidates if not c.is_valid] - - logger.info( - "Batch #%d initial parse: %d valid, %d invalid, %d unparseable lines", - batch_id, - len(valid), - len(invalid), - len(failed_lines), - ) - - # 4. Retry failed parses - if failed_lines or invalid: - retry_input = failed_lines + [c.formula for c in invalid if c.formula] - retried = self._retry_failed_parses(retry_input, attempts=2) - if retried: - # Deduplicate by formula - existing_formulas = {c.formula for c in valid} - for c in retried: - if c.formula not in existing_formulas: - valid.append(c) - existing_formulas.add(c.formula) - logger.info( - "Batch #%d after retry: %d total valid candidates", - batch_id, - len(valid), - ) - - # 5. 
Log summary - if valid: - categories = {} - for c in valid: - categories[c.category] = categories.get(c.category, 0) + 1 - logger.info( - "Batch #%d categories: %s", - batch_id, - ", ".join(f"{k}={v}" for k, v in sorted(categories.items())), - ) - - return valid - - def _retry_failed_parses( - self, - failed: List[str], - attempts: int = 2, - ) -> List[CandidateFactor]: - """Retry parsing failed outputs with a repair prompt. - - Asks the LLM to fix malformed formulas by providing the broken - expressions and asking for corrected versions. - - Parameters - ---------- - failed : list[str] - Original text lines or formulas that failed to parse. - attempts : int - Max number of retry rounds. - - Returns - ------- - list[CandidateFactor] - Successfully parsed candidates from retries. - """ - if not failed: - return [] - - # Limit retries to avoid excessive API calls - failed = failed[:15] - recovered: List[CandidateFactor] = [] - - for attempt in range(1, attempts + 1): - if not failed: - break - - repair_prompt = ( - "The following factor formulas failed to parse. " - "Fix each one so it uses ONLY valid operators and features " - "from the library. Return them in the same numbered format:\n" - ". : \n\n" - "Broken formulas:\n" - + "\n".join(f" {i+1}. {f}" for i, f in enumerate(failed)) - + "\n\nFix all syntax errors, unknown operators, and invalid " - "feature names. Every formula must be a valid nested function " - "call using only operators from the library." 
- ) - - try: - raw = self.llm_provider.generate( - system_prompt=self.prompt_builder.system_prompt, - user_prompt=repair_prompt, - temperature=max(0.3, self.temperature - 0.3), - max_tokens=self.max_tokens, - ) - except Exception as e: - logger.warning("Retry attempt %d failed: %s", attempt, e) - break - - candidates, still_failed = parse_llm_output(raw) - new_valid = [c for c in candidates if c.is_valid] - recovered.extend(new_valid) - - # Update failed list for next attempt - failed = still_failed + [c.formula for c in candidates if not c.is_valid] - - logger.debug( - "Retry attempt %d: recovered %d, still failing %d", - attempt, - len(new_valid), - len(failed), - ) - - return recovered diff --git a/src/factorminer/factorminer/agent/llm_interface.py b/src/factorminer/factorminer/agent/llm_interface.py deleted file mode 100644 index ba4ea65..0000000 --- a/src/factorminer/factorminer/agent/llm_interface.py +++ /dev/null @@ -1,365 +0,0 @@ -"""Abstract LLM interface supporting multiple providers. - -Provides a unified API for generating text completions across OpenAI, -Anthropic, Google (Gemini), and a deterministic mock provider for testing. -""" - -from __future__ import annotations - -import logging -import os -from abc import ABC, abstractmethod -from typing import Any, Dict, Optional - -logger = logging.getLogger(__name__) - - -class LLMProvider(ABC): - """Abstract base for LLM text-generation providers.""" - - @abstractmethod - def generate( - self, - system_prompt: str, - user_prompt: str, - temperature: float = 0.8, - max_tokens: int = 4096, - ) -> str: - """Generate a text completion. - - Parameters - ---------- - system_prompt : str - System-level instructions (role, rules, operator library, etc.). - user_prompt : str - Per-iteration user prompt (memory signal, library state, etc.). - temperature : float - Sampling temperature; higher = more creative. - max_tokens : int - Maximum tokens in the response. 
- - Returns - ------- - str - Raw text response from the model. - """ - - @property - @abstractmethod - def provider_name(self) -> str: - """Human-readable provider name.""" - - -class OpenAIProvider(LLMProvider): - """OpenAI API provider (GPT-4, GPT-4o, etc.).""" - - def __init__( - self, - model: str = "gpt-4o", - api_key: Optional[str] = None, - ) -> None: - self.model = model - self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") - self._client: Any = None - - def _get_client(self) -> Any: - if self._client is None: - try: - from openai import OpenAI - except ImportError: - raise ImportError( - "openai package is required for OpenAIProvider. " - "Install with: pip install openai" - ) - self._client = OpenAI(api_key=self.api_key) - return self._client - - def generate( - self, - system_prompt: str, - user_prompt: str, - temperature: float = 0.8, - max_tokens: int = 4096, - ) -> str: - client = self._get_client() - logger.debug("OpenAI request: model=%s temp=%.2f", self.model, temperature) - response = client.chat.completions.create( - model=self.model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ], - temperature=temperature, - max_tokens=max_tokens, - ) - text = response.choices[0].message.content or "" - logger.debug("OpenAI response: %d chars", len(text)) - return text - - @property - def provider_name(self) -> str: - return f"openai/{self.model}" - - -class AnthropicProvider(LLMProvider): - """Anthropic Claude API provider with adaptive thinking support.""" - - def __init__( - self, - model: str = "claude-sonnet-4-6", - api_key: Optional[str] = None, - use_thinking: bool = True, - effort: str = "max", - ) -> None: - self.model = model - self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "") - self.use_thinking = use_thinking - self.effort = effort - self._client: Any = None - - def _get_client(self) -> Any: - if self._client is None: - try: - import anthropic - except 
ImportError: - raise ImportError( - "anthropic package is required for AnthropicProvider. " - "Install with: pip install anthropic" - ) - self._client = anthropic.Anthropic(api_key=self.api_key) - return self._client - - def generate( - self, - system_prompt: str, - user_prompt: str, - temperature: float = 1, - max_tokens: int = 32000, - ) -> str: - client = self._get_client() - logger.debug("Anthropic request: model=%s thinking=%s effort=%s", - self.model, self.use_thinking, self.effort) - - kwargs: dict = { - "model": self.model, - "system": system_prompt, - "messages": [{"role": "user", "content": user_prompt}], - "max_tokens": max_tokens, - } - - if self.use_thinking: - kwargs["thinking"] = {"type": "adaptive"} - kwargs["temperature"] = 1 # Required for thinking mode - kwargs["output_config"] = {"effort": self.effort} - else: - kwargs["temperature"] = temperature - - response = client.messages.create(**kwargs) - - # Extract text from response, skipping thinking blocks - text_parts = [] - for block in response.content: - if hasattr(block, "text"): - text_parts.append(block.text) - text = "\n".join(text_parts) if text_parts else "" - logger.debug("Anthropic response: %d chars", len(text)) - return text - - @property - def provider_name(self) -> str: - return f"anthropic/{self.model}" - - -class GoogleProvider(LLMProvider): - """Google Gemini API provider (paper uses Gemini 3.0 Flash).""" - - def __init__( - self, - model: str = "gemini-2.0-flash", - api_key: Optional[str] = None, - ) -> None: - self.model = model - self.api_key = api_key or os.environ.get("GOOGLE_API_KEY", "") - self._client: Any = None - - def _get_client(self) -> Any: - if self._client is None: - try: - import google.generativeai as genai - except ImportError: - raise ImportError( - "google-generativeai package is required for GoogleProvider. 
" - "Install with: pip install google-generativeai" - ) - genai.configure(api_key=self.api_key) - self._client = genai.GenerativeModel( - self.model, - generation_config={"max_output_tokens": 8192}, - ) - return self._client - - def generate( - self, - system_prompt: str, - user_prompt: str, - temperature: float = 0.8, - max_tokens: int = 4096, - ) -> str: - client = self._get_client() - logger.debug("Google request: model=%s temp=%.2f", self.model, temperature) - combined = f"{system_prompt}\n\n---\n\n{user_prompt}" - response = client.generate_content( - combined, - generation_config={ - "temperature": temperature, - "max_output_tokens": max_tokens, - }, - ) - text = response.text if response.text else "" - logger.debug("Google response: %d chars", len(text)) - return text - - @property - def provider_name(self) -> str: - return f"google/{self.model}" - - -class MockProvider(LLMProvider): - """Deterministic provider for testing without API calls. - - Returns predefined factor formulas that exercise diverse operator - combinations. Useful for unit tests and integration testing. 
- """ - - MOCK_FACTORS = [ - ("momentum_reversal", "Neg(CsRank(Delta($close, 5)))"), - ("volume_surprise", "CsZScore(Div(Sub($volume, Mean($volume, 20)), Std($volume, 20)))"), - ("price_range_ratio", "Div(Sub($high, $low), Add($high, $low))"), - ("vwap_deviation", "CsRank(Div(Sub($close, $vwap), $vwap))"), - ("return_skew", "Neg(Skew($returns, 20))"), - ("intraday_momentum", "CsRank(Div(Sub($close, $open), Sub($high, $low)))"), - ("volume_price_corr", "Neg(Corr($volume, $close, 10))"), - ("amt_acceleration", "CsZScore(Delta(Mean($amt, 5), 5))"), - ("close_high_ratio", "CsRank(Sub(Div($close, TsMax($high, 20)), 1))"), - ("smooth_return", "Neg(CsRank(EMA($returns, 10)))"), - ("volatility_ratio", "Div(Std($returns, 5), Std($returns, 20))"), - ("mean_reversion", "Neg(CsZScore(Div(Sub($close, SMA($close, 20)), SMA($close, 20))))"), - ("volume_trend", "CsRank(TsLinRegSlope($volume, 20))"), - ("price_position", "CsRank(Div(Sub($close, TsMin($close, 20)), Sub(TsMax($close, 20), TsMin($close, 20))))"), - ("amt_volume_div", "CsRank(Neg(Corr(CsRank($amt), CsRank($volume), 10)))"), - ("weighted_return", "CsZScore(WMA($returns, 10))"), - ("high_low_decay", "Neg(Decay(Div(Sub($high, $low), $close), 10))"), - ("residual_vol", "CsRank(Std(Resid($close, $volume, 20), 10))"), - ("open_gap", "CsZScore(Div(Sub($open, Delay($close, 1)), Delay($close, 1)))"), - ("log_turnover", "Neg(CsRank(Log(Div($amt, $volume))))"), - ("beta_momentum", "CsRank(Mul(Beta($returns, $volume, 20), Delta($close, 10)))"), - ("rank_reversal", "Neg(CsRank(Sum($returns, 5)))"), - ("kurtosis_signal", "CsZScore(Neg(Kurt($returns, 20)))"), - ("vwap_trend", "CsRank(TsLinRegSlope(Div($close, $vwap), 20))"), - ("adaptive_mean", "CsRank(Div(Sub($close, KAMA($close, 10)), Std($close, 10)))"), - ("cumulative_flow", "CsZScore(CsRank(Delta(CumSum(Mul($volume, Sign(Delta($close, 1)))), 5)))"), - ("range_breakout", "CsRank(Div(Sub($close, TsMin($low, 10)), Std($close, 10)))"), - ("hull_deviation", 
"Neg(CsRank(Div(Sub($close, HMA($close, 20)), $close)))"), - ("conditional_vol", "CsZScore(IfElse(Greater($returns, 0), Std($returns, 10), Neg(Std($returns, 10))))"), - ("dema_crossover", "CsRank(Sub(DEMA($close, 5), DEMA($close, 20)))"), - ("ts_rank_volume", "Neg(CsRank(TsRank($volume, 20)))"), - ("median_price", "CsZScore(Div(Sub($close, Median($close, 20)), Median($close, 20)))"), - ("argmax_timing", "CsRank(Neg(TsArgMax($close, 20)))"), - ("log_return_sum", "Neg(CsRank(Sum(LogReturn($close, 1), 10)))"), - ("price_cov", "CsZScore(Neg(Cov($close, $volume, 20)))"), - ("inv_volatility", "CsRank(Inv(Std($returns, 20)))"), - ("squared_return", "Neg(CsRank(Mean(Square($returns), 10)))"), - ("abs_return_ratio", "CsRank(Div(Abs(Delta($close, 1)), Mean(Abs(Delta($close, 1)), 20)))"), - ("quantile_signal", "CsZScore(Quantile($returns, 20, 0.75))"), - ("neutralized_mom", "CsNeutralize(Delta($close, 10))"), - ] - - def __init__(self, cycle: bool = True) -> None: - self._cycle = cycle - self._call_count = 0 - - def generate( - self, - system_prompt: str, - user_prompt: str, - temperature: float = 0.8, - max_tokens: int = 4096, - ) -> str: - # Parse batch_size from user_prompt if present - batch_size = 40 - for line in user_prompt.split("\n"): - if "generate" in line.lower() and "candidate" in line.lower(): - for word in line.split(): - if word.isdigit(): - batch_size = int(word) - break - - batch_size = min(batch_size, len(self.MOCK_FACTORS)) - - start = self._call_count * batch_size - if self._cycle: - indices = [ - (start + i) % len(self.MOCK_FACTORS) - for i in range(batch_size) - ] - else: - indices = list(range(min(batch_size, len(self.MOCK_FACTORS)))) - - self._call_count += 1 - - lines = [] - for idx, factor_idx in enumerate(indices, 1): - name, formula = self.MOCK_FACTORS[factor_idx] - lines.append(f"{idx}. 
{name}: {formula}") - - return "\n".join(lines) - - @property - def provider_name(self) -> str: - return "mock" - - -# --------------------------------------------------------------------------- -# Factory -# --------------------------------------------------------------------------- - -_PROVIDER_MAP: Dict[str, type] = { - "openai": OpenAIProvider, - "anthropic": AnthropicProvider, - "google": GoogleProvider, - "mock": MockProvider, -} - - -def create_provider(config: Dict[str, Any]) -> LLMProvider: - """Factory function to instantiate an LLM provider from config. - - Parameters - ---------- - config : dict - Must contain ``"provider"`` key (one of "openai", "anthropic", - "google", "mock"). Additional keys are passed as kwargs to the - provider constructor: - - ``"model"`` : model identifier - - ``"api_key"`` : API key (overrides env var) - - Returns - ------- - LLMProvider - """ - provider_name = config.get("provider", "mock") - cls = _PROVIDER_MAP.get(provider_name) - if cls is None: - raise ValueError( - f"Unknown LLM provider '{provider_name}'. " - f"Available: {sorted(_PROVIDER_MAP.keys())}" - ) - - kwargs: Dict[str, Any] = {} - if "model" in config and provider_name != "mock": - kwargs["model"] = config["model"] - if "api_key" in config and provider_name != "mock": - kwargs["api_key"] = config["api_key"] - - logger.info("Creating LLM provider: %s (kwargs=%s)", provider_name, list(kwargs.keys())) - return cls(**kwargs) diff --git a/src/factorminer/factorminer/agent/output_parser.py b/src/factorminer/factorminer/agent/output_parser.py deleted file mode 100644 index 2b9f65e..0000000 --- a/src/factorminer/factorminer/agent/output_parser.py +++ /dev/null @@ -1,259 +0,0 @@ -"""Parse LLM output into structured CandidateFactor objects. - -Handles various output formats from LLMs: numbered lists, JSON, -markdown code blocks, and raw text. Validates each formula against -the expression tree parser. 
-""" - -from __future__ import annotations - -import logging -import re -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple - -from src.factorminer.factorminer.core.expression_tree import ExpressionTree -from src.factorminer.factorminer.core.parser import parse, try_parse -from src.factorminer.factorminer.core.types import OperatorType, OPERATOR_REGISTRY - -logger = logging.getLogger(__name__) - - -@dataclass -class CandidateFactor: - """A candidate factor parsed from LLM output. - - Attributes - ---------- - name : str - Descriptive snake_case name. - formula : str - DSL formula string. - expression_tree : ExpressionTree or None - Parsed expression tree (None if parsing failed). - category : str - Inferred category based on outermost operators. - parse_error : str - Error message if formula failed to parse. - """ - - name: str - formula: str - expression_tree: Optional[ExpressionTree] = None - category: str = "unknown" - parse_error: str = "" - - @property - def is_valid(self) -> bool: - return self.expression_tree is not None - - -def _infer_category(formula: str) -> str: - """Infer a rough category from the outermost operators in the formula.""" - lower = formula.lower() - # Check for cross-sectional operators at the top - if any(op in formula for op in ("CsRank", "CsZScore", "CsDemean", "CsScale", "CsNeutralize", "CsQuantile")): - # Look deeper for sub-category - if any(op in formula for op in ("Corr", "Cov", "Beta", "Resid")): - return "cross_sectional_regression" - if any(op in formula for op in ("Delta", "Delay", "Return", "LogReturn")): - return "cross_sectional_momentum" - if any(op in formula for op in ("Std", "Var", "Skew", "Kurt")): - return "cross_sectional_volatility" - if any(op in formula for op in ("Mean", "Sum", "EMA", "SMA", "WMA", "DEMA", "HMA", "KAMA")): - return "cross_sectional_smoothing" - if any(op in formula for op in ("TsLinReg", "TsLinRegSlope")): - return "cross_sectional_trend" - return 
"cross_sectional" - if any(op in formula for op in ("Corr", "Cov", "Beta", "Resid")): - return "regression" - if any(op in formula for op in ("Delta", "Delay", "Return", "LogReturn")): - return "momentum" - if any(op in formula for op in ("Std", "Var", "Skew", "Kurt")): - return "volatility" - if any(op in formula for op in ("IfElse", "Greater", "Less")): - return "conditional" - return "general" - - -# --------------------------------------------------------------------------- -# Line parsing patterns -# --------------------------------------------------------------------------- - -# Pattern: "1. name: formula" or "1) name: formula" -_NUMBERED_PATTERN = re.compile( - r"^\s*\d+[\.\)]\s*" # numbered prefix - r"([a-zA-Z_][a-zA-Z0-9_]*)" # factor name - r"\s*:\s*" # colon separator - r"(.+)$" # formula -) - -# Pattern: "name: formula" (no number) -_PLAIN_PATTERN = re.compile( - r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)" # factor name - r"\s*:\s*" # colon separator - r"(.+)$" # formula -) - -# Pattern: just a formula starting with an operator -_FORMULA_ONLY_PATTERN = re.compile( - r"^\s*([A-Z][a-zA-Z]*\(.+\))\s*$" -) - -# Pattern: JSON-like {"name": "...", "formula": "..."} -_JSON_PATTERN = re.compile( - r'"name"\s*:\s*"([^"]+)"\s*,\s*"formula"\s*:\s*"([^"]+)"' -) - - -def _strip_markdown(text: str) -> str: - """Remove markdown code block markers.""" - text = re.sub(r"^```[a-z]*\n?", "", text, flags=re.MULTILINE) - text = re.sub(r"\n?```\s*$", "", text, flags=re.MULTILINE) - return text - - -def _clean_formula(formula: str) -> str: - """Clean up a formula string before parsing.""" - formula = formula.strip() - # Remove trailing comments - if " #" in formula: - formula = formula[: formula.index(" #")] - if " //" in formula: - formula = formula[: formula.index(" //")] - # Remove trailing punctuation - formula = formula.rstrip(";,.") - # Remove surrounding backticks - formula = formula.strip("`") - return formula.strip() - - -def parse_llm_output(raw_text: str) -> 
Tuple[List[CandidateFactor], List[str]]: - """Parse raw LLM text output into candidate factors. - - Parameters - ---------- - raw_text : str - Raw text from the LLM containing factor definitions. - - Returns - ------- - tuple[list[CandidateFactor], list[str]] - (successfully_parsed, failed_lines) where failed_lines are - the original text lines that could not be parsed. - """ - text = _strip_markdown(raw_text) - - candidates: List[CandidateFactor] = [] - failed: List[str] = [] - seen_names: set = set() - - # Try JSON pattern first (entire text) - json_matches = _JSON_PATTERN.findall(text) - if json_matches: - for name, formula in json_matches: - formula = _clean_formula(formula) - candidate = _try_build_candidate(name, formula) - if candidate.name not in seen_names: - candidates.append(candidate) - seen_names.add(candidate.name) - if candidates: - logger.debug("Parsed %d factors from JSON format", len(candidates)) - return candidates, failed - - # Line-by-line parsing - for line in text.split("\n"): - line = line.strip() - if not line or line.startswith("#") or line.startswith("---"): - continue - - name: Optional[str] = None - formula: Optional[str] = None - - # Try numbered pattern: "1. 
name: formula" - m = _NUMBERED_PATTERN.match(line) - if m: - name, formula = m.group(1), m.group(2) - else: - # Try plain pattern: "name: formula" - m = _PLAIN_PATTERN.match(line) - if m: - name, formula = m.group(1), m.group(2) - else: - # Try formula-only pattern - m = _FORMULA_ONLY_PATTERN.match(line) - if m: - formula = m.group(1) - # Generate name from formula - name = _generate_name_from_formula(formula, len(candidates)) - - if name is None or formula is None: - if any(c.isalpha() for c in line) and "(" in line: - failed.append(line) - continue - - formula = _clean_formula(formula) - if not formula: - failed.append(line) - continue - - # Ensure unique name - base_name = name.lower().replace("-", "_") - unique_name = base_name - counter = 2 - while unique_name in seen_names: - unique_name = f"{base_name}_{counter}" - counter += 1 - - candidate = _try_build_candidate(unique_name, formula) - candidates.append(candidate) - seen_names.add(unique_name) - - if not candidate.is_valid: - failed.append(line) - - logger.debug( - "Parsed %d candidates (%d valid, %d failed lines)", - len(candidates), - sum(1 for c in candidates if c.is_valid), - len(failed), - ) - return candidates, failed - - -def _try_build_candidate(name: str, formula: str) -> CandidateFactor: - """Attempt to parse a formula and build a CandidateFactor.""" - tree = try_parse(formula) - if tree is not None: - category = _infer_category(formula) - return CandidateFactor( - name=name, - formula=tree.to_string(), # canonicalize - expression_tree=tree, - category=category, - ) - else: - # Try to get a useful error message - error_msg = "" - try: - parse(formula) - except (SyntaxError, KeyError, ValueError) as e: - error_msg = str(e) - - return CandidateFactor( - name=name, - formula=formula, - expression_tree=None, - category="unknown", - parse_error=error_msg, - ) - - -def _generate_name_from_formula(formula: str, index: int) -> str: - """Generate a descriptive name from a formula.""" - # Extract the 
outermost operator - m = re.match(r"([A-Z][a-zA-Z]*)\(", formula) - if m: - outer_op = m.group(1).lower() - return f"{outer_op}_factor_{index + 1}" - return f"factor_{index + 1}" diff --git a/src/factorminer/factorminer/agent/prompt_builder.py b/src/factorminer/factorminer/agent/prompt_builder.py deleted file mode 100644 index 288a999..0000000 --- a/src/factorminer/factorminer/agent/prompt_builder.py +++ /dev/null @@ -1,682 +0,0 @@ -"""Build prompts for LLM-driven factor generation using memory priors. - -The system prompt encodes the full operator library, syntax rules, feature -list, and task description. The user prompt injects per-iteration context: -memory signals, library state, and output format instructions. -""" - -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.core.types import ( - FEATURES, - OPERATOR_REGISTRY, - OperatorSpec, - OperatorType, -) - - -def _format_operator_table() -> str: - """Build a human-readable operator reference table grouped by category.""" - grouped: Dict[str, List[OperatorSpec]] = {} - for spec in OPERATOR_REGISTRY.values(): - cat = spec.category.name - grouped.setdefault(cat, []).append(spec) - - lines: List[str] = [] - for cat_name in [ - "ARITHMETIC", - "STATISTICAL", - "TIMESERIES", - "SMOOTHING", - "CROSS_SECTIONAL", - "REGRESSION", - "LOGICAL", - "AUTO_INVENTED", - ]: - specs = grouped.get(cat_name, []) - if not specs: - continue - lines.append(f"\n### {cat_name} operators") - for spec in sorted(specs, key=lambda s: s.name): - params_str = "" - if spec.param_names: - parts = [] - for pname in spec.param_names: - default = spec.param_defaults.get(pname, "") - lo, hi = spec.param_ranges.get(pname, (None, None)) - range_str = f"[{lo}-{hi}]" if lo is not None else "" - parts.append(f"{pname}={default}{range_str}") - params_str = f" params: {', '.join(parts)}" - arity_args = ", ".join([f"expr{i+1}" for i in range(spec.arity)]) - if spec.param_names: - 
arity_args += ", " + ", ".join(spec.param_names) - lines.append(f"- {spec.name}({arity_args}): {spec.description}{params_str}") - return "\n".join(lines) - - -def _format_feature_list() -> str: - """Build a description of available raw features.""" - descriptions = { - "$open": "opening price", - "$high": "highest price in the bar", - "$low": "lowest price in the bar", - "$close": "closing price", - "$volume": "trading volume (shares)", - "$amt": "trading amount (currency value)", - "$vwap": "volume-weighted average price", - "$returns": "close-to-close returns", - } - lines = [] - for feat in FEATURES: - desc = descriptions.get(feat, "") - lines.append(f" {feat}: {desc}") - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# System prompt -# --------------------------------------------------------------------------- - -SYSTEM_PROMPT = f"""You are a quantitative researcher mining formulaic alpha factors for stock selection. - -Your goal is to generate novel, predictive factor expressions using a tree-structured domain-specific language (DSL). Each factor is a composition of operators applied to raw market features. - -## RAW FEATURES (leaf nodes) -{_format_feature_list()} - -## OPERATOR LIBRARY -{_format_operator_table()} - -## EXPRESSION SYNTAX RULES -1. Every expression is a nested function call: Operator(args...) -2. Leaf nodes are raw features ($close, $volume, etc.) or numeric constants. -3. Operators are called by name with expression arguments first, then numeric parameters: - - Mean($close, 20) = 20-day rolling mean of $close - - Corr($close, $volume, 10) = 10-day rolling correlation of close and volume - - IfElse(Greater($returns, 0), $volume, Neg($volume)) = conditional -4. No infix operators; use Add(x,y) instead of x+y, Sub(x,y) instead of x-y, etc. -5. Parameters like window sizes are trailing numeric arguments after expression children. -6. 
Valid window sizes are integers; check each operator's parameter ranges above. -7. Cross-sectional operators (CsRank, CsZScore, CsDemean, CsScale, CsNeutralize) operate across all stocks at each time step -- they are crucial for making factors comparable. - -## EXAMPLES OF WELL-FORMED FACTORS -- Neg(CsRank(Delta($close, 5))) - Short-term reversal: rank of 5-day price change, negated. -- CsZScore(Div(Sub($volume, Mean($volume, 20)), Std($volume, 20))) - Volume surprise: standardized deviation from 20-day mean volume. -- CsRank(Div(Sub($close, $vwap), $vwap)) - Intraday deviation from VWAP, cross-sectionally ranked. -- Neg(Corr($volume, $close, 10)) - Negative price-volume correlation over 10 days. -- CsRank(TsLinRegSlope($volume, 20)) - Trend in trading volume over 20 days, ranked. -- IfElse(Greater($returns, 0), Std($returns, 10), Neg(Std($returns, 10))) - Conditional volatility: positive for up-moves, negative for down-moves. -- CsRank(Div(Sub($close, TsMin($low, 20)), Sub(TsMax($high, 20), TsMin($low, 20)))) - Position within 20-day price range, ranked. - -## KEY PRINCIPLES FOR HIGH-QUALITY FACTORS -- Always wrap the outermost expression with a cross-sectional operator (CsRank, CsZScore) for comparability. -- Combine DIFFERENT operator types for novelty (e.g., time-series + cross-sectional + arithmetic). -- Use diverse window sizes; avoid always defaulting to 10. -- Explore uncommon feature combinations ($amt, $vwap are underused). -- Factors with depth 3-7 tend to be best: deep enough to capture non-trivial patterns but not so deep they overfit. -- Prefer economically meaningful combinations over random nesting. 
-""" - - -# --------------------------------------------------------------------------- -# PromptBuilder -# --------------------------------------------------------------------------- - -def normalize_factor_references(entries: Optional[List[Any]]) -> List[str]: - """Convert mixed factor metadata into prompt-safe string references.""" - if not entries: - return [] - - normalized: List[str] = [] - seen: set[str] = set() - - for entry in entries: - text = "" - if isinstance(entry, str): - text = entry.strip() - elif isinstance(entry, dict): - formula = str(entry.get("formula", "")).strip() - name = str(entry.get("name", "")).strip() - category = str(entry.get("category", "")).strip() - if formula and name: - text = f"{name}: {formula}" - elif formula: - text = formula - elif name and category: - text = f"{name} [{category}]" - elif name: - text = name - elif entry is not None: - text = str(entry).strip() - - if text and text not in seen: - normalized.append(text) - seen.add(text) - - return normalized - - -class PromptBuilder: - """Constructs system and user prompts for factor generation. - - The system prompt is static (operator library + rules). - The user prompt varies each iteration based on memory signals. - """ - - def __init__(self, system_prompt: Optional[str] = None) -> None: - self._system_prompt = system_prompt or SYSTEM_PROMPT - - @property - def system_prompt(self) -> str: - return self._system_prompt - - def build_user_prompt( - self, - memory_signal: Dict[str, Any], - library_state: Dict[str, Any], - batch_size: int = 40, - ) -> str: - """Build the per-iteration user prompt injecting memory priors. 
- - Parameters - ---------- - memory_signal : dict - Keys: - - ``"recommended_directions"`` : list[str] -- patterns to explore - - ``"forbidden_directions"`` : list[str] -- patterns to avoid - - ``"strategic_insights"`` : list[str] -- high-level lessons - - ``"recent_rejections"`` : list[dict] -- recent rejection reasons - library_state : dict - Keys: - - ``"size"`` : int -- current library size - - ``"target_size"`` : int -- target library size - - ``"recent_admissions"`` : list[str] -- recently admitted factor names - - ``"domain_saturation"`` : dict[str, float] -- per-domain saturation - batch_size : int - Number of candidates to generate this iteration. - - Returns - ------- - str - The fully assembled user prompt. - """ - sections: List[str] = [] - - # --- Task directive --- - sections.append( - f"Generate exactly {batch_size} novel, diverse alpha factor candidates." - ) - - # --- Library status --- - lib_size = library_state.get("size", 0) - target = library_state.get("target_size", 110) - sections.append( - f"\n## CURRENT LIBRARY STATUS\n" - f"Library size: {lib_size} / {target} factors." 
- ) - - recent = normalize_factor_references( - library_state.get("recent_admissions", []) - ) - if recent: - sections.append( - "Recently admitted factors:\n" - + "\n".join(f" - {f}" for f in recent[-10:]) - ) - - saturation = library_state.get("domain_saturation", {}) - if saturation: - sat_lines = [f" {domain}: {pct:.0%} saturated" for domain, pct in saturation.items()] - sections.append( - "Domain saturation:\n" + "\n".join(sat_lines) - ) - - # --- Memory signal: recommended directions --- - rec_dirs = memory_signal.get("recommended_directions", []) - if rec_dirs: - sections.append( - "\n## RECOMMENDED DIRECTIONS (focus on these successful patterns)\n" - + "\n".join(f" * {d}" for d in rec_dirs) - ) - - # --- Memory signal: forbidden directions --- - forbidden = memory_signal.get("forbidden_directions", []) - if forbidden: - sections.append( - "\n## FORBIDDEN DIRECTIONS (AVOID these -- they produce correlated/weak factors)\n" - + "\n".join(f" X {d}" for d in forbidden) - ) - - # --- Memory signal: strategic insights --- - insights = memory_signal.get("strategic_insights", []) - if insights: - sections.append( - "\n## STRATEGIC INSIGHTS\n" - + "\n".join(f" Note: {ins}" for ins in insights) - ) - - helix_prompt_text = memory_signal.get("prompt_text", "").strip() - if helix_prompt_text: - sections.append( - "\n## HELIX RETRIEVAL SUMMARY\n" - f"{helix_prompt_text}" - ) - - complementary_patterns = memory_signal.get("complementary_patterns", []) - if complementary_patterns: - sections.append( - "\n## COMPLEMENTARY PATTERNS\n" - + "\n".join(f" + {pattern}" for pattern in complementary_patterns) - ) - - conflict_warnings = memory_signal.get("conflict_warnings", []) - if conflict_warnings: - sections.append( - "\n## SATURATION WARNINGS\n" - + "\n".join(f" ! 
{warning}" for warning in conflict_warnings) - ) - - operator_cooccurrence = memory_signal.get("operator_cooccurrence", []) - if operator_cooccurrence: - sections.append( - "\n## OPERATOR CO-OCCURRENCE PRIORS\n" - + "\n".join(f" - {pair}" for pair in operator_cooccurrence) - ) - - semantic_gaps = memory_signal.get("semantic_gaps", []) - if semantic_gaps: - sections.append( - "\n## SEMANTIC GAPS\n" - + "\n".join( - f" - Underused but promising: {gap}" for gap in semantic_gaps - ) - ) - - # --- Recent rejection reasons --- - rejections = memory_signal.get("recent_rejections", []) - if rejections: - rej_lines = [] - for rej in rejections[-10:]: - name = rej.get("name", "unknown") - reason = rej.get("reason", "unknown") - rej_lines.append(f" - {name}: rejected because {reason}") - sections.append( - "\n## RECENT REJECTIONS (learn from these failures)\n" - + "\n".join(rej_lines) - ) - - # --- Orthogonality directive --- - sections.append( - "\n## CRITICAL REQUIREMENT: ORTHOGONALITY\n" - "Generate factors that are UNCORRELATED with existing library members. " - "Each candidate should explore a DIFFERENT structural pattern. " - "Vary your operator choices, window sizes, feature combinations, and " - "nesting depth across candidates. Do NOT generate trivial variations " - "of the same formula (e.g., changing only the window size)." - ) - - # --- Output format --- - sections.append( - f"\n## OUTPUT FORMAT\n" - f"Output exactly {batch_size} factors, one per line.\n" - f"Format each line as: . : \n" - f"Example:\n" - f"1. momentum_reversal: Neg(CsRank(Delta($close, 5)))\n" - f"2. 
volume_surprise: CsZScore(Div(Sub($volume, Mean($volume, 20)), Std($volume, 20)))\n" - f"\nRules:\n" - f"- factor_name: lowercase_with_underscores, descriptive, unique\n" - f"- formula: valid DSL expression using ONLY operators and features listed above\n" - f"- No markdown, no explanations, no extra text -- just the numbered list\n" - f"- Every formula must parse correctly with the operator library" - ) - - return "\n".join(sections) - - -# --------------------------------------------------------------------------- -# New specialist/critic/debate prompt builder functions -# --------------------------------------------------------------------------- - -def build_specialist_prompt( - specialist_name: str, - specialist_domain: str, - specialist_hypothesis: str, - preferred_operators: List[str], - preferred_features: List[str], - example_factors: List[str], - avoid_patterns: List[str], - memory_signal: Optional[Dict[str, Any]] = None, - library_diagnostics: Optional[Dict[str, Any]] = None, - regime_context: str = "", - n_proposals: int = 15, - success_rate: Optional[float] = None, -) -> str: - """Build a rich context-aware user prompt for a specialist agent. - - Parameters - ---------- - specialist_name : str - Human-readable name of the specialist (e.g. ``"MomentumMiner"``). - specialist_domain : str - Short domain description for the specialist. - specialist_hypothesis : str - Core economic hypothesis guiding this specialist. - preferred_operators : list[str] - Operator names this specialist should lean on. - preferred_features : list[str] - Feature names this specialist prefers. - example_factors : list[str] - Reference formula examples for this specialist. - avoid_patterns : list[str] - Structural patterns to explicitly avoid. - memory_signal : dict or None - Experience memory context (recommended directions, etc.). - library_diagnostics : dict or None - Library state (size, saturation, recent admissions). 
- regime_context : str - Current market regime description. - n_proposals : int - Number of proposals to request. - success_rate : float or None - Historical success rate for this specialist (for context). - - Returns - ------- - str - Fully assembled specialist user prompt. - """ - memory_signal = memory_signal or {} - library_diagnostics = library_diagnostics or {} - sections: List[str] = [] - - # Header - sections.append( - f"## SPECIALIST TASK: {specialist_name}\n" - f"Domain: {specialist_domain}\n" - f"Hypothesis: {specialist_hypothesis}" - ) - - if success_rate is not None: - sections.append( - f"Your historical admission rate: {success_rate:.1%} " - f"(aim to exceed this by proposing higher-quality factors)" - ) - - # Regime context - if regime_context: - sections.append( - f"\n## CURRENT MARKET REGIME\n{regime_context}" - ) - - # Library state - lib_size = library_diagnostics.get("size", 0) - target = library_diagnostics.get("target_size", 110) - sections.append( - f"\n## LIBRARY STATUS\nCurrent: {lib_size}/{target} factors." 
- ) - - recent = normalize_factor_references( - library_diagnostics.get("recent_admissions", []) - ) - if recent: - sections.append( - "Recently admitted (avoid similar patterns):\n" - + "\n".join(f" - {f}" for f in recent[-8:]) - ) - - saturation = library_diagnostics.get("domain_saturation", {}) - if saturation: - sat_lines = [ - f" {d}: {p:.0%} saturated" for d, p in saturation.items() - ] - sections.append("Domain saturation:\n" + "\n".join(sat_lines)) - - # Memory signal injections - rec_dirs = memory_signal.get("recommended_directions", []) - if rec_dirs: - sections.append( - "\n## RECOMMENDED DIRECTIONS\n" - + "\n".join(f" * {d}" for d in rec_dirs) - ) - - forbidden = memory_signal.get("forbidden_directions", []) - if forbidden: - sections.append( - "\n## FORBIDDEN DIRECTIONS\n" - + "\n".join(f" X {d}" for d in forbidden) - ) - - insights = memory_signal.get("strategic_insights", []) - if insights: - sections.append( - "\n## STRATEGIC INSIGHTS\n" - + "\n".join(f" - {ins}" for ins in insights) - ) - - helix_text = memory_signal.get("prompt_text", "").strip() - if helix_text: - sections.append(f"\n## HELIX CONTEXT\n{helix_text}") - - comp_patterns = memory_signal.get("complementary_patterns", []) - if comp_patterns: - sections.append( - "\n## COMPLEMENTARY PATTERNS (explore these)\n" - + "\n".join(f" + {p}" for p in comp_patterns) - ) - - warn = memory_signal.get("conflict_warnings", []) - if warn: - sections.append( - "\n## SATURATION WARNINGS\n" - + "\n".join(f" ! {w}" for w in warn) - ) - - gaps = memory_signal.get("semantic_gaps", []) - if gaps: - sections.append( - "\n## SEMANTIC GAPS (underused areas to explore)\n" - + "\n".join(f" ~ {g}" for g in gaps) - ) - - # Specialist focus directive - ops_str = ", ".join(preferred_operators) - feats_str = ", ".join(preferred_features) - sections.append( - f"\n## YOUR SPECIALIST FOCUS\n" - f"Preferred operators: {{{ops_str}}}\n" - f"Preferred features: {{{feats_str}}}\n" - f"Focus ~60% of proposals on these. 
The remaining ~40% should " - f"explore creative cross-domain combinations." - ) - - # Domain examples - if example_factors: - sections.append( - "\n## DOMAIN REFERENCE EXAMPLES (structural templates, do NOT copy exactly)\n" - + "\n".join(f" - {ex}" for ex in example_factors) - ) - - # Avoid patterns - if avoid_patterns: - sections.append( - "\n## PATTERNS TO AVOID\n" - + "\n".join(f" X {av}" for av in avoid_patterns) - ) - - # Few-shot patterns from memory - mem_success_patterns = memory_signal.get("_few_shot_examples", []) - if mem_success_patterns: - sections.append( - "\n## FEW-SHOT SUCCESS PATTERNS FROM MEMORY\n" - "(These formulas were previously admitted -- use as structural inspiration)\n" - + "\n".join(f" [+] {ex}" for ex in mem_success_patterns[:5]) - ) - - # Output format - sections.append( - f"\n## OUTPUT FORMAT\n" - f"Generate exactly {n_proposals} novel factor candidates.\n" - f"Format: . : \n" - f"Example: 1. momentum_reversal: Neg(CsRank(Delta($close, 5)))\n" - f"Rules:\n" - f"- factor_name: lowercase_with_underscores, unique, descriptive\n" - f"- formula: valid DSL expression only\n" - f"- No markdown, no explanations -- just the numbered list\n" - f"- Every formula must use only registered operators and features" - ) - - return "\n".join(sections) - - -def build_critic_scoring_prompt( - candidates: List[Dict[str, str]], - existing_factors: Optional[List[str]] = None, - memory_signal: Optional[str] = None, - regime_context: str = "", -) -> str: - """Build a structured JSON-output scoring prompt for the critic agent. - - Parameters - ---------- - candidates : list[dict] - List of dicts with keys ``"name"``, ``"formula"``, ``"specialist"``. - existing_factors : list[str] or None - Formula strings already in the library. - memory_signal : str or None - Free-text memory context (success patterns, etc.). - regime_context : str - Current market regime description. - - Returns - ------- - str - Fully assembled critic scoring prompt. 
- """ - existing_factors = existing_factors or [] - sections: List[str] = [] - - sections.append( - "## CRITIC SCORING TASK\n" - "Evaluate the following candidate factors for economic intuition.\n" - "Score each on how well it captures a plausible, economically " - "meaningful cross-sectional return predictor." - ) - - if regime_context: - sections.append(f"\n## CURRENT REGIME\n{regime_context}") - - if existing_factors: - sections.append( - "\n## LIBRARY SAMPLE (existing factors to avoid duplicating)\n" - + "\n".join(f" - {f}" for f in existing_factors[-12:]) - ) - - if memory_signal: - sections.append(f"\n## MEMORY CONTEXT (success patterns)\n{memory_signal[:600]}") - - sections.append("\n## CANDIDATES") - for c in candidates: - name = c.get("name", "unknown") - formula = c.get("formula", "") - specialist = c.get("specialist", "unknown") - sections.append( - f" [{specialist}] {name}: {formula}" - ) - - sections.append( - "\n## SCORING CRITERIA\n" - "economic_intuition [0.0-1.0]:\n" - " 1.0 = strong economic story, appropriate complexity, novel signal\n" - " 0.5 = plausible but generic or overly simple\n" - " 0.0 = no coherent economic story, trivial, or clearly wrong\n" - "\nConsider:\n" - " - Is there a coherent alpha story (momentum, reversal, vol, liquidity)?\n" - " - Is complexity appropriate (depth 3-7, 3-5 unique operators)?\n" - " - Does it use features in a semantically meaningful way?\n" - " - Is it structurally distinct from existing library members?\n" - " - Would a quant researcher find this plausible?" - ) - - sections.append( - "\n## OUTPUT FORMAT\n" - "One JSON object per line for each candidate:\n" - '{"name": "", "economic_intuition": <0.0-1.0>, ' - '"rationale": ""}\n' - "Output ONLY the JSON lines. No markdown, no extra text." 
- ) - - return "\n".join(sections) - - -def build_debate_synthesis_prompt( - all_proposals: List[Dict[str, Any]], - critic_scores: List[Dict[str, Any]], - top_k: int = 10, -) -> str: - """Build a consensus synthesis prompt for the debate orchestrator. - - Used when a final synthesis step is desired to resolve conflicts - between specialist proposals and produce a consensus ranking. - - Parameters - ---------- - all_proposals : list[dict] - All proposals with ``"name"``, ``"formula"``, ``"specialist"`` keys. - critic_scores : list[dict] - Critic scores with ``"name"`` and ``"composite_score"`` keys. - top_k : int - Number of top factors to synthesize consensus for. - - Returns - ------- - str - Debate synthesis prompt. - """ - # Sort by composite score - score_map = {s["name"]: s.get("composite_score", 0.5) for s in critic_scores} - sorted_proposals = sorted( - all_proposals, - key=lambda p: score_map.get(p.get("name", ""), 0.0), - reverse=True, - )[:top_k * 2] # take 2x top_k for synthesis - - sections: List[str] = [] - sections.append( - f"## DEBATE SYNTHESIS TASK\n" - f"Multiple specialist agents proposed the following factors.\n" - f"The critic has pre-scored them. Your task is to identify the " - f"top {top_k} most complementary factors for a diverse library.\n" - f"Prioritize NOVELTY and ORTHOGONALITY over pure individual quality." - ) - - sections.append("\n## SCORED PROPOSALS (sorted by critic score)") - for p in sorted_proposals: - name = p.get("name", "?") - formula = p.get("formula", "?") - specialist = p.get("specialist", "?") - score = score_map.get(name, 0.5) - sections.append( - f" [{specialist}, score={score:.2f}] {name}: {formula}" - ) - - sections.append( - f"\n## SELECTION CRITERIA\n" - f"Select the top {top_k} factors that are:\n" - f" 1. Diverse in operator structure (avoid near-duplicates)\n" - f" 2. Balanced across specialist domains where possible\n" - f" 3. High composite critic score\n" - f" 4. 
Economically interpretable\n" - f"\nOutput a ranked list: . \n" - f"No other text." - ) - - return "\n".join(sections) diff --git a/src/factorminer/factorminer/agent/specialists.py b/src/factorminer/factorminer/agent/specialists.py deleted file mode 100644 index 5746096..0000000 --- a/src/factorminer/factorminer/agent/specialists.py +++ /dev/null @@ -1,596 +0,0 @@ -"""Specialist agent configurations for domain-focused factor generation. - -Each specialist focuses on a particular alpha factor domain with a distinct -cognitive style, preferred operators, domain hypotheses, and historical -success tracking. ``SpecialistAgent`` wraps a config with per-domain memory -and proposal logic. ``SpecialistPromptBuilder`` extends the base -``PromptBuilder`` to inject domain-specific directives. -""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.agent.output_parser import CandidateFactor, parse_llm_output -from src.factorminer.factorminer.agent.prompt_builder import ( - SYSTEM_PROMPT, - PromptBuilder, - normalize_factor_references, -) - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Configuration dataclass -# --------------------------------------------------------------------------- - -@dataclass -class SpecialistConfig: - """Configuration for a domain-specialist factor generator. - - Attributes - ---------- - name : str - Human-readable specialist name (e.g. ``"MomentumMiner"``). - domain : str - Domain description used in prompt directives. - preferred_operators : list[str] - Operator names this specialist should emphasise. - preferred_features : list[str] - Raw features this specialist should lean towards. - hypothesis : str - Core economic hypothesis driving this specialist's approach. 
- example_factors : list[str] - Example formulas to ground the specialist in concrete patterns. - avoid : list[str] - Structural patterns this specialist should steer clear of. - temperature : float - Sampling temperature for LLM calls. - system_prompt_suffix : str - Extra paragraph appended to the system prompt. - provider_config : dict - Optional provider-level overrides (model, max_tokens, etc.). - """ - - name: str - domain: str - preferred_operators: List[str] - preferred_features: List[str] - hypothesis: str = "" - example_factors: List[str] = field(default_factory=list) - avoid: List[str] = field(default_factory=list) - temperature: float = 0.8 - system_prompt_suffix: str = "" - provider_config: Dict[str, Any] = field(default_factory=dict) - - -# --------------------------------------------------------------------------- -# Pre-defined specialist constants -# --------------------------------------------------------------------------- - -MOMENTUM_SPECIALIST = SpecialistConfig( - name="MomentumMiner", - domain="price momentum and trend following", - preferred_operators=["TsRank", "Delta", "EMA", "SMA", "TsLinRegSlope", "Return"], - preferred_features=["$close", "$returns", "$vwap"], - hypothesis=( - "Short-term momentum and trend reversals contain predictive signal. " - "Serial correlation in returns and time-series rank dynamics reveal " - "persistent directional biases exploitable at the cross-section." - ), - example_factors=[ - "Neg(TsRank(Delta($close, 5), 20))", - "CsRank(TsLinRegSlope($close, 10))", - "Neg(CsRank(EMA($returns, 8)))", - ], - avoid=[ - "volume-only factors without price context", - "pure cross-sectional without time component", - "very long windows (>60) on returns", - ], - temperature=0.85, - system_prompt_suffix=( - "You are the MOMENTUMMINER specialist. Your cognitive style is " - "directional and trend-aware. Focus on price persistence, serial " - "correlation in returns, and time-series rank dynamics. 
Prefer " - "directional operators (Delta, Return, TsLinRegSlope, EMA, TsRank) " - "to capture price trajectory information. Explore both short-term " - "reversal (1-5 day) and medium-term momentum (10-30 day) regimes. " - "Hypothesis: recent price trends contain exploitable signal that " - "cross-sectional ranking amplifies." - ), -) - -VOLATILITY_SPECIALIST = SpecialistConfig( - name="VolatilityMiner", - domain="volatility regimes and higher-moment signals", - preferred_operators=["Std", "Skew", "Kurt", "TsRank", "IfElse", "Greater"], - preferred_features=["$returns", "$high", "$low", "$close"], - hypothesis=( - "Volatility clustering and moment anomalies predict near-term returns. " - "Stocks with anomalous higher moments (excess kurtosis, negative skew) " - "exhibit predictable subsequent return patterns via risk-aversion channels." - ), - example_factors=[ - "IfElse(Greater(Std($returns,12), Mean(Std($returns,12),48)), " - "Neg(CsRank(Delta($close,3))), CsRank(Skew($returns,20)))", - "Neg(CsRank(Kurt($returns, 20)))", - "CsRank(Div(Std($returns,5), Std($returns,20)))", - ], - avoid=[ - "simple momentum without vol conditioning", - "long window trends > 40 bars", - "volume-only volatility without returns", - ], - temperature=0.9, - system_prompt_suffix=( - "You are the VOLATILITYMINER specialist. Your cognitive style is " - "regime-aware and risk-focused. Combine statistical operators " - "(Std, Var, Kurt, Skew) with logical branching (IfElse, Greater, Less) " - "to capture asymmetric behaviour in volatility regimes. " - "Explore vol-of-vol, vol regime transitions, and higher-moment " - "cross-sectional anomalies. Condition momentum signals on vol " - "regimes -- high-vol vs low-vol stocks behave very differently. " - "Hypothesis: volatility clustering and skewness anomalies carry " - "cross-sectional predictive power beyond simple momentum." 
- ), -) - -LIQUIDITY_SPECIALIST = SpecialistConfig( - name="LiquidityMiner", - domain="volume, liquidity, and microstructure signals", - preferred_operators=["Corr", "TsRank", "CsRank", "EMA", "Delta"], - preferred_features=["$volume", "$amt", "$vwap", "$close"], - hypothesis=( - "Volume-price divergence and liquidity dynamics predict order flow " - "imbalances. Stocks with abnormal volume relative to price movement " - "signal informed trading; VWAP deviations capture intraday microstructure." - ), - example_factors=[ - "CsRank(Corr($volume, $close, 10))", - "Neg(CsRank(EMA(Div(Sub($close,$vwap),Add($vwap,1e-4)),5)))", - "CsZScore(Delta(Mean($amt, 5), 5))", - ], - avoid=[ - "volume in isolation without price context", - "close/open ratio without volume normalization", - "microstructure without cross-sectional ranking", - ], - temperature=0.85, - system_prompt_suffix=( - "You are the LIQUIDITYMINER specialist. Your cognitive style is " - "microstructure-focused and flow-aware. Focus on cross-sectional " - "liquidity patterns: volume-price divergence, turnover anomalies, " - "and VWAP-based microstructure signals. Use correlation/covariance " - "operators to capture relative volume-price alignment. Explore " - "amount (dollar volume) signals -- $amt is often underused. " - "Condition signals on whether volume is confirming or diverging " - "from price direction. Hypothesis: volume-price divergence " - "and liquidity imbalances predict short-term order flow reversals." - ), -) - -REGIME_SPECIALIST = SpecialistConfig( - name="RegimeMiner", - domain="cross-sectional dispersion and regime classification", - preferred_operators=["CsRank", "CsZScore", "Std", "TsLinRegSlope", "Rsquare", "Resi"], - preferred_features=["$close", "$returns", "$vwap", "$amt"], - hypothesis=( - "Cross-sectional dispersion and regression residuals capture " - "regime-independent signals. 
Stocks that deviate from their " - "predicted cross-sectional position contain mean-reversion signal " - "that is robust across bull and bear markets." - ), - example_factors=[ - "Mul(CsRank(Rsquare($close, 24)), CsRank(Delta($close, 3)))", - "CsRank(Resi($close, $vwap, 20))", - "CsZScore(CsRank(TsLinRegSlope($returns, 15)))", - ], - avoid=[ - "single-feature factors without statistical operators", - "arithmetic without cross-sectional normalization", - "momentum without regime conditioning", - ], - temperature=0.85, - system_prompt_suffix=( - "You are the REGINEMINER specialist. Your cognitive style is " - "cross-sectional and regression-oriented. Focus on dispersion, " - "residual signals, and regime-robust patterns. Use regression " - "operators (Rsquare, Resi, TsLinRegSlope) to decompose price " - "behaviour into systematic and idiosyncratic components. " - "Cross-sectional normalization is essential -- every factor should " - "be comparable across stocks. Explore cross-asset dispersion " - "patterns that persist regardless of market direction. " - "Hypothesis: cross-sectional regression residuals and R-squared " - "signals capture regime-independent structural mispricings." - ), -) - -DEFAULT_SPECIALISTS: List[SpecialistConfig] = [ - MOMENTUM_SPECIALIST, - VOLATILITY_SPECIALIST, - LIQUIDITY_SPECIALIST, - REGIME_SPECIALIST, -] - -# Map from specialist name to config for convenience -SPECIALIST_CONFIGS: Dict[str, SpecialistConfig] = { - spec.name: spec for spec in DEFAULT_SPECIALISTS -} - - -# --------------------------------------------------------------------------- -# SpecialistDomainMemory -- per-specialist admission tracking -# --------------------------------------------------------------------------- - -@dataclass -class SpecialistDomainMemory: - """Tracks admission/rejection history for a single specialist. - - Parameters - ---------- - specialist_name : str - The name of the specialist this memory belongs to. 
- """ - - specialist_name: str - admitted: List[str] = field(default_factory=list) - rejected: List[str] = field(default_factory=list) - rejection_reasons: List[str] = field(default_factory=list) - - @property - def total_proposed(self) -> int: - return len(self.admitted) + len(self.rejected) - - @property - def success_rate(self) -> float: - if self.total_proposed == 0: - return 0.0 - return len(self.admitted) / self.total_proposed - - def record_admitted(self, formulas: List[str]) -> None: - self.admitted.extend(formulas) - - def record_rejected(self, formulas: List[str], reasons: List[str]) -> None: - self.rejected.extend(formulas) - self.rejection_reasons.extend(reasons) - - def get_summary(self) -> str: - """Human-readable summary of domain performance.""" - from collections import Counter - lines = [ - f"Specialist: {self.specialist_name}", - f" Proposed: {self.total_proposed} Admitted: {len(self.admitted)} " - f"Rejected: {len(self.rejected)}", - f" Success rate: {self.success_rate:.1%}", - ] - if self.admitted: - lines.append(" Best admitted (last 3):") - for f in self.admitted[-3:]: - lines.append(f" + {f}") - if self.rejection_reasons: - counts = Counter(self.rejection_reasons) - top = counts.most_common(3) - lines.append(" Top rejection reasons:") - for reason, count in top: - lines.append(f" - {reason} (x{count})") - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# SpecialistAgent -- proposal generation with domain memory -# --------------------------------------------------------------------------- - -class SpecialistAgent: - """Domain-specialist factor proposer with memory and success tracking. - - Each specialist has a unique cognitive style, a preferred operator - toolkit, and maintains per-domain memory of what has worked and failed. - Proposals are generated by building a rich context-aware prompt and - calling the shared LLM provider. 
- - Parameters - ---------- - config : SpecialistConfig - Configuration defining this specialist's domain and style. - llm : LLMProvider - LLM backend shared across all specialists. - base_system_prompt : str or None - Override for the base system prompt. - """ - - def __init__( - self, - config: SpecialistConfig, - llm: LLMProvider, - base_system_prompt: Optional[str] = None, - ) -> None: - self.config = config - self.llm = llm - self._memory = SpecialistDomainMemory(specialist_name=config.name) - - # Build the specialist prompt builder (extends base PromptBuilder) - self._prompt_builder = SpecialistPromptBuilder( - specialist_config=config, - base_system_prompt=base_system_prompt, - ) - - @property - def name(self) -> str: - return self.config.name - - @property - def success_rate(self) -> float: - """Fraction of this specialist's proposals that were admitted.""" - return self._memory.success_rate - - def generate_proposals( - self, - n_proposals: int, - memory_signal: Optional[Dict[str, Any]] = None, - library_diagnostics: Optional[Dict[str, Any]] = None, - regime_context: str = "", - forbidden_patterns: Optional[List[str]] = None, - existing_factors: Optional[List[str]] = None, - ) -> List[str]: - """Generate formula string proposals from this specialist. - - Builds a rich domain-aware prompt injecting memory, diagnostics, - regime context, and forbidden patterns, then calls the LLM and - parses the response into formula strings. - - Parameters - ---------- - n_proposals : int - Number of factor formulas to request. - memory_signal : dict or None - Experience memory priors (recommended/forbidden directions, etc.). - library_diagnostics : dict or None - Current library state (size, saturation, recent admissions, etc.). - regime_context : str - Current market regime description for conditioning. - forbidden_patterns : list[str] or None - Structural patterns to explicitly avoid. 
- existing_factors : list[str] or None - Formula strings already in the library (to avoid duplicates). - - Returns - ------- - list[str] - List of formula strings proposed by this specialist. - """ - memory_signal = memory_signal or {} - library_diagnostics = library_diagnostics or {} - forbidden_patterns = forbidden_patterns or [] - existing_factors = normalize_factor_references(existing_factors) - - enriched_signal = self._enrich_memory_signal( - memory_signal, forbidden_patterns, regime_context - ) - - system_prompt = self._prompt_builder.system_prompt - user_prompt = self._prompt_builder.build_user_prompt( - memory_signal=enriched_signal, - library_state=library_diagnostics, - batch_size=n_proposals, - ) - - logger.debug( - "Specialist %s generating %d proposals (provider=%s)", - self.name, - n_proposals, - self.llm.provider_name, - ) - - try: - raw = self.llm.generate( - system_prompt=system_prompt, - user_prompt=user_prompt, - temperature=self.config.temperature, - max_tokens=4096, - ) - except Exception as exc: - logger.warning( - "Specialist %s LLM call failed: %s. Returning empty list.", - self.name, - exc, - ) - return [] - - candidates, _ = parse_llm_output(raw) - valid = [c for c in candidates if c.is_valid] - - if existing_factors: - existing_set = set(existing_factors) - valid = [c for c in valid if c.formula not in existing_set] - - formulas = [c.formula for c in valid] - logger.debug( - "Specialist %s produced %d valid proposals", - self.name, - len(formulas), - ) - return formulas - - def update_domain_memory( - self, - admitted: List[str], - rejected: List[str], - reasons: Optional[List[str]] = None, - ) -> None: - """Update this specialist's domain memory after evaluation. - - Parameters - ---------- - admitted : list[str] - Formulas from this specialist that were admitted to the library. - rejected : list[str] - Formulas that were rejected. - reasons : list[str] or None - Rejection reasons (parallel to ``rejected``). 
- """ - reasons = reasons or ["unknown"] * len(rejected) - if len(reasons) < len(rejected): - reasons = reasons + ["unknown"] * (len(rejected) - len(reasons)) - self._memory.record_admitted(admitted) - self._memory.record_rejected(rejected, reasons[:len(rejected)]) - - def get_domain_performance_summary(self) -> str: - """Human-readable summary of what this specialist has discovered.""" - return self._memory.get_summary() - - def _enrich_memory_signal( - self, - base_signal: Dict[str, Any], - forbidden_patterns: List[str], - regime_context: str, - ) -> Dict[str, Any]: - """Merge base memory signal with domain-specific context.""" - enriched = dict(base_signal) - - base_forbidden = list(enriched.get("forbidden_directions", [])) - enriched["forbidden_directions"] = base_forbidden + [ - f"[{self.name} domain] Avoid: {p}" for p in self.config.avoid - ] + forbidden_patterns - - if self.config.example_factors: - existing_insights = list(enriched.get("strategic_insights", [])) - existing_insights.append( - f"As {self.name}, your reference examples are: " - + " | ".join(self.config.example_factors[:3]) - ) - enriched["strategic_insights"] = existing_insights - - if regime_context: - existing_prompt = enriched.get("prompt_text", "") - regime_note = f"[Regime context] {regime_context}" - enriched["prompt_text"] = ( - regime_note + "\n" + existing_prompt - if existing_prompt - else regime_note - ) - - if self._memory.total_proposed > 0: - perf_note = ( - f"[{self.name} history] Success rate: {self.success_rate:.1%} " - f"({len(self._memory.admitted)} admitted, " - f"{len(self._memory.rejected)} rejected)." 
- ) - existing_insights = list(enriched.get("strategic_insights", [])) - existing_insights.append(perf_note) - enriched["strategic_insights"] = existing_insights - - return enriched - - -# --------------------------------------------------------------------------- -# SpecialistPromptBuilder -- extends PromptBuilder with domain directives -# --------------------------------------------------------------------------- - -class SpecialistPromptBuilder(PromptBuilder): - """Prompt builder that injects domain-specific specialist directives. - - Extends the base system prompt with a specialist suffix and biases - the user prompt towards the specialist's preferred operators, features, - hypothesis, and example factors. - - Parameters - ---------- - specialist_config : SpecialistConfig - The specialist configuration to use. - base_system_prompt : str or None - Override for the base system prompt. Defaults to the global - ``SYSTEM_PROMPT`` from :mod:`factorminer.agent.prompt_builder`. - """ - - def __init__( - self, - specialist_config: SpecialistConfig, - base_system_prompt: Optional[str] = None, - ) -> None: - base = base_system_prompt or SYSTEM_PROMPT - suffix = specialist_config.system_prompt_suffix - hypothesis_block = "" - if specialist_config.hypothesis: - hypothesis_block = ( - f"\n\n## DOMAIN HYPOTHESIS\n" - f"{specialist_config.hypothesis}" - ) - modified_system = ( - f"{base}\n\n" - f"## SPECIALIST DOMAIN DIRECTIVE\n" - f"{suffix}" - f"{hypothesis_block}" - ) - super().__init__(system_prompt=modified_system) - self._specialist = specialist_config - - @property - def specialist_config(self) -> SpecialistConfig: - """Return the underlying specialist configuration.""" - return self._specialist - - def build_user_prompt( - self, - memory_signal: Dict[str, Any], - library_state: Dict[str, Any], - batch_size: int = 40, - ) -> str: - """Build user prompt with specialist operator/feature bias. 
- - Calls the base ``PromptBuilder.build_user_prompt`` and appends a - directive asking the specialist to focus roughly 60% of its - candidates on its preferred operators and features, plus injects - example factors for grounding. - - Parameters - ---------- - memory_signal : dict - Memory priors (recommended/forbidden directions, etc.). - library_state : dict - Current library state (size, saturation, etc.). - batch_size : int - Number of candidates to generate. - - Returns - ------- - str - Assembled user prompt with specialist bias section. - """ - base_prompt = super().build_user_prompt( - memory_signal=memory_signal, - library_state=library_state, - batch_size=batch_size, - ) - - spec = self._specialist - ops = ", ".join(spec.preferred_operators) - feats = ", ".join(spec.preferred_features) - - specialist_section = ( - f"\n## SPECIALIST FOCUS [{spec.name}]\n" - f"As the {spec.domain} specialist, focus ~60% of candidates on " - f"{{{ops}}} operators applied to {{{feats}}} features.\n" - f"The remaining ~40% should explore creative cross-domain " - f"combinations to maintain diversity.\n" - ) - - if spec.example_factors: - specialist_section += ( - "\n## DOMAIN REFERENCE EXAMPLES (structure to emulate, not copy)\n" - + "\n".join(f" - {ex}" for ex in spec.example_factors) - + "\n" - ) - - if spec.avoid: - specialist_section += ( - "\n## DOMAIN-SPECIFIC AVOIDANCES\n" - + "\n".join(f" X {av}" for av in spec.avoid) - + "\n" - ) - - return base_prompt + specialist_section diff --git a/src/factorminer/factorminer/benchmark/__init__.py b/src/factorminer/factorminer/benchmark/__init__.py deleted file mode 100644 index 0f1e87d..0000000 --- a/src/factorminer/factorminer/benchmark/__init__.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Benchmark runners for paper-faithful and Helix research evaluation.""" - -from src.factorminer.factorminer.benchmark.runtime import ( - BenchmarkManifest, - build_benchmark_library, - evaluate_frozen_set, - load_benchmark_dataset, - 
run_ablation_memory_benchmark, - run_benchmark_suite, - run_cost_pressure_benchmark, - run_efficiency_benchmark, - run_runtime_mining_benchmark, - run_table1_benchmark, - select_frozen_top_k, -) -from src.factorminer.factorminer.benchmark.helix_benchmark import ( - HelixBenchmark, - BenchmarkResult, - MethodResult, - DMTestResult, - StatisticalComparisonTests, - SpeedBenchmark, - OperatorSpeedResult, - PipelineSpeedResult, -) - -try: # pragma: no cover - optional in trimmed checkouts - from factorminer.benchmark.ablation import ( - AblationStudy, - AblationResult, - AblatedMethodRunner, - ABLATION_CONFIGS, - ABLATION_LABELS, - run_full_ablation_study, - ) -except Exception: # pragma: no cover - optional in trimmed checkouts - AblationStudy = None - AblationResult = None - AblatedMethodRunner = None - ABLATION_CONFIGS = None - ABLATION_LABELS = None - run_full_ablation_study = None - -__all__ = [ - # legacy runtime benchmark - "BenchmarkManifest", - "build_benchmark_library", - "evaluate_frozen_set", - "load_benchmark_dataset", - "run_ablation_memory_benchmark", - "run_benchmark_suite", - "run_cost_pressure_benchmark", - "run_efficiency_benchmark", - "run_runtime_mining_benchmark", - "run_table1_benchmark", - "select_frozen_top_k", - # helix benchmark - "HelixBenchmark", - "BenchmarkResult", - "MethodResult", - "DMTestResult", - "StatisticalComparisonTests", - "SpeedBenchmark", - "OperatorSpeedResult", - "PipelineSpeedResult", - # ablation - "AblationStudy", - "AblationResult", - "AblatedMethodRunner", - "ABLATION_CONFIGS", - "ABLATION_LABELS", - "run_full_ablation_study", -] diff --git a/src/factorminer/factorminer/benchmark/ablation.py b/src/factorminer/factorminer/benchmark/ablation.py deleted file mode 100644 index 6130611..0000000 --- a/src/factorminer/factorminer/benchmark/ablation.py +++ /dev/null @@ -1,798 +0,0 @@ -"""Runtime ablation study for HelixFactor Phase 2 components. 
- -This module now drives ablations through the real loop path: -- HelixLoop execution on a training slice -- runtime recomputation of the admitted library -- freeze/top-k selection and combo evaluation on a held-out slice -- optional memory suppression via temporary monkeypatching - -Supported ablations: - full - all components enabled - no_debate - disable specialist debate - no_causal - disable causal validation - no_canonicalize - disable SymPy deduplication - no_regime - disable regime-aware evaluation - no_online_memory - disable memory retrieval / formation / evolution hooks - no_capacity - disable capacity estimation - no_significance - disable significance filtering - no_memory - disable memory-guided generation and updates -""" - -from __future__ import annotations - -import logging -import tempfile -import time -from contextlib import contextmanager -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd - -import src.factorminer.factorminer.core.helix_loop as helix_loop_module -import src.factorminer.factorminer.core.ralph_loop as ralph_loop_module -from src.factorminer.factorminer.agent.debate import DebateConfig as RuntimeDebateConfig -from src.factorminer.factorminer.agent.llm_interface import MockProvider -from src.factorminer.factorminer.benchmark.helix_benchmark import AblationResult, MethodResult -from src.factorminer.factorminer.core.config import MiningConfig -from src.factorminer.factorminer.core.helix_loop import HelixLoop -from src.factorminer.factorminer.core.factor_library import FactorLibrary -from src.factorminer.factorminer.evaluation.capacity import CapacityConfig as RuntimeCapacityConfig -from src.factorminer.factorminer.evaluation.causal import CausalConfig as RuntimeCausalConfig -from src.factorminer.factorminer.evaluation.regime import RegimeConfig as RuntimeRegimeConfig -from src.factorminer.factorminer.evaluation.runtime import ( - DatasetSplit, - EvaluationDataset, - evaluate_factors, -) 
-from src.factorminer.factorminer.evaluation.significance import ( - SignificanceConfig as RuntimeSignificanceConfig, -) -from src.factorminer.factorminer.benchmark.runtime import ( - build_benchmark_library, - evaluate_frozen_set, - select_frozen_top_k, -) -from src.factorminer.factorminer.memory.memory_store import ExperienceMemory - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Ablation configuration registry -# --------------------------------------------------------------------------- - -_FULL_CFG = { - "debate": True, - "causal": True, - "canonicalize": True, - "regime": True, - "online_memory": True, - "capacity": True, - "significance": True, - "memory": True, -} - -ABLATION_CONFIGS: Dict[str, Dict[str, bool]] = { - "full": dict(_FULL_CFG), - "no_debate": {**_FULL_CFG, "debate": False}, - "no_causal": {**_FULL_CFG, "causal": False}, - "no_canonicalize": {**_FULL_CFG, "canonicalize": False}, - "no_regime": {**_FULL_CFG, "regime": False}, - "no_online_memory": {**_FULL_CFG, "online_memory": False}, - "no_capacity": {**_FULL_CFG, "capacity": False}, - "no_significance": {**_FULL_CFG, "significance": False}, - "no_memory": {**_FULL_CFG, "memory": False, "debate": False}, -} - -ABLATION_LABELS: Dict[str, str] = { - "full": "HelixFactor (Full)", - "no_debate": "w/o Debate", - "no_causal": "w/o Causal", - "no_canonicalize": "w/o Canonicalization", - "no_regime": "w/o Regime", - "no_online_memory": "w/o Online Memory", - "no_capacity": "w/o Capacity", - "no_significance": "w/o Significance", - "no_memory": "w/o Memory (≈ FactorMiner NM)", -} - -EXPECTED_CONTRIBUTION_SIGN: Dict[str, int] = { - "debate": +1, - "causal": +1, - "canonicalize": +1, - "regime": +1, - "online_memory": +1, - "capacity": +1, - "significance": +1, - "memory": +1, -} - -_FEATURE_KEYS = [ - "$open", - "$high", - "$low", - "$close", - "$volume", - "$amt", - "$vwap", - "$returns", -] - - -def _merge_slices(train_data: 
dict, test_data: dict) -> dict: - """Concatenate train/test slices into one runtime evaluation dictionary.""" - merged: dict[str, np.ndarray] = {} - for key in sorted(set(train_data) | set(test_data)): - if key not in train_data or key not in test_data: - continue - left = np.asarray(train_data[key], dtype=np.float64) - right = np.asarray(test_data[key], dtype=np.float64) - if left.ndim == 2 and right.ndim == 2 and left.shape[0] == right.shape[0]: - merged[key] = np.concatenate([left, right], axis=1) - else: - merged[key] = np.asarray(left) - return merged - - -def _slice_data(data: dict, start: int, end: int) -> dict: - """Slice all 2-D benchmark arrays to a column range.""" - return { - key: value[:, start:end] - for key, value in data.items() - if isinstance(value, np.ndarray) and value.ndim >= 2 - } - - -def _build_runtime_dataset(data: dict) -> EvaluationDataset: - """Build a minimal runtime dataset from the benchmark dictionary format.""" - feature_keys = [key for key in _FEATURE_KEYS if key in data] - if "forward_returns" not in data: - raise ValueError("Runtime ablation requires 'forward_returns' in the data dict") - if not feature_keys: - raise ValueError("Runtime ablation requires at least one market feature array") - - arrays = [np.asarray(data[key], dtype=np.float64) for key in feature_keys] - data_tensor = np.stack(arrays, axis=2) - returns = np.asarray(data["forward_returns"], dtype=np.float64) - timestamps = np.arange(returns.shape[1]) - asset_ids = np.arange(returns.shape[0]) - full_split = DatasetSplit( - name="full", - indices=np.arange(returns.shape[1]), - timestamps=timestamps, - returns=returns, - target_returns={"target": returns}, - default_target="target", - ) - - # The caller populates train/test splits by passing a merged train+test view. 
- return EvaluationDataset( - data_dict={key: np.asarray(data[key], dtype=np.float64) for key in feature_keys}, - data_tensor=data_tensor, - returns=returns, - timestamps=timestamps, - asset_ids=asset_ids, - splits={"full": full_split}, - processed_df=pd.DataFrame(), - target_panels={"target": returns}, - default_target="target", - ) - - -def _build_split_dataset(data: dict, split_name: str) -> EvaluationDataset: - """Create a single-split runtime dataset from one benchmark slice.""" - dataset = _build_runtime_dataset(data) - split = DatasetSplit( - name=split_name, - indices=np.arange(dataset.returns.shape[1]), - timestamps=dataset.timestamps, - returns=dataset.returns, - target_returns={"target": dataset.returns}, - default_target="target", - ) - dataset.splits = {split_name: split} - return dataset - - -def _build_combined_dataset(train_data: dict, test_data: dict) -> EvaluationDataset: - """Create a train/test runtime dataset from sliced benchmark inputs.""" - merged = _merge_slices(train_data, test_data) - dataset = _build_runtime_dataset(merged) - train_len = np.asarray(train_data["forward_returns"]).shape[1] - test_len = np.asarray(test_data["forward_returns"]).shape[1] - timestamps = np.arange(train_len + test_len) - returns = np.asarray(merged["forward_returns"], dtype=np.float64) - - dataset.timestamps = timestamps - dataset.returns = returns - dataset.target_panels = {"target": returns} - dataset.default_target = "target" - dataset.splits = { - "train": DatasetSplit( - name="train", - indices=np.arange(0, train_len), - timestamps=timestamps[:train_len], - returns=returns[:, :train_len], - target_returns={"target": returns[:, :train_len]}, - default_target="target", - ), - "test": DatasetSplit( - name="test", - indices=np.arange(train_len, train_len + test_len), - timestamps=timestamps[train_len:], - returns=returns[:, train_len:], - target_returns={"target": returns[:, train_len:]}, - default_target="target", - ), - "full": DatasetSplit( - name="full", - 
indices=np.arange(train_len + test_len), - timestamps=timestamps, - returns=returns, - target_returns={"target": returns}, - default_target="target", - ), - } - return dataset - - -def _build_mining_config( - *, - output_dir: str, - target_library_size: int, - batch_size: int, - max_iterations: int, - ic_threshold: float, - correlation_threshold: float, -) -> MiningConfig: - """Create a loop config tailored for a single runtime ablation.""" - cfg = MiningConfig( - target_library_size=target_library_size, - batch_size=batch_size, - max_iterations=max_iterations, - ic_threshold=ic_threshold, - icir_threshold=0.5, - correlation_threshold=correlation_threshold, - replacement_ic_min=max(ic_threshold * 2.5, ic_threshold + 0.05), - replacement_ic_ratio=1.3, - fast_screen_assets=100, - num_workers=1, - output_dir=output_dir, - backend="numpy", - signal_failure_policy="reject", - ) - cfg.benchmark_mode = "paper" - cfg.research = None - cfg.target_panels = None - cfg.target_horizons = None - return cfg - - -def _build_phase2_configs(flags: Dict[str, bool]) -> Dict[str, Any]: - """Translate ablation flags into real HelixLoop runtime configs.""" - return { - "debate_config": RuntimeDebateConfig() if flags.get("debate", True) else None, - "causal_config": RuntimeCausalConfig(enabled=True) if flags.get("causal", True) else None, - "regime_config": RuntimeRegimeConfig(enabled=True) if flags.get("regime", True) else None, - "capacity_config": RuntimeCapacityConfig(enabled=True) if flags.get("capacity", True) else None, - "significance_config": ( - RuntimeSignificanceConfig(enabled=True) - if flags.get("significance", True) - else None - ), - "canonicalize": flags.get("canonicalize", True), - } - - -@contextmanager -def _patched_memory_hooks(enabled: bool): - """Disable memory retrieval and learning when a no-memory ablation is requested.""" - if enabled: - yield - return - - def _empty_signal(*_args, **_kwargs) -> dict[str, Any]: - return { - "recommended_directions": [], - 
"forbidden_directions": [], - "insights": [], - "library_state": { - "library_size": 0, - "recent_admission_rate": 0.0, - "saturated_domains": {}, - "recent_admissions_count": 0, - "recent_rejections_count": 0, - }, - "prompt_text": "", - } - - def _identity_memory(memory, *args, **kwargs): - return memory - - patch_targets = [ - (ralph_loop_module, "retrieve_memory", _empty_signal), - (ralph_loop_module, "form_memory", _identity_memory), - (ralph_loop_module, "evolve_memory", _identity_memory), - (helix_loop_module, "retrieve_memory", _empty_signal), - (helix_loop_module, "form_memory", _identity_memory), - (helix_loop_module, "evolve_memory", _identity_memory), - ] - - originals = [] - for module, attr, replacement in patch_targets: - originals.append((module, attr, getattr(module, attr))) - setattr(module, attr, replacement) - - try: - yield - finally: - for module, attr, original in originals: - setattr(module, attr, original) - - -def _compute_avg_abs_rho(artifacts) -> float: - if len(artifacts) < 2: - return 0.0 - - corr = np.abs( - np.corrcoef([artifact.split_signals["train"].reshape(-1) for artifact in artifacts]) - ) - if corr.ndim != 2: - return 0.0 - upper = corr[np.triu_indices_from(corr, k=1)] - upper = upper[np.isfinite(upper)] - return float(np.mean(upper)) if upper.size else 0.0 - - -def _runtime_payload_to_result( - *, - method: str, - payload: Dict[str, Any], - benchmark_library_size: int, - benchmark_succeeded: int, - elapsed_seconds: float, - run_id: int, -) -> MethodResult: - """Convert runtime benchmark output into a MethodResult.""" - library = payload.get("library", {}) - combinations = payload.get("combinations", {}) - selections = payload.get("selections", {}) - - result = MethodResult( - method=method, - library_ic=float(library.get("ic", 0.0)), - library_icir=float(library.get("icir", 0.0)), - avg_abs_rho=float(library.get("avg_abs_rho", 0.0)), - ew_ic=float(combinations.get("equal_weight", {}).get("ic", 0.0)), - 
ew_icir=float(combinations.get("equal_weight", {}).get("icir", 0.0)), - icw_ic=float(combinations.get("ic_weighted", {}).get("ic", 0.0)), - icw_icir=float(combinations.get("ic_weighted", {}).get("icir", 0.0)), - lasso_ic=float(selections.get("lasso", {}).get("ic", 0.0)), - lasso_icir=float(selections.get("lasso", {}).get("icir", 0.0)), - xgb_ic=float(selections.get("xgboost", {}).get("ic", 0.0)), - xgb_icir=float(selections.get("xgboost", {}).get("icir", 0.0)), - n_factors=benchmark_library_size, - admission_rate=benchmark_library_size / max(benchmark_succeeded, 1), - elapsed_seconds=elapsed_seconds, - ic_series=None, - run_id=run_id, - ) - result.runtime_payload = payload - return result - - -def _evaluate_runtime_library( - library, - dataset: EvaluationDataset, - cfg: MiningConfig, - *, - target_library_size: int, - cost_bps: Optional[List[float]] = None, -) -> tuple[MethodResult, Dict[str, Any], int, int]: - """Recompute a mined library using the runtime benchmark contract.""" - if cost_bps is None: - cost_bps = [1.0, 4.0, 7.0, 10.0, 11.0] - - factors = library.list_factors() - artifacts = evaluate_factors(factors, dataset, signal_failure_policy="reject") - succeeded = [artifact for artifact in artifacts if artifact.succeeded] - benchmark_library, benchmark_stats = build_benchmark_library( - artifacts, - cfg, - split_name="train", - ic_threshold=cfg.ic_threshold, - correlation_threshold=cfg.correlation_threshold, - ) - frozen = select_frozen_top_k( - artifacts, - benchmark_library, - top_k=target_library_size, - split_name="train", - ) - payload = evaluate_frozen_set( - frozen, - dataset, - split_name="test", - fit_split="train", - cost_bps=cost_bps, - ) - payload["benchmark"] = { - "admitted": benchmark_stats.get("admitted", 0), - "succeeded": benchmark_stats.get("succeeded", 0), - "replaced": benchmark_stats.get("replaced", 0), - "threshold_rejections": benchmark_stats.get("threshold_rejections", 0), - "correlation_rejections": 
benchmark_stats.get("correlation_rejections", 0), - "freeze_library_size": benchmark_library.size, - "frozen_top_k": [ - { - "name": artifact.name, - "formula": artifact.formula, - "category": artifact.category, - "train_ic": artifact.split_stats["train"]["ic_abs_mean"], - "train_icir": abs(artifact.split_stats["train"]["icir"]), - } - for artifact in frozen - ], - } - result = _runtime_payload_to_result( - method="helix_phase2", - payload=payload, - benchmark_library_size=benchmark_library.size, - benchmark_succeeded=max(int(benchmark_stats.get("succeeded", 0)), 1), - elapsed_seconds=0.0, - run_id=0, - ) - result.n_factors = benchmark_library.size - result.admission_rate = benchmark_library.size / max(benchmark_stats.get("succeeded", 0), 1) - result.avg_abs_rho = _compute_avg_abs_rho(frozen) - return result, payload, benchmark_library.size, int(benchmark_stats.get("succeeded", 0)) - - -class AblatedMethodRunner: - """Run one ablation variant through the real HelixLoop benchmark path.""" - - def __init__( - self, - cfg: Dict[str, bool], - ic_threshold: float = 0.02, - correlation_threshold: float = 0.5, - seed: int = 42, - llm_provider: Optional[Any] = None, - benchmark_mode: str = "paper", - ) -> None: - self._cfg = dict(cfg) - self.ic_threshold = ic_threshold - self.correlation_threshold = correlation_threshold - self.seed = seed - self.llm_provider = llm_provider - self.benchmark_mode = benchmark_mode - - def _run_loop( - self, - *, - train_data: dict, - n_factors: int, - ) -> tuple[HelixLoop, MiningConfig]: - """Instantiate and run the real HelixLoop on the training slice.""" - phase2 = _build_phase2_configs(self._cfg) - target_library_size = max(int(n_factors), 1) - max_iterations = max(target_library_size * 4, 4) - batch_size = max(4, min(target_library_size, 40)) - loop_dataset = _build_runtime_dataset(train_data) - with tempfile.TemporaryDirectory(prefix="factorminer_ablation_") as tmp: - mining_cfg = _build_mining_config( - output_dir=tmp, - 
target_library_size=target_library_size, - batch_size=batch_size, - max_iterations=max_iterations, - ic_threshold=self.ic_threshold, - correlation_threshold=self.correlation_threshold, - ) - mining_cfg.benchmark_mode = self.benchmark_mode - if self._cfg.get("memory", True): - memory = ExperienceMemory() - else: - memory = ExperienceMemory() - - loop = HelixLoop( - config=mining_cfg, - data_tensor=loop_dataset.data_tensor, - returns=np.asarray(train_data["forward_returns"], dtype=np.float64), - llm_provider=self.llm_provider or MockProvider(), - memory=memory, - library=FactorLibrary( - correlation_threshold=self.correlation_threshold, - ic_threshold=self.ic_threshold, - ), - debate_config=phase2["debate_config"], - enable_knowledge_graph=False, - enable_embeddings=False, - enable_auto_inventor=False, - auto_invention_interval=10, - canonicalize=phase2["canonicalize"], - forgetting_lambda=0.95, - causal_config=phase2["causal_config"], - regime_config=phase2["regime_config"], - capacity_config=phase2["capacity_config"], - significance_config=phase2["significance_config"], - volume=np.asarray(train_data.get("$amt", train_data["forward_returns"]), dtype=np.float64) - if "$amt" in train_data - else None, - ) - with _patched_memory_hooks(self._cfg.get("memory", True) and self._cfg.get("online_memory", True)): - loop.run( - target_size=target_library_size, - max_iterations=max_iterations, - resume=False, - ) - return loop, mining_cfg - - def run( - self, - data: dict, - test_data: dict, - n_factors: int = 40, - ) -> MethodResult: - """Run this ablation variant using the real loop + runtime contract.""" - t0 = time.perf_counter() - train_dataset = _build_split_dataset(data, "train") - benchmark_dataset = _build_combined_dataset(data, test_data) - - loop, mining_cfg = self._run_loop(train_data=data, n_factors=n_factors) - result, payload, benchmark_library_size, benchmark_succeeded = _evaluate_runtime_library( - loop.library, - benchmark_dataset, - mining_cfg, - 
target_library_size=n_factors, - ) - elapsed = time.perf_counter() - t0 - - result.elapsed_seconds = elapsed - result.method = "helix_phase2" - result.run_id = self.seed - result.runtime_payload = { - **payload, - "train_split": { - "train_length": train_dataset.returns.shape[1], - "benchmark_library_size": benchmark_library_size, - "benchmark_succeeded": benchmark_succeeded, - }, - "ablation": { - "name": self._cfg, - "seed": self.seed, - }, - } - return result - - -class AblationStudy: - """Run real-loop ablations and summarize component contribution.""" - - def __init__( - self, - ic_threshold: float = 0.02, - correlation_threshold: float = 0.5, - seed: int = 42, - configs: Optional[Dict[str, Dict[str, bool]]] = None, - llm_provider: Optional[Any] = None, - benchmark_mode: str = "paper", - ) -> None: - self.ic_threshold = ic_threshold - self.correlation_threshold = correlation_threshold - self.seed = seed - self.configs = configs or ABLATION_CONFIGS - self.llm_provider = llm_provider - self.benchmark_mode = benchmark_mode - - def run_ablation( - self, - data: dict, - train_period: Tuple[int, int], - test_period: Tuple[int, int], - n_factors: int = 40, - configs_to_run: Optional[List[str]] = None, - ) -> AblationResult: - """Run one or more ablation variants on the real loop pipeline.""" - configs_to_run = configs_to_run or list(self.configs.keys()) - train_data = _slice_data(data, *train_period) - test_data = _slice_data(data, *test_period) - - config_results: Dict[str, MethodResult] = {} - for cfg_name in configs_to_run: - cfg = self.configs.get(cfg_name) - if cfg is None: - logger.warning("Unknown ablation config: %s", cfg_name) - continue - - label = ABLATION_LABELS.get(cfg_name, cfg_name) - logger.info("Running ablation: %s", label) - t0 = time.perf_counter() - try: - runner = AblatedMethodRunner( - cfg=cfg, - ic_threshold=self.ic_threshold, - correlation_threshold=self.correlation_threshold, - seed=self.seed, - llm_provider=self.llm_provider, - 
benchmark_mode=self.benchmark_mode, - ) - result = runner.run( - data=train_data, - test_data=test_data, - n_factors=n_factors, - ) - result.method = cfg_name - config_results[cfg_name] = result - except Exception as exc: - logger.warning("Ablation %s failed: %s", cfg_name, exc) - config_results[cfg_name] = MethodResult(method=cfg_name) - - elapsed = time.perf_counter() - t0 - ic = config_results[cfg_name].library_ic - logger.info(" %s: IC=%.4f elapsed=%.1fs", cfg_name, ic, elapsed) - - ablation = AblationResult( - configs=configs_to_run, - results=config_results, - ) - ablation.contributions = self.summarize_contributions(ablation) - return ablation - - def summarize_contributions(self, result: AblationResult) -> pd.DataFrame: - """Summarize component contributions relative to the full runtime run.""" - full = result.results.get("full") - if full is None: - logger.warning("No 'full' config in ablation results; cannot summarize") - return pd.DataFrame() - - rows = [] - component_map = { - "no_debate": "debate", - "no_causal": "causal", - "no_canonicalize": "canonicalize", - "no_regime": "regime", - "no_online_memory": "online_memory", - "no_capacity": "capacity", - "no_significance": "significance", - "no_memory": "memory", - } - - for ablation_key, component in component_map.items(): - ablated = result.results.get(ablation_key) - if ablated is None: - continue - - ic_contrib = full.library_ic - ablated.library_ic - icir_contrib = full.library_icir - ablated.library_icir - adm_delta = full.admission_rate - ablated.admission_rate - - expected_sign = EXPECTED_CONTRIBUTION_SIGN.get(component, +1) - actual_sign = np.sign(ic_contrib) if ic_contrib != 0 else 0 - if abs(ic_contrib) < 0.0005: - interpretation = "Negligible" - elif actual_sign == expected_sign: - pct = abs(ic_contrib) / max(full.library_ic, 1e-6) * 100 - interpretation = f"Helps (+{pct:.1f}% IC)" - else: - interpretation = "Hurts (unexpected direction)" - - rows.append({ - "component": component, - 
"ablation_config": ablation_key, - "ic_full": full.library_ic, - "ic_ablated": ablated.library_ic, - "ic_contribution": ic_contrib, - "ic_contribution_pct": ic_contrib / max(full.library_ic, 1e-6) * 100, - "icir_full": full.library_icir, - "icir_ablated": ablated.library_icir, - "icir_contribution": icir_contrib, - "admission_rate_delta": adm_delta, - "interpretation": interpretation, - }) - - df = pd.DataFrame(rows) - if not df.empty: - df = df.sort_values("ic_contribution", ascending=False).reset_index(drop=True) - return df - - def to_latex_table(self, result: AblationResult) -> str: - """Generate a LaTeX ablation study table.""" - df = result.contributions - if df is None or df.empty: - return "% No ablation data available" - - lines = [ - r"\begin{table}[htbp]", - r"\centering", - r"\caption{HelixFactor Ablation Study: Component Contributions}", - r"\label{tab:ablation}", - r"\begin{tabular}{lccccl}", - r"\toprule", - r"Component & IC (Full) & IC (Ablated) & $\Delta$IC & $\Delta$IC\% & Interpretation \\", - r"\midrule", - ] - - for _, row in df.iterrows(): - lines.append( - f"{row['component'].replace('_', r' ')} & " - f"{row['ic_full']:.4f} & " - f"{row['ic_ablated']:.4f} & " - f"{row['ic_contribution']:+.4f} & " - f"{row['ic_contribution_pct']:+.1f}\\% & " - f"{row['interpretation']} \\\\" - ) - - lines += [r"\bottomrule", r"\end{tabular}", r"\end{table}"] - return "\n".join(lines) - - def print_summary(self, result: AblationResult) -> None: - """Print a human-readable ablation summary.""" - df = result.contributions - if df is None or df.empty: - print(" No ablation summary available.") - return - - print("\n" + "=" * 70) - print(" Ablation Study: Component Contributions") - print("=" * 70) - - full = result.results.get("full") - if full: - print(f"\n FULL System: IC={full.library_ic:.4f} ICIR={full.library_icir:.3f}") - print() - - header = ( - f" {'Component':<22} {'IC Full':>8} {'IC Ablated':>10} " - f"{'Delta IC':>10} {'Delta%':>8} Interpretation" - ) - 
print(header) - print(" " + "-" * 80) - - for _, row in df.iterrows(): - comp = row["component"].replace("_", " ") - print( - f" {comp:<22} {row['ic_full']:>8.4f} {row['ic_ablated']:>10.4f} " - f"{row['ic_contribution']:>+10.4f} {row['ic_contribution_pct']:>+7.1f}% " - f"{row['interpretation']}" - ) - - print() - - -def run_full_ablation_study( - n_assets: int = 100, - n_periods: int = 500, - n_factors: int = 40, - seed: int = 42, - configs_to_run: Optional[List[str]] = None, - verbose: bool = True, -) -> AblationResult: - """Run the full runtime ablation study on mock data.""" - if verbose: - print("\nGenerating mock data for ablation study...") - - from factorminer.benchmark.helix_benchmark import _build_mock_data_dict - - data = _build_mock_data_dict(n_assets=n_assets, n_periods=n_periods, seed=seed) - T = list(data.values())[0].shape[1] - train_end = int(T * 0.7) - - if verbose: - print(f" Data: M={n_assets}, T={T}, train=0:{train_end}, test={train_end}:{T}") - cfgs = configs_to_run or list(ABLATION_CONFIGS.keys()) - print(f" Running {len(cfgs)} ablation configurations through real loops...") - - study = AblationStudy(seed=seed, llm_provider=MockProvider()) - result = study.run_ablation( - data=data, - train_period=(0, train_end), - test_period=(train_end, T), - n_factors=n_factors, - configs_to_run=configs_to_run, - ) - - if verbose: - study.print_summary(result) - - return result diff --git a/src/factorminer/factorminer/benchmark/catalogs.py b/src/factorminer/factorminer/benchmark/catalogs.py deleted file mode 100644 index 66dc15f..0000000 --- a/src/factorminer/factorminer/benchmark/catalogs.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Deterministic baseline formula catalogs for benchmark workflows.""" - -from __future__ import annotations - -from dataclasses import dataclass -import re -from typing import Iterable - -import numpy as np - -from src.factorminer.factorminer.core.library_io import PAPER_FACTORS - - -@dataclass(frozen=True) -class CandidateEntry: - 
"""One benchmark candidate formula.""" - - name: str - formula: str - category: str - - -ALPHA101_CLASSIC: tuple[CandidateEntry, ...] = ( - CandidateEntry( - "alpha101_close_return_rank", - "Neg(CsRank(Return($close, 5)))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_intraday_position", - "CsRank(Div(Sub($close, $open), Add(Sub($high, $low), 1e-8)))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_volume_reversal", - "Neg(CsRank(Mul(Return($close, 5), Div($volume, Mean($volume, 20)))))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_vwap_gap", - "Neg(CsRank(Div(Sub($close, $vwap), Add($vwap, 1e-8))))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_price_volume_corr", - "Neg(CsRank(Corr(CsRank($close), CsRank($volume), 10)))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_range_volatility", - "Neg(CsRank(Std(Div(Sub($high, $low), Add($close, 1e-8)), 20)))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_close_vs_mean", - "Neg(CsRank(Div(Sub($close, Mean($close, 10)), Add(Std($close, 10), 1e-8))))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_turnover_rank", - "Neg(CsRank(Div($amt, Add(Mean($amt, 20), 1e-8))))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_return_skew", - "Neg(CsRank(Skew(Return($close, 1), 20)))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_trend_strength", - "CsRank(TsRank(Return($close, 1), 20))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_volume_std", - "Neg(CsRank(Div(Std($volume, 20), Add(Mean($volume, 20), 1e-8))))", - "Alpha101 Classic", - ), - CandidateEntry( - "alpha101_amount_momentum", - "CsRank(Mul(Return($close, 10), Div($amt, Add(Mean($amt, 20), 1e-8))))", - "Alpha101 Classic", - ), -) - -_WINDOW_PATTERN = re.compile(r"\b(5|10|20|30)\b") - - -def build_alpha101_adapted() -> list[CandidateEntry]: - """Expand the classic catalog into frequency-adapted window variants.""" - variants: list[CandidateEntry] = [] - windows = 
(3, 6, 12, 24, 48) - for entry in ALPHA101_CLASSIC: - for window in windows: - formula = _WINDOW_PATTERN.sub(str(window), entry.formula) - variants.append( - CandidateEntry( - name=f"{entry.name}_w{window}", - formula=formula, - category="Alpha101 Adapted", - ) - ) - return variants - - -def build_random_exploration(seed: int, count: int = 160) -> list[CandidateEntry]: - """Generate deterministic random-formula candidates from safe templates.""" - rng = np.random.RandomState(seed) - unary_templates = [ - "Neg(CsRank(Return({feat}, {w1})))", - "CsRank(TsRank({feat}, {w1}))", - "Neg(CsRank(Div(Sub({feat}, Mean({feat}, {w1})), Add(Std({feat}, {w1}), 1e-8))))", - "CsRank(Div(Std({feat}, {w1}), Add(Mean({feat}, {w2}), 1e-8)))", - "Neg(CsRank(Skew({feat}, {w1})))", - ] - binary_templates = [ - "Neg(CsRank(Corr(CsRank({feat_a}), CsRank({feat_b}), {w1})))", - "CsRank(Div(Sub({feat_a}, {feat_b}), Add(Std({feat_b}, {w1}), 1e-8)))", - "Neg(CsRank(Mul(Return({feat_a}, {w1}), Div({feat_b}, Add(Mean({feat_b}, {w2}), 1e-8)))))", - "CsRank(Cov({feat_a}, {feat_b}, {w1}))", - "Neg(CsRank(Div(Sub(EMA({feat_a}, {w1}), EMA({feat_b}, {w2})), Add(Std({feat_a}, {w1}), 1e-8))))", - ] - features = ("$open", "$high", "$low", "$close", "$volume", "$amt", "$vwap", "$returns") - windows = (3, 5, 10, 20, 30, 48) - - entries: list[CandidateEntry] = [] - for idx in range(count): - use_binary = bool(rng.randint(0, 2)) - if use_binary: - template = binary_templates[rng.randint(0, len(binary_templates))] - feat_a, feat_b = rng.choice(features, size=2, replace=False) - formula = template.format( - feat_a=feat_a, - feat_b=feat_b, - w1=int(rng.choice(windows)), - w2=int(rng.choice(windows)), - ) - else: - template = unary_templates[rng.randint(0, len(unary_templates))] - formula = template.format( - feat=rng.choice(features), - w1=int(rng.choice(windows)), - w2=int(rng.choice(windows)), - ) - entries.append( - CandidateEntry( - name=f"random_exploration_{idx:03d}", - formula=formula, - category="Random 
Exploration", - ) - ) - return entries - - -def build_gplearn_style(seed: int, count: int = 160) -> list[CandidateEntry]: - """Build deeper deterministic mutation chains that mimic GP search.""" - base = build_random_exploration(seed + 17, count=max(count, 64)) - rng = np.random.RandomState(seed + 23) - entries: list[CandidateEntry] = [] - for idx in range(count): - left = base[idx % len(base)].formula - right = base[rng.randint(0, len(base))].formula - if idx % 3 == 0: - formula = f"Neg(CsRank(Add({left}, {right})))" - elif idx % 3 == 1: - formula = f"CsRank(Div(Sub({left}, {right}), Add(Std($close, 10), 1e-8)))" - else: - formula = f"Neg(CsRank(Mul({left}, {right})))" - entries.append( - CandidateEntry( - name=f"gplearn_style_{idx:03d}", - formula=formula, - category="GPLearn", - ) - ) - return entries - - -def build_alphaforge_style() -> list[CandidateEntry]: - """Reuse a diverse subset of the paper catalog for dynamic-combine baselines.""" - entries: list[CandidateEntry] = [] - for idx, factor in enumerate(PAPER_FACTORS[::2][:80]): - entries.append( - CandidateEntry( - name=f"alphaforge_style_{idx:03d}", - formula=factor["formula"], - category="AlphaForge-style", - ) - ) - return entries - - -def build_alphaagent_style() -> list[CandidateEntry]: - """Reuse an alternate paper-catalog slice for LLM-style baseline proposals.""" - entries: list[CandidateEntry] = [] - for idx, factor in enumerate(PAPER_FACTORS[1::2][:80]): - entries.append( - CandidateEntry( - name=f"alphaagent_style_{idx:03d}", - formula=factor["formula"], - category="AlphaAgent-style", - ) - ) - return entries - - -def build_factor_miner_catalog() -> list[CandidateEntry]: - """Expose the full paper factor catalog as benchmark candidates.""" - return [ - CandidateEntry( - name=f"factor_miner_{idx + 1:03d}", - formula=factor["formula"], - category=factor["category"], - ) - for idx, factor in enumerate(PAPER_FACTORS) - ] - - -def entries_from_library(library) -> list[CandidateEntry]: - """Convert a 
saved FactorLibrary into benchmark candidate entries.""" - return [ - CandidateEntry(name=factor.name, formula=factor.formula, category=factor.category) - for factor in library.list_factors() - ] - - -def dedupe_entries(entries: Iterable[CandidateEntry]) -> list[CandidateEntry]: - """Remove duplicate formulas while preserving order.""" - seen: set[str] = set() - unique: list[CandidateEntry] = [] - for entry in entries: - if entry.formula in seen: - continue - seen.add(entry.formula) - unique.append(entry) - return unique diff --git a/src/factorminer/factorminer/benchmark/helix_benchmark.py b/src/factorminer/factorminer/benchmark/helix_benchmark.py deleted file mode 100644 index d92df09..0000000 --- a/src/factorminer/factorminer/benchmark/helix_benchmark.py +++ /dev/null @@ -1,2172 +0,0 @@ -"""HelixBenchmark — rigorous comparison of HelixFactor vs FactorMiner. - -Provides five inter-operating classes that together form a complete -benchmarking suite for the HelixFactor vs FactorMiner (Ralph Loop) paper: - - HelixBenchmark — main comparison class (Table 1 style) - StatisticalComparisonTests — DM test, paired t-test, block bootstrap - SpeedBenchmark — operator / factor / pipeline timing - BenchmarkResult — aggregate result container + report generators - DMTestResult / MethodResult — individual result containers - -CLI usage: - python -m factorminer.benchmark.helix_benchmark --mock --n-factors 40 --output results/ -""" - -from __future__ import annotations - -import copy -import argparse -import json -import logging -import math -import sys -import time -import warnings -from dataclasses import dataclass, field, asdict -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd -from scipy.stats import ttest_rel, wilcoxon - -warnings.filterwarnings("ignore") -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Serialization helpers -# 
--------------------------------------------------------------------------- - -def _json_safe(value: Any) -> Any: - """Recursively convert a structure into JSON-safe primitives.""" - if isinstance(value, dict): - return {str(k): _json_safe(v) for k, v in value.items()} - if isinstance(value, list): - return [_json_safe(v) for v in value] - if isinstance(value, tuple): - return [_json_safe(v) for v in value] - if isinstance(value, np.generic): - value = value.item() - if isinstance(value, float) and not math.isfinite(value): - return None - return value - - -# --------------------------------------------------------------------------- -# Result containers -# --------------------------------------------------------------------------- - -@dataclass -class MethodResult: - """Metrics for a single method run.""" - - method: str - library_ic: float = 0.0 - library_icir: float = 0.0 - avg_abs_rho: float = 0.0 - ew_ic: float = 0.0 - ew_icir: float = 0.0 - icw_ic: float = 0.0 - icw_icir: float = 0.0 - lasso_ic: float = 0.0 - lasso_icir: float = 0.0 - xgb_ic: float = 0.0 - xgb_icir: float = 0.0 - n_factors: int = 0 - admission_rate: float = 0.0 - elapsed_seconds: float = 0.0 - avg_turnover: float = 0.0 - # raw IC series for statistical tests (not serialized by default) - ic_series: Optional[np.ndarray] = field(default=None, repr=False) - run_id: int = 0 - - def to_dict(self) -> dict: - d = asdict(self) - d.pop("ic_series", None) - return d - - -@dataclass -class DMTestResult: - """Diebold-Mariano test for forecast accuracy difference.""" - - dm_statistic: float - p_value: float - is_significant: bool - direction: str # "helix_better", "ralph_better", "no_difference" - n_obs: int - - -@dataclass -class AblationResult: - """Result of one ablation study.""" - - configs: List[str] - results: Dict[str, MethodResult] - contributions: Optional[pd.DataFrame] = None - - def to_dict(self) -> dict: - return { - "configs": self.configs, - "results": {k: v.to_dict() for k, v in 
self.results.items()}, - } - - -@dataclass -class OperatorSpeedResult: - """Timing for individual operators.""" - - operator_timings_ms: Dict[str, float] # operator_name -> ms - n_assets: int - n_periods: int - n_repeats: int - - -@dataclass -class PipelineSpeedResult: - """Timing for end-to-end pipeline.""" - - total_seconds: float - candidates_per_second: float - n_candidates: int - - -@dataclass -class BenchmarkResult: - """Aggregate benchmark results — all methods, all metrics.""" - - methods: List[str] - factor_library_metrics: pd.DataFrame # IC, ICIR, Avg|rho| per method - combination_metrics: pd.DataFrame # EW/ICW IC and ICIR - selection_metrics: pd.DataFrame # LASSO, XGBoost - speed_metrics: pd.DataFrame - statistical_tests: Dict[str, Any] - ablation_result: Optional[AblationResult] = None - raw_method_results: Dict[str, List[MethodResult]] = field(default_factory=dict) - turnover_metrics: pd.DataFrame = field(default_factory=pd.DataFrame) - cost_pressure_metrics: pd.DataFrame = field(default_factory=pd.DataFrame) - runtime_artifacts: Dict[str, Any] = field(default_factory=dict) - - # ------------------------------------------------------------------ - # Formatting helpers - # ------------------------------------------------------------------ - - def to_latex_table(self) -> str: - """Generate a LaTeX table matching paper Table 1 style.""" - lines = [ - r"\begin{table}[htbp]", - r"\centering", - r"\caption{HelixFactor vs FactorMiner: Comprehensive Benchmark (Table 1 Style)}", - r"\label{tab:benchmark}", - r"\small", - r"\begin{tabular}{lcccccccc}", - r"\toprule", - r"Method & \multicolumn{3}{c}{Factor Library} & \multicolumn{2}{c}{EW Combo} & \multicolumn{2}{c}{ICW Combo} & Sel.IC \\", - r"\cmidrule(lr){2-4}\cmidrule(lr){5-6}\cmidrule(lr){7-8}", - r" & IC(\%) & ICIR & Avg$|\rho|$ & IC(\%) & ICIR & IC(\%) & ICIR & IC(\%) \\", - r"\midrule", - ] - - for method in self.methods: - lib_row = self.factor_library_metrics[ - self.factor_library_metrics["method"] == 
method - ] - comb_row = self.combination_metrics[ - self.combination_metrics["method"] == method - ] - sel_row = self.selection_metrics[ - self.selection_metrics["method"] == method - ] - - def _g(df, col, mult=100.0): - if df.empty or col not in df.columns: - return 0.0 - v = df.iloc[0][col] - return float(v) * mult if not pd.isna(v) else 0.0 - - bold = method in ("helix_phase2",) - fmt = lambda x, d=2: f"{x:.{d}f}" - - lib_ic = _g(lib_row, "ic_pct", 1.0) - lib_icir = _g(lib_row, "icir", 1.0) - lib_rho = _g(lib_row, "avg_abs_rho", 1.0) - ew_ic = _g(comb_row, "ew_ic_pct", 1.0) - ew_icir = _g(comb_row, "ew_icir", 1.0) - icw_ic = _g(comb_row, "icw_ic_pct", 1.0) - icw_icir = _g(comb_row, "icw_icir", 1.0) - sel_ic = _g(sel_row, "best_ic_pct", 1.0) - - row_parts = [ - method.replace("_", r"\_"), - fmt(lib_ic), - fmt(lib_icir), - fmt(lib_rho), - fmt(ew_ic), - fmt(ew_icir), - fmt(icw_ic), - fmt(icw_icir), - fmt(sel_ic), - ] - if bold: - row_parts = [r"\textbf{" + p + r"}" for p in row_parts] - - lines.append(" & ".join(row_parts) + r" \\") - - lines += [ - r"\bottomrule", - r"\end{tabular}", - r"\end{table}", - ] - return "\n".join(lines) - - def to_markdown_table(self) -> str: - """Generate a Markdown table for GitHub README.""" - header = ( - "| Method | IC (%) | ICIR | Avg|ρ| | EW IC (%) | EW ICIR | " - "ICW IC (%) | ICW ICIR | Las IC (%) | XGB IC (%) |\n" - "|--------|--------|------|---------|-----------|---------|" - "-----------|----------|-----------|------------|\n" - ) - rows = [] - for method in self.methods: - lib_row = self.factor_library_metrics[ - self.factor_library_metrics["method"] == method - ] - comb_row = self.combination_metrics[ - self.combination_metrics["method"] == method - ] - sel_row = self.selection_metrics[ - self.selection_metrics["method"] == method - ] - - def _g(df, col): - if df.empty or col not in df.columns: - return 0.0 - v = df.iloc[0][col] - return float(v) if not pd.isna(v) else 0.0 - - tag = " **" if method == "helix_phase2" else 
"" - rows.append( - f"| {method}{tag} | " - f"{_g(lib_row,'ic_pct'):.2f} | " - f"{_g(lib_row,'icir'):.3f} | " - f"{_g(lib_row,'avg_abs_rho'):.3f} | " - f"{_g(comb_row,'ew_ic_pct'):.2f} | " - f"{_g(comb_row,'ew_icir'):.3f} | " - f"{_g(comb_row,'icw_ic_pct'):.2f} | " - f"{_g(comb_row,'icw_icir'):.3f} | " - f"{_g(sel_row,'lasso_ic_pct'):.2f} | " - f"{_g(sel_row,'xgb_ic_pct'):.2f} |\n" - ) - return header + "".join(rows) - - def plot_comparison(self, save_path: str) -> None: - """Generate bar chart comparison (requires matplotlib).""" - try: - import matplotlib.pyplot as plt - import matplotlib.patches as mpatches - except ImportError: - logger.warning("matplotlib not available; skipping plot") - return - - metrics = ["IC (%)", "ICIR", "EW IC (%)", "ICW IC (%)"] - method_colors = { - "random_exploration": "#aaaaaa", - "alpha101_classic": "#6baed6", - "alpha101_adapted": "#3182bd", - "ralph_loop": "#fd8d3c", - "helix_phase2": "#31a354", - } - - fig, axes = plt.subplots(1, len(metrics), figsize=(16, 5)) - fig.suptitle("HelixFactor vs FactorMiner Benchmark", fontsize=14, fontweight="bold") - - for ax_idx, metric in enumerate(metrics): - ax = axes[ax_idx] - values = [] - colors = [] - labels = [] - - for method in self.methods: - color = method_colors.get(method, "#888888") - if metric == "IC (%)": - row = self.factor_library_metrics[ - self.factor_library_metrics["method"] == method - ] - v = float(row["ic_pct"].iloc[0]) if not row.empty else 0.0 - elif metric == "ICIR": - row = self.factor_library_metrics[ - self.factor_library_metrics["method"] == method - ] - v = float(row["icir"].iloc[0]) if not row.empty else 0.0 - elif metric == "EW IC (%)": - row = self.combination_metrics[ - self.combination_metrics["method"] == method - ] - v = float(row["ew_ic_pct"].iloc[0]) if not row.empty else 0.0 - elif metric == "ICW IC (%)": - row = self.combination_metrics[ - self.combination_metrics["method"] == method - ] - v = float(row["icw_ic_pct"].iloc[0]) if not row.empty else 0.0 
- else: - v = 0.0 - - values.append(v) - colors.append(color) - labels.append(method.replace("_", "\n")) - - bars = ax.bar(range(len(values)), values, color=colors, alpha=0.85, edgecolor="white") - ax.set_xticks(range(len(labels))) - ax.set_xticklabels(labels, fontsize=7, rotation=30, ha="right") - ax.set_title(metric, fontsize=10) - ax.grid(axis="y", alpha=0.3) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - - plt.tight_layout() - Path(save_path).parent.mkdir(parents=True, exist_ok=True) - plt.savefig(save_path, dpi=150, bbox_inches="tight") - plt.close() - logger.info("Saved comparison plot to %s", save_path) - - def generate_full_report(self, save_path: str) -> None: - """Generate a complete HTML report with all results.""" - html = self._build_html_report() - Path(save_path).parent.mkdir(parents=True, exist_ok=True) - with open(save_path, "w") as f: - f.write(html) - logger.info("Saved full HTML report to %s", save_path) - - def _build_html_report(self) -> str: - lib_html = self.factor_library_metrics.to_html(index=False, float_format="{:.4f}".format) - comb_html = self.combination_metrics.to_html(index=False, float_format="{:.4f}".format) - sel_html = self.selection_metrics.to_html(index=False, float_format="{:.4f}".format) - speed_html = self.speed_metrics.to_html(index=False, float_format="{:.3f}".format) - turnover_html = "" - if not self.turnover_metrics.empty: - turnover_html = self.turnover_metrics.to_html(index=False, float_format="{:.4f}".format) - cost_html = "" - if not self.cost_pressure_metrics.empty: - cost_html = self.cost_pressure_metrics.to_html(index=False, float_format="{:.4f}".format) - - stat_rows = [] - for k, v in self.statistical_tests.items(): - if isinstance(v, dict): - for sk, sv in v.items(): - stat_rows.append(f"{k}.{sk}{sv}") - else: - stat_rows.append(f"{k}{v}") - stat_html = ( - "" - + "".join(stat_rows) - + "
TestResult
" - ) - - ablation_html = "" - if self.ablation_result is not None and self.ablation_result.contributions is not None: - ablation_html = ( - "

Ablation Study

" - + self.ablation_result.contributions.to_html( - index=False, float_format="{:.4f}".format - ) - ) - - css = """ - body { font-family: Arial, sans-serif; margin: 40px; background: #f8f9fa; color: #333; } - h1 { color: #1a5276; border-bottom: 3px solid #1a5276; padding-bottom: 8px; } - h2 { color: #2c3e50; margin-top: 30px; } - table { border-collapse: collapse; width: 100%; margin-bottom: 20px; } - th { background: #1a5276; color: white; padding: 8px 12px; text-align: left; } - td { padding: 6px 12px; border-bottom: 1px solid #ddd; } - tr:nth-child(even) { background: #f2f2f2; } - tr:hover { background: #d6eaf8; } - .helix-row { background: #d5f5e3 !important; font-weight: bold; } - .summary-box { background: #eaf2ff; border-left: 5px solid #1a5276; - padding: 15px; margin: 20px 0; border-radius: 4px; } - """ - - return f""" -HelixFactor Benchmark Report - -

HelixFactor Benchmark Report

-
-Methods evaluated: {", ".join(self.methods)}
-Generated: {pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")} -
-

Factor Library Metrics

{lib_html} -

Factor Combination Metrics

{comb_html} -

Factor Selection Metrics

{sel_html} -

Speed Benchmarks

{speed_html} -{f'

Turnover

{turnover_html}' if turnover_html else ''} -{f'

Cost Pressure

{cost_html}' if cost_html else ''} -

Statistical Tests

{stat_html} -{ablation_html} -""" - - -# --------------------------------------------------------------------------- -# Statistical tests -# --------------------------------------------------------------------------- - -class StatisticalComparisonTests: - """Rigorous statistical comparison between HelixFactor and FactorMiner. - - Implements four complementary tests: - 1. Diebold-Mariano (DM) test for forecast accuracy differences - 2. Paired t-test on IC(Helix) − IC(Ralph) across test period - 3. Block-bootstrap 95% CI on IC difference - 4. Wilcoxon signed-rank test (non-parametric) - """ - - def __init__(self, seed: int = 42) -> None: - self._rng = np.random.RandomState(seed) - - @staticmethod - def _paired_valid_series( - ic_series_1: np.ndarray, - ic_series_2: np.ndarray, - ) -> Tuple[np.ndarray, np.ndarray]: - """Align paired series and drop rows with NaNs in either series.""" - min_len = min(len(ic_series_1), len(ic_series_2)) - s1 = np.asarray(ic_series_1[:min_len], dtype=np.float64) - s2 = np.asarray(ic_series_2[:min_len], dtype=np.float64) - mask = ~np.isnan(s1) & ~np.isnan(s2) - return s1[mask], s2[mask] - - # ------------------------------------------------------------------ - # Diebold-Mariano test - # ------------------------------------------------------------------ - - def diebold_mariano_test( - self, - ic_series_1: np.ndarray, - ic_series_2: np.ndarray, - h: int = 1, - ) -> DMTestResult: - """Diebold-Mariano test for forecast accuracy differences. - - Tests H0: E[d_t] = 0 where d_t = L(e1_t) - L(e2_t) is the - differential loss. Uses the HAC-robust variance estimator with - bandwidth h-1 (Andrews, 1991). - - Parameters - ---------- - ic_series_1 : ndarray (e.g. HelixFactor IC time series) - ic_series_2 : ndarray (e.g. FactorMiner IC time series) - h : int - Forecast horizon (default 1 for one-step-ahead). 
- - Returns - ------- - DMTestResult - """ - s1, s2 = self._paired_valid_series(ic_series_1, ic_series_2) - min_len = len(s1) - if min_len < 5: - return DMTestResult( - dm_statistic=0.0, p_value=1.0, is_significant=False, - direction="no_difference", n_obs=min_len, - ) - - # Loss differential: squared-error loss on IC as forecast of return - d = s1 ** 2 - s2 ** 2 - T = len(d) - if np.allclose(d, 0.0): - return DMTestResult( - dm_statistic=0.0, - p_value=1.0, - is_significant=False, - direction="no_difference", - n_obs=T, - ) - d_bar = np.mean(d) - - # HAC variance of d_bar (Newey-West with bandwidth h-1) - bandwidth = max(h - 1, 0) - gamma_0 = np.var(d, ddof=0) - if gamma_0 <= 0 or np.isnan(gamma_0): - return DMTestResult( - dm_statistic=0.0, - p_value=1.0, - is_significant=False, - direction="no_difference", - n_obs=T, - ) - hac_var = gamma_0 - for lag in range(1, bandwidth + 1): - gamma_k = np.mean( - (d[lag:] - d_bar) * (d[:-lag] - d_bar) - ) - hac_var += 2.0 * (1.0 - lag / (bandwidth + 1)) * gamma_k - - if hac_var <= 0 or np.isnan(hac_var): - return DMTestResult( - dm_statistic=0.0, - p_value=1.0, - is_significant=False, - direction="no_difference", - n_obs=T, - ) - - dm_stat = d_bar / np.sqrt(hac_var / T) - if not np.isfinite(dm_stat): - return DMTestResult( - dm_statistic=0.0, - p_value=1.0, - is_significant=False, - direction="no_difference", - n_obs=T, - ) - - # Two-sided p-value using normal approximation - from scipy.stats import norm - p_value = 2.0 * (1.0 - float(norm.cdf(abs(dm_stat)))) - if not np.isfinite(p_value): - return DMTestResult( - dm_statistic=0.0, - p_value=1.0, - is_significant=False, - direction="no_difference", - n_obs=T, - ) - - if abs(dm_stat) < 1.96: - direction = "no_difference" - elif d_bar > 0: - # series_1 has higher loss, series_2 is better - direction = "ralph_better" - else: - direction = "helix_better" - - return DMTestResult( - dm_statistic=float(dm_stat), - p_value=float(p_value), - is_significant=p_value < 0.05, - 
direction=direction, - n_obs=T, - ) - - # ------------------------------------------------------------------ - # Paired t-test - # ------------------------------------------------------------------ - - def paired_t_test( - self, - ic_series_1: np.ndarray, - ic_series_2: np.ndarray, - ) -> dict: - """Paired t-test on IC difference series.""" - s1, s2 = self._paired_valid_series(ic_series_1, ic_series_2) - n = len(s1) - if n < 5: - return {"t_stat": 0.0, "p_value": 1.0, "mean_diff": 0.0, "n": n} - - t_stat, p_value = ttest_rel(s1, s2) - if not np.isfinite(t_stat) or not np.isfinite(p_value): - return {"t_stat": 0.0, "p_value": 1.0, "mean_diff": 0.0, "n": n} - return { - "t_stat": float(t_stat), - "p_value": float(p_value), - "mean_diff": float(np.mean(s1 - s2)), - "n": n, - } - - # ------------------------------------------------------------------ - # Block bootstrap CI - # ------------------------------------------------------------------ - - def bootstrap_ic_difference_ci( - self, - ic_series_1: np.ndarray, - ic_series_2: np.ndarray, - n_bootstrap: int = 1000, - block_size: int = 20, - ) -> Tuple[float, float]: - """95% block-bootstrap CI on mean IC difference. 
- - Returns - ------- - (lower_95, upper_95) : tuple of float - """ - s1, s2 = self._paired_valid_series(ic_series_1, ic_series_2) - n = len(s1) - if n < 5: - return (0.0, 0.0) - diff = s1 - s2 - - # Circular block bootstrap - block_size = min(block_size, n // 2) - block_size = max(block_size, 1) - n_blocks = int(math.ceil(n / block_size)) - boot_means = np.empty(n_bootstrap) - - for i in range(n_bootstrap): - starts = self._rng.randint(0, n - block_size + 1, size=n_blocks) - indices = np.concatenate( - [np.arange(s, s + block_size) for s in starts] - )[:n] - boot_means[i] = diff[indices].mean() - - return ( - float(np.percentile(boot_means, 2.5)), - float(np.percentile(boot_means, 97.5)), - ) - - # ------------------------------------------------------------------ - # Wilcoxon signed-rank test - # ------------------------------------------------------------------ - - def wilcoxon_test( - self, - ic_series_1: np.ndarray, - ic_series_2: np.ndarray, - ) -> dict: - """Wilcoxon signed-rank test (non-parametric) on IC pairs.""" - s1, s2 = self._paired_valid_series(ic_series_1, ic_series_2) - n = len(s1) - if n < 5: - return {"statistic": 0.0, "p_value": 1.0, "n": n} - try: - stat, p_value = wilcoxon(s1, s2, alternative="two-sided") - except Exception: - stat, p_value = 0.0, 1.0 - - return {"statistic": float(stat), "p_value": float(p_value), "n": n} - - # ------------------------------------------------------------------ - # Combined report - # ------------------------------------------------------------------ - - def run_all_tests( - self, - ic_helix: np.ndarray, - ic_ralph: np.ndarray, - ) -> dict: - """Run all four statistical tests and return combined results.""" - dm = self.diebold_mariano_test(ic_helix, ic_ralph) - tt = self.paired_t_test(ic_helix, ic_ralph) - ci_lo, ci_hi = self.bootstrap_ic_difference_ci(ic_helix, ic_ralph) - wil = self.wilcoxon_test(ic_helix, ic_ralph) - valid_helix, valid_ralph = self._paired_valid_series(ic_helix, ic_ralph) - mean_diff = 
float(np.mean(valid_helix - valid_ralph)) if len(valid_helix) else 0.0 - return { - "diebold_mariano": { - "dm_stat": dm.dm_statistic, - "p_value": dm.p_value, - "significant": dm.is_significant, - "direction": dm.direction, - "n_obs": dm.n_obs, - }, - "paired_t_test": tt, - "bootstrap_ci_95": { - "lower": ci_lo, - "upper": ci_hi, - "excludes_zero": ci_lo > 0 or ci_hi < 0, - }, - "wilcoxon": wil, - "mean_ic_difference": mean_diff, - "helix_outperforms": mean_diff > 0, - } - - -# --------------------------------------------------------------------------- -# Speed Benchmark -# --------------------------------------------------------------------------- - -class SpeedBenchmark: - """Benchmark factor evaluation speed across operators and pipelines.""" - - def __init__(self, seed: int = 42) -> None: - self._rng = np.random.RandomState(seed) - - def _time_callable(self, fn, n_repeats: int = 5, warmup: int = 1) -> float: - """Return minimum time over n_repeats (ms) after warmup runs.""" - for _ in range(warmup): - try: - fn() - except Exception: - pass - timings = [] - for _ in range(n_repeats): - t0 = time.perf_counter() - try: - fn() - except Exception: - pass - timings.append((time.perf_counter() - t0) * 1000.0) - return float(np.min(timings)) if timings else 0.0 - - def run_operator_benchmark( - self, - n_assets: int = 500, - n_periods: int = 2000, - n_repeats: int = 5, - ) -> OperatorSpeedResult: - """Benchmark individual operators (numpy backend).""" - rng = np.random.RandomState(self._rng.randint(0, 9999)) - X = rng.randn(n_assets, n_periods).astype(np.float64) - Y = rng.randn(n_assets, n_periods).astype(np.float64) - - from scipy.stats import rankdata - - def _ts_rank(mat, window=20): - out = np.full_like(mat, np.nan) - for t in range(window - 1, mat.shape[1]): - slc = mat[:, t - window + 1: t + 1] - for i in range(mat.shape[0]): - r = rankdata(slc[i]) - out[i, t] = r[-1] / window - return out - - def _cs_rank(mat): - out = np.full_like(mat, np.nan) - for t in 
range(mat.shape[1]): - col = mat[:, t] - valid = ~np.isnan(col) - if valid.sum() > 0: - out[valid, t] = rankdata(col[valid]) / valid.sum() - return out - - def _ts_std(mat, window=20): - out = np.full_like(mat, np.nan) - for t in range(window - 1, mat.shape[1]): - slc = mat[:, t - window + 1: t + 1] - out[:, t] = np.std(slc, axis=1, ddof=1) - return out - - def _ts_corr(x, y, window=20): - out = np.full_like(x, np.nan) - for t in range(window - 1, x.shape[1]): - sx = x[:, t - window + 1: t + 1] - sy = y[:, t - window + 1: t + 1] - xs = sx - sx.mean(axis=1, keepdims=True) - ys = sy - sy.mean(axis=1, keepdims=True) - denom = np.sqrt((xs**2).sum(axis=1) * (ys**2).sum(axis=1)) - safe = denom > 1e-12 - out[safe, t] = ((xs * ys).sum(axis=1) / denom)[safe] - return out - - # Use small sub-matrix for timing (keep fast) - X_s = X[:50, :100] - Y_s = Y[:50, :100] - - ops = { - "TsRank(w=20)": lambda: _ts_rank(X_s, window=20), - "CsRank": lambda: _cs_rank(X_s), - "TsStd(w=20)": lambda: _ts_std(X_s, window=20), - "TsCorr(w=20)": lambda: _ts_corr(X_s, Y_s, window=20), - "TsMean(w=20)": lambda: np.lib.stride_tricks.sliding_window_view(X_s, 20, axis=1).mean(axis=-1), - "CsZscore": lambda: (X_s - X_s.mean(axis=0)) / (X_s.std(axis=0) + 1e-8), - } - - timings: Dict[str, float] = {} - for name, fn in ops.items(): - timings[name] = self._time_callable(fn, n_repeats=n_repeats) - - return OperatorSpeedResult( - operator_timings_ms=timings, - n_assets=n_assets, - n_periods=n_periods, - n_repeats=n_repeats, - ) - - def run_full_pipeline_benchmark( - self, - n_candidates: int = 200, - data: Optional[dict] = None, - ) -> PipelineSpeedResult: - """Benchmark end-to-end candidate evaluation pipeline.""" - if data is None: - data = _build_mock_data_dict(n_assets=100, n_periods=200, seed=42) - - from factorminer.benchmark.catalogs import build_random_exploration - from factorminer.core.parser import try_parse - from factorminer.evaluation.metrics import compute_ic, compute_ic_mean - - entries = 
build_random_exploration(seed=99, count=n_candidates) - returns = data.get("forward_returns", data.get("$close")) - if returns is None: - returns = np.random.randn(*list(data.values())[0].shape) * 0.01 - - t0 = time.perf_counter() - succeeded = 0 - for entry in entries[:n_candidates]: - tree = try_parse(entry.formula) - if tree is None: - continue - try: - signals = tree.evaluate(data) - ic = compute_ic(signals, returns) - _ = compute_ic_mean(ic) - succeeded += 1 - except Exception: - pass - elapsed = time.perf_counter() - t0 - - return PipelineSpeedResult( - total_seconds=elapsed, - candidates_per_second=succeeded / max(elapsed, 1e-6), - n_candidates=n_candidates, - ) - - def generate_speed_table( - self, - op_result: OperatorSpeedResult, - pipeline_result: PipelineSpeedResult, - ) -> str: - """Generate a LaTeX table of speed results.""" - lines = [ - r"\begin{table}[htbp]", - r"\centering", - r"\caption{Computational Efficiency Benchmark}", - r"\begin{tabular}{lrr}", - r"\toprule", - r"Operator / Task & Time (ms) & Relative \\", - r"\midrule", - ] - timings = op_result.operator_timings_ms - baseline = max(timings.values()) if timings else 1.0 - for op, t in timings.items(): - rel = t / baseline if baseline > 0 else 1.0 - lines.append(rf"{op} & {t:.2f} & {rel:.2f}x \\") - - lines.append(r"\midrule") - lines.append( - rf"Full pipeline ({pipeline_result.n_candidates} candidates) & " - rf"{pipeline_result.total_seconds * 1000:.0f} & -- \\" - ) - lines.append( - rf"Throughput & {pipeline_result.candidates_per_second:.1f} cand/s & -- \\" - ) - lines += [r"\bottomrule", r"\end{tabular}", r"\end{table}"] - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# Main HelixBenchmark -# --------------------------------------------------------------------------- - -class HelixBenchmark: - """Rigorous comparison of HelixFactor vs FactorMiner (and baselines). 
- - Baselines: - - Random Formula Exploration (RF): random type-correct trees - - Alpha101 Classic: original 101 formulaic alphas - - Alpha101 Adapted: parameter-tuned for 10-min bars - - FactorMiner (Ralph Loop): exact paper reproduction - - HelixFactor (Phase 2): full Phase 2 system - - Metrics mirror paper Table 1: - - Factor Library: IC (%), ICIR, Avg|rho| - - Factor Combination: EW IC, EW ICIR, ICW IC, ICW ICIR - - Factor Selection: Lasso IC, XGBoost IC - """ - - METHOD_LABELS = { - "random_exploration": "RF (Rand)", - "alpha101_classic": "Alpha101 Classic", - "alpha101_adapted": "Alpha101 Adapted", - "ralph_loop": "FactorMiner (Ralph)", - "helix_phase2": "HelixFactor (Phase 2)", - } - - def __init__( - self, - ic_threshold: float = 0.02, - correlation_threshold: float = 0.5, - seed: int = 42, - ) -> None: - self.ic_threshold = ic_threshold - self.correlation_threshold = correlation_threshold - self.seed = seed - self._stat_tests = StatisticalComparisonTests(seed=seed) - self._speed_bench = SpeedBenchmark(seed=seed) - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def run_comparison( - self, - data: dict, - train_period: Tuple[int, int], - test_period: Tuple[int, int], - n_target_factors: int = 40, - n_runs: int = 1, - methods: Optional[List[str]] = None, - ) -> BenchmarkResult: - """Run the full comparison benchmark. - - Parameters - ---------- - data : dict - Data dictionary mapping feature names to (M, T) arrays. - Must include ``"forward_returns"``. - train_period / test_period : (int, int) - Column index range [start, end) for the respective split. - n_target_factors : int - Number of factors to build each library to. - n_runs : int - Repetitions per method (for std estimates). - methods : list[str], optional - Subset of methods to run. Default: all five. 
- - Returns - ------- - BenchmarkResult - """ - if methods is None: - methods = [ - "random_exploration", - "alpha101_classic", - "alpha101_adapted", - "ralph_loop", - "helix_phase2", - ] - - # Split data - train_data = _slice_data(data, *train_period) - test_data = _slice_data(data, *test_period) - - raw_results: Dict[str, List[MethodResult]] = {} - for method in methods: - logger.info("Running method: %s", method) - method_runs: List[MethodResult] = [] - for run_id in range(n_runs): - try: - result = self.run_single_method( - method=method, - data=train_data, - test_data=test_data, - n_factors=n_target_factors, - run_id=run_id, - ) - method_runs.append(result) - except Exception as exc: - logger.warning("Method %s run %d failed: %s", method, run_id, exc) - method_runs.append( - MethodResult(method=method, run_id=run_id) - ) - raw_results[method] = method_runs - - # Average across runs - averaged = { - method: _average_method_results(runs) - for method, runs in raw_results.items() - } - - # Build metric DataFrames - lib_df = _build_library_df(averaged, methods) - comb_df = _build_combination_df(averaged, methods) - sel_df = _build_selection_df(averaged, methods) - - # Speed benchmark - speed_result = self._speed_bench.run_full_pipeline_benchmark(data=train_data) - op_result = self._speed_bench.run_operator_benchmark(n_repeats=3) - speed_df = _build_speed_df(op_result, speed_result) - - # Statistical tests (Helix vs Ralph) - stat_tests = {} - helix_results = raw_results.get("helix_phase2", []) - ralph_results = raw_results.get("ralph_loop", []) - - if helix_results and ralph_results: - h_ic = helix_results[0].ic_series - r_ic = ralph_results[0].ic_series - if h_ic is not None and r_ic is not None: - stat_tests = self._stat_tests.run_all_tests(h_ic, r_ic) - else: - # Create synthetic IC series from stored metrics - h_ic = _synthetic_ic_series(helix_results[0].library_ic, n=100, seed=self.seed) - r_ic = _synthetic_ic_series(ralph_results[0].library_ic, n=100, 
seed=self.seed + 1) - stat_tests = self._stat_tests.run_all_tests(h_ic, r_ic) - - return BenchmarkResult( - methods=methods, - factor_library_metrics=lib_df, - combination_metrics=comb_df, - selection_metrics=sel_df, - speed_metrics=speed_df, - statistical_tests=stat_tests, - raw_method_results=raw_results, - ) - - def run_single_method( - self, - method: str, - data: dict, - test_data: dict, - n_factors: int, - run_id: int = 0, - ) -> MethodResult: - """Run one method and return its MethodResult. - - Parameters - ---------- - method : str - One of: 'ralph', 'helix', 'helix_phase2', 'rf', - 'random_exploration', 'alpha101_classic', 'alpha101_adapted'. - """ - t0 = time.perf_counter() - - # Resolve aliases - method_key = { - "ralph": "ralph_loop", - "helix": "helix_phase2", - "rf": "random_exploration", - "alpha101": "alpha101_classic", - }.get(method, method) - - candidates = self._get_candidates(method_key, n_factors=n_factors * 4) - returns = data.get("forward_returns") - test_returns = test_data.get("forward_returns") - - if returns is None or test_returns is None: - logger.warning("forward_returns not found in data dict for method %s", method) - return MethodResult(method=method_key, run_id=run_id) - - # Evaluate all candidates - factor_results = self._evaluate_candidates(candidates, data, returns) - - # Build library from best candidates - library = self._build_library(factor_results, n_factors) - - if not library: - return MethodResult(method=method_key, run_id=run_id) - - # Compute library metrics on test data - test_factor_results = self._evaluate_candidates( - [(r["name"], r["formula"], r.get("category", "Unknown")) - for r in library], - test_data, - test_returns, - ) - - lib_ic, lib_icir, avg_rho, ic_series = self._library_metrics( - test_factor_results, test_returns - ) - - # Factor combination - ew_ic, ew_icir, icw_ic, icw_icir = self._combination_metrics( - test_factor_results, library, test_returns - ) - - # Factor selection - lasso_ic, lasso_icir = 
self._selection_metrics( - factor_results, library, data, returns, test_data, test_returns, "lasso" - ) - xgb_ic, xgb_icir = self._selection_metrics( - factor_results, library, data, returns, test_data, test_returns, "xgboost" - ) - - elapsed = time.perf_counter() - t0 - - return MethodResult( - method=method_key, - library_ic=lib_ic, - library_icir=lib_icir, - avg_abs_rho=avg_rho, - ew_ic=ew_ic, - ew_icir=ew_icir, - icw_ic=icw_ic, - icw_icir=icw_icir, - lasso_ic=lasso_ic, - lasso_icir=lasso_icir, - xgb_ic=xgb_ic, - xgb_icir=xgb_icir, - n_factors=len(library), - admission_rate=len(library) / max(len(candidates), 1), - elapsed_seconds=elapsed, - ic_series=ic_series, - run_id=run_id, - ) - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _get_candidates(self, method: str, n_factors: int) -> List[Tuple[str, str, str]]: - """Get candidate (name, formula, category) tuples for a method.""" - # Import the catalogs module directly to avoid triggering the package - # __init__ chain which has an unresolved dependency on - # factorminer.agent.specialists.REGIME_SPECIALIST in some environments. 
- import importlib.util as _ilu, pathlib as _pl, sys as _sys - _cat_path = _pl.Path(__file__).parent / "catalogs.py" - if "src.factorminer.factorminer.benchmark.catalogs" not in _sys.modules: - _spec = _ilu.spec_from_file_location("src.factorminer.factorminer.benchmark.catalogs", str(_cat_path)) - _cat_mod = _ilu.module_from_spec(_spec) - _sys.modules["src.factorminer.factorminer.benchmark.catalogs"] = _cat_mod - _spec.loader.exec_module(_cat_mod) - _cat = _sys.modules["src.factorminer.factorminer.benchmark.catalogs"] - ALPHA101_CLASSIC = _cat.ALPHA101_CLASSIC - build_alpha101_adapted = _cat.build_alpha101_adapted - build_random_exploration = _cat.build_random_exploration - build_factor_miner_catalog = _cat.build_factor_miner_catalog - - if method == "random_exploration": - entries = build_random_exploration(seed=self.seed, count=max(n_factors, 160)) - elif method == "alpha101_classic": - entries = list(ALPHA101_CLASSIC) - while len(entries) < n_factors: - entries = entries + list(ALPHA101_CLASSIC) - entries = entries[:n_factors] - elif method == "alpha101_adapted": - entries = build_alpha101_adapted() - elif method in ("ralph_loop", "helix_phase2"): - # Use the full FactorMiner paper catalog + random extensions - entries = build_factor_miner_catalog() - if len(entries) < n_factors * 2: - extra = build_random_exploration( - seed=self.seed + 7, count=n_factors * 2 - len(entries) - ) - entries = entries + extra - else: - entries = build_random_exploration(seed=self.seed + 1, count=n_factors * 2) - - return [(e.name, e.formula, e.category) for e in entries] - - def _evaluate_candidates( - self, - candidates: List[Tuple[str, str, str]], - data: dict, - returns: np.ndarray, - ) -> List[dict]: - """Evaluate candidates; returns list of result dicts.""" - from factorminer.core.parser import try_parse - from factorminer.evaluation.metrics import ( - compute_ic, compute_ic_mean, compute_icir, compute_ic_win_rate - ) - - results = [] - for name, formula, category in 
candidates: - tree = try_parse(formula) - if tree is None: - continue - try: - signals = tree.evaluate(data) - if signals is None or np.all(np.isnan(signals)): - continue - ic_series = compute_ic(signals, returns) - ic_mean = compute_ic_mean(ic_series) - icir = compute_icir(ic_series) - win_rate = compute_ic_win_rate(ic_series) - results.append({ - "name": name, - "formula": formula, - "category": category, - "ic_mean": ic_mean, - "icir": icir, - "ic_win_rate": win_rate, - "signals": signals, - "ic_series": ic_series, - }) - except Exception: - pass - return results - - def _build_library( - self, - factor_results: List[dict], - n_factors: int, - ) -> List[dict]: - """Build a diversified factor library with IC and correlation admission.""" - from factorminer.evaluation.metrics import compute_pairwise_correlation - - # Filter by IC threshold - passing = [r for r in factor_results if r["ic_mean"] >= self.ic_threshold] - passing.sort(key=lambda x: x["ic_mean"], reverse=True) - - library: List[dict] = [] - for candidate in passing: - if len(library) >= n_factors: - break - # Correlation check - too_correlated = False - for existing in library: - if ( - existing.get("signals") is not None - and candidate.get("signals") is not None - ): - corr = abs( - compute_pairwise_correlation( - candidate["signals"], existing["signals"] - ) - ) - if corr >= self.correlation_threshold: - too_correlated = True - break - if not too_correlated: - library.append(candidate) - return library - - def _library_metrics( - self, - factor_results: List[dict], - returns: np.ndarray, - ) -> Tuple[float, float, float, Optional[np.ndarray]]: - """Compute library IC, ICIR, avg|rho|. 
Returns (ic, icir, rho, ic_series).""" - from factorminer.evaluation.metrics import ( - compute_pairwise_correlation, compute_ic_mean, compute_icir - ) - - if not factor_results: - return 0.0, 0.0, 0.0, None - - ics = [r["ic_mean"] for r in factor_results] - icirs = [r["icir"] for r in factor_results] - lib_ic = float(np.mean(ics)) if ics else 0.0 - lib_icir = float(np.mean(icirs)) if icirs else 0.0 - - # Average pairwise |rho| - rhos = [] - signals_list = [ - r["signals"] for r in factor_results if r.get("signals") is not None - ] - for i in range(len(signals_list)): - for j in range(i + 1, len(signals_list)): - c = abs(compute_pairwise_correlation(signals_list[i], signals_list[j])) - rhos.append(c) - avg_rho = float(np.mean(rhos)) if rhos else 0.0 - - # Combined IC series (average) - all_ic_series = [r["ic_series"] for r in factor_results if r.get("ic_series") is not None] - if all_ic_series: - min_len = min(len(s) for s in all_ic_series) - combined = np.nanmean( - np.stack([s[:min_len] for s in all_ic_series], axis=0), axis=0 - ) - else: - combined = None - - return lib_ic, lib_icir, avg_rho, combined - - def _combination_metrics( - self, - test_factor_results: List[dict], - library: List[dict], - test_returns: np.ndarray, - ) -> Tuple[float, float, float, float]: - """Compute EW/ICW combination metrics on test data.""" - from factorminer.evaluation.combination import FactorCombiner - from factorminer.evaluation.metrics import compute_ic, compute_ic_mean, compute_icir - - if not test_factor_results: - return 0.0, 0.0, 0.0, 0.0 - - factor_signals = { - i: r["signals"].T for i, r in enumerate(test_factor_results) - if r.get("signals") is not None - } - ic_values = { - i: r["ic_mean"] for i, r in enumerate(test_factor_results) - } - - if not factor_signals: - return 0.0, 0.0, 0.0, 0.0 - - combiner = FactorCombiner() - try: - ew_composite = combiner.equal_weight(factor_signals) - ew_ic_series = compute_ic(ew_composite.T, test_returns) - ew_ic = 
compute_ic_mean(ew_ic_series) - ew_icir = compute_icir(ew_ic_series) - except Exception: - ew_ic, ew_icir = 0.0, 0.0 - - try: - icw_composite = combiner.ic_weighted(factor_signals, ic_values) - icw_ic_series = compute_ic(icw_composite.T, test_returns) - icw_ic = compute_ic_mean(icw_ic_series) - icw_icir = compute_icir(icw_ic_series) - except Exception: - icw_ic, icw_icir = 0.0, 0.0 - - return ew_ic, ew_icir, icw_ic, icw_icir - - def _selection_metrics( - self, - train_factor_results: List[dict], - library: List[dict], - train_data: dict, - train_returns: np.ndarray, - test_data: dict, - test_returns: np.ndarray, - selector_type: str, - ) -> Tuple[float, float]: - """Compute Lasso/XGBoost selection IC on test data.""" - from factorminer.evaluation.selection import FactorSelector - from factorminer.evaluation.metrics import compute_ic, compute_ic_mean, compute_icir - - if len(train_factor_results) < 3: - return 0.0, 0.0 - - fit_signals = { - i: r["signals"].T for i, r in enumerate(train_factor_results) - if r.get("signals") is not None - } - if not fit_signals: - return 0.0, 0.0 - - # Re-evaluate on test data - test_results = self._evaluate_candidates( - [(r["name"], r["formula"], r.get("category", "Unknown")) - for r in train_factor_results], - test_data, - test_returns, - ) - eval_signals = { - i: r["signals"].T for i, r in enumerate(test_results) - if r.get("signals") is not None and i < len(test_results) - } - - if not eval_signals: - return 0.0, 0.0 - - selector = FactorSelector() - try: - fit_ret = train_returns.T - if selector_type == "lasso": - ranking = selector.lasso_selection(fit_signals, fit_ret) - else: - ranking = selector.xgboost_selection(fit_signals, fit_ret) - - if not ranking: - return 0.0, 0.0 - - selected_ids = [fid for fid, _ in ranking if fid in eval_signals] - if not selected_ids: - return 0.0, 0.0 - - # Simple equal-weight composite of selected factors - composite = np.nanmean( - np.stack([eval_signals[fid] for fid in selected_ids], axis=0), 
- axis=0, - ) - ic_series = compute_ic(composite.T, test_returns) - return compute_ic_mean(ic_series), compute_icir(ic_series) - except Exception as exc: - logger.debug("Selection metrics failed for %s: %s", selector_type, exc) - return 0.0, 0.0 - - def _clone_cfg(self, cfg): - cloned = copy.deepcopy(cfg) - cloned._raw = copy.deepcopy(getattr(cfg, "_raw", {})) - return cloned - - def _build_runtime_provider(self, cfg, mock: bool): - from factorminer.agent.llm_interface import MockProvider, create_provider - - if mock: - return MockProvider() - - provider_name = getattr(cfg.llm, "provider", "mock") - model_name = getattr(cfg.llm, "model", "mock") - api_key = None - if hasattr(cfg, "_raw"): - api_key = getattr(cfg, "_raw", {}).get("llm", {}).get("api_key") - if provider_name == "mock" or not api_key: - return MockProvider() - - try: - return create_provider( - { - "provider": provider_name, - "model": model_name, - "api_key": api_key, - } - ) - except Exception as exc: # pragma: no cover - defensive fallback - logger.warning("Falling back to MockProvider: %s", exc) - return MockProvider() - - def _build_runtime_mining_config(self, cfg, output_dir: Path, mock: bool): - from factorminer.core.config import MiningConfig as RuntimeMiningConfig - - signal_failure_policy = "synthetic" if mock else cfg.evaluation.signal_failure_policy - - runtime_cfg = RuntimeMiningConfig( - target_library_size=cfg.mining.target_library_size, - batch_size=cfg.mining.batch_size, - max_iterations=cfg.mining.max_iterations, - ic_threshold=cfg.mining.ic_threshold, - icir_threshold=cfg.mining.icir_threshold, - correlation_threshold=cfg.mining.correlation_threshold, - replacement_ic_min=cfg.mining.replacement_ic_min, - replacement_ic_ratio=cfg.mining.replacement_ic_ratio, - fast_screen_assets=cfg.evaluation.fast_screen_assets, - num_workers=cfg.evaluation.num_workers, - output_dir=str(output_dir), - backend=cfg.evaluation.backend, - gpu_device=cfg.evaluation.gpu_device, - 
signal_failure_policy=signal_failure_policy, - ) - runtime_cfg.benchmark_mode = getattr(cfg.benchmark, "mode", "paper") - runtime_cfg.target_panels = None - runtime_cfg.target_horizons = None - runtime_cfg.research = getattr(cfg, "research", None) - return runtime_cfg - - def _build_debate_config(self, cfg): - if not cfg.phase2.debate.enabled: - return None - - from factorminer.agent.debate import DebateConfig as RuntimeDebateConfig - from factorminer.agent.specialists import DEFAULT_SPECIALISTS - - specialist_count = min( - int(cfg.phase2.debate.num_specialists), len(DEFAULT_SPECIALISTS) - ) - return RuntimeDebateConfig( - specialists=list(DEFAULT_SPECIALISTS[:specialist_count]), - enable_critic=cfg.phase2.debate.enable_critic, - candidates_per_specialist=cfg.phase2.debate.candidates_per_specialist, - top_k_after_critic=cfg.phase2.debate.top_k_after_critic, - critic_temperature=cfg.phase2.debate.critic_temperature, - ) - - def _runtime_phase2_kwargs(self, cfg, loop_kind: str, runtime_dataset): - if loop_kind != "helix_phase2": - return {} - - from factorminer.evaluation.causal import CausalConfig as RuntimeCausalConfig - from factorminer.evaluation.capacity import CapacityConfig as RuntimeCapacityConfig - from factorminer.evaluation.regime import RegimeConfig as RuntimeRegimeConfig - from factorminer.evaluation.significance import ( - SignificanceConfig as RuntimeSignificanceConfig, - ) - - def _clone_section(source, target_cls): - target_fields = {field.name for field in target_cls.__dataclass_fields__.values()} - payload = { - name: getattr(source, name) - for name in target_fields - if hasattr(source, name) - } - return target_cls(**payload) - - return { - "debate_config": self._build_debate_config(cfg), - "enable_knowledge_graph": cfg.phase2.helix.enable_knowledge_graph, - "enable_embeddings": cfg.phase2.helix.enable_embeddings, - "enable_auto_inventor": cfg.phase2.auto_inventor.enabled, - "auto_invention_interval": cfg.phase2.auto_inventor.invention_interval, 
- "canonicalize": cfg.phase2.helix.enable_canonicalization, - "forgetting_lambda": cfg.phase2.helix.forgetting_lambda, - "causal_config": _clone_section(cfg.phase2.causal, RuntimeCausalConfig) - if cfg.phase2.causal.enabled - else None, - "regime_config": _clone_section(cfg.phase2.regime, RuntimeRegimeConfig) - if cfg.phase2.regime.enabled - else None, - "capacity_config": _clone_section(cfg.phase2.capacity, RuntimeCapacityConfig) - if cfg.phase2.capacity.enabled - else None, - "significance_config": _clone_section( - cfg.phase2.significance, RuntimeSignificanceConfig - ) - if cfg.phase2.significance.enabled - else None, - "volume": runtime_dataset.data_dict.get( - "$amt", runtime_dataset.data_dict.get("$volume") - ), - } - - def _execute_runtime_loop( - self, - *, - cfg, - loop_kind: str, - runtime_dataset, - output_dir: Path, - n_target_factors: int, - run_id: int, - mock: bool, - ) -> tuple[MethodResult, dict[str, Any]]: - from factorminer.core.helix_loop import HelixLoop - from factorminer.core.library_io import load_library - from factorminer.core.ralph_loop import RalphLoop - from factorminer.core.session import MiningSession - from factorminer.benchmark.runtime import evaluate_frozen_set, select_frozen_top_k - from factorminer.evaluation.runtime import evaluate_factors - - output_dir.mkdir(parents=True, exist_ok=True) - runtime_cfg = self._build_runtime_mining_config(cfg, output_dir, mock=mock) - provider = self._build_runtime_provider(cfg, mock=mock) - - runtime_kwargs = { - "config": runtime_cfg, - "data_tensor": runtime_dataset.data_tensor, - "returns": runtime_dataset.returns, - "llm_provider": provider, - } - if loop_kind == "helix_phase2": - runtime_kwargs.update( - self._runtime_phase2_kwargs(cfg, loop_kind, runtime_dataset) - ) - loop = HelixLoop(**runtime_kwargs) - else: - loop = RalphLoop(**runtime_kwargs) - - library = loop.run( - target_size=n_target_factors, - max_iterations=runtime_cfg.max_iterations, - ) - library_dir = output_dir / 
"factor_library" - loaded_library = load_library(library_dir) - session = MiningSession.load(output_dir / "session.json") - session_summary = session.get_summary() - run_manifest = {} - run_manifest_path = output_dir / "run_manifest.json" - if run_manifest_path.exists(): - with open(run_manifest_path) as f: - run_manifest = json.load(f) - - artifacts = evaluate_factors( - loaded_library.list_factors(), - runtime_dataset, - signal_failure_policy=runtime_cfg.signal_failure_policy, - ) - selected = select_frozen_top_k( - artifacts, - loaded_library, - top_k=n_target_factors, - split_name="train", - ) - runtime_eval = evaluate_frozen_set( - selected, - runtime_dataset, - split_name="test", - fit_split="train", - cost_bps=list(getattr(cfg.benchmark, "cost_bps", [])), - ) - - selected_formulas = {artifact.formula for artifact in selected} - selected_artifacts = [ - artifact - for artifact in artifacts - if artifact.succeeded and artifact.formula in selected_formulas - ] - ic_series = None - if selected_artifacts: - series_list = [ - artifact.split_stats["test"].get("ic_series") - for artifact in selected_artifacts - if artifact.split_stats.get("test", {}).get("ic_series") is not None - ] - if series_list: - min_len = min(len(series) for series in series_list) - ic_series = np.nanmean( - np.stack([series[:min_len] for series in series_list], axis=0), - axis=0, - ) - - library_turnover = float( - np.mean( - [ - artifact.split_stats["test"].get("turnover", 0.0) - for artifact in selected_artifacts - ] - ) - ) if selected_artifacts else 0.0 - - combination_turnover = { - name: float(metrics.get("turnover", 0.0)) - for name, metrics in runtime_eval.get("combinations", {}).items() - } - cost_pressure = { - name: metrics.get("cost_pressure", {}) - for name, metrics in runtime_eval.get("combinations", {}).items() - } - - result = MethodResult( - method=loop_kind, - library_ic=float(runtime_eval["library"]["ic"]), - library_icir=float(runtime_eval["library"]["icir"]), - 
avg_abs_rho=float(runtime_eval["library"]["avg_abs_rho"]), - ew_ic=float(runtime_eval["combinations"].get("equal_weight", {}).get("ic", 0.0)), - ew_icir=float(runtime_eval["combinations"].get("equal_weight", {}).get("icir", 0.0)), - icw_ic=float(runtime_eval["combinations"].get("ic_weighted", {}).get("ic", 0.0)), - icw_icir=float(runtime_eval["combinations"].get("ic_weighted", {}).get("icir", 0.0)), - lasso_ic=float(runtime_eval["selections"].get("lasso", {}).get("ic", 0.0)), - lasso_icir=float(runtime_eval["selections"].get("lasso", {}).get("icir", 0.0)), - xgb_ic=float(runtime_eval["selections"].get("xgboost", {}).get("ic", 0.0)), - xgb_icir=float(runtime_eval["selections"].get("xgboost", {}).get("icir", 0.0)), - n_factors=loaded_library.size, - admission_rate=session_summary.get("overall_yield_rate", 0.0), - elapsed_seconds=session_summary.get("elapsed_seconds", 0.0), - ic_series=ic_series, - ) - result.avg_turnover = library_turnover # type: ignore[attr-defined] - - artifact_paths = { - "output_dir": str(output_dir.resolve()), - "run_manifest": str(run_manifest_path.resolve()), - "session": str((output_dir / "session.json").resolve()), - "session_log": str((output_dir / "session_log.json").resolve()), - "library": str((output_dir / "factor_library.json").resolve()), - "checkpoint_dir": str((output_dir / "checkpoint").resolve()), - "checkpoint_run_manifest": str((output_dir / "checkpoint" / "run_manifest.json").resolve()), - } - payload = { - "loop_kind": loop_kind, - "method": loop_kind, - "run_id": run_id, - "output_dir": str(output_dir.resolve()), - "session_summary": session_summary, - "run_manifest": run_manifest, - "artifact_paths": artifact_paths, - "frozen_top_k": [ - { - "name": artifact.name, - "formula": artifact.formula, - "category": artifact.category, - "train_ic": artifact.split_stats["train"]["ic_abs_mean"], - "train_icir": abs(artifact.split_stats["train"]["icir"]), - } - for artifact in selected - ], - "library": runtime_eval["library"], - 
"combinations": runtime_eval["combinations"], - "selections": runtime_eval["selections"], - "turnover": { - "library": library_turnover, - **combination_turnover, - }, - "cost_pressure": cost_pressure, - "library_size": loaded_library.size, - "candidate_count": session_summary.get("total_candidates", 0), - "selected_formulas": sorted(selected_formulas), - } - return result, payload - - def _runtime_method_frames( - self, - runtime_payloads: Dict[str, List[dict[str, Any]]], - methods: List[str], - ) -> tuple[pd.DataFrame, pd.DataFrame]: - turnover_rows: list[dict[str, Any]] = [] - cost_rows: list[dict[str, Any]] = [] - - for method in methods: - for payload in runtime_payloads.get(method, []): - turnover = payload.get("turnover", {}) - if turnover: - turnover_rows.append( - { - "method": method, - "run_id": payload.get("run_id", 0), - "library_turnover": turnover.get("library", 0.0), - "equal_weight_turnover": turnover.get("equal_weight", 0.0), - "ic_weighted_turnover": turnover.get("ic_weighted", 0.0), - "orthogonal_turnover": turnover.get("orthogonal", 0.0), - } - ) - for combo_name, cost_map in payload.get("cost_pressure", {}).items(): - for cost_bps, stats in cost_map.items(): - cost_rows.append( - { - "method": method, - "run_id": payload.get("run_id", 0), - "combination": combo_name, - "cost_bps": float(cost_bps), - "ic": stats.get("ic", 0.0), - "icir": stats.get("icir", 0.0), - "turnover": stats.get("turnover", 0.0), - "long_short": stats.get("long_short", 0.0), - "monotonicity": stats.get("monotonicity", 0.0), - } - ) - - return pd.DataFrame(turnover_rows), pd.DataFrame(cost_rows) - - def run_runtime_comparison( - self, - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - baseline_methods: Optional[List[str]] = None, - n_target_factors: int = 40, - n_runs: int = 1, - ) -> tuple[BenchmarkResult, dict[str, Any]]: - """Run a benchmark with real Ralph/Helix executions for Phase 2.""" 
- from factorminer.benchmark.runtime import load_benchmark_dataset - - methods = baseline_methods or [ - "random_exploration", - "alpha101_classic", - "alpha101_adapted", - "ralph_loop", - "helix_phase2", - ] - runtime_methods = {"ralph_loop", "helix_phase2"} - - runtime_dataset, dataset_hash = load_benchmark_dataset( - cfg, - data_path=data_path, - raw_df=raw_df, - mock=mock, - ) - static_data = dict(runtime_dataset.data_dict) - static_data["forward_returns"] = runtime_dataset.returns - - train_indices = runtime_dataset.splits["train"].indices - test_indices = runtime_dataset.splits["test"].indices - - def _slice_by_indices(data: dict, indices: np.ndarray) -> dict: - return {key: value[:, indices] for key, value in data.items()} - - train_data = _slice_by_indices(static_data, train_indices) - test_data = _slice_by_indices(static_data, test_indices) - - raw_results: Dict[str, List[MethodResult]] = {} - runtime_payloads: Dict[str, List[dict[str, Any]]] = {} - runtime_root = output_dir / "runtime_runs" - runtime_root.mkdir(parents=True, exist_ok=True) - - for method in methods: - method_runs: List[MethodResult] = [] - for run_id in range(n_runs): - if method in runtime_methods: - result, payload = self._execute_runtime_loop( - cfg=cfg, - loop_kind=method, - runtime_dataset=runtime_dataset, - output_dir=runtime_root / method / f"run_{run_id}", - n_target_factors=n_target_factors, - run_id=run_id, - mock=mock, - ) - method_runs.append(result) - runtime_payloads.setdefault(method, []).append(payload) - else: - result = self.run_single_method( - method=method, - data=train_data, - test_data=test_data, - n_factors=n_target_factors, - run_id=run_id, - ) - method_runs.append(result) - raw_results[method] = method_runs - - averaged = { - method: _average_method_results(runs) - for method, runs in raw_results.items() - } - - lib_df = _build_library_df(averaged, methods) - comb_df = _build_combination_df(averaged, methods) - sel_df = _build_selection_df(averaged, methods) - 
speed_result = self._speed_bench.run_full_pipeline_benchmark(data=train_data) - op_result = self._speed_bench.run_operator_benchmark(n_repeats=3) - speed_df = _build_speed_df(op_result, speed_result) - turnover_df, cost_df = self._runtime_method_frames(runtime_payloads, methods) - - stat_tests = {} - helix_results = raw_results.get("helix_phase2", []) - ralph_results = raw_results.get("ralph_loop", []) - if helix_results and ralph_results: - h_ic = helix_results[0].ic_series - r_ic = ralph_results[0].ic_series - if h_ic is not None and r_ic is not None: - stat_tests = self._stat_tests.run_all_tests(h_ic, r_ic) - else: - h_ic = _synthetic_ic_series( - helix_results[0].library_ic, n=100, seed=self.seed - ) - r_ic = _synthetic_ic_series( - ralph_results[0].library_ic, n=100, seed=self.seed + 1 - ) - stat_tests = self._stat_tests.run_all_tests(h_ic, r_ic) - - runtime_artifacts = { - "dataset_hash": dataset_hash, - "runtime_root": str(runtime_root.resolve()), - "runtime_payloads": runtime_payloads, - } - - return ( - BenchmarkResult( - methods=methods, - factor_library_metrics=lib_df, - combination_metrics=comb_df, - selection_metrics=sel_df, - speed_metrics=speed_df, - statistical_tests=stat_tests, - raw_method_results=raw_results, - turnover_metrics=turnover_df, - cost_pressure_metrics=cost_df, - runtime_artifacts=runtime_artifacts, - ), - runtime_artifacts, - ) - - def run_runtime_ablation_study( - self, - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - configs_to_run: Optional[List[str]] = None, - n_target_factors: int = 40, - n_runs: int = 1, - ) -> AblationResult: - """Run a runtime-backed ablation study using real loop executions.""" - from factorminer.benchmark.runtime import load_benchmark_dataset - - runtime_dataset, _ = load_benchmark_dataset( - cfg, - data_path=data_path, - raw_df=raw_df, - mock=mock, - ) - - configs = configs_to_run or [ - "full", - "no_debate", - "no_causal", 
- "no_canonicalize", - "no_regime", - "no_capacity", - "no_significance", - "no_memory", - ] - - results: Dict[str, MethodResult] = {} - runtime_root = output_dir / "runtime_ablation" - runtime_root.mkdir(parents=True, exist_ok=True) - - for config_name in configs: - variant_cfg = self._clone_cfg(cfg) - method_kind = "helix_phase2" - if config_name == "no_debate": - variant_cfg.phase2.debate.enabled = False - elif config_name == "no_causal": - variant_cfg.phase2.causal.enabled = False - elif config_name == "no_canonicalize": - variant_cfg.phase2.helix.enable_canonicalization = False - elif config_name == "no_regime": - variant_cfg.phase2.regime.enabled = False - elif config_name == "no_capacity": - variant_cfg.phase2.capacity.enabled = False - elif config_name == "no_significance": - variant_cfg.phase2.significance.enabled = False - elif config_name == "no_memory": - method_kind = "ralph_loop" - elif config_name == "full": - pass - else: - logger.warning("Unknown runtime ablation config: %s", config_name) - continue - - run_dir = runtime_root / config_name / "run_0" - result, _payload = self._execute_runtime_loop( - cfg=variant_cfg, - loop_kind=method_kind, - runtime_dataset=runtime_dataset, - output_dir=run_dir, - n_target_factors=n_target_factors, - run_id=0, - mock=mock, - ) - results[config_name] = result - - baseline = results.get("full") - rows: list[dict[str, Any]] = [] - if baseline is not None: - for name, result in results.items(): - if name == "full": - continue - rows.append( - { - "config": name, - "method": result.method, - "delta_library_ic": result.library_ic - baseline.library_ic, - "delta_library_icir": result.library_icir - baseline.library_icir, - "delta_ew_ic": result.ew_ic - baseline.ew_ic, - "delta_icw_ic": result.icw_ic - baseline.icw_ic, - "delta_lasso_ic": result.lasso_ic - baseline.lasso_ic, - "delta_xgb_ic": result.xgb_ic - baseline.xgb_ic, - "delta_turnover": getattr(result, "avg_turnover", 0.0) - - getattr(baseline, "avg_turnover", 
0.0), - } - ) - contributions = pd.DataFrame(rows) - return AblationResult(configs=configs, results=results, contributions=contributions) - - -# --------------------------------------------------------------------------- -# Helper functions -# --------------------------------------------------------------------------- - -def _build_mock_data_dict( - n_assets: int = 100, - n_periods: int = 500, - seed: int = 42, -) -> dict: - """Build a minimal data dict from MockConfig (no raw_df needed).""" - from factorminer.data.mock_data import MockConfig, generate_mock_data - from factorminer.data.preprocessor import preprocess - - cfg = MockConfig( - num_assets=n_assets, - num_periods=n_periods, - frequency="10min", - plant_alpha=True, - alpha_strength=0.04, - alpha_assets_frac=0.4, - seed=seed, - ) - raw = generate_mock_data(cfg) - processed = preprocess(raw) - - assets = sorted(processed["asset_id"].unique()) - T = processed.groupby("asset_id").size().min() - - feature_map = { - "$open": "open", "$high": "high", "$low": "low", "$close": "close", - "$volume": "volume", "$amt": "amount", "$vwap": "vwap", - "$returns": "returns", - } - data_dict: dict = {} - for feat_name, col_name in feature_map.items(): - if col_name in processed.columns: - pivot = processed.pivot( - index="asset_id", columns="datetime", values=col_name - ) - pivot = pivot.loc[assets].iloc[:, :T] - data_dict[feat_name] = pivot.values.astype(np.float64) - - close = data_dict["$close"] - forward_returns = np.roll(close, -1, axis=1) / close - 1 - forward_returns[:, -1] = np.nan - data_dict["forward_returns"] = forward_returns - return data_dict - - -def _slice_data(data: dict, start: int, end: int) -> dict: - """Slice all (M, T) arrays to columns [start, end).""" - return {k: v[:, start:end] for k, v in data.items()} - - -def _average_method_results(runs: List[MethodResult]) -> MethodResult: - """Average numeric fields across multiple runs.""" - if not runs: - return MethodResult(method="unknown") - if 
len(runs) == 1: - return runs[0] - - fields = [ - "library_ic", "library_icir", "avg_abs_rho", - "ew_ic", "ew_icir", "icw_ic", "icw_icir", - "lasso_ic", "lasso_icir", "xgb_ic", "xgb_icir", - "n_factors", "admission_rate", "elapsed_seconds", "avg_turnover", - ] - avg = MethodResult(method=runs[0].method) - for f in fields: - vals = [getattr(r, f) for r in runs if getattr(r, f) is not None] - if vals: - setattr(avg, f, float(np.mean(vals))) - return avg - - -def _build_library_df( - averaged: Dict[str, MethodResult], methods: List[str] -) -> pd.DataFrame: - rows = [] - for method in methods: - r = averaged.get(method, MethodResult(method=method)) - rows.append({ - "method": method, - "ic_pct": r.library_ic * 100, - "icir": r.library_icir, - "avg_abs_rho": r.avg_abs_rho, - "n_factors": r.n_factors, - "avg_turnover": r.avg_turnover, - }) - return pd.DataFrame(rows) - - -def _build_combination_df( - averaged: Dict[str, MethodResult], methods: List[str] -) -> pd.DataFrame: - rows = [] - for method in methods: - r = averaged.get(method, MethodResult(method=method)) - rows.append({ - "method": method, - "ew_ic_pct": r.ew_ic * 100, - "ew_icir": r.ew_icir, - "icw_ic_pct": r.icw_ic * 100, - "icw_icir": r.icw_icir, - }) - return pd.DataFrame(rows) - - -def _build_selection_df( - averaged: Dict[str, MethodResult], methods: List[str] -) -> pd.DataFrame: - rows = [] - for method in methods: - r = averaged.get(method, MethodResult(method=method)) - rows.append({ - "method": method, - "lasso_ic_pct": r.lasso_ic * 100, - "lasso_icir": r.lasso_icir, - "xgb_ic_pct": r.xgb_ic * 100, - "xgb_icir": r.xgb_icir, - "best_ic_pct": max(r.lasso_ic, r.xgb_ic) * 100, - }) - return pd.DataFrame(rows) - - -def _build_speed_df( - op_result: OperatorSpeedResult, - pipeline_result: PipelineSpeedResult, -) -> pd.DataFrame: - rows = [] - for op, ms in op_result.operator_timings_ms.items(): - rows.append({"name": op, "time_ms": ms, "type": "operator"}) - rows.append({ - "name": f"Pipeline 
({pipeline_result.n_candidates} cands)", - "time_ms": pipeline_result.total_seconds * 1000, - "type": "pipeline", - }) - rows.append({ - "name": "Throughput (cands/s)", - "time_ms": pipeline_result.candidates_per_second, - "type": "throughput", - }) - return pd.DataFrame(rows) - - -def _synthetic_ic_series( - target_mean: float, - n: int = 100, - seed: int = 42, -) -> np.ndarray: - """Generate a synthetic IC series with given mean for stat tests.""" - rng = np.random.RandomState(seed) - noise = rng.randn(n) * 0.03 - base = target_mean + noise - return base.astype(np.float64) - - -# --------------------------------------------------------------------------- -# CLI entry point -# --------------------------------------------------------------------------- - -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="HelixFactor vs FactorMiner Benchmark Suite" - ) - parser.add_argument("--mock", action="store_true", help="Use mock data") - parser.add_argument("--n-factors", type=int, default=40, help="Target library size") - parser.add_argument("--n-assets", type=int, default=100, help="Mock data assets") - parser.add_argument("--n-periods", type=int, default=500, help="Mock data periods") - parser.add_argument("--output", type=str, default="results/", help="Output directory") - parser.add_argument("--methods", nargs="*", default=None, help="Methods to run") - parser.add_argument("--seed", type=int, default=42, help="Random seed") - parser.add_argument( - "--log-level", type=str, default="WARNING", help="Logging level" - ) - return parser.parse_args() - - -def main() -> None: - args = _parse_args() - logging.basicConfig( - level=getattr(logging, args.log_level.upper(), logging.WARNING), - format="%(levelname)s %(name)s: %(message)s", - ) - - output_dir = Path(args.output) - output_dir.mkdir(parents=True, exist_ok=True) - - print("=" * 70) - print(" HelixFactor Benchmark Suite") - print("=" * 70) - - # Build mock data - print(f"\n[1/4] 
Generating mock data ({args.n_assets} assets, {args.n_periods} periods)...") - t0 = time.perf_counter() - data = _build_mock_data_dict( - n_assets=args.n_assets, - n_periods=args.n_periods, - seed=args.seed, - ) - T = list(data.values())[0].shape[1] - train_end = int(T * 0.7) - print(f" Done in {time.perf_counter()-t0:.1f}s (T={T}, train=0:{train_end}, test={train_end}:{T})") - - # Run comparison - print(f"\n[2/4] Running method comparison (n_factors={args.n_factors})...") - bench = HelixBenchmark(seed=args.seed) - t0 = time.perf_counter() - result = bench.run_comparison( - data=data, - train_period=(0, train_end), - test_period=(train_end, T), - n_target_factors=args.n_factors, - n_runs=1, - methods=args.methods, - ) - elapsed = time.perf_counter() - t0 - print(f" Done in {elapsed:.1f}s") - - # Print results table - print("\n[3/4] Results Summary:") - print("\n--- Factor Library Metrics ---") - print(result.factor_library_metrics.to_string(index=False, float_format="{:.4f}".format)) - print("\n--- Factor Combination Metrics ---") - print(result.combination_metrics.to_string(index=False, float_format="{:.4f}".format)) - print("\n--- Factor Selection Metrics ---") - print(result.selection_metrics.to_string(index=False, float_format="{:.4f}".format)) - print("\n--- Speed Metrics ---") - print(result.speed_metrics.to_string(index=False, float_format="{:.3f}".format)) - - if result.statistical_tests: - dm = result.statistical_tests.get("diebold_mariano", {}) - print(f"\n--- Statistical Tests (Helix vs Ralph) ---") - print(f" DM stat: {dm.get('dm_stat', 0):.3f} p={dm.get('p_value', 1):.4f} dir={dm.get('direction','?')}") - ci = result.statistical_tests.get("bootstrap_ci_95", {}) - print(f" Bootstrap 95% CI on IC diff: [{ci.get('lower', 0):.4f}, {ci.get('upper', 0):.4f}]") - print(f" Helix outperforms: {result.statistical_tests.get('helix_outperforms', False)}") - - # Save outputs - print(f"\n[4/4] Saving outputs to {output_dir}...") - 
result.generate_full_report(str(output_dir / "benchmark_report.html")) - with open(output_dir / "library_metrics.csv", "w") as f: - result.factor_library_metrics.to_csv(f, index=False) - with open(output_dir / "combination_metrics.csv", "w") as f: - result.combination_metrics.to_csv(f, index=False) - with open(output_dir / "selection_metrics.csv", "w") as f: - result.selection_metrics.to_csv(f, index=False) - with open(output_dir / "statistical_tests.json", "w") as f: - json.dump(_json_safe(result.statistical_tests), f, indent=2, allow_nan=False) - with open(output_dir / "latex_table.tex", "w") as f: - f.write(result.to_latex_table()) - with open(output_dir / "benchmark_report.md", "w") as f: - f.write(result.to_markdown_table()) - with open(output_dir / "readme_table.md", "w") as f: - f.write(result.to_markdown_table()) - - try: - result.plot_comparison(str(output_dir / "comparison_plot.png")) - except Exception as exc: - logger.debug("Plot generation failed: %s", exc) - - print(f" Reports saved to {output_dir}") - print(f"\nDone. 
Total runtime: {time.perf_counter() - t0:.1f}s") - - -if __name__ == "__main__": - main() diff --git a/src/factorminer/factorminer/benchmark/runtime.py b/src/factorminer/factorminer/benchmark/runtime.py deleted file mode 100644 index f8ffbc0..0000000 --- a/src/factorminer/factorminer/benchmark/runtime.py +++ /dev/null @@ -1,1498 +0,0 @@ -"""Strict paper/research benchmark runners built on runtime recomputation.""" - -from __future__ import annotations - -from dataclasses import asdict, dataclass, field -import copy -import hashlib -import json -import logging -import time -from pathlib import Path -from types import SimpleNamespace -from typing import Any, Iterable, Optional - -import numpy as np -import pandas as pd - -from src.factorminer.factorminer.benchmark.catalogs import ( - CandidateEntry, - build_alpha101_adapted, - build_alphaagent_style, - build_alphaforge_style, - build_factor_miner_catalog, - build_gplearn_style, - build_random_exploration, - dedupe_entries, - entries_from_library, - ALPHA101_CLASSIC, -) -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.library_io import load_library -from src.factorminer.factorminer.core.session import MiningSession -from src.factorminer.factorminer.evaluation.runtime import ( - EvaluationDataset, - FactorEvaluationArtifact, - compute_correlation_matrix, - evaluate_factors, - load_runtime_dataset, -) - -logger = logging.getLogger(__name__) - -RUNTIME_LOOP_BASELINES = { - "ralph_loop", - "helix_phase2", - "helix_no_memory", - "helix_no_debate", - "helix_no_significance", - "helix_no_capacity", - "helix_no_regime", -} - - -@dataclass -class BenchmarkManifest: - """Serializable description of one benchmark run.""" - - benchmark_name: str - mode: str - seed: int - baseline: str - freeze_universe: str - report_universes: list[str] - train_period: list[str] - test_period: list[str] - freeze_top_k: int - signal_failure_policy: str - default_target: str - 
target_stack: list[str] - primary_objective: str - dataset_hashes: dict[str, str] - artifact_paths: dict[str, str] - runtime_contract: dict[str, Any] = field(default_factory=dict) - baseline_provenance: dict[str, dict[str, Any]] = field(default_factory=dict) - warnings: list[str] = field(default_factory=list) - - -def _clone_cfg(cfg): - cloned = copy.deepcopy(cfg) - cloned._raw = copy.deepcopy(getattr(cfg, "_raw", {})) - return cloned - - -def _cfg_with_overrides(cfg, universe: str, mode: Optional[str] = None): - cloned = _clone_cfg(cfg) - cloned.data.universe = universe - if mode is not None: - cloned.benchmark.mode = mode - if cloned.benchmark.mode == "paper": - cloned.evaluation.signal_failure_policy = "reject" - cloned.research.enabled = False - cloned.phase2.causal.enabled = False - cloned.phase2.regime.enabled = False - cloned.phase2.capacity.enabled = False - cloned.phase2.significance.enabled = False - cloned.phase2.debate.enabled = False - cloned.phase2.auto_inventor.enabled = False - cloned.phase2.helix.enabled = False - else: - cloned.research.enabled = True - return cloned - - -def _data_hash(df: pd.DataFrame) -> str: - sample = df.sort_values(["datetime", "asset_id"]).reset_index(drop=True) - digest = hashlib.sha256() - digest.update(pd.util.hash_pandas_object(sample, index=True).values.tobytes()) - return digest.hexdigest() - - -def _json_safe(value: Any) -> Any: - """Recursively convert NaN/inf values into JSON-safe nulls.""" - if isinstance(value, np.generic): - return _json_safe(value.item()) - if isinstance(value, float): - if np.isnan(value) or np.isinf(value): - return None - return value - if isinstance(value, dict): - return {str(key): _json_safe(val) for key, val in value.items()} - if isinstance(value, (list, tuple)): - return [_json_safe(item) for item in value] - return value - - -def _file_sha256(path: Path) -> str: - digest = hashlib.sha256() - with open(path, "rb") as fp: - for chunk in iter(lambda: fp.read(1024 * 1024), b""): - 
digest.update(chunk) - return digest.hexdigest() - - -def _json_summary(path: Path) -> dict[str, Any] | None: - if not path.exists(): - return None - try: - with open(path) as fp: - payload = json.load(fp) - except Exception as exc: # pragma: no cover - defensive provenance capture - return {"path": str(path), "load_error": str(exc)} - - if isinstance(payload, dict): - return payload - return {"path": str(path), "payload_type": type(payload).__name__} - - -def _session_summary(path: Path) -> dict[str, Any] | None: - if not path.exists(): - return None - try: - return MiningSession.load(path).get_summary() - except Exception as exc: # pragma: no cover - defensive provenance capture - return {"path": str(path), "load_error": str(exc)} - - -def _catalog_provenance(baseline: str, candidate_count: int, seed: int) -> dict[str, Any]: - return { - "kind": "catalog", - "source": baseline, - "candidate_count": candidate_count, - "seed": seed, - } - - -def _saved_library_provenance( - requested_path: str, - baseline: str, -) -> dict[str, Any]: - base_path = Path(_base_path(requested_path)).expanduser() - resolved_base = base_path.resolve() if base_path.exists() else base_path - library_json = resolved_base.with_suffix(".json") - signal_cache = Path(str(resolved_base) + "_signals.npz") - parent = resolved_base.parent - - source_files: dict[str, dict[str, str]] = {} - for label, path in { - "library_json": library_json, - "signal_cache": signal_cache, - "session_json": parent / "session.json", - "session_log_json": parent / "session_log.json", - "checkpoint_session_json": parent / "checkpoint" / "session.json", - "checkpoint_loop_state_json": parent / "checkpoint" / "loop_state.json", - "checkpoint_memory_json": parent / "checkpoint" / "memory.json", - }.items(): - if path.exists(): - source_files[label] = { - "path": str(path), - "sha256": _file_sha256(path), - } - - provenance: dict[str, Any] = { - "kind": "saved_library", - "source": baseline, - "requested_path": 
str(Path(requested_path)), - "resolved_base_path": str(resolved_base), - "source_files": source_files, - "library_summary": {}, - "session_summary": _session_summary(parent / "session.json"), - "session_log_summary": _json_summary(parent / "session_log.json"), - } - - if library_json.exists(): - try: - library = load_library(resolved_base) - except Exception as exc: # pragma: no cover - defensive provenance capture - provenance["library_summary"] = { - "path": str(library_json), - "load_error": str(exc), - } - else: - provenance["library_summary"] = { - "path": str(library_json), - "factor_count": library.size, - "diagnostics": library.get_diagnostics(), - } - - return provenance - - -def _baseline_provenance( - baseline: str, - *, - factor_miner_library_path: Optional[str] = None, - factor_miner_no_memory_library_path: Optional[str] = None, - candidate_count: int = 0, - seed: int = 0, -) -> dict[str, Any]: - if baseline == "factor_miner" and factor_miner_library_path: - return _saved_library_provenance(factor_miner_library_path, baseline) - if baseline == "factor_miner_no_memory" and factor_miner_no_memory_library_path: - return _saved_library_provenance( - factor_miner_no_memory_library_path, - baseline, - ) - return _catalog_provenance(baseline, candidate_count, seed) - - -def _runtime_manifest_value( - runtime_manifests: Optional[dict[str, dict[str, Any]]], - baseline: str, -) -> dict[str, Any]: - """Return the runtime manifest for one baseline if supplied.""" - if not runtime_manifests: - return {} - value = runtime_manifests.get(baseline, {}) - return dict(value) if isinstance(value, dict) else {} - - -def _build_runtime_provider(cfg, *, mock: bool): - """Create the benchmark-time LLM provider.""" - from factorminer.agent.llm_interface import MockProvider, create_provider - - if mock or getattr(cfg.llm, "provider", "mock") == "mock": - return MockProvider() - - provider_cfg = { - "provider": cfg.llm.provider, - "model": cfg.llm.model, - } - raw_llm_cfg = 
getattr(cfg, "_raw", {}).get("llm", {}) - if raw_llm_cfg.get("api_key"): - provider_cfg["api_key"] = raw_llm_cfg["api_key"] - return create_provider(provider_cfg) - - -def _filter_dataclass_kwargs(source, target_cls): - """Copy shared dataclass fields from one config object to another.""" - from dataclasses import fields - - target_fields = {f.name for f in fields(target_cls)} - source_fields = getattr(source, "__dataclass_fields__", {}) - return { - name: getattr(source, name) - for name in source_fields - if name in target_fields - } - - -def _build_phase2_runtime_kwargs(cfg) -> dict[str, Any]: - """Build runtime Phase 2 configs from the hierarchical benchmark config.""" - from factorminer.evaluation.causal import CausalConfig as RuntimeCausalConfig - from factorminer.evaluation.capacity import CapacityConfig as RuntimeCapacityConfig - from factorminer.evaluation.regime import RegimeConfig as RuntimeRegimeConfig - from factorminer.evaluation.significance import ( - SignificanceConfig as RuntimeSignificanceConfig, - ) - from factorminer.agent.debate import DebateConfig as RuntimeDebateConfig - from factorminer.agent.specialists import DEFAULT_SPECIALISTS - - debate_config = None - if cfg.phase2.debate.enabled: - requested = cfg.phase2.debate.num_specialists - selected = list(DEFAULT_SPECIALISTS[:requested]) - if requested > len(DEFAULT_SPECIALISTS): - selected = list(DEFAULT_SPECIALISTS) - debate_config = RuntimeDebateConfig( - specialists=selected, - enable_critic=cfg.phase2.debate.enable_critic, - candidates_per_specialist=cfg.phase2.debate.candidates_per_specialist, - top_k_after_critic=cfg.phase2.debate.top_k_after_critic, - critic_temperature=cfg.phase2.debate.critic_temperature, - ) - - causal_config = None - if cfg.phase2.causal.enabled: - causal_config = RuntimeCausalConfig( - **_filter_dataclass_kwargs(cfg.phase2.causal, RuntimeCausalConfig) - ) - - regime_config = None - if cfg.phase2.regime.enabled: - regime_config = RuntimeRegimeConfig( - 
**_filter_dataclass_kwargs(cfg.phase2.regime, RuntimeRegimeConfig) - ) - - capacity_config = None - if cfg.phase2.capacity.enabled: - capacity_config = RuntimeCapacityConfig( - **_filter_dataclass_kwargs(cfg.phase2.capacity, RuntimeCapacityConfig) - ) - - significance_config = None - if cfg.phase2.significance.enabled: - significance_config = RuntimeSignificanceConfig( - **_filter_dataclass_kwargs(cfg.phase2.significance, RuntimeSignificanceConfig) - ) - - return { - "debate_config": debate_config, - "causal_config": causal_config, - "regime_config": regime_config, - "capacity_config": capacity_config, - "significance_config": significance_config, - "enable_knowledge_graph": bool(cfg.phase2.helix.enable_knowledge_graph), - "enable_embeddings": bool(cfg.phase2.helix.enable_embeddings), - "enable_auto_inventor": bool(cfg.phase2.auto_inventor.enabled), - "auto_invention_interval": int(cfg.phase2.auto_inventor.invention_interval), - "canonicalize": bool(cfg.phase2.helix.enable_canonicalization), - "forgetting_lambda": float(cfg.phase2.helix.forgetting_lambda), - } - - -def _extract_volume_panel(dataset: EvaluationDataset) -> Optional[np.ndarray]: - """Best-effort extraction of a dollar-volume panel for Helix capacity checks.""" - for key in ("$amt", "$volume"): - panel = dataset.data_dict.get(key) - if panel is not None and np.any(np.isfinite(panel)): - return np.asarray(panel, dtype=np.float64) - return None - - -def _build_runtime_loop_config( - cfg, - *, - output_dir: Path, - dataset: EvaluationDataset, - mock: bool, - runtime_manifest: dict[str, Any], -): - """Build the flat loop config consumed by RalphLoop/HelixLoop.""" - from factorminer.core.config import MiningConfig as LoopMiningConfig - - target_library_size = int( - runtime_manifest.get( - "target_library_size", - getattr(cfg.mining, "target_library_size", 110), - ) - ) - max_iterations = int( - runtime_manifest.get( - "max_iterations", - getattr(cfg.mining, "max_iterations", 200), - ) - ) - ic_threshold = 
float( - runtime_manifest.get( - "ic_threshold", - getattr(cfg.mining, "ic_threshold", 0.04), - ) - ) - icir_threshold = float( - runtime_manifest.get( - "icir_threshold", - getattr(cfg.mining, "icir_threshold", 0.5), - ) - ) - correlation_threshold = float( - runtime_manifest.get( - "correlation_threshold", - getattr(cfg.mining, "correlation_threshold", 0.5), - ) - ) - replacement_ic_min = float( - runtime_manifest.get( - "replacement_ic_min", - getattr(cfg.mining, "replacement_ic_min", 0.10), - ) - ) - replacement_ic_ratio = float( - runtime_manifest.get( - "replacement_ic_ratio", - getattr(cfg.mining, "replacement_ic_ratio", 1.3), - ) - ) - - if runtime_manifest.get("relax_thresholds", mock): - ic_threshold = min(ic_threshold, 0.0) - icir_threshold = min(icir_threshold, -1.0) - correlation_threshold = max(correlation_threshold, 1.1) - - loop_cfg = LoopMiningConfig( - target_library_size=target_library_size, - batch_size=int( - runtime_manifest.get("batch_size", getattr(cfg.mining, "batch_size", 40)) - ), - max_iterations=max_iterations, - ic_threshold=ic_threshold, - icir_threshold=icir_threshold, - correlation_threshold=correlation_threshold, - replacement_ic_min=replacement_ic_min, - replacement_ic_ratio=replacement_ic_ratio, - fast_screen_assets=int( - runtime_manifest.get( - "fast_screen_assets", - getattr(cfg.evaluation, "fast_screen_assets", 100), - ) - ), - num_workers=int( - runtime_manifest.get( - "num_workers", getattr(cfg.evaluation, "num_workers", 1) - ) - ), - output_dir=str(output_dir), - backend=str( - runtime_manifest.get( - "backend", getattr(cfg.evaluation, "backend", "numpy") - ) - ), - gpu_device=str( - runtime_manifest.get( - "gpu_device", getattr(cfg.evaluation, "gpu_device", "cuda:0") - ) - ), - signal_failure_policy=str( - runtime_manifest.get( - "signal_failure_policy", - "synthetic" if mock else getattr(cfg.evaluation, "signal_failure_policy", "reject"), - ) - ), - ) - - loop_cfg.research = cfg.research - loop_cfg.benchmark_mode = 
str(getattr(cfg.benchmark, "mode", "paper")) - loop_cfg.target_panels = dataset.target_panels - loop_cfg.target_horizons = { - name: max(int(spec.holding_bars), 1) - for name, spec in dataset.target_specs.items() - } - return loop_cfg - - -def _cfg_for_runtime_baseline(cfg, baseline: str): - """Project the hierarchical config into one runtime benchmark variant.""" - runtime_cfg = _clone_cfg(cfg) - - # Start from a clean phase-2 surface so variants are explicit. - runtime_cfg.phase2.causal.enabled = False - runtime_cfg.phase2.regime.enabled = False - runtime_cfg.phase2.capacity.enabled = False - runtime_cfg.phase2.significance.enabled = False - runtime_cfg.phase2.debate.enabled = False - runtime_cfg.phase2.auto_inventor.enabled = False - runtime_cfg.phase2.helix.enabled = False - runtime_cfg.phase2.helix.enable_knowledge_graph = False - runtime_cfg.phase2.helix.enable_embeddings = False - runtime_cfg.phase2.helix.enable_canonicalization = False - - if baseline in {"ralph_loop", "factor_miner", "factor_miner_no_memory"}: - runtime_cfg.benchmark.mode = "paper" - return runtime_cfg - - runtime_cfg.benchmark.mode = "research" - runtime_cfg.phase2.helix.enabled = True - runtime_cfg.phase2.helix.enable_canonicalization = True - runtime_cfg.phase2.helix.enable_knowledge_graph = True - runtime_cfg.phase2.helix.enable_embeddings = True - runtime_cfg.phase2.debate.enabled = True - runtime_cfg.phase2.regime.enabled = True - runtime_cfg.phase2.capacity.enabled = True - runtime_cfg.phase2.significance.enabled = True - - if baseline == "helix_no_memory": - runtime_cfg.phase2.helix.enable_knowledge_graph = False - runtime_cfg.phase2.helix.enable_embeddings = False - elif baseline == "helix_no_debate": - runtime_cfg.phase2.debate.enabled = False - elif baseline == "helix_no_significance": - runtime_cfg.phase2.significance.enabled = False - elif baseline == "helix_no_capacity": - runtime_cfg.phase2.capacity.enabled = False - elif baseline == "helix_no_regime": - 
runtime_cfg.phase2.regime.enabled = False - - return runtime_cfg - - -def _real_mining_loop_type(baseline: str, runtime_manifest: dict[str, Any]) -> str: - """Resolve the loop type for a runtime mining request.""" - loop_type = str(runtime_manifest.get("loop_type", "")).strip().lower() - if loop_type in {"ralph", "helix"}: - return loop_type - if baseline in {"helix_phase2", "helix_no_memory", "helix_no_debate", "helix_no_significance", "helix_no_capacity", "helix_no_regime"}: - return "helix" - if baseline in {"factor_miner", "factor_miner_no_memory", "ralph_loop"}: - return "ralph" - return "ralph" - - -def _runtime_loop_provenance( - *, - baseline: str, - loop_type: str, - runtime_manifest: dict[str, Any], - runtime_output_dir: Path, -) -> dict[str, Any]: - """Summarize the real mining run used to source benchmark factors.""" - library_json = runtime_output_dir / "factor_library.json" - run_manifest = runtime_output_dir / "run_manifest.json" - session_json = runtime_output_dir / "session.json" - session_log_json = runtime_output_dir / "session_log.json" - checkpoint_dir = runtime_output_dir / "checkpoint" - - source_files: dict[str, dict[str, str]] = {} - for label, path in { - "library_json": library_json, - "run_manifest_json": run_manifest, - "session_json": session_json, - "session_log_json": session_log_json, - "checkpoint_library_json": checkpoint_dir / "library.json", - "checkpoint_run_manifest_json": checkpoint_dir / "run_manifest.json", - "checkpoint_session_json": checkpoint_dir / "session.json", - "checkpoint_loop_state_json": checkpoint_dir / "loop_state.json", - }.items(): - if path.exists(): - source_files[label] = { - "path": str(path), - "sha256": _file_sha256(path), - } - - provenance: dict[str, Any] = { - "kind": "runtime_loop", - "source": baseline, - "loop_type": loop_type, - "requested_runtime_manifest": _json_safe(runtime_manifest), - "runtime_output_dir": str(runtime_output_dir), - "source_files": source_files, - "run_manifest_summary": 
_json_summary(run_manifest), - "session_summary": _session_summary(session_json), - "session_log_summary": _json_summary(session_log_json), - "library_summary": {}, - } - - if library_json.exists(): - try: - library = load_library(runtime_output_dir / "factor_library") - except Exception as exc: # pragma: no cover - defensive provenance capture - provenance["library_summary"] = { - "path": str(library_json), - "load_error": str(exc), - } - else: - provenance["library_summary"] = { - "path": str(library_json), - "factor_count": library.size, - "diagnostics": library.get_diagnostics(), - } - - return provenance - - -def _run_runtime_mining_loop( - cfg, - *, - baseline: str, - dataset: EvaluationDataset, - output_dir: Path, - runtime_manifest: Optional[dict[str, Any]] = None, - mock: bool = False, -) -> dict[str, Any]: - """Run a real RalphLoop/HelixLoop and return its factor library.""" - runtime_manifest = dict(runtime_manifest or {}) - loop_type = _real_mining_loop_type(baseline, runtime_manifest) - runtime_output_dir = _ensure_dir(output_dir / "benchmark" / "table1" / baseline / "runtime") - runtime_cfg = _cfg_for_runtime_baseline(cfg, baseline) - loop_cfg = _build_runtime_loop_config( - runtime_cfg, - output_dir=runtime_output_dir, - dataset=dataset, - mock=mock or bool(runtime_manifest.get("mock", False)), - runtime_manifest=runtime_manifest, - ) - provider = _build_runtime_provider(runtime_cfg, mock=mock or bool(runtime_manifest.get("mock", False))) - - if loop_type == "helix": - from factorminer.core.helix_loop import HelixLoop - - phase2_kwargs = _build_phase2_runtime_kwargs(runtime_cfg) - loop = HelixLoop( - config=loop_cfg, - data_tensor=dataset.data_tensor, - returns=dataset.returns, - llm_provider=provider, - volume=_extract_volume_panel(dataset), - **phase2_kwargs, - ) - else: - from factorminer.core.ralph_loop import RalphLoop - - loop = RalphLoop( - config=loop_cfg, - data_tensor=dataset.data_tensor, - returns=dataset.returns, - llm_provider=provider, 
- ) - - checkpoint_interval = int(runtime_manifest.get("checkpoint_interval", 0 if mock else 1)) - loop.checkpoint_interval = checkpoint_interval - - if runtime_manifest.get("checkpoint_path"): - loop.load_session(str(runtime_manifest["checkpoint_path"])) - - target_size = int(runtime_manifest.get("target_library_size", loop_cfg.target_library_size)) - max_iterations = int(runtime_manifest.get("max_iterations", loop_cfg.max_iterations)) - library = loop.run(target_size=target_size, max_iterations=max_iterations) - provenance = _runtime_loop_provenance( - baseline=baseline, - loop_type=loop_type, - runtime_manifest={**runtime_manifest, "target_library_size": target_size, "max_iterations": max_iterations}, - runtime_output_dir=runtime_output_dir, - ) - return { - "baseline": baseline, - "loop_type": loop_type, - "library": library, - "provenance": provenance, - "runtime_output_dir": str(runtime_output_dir), - "target_library_size": target_size, - "max_iterations": max_iterations, - } - - -def load_benchmark_dataset( - cfg, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - universe: Optional[str] = None, - mock: bool = False, -) -> tuple[EvaluationDataset, str]: - """Load one universe into the canonical runtime dataset.""" - if universe is None: - universe = cfg.data.universe - - if raw_df is None: - if mock: - from factorminer.data.mock_data import MockConfig, generate_mock_data - - mock_cfg = MockConfig( - num_assets=64 if universe.lower() == "binance" else 80, - num_periods=12_200, - frequency="10min", - start_date="2024-01-02 09:30:00", - universe=universe, - plant_alpha=True, - seed=cfg.benchmark.seed, - ) - raw_df = generate_mock_data(mock_cfg) - else: - path = data_path - if path is None: - path = getattr(cfg, "_raw", {}).get("data_path") - if path is None: - raise ValueError("No data path specified for benchmark run") - from factorminer.data.loader import load_market_data - - raw_df = load_market_data(path, universe=universe) - 
- dataset_cfg = _cfg_with_overrides(cfg, universe) - return load_runtime_dataset(raw_df, dataset_cfg), _data_hash(raw_df) - - -def _factors_from_entries(entries: Iterable[CandidateEntry]) -> list[Factor]: - return [ - Factor( - id=idx + 1, - name=entry.name, - formula=entry.formula, - category=entry.category, - ic_mean=0.0, - icir=0.0, - ic_win_rate=0.0, - max_correlation=0.0, - batch_number=0, - ) - for idx, entry in enumerate(entries) - ] - - -def _get_baseline_entries( - baseline: str, - seed: int, - *, - factor_miner_library_path: Optional[str] = None, - factor_miner_no_memory_library_path: Optional[str] = None, -) -> list[CandidateEntry]: - if baseline == "alpha101_classic": - return dedupe_entries(ALPHA101_CLASSIC) - if baseline == "alpha101_adapted": - return dedupe_entries(build_alpha101_adapted()) - if baseline == "random_exploration": - return dedupe_entries(build_random_exploration(seed)) - if baseline == "gplearn": - return dedupe_entries(build_gplearn_style(seed)) - if baseline == "alphaforge_style": - return dedupe_entries(build_alphaforge_style()) - if baseline == "alphaagent_style": - return dedupe_entries(build_alphaagent_style()) - if baseline == "factor_miner": - if factor_miner_library_path: - return dedupe_entries(entries_from_library(load_library(_base_path(factor_miner_library_path)))) - return dedupe_entries(build_factor_miner_catalog()) - if baseline == "factor_miner_no_memory": - if factor_miner_no_memory_library_path: - return dedupe_entries(entries_from_library(load_library(_base_path(factor_miner_no_memory_library_path)))) - return dedupe_entries(build_random_exploration(seed + 101, count=200)) - raise KeyError(f"Unknown benchmark baseline: {baseline}") - - -def _base_path(path: str) -> str: - p = Path(path) - return str(p.with_suffix("")) if p.suffix == ".json" else str(p) - - -def build_benchmark_library( - artifacts: Iterable[FactorEvaluationArtifact], - cfg, - *, - split_name: str = "train", - ic_threshold: Optional[float] = None, - 
correlation_threshold: Optional[float] = None, -) -> tuple[FactorLibrary, dict[str, int]]: - """Build a library from candidate artifacts under the paper admission rules.""" - ic_threshold = cfg.mining.ic_threshold if ic_threshold is None else ic_threshold - correlation_threshold = ( - cfg.mining.correlation_threshold - if correlation_threshold is None - else correlation_threshold - ) - library = FactorLibrary( - correlation_threshold=correlation_threshold, - ic_threshold=ic_threshold, - ) - - stats = { - "succeeded": 0, - "admitted": 0, - "replaced": 0, - "threshold_rejections": 0, - "correlation_rejections": 0, - } - - ordered = [artifact for artifact in artifacts if artifact.succeeded] - ordered.sort( - key=lambda artifact: artifact.split_stats[split_name]["ic_abs_mean"], - reverse=True, - ) - stats["succeeded"] = len(ordered) - - for artifact in ordered: - split_stats = artifact.split_stats[split_name] - candidate_ic = float(split_stats["ic_abs_mean"]) - candidate_signals = artifact.split_signals[split_name] - if candidate_ic < ic_threshold: - stats["threshold_rejections"] += 1 - continue - - max_corr = ( - library._max_correlation_with_library(candidate_signals) # noqa: SLF001 - if library.size - else 0.0 - ) - factor = Factor( - id=0, - name=artifact.name, - formula=artifact.formula, - category=artifact.category, - ic_mean=candidate_ic, - icir=abs(float(split_stats["icir"])), - ic_win_rate=float(split_stats["ic_win_rate"]), - max_correlation=max_corr, - batch_number=0, - signals=candidate_signals, - ) - admitted, _ = library.check_admission(candidate_ic, candidate_signals) - if admitted: - library.admit_factor(factor) - stats["admitted"] += 1 - continue - - replace, replace_id, _ = library.check_replacement( - candidate_ic, - candidate_signals, - ic_min=cfg.mining.replacement_ic_min, - ic_ratio=cfg.mining.replacement_ic_ratio, - ) - if replace and replace_id is not None: - library.replace_factor(replace_id, factor) - stats["replaced"] += 1 - continue - - 
stats["correlation_rejections"] += 1 - - return library, stats - - -def select_frozen_top_k( - artifacts: Iterable[FactorEvaluationArtifact], - library: FactorLibrary, - *, - top_k: int, - split_name: str = "train", - min_ic: float = 0.05, - min_icir: float = 0.5, -) -> list[FactorEvaluationArtifact]: - """Freeze the paper Top-K set from train-split recomputed metrics.""" - admitted_formulas = {factor.formula for factor in library.list_factors()} - succeeded = [artifact for artifact in artifacts if artifact.succeeded] - admitted = [ - artifact - for artifact in succeeded - if artifact.formula in admitted_formulas - and artifact.split_stats[split_name]["ic_abs_mean"] >= min_ic - and abs(artifact.split_stats[split_name]["icir"]) >= min_icir - ] - admitted.sort( - key=lambda artifact: artifact.split_stats[split_name]["ic_abs_mean"], - reverse=True, - ) - selected: list[FactorEvaluationArtifact] = admitted[:top_k] - selected_formulas = {artifact.formula for artifact in selected} - - if len(selected) < top_k: - remainder = [ - artifact - for artifact in succeeded - if artifact.formula not in selected_formulas - ] - remainder.sort( - key=lambda artifact: artifact.split_stats[split_name]["ic_abs_mean"], - reverse=True, - ) - selected.extend(remainder[: top_k - len(selected)]) - - return selected - - -def _abs_icir_from_series(ic_series: np.ndarray) -> float: - valid = ic_series[np.isfinite(ic_series)] - if len(valid) < 3: - return 0.0 - std = float(np.std(valid, ddof=1)) - if std < 1e-12: - return 0.0 - return abs(float(np.mean(valid))) / std - - -def _normalize_backtest_stats(stats: dict) -> dict[str, float]: - ic_series = np.asarray(stats.get("ic_series", []), dtype=np.float64) - return { - "ic": abs(float(stats.get("ic_mean", 0.0))), - "icir": _abs_icir_from_series(ic_series), - "ic_win_rate": float(stats.get("ic_win_rate", 0.0)), - "long_short": float(stats.get("ls_return", 0.0)), - "monotonicity": float(stats.get("monotonicity", 0.0)), - "turnover": 
float(stats.get("avg_turnover", 0.0)), - } - - -def _avg_abs_rho(artifacts: list[FactorEvaluationArtifact], split_name: str) -> float: - if len(artifacts) < 2: - return 0.0 - corr = np.abs(compute_correlation_matrix(artifacts, split_name)) - upper = corr[np.triu_indices_from(corr, k=1)] - return float(np.mean(upper)) if upper.size else 0.0 - - -def _weighted_composite( - factor_signals: dict[int, np.ndarray], - weights: dict[int, float], -) -> np.ndarray: - ordered = [(fid, factor_signals[fid], weights.get(fid, 0.0)) for fid in factor_signals] - if not ordered: - raise ValueError("Cannot build weighted composite from zero factors") - total = sum(abs(weight) for _, _, weight in ordered) - if total < 1e-12: - total = float(len(ordered)) - ordered = [(fid, signal, 1.0) for fid, signal, _ in ordered] - composite = np.zeros_like(ordered[0][1], dtype=np.float64) - for _, signal, weight in ordered: - composite += signal * (weight / total) - return composite - - -def evaluate_frozen_set( - frozen: list[FactorEvaluationArtifact], - dataset: EvaluationDataset, - *, - split_name: str = "test", - fit_split: str = "train", - cost_bps: Optional[list[float]] = None, -) -> dict: - """Evaluate one frozen factor set on one universe.""" - if cost_bps is None: - cost_bps = [1.0, 4.0, 7.0, 10.0, 11.0] - - factors = _factors_from_entries( - CandidateEntry( - name=artifact.name, - formula=artifact.formula, - category=artifact.category, - ) - for artifact in frozen - ) - artifacts = evaluate_factors(factors, dataset, signal_failure_policy="reject") - succeeded = [artifact for artifact in artifacts if artifact.succeeded] - - result = { - "factor_count": len(succeeded), - "library": { - "ic": 0.0, - "icir": 0.0, - "avg_abs_rho": 0.0, - }, - "combinations": {}, - "selections": {}, - "warnings": [], - } - if not succeeded: - result["warnings"].append("No frozen factors recomputed successfully on this universe") - return result - - result["library"] = { - "ic": 
float(np.mean([artifact.split_stats[split_name]["ic_abs_mean"] for artifact in succeeded])), - "icir": float(np.mean([abs(artifact.split_stats[split_name]["icir"]) for artifact in succeeded])), - "avg_abs_rho": _avg_abs_rho(succeeded, split_name), - } - - artifact_map = {artifact.factor_id: artifact for artifact in succeeded} - fit_signals = {artifact.factor_id: artifact.split_signals[fit_split].T for artifact in succeeded} - eval_signals = {artifact.factor_id: artifact.split_signals[split_name].T for artifact in succeeded} - fit_returns = dataset.get_split(fit_split).returns.T - eval_returns = dataset.get_split(split_name).returns.T - - from factorminer.evaluation.combination import FactorCombiner - from factorminer.evaluation.portfolio import PortfolioBacktester - from factorminer.evaluation.selection import FactorSelector - - combiner = FactorCombiner() - backtester = PortfolioBacktester() - selector = FactorSelector() - - fit_ic_values = { - artifact.factor_id: artifact.split_stats[fit_split]["ic_mean"] - for artifact in succeeded - } - - combos = { - "equal_weight": combiner.equal_weight(eval_signals), - "ic_weighted": combiner.ic_weighted(eval_signals, fit_ic_values), - "orthogonal": combiner.orthogonal(eval_signals), - } - for name, composite in combos.items(): - stats = backtester.quintile_backtest(composite, eval_returns) - result["combinations"][name] = _normalize_backtest_stats(stats) - result["combinations"][name]["ic_series"] = _json_safe( - np.asarray(stats.get("ic_series", []), dtype=np.float64).tolist() - ) - result["combinations"][name]["turnover_series"] = _json_safe( - np.asarray(stats.get("turnover_series", []), dtype=np.float64).tolist() - ) - result["combinations"][name]["cost_pressure"] = { - str(cost): _normalize_backtest_stats( - backtester.quintile_backtest( - composite, eval_returns, transaction_cost_bps=float(cost) - ) - ) - for cost in cost_bps - } - - selection_specs = {} - try: - selection_specs["lasso"] = 
selector.lasso_selection(fit_signals, fit_returns) - except Exception as exc: - result["warnings"].append(f"lasso unavailable: {exc}") - try: - selection_specs["forward_stepwise"] = selector.forward_stepwise(fit_signals, fit_returns) - except Exception as exc: - result["warnings"].append(f"forward_stepwise unavailable: {exc}") - try: - selection_specs["xgboost"] = selector.xgboost_selection(fit_signals, fit_returns) - except Exception as exc: - result["warnings"].append(f"xgboost unavailable: {exc}") - - for name, ranking in selection_specs.items(): - if not ranking: - result["selections"][name] = {"factor_count": 0} - continue - selected_ids = [factor_id for factor_id, _ in ranking] - selected_eval = {factor_id: eval_signals[factor_id] for factor_id in selected_ids} - if name == "lasso": - weights = {factor_id: score for factor_id, score in ranking} - composite = _weighted_composite(selected_eval, weights) - elif name == "xgboost": - weights = { - factor_id: score * np.sign(artifact_map[factor_id].split_stats[fit_split]["ic_mean"] or 1.0) - for factor_id, score in ranking - } - composite = _weighted_composite(selected_eval, weights) - else: - signs = { - factor_id: np.sign(artifact_map[factor_id].split_stats[fit_split]["ic_mean"] or 1.0) - for factor_id in selected_ids - } - composite = _weighted_composite(selected_eval, signs) - stats = backtester.quintile_backtest(composite, eval_returns) - result["selections"][name] = { - "factor_count": len(selected_ids), - **_normalize_backtest_stats(stats), - "ic_series": _json_safe( - np.asarray(stats.get("ic_series", []), dtype=np.float64).tolist() - ), - "turnover_series": _json_safe( - np.asarray(stats.get("turnover_series", []), dtype=np.float64).tolist() - ), - } - - return result - - -def _ensure_dir(path: Path) -> Path: - path.mkdir(parents=True, exist_ok=True) - return path - - -def _write_json(path: Path, payload: dict) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as fp: - 
json.dump(_json_safe(payload), fp, indent=2, sort_keys=False, allow_nan=False) - - -def _save_manifest(path: Path, manifest: BenchmarkManifest) -> None: - _write_json(path, asdict(manifest)) - - -def run_table1_benchmark( - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - baseline_names: Optional[list[str]] = None, - factor_miner_library_path: Optional[str] = None, - factor_miner_no_memory_library_path: Optional[str] = None, - runtime_manifests: Optional[dict[str, dict[str, Any]]] = None, - use_runtime_loops: bool = False, -) -> dict: - """Run the strict Top-K freeze benchmark across all configured universes.""" - if runtime_manifests is None: - runtime_manifests = getattr(cfg.benchmark, "runtime_manifests", None) - use_runtime_loops = bool( - use_runtime_loops - or getattr(cfg.benchmark, "runtime_loops", False) - or runtime_manifests - ) - benchmark_dir = _ensure_dir(output_dir / "benchmark" / "table1") - baseline_names = baseline_names or list(cfg.benchmark.baselines) - freeze_cfg = _cfg_with_overrides(cfg, cfg.benchmark.freeze_universe) - freeze_dataset, freeze_hash = load_benchmark_dataset( - freeze_cfg, - data_path=data_path, - raw_df=raw_df, - universe=cfg.benchmark.freeze_universe, - mock=mock, - ) - - summary: dict[str, dict] = {} - for baseline in baseline_names: - runtime_manifest = _runtime_manifest_value(runtime_manifests, baseline) - runtime_baseline = bool(runtime_manifest) or ( - use_runtime_loops - and baseline in (RUNTIME_LOOP_BASELINES | {"factor_miner", "factor_miner_no_memory"}) - ) - - if runtime_baseline: - runtime_result = _run_runtime_mining_loop( - cfg, - baseline=baseline, - dataset=freeze_dataset, - output_dir=output_dir, - runtime_manifest=runtime_manifest, - mock=mock, - ) - factors = list(runtime_result["library"].list_factors()) - provenance = runtime_result["provenance"] - candidate_count = len(factors) - else: - entries = _get_baseline_entries( - 
baseline, - cfg.benchmark.seed, - factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - ) - factors = _factors_from_entries(entries) - provenance = _baseline_provenance( - baseline, - factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - candidate_count=len(entries), - seed=cfg.benchmark.seed, - ) - candidate_count = len(entries) - - artifacts = evaluate_factors( - factors, - freeze_dataset, - signal_failure_policy="reject", - ) - - library_cfg = _cfg_with_overrides(cfg, cfg.benchmark.freeze_universe) - if baseline == "factor_miner_no_memory": - library_cfg.mining.ic_threshold = 0.02 - library_cfg.mining.correlation_threshold = 0.85 - library, library_stats = build_benchmark_library( - artifacts, - library_cfg, - split_name="train", - ic_threshold=library_cfg.mining.ic_threshold, - correlation_threshold=library_cfg.mining.correlation_threshold, - ) - frozen = select_frozen_top_k( - artifacts, - library, - top_k=cfg.benchmark.freeze_top_k, - split_name="train", - ) - - baseline_result = { - "baseline": baseline, - "mode": cfg.benchmark.mode, - "freeze_universe": cfg.benchmark.freeze_universe, - "candidate_count": candidate_count, - "freeze_library_size": library.size, - "freeze_stats": library_stats, - "frozen_top_k": [ - { - "name": artifact.name, - "formula": artifact.formula, - "category": artifact.category, - "train_ic": artifact.split_stats["train"]["ic_abs_mean"], - "train_icir": abs(artifact.split_stats["train"]["icir"]), - } - for artifact in frozen - ], - "universes": {}, - } - - dataset_hashes = {cfg.benchmark.freeze_universe: freeze_hash} - for universe in cfg.benchmark.report_universes: - universe_cfg = _cfg_with_overrides(cfg, universe) - dataset, dataset_hash = load_benchmark_dataset( - universe_cfg, - data_path=data_path, - raw_df=raw_df, - universe=universe, - mock=mock, - ) - dataset_hashes[universe] 
= dataset_hash - baseline_result["universes"][universe] = evaluate_frozen_set( - frozen, - dataset, - split_name="test", - fit_split="train", - cost_bps=list(cfg.benchmark.cost_bps), - ) - - result_path = benchmark_dir / f"{baseline}.json" - manifest_path = benchmark_dir / f"{baseline}_manifest.json" - baseline_result["provenance"] = provenance - _write_json(result_path, baseline_result) - manifest = BenchmarkManifest( - benchmark_name="table1", - mode=cfg.benchmark.mode, - seed=cfg.benchmark.seed, - baseline=baseline, - freeze_universe=cfg.benchmark.freeze_universe, - report_universes=list(cfg.benchmark.report_universes), - train_period=list(cfg.data.train_period), - test_period=list(cfg.data.test_period), - freeze_top_k=cfg.benchmark.freeze_top_k, - signal_failure_policy="reject", - default_target=cfg.data.default_target, - target_stack=[target.get("name", "") for target in cfg.data.targets], - primary_objective=cfg.research.primary_objective, - dataset_hashes=dataset_hashes, - artifact_paths={ - "result": str(result_path), - "manifest": str(manifest_path), - }, - runtime_contract=runtime_manifest, - baseline_provenance={baseline: provenance}, - warnings=[], - ) - _save_manifest(manifest_path, manifest) - summary[baseline] = baseline_result - - return summary - - -def run_ablation_memory_benchmark( - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - factor_miner_library_path: Optional[str] = None, - factor_miner_no_memory_library_path: Optional[str] = None, - runtime_manifests: Optional[dict[str, dict[str, Any]]] = None, -) -> dict: - """Compare the default FactorMiner lane to the relaxed no-memory lane.""" - use_runtime_loops = bool( - runtime_manifests or getattr(cfg.benchmark, "runtime_loops", False) - ) - comparison = run_table1_benchmark( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - baseline_names=["factor_miner", "factor_miner_no_memory"], - 
factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - runtime_manifests=runtime_manifests, - use_runtime_loops=use_runtime_loops, - ) - result = {} - for baseline, payload in comparison.items(): - freeze_stats = payload["freeze_stats"] - succeeded = max(freeze_stats.get("succeeded", 0), 1) - result[baseline] = { - "library_size": payload["freeze_library_size"], - "high_quality_yield": freeze_stats.get("admitted", 0) / succeeded, - "redundancy_rejection_rate": freeze_stats.get("correlation_rejections", 0) / succeeded, - "replacements": freeze_stats.get("replaced", 0), - } - out_path = _ensure_dir(output_dir / "benchmark" / "ablation") / "memory_ablation.json" - _write_json(out_path, result) - return result - - -def run_cost_pressure_benchmark( - cfg, - output_dir: Path, - *, - baseline: str = "factor_miner", - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - factor_miner_library_path: Optional[str] = None, - runtime_manifests: Optional[dict[str, dict[str, Any]]] = None, -) -> dict: - """Run cost-pressure analysis for one baseline on the configured universes.""" - use_runtime_loops = bool( - runtime_manifests or getattr(cfg.benchmark, "runtime_loops", False) - ) - payload = run_table1_benchmark( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - baseline_names=[baseline], - factor_miner_library_path=factor_miner_library_path, - runtime_manifests=runtime_manifests, - use_runtime_loops=use_runtime_loops, - )[baseline] - result = { - universe: { - "combinations": { - name: metrics.get("cost_pressure", {}) - for name, metrics in universe_payload["combinations"].items() - } - } - for universe, universe_payload in payload["universes"].items() - } - out_path = _ensure_dir(output_dir / "benchmark" / "cost_pressure") / f"{baseline}.json" - _write_json(out_path, result) - return result - - -def _time_callable(fn, repeats: int = 
3) -> float: - timings: list[float] = [] - for _ in range(repeats): - start = time.perf_counter() - fn() - timings.append(time.perf_counter() - start) - return min(timings) * 1000.0 - - -def run_efficiency_benchmark(cfg, output_dir: Path) -> dict: - """Benchmark operator-level and factor-level compute time.""" - periods, assets = cfg.benchmark.efficiency_panel_shape - matrix = np.random.RandomState(cfg.benchmark.seed).randn(assets, periods).astype(np.float64) - other = np.random.RandomState(cfg.benchmark.seed + 1).randn(assets, periods).astype(np.float64) - - from factorminer.operators import torch_available - from factorminer.operators.gpu_backend import to_tensor - from factorminer.operators.registry import execute_operator - from factorminer.utils.visualization import plot_efficiency_benchmark - - operator_bench: dict[str, dict[str, float | None]] = {"numpy": {}, "c": {}, "gpu": {}} - def _backend_inputs(backend: str): - if backend == "gpu": - return to_tensor(matrix), to_tensor(other) - return matrix, other - - operators = { - "Add": lambda backend: execute_operator("Add", *_backend_inputs(backend), backend=backend), - "Mean": lambda backend: execute_operator("Mean", _backend_inputs(backend)[0], params={"window": 20}, backend=backend), - "Delta": lambda backend: execute_operator("Delta", _backend_inputs(backend)[0], params={"window": 5}, backend=backend), - "TsRank": lambda backend: execute_operator("TsRank", _backend_inputs(backend)[0], params={"window": 20}, backend=backend), - "Corr": lambda backend: execute_operator("Corr", *_backend_inputs(backend), params={"window": 20}, backend=backend), - "CsRank": lambda backend: execute_operator("CsRank", _backend_inputs(backend)[0], backend=backend), - } - for op_name, runner in operators.items(): - operator_bench["numpy"][op_name] = _time_callable(lambda r=runner: r("numpy")) - operator_bench["c"][op_name] = None - if torch_available(): - operator_bench["gpu"][op_name] = _time_callable(lambda r=runner: r("gpu")) - 
else: - operator_bench["gpu"][op_name] = None - - factor_bench: dict[str, dict[str, float | None]] = {"numpy": {}, "c": {}, "gpu": {}} - factor_specs = { - "momentum_volume": lambda backend: execute_operator( - "CsRank", - execute_operator( - "Mul", - execute_operator("Return", _backend_inputs(backend)[0], params={"window": 5}, backend=backend), - execute_operator( - "Div", - _backend_inputs(backend)[1], - execute_operator("Mean", _backend_inputs(backend)[1], params={"window": 20}, backend=backend), - backend=backend, - ), - backend=backend, - ), - backend=backend, - ), - "vwap_gap": lambda backend: execute_operator( - "Neg", - execute_operator( - "CsRank", - execute_operator( - "Div", - execute_operator("Sub", *_backend_inputs(backend), backend=backend), - execute_operator( - "Add", - _backend_inputs(backend)[1], - to_tensor(np.full_like(other, 1e-8)) if backend == "gpu" else np.full_like(other, 1e-8), - backend=backend, - ), - backend=backend, - ), - backend=backend, - ), - backend=backend, - ), - } - for formula_name, runner in factor_specs.items(): - factor_bench["numpy"][formula_name] = _time_callable(lambda r=runner: r("numpy")) - factor_bench["c"][formula_name] = None - if torch_available(): - factor_bench["gpu"][formula_name] = _time_callable(lambda r=runner: r("gpu")) - else: - factor_bench["gpu"][formula_name] = None - - bench_dir = _ensure_dir(output_dir / "benchmark" / "efficiency") - plot_efficiency_benchmark( - {backend: {k: v for k, v in values.items() if v is not None} for backend, values in operator_bench.items()}, - save_path=str(bench_dir / "operator_efficiency.png"), - ) - plot_efficiency_benchmark( - {backend: {k: v for k, v in values.items() if v is not None} for backend, values in factor_bench.items()}, - save_path=str(bench_dir / "factor_efficiency.png"), - ) - result = { - "panel_shape": {"periods": periods, "assets": assets}, - "operator_level_ms": operator_bench, - "factor_level_ms": factor_bench, - "available_backends": { - "numpy": 
True, - "c": False, - "gpu": torch_available(), - }, - } - _write_json(bench_dir / "efficiency.json", result) - return result - - -def run_benchmark_suite( - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - factor_miner_library_path: Optional[str] = None, - factor_miner_no_memory_library_path: Optional[str] = None, - runtime_manifests: Optional[dict[str, dict[str, Any]]] = None, -) -> dict: - """Run the benchmark suite and return the artifact index.""" - if runtime_manifests is None: - runtime_manifests = getattr(cfg.benchmark, "runtime_manifests", None) - use_runtime_loops = bool( - runtime_manifests or getattr(cfg.benchmark, "runtime_loops", False) - ) - results = { - "table1": run_table1_benchmark( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - runtime_manifests=runtime_manifests, - use_runtime_loops=use_runtime_loops, - ), - "ablation_memory": run_ablation_memory_benchmark( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - runtime_manifests=runtime_manifests, - ), - "cost_pressure": run_cost_pressure_benchmark( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - factor_miner_library_path=factor_miner_library_path, - runtime_manifests=runtime_manifests, - ), - "efficiency": run_efficiency_benchmark(cfg, output_dir), - } - _write_json(_ensure_dir(output_dir / "benchmark") / "suite.json", results) - return results - - -def run_runtime_mining_benchmark( - cfg, - output_dir: Path, - *, - data_path: Optional[str] = None, - raw_df: Optional[pd.DataFrame] = None, - mock: bool = False, - factor_miner_library_path: Optional[str] = None, - 
factor_miner_no_memory_library_path: Optional[str] = None, - runtime_manifests: Optional[dict[str, dict[str, Any]]] = None, -) -> dict: - """Run the benchmark suite with explicit real-loop manifests when provided.""" - return run_benchmark_suite( - cfg, - output_dir, - data_path=data_path, - raw_df=raw_df, - mock=mock, - factor_miner_library_path=factor_miner_library_path, - factor_miner_no_memory_library_path=factor_miner_no_memory_library_path, - runtime_manifests=runtime_manifests, - ) diff --git a/src/factorminer/factorminer/cli.py b/src/factorminer/factorminer/cli.py deleted file mode 100644 index cbade96..0000000 --- a/src/factorminer/factorminer/cli.py +++ /dev/null @@ -1,1566 +0,0 @@ -"""Click-based CLI for FactorMiner.""" - -from __future__ import annotations - -import logging -import sys -from dataclasses import fields -import json -from pathlib import Path - -import click -import numpy as np - -from src.factorminer.factorminer.utils.config import load_config - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _setup_logging(verbose: bool) -> None: - """Configure root logger for CLI output.""" - level = logging.DEBUG if verbose else logging.INFO - logging.basicConfig( - level=level, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) - - -def _load_data(cfg, data_path: str | None, mock: bool): - """Load market data from file or generate mock data. - - Returns - ------- - pd.DataFrame - Market data with columns: datetime, asset_id, open, high, low, - close, volume, amount. 
- """ - raw_cfg = getattr(cfg, "_raw", {}) - configured_path = raw_cfg.get("data_path") - - if mock: - click.echo("Generating mock market data...") - from factorminer.data.mock_data import MockConfig, generate_mock_data - - mock_cfg = MockConfig( - num_assets=50, - num_periods=500, - frequency="1d", - plant_alpha=True, - ) - return generate_mock_data(mock_cfg) - - # Try data_path argument, then config top-level data_path - path = data_path - if path is None: - path = configured_path - - if path is None: - click.echo("No data path specified. Use --data or --mock flag.") - raise click.Abort() - - click.echo(f"Loading market data from: {path}") - from factorminer.data.loader import load_market_data - - return load_market_data(path) - - -def _prepare_data_arrays(df): - """Convert a market DataFrame to numpy arrays for the mining loop. - - Returns - ------- - data_tensor : np.ndarray, shape (M, T, F) - Market data tensor. - returns : np.ndarray, shape (M, T) - Forward returns. - """ - asset_ids = sorted(df["asset_id"].unique()) - dates = sorted(df["datetime"].unique()) - M = len(asset_ids) - T = len(dates) - - feature_cols = [ - "open", - "high", - "low", - "close", - "volume", - "amount", - "vwap", - "returns", - ] - F = len(feature_cols) - - data_tensor = np.full((M, T, F), np.nan, dtype=np.float64) - returns = np.full((M, T), np.nan, dtype=np.float64) - - asset_to_idx = {a: i for i, a in enumerate(asset_ids)} - date_to_idx = {d: i for i, d in enumerate(dates)} - - for _, row in df.iterrows(): - ai = asset_to_idx[row["asset_id"]] - ti = date_to_idx[row["datetime"]] - for fi, col in enumerate(feature_cols[:6]): - data_tensor[ai, ti, fi] = row[col] - - if "vwap" in row.index and not np.isnan(row["vwap"]): - data_tensor[ai, ti, 6] = row["vwap"] - elif ( - not np.isnan(row["volume"]) - and abs(row["volume"]) > 1e-12 - and not np.isnan(row["amount"]) - ): - data_tensor[ai, ti, 6] = row["amount"] / row["volume"] - - if "returns" in row.index and not 
np.isnan(row["returns"]): - data_tensor[ai, ti, 7] = row["returns"] - - close_idx = feature_cols.index("close") - amount_idx = feature_cols.index("amount") - vwap_idx = feature_cols.index("vwap") - feature_returns_idx = feature_cols.index("returns") - - # Fill derived VWAP where the source file did not provide it. - volume = data_tensor[:, :, feature_cols.index("volume")] - amount = data_tensor[:, :, amount_idx] - derived_vwap = np.divide( - amount, - volume, - out=np.full_like(amount, np.nan), - where=np.abs(volume) > 1e-12, - ) - missing_vwap = np.isnan(data_tensor[:, :, vwap_idx]) - data_tensor[:, :, vwap_idx] = np.where( - missing_vwap, - np.where(np.isnan(derived_vwap), data_tensor[:, :, close_idx], derived_vwap), - data_tensor[:, :, vwap_idx], - ) - - # Compute bar returns feature from close prices where missing. - for i in range(M): - close = data_tensor[i, :, close_idx] - asset_returns = np.full(T, np.nan, dtype=np.float64) - asset_returns[1:] = (close[1:] - close[:-1]) / np.where( - close[:-1] == 0, np.nan, close[:-1] - ) - missing_feature_returns = np.isnan(data_tensor[i, :, feature_returns_idx]) - data_tensor[i, :, feature_returns_idx] = np.where( - missing_feature_returns, - asset_returns, - data_tensor[i, :, feature_returns_idx], - ) - - # Simple 1-period forward return target. 
- returns[i, :-1] = (close[1:] - close[:-1]) / np.where( - close[:-1] == 0, np.nan, close[:-1] - ) - - return data_tensor, returns - - -def _create_llm_provider(cfg, mock: bool): - """Create an LLM provider from config or use mock.""" - from factorminer.agent.llm_interface import MockProvider, create_provider - - if mock: - click.echo("Using mock LLM provider (no API calls).") - return MockProvider() - - llm_config = { - "provider": cfg.llm.provider, - "model": cfg.llm.model, - } - # Use api_key from config if set - if hasattr(cfg, "_raw") and cfg._raw.get("llm", {}).get("api_key"): - llm_config["api_key"] = cfg._raw["llm"]["api_key"] - - click.echo(f"Using LLM provider: {cfg.llm.provider}/{cfg.llm.model}") - return create_provider(llm_config) - - -def _build_core_mining_config(cfg, output_dir: Path, mock: bool = False): - """Create the flat mining config expected by RalphLoop/HelixLoop.""" - from factorminer.core.config import MiningConfig as CoreMiningConfig - - signal_failure_policy = ( - "synthetic" if mock else cfg.evaluation.signal_failure_policy - ) - - mining_cfg = CoreMiningConfig( - target_library_size=cfg.mining.target_library_size, - batch_size=cfg.mining.batch_size, - max_iterations=cfg.mining.max_iterations, - ic_threshold=cfg.mining.ic_threshold, - icir_threshold=cfg.mining.icir_threshold, - correlation_threshold=cfg.mining.correlation_threshold, - replacement_ic_min=cfg.mining.replacement_ic_min, - replacement_ic_ratio=cfg.mining.replacement_ic_ratio, - fast_screen_assets=cfg.evaluation.fast_screen_assets, - num_workers=cfg.evaluation.num_workers, - output_dir=str(output_dir), - backend=cfg.evaluation.backend, - gpu_device=cfg.evaluation.gpu_device, - signal_failure_policy=signal_failure_policy, - ) - mining_cfg.research = getattr(cfg, "research", None) - benchmark_cfg = getattr(cfg, "benchmark", None) - mining_cfg.benchmark_mode = getattr(benchmark_cfg, "mode", "paper") - mining_cfg.target_panels = None - mining_cfg.target_horizons = None - return 
mining_cfg - - -def _attach_runtime_targets(mining_config, dataset) -> None: - """Attach multi-horizon runtime metadata for research-mode mining.""" - mining_config.target_panels = dataset.target_panels - mining_config.target_horizons = { - name: max(getattr(spec, "holding_bars", 1), 1) - for name, spec in dataset.target_specs.items() - } - - -def _save_result_library(library, output_dir: Path) -> Path: - """Persist a factor library to the standard output location.""" - from factorminer.core.library_io import save_library - - output_dir.mkdir(parents=True, exist_ok=True) - lib_path = output_dir / "factor_library" - save_library(library, lib_path) - return lib_path.with_suffix(".json") - - -def _filter_dataclass_kwargs(source, target_cls): - """Copy shared dataclass fields from one config object to another.""" - target_fields = {f.name for f in fields(target_cls)} - source_fields = getattr(source, "__dataclass_fields__", {}) - return { - name: getattr(source, name) - for name in source_fields - if name in target_fields - } - - -def _build_debate_config(cfg): - """Build the runtime debate config from YAML config settings.""" - if not cfg.phase2.debate.enabled: - return None - - from factorminer.agent.debate import DebateConfig as RuntimeDebateConfig - from factorminer.agent.specialists import DEFAULT_SPECIALISTS - - available = len(DEFAULT_SPECIALISTS) - requested = cfg.phase2.debate.num_specialists - selected = list(DEFAULT_SPECIALISTS[:requested]) - if requested > available: - logger.warning( - "Requested %d specialists but only %d are available; using all defaults.", - requested, - available, - ) - - return RuntimeDebateConfig( - specialists=selected, - enable_critic=cfg.phase2.debate.enable_critic, - candidates_per_specialist=cfg.phase2.debate.candidates_per_specialist, - top_k_after_critic=cfg.phase2.debate.top_k_after_critic, - critic_temperature=cfg.phase2.debate.critic_temperature, - ) - - -def _build_phase2_runtime_configs(cfg): - """Instantiate 
evaluation/runtime configs for the Helix loop.""" - from factorminer.evaluation.causal import CausalConfig as RuntimeCausalConfig - from factorminer.evaluation.capacity import CapacityConfig as RuntimeCapacityConfig - from factorminer.evaluation.regime import RegimeConfig as RuntimeRegimeConfig - from factorminer.evaluation.significance import ( - SignificanceConfig as RuntimeSignificanceConfig, - ) - - causal_config = None - if cfg.phase2.causal.enabled: - causal_config = RuntimeCausalConfig( - **_filter_dataclass_kwargs(cfg.phase2.causal, RuntimeCausalConfig) - ) - - regime_config = None - if cfg.phase2.regime.enabled: - regime_config = RuntimeRegimeConfig( - **_filter_dataclass_kwargs(cfg.phase2.regime, RuntimeRegimeConfig) - ) - - capacity_config = None - if cfg.phase2.capacity.enabled: - capacity_config = RuntimeCapacityConfig( - **_filter_dataclass_kwargs(cfg.phase2.capacity, RuntimeCapacityConfig) - ) - - significance_config = None - if cfg.phase2.significance.enabled: - significance_config = RuntimeSignificanceConfig( - **_filter_dataclass_kwargs(cfg.phase2.significance, RuntimeSignificanceConfig) - ) - - return { - "debate_config": _build_debate_config(cfg), - "causal_config": causal_config, - "regime_config": regime_config, - "capacity_config": capacity_config, - "significance_config": significance_config, - } - - -def _extract_capacity_volume(data_tensor: np.ndarray) -> np.ndarray | None: - """Prefer dollar volume (`amount`) and fall back to raw volume if needed.""" - if data_tensor.ndim != 3 or data_tensor.shape[2] == 0: - return None - - amount_idx = 5 - volume_idx = 4 - - if data_tensor.shape[2] > amount_idx: - amount = data_tensor[:, :, amount_idx] - if not np.all(np.isnan(amount)): - return amount - - if data_tensor.shape[2] > volume_idx: - volume = data_tensor[:, :, volume_idx] - if not np.all(np.isnan(volume)): - return volume - - return None - - -def _active_phase2_features(cfg) -> list[str]: - """Describe the effective Helix feature set for CLI 
output.""" - features: list[str] = [] - - if cfg.phase2.causal.enabled: - features.append("causal") - if cfg.phase2.regime.enabled: - features.append("regime") - if cfg.phase2.capacity.enabled: - features.append("capacity") - if cfg.phase2.significance.enabled: - features.append("significance") - if cfg.phase2.debate.enabled: - features.append("debate") - if cfg.phase2.auto_inventor.enabled: - features.append("auto-inventor") - if cfg.phase2.helix.enabled and cfg.phase2.helix.enable_canonicalization: - features.append("canonicalization") - if cfg.phase2.helix.enabled and cfg.phase2.helix.enable_knowledge_graph: - features.append("knowledge-graph") - if cfg.phase2.helix.enabled and cfg.phase2.helix.enable_embeddings: - features.append("embeddings") - - return features - - -def _load_runtime_dataset_for_analysis(cfg, data_path: str | None, mock: bool): - """Load, preprocess, split, and tensorize data for analysis commands.""" - from factorminer.evaluation.runtime import load_runtime_dataset - - raw_df = _load_data(cfg, data_path, mock) - return load_runtime_dataset(raw_df, cfg) - - -def _recompute_analysis_artifacts(library, dataset, signal_failure_policy: str): - """Recompute library factors on the canonical analysis dataset.""" - from factorminer.evaluation.runtime import evaluate_factors - - return evaluate_factors( - library.list_factors(), - dataset, - signal_failure_policy=signal_failure_policy, - ) - - -def _report_artifact_failures(artifacts, header: str) -> list[str]: - """Print a concise recomputation failure summary and return failure texts.""" - from factorminer.evaluation.runtime import summarize_failures - - failures = summarize_failures(artifacts) - if not failures: - return [] - - click.echo(f"{header}: {len(failures)} factor(s) failed to recompute.") - for failure in failures[:10]: - click.echo(f" - {failure}") - if len(failures) > 10: - click.echo(f" ... 
and {len(failures) - 10} more") - - return failures - - -def _artifact_map_by_id(artifacts): - return {artifact.factor_id: artifact for artifact in artifacts} - - -def _select_artifacts_for_ids(artifacts, factor_ids: tuple[int, ...]): - if not factor_ids: - return [artifact for artifact in artifacts if artifact.succeeded] - - artifact_map = _artifact_map_by_id(artifacts) - selected = [] - failed = [] - missing = [] - for factor_id in factor_ids: - artifact = artifact_map.get(factor_id) - if artifact is None: - missing.append(str(factor_id)) - elif not artifact.succeeded: - failed.append(artifact) - else: - selected.append(artifact) - - if missing: - click.echo(f"Missing recomputed factors for ids: {', '.join(missing)}") - raise click.Abort() - if failed: - click.echo("Requested factors failed to recompute:") - for artifact in failed: - click.echo(f" - {artifact.factor_id}: {artifact.name} ({artifact.error})") - raise click.Abort() - - return selected - - -def _analysis_output_path(output_dir: Path, stem: str, split_name: str, fmt: str) -> str: - return str(output_dir / f"{stem}_{split_name}.{fmt}") - - -def _print_benchmark_summary(title: str, payload: dict) -> None: - """Emit a concise benchmark summary for CLI runs.""" - click.echo("=" * 60) - click.echo(title) - click.echo("=" * 60) - if not payload: - click.echo("No benchmark results produced.") - return - - if all(isinstance(value, dict) and "universes" in value for value in payload.values()): - for baseline, result in payload.items(): - click.echo(f"Baseline: {baseline}") - click.echo( - f" Freeze library: {result.get('freeze_library_size', 0)} " - f"| Frozen Top-K: {len(result.get('frozen_top_k', []))}" - ) - for universe, metrics in result.get("universes", {}).items(): - library = metrics.get("library", {}) - click.echo( - f" {universe}: library IC={library.get('ic', 0.0):.4f}, " - f"ICIR={library.get('icir', 0.0):.4f}, " - f"Avg|rho|={library.get('avg_abs_rho', 0.0):.4f}" - ) - else: - 
click.echo(json.dumps(payload, indent=2)) - - -def _print_recomputed_factor_table(artifacts, split_name: str) -> None: - click.echo( - f"{'ID':>4s} {'Name':<35s} {'IC Mean':>8s} {'|IC|':>8s} " - f"{'ICIR':>7s} {'Win%':>6s} {'Turn':>6s}" - ) - click.echo("-" * 90) - - for artifact in artifacts: - stats = artifact.split_stats[split_name] - click.echo( - f"{artifact.factor_id:4d} {artifact.name:<35s} " - f"{stats['ic_mean']:8.4f} {stats['ic_abs_mean']:8.4f} " - f"{stats['icir']:7.3f} {stats['ic_win_rate'] * 100:5.1f}% " - f"{stats['turnover']:6.3f}" - ) - - -def _print_split_summary(artifacts, split_name: str) -> None: - if not artifacts: - click.echo(" No successful factor recomputations.") - return - - ic_values = [artifact.split_stats[split_name]["ic_mean"] for artifact in artifacts] - abs_ic_values = [artifact.split_stats[split_name]["ic_abs_mean"] for artifact in artifacts] - icir_values = [artifact.split_stats[split_name]["icir"] for artifact in artifacts] - click.echo("-" * 90) - click.echo(f" Total factors: {len(artifacts)}") - click.echo(f" Mean IC: {np.mean(ic_values):.4f}") - click.echo(f" Mean |IC|: {np.mean(abs_ic_values):.4f}") - click.echo(f" Mean ICIR: {np.mean(icir_values):.3f}") - click.echo(f" Max |IC|: {max(abs_ic_values):.4f}") - click.echo(f" Min |IC|: {min(abs_ic_values):.4f}") - - -def _load_library_from_path(library_path: str): - """Load a factor library, handling both .json extension and base path. 
- - Returns - ------- - FactorLibrary - """ - from factorminer.core.library_io import load_library - - path = Path(library_path) - # load_library expects the base path (without .json extension) - # but also works with .json since it calls path.with_suffix(".json") - if path.suffix == ".json": - base_path = path.with_suffix("") - else: - base_path = path - - try: - library = load_library(base_path) - click.echo(f"Loaded factor library: {library.size} factors") - return library - except FileNotFoundError: - click.echo(f"Error: Factor library not found at {library_path}") - click.echo(" Tried: {}.json".format(base_path)) - raise click.Abort() - except Exception as e: - click.echo(f"Error loading library: {e}") - raise click.Abort() - - -# --------------------------------------------------------------------------- -# Global options -# --------------------------------------------------------------------------- - -@click.group() -@click.option( - "--config", "-c", - type=click.Path(exists=True, dir_okay=False), - default=None, - help="Path to a YAML config file (merges with defaults).", -) -@click.option("--gpu/--cpu", default=True, help="Enable or disable GPU evaluation backend.") -@click.option("--verbose", "-v", is_flag=True, help="Enable debug-level logging.") -@click.option( - "--output-dir", "-o", - type=click.Path(file_okay=False), - default="output", - help="Directory for all output artifacts.", -) -@click.version_option(package_name="factorminer") -@click.pass_context -def main(ctx: click.Context, config: str | None, gpu: bool, verbose: bool, output_dir: str) -> None: - """FactorMiner -- LLM-powered quantitative factor mining.""" - _setup_logging(verbose) - - overrides: dict = {} - if not gpu: - overrides.setdefault("evaluation", {})["backend"] = "numpy" - - try: - cfg = load_config(config_path=config, overrides=overrides if overrides else None) - except Exception as e: - click.echo(f"Error loading config: {e}") - raise click.Abort() - - # Stash the raw YAML 
data for access to top-level fields like data_path - try: - import yaml - from factorminer.configs import DEFAULT_CONFIG_PATH - raw = {} - if DEFAULT_CONFIG_PATH.exists(): - with open(DEFAULT_CONFIG_PATH) as f: - raw = yaml.safe_load(f) or {} - if config: - with open(config) as f: - user_raw = yaml.safe_load(f) or {} - raw.update(user_raw) - cfg._raw = raw - except Exception: - cfg._raw = {} - - if output_dir == "output": - output_dir = cfg._raw.get("output_dir", output_dir) - - ctx.ensure_object(dict) - ctx.obj["config"] = cfg - ctx.obj["verbose"] = verbose - ctx.obj["output_dir"] = Path(output_dir) - - -# --------------------------------------------------------------------------- -# mine -# --------------------------------------------------------------------------- - -@main.command() -@click.option("--iterations", "-n", type=int, default=None, help="Override max_iterations.") -@click.option("--batch-size", "-b", type=int, default=None, help="Override batch_size.") -@click.option("--target", "-t", type=int, default=None, help="Override target_library_size.") -@click.option("--resume", type=click.Path(exists=True), default=None, help="Resume from a saved library.") -@click.option("--mock", is_flag=True, help="Use mock data and mock LLM provider (for testing).") -@click.option("--data", "data_path", type=click.Path(exists=True), default=None, help="Path to market data file.") -@click.pass_context -def mine( - ctx: click.Context, - iterations: int | None, - batch_size: int | None, - target: int | None, - resume: str | None, - mock: bool, - data_path: str | None, -) -> None: - """Run a factor mining session.""" - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - - if iterations is not None: - cfg.mining.max_iterations = iterations - if batch_size is not None: - cfg.mining.batch_size = batch_size - if target is not None: - cfg.mining.target_library_size = target - - try: - cfg.validate() - except ValueError as e: - click.echo(f"Configuration error: {e}") - 
raise click.Abort() - - click.echo("=" * 60) - click.echo("FactorMiner -- Mining Session") - click.echo("=" * 60) - click.echo(f" Target library size: {cfg.mining.target_library_size}") - click.echo(f" Batch size: {cfg.mining.batch_size}") - click.echo(f" Max iterations: {cfg.mining.max_iterations}") - click.echo(f" IC threshold: {cfg.mining.ic_threshold}") - click.echo(f" Correlation limit: {cfg.mining.correlation_threshold}") - click.echo(f" Output directory: {output_dir}") - click.echo("-" * 60) - - # Load data - try: - dataset = _load_runtime_dataset_for_analysis(cfg, data_path, mock) - except Exception as e: - click.echo(f"Error loading data: {e}") - raise click.Abort() - - click.echo( - f" Data loaded: {len(dataset.asset_ids)} assets x " - f"{len(dataset.timestamps)} periods" - ) - click.echo(" Preparing data tensors...") - data_tensor = dataset.data_tensor - returns = dataset.returns - - # Create LLM provider - llm_provider = _create_llm_provider(cfg, mock) - - # Load existing library for resume - library = None - if resume: - click.echo(f" Resuming from: {resume}") - library = _load_library_from_path(resume) - - # Create and configure MiningConfig for the RalphLoop - mining_config = _build_core_mining_config(cfg, output_dir, mock=mock) - _attach_runtime_targets(mining_config, dataset) - - # Create and run the Ralph Loop - from factorminer.core.ralph_loop import RalphLoop - - click.echo("-" * 60) - click.echo("Starting Ralph Loop...") - - def _progress_callback(iteration: int, stats: dict) -> None: - """Print progress after each iteration.""" - lib_size = stats.get("library_size", 0) - admitted = stats.get("admitted", 0) - yield_rate = stats.get("yield_rate", 0) * 100 - click.echo( - f" Iteration {iteration:3d}: " - f"Library={lib_size}, " - f"Admitted={admitted}, " - f"Yield={yield_rate:.1f}%" - ) - - try: - loop = RalphLoop( - config=mining_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=llm_provider, - library=library, - ) - 
result_library = loop.run(callback=_progress_callback) - except KeyboardInterrupt: - click.echo("\nMining interrupted by user.") - return - except Exception as e: - click.echo(f"Mining error: {e}") - logger.exception("Mining failed") - raise click.Abort() - - # Save results - lib_path = _save_result_library(result_library, output_dir) - - click.echo("=" * 60) - click.echo(f"Mining complete! Library size: {result_library.size}") - click.echo(f"Library saved to: {lib_path}") - click.echo("=" * 60) - - -# --------------------------------------------------------------------------- -# evaluate -# --------------------------------------------------------------------------- - -@main.command() -@click.argument("library_path", type=click.Path(exists=True)) -@click.option("--data", "data_path", type=click.Path(exists=True), default=None, help="Path to market data file.") -@click.option("--mock", is_flag=True, help="Use mock data for evaluation.") -@click.option("--period", type=click.Choice(["train", "test", "both"]), default="test", help="Evaluation period.") -@click.option("--top-k", type=int, default=None, help="Evaluate only the top-K factors by IC.") -@click.pass_context -def evaluate( - ctx: click.Context, - library_path: str, - data_path: str | None, - mock: bool, - period: str, - top_k: int | None, -) -> None: - """Evaluate a factor library on historical data.""" - cfg = ctx.obj["config"] - signal_failure_policy = cfg.evaluation.signal_failure_policy - - click.echo("=" * 60) - click.echo("FactorMiner -- Factor Evaluation") - click.echo("=" * 60) - - # Load library - library = _load_library_from_path(library_path) - - try: - dataset = _load_runtime_dataset_for_analysis(cfg, data_path, mock) - except Exception as e: - click.echo(f"Error loading data: {e}") - raise click.Abort() - - click.echo(f" Period: {period} | Backend: {cfg.evaluation.backend}") - click.echo( - f" Data: {len(dataset.asset_ids)} assets x {len(dataset.timestamps)} periods" - ) - - artifacts = 
_recompute_analysis_artifacts(library, dataset, signal_failure_policy) - failures = _report_artifact_failures(artifacts, header="Evaluation warnings") - - from factorminer.evaluation.runtime import analysis_split_names, select_top_k - - split_names = analysis_split_names(period) - selection_split = "train" if period == "both" else split_names[0] - selected = select_top_k(artifacts, selection_split, top_k) - if not selected: - click.echo("No factors successfully recomputed for evaluation.") - if signal_failure_policy == "reject" and failures: - raise click.Abort() - raise click.Abort() - - if top_k is not None and top_k < len([a for a in artifacts if a.succeeded]): - if period == "both": - click.echo(f" Evaluating top {top_k} factors by train |IC| for train/test comparison") - else: - click.echo(f" Evaluating top {top_k} factors by {selection_split} |IC|") - - for split_name in split_names: - click.echo("-" * 60) - click.echo(f"Split: {split_name}") - _print_recomputed_factor_table(selected, split_name) - _print_split_summary(selected, split_name) - - if period == "both" and selected: - click.echo("-" * 60) - click.echo("Decay summary (train -> test)") - click.echo(f"{'ID':>4s} {'Name':<35s} {'Train |IC|':>10s} {'Test |IC|':>9s} {'Delta':>8s}") - click.echo("-" * 80) - for artifact in selected: - train_ic = artifact.split_stats["train"]["ic_abs_mean"] - test_ic = artifact.split_stats["test"]["ic_abs_mean"] - click.echo( - f"{artifact.factor_id:4d} {artifact.name:<35s} " - f"{train_ic:10.4f} {test_ic:9.4f} {test_ic - train_ic:8.4f}" - ) - - click.echo("=" * 60) - - -# --------------------------------------------------------------------------- -# combine -# --------------------------------------------------------------------------- - -@main.command() -@click.argument("library_path", type=click.Path(exists=True)) -@click.option("--data", "data_path", type=click.Path(exists=True), default=None, help="Path to market data file.") -@click.option("--mock", is_flag=True, 
help="Use mock data for combination.") -@click.option( - "--fit-period", - type=click.Choice(["train", "test", "both"]), - default="train", - help="Split used for top-k selection and model/weight fitting.", -) -@click.option( - "--eval-period", - type=click.Choice(["train", "test", "both"]), - default="test", - help="Split used to evaluate the combined signal.", -) -@click.option( - "--method", "-m", - type=click.Choice(["equal-weight", "ic-weighted", "orthogonal", "all"]), - default="all", - help="Factor combination method.", -) -@click.option( - "--selection", "-s", - type=click.Choice(["lasso", "stepwise", "xgboost", "none"]), - default="none", - help="Factor selection method to run before combination.", -) -@click.option("--top-k", type=int, default=None, help="Select top-K factors before combining.") -@click.pass_context -def combine( - ctx: click.Context, - library_path: str, - data_path: str | None, - mock: bool, - fit_period: str, - eval_period: str, - method: str, - selection: str, - top_k: int | None, -) -> None: - """Run factor combination and selection methods.""" - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - - click.echo("=" * 60) - click.echo("FactorMiner -- Factor Combination") - click.echo("=" * 60) - - # Load library - library = _load_library_from_path(library_path) - - from factorminer.evaluation.runtime import ( - resolve_split_for_fit_eval, - select_top_k, - ) - - try: - dataset = _load_runtime_dataset_for_analysis(cfg, data_path, mock) - except Exception as e: - click.echo(f"Error loading data: {e}") - raise click.Abort() - - artifacts = _recompute_analysis_artifacts( - library, - dataset, - cfg.evaluation.signal_failure_policy, - ) - failures = _report_artifact_failures(artifacts, header="Combination warnings") - - fit_split = resolve_split_for_fit_eval(fit_period) - eval_split = resolve_split_for_fit_eval(eval_period) - - selected_artifacts = select_top_k(artifacts, fit_split, top_k) - if not selected_artifacts: - 
click.echo("No factors successfully recomputed for combination.") - if cfg.evaluation.signal_failure_policy == "reject" and failures: - raise click.Abort() - raise click.Abort() - - if top_k is not None and top_k < len([a for a in artifacts if a.succeeded]): - click.echo(f" Pre-selected top {len(selected_artifacts)} factors by {fit_split} |IC|") - - click.echo(f" Fit split: {fit_split}") - click.echo(f" Eval split: {eval_split}") - click.echo(f" Combining {len(selected_artifacts)} factors") - click.echo("-" * 60) - - # Run selection if requested - selected_ids = [artifact.factor_id for artifact in selected_artifacts] - fit_returns_tn = dataset.get_split(fit_split).returns.T - fit_factor_signals = { - artifact.factor_id: artifact.split_signals[fit_split].T - for artifact in selected_artifacts - } - - if selection != "none": - click.echo(f"\n Running {selection} selection...") - from factorminer.evaluation.selection import FactorSelector - - selector = FactorSelector() - - try: - if selection == "lasso": - results = selector.lasso_selection(fit_factor_signals, fit_returns_tn) - elif selection == "stepwise": - results = selector.forward_stepwise(fit_factor_signals, fit_returns_tn) - elif selection == "xgboost": - results = selector.xgboost_selection(fit_factor_signals, fit_returns_tn) - else: - results = [] - - if results: - selected_ids = [factor_id for factor_id, _ in results] - click.echo(f"\n {selection.capitalize()} selection results:") - click.echo(f" {'Factor ID':>10s} {'Score':>10s}") - click.echo(" " + "-" * 25) - for fid, score in results[:20]: # Show top 20 - click.echo(f" {fid:10d} {score:10.4f}") - click.echo(f" Total selected: {len(selected_ids)}") - else: - click.echo(f" {selection} selection returned no factors.") - except ImportError as e: - click.echo(f" Selection method '{selection}' requires additional packages: {e}") - except Exception as e: - click.echo(f" Selection error: {e}") - logger.exception("Selection failed") - - # Run combination methods 
- from factorminer.evaluation.combination import FactorCombiner - from factorminer.evaluation.portfolio import PortfolioBacktester - - combiner = FactorCombiner() - backtester = PortfolioBacktester() - artifact_map = _artifact_map_by_id(selected_artifacts) - eval_factor_signals = { - factor_id: artifact_map[factor_id].split_signals[eval_split].T - for factor_id in selected_ids - if factor_id in artifact_map - } - ic_values = { - factor_id: artifact_map[factor_id].split_stats[fit_split]["ic_mean"] - for factor_id in eval_factor_signals - } - eval_returns_tn = dataset.get_split(eval_split).returns.T - - methods_to_run = [] - if method == "all": - methods_to_run = ["equal-weight", "ic-weighted", "orthogonal"] - else: - methods_to_run = [method] - - for m in methods_to_run: - click.echo(f"\n {m.upper()} combination:") - try: - if m == "equal-weight": - composite = combiner.equal_weight(eval_factor_signals) - elif m == "ic-weighted": - composite = combiner.ic_weighted(eval_factor_signals, ic_values) - elif m == "orthogonal": - composite = combiner.orthogonal(eval_factor_signals) - else: - continue - - stats = backtester.quintile_backtest(composite, eval_returns_tn) - click.echo(f" IC Mean: {stats['ic_mean']:.4f}") - click.echo(f" ICIR: {stats['icir']:.4f}") - click.echo(f" Long-Short: {stats['ls_return']:.4f}") - click.echo(f" Monotonicity: {stats['monotonicity']:.2f}") - click.echo(f" Avg Turnover: {stats['avg_turnover']:.4f}") - except Exception as e: - click.echo(f" Error: {e}") - logger.exception("Combination method %s failed", m) - - if cfg.research.enabled and cfg.benchmark.mode == "research": - click.echo("\n Research model suite:") - try: - from factorminer.evaluation.research import run_research_model_suite - - research_reports = run_research_model_suite( - eval_factor_signals, - eval_returns_tn, - cfg.research, - ) - research_path = output_dir / "research_model_suite.json" - research_path.write_text(json.dumps(research_reports, indent=2)) - for model_name, 
report in research_reports.items(): - if not report.get("available", True): - click.echo(f" {model_name}: unavailable ({report.get('error', 'unknown error')})") - continue - click.echo( - f" {model_name}: " - f"net IR={report.get('mean_test_net_ir', 0.0):.4f}, " - f"ICIR={report.get('mean_test_icir', 0.0):.4f}, " - f"stability={report.get('selection_stability', 0.0):.3f}" - ) - click.echo(f" Saved: {research_path}") - except Exception as e: - click.echo(f" Research suite error: {e}") - logger.exception("Research model suite failed") - - click.echo("\n" + "=" * 60) - - -# --------------------------------------------------------------------------- -# visualize -# --------------------------------------------------------------------------- - -@main.command() -@click.argument("library_path", type=click.Path(exists=True)) -@click.option("--data", "data_path", type=click.Path(exists=True), default=None, help="Path to market data file.") -@click.option("--mock", is_flag=True, help="Use mock data for visualization.") -@click.option("--period", type=click.Choice(["train", "test", "both"]), default="test", help="Evaluation split to visualize.") -@click.option("--factor-id", "factor_ids", type=int, multiple=True, help="Specific factor ID(s) to visualize.") -@click.option("--top-k", type=int, default=None, help="Top-K factors by split |IC| for set-level plots.") -@click.option("--tearsheet", is_flag=True, help="Generate a full factor tear sheet.") -@click.option("--correlation", is_flag=True, help="Plot factor correlation heatmap.") -@click.option("--ic-timeseries", is_flag=True, help="Plot IC time series.") -@click.option("--quintile", is_flag=True, help="Plot quintile returns.") -@click.option("--format", "fmt", type=click.Choice(["png", "pdf", "svg"]), default="png", help="Output format.") -@click.pass_context -def visualize( - ctx: click.Context, - library_path: str, - data_path: str | None, - mock: bool, - period: str, - factor_ids: tuple[int, ...], - top_k: int | None, - 
tearsheet: bool, - correlation: bool, - ic_timeseries: bool, - quintile: bool, - fmt: str, -) -> None: - """Generate plots and tear sheets for a factor library.""" - output_dir = ctx.obj["output_dir"] - cfg = ctx.obj["config"] - - click.echo("=" * 60) - click.echo("FactorMiner -- Visualization") - click.echo("=" * 60) - - # Load library - library = _load_library_from_path(library_path) - - # Determine what to plot - plot_all = not (tearsheet or correlation or ic_timeseries or quintile) - if plot_all: - click.echo("No specific plots requested; generating all available.") - correlation = True - ic_timeseries = True - quintile = True - - output_dir.mkdir(parents=True, exist_ok=True) - click.echo(f" Output format: {fmt}") - click.echo(f" Output dir: {output_dir}") - click.echo(f" Period: {period}") - click.echo("-" * 60) - - try: - dataset = _load_runtime_dataset_for_analysis(cfg, data_path, mock) - except Exception as e: - click.echo(f"Error loading data: {e}") - raise click.Abort() - - artifacts = _recompute_analysis_artifacts( - library, - dataset, - cfg.evaluation.signal_failure_policy, - ) - failures = _report_artifact_failures(artifacts, header="Visualization warnings") - - from factorminer.evaluation.runtime import ( - analysis_split_names, - compute_correlation_matrix, - select_top_k, - ) - from factorminer.utils.tearsheet import FactorTearSheet - from factorminer.utils.visualization import ( - plot_correlation_heatmap, - plot_ic_timeseries, - plot_quintile_returns, - ) - - split_names = analysis_split_names(period) - explicit_artifacts = _select_artifacts_for_ids(artifacts, factor_ids) - if not explicit_artifacts and factor_ids: - if cfg.evaluation.signal_failure_policy == "reject" and failures: - raise click.Abort() - raise click.Abort() - - for split_name in split_names: - split = dataset.get_split(split_name) - click.echo(f" Split: {split_name}") - - if correlation: - if factor_ids: - corr_artifacts = explicit_artifacts - else: - corr_artifacts = 
select_top_k(artifacts, split_name, top_k) - - if corr_artifacts: - click.echo(" Generating correlation heatmap...") - corr_matrix = compute_correlation_matrix(corr_artifacts, split_name) - save_path = _analysis_output_path(output_dir, "correlation_heatmap", split_name, fmt) - plot_correlation_heatmap( - corr_matrix, - [artifact.name[:20] for artifact in corr_artifacts], - title=f"Factor Correlation Heatmap ({split_name})", - save_path=save_path, - ) - click.echo(f" Saved: {save_path}") - else: - click.echo(" Skipped: no successfully recomputed factors for correlation heatmap.") - - factor_artifacts = explicit_artifacts - if not factor_ids and (ic_timeseries or quintile or tearsheet): - factor_artifacts = select_top_k(artifacts, split_name, 1) - if factor_artifacts: - click.echo( - f" Defaulted to factor #{factor_artifacts[0].factor_id} " - f"{factor_artifacts[0].name} for factor-specific plots." - ) - - if ic_timeseries: - click.echo(" Generating IC time series plot(s)...") - for artifact in factor_artifacts: - stats = artifact.split_stats[split_name] - dates = [str(ts)[:10] for ts in split.timestamps] - save_path = _analysis_output_path( - output_dir, - f"ic_timeseries_factor_{artifact.factor_id}", - split_name, - fmt, - ) - plot_ic_timeseries( - stats["ic_series"], - dates, - title=f"{artifact.name} IC Time Series ({split_name})", - save_path=save_path, - ) - click.echo(f" Saved: {save_path}") - - if quintile: - click.echo(" Generating quintile return plot(s)...") - for artifact in factor_artifacts: - stats = artifact.split_stats[split_name] - save_path = _analysis_output_path( - output_dir, - f"quintile_returns_factor_{artifact.factor_id}", - split_name, - fmt, - ) - plot_quintile_returns( - { - f"Q{i}": stats[f"Q{i}"] for i in range(1, 6) - } - | { - "long_short": stats["long_short"], - "monotonicity": stats["monotonicity"], - }, - title=f"{artifact.name} Quintile Returns ({split_name})", - save_path=save_path, - ) - click.echo(f" Saved: {save_path}") - - if 
tearsheet: - click.echo(" Generating tear sheet(s)...") - ts = FactorTearSheet() - dates = [str(ts_)[:10] for ts_ in split.timestamps] - for artifact in factor_artifacts: - save_path = _analysis_output_path( - output_dir, - f"tearsheet_factor_{artifact.factor_id}", - split_name, - fmt, - ) - ts.generate( - factor_id=artifact.factor_id, - factor_name=artifact.name, - formula=artifact.formula, - signals=artifact.split_signals[split_name], - returns=split.returns, - dates=dates, - save_path=save_path, - ) - click.echo(f" Saved: {save_path}") - - click.echo("=" * 60) - click.echo("Visualization complete.") - - -# --------------------------------------------------------------------------- -# export -# --------------------------------------------------------------------------- - -@main.command(name="export") -@click.argument("library_path", type=click.Path(exists=True)) -@click.option( - "--format", "fmt", - type=click.Choice(["json", "csv", "formulas"]), - default="json", - help="Export format.", -) -@click.option("--output", "-o", type=click.Path(), default=None, help="Output file path.") -@click.pass_context -def export_cmd(ctx: click.Context, library_path: str, fmt: str, output: str | None) -> None: - """Export a factor library to various formats.""" - output_dir = ctx.obj["output_dir"] - - click.echo("=" * 60) - click.echo("FactorMiner -- Export") - click.echo("=" * 60) - - # Load library - library = _load_library_from_path(library_path) - - # Determine output path - if output is None: - output_dir.mkdir(parents=True, exist_ok=True) - if fmt == "formulas": - output = str(output_dir / "library_formulas.txt") - else: - output = str(output_dir / f"library.{fmt}") - - click.echo(f" Format: {fmt}") - click.echo(f" Output: {output}") - click.echo("-" * 60) - - try: - from factorminer.core.library_io import export_csv, export_formulas, save_library - - if fmt == "json": - # save_library expects base path without extension - out_path = Path(output) - if out_path.suffix == 
".json": - base = out_path.with_suffix("") - else: - base = out_path - save_library(library, base, save_signals=False) - click.echo(f" Exported {library.size} factors to {base}.json") - - elif fmt == "csv": - export_csv(library, output) - click.echo(f" Exported {library.size} factors to {output}") - - elif fmt == "formulas": - export_formulas(library, output) - click.echo(f" Exported {library.size} formulas to {output}") - - except Exception as e: - click.echo(f"Export error: {e}") - logger.exception("Export failed") - raise click.Abort() - - click.echo("=" * 60) - - -# --------------------------------------------------------------------------- -# benchmark -# --------------------------------------------------------------------------- - -@main.group() -def benchmark() -> None: - """Run strict paper/research benchmark workflows.""" - - -def _benchmark_common_options(fn): - fn = click.option( - "--data", - "data_path", - type=click.Path(exists=True), - default=None, - help="Path to market data file.", - )(fn) - fn = click.option( - "--mock", - is_flag=True, - help="Use mock data for benchmark execution.", - )(fn) - fn = click.option( - "--factor-miner-library", - type=click.Path(exists=True), - default=None, - help="Optional saved library for the FactorMiner baseline.", - )(fn) - fn = click.option( - "--factor-miner-no-memory-library", - type=click.Path(exists=True), - default=None, - help="Optional saved library for the FactorMiner No Memory baseline.", - )(fn) - return click.pass_context(fn) - - -@benchmark.command("table1") -@click.option("--baseline", "baselines", multiple=True, help="Restrict to one or more baseline ids.") -@_benchmark_common_options -def benchmark_table1( - ctx: click.Context, - data_path: str | None, - mock: bool, - factor_miner_library: str | None, - factor_miner_no_memory_library: str | None, - baselines: tuple[str, ...], -) -> None: - """Run the Top-K freeze benchmark across configured universes.""" - from factorminer.benchmark.runtime 
import run_table1_benchmark - - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - payload = run_table1_benchmark( - cfg, - output_dir, - data_path=data_path, - mock=mock, - baseline_names=list(baselines) if baselines else None, - factor_miner_library_path=factor_miner_library, - factor_miner_no_memory_library_path=factor_miner_no_memory_library, - ) - _print_benchmark_summary("FactorMiner -- Benchmark Table 1", payload) - - -@benchmark.command("ablation-memory") -@_benchmark_common_options -def benchmark_ablation_memory( - ctx: click.Context, - data_path: str | None, - mock: bool, - factor_miner_library: str | None, - factor_miner_no_memory_library: str | None, -) -> None: - """Run the experience-memory ablation benchmark.""" - from factorminer.benchmark.runtime import run_ablation_memory_benchmark - - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - payload = run_ablation_memory_benchmark( - cfg, - output_dir, - data_path=data_path, - mock=mock, - factor_miner_library_path=factor_miner_library, - factor_miner_no_memory_library_path=factor_miner_no_memory_library, - ) - _print_benchmark_summary("FactorMiner -- Memory Ablation", payload) - - -@benchmark.command("cost-pressure") -@click.option("--baseline", default="factor_miner", help="Baseline id to evaluate.") -@_benchmark_common_options -def benchmark_cost_pressure( - ctx: click.Context, - data_path: str | None, - mock: bool, - factor_miner_library: str | None, - factor_miner_no_memory_library: str | None, - baseline: str, -) -> None: - """Run transaction-cost pressure testing.""" - from factorminer.benchmark.runtime import run_cost_pressure_benchmark - - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - payload = run_cost_pressure_benchmark( - cfg, - output_dir, - baseline=baseline, - data_path=data_path, - mock=mock, - factor_miner_library_path=factor_miner_library, - ) - _print_benchmark_summary("FactorMiner -- Cost Pressure", payload) - - 
-@benchmark.command("efficiency") -@click.pass_context -def benchmark_efficiency(ctx: click.Context) -> None: - """Run operator-level and factor-level efficiency benchmarks.""" - from factorminer.benchmark.runtime import run_efficiency_benchmark - - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - payload = run_efficiency_benchmark(cfg, output_dir) - _print_benchmark_summary("FactorMiner -- Efficiency Benchmark", payload) - - -@benchmark.command("suite") -@_benchmark_common_options -def benchmark_suite( - ctx: click.Context, - data_path: str | None, - mock: bool, - factor_miner_library: str | None, - factor_miner_no_memory_library: str | None, -) -> None: - """Run the full benchmark suite.""" - from factorminer.benchmark.runtime import run_benchmark_suite - - cfg = ctx.obj["config"] - output_dir = ctx.obj["output_dir"] - payload = run_benchmark_suite( - cfg, - output_dir, - data_path=data_path, - mock=mock, - factor_miner_library_path=factor_miner_library, - factor_miner_no_memory_library_path=factor_miner_no_memory_library, - ) - _print_benchmark_summary("FactorMiner -- Benchmark Suite", payload) - - -# --------------------------------------------------------------------------- -# helix -# --------------------------------------------------------------------------- - -@main.command() -@click.option("--iterations", "-n", type=int, default=None, help="Override max_iterations.") -@click.option("--batch-size", "-b", type=int, default=None, help="Override batch_size.") -@click.option("--target", "-t", type=int, default=None, help="Override target_library_size.") -@click.option("--resume", type=click.Path(exists=True), default=None, help="Resume from a saved library.") -@click.option("--causal/--no-causal", default=None, help="Enable/disable causal validation.") -@click.option("--regime/--no-regime", default=None, help="Enable/disable regime-conditional evaluation.") -@click.option("--debate/--no-debate", default=None, help="Enable/disable multi-specialist 
debate generation.") -@click.option("--canonicalize/--no-canonicalize", default=None, help="Enable/disable SymPy canonicalization.") -@click.option("--mock", is_flag=True, help="Use mock data and mock LLM provider (for testing).") -@click.option("--data", "data_path", type=click.Path(exists=True), default=None, help="Path to market data file.") -@click.pass_context -def helix( - ctx: click.Context, - iterations: int | None, - batch_size: int | None, - target: int | None, - resume: str | None, - causal: bool | None, - regime: bool | None, - debate: bool | None, - canonicalize: bool | None, - mock: bool, - data_path: str | None, -) -> None: - """Run the enhanced Helix Loop with Phase 2 features.""" - cfg = ctx.obj["config"] - - if iterations is not None: - cfg.mining.max_iterations = iterations - if batch_size is not None: - cfg.mining.batch_size = batch_size - if target is not None: - cfg.mining.target_library_size = target - - if causal is not None: - cfg.phase2.causal.enabled = causal - if regime is not None: - cfg.phase2.regime.enabled = regime - if debate is not None: - cfg.phase2.debate.enabled = debate - if canonicalize is not None: - if canonicalize: - cfg.phase2.helix.enabled = True - cfg.phase2.helix.enable_canonicalization = canonicalize - - try: - cfg.validate() - except ValueError as e: - click.echo(f"Configuration error: {e}") - raise click.Abort() - - output_dir = ctx.obj["output_dir"] - enabled_features = _active_phase2_features(cfg) - - click.echo("HelixFactor Phase 2 mining engine.") - click.echo(f" Target: {cfg.mining.target_library_size} | " - f"Batch: {cfg.mining.batch_size} | " - f"Max iterations: {cfg.mining.max_iterations}") - click.echo(f" Output directory: {output_dir}") - - if enabled_features: - click.echo(f" Active Phase 2 features: {', '.join(enabled_features)}") - else: - click.echo(" No Phase 2 features enabled. 
Configure phase2.* in your config to enable features.") - - if resume: - click.echo(f" Resuming from: {resume}") - - try: - dataset = _load_runtime_dataset_for_analysis(cfg, data_path, mock) - except Exception as e: - click.echo(f"Error loading data: {e}") - raise click.Abort() - - click.echo(" Preparing data tensors...") - data_tensor = dataset.data_tensor - returns = dataset.returns - llm_provider = _create_llm_provider(cfg, mock) - - library = None - if resume: - library = _load_library_from_path(resume) - - mining_config = _build_core_mining_config(cfg, output_dir, mock=mock) - _attach_runtime_targets(mining_config, dataset) - phase2_configs = _build_phase2_runtime_configs(cfg) - volume = _extract_capacity_volume(data_tensor) if cfg.phase2.capacity.enabled else None - - from factorminer.core.helix_loop import HelixLoop - - click.echo("-" * 60) - click.echo("Starting Helix Loop...") - - def _progress_callback(iteration: int, stats: dict) -> None: - message = ( - f" Iteration {iteration:3d}: " - f"Library={stats.get('library_size', 0)}, " - f"Admitted={stats.get('admitted', 0)}, " - f"Yield={stats.get('yield_rate', 0) * 100:.1f}%" - ) - canon_removed = stats.get("canonical_duplicates_removed", 0) - phase2_rejections = stats.get("phase2_rejections", 0) - if canon_removed: - message += f", CanonDupes={canon_removed}" - if phase2_rejections: - message += f", Phase2Reject={phase2_rejections}" - click.echo(message) - - try: - loop = HelixLoop( - config=mining_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=llm_provider, - library=library, - debate_config=phase2_configs["debate_config"], - enable_knowledge_graph=( - cfg.phase2.helix.enabled and cfg.phase2.helix.enable_knowledge_graph - ), - enable_embeddings=( - cfg.phase2.helix.enabled and cfg.phase2.helix.enable_embeddings - ), - enable_auto_inventor=cfg.phase2.auto_inventor.enabled, - auto_invention_interval=cfg.phase2.auto_inventor.invention_interval, - canonicalize=( - cfg.phase2.helix.enabled 
and cfg.phase2.helix.enable_canonicalization - ), - forgetting_lambda=cfg.phase2.helix.forgetting_lambda, - causal_config=phase2_configs["causal_config"], - regime_config=phase2_configs["regime_config"], - capacity_config=phase2_configs["capacity_config"], - significance_config=phase2_configs["significance_config"], - volume=volume, - ) - result_library = loop.run(callback=_progress_callback) - except KeyboardInterrupt: - click.echo("\nHelix mining interrupted by user.") - return - except Exception as e: - click.echo(f"Helix mining error: {e}") - logger.exception("Helix loop failed") - raise click.Abort() - - lib_path = _save_result_library(result_library, output_dir) - - click.echo("=" * 60) - click.echo(f"Helix mining complete! Library size: {result_library.size}") - click.echo(f"Library saved to: {lib_path}") - click.echo("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/src/factorminer/factorminer/configs/__init__.py b/src/factorminer/factorminer/configs/__init__.py deleted file mode 100644 index 5d1a84e..0000000 --- a/src/factorminer/factorminer/configs/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Configuration defaults and schemas for FactorMiner.""" - -from pathlib import Path -from typing import Any, Dict - -import yaml - -CONFIGS_DIR = Path(__file__).parent -DEFAULT_CONFIG_PATH = CONFIGS_DIR / "default.yaml" - - -def load_default_yaml() -> Dict[str, Any]: - """Load the default YAML configuration shipped with the package. - - Returns - ------- - dict - Parsed YAML contents as a nested dictionary. Returns an empty - dict if the default file is missing or empty. 
- """ - if not DEFAULT_CONFIG_PATH.exists(): - return {} - with open(DEFAULT_CONFIG_PATH) as f: - data = yaml.safe_load(f) - return data if isinstance(data, dict) else {} diff --git a/src/factorminer/factorminer/configs/benchmark_full.yaml b/src/factorminer/factorminer/configs/benchmark_full.yaml deleted file mode 100644 index d5be420..0000000 --- a/src/factorminer/factorminer/configs/benchmark_full.yaml +++ /dev/null @@ -1,45 +0,0 @@ -output_dir: "./output/benchmark_full" - -evaluation: - backend: "numpy" - signal_failure_policy: "reject" - -benchmark: - mode: "paper" - baselines: - - "alpha101_classic" - - "alpha101_adapted" - - "random_exploration" - - "gplearn" - - "alphaforge_style" - - "alphaagent_style" - - "factor_miner" - - "factor_miner_no_memory" - -data: - targets: - - name: "paper" - entry_delay_bars: 1 - holding_bars: 1 - price_pair: "open_to_close" - return_transform: "simple" - default_target: "paper" - -research: - enabled: false - -phase2: - causal: - enabled: false - regime: - enabled: false - capacity: - enabled: false - significance: - enabled: false - debate: - enabled: false - auto_inventor: - enabled: false - helix: - enabled: false diff --git a/src/factorminer/factorminer/configs/default.yaml b/src/factorminer/factorminer/configs/default.yaml deleted file mode 100644 index 7e92c85..0000000 --- a/src/factorminer/factorminer/configs/default.yaml +++ /dev/null @@ -1,307 +0,0 @@ -# ============================================================================= -# FactorMiner Default Configuration -# ============================================================================= -# This file provides sensible defaults for the FactorMiner mining pipeline. -# Override any setting by providing a user config file via --config flag, -# or by passing CLI options directly. 
-# -# Resolution order: defaults -> user config -> CLI overrides -# ============================================================================= - -# --------------------------------------------------------------------------- -# Data paths -# --------------------------------------------------------------------------- -# Path to market data CSV/Parquet/HDF5 file. Set to null to use mock data. -data_path: null - -# Directory for all output artifacts (libraries, plots, logs). -output_dir: "./output" - -# --------------------------------------------------------------------------- -# Mining parameters (Ralph Loop) -# --------------------------------------------------------------------------- -mining: - # Target number of factors in the final library (paper: 110) - target_library_size: 110 - - # Number of candidate factors to generate per iteration (paper: 40) - batch_size: 40 - - # Maximum number of Ralph Loop iterations before stopping - max_iterations: 200 - - # Minimum absolute IC for a factor to pass Stage 1 screening (paper: 0.04) - ic_threshold: 0.04 - - # Minimum ICIR for factor quality filtering - icir_threshold: 0.5 - - # Maximum pairwise |rho| allowed between library factors (paper: 0.5) - correlation_threshold: 0.5 - - # Minimum IC required for a candidate to replace an existing factor - replacement_ic_min: 0.10 - - # Replacement IC ratio: candidate IC must be >= ratio * existing IC - replacement_ic_ratio: 1.3 - -# --------------------------------------------------------------------------- -# Evaluation backend -# --------------------------------------------------------------------------- -evaluation: - # Number of parallel workers for factor evaluation - num_workers: 40 - - # Number of assets used in fast IC screening (Stage 1) - fast_screen_assets: 100 - - # GPU device identifier (used when backend is "gpu") - gpu_device: "cuda:0" - - # Computation backend: "gpu" (fastest), "numpy" (CPU), or "c" (C extension) - backend: "gpu" - - # How to handle factor 
expression failures during evaluation/mining: - # "reject" = fail the factor/command, "synthetic" = deterministic fallback, - # "raise" = propagate the raw exception. - signal_failure_policy: "reject" - -# --------------------------------------------------------------------------- -# Data loading -# --------------------------------------------------------------------------- -data: - # Market type: "a_shares", "crypto", etc. - market: "a_shares" - - # Asset universe filter: "CSI500", "CSI1000", "HS300", "Binance" - universe: "CSI500" - - # Bar frequency: "10min", "30min", "1h", "1d" - frequency: "10min" - - # Feature columns available for factor construction - features: - - "$open" - - "$high" - - "$low" - - "$close" - - "$volume" - - "$amt" - - "$vwap" - - "$returns" - - # Training period [start, end] in ISO format - train_period: ["2024-01-01", "2024-12-31"] - - # Out-of-sample test period [start, end] in ISO format - test_period: ["2025-01-01", "2025-12-31"] - - # Named target definitions. The default target drives benchmark-facing metrics; - # research mode can score all configured targets jointly. - targets: - - name: "paper" - entry_delay_bars: 1 - holding_bars: 1 - price_pair: "open_to_close" - return_transform: "simple" - - # Target used for paper-style scalar evaluation surfaces. - default_target: "paper" - -# --------------------------------------------------------------------------- -# LLM provider settings -# --------------------------------------------------------------------------- -llm: - # Provider name: "openai", "anthropic", "google", or "mock" - provider: "google" - - # Model identifier (provider-specific) - # OpenAI: "gpt-4o", "gpt-4", etc. - # Anthropic: "claude-sonnet-4-6", "claude-opus-4-6", etc. - # Google: "gemini-2.0-flash", etc. - model: "gemini-2.0-flash" - - # API key for the chosen provider. 
- # Can also be set via environment variables: - # OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY - api_key: null - - # Sampling temperature for factor generation (higher = more creative) - temperature: 0.8 - - # Maximum tokens in LLM response - max_tokens: 4096 - - # Number of candidate factors requested per LLM call - batch_candidates: 40 - -# --------------------------------------------------------------------------- -# Experience memory system -# --------------------------------------------------------------------------- -memory: - # Maximum number of success patterns to retain in memory - max_success_patterns: 50 - - # Maximum number of failure patterns to retain in memory - max_failure_patterns: 100 - - # Maximum number of distilled insights in memory - max_insights: 30 - - # How often (in iterations) to consolidate memory - consolidation_interval: 10 - -# --------------------------------------------------------------------------- -# Benchmarking and reporting -# --------------------------------------------------------------------------- -benchmark: - # Execution lane: "paper" for strict reproduction, "research" for Helix extensions - mode: "paper" - - # Global RNG seed for deterministic benchmark selection and synthesis - seed: 42 - - # Number of frozen factors selected on the freeze universe/train split - freeze_top_k: 40 - - # Universe used for Top-K freeze selection - freeze_universe: "CSI500" - - # Universes included in benchmark reporting - report_universes: - - "CSI500" - - "CSI1000" - - "HS300" - - "Binance" - - # Baselines enabled in the benchmark suite - baselines: - - "alpha101_classic" - - "alpha101_adapted" - - "random_exploration" - - "gplearn" - - "alphaforge_style" - - "alphaagent_style" - - "factor_miner" - - "factor_miner_no_memory" - - # Transaction-cost pressure settings in basis points - cost_bps: [1.0, 4.0, 7.0, 10.0, 11.0] - - # Reference panel shape [periods, assets] used for efficiency benchmarks - efficiency_panel_shape: [12610, 
500] - -# --------------------------------------------------------------------------- -# Research-first multi-horizon scoring -# --------------------------------------------------------------------------- -research: - enabled: false - - # single_horizon | weighted_multi_horizon | pareto_multi_horizon | net_ir - primary_objective: "weighted_multi_horizon" - - # weighted | pareto - target_aggregation: "weighted" - - # Optional explicit weights per target name. Empty => infer from data.targets order. - horizon_weights: {} - - uncertainty: - bootstrap_samples: 200 - block_size: 20 - shrinkage_strength: 1.0 - lcb_zscore: 1.0 - fdr_alpha: 0.05 - - admission: - use_residual_ic: true - use_effective_rank_gain: true - turnover_penalty: 0.05 - redundancy_penalty: 0.20 - min_score: 0.04 - min_lcb: 0.0 - min_span_gain: 0.05 - min_effective_rank_gain: 0.0 - - selection: - models: ["ridge", "elastic_net", "lasso", "xgboost"] - rolling_train_window: 80 - rolling_test_window: 20 - rolling_step: 20 - - regimes: - enabled: false - definition: "return_volatility_liquidity" - - execution: - cost_model: "linear_bps" - cost_bps: 4.0 - -# --------------------------------------------------------------------------- -# Phase 2 advanced features (disabled by default) -# --------------------------------------------------------------------------- -phase2: - # Causal validation: Granger causality + intervention tests - causal: - enabled: false - granger_max_lag: 5 - granger_significance: 0.05 - n_interventions: 3 - intervention_magnitude: 2.0 - intervention_ic_threshold: 0.5 - robustness_threshold: 0.4 - granger_weight: 0.4 - intervention_weight: 0.6 - - # Regime-conditional evaluation (bull/bear/high-vol/low-vol) - regime: - enabled: false - lookback_window: 60 - bull_return_threshold: 0.0 - bear_return_threshold: 0.0 - volatility_percentile: 0.7 - min_regime_ic: 0.03 - min_regimes_passing: 2 - - # Strategy capacity estimation - capacity: - enabled: false - base_capital_usd: 100000000.0 - 
ic_degradation_limit: 0.20 - net_icir_threshold: 0.3 - sigma_annual: 0.25 - - # Statistical significance testing (bootstrap + FDR) - significance: - enabled: false - bootstrap_n_samples: 1000 - bootstrap_block_size: 20 - fdr_level: 0.05 - deflated_sharpe_enabled: true - min_deflated_sharpe: 0.0 - - # Multi-specialist debate-based generation - debate: - enabled: false - num_specialists: 3 - candidates_per_specialist: 15 - enable_critic: true - top_k_after_critic: 40 - critic_temperature: 0.3 - - # Automatic operator invention - auto_inventor: - enabled: false - invention_interval: 10 - max_proposals_per_round: 5 - min_ic_contribution: 0.03 - store_dir: "./output/custom_operators" - - # Helix knowledge and memory system - helix: - enabled: false - enable_knowledge_graph: false - enable_embeddings: false - enable_canonicalization: true - forgetting_lambda: 0.95 - forgetting_demotion_threshold: 20 diff --git a/src/factorminer/factorminer/configs/demo_local.yaml b/src/factorminer/factorminer/configs/demo_local.yaml deleted file mode 100644 index e54bce7..0000000 --- a/src/factorminer/factorminer/configs/demo_local.yaml +++ /dev/null @@ -1,57 +0,0 @@ -output_dir: "./output/demo_local" - -evaluation: - backend: "numpy" - signal_failure_policy: "synthetic" - num_workers: 4 - fast_screen_assets: 20 - -mining: - batch_size: 8 - max_iterations: 4 - target_library_size: 16 - -benchmark: - mode: "research" - freeze_top_k: 10 - report_universes: - - "CSI500" - -data: - targets: - - name: "h1_open_to_close" - entry_delay_bars: 1 - holding_bars: 1 - price_pair: "open_to_close" - return_transform: "simple" - - name: "h3_open_to_close" - entry_delay_bars: 1 - holding_bars: 3 - price_pair: "open_to_close" - return_transform: "simple" - default_target: "h1_open_to_close" - -research: - enabled: true - horizon_weights: - h1_open_to_close: 0.7 - h3_open_to_close: 0.3 - uncertainty: - bootstrap_samples: 50 - block_size: 8 - selection: - rolling_train_window: 40 - rolling_test_window: 10 
- rolling_step: 10 - regimes: - enabled: true - -phase2: - debate: - enabled: true - num_specialists: 2 - candidates_per_specialist: 4 - top_k_after_critic: 8 - helix: - enabled: true - enable_canonicalization: true diff --git a/src/factorminer/factorminer/configs/helix_research.yaml b/src/factorminer/factorminer/configs/helix_research.yaml deleted file mode 100644 index 3c32f71..0000000 --- a/src/factorminer/factorminer/configs/helix_research.yaml +++ /dev/null @@ -1,82 +0,0 @@ -output_dir: "./output/helix_research" - -benchmark: - mode: "research" - -data: - targets: - - name: "h1_open_to_close" - entry_delay_bars: 1 - holding_bars: 1 - price_pair: "open_to_close" - return_transform: "simple" - - name: "h3_open_to_close" - entry_delay_bars: 1 - holding_bars: 3 - price_pair: "open_to_close" - return_transform: "simple" - - name: "h6_open_to_close" - entry_delay_bars: 1 - holding_bars: 6 - price_pair: "open_to_close" - return_transform: "simple" - - name: "h1_close_to_close" - entry_delay_bars: 0 - holding_bars: 1 - price_pair: "close_to_close" - return_transform: "simple" - - name: "h5_close_to_close" - entry_delay_bars: 0 - holding_bars: 5 - price_pair: "close_to_close" - return_transform: "simple" - default_target: "h1_open_to_close" - -research: - enabled: true - primary_objective: "weighted_multi_horizon" - target_aggregation: "weighted" - horizon_weights: - h1_open_to_close: 0.35 - h3_open_to_close: 0.25 - h6_open_to_close: 0.15 - h1_close_to_close: 0.15 - h5_close_to_close: 0.10 - uncertainty: - bootstrap_samples: 200 - block_size: 20 - shrinkage_strength: 1.0 - lcb_zscore: 1.0 - admission: - use_residual_ic: true - use_effective_rank_gain: true - turnover_penalty: 0.05 - redundancy_penalty: 0.20 - min_score: 0.04 - min_lcb: 0.0 - min_span_gain: 0.05 - selection: - models: ["ridge", "elastic_net", "lasso", "xgboost"] - regimes: - enabled: true - definition: "return_volatility_liquidity" - execution: - cost_model: "linear_bps" - cost_bps: 4.0 - -phase2: - 
causal: - enabled: true - regime: - enabled: true - capacity: - enabled: true - significance: - enabled: true - debate: - enabled: true - helix: - enabled: true - enable_knowledge_graph: true - enable_embeddings: true - enable_canonicalization: true diff --git a/src/factorminer/factorminer/configs/paper_repro.yaml b/src/factorminer/factorminer/configs/paper_repro.yaml deleted file mode 100644 index a872c00..0000000 --- a/src/factorminer/factorminer/configs/paper_repro.yaml +++ /dev/null @@ -1,36 +0,0 @@ -output_dir: "./output/paper_repro" - -evaluation: - backend: "numpy" - signal_failure_policy: "reject" - -benchmark: - mode: "paper" - -data: - targets: - - name: "paper" - entry_delay_bars: 1 - holding_bars: 1 - price_pair: "open_to_close" - return_transform: "simple" - default_target: "paper" - -research: - enabled: false - -phase2: - causal: - enabled: false - regime: - enabled: false - capacity: - enabled: false - significance: - enabled: false - debate: - enabled: false - auto_inventor: - enabled: false - helix: - enabled: false diff --git a/src/factorminer/factorminer/core/__init__.py b/src/factorminer/factorminer/core/__init__.py deleted file mode 100644 index d02b8d2..0000000 --- a/src/factorminer/factorminer/core/__init__.py +++ /dev/null @@ -1,67 +0,0 @@ -"""FactorMiner core: expression trees, types, factor DSL parser, and Ralph Loop.""" - -from src.factorminer.factorminer.core.expression_tree import ( - ConstantNode, - ExpressionTree, - LeafNode, - Node, - OperatorNode, -) -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.library_io import ( - export_csv, - export_formulas, - import_from_paper, - load_library, - save_library, -) -from src.factorminer.factorminer.core.parser import parse, try_parse -from src.factorminer.factorminer.core.ralph_loop import RalphLoop -from src.factorminer.factorminer.core.helix_loop import HelixLoop -from src.factorminer.factorminer.core.session import 
MiningSession -from src.factorminer.factorminer.core.config import MiningConfig as CoreMiningConfig -from src.factorminer.factorminer.core.types import ( - FEATURES, - FEATURE_SET, - OPERATOR_REGISTRY, - OperatorSpec, - OperatorType, - SignatureType, - get_operator, -) -from src.factorminer.factorminer.core.canonicalizer import FormulaCanonicalizer - -__all__ = [ - # Expression tree - "Node", - "LeafNode", - "ConstantNode", - "OperatorNode", - "ExpressionTree", - # Factor library - "Factor", - "FactorLibrary", - "save_library", - "load_library", - "export_csv", - "export_formulas", - "import_from_paper", - # Parser - "parse", - "try_parse", - # Loops - "RalphLoop", - "HelixLoop", - "MiningSession", - "CoreMiningConfig", - # Types - "OperatorSpec", - "OperatorType", - "SignatureType", - "FEATURES", - "FEATURE_SET", - "OPERATOR_REGISTRY", - "get_operator", - # Canonicalizer - "FormulaCanonicalizer", -] diff --git a/src/factorminer/factorminer/core/canonicalizer.py b/src/factorminer/factorminer/core/canonicalizer.py deleted file mode 100644 index 0ce3889..0000000 --- a/src/factorminer/factorminer/core/canonicalizer.py +++ /dev/null @@ -1,206 +0,0 @@ -"""SymPy-based formula canonicalization for duplicate detection. - -Converts ``ExpressionTree`` objects into canonical SymPy expressions so that -algebraically equivalent formulas (e.g. ``Add($close, $open)`` vs -``Add($open, $close)``, or ``Neg(Neg($close))`` vs ``$close``) produce -identical hashes. - -**Design principle**: Arithmetic operators map to native SymPy math so that -standard simplifications (commutativity, double-negation, x/x = 1, etc.) are -applied automatically. Non-algebraic operators (rolling windows, -cross-sectional transforms, conditionals) are represented as opaque -``sympy.Function`` symbols so their structure is preserved without false -simplification. 
-""" - -from __future__ import annotations - -import hashlib -from typing import Dict, List, Optional - -import sympy -from sympy import Abs, Float, Function, Symbol, log, sqrt - -from src.factorminer.factorminer.core.expression_tree import ( - ConstantNode, - ExpressionTree, - LeafNode, - Node, - OperatorNode, -) - -# Arithmetic operator names that map directly to SymPy math. -_ALGEBRAIC_OPS = frozenset({ - "Add", "Sub", "Mul", "Div", "Neg", "Abs", - "Square", "Sqrt", "Log", "Pow", "SignedPower", -}) - - -class FormulaCanonicalizer: - """Canonicalize expression trees via SymPy simplification. - - Maintains an internal cache so that repeated calls for the same formula - string are fast. - - Examples - -------- - >>> from factorminer.core.parser import parse - >>> canon = FormulaCanonicalizer() - >>> canon.is_duplicate(parse("Add($close, $open)"), parse("Add($open, $close)")) - True - >>> canon.is_duplicate(parse("Neg(Neg($close))"), parse("$close")) - True - """ - - def __init__(self) -> None: - self._cache: Dict[str, str] = {} # formula string -> canonical MD5 hash - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def canonicalize(self, tree: ExpressionTree) -> str: - """Return an MD5 hash of the canonical (simplified) form of *tree*. - - Parameters - ---------- - tree : ExpressionTree - The expression tree to canonicalize. - - Returns - ------- - str - Hex-encoded MD5 digest of the canonical string representation. 
- """ - key = tree.to_string() - if key in self._cache: - return self._cache[key] - - sympy_expr = self._tree_to_sympy(tree.root) - simplified = sympy.simplify(sympy_expr) - canonical_str = str(simplified) - digest = hashlib.md5(canonical_str.encode("utf-8")).hexdigest() - self._cache[key] = digest - return digest - - def is_duplicate(self, tree_a: ExpressionTree, tree_b: ExpressionTree) -> bool: - """Return ``True`` if *tree_a* and *tree_b* are algebraically equivalent. - - Parameters - ---------- - tree_a, tree_b : ExpressionTree - Two expression trees to compare. - - Returns - ------- - bool - """ - return self.canonicalize(tree_a) == self.canonicalize(tree_b) - - def get_canonical_form(self, tree: ExpressionTree) -> str: - """Return the simplified string representation (not hashed). - - Useful for debugging and display. - - Parameters - ---------- - tree : ExpressionTree - - Returns - ------- - str - Human-readable simplified expression. - """ - sympy_expr = self._tree_to_sympy(tree.root) - simplified = sympy.simplify(sympy_expr) - return str(simplified) - - def clear_cache(self) -> None: - """Discard all cached canonical hashes.""" - self._cache.clear() - - # ------------------------------------------------------------------ - # Tree -> SymPy conversion - # ------------------------------------------------------------------ - - def _tree_to_sympy(self, node: Node) -> sympy.Expr: - """Recursively convert an expression-tree node to a SymPy expression. - - Parameters - ---------- - node : Node - Any node in the expression tree hierarchy. 
- - Returns - ------- - sympy.Expr - """ - if isinstance(node, LeafNode): - return Symbol(node.feature_name) - - if isinstance(node, ConstantNode): - return Float(node.value) - - if isinstance(node, OperatorNode): - children_sympy = [self._tree_to_sympy(c) for c in node.children] - return self._map_operator( - node.operator.name, children_sympy, node.params - ) - - raise TypeError(f"Unexpected node type: {type(node).__name__}") - - def _map_operator( - self, - name: str, - children: List[sympy.Expr], - params: Dict[str, float], - ) -> sympy.Expr: - """Dispatch an operator to its SymPy equivalent. - - Arithmetic operators are mapped to native SymPy math so the - simplifier can reason about them. All other operators become opaque - ``sympy.Function`` applications that preserve structure. - - Parameters - ---------- - name : str - Operator name from the registry (e.g. ``"Add"``, ``"CsRank"``). - children : list[sympy.Expr] - Already-converted child expressions. - params : dict[str, float] - Extra numeric parameters (e.g. ``{"window": 10}``). - - Returns - ------- - sympy.Expr - """ - # --- Arithmetic: map to SymPy math ------------------------------------ - if name == "Add": - return children[0] + children[1] - if name == "Sub": - return children[0] - children[1] - if name == "Mul": - return children[0] * children[1] - if name == "Div": - return children[0] / children[1] - if name == "Neg": - return -children[0] - if name == "Abs": - return Abs(children[0]) - if name == "Square": - return children[0] ** 2 - if name == "Sqrt": - return sqrt(Abs(children[0])) - if name == "Log": - return log(1 + Abs(children[0])) - if name in ("Pow", "SignedPower"): - return children[0] ** children[1] - - # --- Non-algebraic: wrap as opaque Function --------------------------- - func = Function(name) - # Build argument list: children first, then params as Float values - args: List[sympy.Expr] = list(children) - # Append params in a deterministic order (sorted by param name). 
- for pname in sorted(params): - args.append(Float(params[pname])) - return func(*args) diff --git a/src/factorminer/factorminer/core/config.py b/src/factorminer/factorminer/core/config.py deleted file mode 100644 index bd2537a..0000000 --- a/src/factorminer/factorminer/core/config.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Mining-specific configuration for the Ralph Loop. - -Provides a flat configuration dataclass specifically for the mining loop, -separate from the hierarchical Config system in utils/config.py. This -allows the RalphLoop to accept a simple, focused parameter object while -the full Config handles loading, validation, and serialization. -""" - -from __future__ import annotations - -from dataclasses import dataclass - - -@dataclass -class MiningConfig: - """Flat configuration controlling the Ralph Loop mining process. - - This is a convenience alias that mirrors the mining-relevant fields - from the hierarchical Config. The RalphLoop can accept either this - or the full ``utils.config.MiningConfig``. 
- """ - - target_library_size: int = 110 - batch_size: int = 40 - max_iterations: int = 200 - ic_threshold: float = 0.04 - icir_threshold: float = 0.5 - correlation_threshold: float = 0.5 - replacement_ic_min: float = 0.10 - replacement_ic_ratio: float = 1.3 - fast_screen_assets: int = 100 - num_workers: int = 40 - output_dir: str = "./output" - gpu_device: str = "cuda:0" - backend: str = "numpy" - signal_failure_policy: str = "reject" - - def validate(self) -> None: - """Basic sanity checks on parameter values.""" - if self.target_library_size < 1: - raise ValueError("target_library_size must be >= 1") - if self.batch_size < 1: - raise ValueError("batch_size must be >= 1") - if self.max_iterations < 1: - raise ValueError("max_iterations must be >= 1") - if not (0.0 < self.ic_threshold < 1.0): - raise ValueError("ic_threshold must be in (0, 1)") - if not (0.0 < self.correlation_threshold <= 1.0): - raise ValueError("correlation_threshold must be in (0, 1]") - if self.replacement_ic_min <= self.ic_threshold: - raise ValueError("replacement_ic_min must be > ic_threshold") - if self.replacement_ic_ratio < 1.0: - raise ValueError("replacement_ic_ratio must be >= 1.0") - if self.backend not in ("gpu", "numpy", "c"): - raise ValueError( - f"backend must be one of: gpu, numpy, c (got '{self.backend}')" - ) - if self.signal_failure_policy not in ("reject", "synthetic", "raise"): - raise ValueError( - "signal_failure_policy must be one of: reject, synthetic, raise" - ) diff --git a/src/factorminer/factorminer/core/expression_tree.py b/src/factorminer/factorminer/core/expression_tree.py deleted file mode 100644 index e8a5da3..0000000 --- a/src/factorminer/factorminer/core/expression_tree.py +++ /dev/null @@ -1,736 +0,0 @@ -"""Expression tree data structure for alpha-factor formulas. 
- -An expression tree is a DAG of ``Node`` objects whose leaves are raw -market-data features (``LeafNode``) or numeric constants (``ConstantNode``) -and whose internal nodes are operator applications (``OperatorNode``). -""" - -from __future__ import annotations - -import copy -import math -from abc import ABC, abstractmethod -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union - -import numpy as np - -from src.factorminer.factorminer.core.types import ( - FEATURE_SET, - OperatorSpec, - OperatorType, - SignatureType, -) - -# Epsilon for safe division / log -_EPS = 1e-10 - - -# --------------------------------------------------------------------------- -# Node hierarchy -# --------------------------------------------------------------------------- - -class Node(ABC): - """Abstract base for every node in an expression tree.""" - - @abstractmethod - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - """Compute the node's value given market data. - - Parameters - ---------- - data : dict[str, np.ndarray] - Maps feature names (e.g. ``"$close"``) to arrays of shape - ``(M, T)`` where *M* is the number of stocks and *T* is the - number of time steps. - - Returns - ------- - np.ndarray - Result array, typically shape ``(M, T)``. 
- """ - - @abstractmethod - def to_string(self) -> str: - """Serialize the subtree rooted at this node to a DSL formula.""" - - @abstractmethod - def depth(self) -> int: - """Return the depth of the subtree (leaf = 1).""" - - @abstractmethod - def size(self) -> int: - """Return the number of nodes in the subtree.""" - - @abstractmethod - def clone(self) -> "Node": - """Return a deep copy of the subtree.""" - - def __repr__(self) -> str: # pragma: no cover - return self.to_string() - - # Iteration helpers ----------------------------------------------------- - - def iter_nodes(self) -> Iterator["Node"]: - """Yield every node in the subtree (pre-order).""" - yield self - if isinstance(self, OperatorNode): - for child in self.children: - yield from child.iter_nodes() - - def leaf_features(self) -> List[str]: - """Return sorted unique feature names referenced by this subtree.""" - feats = {n.feature_name for n in self.iter_nodes() if isinstance(n, LeafNode)} - return sorted(feats) - - -class LeafNode(Node): - """References a raw market-data column (e.g. ``$close``).""" - - __slots__ = ("feature_name",) - - def __init__(self, feature_name: str) -> None: - if feature_name not in FEATURE_SET: - raise ValueError( - f"Unknown feature '{feature_name}'. " - f"Expected one of {sorted(FEATURE_SET)}." - ) - self.feature_name = feature_name - - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - if self.feature_name not in data: - raise KeyError( - f"Feature '{self.feature_name}' not found in data. 
" - f"Available: {sorted(data.keys())}" - ) - return data[self.feature_name].astype(np.float64, copy=False) - - def to_string(self) -> str: - return self.feature_name - - def depth(self) -> int: - return 1 - - def size(self) -> int: - return 1 - - def clone(self) -> "LeafNode": - return LeafNode(self.feature_name) - - -class ConstantNode(Node): - """A numeric literal embedded in the expression.""" - - __slots__ = ("value",) - - def __init__(self, value: float) -> None: - self.value = float(value) - - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - # Infer shape from any entry in data so the constant broadcasts. - for arr in data.values(): - return np.full_like(arr, self.value, dtype=np.float64) - raise ValueError("Cannot evaluate ConstantNode with empty data dict.") - - def to_string(self) -> str: - # Produce a clean numeric literal. - if self.value == int(self.value) and abs(self.value) < 1e12: - return str(int(self.value)) - return f"{self.value:g}" - - def depth(self) -> int: - return 1 - - def size(self) -> int: - return 1 - - def clone(self) -> "ConstantNode": - return ConstantNode(self.value) - - -class OperatorNode(Node): - """An internal node that applies an operator to child sub-trees. - - Parameters - ---------- - operator : OperatorSpec - The operator to apply. - children : list[Node] - Child expression nodes. Length must equal ``operator.arity``. - params : dict[str, float] - Extra numeric parameters (e.g. ``{"window": 10}``). - """ - - __slots__ = ("operator", "children", "params") - - def __init__( - self, - operator: OperatorSpec, - children: List[Node], - params: Optional[Dict[str, float]] = None, - ) -> None: - self.operator = operator - self.children = list(children) - self.params = dict(params) if params else {} - # Merge defaults for any missing parameter. 
- for pname, pdefault in operator.param_defaults.items(): - if pname not in self.params: - self.params[pname] = pdefault - - # ---- serialization ---------------------------------------------------- - - def to_string(self) -> str: - parts = [child.to_string() for child in self.children] - # Append explicit numeric parameters (window etc.) - for pname in self.operator.param_names: - if pname in self.params: - v = self.params[pname] - if v == int(v) and abs(v) < 1e12: - parts.append(str(int(v))) - else: - parts.append(f"{v:g}") - return f"{self.operator.name}({', '.join(parts)})" - - # ---- structural queries ----------------------------------------------- - - def depth(self) -> int: - if not self.children: - return 1 - return 1 + max(c.depth() for c in self.children) - - def size(self) -> int: - return 1 + sum(c.size() for c in self.children) - - def clone(self) -> "OperatorNode": - return OperatorNode( - operator=self.operator, - children=[c.clone() for c in self.children], - params=dict(self.params), - ) - - # ---- evaluation ------------------------------------------------------- - - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - child_vals = [c.evaluate(data) for c in self.children] - return _dispatch_operator(self.operator, child_vals, self.params) - - -# --------------------------------------------------------------------------- -# Operator dispatch (pure-numpy implementations) -# --------------------------------------------------------------------------- - -def _safe_div(a: np.ndarray, b: np.ndarray) -> np.ndarray: - """Division that returns 0 where the denominator is near zero.""" - out = np.where(np.abs(b) > _EPS, a / np.where(np.abs(b) > _EPS, b, 1.0), 0.0) - return out - - -def _safe_log(x: np.ndarray) -> np.ndarray: - return np.sign(x) * np.log1p(np.abs(x)) - - -def _safe_sqrt(x: np.ndarray) -> np.ndarray: - return np.sign(x) * np.sqrt(np.abs(x)) - - -def _rolling_apply( - x: np.ndarray, - window: int, - func, - *, - binary_y: 
Optional[np.ndarray] = None, -) -> np.ndarray: - """Apply *func* over a rolling window along the last axis (T). - - Parameters - ---------- - x : np.ndarray, shape (M, T) - window : int - func : callable (slice_x, [slice_y]) -> scalar or 1-d - binary_y : optional second array for bivariate rolling ops - - Returns - ------- - np.ndarray, shape (M, T) – leading positions filled with NaN. - """ - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - sx = x[:, t - window + 1 : t + 1] - if binary_y is not None: - sy = binary_y[:, t - window + 1 : t + 1] - out[:, t] = func(sx, sy) - else: - out[:, t] = func(sx) - return out - - -def _ts_mean(sx: np.ndarray) -> np.ndarray: - return np.nanmean(sx, axis=1) - - -def _ts_std(sx: np.ndarray) -> np.ndarray: - return np.nanstd(sx, axis=1, ddof=1) - - -def _ts_var(sx: np.ndarray) -> np.ndarray: - return np.nanvar(sx, axis=1, ddof=1) - - -def _ts_sum(sx: np.ndarray) -> np.ndarray: - return np.nansum(sx, axis=1) - - -def _ts_prod(sx: np.ndarray) -> np.ndarray: - return np.nanprod(sx, axis=1) - - -def _ts_max(sx: np.ndarray) -> np.ndarray: - return np.nanmax(sx, axis=1) - - -def _ts_min(sx: np.ndarray) -> np.ndarray: - return np.nanmin(sx, axis=1) - - -def _ts_argmax(sx: np.ndarray) -> np.ndarray: - return np.nanargmax(sx, axis=1).astype(np.float64) - - -def _ts_argmin(sx: np.ndarray) -> np.ndarray: - return np.nanargmin(sx, axis=1).astype(np.float64) - - -def _ts_median(sx: np.ndarray) -> np.ndarray: - return np.nanmedian(sx, axis=1) - - -def _ts_skew(sx: np.ndarray) -> np.ndarray: - m = np.nanmean(sx, axis=1, keepdims=True) - s = np.nanstd(sx, axis=1, keepdims=True, ddof=1) - s = np.where(s > _EPS, s, 1.0) - n = sx.shape[1] - sk = np.nanmean(((sx - m) / s) ** 3, axis=1) * n**2 / max((n - 1) * (n - 2), 1) - return sk - - -def _ts_kurt(sx: np.ndarray) -> np.ndarray: - m = np.nanmean(sx, axis=1, keepdims=True) - s = np.nanstd(sx, axis=1, keepdims=True, ddof=1) - s = np.where(s > _EPS, s, 
1.0) - return np.nanmean(((sx - m) / s) ** 4, axis=1) - 3.0 - - -def _ts_rank(sx: np.ndarray) -> np.ndarray: - """Percentile rank of the latest value within the window.""" - latest = sx[:, -1] - rank = np.sum(sx <= latest[:, None], axis=1).astype(np.float64) - return rank / sx.shape[1] - - -def _ts_corr(sx: np.ndarray, sy: np.ndarray) -> np.ndarray: - mx = np.nanmean(sx, axis=1, keepdims=True) - my = np.nanmean(sy, axis=1, keepdims=True) - dx, dy = sx - mx, sy - my - cov = np.nanmean(dx * dy, axis=1) - sx_std = np.nanstd(sx, axis=1, ddof=1) - sy_std = np.nanstd(sy, axis=1, ddof=1) - denom = sx_std * sy_std - return np.where(denom > _EPS, cov / denom, 0.0) - - -def _ts_cov(sx: np.ndarray, sy: np.ndarray) -> np.ndarray: - mx = np.nanmean(sx, axis=1, keepdims=True) - my = np.nanmean(sy, axis=1, keepdims=True) - return np.nanmean((sx - mx) * (sy - my), axis=1) - - -def _ts_beta(sx: np.ndarray, sy: np.ndarray) -> np.ndarray: - """Rolling OLS slope of x on y.""" - my = np.nanmean(sy, axis=1, keepdims=True) - mx = np.nanmean(sx, axis=1, keepdims=True) - dy = sy - my - var_y = np.nansum(dy ** 2, axis=1) - cov_xy = np.nansum((sx - mx) * dy, axis=1) - return np.where(var_y > _EPS, cov_xy / var_y, 0.0) - - -def _ts_resid(sx: np.ndarray, sy: np.ndarray) -> np.ndarray: - beta = _ts_beta(sx, sy) - my = np.nanmean(sy, axis=1, keepdims=True) - mx = np.nanmean(sx, axis=1, keepdims=True) - predicted = mx.squeeze(1) + beta * (sy[:, -1] - my.squeeze(1)) - return sx[:, -1] - predicted - - -def _ema(x: np.ndarray, window: int) -> np.ndarray: - """Exponential moving average along the last axis.""" - alpha = 2.0 / (window + 1) - M, T = x.shape - out = np.empty_like(x, dtype=np.float64) - out[:, 0] = x[:, 0] - for t in range(1, T): - out[:, t] = alpha * x[:, t] + (1 - alpha) * out[:, t - 1] - return out - - -def _wma(x: np.ndarray, window: int) -> np.ndarray: - """Linearly-weighted moving average.""" - weights = np.arange(1, window + 1, dtype=np.float64) - weights /= weights.sum() - M, T = 
x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - out[:, t] = (x[:, t - window + 1 : t + 1] * weights[None, :]).sum(axis=1) - return out - - -def _decay(x: np.ndarray, window: int) -> np.ndarray: - """Exponentially decaying sum.""" - alpha = 2.0 / (window + 1) - weights = np.array([alpha * (1 - alpha) ** i for i in range(window)][::-1]) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - out[:, t] = (x[:, t - window + 1 : t + 1] * weights[None, :]).sum(axis=1) - return out - - -def _cs_rank(x: np.ndarray) -> np.ndarray: - """Cross-sectional percentile rank at each time step.""" - M, T = x.shape - out = np.empty_like(x, dtype=np.float64) - for t in range(T): - col = x[:, t] - valid = ~np.isnan(col) - ranked = np.empty(M, dtype=np.float64) - ranked[:] = np.nan - if valid.any(): - order = col[valid].argsort().argsort().astype(np.float64) - ranked[valid] = (order + 1) / valid.sum() - out[:, t] = ranked - return out - - -def _cs_zscore(x: np.ndarray) -> np.ndarray: - M, T = x.shape - out = np.empty_like(x, dtype=np.float64) - for t in range(T): - col = x[:, t] - m = np.nanmean(col) - s = np.nanstd(col, ddof=1) - out[:, t] = (col - m) / max(s, _EPS) - return out - - -def _cs_demean(x: np.ndarray) -> np.ndarray: - m = np.nanmean(x, axis=0, keepdims=True) - return x - m - - -def _cs_scale(x: np.ndarray) -> np.ndarray: - s = np.nansum(np.abs(x), axis=0, keepdims=True) - s = np.where(s > _EPS, s, 1.0) - return x / s - - -def _ts_linreg_slope(x: np.ndarray, window: int) -> np.ndarray: - t_vals = np.arange(window, dtype=np.float64) - t_mean = t_vals.mean() - t_var = np.sum((t_vals - t_mean) ** 2) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - sx = x[:, t - window + 1 : t + 1] - x_mean = np.nanmean(sx, axis=1, keepdims=True) - cov = np.nansum((sx - x_mean) * (t_vals[None, :] - t_mean), axis=1) - out[:, t] = cov / max(t_var, _EPS) - 
return out - - -def _ts_linreg_intercept(x: np.ndarray, window: int) -> np.ndarray: - t_vals = np.arange(window, dtype=np.float64) - t_mean = t_vals.mean() - slope = _ts_linreg_slope(x, window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - sx = x[:, t - window + 1 : t + 1] - x_mean = np.nanmean(sx, axis=1) - out[:, t] = x_mean - slope[:, t] * t_mean - return out - - -def _ts_linreg_fitted(x: np.ndarray, window: int) -> np.ndarray: - slope = _ts_linreg_slope(x, window) - intercept = _ts_linreg_intercept(x, window) - t_last = float(window - 1) - return intercept + slope * t_last - - -def _ts_linreg_resid(x: np.ndarray, window: int) -> np.ndarray: - fitted = _ts_linreg_fitted(x, window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(window - 1, T): - out[:, t] = x[:, t] - fitted[:, t] - return out - - -# Main dispatch table ------------------------------------------------------- - -def _dispatch_operator( - spec: OperatorSpec, - children: List[np.ndarray], - params: Dict[str, float], -) -> np.ndarray: - """Execute an operator on evaluated children, return result array.""" - name = spec.name - w = int(params.get("window", 0)) - - # -- Arithmetic --------------------------------------------------------- - if name == "Add": - return children[0] + children[1] - if name == "Sub": - return children[0] - children[1] - if name == "Mul": - return children[0] * children[1] - if name == "Div": - return _safe_div(children[0], children[1]) - if name == "Neg": - return -children[0] - if name == "Abs": - return np.abs(children[0]) - if name == "Sign": - return np.sign(children[0]) - if name == "Log": - return _safe_log(children[0]) - if name == "Sqrt": - return _safe_sqrt(children[0]) - if name == "Square": - return children[0] ** 2 - if name == "Pow": - base, exp = children - return np.sign(base) * np.abs(base) ** exp - if name == "Max": - return np.maximum(children[0], children[1]) - if 
name == "Min": - return np.minimum(children[0], children[1]) - if name == "Clip": - lo = params.get("lower", -3.0) - hi = params.get("upper", 3.0) - return np.clip(children[0], lo, hi) - if name == "Inv": - return _safe_div(np.ones_like(children[0]), children[0]) - - # -- Statistical (rolling) ---------------------------------------------- - if name == "Mean": - return _rolling_apply(children[0], w, _ts_mean) - if name == "Std": - return _rolling_apply(children[0], w, _ts_std) - if name == "Var": - return _rolling_apply(children[0], w, _ts_var) - if name == "Skew": - return _rolling_apply(children[0], w, _ts_skew) - if name == "Kurt": - return _rolling_apply(children[0], w, _ts_kurt) - if name == "Median": - return _rolling_apply(children[0], w, _ts_median) - if name == "Sum": - return _rolling_apply(children[0], w, _ts_sum) - if name == "Prod": - return _rolling_apply(children[0], w, _ts_prod) - if name == "TsMax": - return _rolling_apply(children[0], w, _ts_max) - if name == "TsMin": - return _rolling_apply(children[0], w, _ts_min) - if name == "TsArgMax": - return _rolling_apply(children[0], w, _ts_argmax) - if name == "TsArgMin": - return _rolling_apply(children[0], w, _ts_argmin) - if name == "TsRank": - return _rolling_apply(children[0], w, _ts_rank) - if name == "Quantile": - q = params.get("q", 0.5) - return _rolling_apply( - children[0], w, lambda sx: np.nanquantile(sx, q, axis=1) - ) - if name == "CountNaN": - return _rolling_apply( - children[0], w, lambda sx: np.sum(np.isnan(sx), axis=1).astype(np.float64) - ) - if name == "CountNotNaN": - return _rolling_apply( - children[0], w, lambda sx: np.sum(~np.isnan(sx), axis=1).astype(np.float64) - ) - - # -- Time-series -------------------------------------------------------- - if name == "Delta": - M, T = children[0].shape - out = np.full_like(children[0], np.nan, dtype=np.float64) - if w < T: - out[:, w:] = children[0][:, w:] - children[0][:, :-w] - return out - if name == "Delay": - M, T = children[0].shape 
- out = np.full_like(children[0], np.nan, dtype=np.float64) - if w < T: - out[:, w:] = children[0][:, :-w] - return out - if name == "Return": - M, T = children[0].shape - out = np.full_like(children[0], np.nan, dtype=np.float64) - if w < T: - prev = children[0][:, :-w] - out[:, w:] = _safe_div(children[0][:, w:] - prev, prev) - return out - if name == "LogReturn": - M, T = children[0].shape - out = np.full_like(children[0], np.nan, dtype=np.float64) - if w < T: - ratio = _safe_div(children[0][:, w:], np.where(np.abs(children[0][:, :-w]) > _EPS, children[0][:, :-w], 1.0)) - out[:, w:] = np.log(np.abs(ratio) + _EPS) - return out - if name == "Corr": - return _rolling_apply(children[0], w, _ts_corr, binary_y=children[1]) - if name == "Cov": - return _rolling_apply(children[0], w, _ts_cov, binary_y=children[1]) - if name == "Beta": - return _rolling_apply(children[0], w, _ts_beta, binary_y=children[1]) - if name == "Resid": - return _rolling_apply(children[0], w, _ts_resid, binary_y=children[1]) - if name == "WMA": - return _wma(children[0], w) - if name == "Decay": - return _decay(children[0], w) - if name == "CumSum": - return np.nancumsum(children[0], axis=1) - if name == "CumProd": - return np.nancumprod(children[0], axis=1) - if name == "CumMax": - return np.maximum.accumulate(np.nan_to_num(children[0], nan=-np.inf), axis=1) - if name == "CumMin": - return np.minimum.accumulate(np.nan_to_num(children[0], nan=np.inf), axis=1) - - # -- Smoothing ---------------------------------------------------------- - if name == "EMA": - return _ema(children[0], w) - if name == "DEMA": - e1 = _ema(children[0], w) - e2 = _ema(e1, w) - return 2 * e1 - e2 - if name == "SMA": - return _rolling_apply(children[0], w, _ts_mean) - if name == "KAMA": - return _ema(children[0], w) # simplified - if name == "HMA": - half_w = max(w // 2, 1) - sqrt_w = max(int(math.sqrt(w)), 1) - wma_half = _wma(children[0], half_w) - wma_full = _wma(children[0], w) - # Fill leading NaN from the shorter 
window with the longer - diff = 2 * np.nan_to_num(wma_half) - np.nan_to_num(wma_full) - return _wma(diff, sqrt_w) - - # -- Cross-sectional ---------------------------------------------------- - if name == "CsRank": - return _cs_rank(children[0]) - if name == "CsZScore": - return _cs_zscore(children[0]) - if name == "CsDemean": - return _cs_demean(children[0]) - if name == "CsScale": - return _cs_scale(children[0]) - if name == "CsNeutralize": - return _cs_demean(children[0]) # simplified: industry-neutralize ≈ demean - if name == "CsQuantile": - n_bins = int(params.get("n_bins", 5)) - ranked = _cs_rank(children[0]) - return np.ceil(ranked * n_bins).clip(1, n_bins) - - # -- Regression --------------------------------------------------------- - if name == "TsLinReg": - return _ts_linreg_fitted(children[0], w) - if name == "TsLinRegSlope": - return _ts_linreg_slope(children[0], w) - if name == "TsLinRegIntercept": - return _ts_linreg_intercept(children[0], w) - if name == "TsLinRegResid": - return _ts_linreg_resid(children[0], w) - - # -- Logical / conditional ---------------------------------------------- - if name == "IfElse": - cond, x_true, x_false = children - return np.where(cond > 0, x_true, x_false) - if name == "Greater": - return (children[0] > children[1]).astype(np.float64) - if name == "Less": - return (children[0] < children[1]).astype(np.float64) - if name == "Equal": - return (np.abs(children[0] - children[1]) < _EPS).astype(np.float64) - if name == "And": - return ((children[0] > 0) & (children[1] > 0)).astype(np.float64) - if name == "Or": - return ((children[0] > 0) | (children[1] > 0)).astype(np.float64) - if name == "Not": - return (children[0] <= 0).astype(np.float64) - - raise NotImplementedError(f"Operator '{name}' has no evaluation implementation.") - - -# --------------------------------------------------------------------------- -# Expression tree wrapper -# --------------------------------------------------------------------------- - -class 
ExpressionTree: - """Wrapper around a root ``Node`` providing a convenient API. - - Parameters - ---------- - root : Node - The root node of the tree. - """ - - __slots__ = ("root",) - - def __init__(self, root: Node) -> None: - self.root = root - - def to_string(self) -> str: - """Serialize the full tree to a DSL formula string.""" - return self.root.to_string() - - def depth(self) -> int: - """Return the depth of the tree.""" - return self.root.depth() - - def size(self) -> int: - """Return the total number of nodes.""" - return self.root.size() - - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - """Execute the formula on market data. - - Parameters - ---------- - data : dict[str, np.ndarray] - Maps feature names to arrays of shape ``(M, T)``. - - Returns - ------- - np.ndarray of shape ``(M, T)`` - """ - return self.root.evaluate(data) - - def clone(self) -> "ExpressionTree": - """Return a deep copy of the tree.""" - return ExpressionTree(self.root.clone()) - - def leaf_features(self) -> List[str]: - """Return sorted unique feature names referenced by this tree.""" - return self.root.leaf_features() - - def __repr__(self) -> str: - return f"ExpressionTree({self.to_string()})" - - def __str__(self) -> str: - return self.to_string() diff --git a/src/factorminer/factorminer/core/factor_library.py b/src/factorminer/factorminer/core/factor_library.py deleted file mode 100644 index 635b22b..0000000 --- a/src/factorminer/factorminer/core/factor_library.py +++ /dev/null @@ -1,602 +0,0 @@ -"""Factor Library: maintains the growing collection of admitted alpha factors. - -Implements the admission rules from the paper (Eq. 10, 11): -- Admission: IC(alpha) >= tau_IC AND max_{g in L} |rho(alpha, g)| < theta -- Replacement: IC(alpha) >= 0.10 AND IC(alpha) >= 1.3 * IC(g) AND only 1 correlated factor - -The library tracks pairwise Spearman correlations and supports incremental -updates as new factors are added or replaced. 
-""" - -from __future__ import annotations - -import logging -from collections import defaultdict -from dataclasses import dataclass, field -from datetime import datetime -from typing import Dict, List, Optional, Tuple - -import numpy as np -from scipy.stats import spearmanr - -logger = logging.getLogger(__name__) - - -@dataclass -class Factor: - """A single admitted alpha factor.""" - - id: int - name: str - formula: str - category: str # e.g., "VWAP", "Regime-switching", "Momentum", etc. - ic_mean: float # Mean IC across evaluation period - icir: float # IC Information Ratio - ic_win_rate: float # Fraction of periods with positive IC - max_correlation: float # Max |rho| with any other library factor at admission - batch_number: int # Which mining batch admitted this factor - admission_date: str = "" - signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T) - research_metrics: dict = field(default_factory=dict) - provenance: dict = field(default_factory=dict) - - def __post_init__(self) -> None: - if not self.admission_date: - self.admission_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - def to_dict(self) -> dict: - """Serialize to a JSON-compatible dictionary (excludes signals).""" - return { - "id": self.id, - "name": self.name, - "formula": self.formula, - "category": self.category, - "ic_mean": self.ic_mean, - "icir": self.icir, - "ic_win_rate": self.ic_win_rate, - "max_correlation": self.max_correlation, - "batch_number": self.batch_number, - "admission_date": self.admission_date, - "research_metrics": self.research_metrics, - "provenance": self.provenance, - } - - @classmethod - def from_dict(cls, d: dict) -> "Factor": - """Reconstruct a Factor from a dictionary.""" - return cls( - id=d["id"], - name=d["name"], - formula=d["formula"], - category=d["category"], - ic_mean=d["ic_mean"], - icir=d["icir"], - ic_win_rate=d["ic_win_rate"], - max_correlation=d["max_correlation"], - batch_number=d["batch_number"], - 
admission_date=d.get("admission_date", ""), - research_metrics=d.get("research_metrics", {}), - provenance=d.get("provenance", {}), - ) - - -class FactorLibrary: - """The factor library L that maintains admitted alpha factors. - - Parameters - ---------- - correlation_threshold : float - Maximum allowed |rho| for admission (theta). Default 0.5 for A-shares. - ic_threshold : float - Minimum IC for admission (tau_IC). Default 0.04. - """ - - def __init__( - self, - correlation_threshold: float = 0.5, - ic_threshold: float = 0.04, - ) -> None: - self.factors: Dict[int, Factor] = {} - self.correlation_matrix: Optional[np.ndarray] = None # Pairwise |rho| - self._next_id: int = 1 - self.correlation_threshold = correlation_threshold - self.ic_threshold = ic_threshold - # Maps factor_id -> index in the correlation matrix - self._id_to_index: Dict[int, int] = {} - - # ------------------------------------------------------------------ - # Correlation computation - # ------------------------------------------------------------------ - - def compute_correlation( - self, signals_a: np.ndarray, signals_b: np.ndarray - ) -> float: - """Compute time-average cross-sectional Spearman correlation rho(alpha, beta). - - rho(alpha, beta) = (1/|T|) * sum_t Corr_rank(s_t^(alpha), s_t^(beta)) - - Parameters - ---------- - signals_a, signals_b : np.ndarray, shape (M, T) - Cross-sectional signal matrices. - - Returns - ------- - float - Mean absolute Spearman rank correlation across time steps. 
- """ - if signals_a.shape != signals_b.shape: - raise ValueError( - f"Signal shapes must match: {signals_a.shape} vs {signals_b.shape}" - ) - M, T = signals_a.shape - correlations = np.empty(T, dtype=np.float64) - - for t in range(T): - col_a = signals_a[:, t] - col_b = signals_b[:, t] - # Mask NaNs from both columns - valid = ~(np.isnan(col_a) | np.isnan(col_b)) - n_valid = valid.sum() - if n_valid < 3: - correlations[t] = np.nan - continue - rho, _ = spearmanr(col_a[valid], col_b[valid]) - correlations[t] = rho - - return float(np.nanmean(np.abs(correlations))) - - def _compute_correlation_vectorized( - self, signals_a: np.ndarray, signals_b: np.ndarray - ) -> float: - """Faster vectorized Spearman correlation using rankdata. - - For large M and T this avoids per-timestep Python loops by ranking - each column and computing Pearson on the ranks. - """ - from scipy.stats import rankdata - - M, T = signals_a.shape - # Mask invalid entries - mask = ~(np.isnan(signals_a) | np.isnan(signals_b)) - - corr_sum = 0.0 - n_valid_t = 0 - for t in range(T): - valid = mask[:, t] - n = valid.sum() - if n < 3: - continue - ra = rankdata(signals_a[valid, t]) - rb = rankdata(signals_b[valid, t]) - # Pearson on ranks == Spearman - ra_c = ra - ra.mean() - rb_c = rb - rb.mean() - denom = np.sqrt((ra_c ** 2).sum() * (rb_c ** 2).sum()) - if denom < 1e-12: - continue - corr_sum += abs((ra_c * rb_c).sum() / denom) - n_valid_t += 1 - - if n_valid_t == 0: - return 0.0 - return corr_sum / n_valid_t - - # ------------------------------------------------------------------ - # Admission and replacement - # ------------------------------------------------------------------ - - def check_admission( - self, candidate_ic: float, candidate_signals: np.ndarray - ) -> Tuple[bool, str]: - """Check if candidate passes admission criteria (Eq. 10). 
- - Admission rule: - IC(alpha) >= tau_IC AND max_{g in L} |rho(alpha, g)| < theta - - Parameters - ---------- - candidate_ic : float - The candidate factor's mean IC. - candidate_signals : np.ndarray, shape (M, T) - The candidate's realized signals. - - Returns - ------- - (admitted, reason) : Tuple[bool, str] - """ - if candidate_ic < self.ic_threshold: - return False, ( - f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}" - ) - - if self.size == 0: - return True, "First factor in library" - - max_corr = self._max_correlation_with_library(candidate_signals) - - if max_corr >= self.correlation_threshold: - return False, ( - f"Max correlation {max_corr:.4f} >= threshold " - f"{self.correlation_threshold} with existing library factor" - ) - - return True, ( - f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}" - ) - - def check_replacement( - self, - candidate_ic: float, - candidate_signals: np.ndarray, - ic_min: float = 0.10, - ic_ratio: float = 1.3, - ) -> Tuple[bool, Optional[int], str]: - """Check replacement mechanism (Eq. 11). - - Replacement rule: - IC(alpha) >= 0.10 - AND IC(alpha) >= 1.3 * IC(g) - AND |{g in L : |rho(alpha, g)| > theta}| = 1 - - If exactly one library factor g is correlated above theta AND the - candidate's IC dominates g's IC by the required ratio, replace g. - - Parameters - ---------- - candidate_ic : float - The candidate's mean IC. - candidate_signals : np.ndarray, shape (M, T) - The candidate's realized signals. - ic_min : float - Absolute IC floor for replacement (default 0.10). - ic_ratio : float - Required IC ratio over the correlated factor (default 1.3). 
- - Returns - ------- - (should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str] - """ - if candidate_ic < ic_min: - return False, None, ( - f"IC {candidate_ic:.4f} below replacement floor {ic_min}" - ) - - if self.size == 0: - return False, None, "Library is empty, use admission instead" - - # Find all library factors correlated above theta - correlated_factors = [] - for fid, factor in self.factors.items(): - if factor.signals is None: - continue - corr = self._compute_correlation_vectorized( - candidate_signals, factor.signals - ) - if corr >= self.correlation_threshold: - correlated_factors.append((fid, corr, factor.ic_mean)) - - if len(correlated_factors) != 1: - return False, None, ( - f"Found {len(correlated_factors)} correlated factors " - f"(need exactly 1 for replacement)" - ) - - fid, corr, existing_ic = correlated_factors[0] - if candidate_ic < ic_ratio * existing_ic: - return False, None, ( - f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = " - f"{ic_ratio * existing_ic:.4f}" - ) - - return True, fid, ( - f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > " - f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}" - ) - - # ------------------------------------------------------------------ - # Library mutations - # ------------------------------------------------------------------ - - def admit_factor(self, factor: Factor) -> int: - """Add a factor to the library and update the correlation matrix. - - Parameters - ---------- - factor : Factor - The factor to add. Its ``id`` field is overwritten with the - next available ID. - - Returns - ------- - int - The assigned factor ID. 
- """ - factor.id = self._next_id - self._next_id += 1 - self.factors[factor.id] = factor - - # Update correlation matrix incrementally - self._extend_correlation_matrix(factor) - - logger.info( - "Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)", - factor.id, factor.name, factor.ic_mean, - factor.max_correlation, factor.category, - ) - return factor.id - - def replace_factor(self, old_id: int, new_factor: Factor) -> None: - """Replace an existing factor with a better one. - - The new factor takes the old factor's position in the correlation - matrix and receives a fresh ID. - - Parameters - ---------- - old_id : int - ID of the factor being replaced. - new_factor : Factor - The replacement factor. - """ - if old_id not in self.factors: - raise KeyError(f"Factor {old_id} not in library") - - old_factor = self.factors[old_id] - new_factor.id = self._next_id - self._next_id += 1 - - # Remove old factor and reuse its matrix slot - old_index = self._id_to_index.pop(old_id) - del self.factors[old_id] - - # Insert new factor at the same index - self.factors[new_factor.id] = new_factor - self._id_to_index[new_factor.id] = old_index - - # Recompute the row/column for this index - if self.correlation_matrix is not None and new_factor.signals is not None: - self._recompute_matrix_slot(old_index, new_factor) - - logger.info( - "Replaced factor %d with %d '%s' (IC=%.4f)", - old_id, new_factor.id, new_factor.name, new_factor.ic_mean, - ) - - def remove_factor(self, factor_id: int) -> None: - """Remove a factor from the library and rebuild correlation state.""" - if factor_id not in self.factors: - raise KeyError(f"Factor {factor_id} not in library") - - removed = self.factors.pop(factor_id) - self.update_correlation_matrix() - - logger.info( - "Removed factor %d '%s' from library", - factor_id, - removed.name, - ) - - # ------------------------------------------------------------------ - # Correlation matrix management - # 
------------------------------------------------------------------ - - def _max_correlation_with_library( - self, candidate_signals: np.ndarray - ) -> float: - """Compute max |rho| between candidate and all library factors.""" - max_corr = 0.0 - for factor in self.factors.values(): - if factor.signals is None: - continue - corr = self._compute_correlation_vectorized( - candidate_signals, factor.signals - ) - max_corr = max(max_corr, corr) - return max_corr - - def _extend_correlation_matrix(self, new_factor: Factor) -> None: - """Extend the correlation matrix by one row/column for the new factor.""" - n = len(self._id_to_index) - new_index = n - self._id_to_index[new_factor.id] = new_index - - if new_factor.signals is None: - # No signals to correlate; expand with zeros - if self.correlation_matrix is None: - self.correlation_matrix = np.zeros((1, 1), dtype=np.float64) - else: - new_size = new_index + 1 - new_mat = np.zeros((new_size, new_size), dtype=np.float64) - new_mat[:new_index, :new_index] = self.correlation_matrix - self.correlation_matrix = new_mat - return - - # Build a new (n+1) x (n+1) matrix - new_size = new_index + 1 - new_mat = np.zeros((new_size, new_size), dtype=np.float64) - - if self.correlation_matrix is not None and self.correlation_matrix.size > 0: - new_mat[:new_index, :new_index] = self.correlation_matrix - - # Compute correlations with all existing factors - index_to_id = {idx: fid for fid, idx in self._id_to_index.items()} - for idx in range(new_index): - fid = index_to_id.get(idx) - if fid is None: - continue - other = self.factors.get(fid) - if other is None or other.signals is None: - continue - corr = self._compute_correlation_vectorized( - new_factor.signals, other.signals - ) - new_mat[new_index, idx] = corr - new_mat[idx, new_index] = corr - - self.correlation_matrix = new_mat - - def _recompute_matrix_slot(self, idx: int, factor: Factor) -> None: - """Recompute one row/column of the correlation matrix after replacement.""" - n = 
self.correlation_matrix.shape[0] - index_to_id = {i: fid for fid, i in self._id_to_index.items()} - - for other_idx in range(n): - if other_idx == idx: - self.correlation_matrix[idx, idx] = 0.0 - continue - other_fid = index_to_id.get(other_idx) - if other_fid is None: - continue - other = self.factors.get(other_fid) - if other is None or other.signals is None: - self.correlation_matrix[idx, other_idx] = 0.0 - self.correlation_matrix[other_idx, idx] = 0.0 - continue - corr = self._compute_correlation_vectorized( - factor.signals, other.signals - ) - self.correlation_matrix[idx, other_idx] = corr - self.correlation_matrix[other_idx, idx] = corr - - def update_correlation_matrix(self) -> None: - """Recompute the full pairwise correlation matrix from scratch. - - This is O(n^2) in the number of library factors and should only be - called when the incremental updates may have drifted or after bulk - operations. - """ - ids = sorted(self.factors.keys()) - n = len(ids) - if n == 0: - self.correlation_matrix = None - self._id_to_index.clear() - return - - self._id_to_index = {fid: i for i, fid in enumerate(ids)} - mat = np.zeros((n, n), dtype=np.float64) - - factors_list = [self.factors[fid] for fid in ids] - for i in range(n): - for j in range(i + 1, n): - fi, fj = factors_list[i], factors_list[j] - if fi.signals is None or fj.signals is None: - continue - corr = self._compute_correlation_vectorized(fi.signals, fj.signals) - mat[i, j] = corr - mat[j, i] = corr - - self.correlation_matrix = mat - - # ------------------------------------------------------------------ - # Queries and diagnostics - # ------------------------------------------------------------------ - - @property - def size(self) -> int: - """Number of factors currently in the library.""" - return len(self.factors) - - def get_factor(self, factor_id: int) -> Factor: - """Retrieve a factor by ID.""" - if factor_id not in self.factors: - raise KeyError(f"Factor {factor_id} not in library") - return 
self.factors[factor_id] - - def list_factors(self) -> List[Factor]: - """Return all factors sorted by ID.""" - return [self.factors[k] for k in sorted(self.factors)] - - def get_factors_by_category(self, category: str) -> List[Factor]: - """Return all factors matching a given category.""" - return [ - f for f in self.factors.values() - if f.category == category - ] - - def get_diagnostics(self) -> dict: - """Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation. - - Returns - ------- - dict with keys: - - size: int - - avg_correlation: float (average off-diagonal |rho|) - - max_correlation: float (maximum off-diagonal |rho|) - - p95_correlation: float (95th percentile off-diagonal |rho|) - - category_counts: dict[str, int] - - category_avg_ic: dict[str, float] - - saturation: float (fraction of max correlation slots above 0.3) - """ - diag: dict = {"size": self.size} - - # Category breakdown - cat_counts: Dict[str, int] = defaultdict(int) - cat_ic_sums: Dict[str, float] = defaultdict(float) - for f in self.factors.values(): - cat_counts[f.category] += 1 - cat_ic_sums[f.category] += f.ic_mean - - diag["category_counts"] = dict(cat_counts) - diag["category_avg_ic"] = { - cat: cat_ic_sums[cat] / cat_counts[cat] - for cat in cat_counts - } - - # Correlation statistics - if self.correlation_matrix is not None and self.size > 1: - n = self.correlation_matrix.shape[0] - # Extract upper triangle (off-diagonal) - triu_idx = np.triu_indices(n, k=1) - off_diag = self.correlation_matrix[triu_idx] - valid = off_diag[~np.isnan(off_diag)] - - if len(valid) > 0: - diag["avg_correlation"] = float(np.mean(valid)) - diag["max_correlation"] = float(np.max(valid)) - diag["p95_correlation"] = float(np.percentile(valid, 95)) - diag["saturation"] = float(np.mean(valid > 0.3)) - else: - diag["avg_correlation"] = 0.0 - diag["max_correlation"] = 0.0 - diag["p95_correlation"] = 0.0 - diag["saturation"] = 0.0 - else: - diag["avg_correlation"] = 0.0 - 
diag["max_correlation"] = 0.0 - diag["p95_correlation"] = 0.0 - diag["saturation"] = 0.0 - - return diag - - def get_state_summary(self) -> dict: - """Summary for memory retrieval: size, categories, recent admissions. - - Returns a lightweight dictionary suitable for inclusion in LLM prompts - or memory store entries. - """ - factors_sorted = sorted( - self.factors.values(), key=lambda f: f.id, reverse=True - ) - recent = factors_sorted[:5] # Last 5 admissions - - categories = defaultdict(int) - for f in self.factors.values(): - categories[f.category] += 1 - - return { - "library_size": self.size, - "categories": dict(categories), - "recent_admissions": [ - { - "id": f.id, - "name": f.name, - "category": f.category, - "ic_mean": f.ic_mean, - "batch": f.batch_number, - } - for f in recent - ], - "correlation_threshold": self.correlation_threshold, - "ic_threshold": self.ic_threshold, - } diff --git a/src/factorminer/factorminer/core/helix_loop.py b/src/factorminer/factorminer/core/helix_loop.py deleted file mode 100644 index 7671661..0000000 --- a/src/factorminer/factorminer/core/helix_loop.py +++ /dev/null @@ -1,1576 +0,0 @@ -"""The Helix Loop: 5-stage self-evolving factor discovery with Phase 2 extensions. - -Extends the base Ralph Loop with: - 1. RETRIEVE -- KG + embeddings + flat memory hybrid retrieval - 2. PROPOSE -- Multi-agent debate (specialists + critic) or standard generation - 3. SYNTHESIZE -- SymPy canonicalization to eliminate mathematical duplicates - 4. VALIDATE -- Standard pipeline + causal + regime + capacity + significance - 5. DISTILL -- Standard memory evolution + KG update + online forgetting - -All Phase 2 components are optional: when none are enabled the Helix Loop -behaves identically to the Ralph Loop and is a full drop-in replacement. 
-""" - -from __future__ import annotations - -import json -import logging -import re -import time -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple - -import numpy as np - -from src.factorminer.factorminer.core.ralph_loop import ( - BudgetTracker, - EvaluationResult, - FactorGenerator, - RalphLoop, - ValidationPipeline, -) -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.parser import try_parse -from src.factorminer.factorminer.evaluation.metrics import compute_ic -from src.factorminer.factorminer.memory.memory_store import ExperienceMemory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory -from src.factorminer.factorminer.memory.formation import form_memory -from src.factorminer.factorminer.memory.evolution import evolve_memory -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.utils.logging import IterationRecord, FactorRecord - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Optional imports -- resolved at call time with graceful fallback -# --------------------------------------------------------------------------- - -def _try_import_debate(): - try: - from factorminer.agent.debate import DebateGenerator, DebateConfig - return DebateGenerator, DebateConfig - except ImportError: - return None, None - - -def _try_import_canonicalizer(): - try: - from factorminer.core.canonicalizer import FormulaCanonicalizer - return FormulaCanonicalizer - except ImportError: - return None - - -def _try_import_causal(): - try: - from factorminer.evaluation.causal import CausalValidator, CausalConfig - return CausalValidator, CausalConfig - except ImportError: - return None, None - - -def _try_import_regime(): - try: - from factorminer.evaluation.regime import ( - RegimeDetector, - RegimeAwareEvaluator, - RegimeConfig, - ) 
- return RegimeDetector, RegimeAwareEvaluator, RegimeConfig - except ImportError: - return None, None, None - - -def _try_import_capacity(): - try: - from factorminer.evaluation.capacity import CapacityEstimator, CapacityConfig - return CapacityEstimator, CapacityConfig - except ImportError: - return None, None - - -def _try_import_significance(): - try: - from factorminer.evaluation.significance import ( - BootstrapICTester, - FDRController, - DeflatedSharpeCalculator, - SignificanceConfig, - ) - return BootstrapICTester, FDRController, DeflatedSharpeCalculator, SignificanceConfig - except ImportError: - return None, None, None, None - - -def _try_import_kg(): - try: - from factorminer.memory.knowledge_graph import FactorKnowledgeGraph, FactorNode - return FactorKnowledgeGraph, FactorNode - except ImportError: - return None, None - - -def _try_import_kg_retrieval(): - try: - from factorminer.memory.kg_retrieval import retrieve_memory_enhanced - return retrieve_memory_enhanced - except ImportError: - return None - - -def _try_import_embedder(): - try: - from factorminer.memory.embeddings import FormulaEmbedder - return FormulaEmbedder - except ImportError: - return None - - -def _try_import_auto_inventor(): - try: - from factorminer.operators.auto_inventor import OperatorInventor - return OperatorInventor - except ImportError: - return None - - -def _try_import_custom_store(): - try: - from factorminer.operators.custom import CustomOperatorStore - return CustomOperatorStore - except ImportError: - return None - - -# --------------------------------------------------------------------------- -# HelixLoop -# --------------------------------------------------------------------------- - -class HelixLoop(RalphLoop): - """Enhanced 5-stage Helix Loop for self-evolving factor discovery. - - Extends the Ralph Loop with: - 1. RETRIEVE: KG + embeddings + flat memory hybrid retrieval - 2. PROPOSE: Multi-agent debate (specialists + critic) or standard generation - 3. 
SYNTHESIZE: SymPy canonicalization to eliminate mathematical duplicates - 4. VALIDATE: Standard pipeline + causal + regime + capacity + significance - 5. DISTILL: Standard memory evolution + KG update + online forgetting - - All Phase 2 components are optional and default to off. When none are - enabled, the Helix Loop behaves identically to the Ralph Loop. - - Parameters - ---------- - config : Any - Mining configuration object. - data_tensor : np.ndarray - Market data tensor D in R^(M x T x F). - returns : np.ndarray - Forward returns array R in R^(M x T). - llm_provider : LLMProvider, optional - LLM provider for factor generation. - memory : ExperienceMemory, optional - Pre-populated experience memory. - library : FactorLibrary, optional - Pre-populated factor library. - debate_config : DebateConfig, optional - Configuration for multi-agent debate generation. - When provided, replaces standard FactorGenerator. - enable_knowledge_graph : bool - Enable factor knowledge graph for lineage and structural analysis. - enable_embeddings : bool - Enable semantic formula embeddings for similarity search. - enable_auto_inventor : bool - Enable periodic auto-invention of new operators. - auto_invention_interval : int - Run auto-invention every N iterations. - canonicalize : bool - Enable SymPy-based formula canonicalization for deduplication. - forgetting_lambda : float - Exponential decay factor for online forgetting (0-1). - causal_config : CausalConfig, optional - Configuration for causal validation (Granger + intervention). - regime_config : RegimeConfig, optional - Configuration for regime-aware IC evaluation. - capacity_config : CapacityConfig, optional - Configuration for capacity-aware cost evaluation. - significance_config : SignificanceConfig, optional - Configuration for bootstrap CI + FDR + deflated Sharpe. - volume : np.ndarray, optional - Dollar volume array (M, T) required for capacity estimation. 
- """ - - def __init__( - self, - config: Any, - data_tensor: np.ndarray, - returns: np.ndarray, - llm_provider: Optional[LLMProvider] = None, - memory: Optional[ExperienceMemory] = None, - library: Optional[FactorLibrary] = None, - # Phase 2 extensions - debate_config: Optional[Any] = None, - enable_knowledge_graph: bool = False, - enable_embeddings: bool = False, - enable_auto_inventor: bool = False, - auto_invention_interval: int = 10, - canonicalize: bool = True, - forgetting_lambda: float = 0.95, - causal_config: Optional[Any] = None, - regime_config: Optional[Any] = None, - capacity_config: Optional[Any] = None, - significance_config: Optional[Any] = None, - volume: Optional[np.ndarray] = None, - ) -> None: - # Initialize base RalphLoop - super().__init__( - config=config, - data_tensor=data_tensor, - returns=returns, - llm_provider=llm_provider, - memory=memory, - library=library, - ) - - # Store Phase 2 configuration - self._debate_config = debate_config - self._enable_kg = enable_knowledge_graph - self._enable_embeddings = enable_embeddings - self._enable_auto_inventor = enable_auto_inventor - self._auto_invention_interval = auto_invention_interval - self._canonicalize = canonicalize - self._forgetting_lambda = forgetting_lambda - self._causal_config = causal_config - self._regime_config = regime_config - self._capacity_config = capacity_config - self._significance_config = significance_config - self._volume = volume - - # Track iterations without admissions for forgetting - self._no_admission_streak: int = 0 - - # Initialize Phase 2 components - self._debate_generator: Optional[Any] = None - self._canonicalizer: Optional[Any] = None - self._causal_validator: Optional[Any] = None - self._regime_detector: Optional[Any] = None - self._regime_evaluator: Optional[Any] = None - self._regime_classification: Optional[Any] = None - self._capacity_estimator: Optional[Any] = None - self._bootstrap_tester: Optional[Any] = None - self._fdr_controller: Optional[Any] = 
None - self._kg: Optional[Any] = None - self._embedder: Optional[Any] = None - self._auto_inventor: Optional[Any] = None - self._custom_op_store: Optional[Any] = None - - self._init_phase2_components(llm_provider) - - # ------------------------------------------------------------------ - # Phase 2 component initialization - # ------------------------------------------------------------------ - - def _init_phase2_components(self, llm_provider: Optional[LLMProvider]) -> None: - """Initialize all Phase 2 components based on configuration.""" - - # -- Debate generator -- - if self._debate_config is not None: - DebateGeneratorCls, _ = _try_import_debate() - if DebateGeneratorCls is not None: - try: - self._debate_generator = DebateGeneratorCls( - llm_provider=llm_provider or self.generator.llm, - debate_config=self._debate_config, - ) - logger.info("Helix: multi-agent debate generator enabled") - except Exception as exc: - logger.warning("Helix: failed to init debate generator: %s", exc) - else: - logger.warning( - "Helix: debate_config provided but debate module unavailable" - ) - - # -- Canonicalizer -- - if self._canonicalize: - FormulaCanonCls = _try_import_canonicalizer() - if FormulaCanonCls is not None: - try: - self._canonicalizer = FormulaCanonCls() - logger.info("Helix: SymPy canonicalization enabled") - except Exception as exc: - logger.warning("Helix: failed to init canonicalizer: %s", exc) - else: - logger.warning( - "Helix: canonicalize=True but sympy/canonicalizer unavailable" - ) - - # -- Causal validator -- - if self._causal_config is not None: - CausalValidatorCls, _ = _try_import_causal() - if CausalValidatorCls is not None: - logger.info("Helix: causal validation enabled") - else: - logger.warning( - "Helix: causal_config provided but causal module unavailable" - ) - - # -- Regime evaluator -- - if self._regime_config is not None: - RegimeDetectorCls, RegimeEvalCls, _ = _try_import_regime() - if RegimeDetectorCls is not None and RegimeEvalCls is not 
None: - try: - self._regime_detector = RegimeDetectorCls(self._regime_config) - self._regime_classification = self._regime_detector.classify( - self.returns - ) - self._regime_evaluator = RegimeEvalCls( - returns=self.returns, - regime=self._regime_classification, - config=self._regime_config, - ) - logger.info("Helix: regime-aware evaluation enabled") - except Exception as exc: - logger.warning("Helix: failed to init regime evaluator: %s", exc) - else: - logger.warning( - "Helix: regime_config provided but regime module unavailable" - ) - - # -- Capacity estimator -- - if self._capacity_config is not None: - CapacityEstCls, _ = _try_import_capacity() - if CapacityEstCls is not None: - if self._volume is not None: - try: - self._capacity_estimator = CapacityEstCls( - returns=self.returns, - volume=self._volume, - config=self._capacity_config, - ) - logger.info("Helix: capacity-aware evaluation enabled") - except Exception as exc: - logger.warning( - "Helix: failed to init capacity estimator: %s", exc - ) - else: - logger.warning( - "Helix: capacity_config provided but no volume data supplied" - ) - else: - logger.warning( - "Helix: capacity_config provided but capacity module unavailable" - ) - - # -- Significance testing -- - if self._significance_config is not None: - BootstrapCls, FDRCls, _, _ = _try_import_significance() - if BootstrapCls is not None and FDRCls is not None: - try: - self._bootstrap_tester = BootstrapCls(self._significance_config) - self._fdr_controller = FDRCls(self._significance_config) - logger.info("Helix: significance testing enabled") - except Exception as exc: - logger.warning( - "Helix: failed to init significance testing: %s", exc - ) - else: - logger.warning( - "Helix: significance_config provided but significance module unavailable" - ) - - # -- Knowledge graph -- - if self._enable_kg: - KGCls, _ = _try_import_kg() - if KGCls is not None: - try: - self._kg = KGCls() - logger.info("Helix: knowledge graph enabled") - except Exception as 
exc: - logger.warning("Helix: failed to init knowledge graph: %s", exc) - else: - logger.warning( - "Helix: enable_knowledge_graph=True but knowledge_graph module unavailable" - ) - - # -- Embedder -- - if self._enable_embeddings: - EmbedderCls = _try_import_embedder() - if EmbedderCls is not None: - try: - self._embedder = EmbedderCls() - self._prime_embedder_from_library() - logger.info("Helix: formula embeddings enabled") - except Exception as exc: - logger.warning("Helix: failed to init embedder: %s", exc) - else: - logger.warning( - "Helix: enable_embeddings=True but embeddings module unavailable" - ) - - # -- Auto inventor -- - if self._enable_auto_inventor: - InventorCls = _try_import_auto_inventor() - CustomStoreCls = _try_import_custom_store() - if InventorCls is not None: - try: - self._auto_inventor = InventorCls( - llm_provider=llm_provider or self.generator.llm, - data_tensor=self.data_tensor, - returns=self.returns, - ) - logger.info("Helix: auto operator invention enabled") - except Exception as exc: - logger.warning("Helix: failed to init auto inventor: %s", exc) - - if CustomStoreCls is not None: - output_dir = getattr(self.config, "output_dir", "./output") - try: - self._custom_op_store = CustomStoreCls( - store_dir=str(Path(output_dir) / "custom_operators") - ) - logger.info("Helix: custom operator store enabled") - except Exception as exc: - logger.warning( - "Helix: failed to init custom operator store: %s", exc - ) - else: - logger.warning( - "Helix: enable_auto_inventor=True but custom operator store unavailable" - ) - - # ------------------------------------------------------------------ - # Override: _run_iteration with 5-stage Helix flow - # ------------------------------------------------------------------ - - def _run_iteration(self, batch_size: int) -> Dict[str, Any]: - """Execute one iteration of the 5-stage Helix Loop. - - Stages: - 1. RETRIEVE -- enhanced memory retrieval (KG + embeddings + flat) - 2. 
PROPOSE -- debate or standard factor generation - 3. SYNTHESIZE -- canonicalize and deduplicate candidates - 4. VALIDATE -- standard pipeline + causal + regime + capacity + significance - 5. DISTILL -- memory evolution + KG update + forgetting - - Returns - ------- - dict - Iteration statistics. - """ - t0 = time.time() - helix_stats: Dict[str, Any] = {} - - # ================================================================== - # Stage 1: RETRIEVE - # ================================================================== - library_state = self.library.get_state_summary() - memory_signal = self._helix_retrieve(library_state) - - # ================================================================== - # Stage 2: PROPOSE - # ================================================================== - t_gen = time.time() - candidates = self._helix_propose(memory_signal, library_state, batch_size) - self.budget.record_llm_call() - - if not candidates: - logger.warning( - "Helix iteration %d: generator produced 0 candidates", - self.iteration, - ) - return self._empty_stats() - - helix_stats["candidates_before_canon"] = len(candidates) - - # ================================================================== - # Stage 3: SYNTHESIZE (canonicalize + dedup) - # ================================================================== - candidates, n_canon_dupes, n_semantic_dupes = self._canonicalize_and_dedup(candidates) - helix_stats["canonical_duplicates_removed"] = n_canon_dupes - helix_stats["semantic_duplicates_removed"] = n_semantic_dupes - - if not candidates: - logger.warning( - "Helix iteration %d: all candidates removed by canonicalization", - self.iteration, - ) - return self._empty_stats() - - # ================================================================== - # Stage 4: VALIDATE - # ================================================================== - results = self.pipeline.evaluate_batch(candidates) - admitted_results = self._update_library(results) - - # Phase 2 extended 
validation on admitted candidates - rejected_by_phase2 = self._helix_validate(results, admitted_results) - helix_stats["phase2_rejections"] = rejected_by_phase2 - surviving_admissions = [r for r in admitted_results if r.admitted] - - provenance_library_state = { - **library_state, - "diagnostics": self.library.get_diagnostics(), - } - - self._attach_factor_provenance( - surviving_admissions, - library_state=provenance_library_state, - memory_signal=memory_signal, - phase2_summary={ - "enabled_features": self._phase2_features(), - "phase2_rejections": rejected_by_phase2, - }, - generator_family=self._generator_family(), - ) - - # ================================================================== - # Stage 5: DISTILL - # ================================================================== - trajectory = self._build_trajectory(results) - formed = form_memory(self.memory, trajectory, self.iteration) - self.memory = evolve_memory(self.memory, formed) - - # KG + embeddings + forgetting - self._helix_distill(results, admitted_results) - - # Auto-invention check - if ( - self._auto_inventor is not None - and self.iteration % self._auto_invention_interval == 0 - ): - self._run_auto_invention() - - # Build stats - elapsed = time.time() - t0 - self.budget.record_compute(elapsed) - stats = self._compute_stats(results, surviving_admissions, elapsed) - stats.update(helix_stats) - stats["iteration"] = self.iteration - - # Log to reporter and session logger - self.reporter.log_batch(**stats) - if self._session_logger: - ic_values = [r.ic_mean for r in results if r.parse_ok] - record = IterationRecord( - iteration=self.iteration, - candidates_generated=len(candidates) + n_canon_dupes + n_semantic_dupes, - ic_passed=stats["ic_passed"], - correlation_passed=stats["corr_passed"], - admitted=stats["admitted"], - rejected=len(candidates) + n_canon_dupes + n_semantic_dupes - stats["admitted"], - replaced=stats["replaced"], - library_size=self.library.size, - best_ic=max(ic_values) if 
ic_values else 0.0, - mean_ic=float(np.mean(ic_values)) if ic_values else 0.0, - elapsed_seconds=elapsed, - ) - self._session_logger.log_iteration(record) - - for r in results: - factor_rec = FactorRecord( - expression=r.formula, - ic=r.ic_mean if r.parse_ok else None, - icir=r.icir if r.parse_ok else None, - max_correlation=r.max_correlation if r.parse_ok else None, - admitted=r.admitted, - rejection_reason=r.rejection_reason or None, - replaced_factor=str(r.replaced) if r.replaced else None, - ) - self._session_logger.log_factor(factor_rec) - - return stats - - # ------------------------------------------------------------------ - # Stage 1: Enhanced retrieval - # ------------------------------------------------------------------ - - def _helix_retrieve( - self, library_state: Dict[str, Any] - ) -> Dict[str, Any]: - """Stage 1 RETRIEVE: KG + embeddings + flat memory hybrid retrieval. - - Falls back to standard retrieve_memory if no KG/embedder is available. - """ - retrieve_enhanced_fn = _try_import_kg_retrieval() - - if retrieve_enhanced_fn is not None and ( - self._kg is not None or self._embedder is not None - ): - try: - return retrieve_enhanced_fn( - memory=self.memory, - library_state=library_state, - kg=self._kg, - embedder=self._embedder, - ) - except Exception as exc: - logger.warning( - "Helix: enhanced retrieval failed, falling back: %s", exc - ) - - return retrieve_memory(self.memory, library_state=library_state) - - # ------------------------------------------------------------------ - # Stage 2: Debate or standard proposal - # ------------------------------------------------------------------ - - def _helix_propose( - self, - memory_signal: Dict[str, Any], - library_state: Dict[str, Any], - batch_size: int, - ) -> List[Tuple[str, str]]: - """Stage 2 PROPOSE: Use debate generator or standard generator. - - Returns list of (name, formula) tuples compatible with the - validation pipeline. 
- """ - if self._debate_generator is not None: - try: - debate_candidates = self._debate_generator.generate_batch( - memory_signal=memory_signal, - library_state=library_state, - batch_size=batch_size, - ) - # Convert CandidateFactor objects to (name, formula) tuples - tuples: List[Tuple[str, str]] = [] - for c in debate_candidates: - tuples.append((c.name, c.formula)) - if tuples: - logger.info( - "Helix: debate generator produced %d candidates", - len(tuples), - ) - return tuples - logger.warning( - "Helix: debate generator returned 0 candidates, " - "falling back to standard generator" - ) - except Exception as exc: - logger.warning( - "Helix: debate generation failed, falling back: %s", exc - ) - - # Standard generation - return self.generator.generate_batch( - memory_signal=memory_signal, - library_state=library_state, - batch_size=batch_size, - ) - - # ------------------------------------------------------------------ - # Stage 3: Canonicalization + deduplication - # ------------------------------------------------------------------ - - def _canonicalize_and_dedup( - self, candidates: List[Tuple[str, str]] - ) -> Tuple[List[Tuple[str, str]], int, int]: - """Stage 3 SYNTHESIZE: Remove mathematically equivalent candidates. - - Uses SymPy-based canonicalization to detect algebraic duplicates - before evaluation, saving compute. 
- - Returns - ------- - tuple of (deduplicated_candidates, n_canonical_duplicates_removed, - n_semantic_duplicates_removed) - """ - if self._canonicalizer is None and self._embedder is None: - return candidates, 0, 0 - - seen_hashes: Dict[str, str] = {} # hash -> first factor name - unique: List[Tuple[str, str]] = [] - n_canon_dupes = 0 - n_semantic_dupes = 0 - - for name, formula in candidates: - tree = try_parse(formula) - if tree is not None and self._canonicalizer is not None: - try: - canon_hash = self._canonicalizer.canonicalize(tree) - except Exception as exc: - logger.debug( - "Helix: canonicalization failed for '%s': %s", name, exc - ) - else: - if canon_hash in seen_hashes: - n_canon_dupes += 1 - logger.debug( - "Helix: canonical duplicate '%s' matches '%s'", - name, - seen_hashes[canon_hash], - ) - continue - seen_hashes[canon_hash] = name - - semantic_match = self._semantic_duplicate_target(formula) - if semantic_match is not None: - n_semantic_dupes += 1 - logger.debug( - "Helix: semantic duplicate '%s' matches library factor '%s'", - name, - semantic_match, - ) - continue - - unique.append((name, formula)) - - if n_canon_dupes > 0: - logger.info( - "Helix: canonicalization removed %d/%d duplicate candidates", - n_canon_dupes, - len(candidates), - ) - - if n_semantic_dupes > 0: - logger.info( - "Helix: embedding screen removed %d/%d library-adjacent candidates", - n_semantic_dupes, - len(candidates), - ) - - return unique, n_canon_dupes, n_semantic_dupes - - # ------------------------------------------------------------------ - # Stage 4: Extended validation - # ------------------------------------------------------------------ - - def _helix_validate( - self, - results: List[EvaluationResult], - admitted_results: List[EvaluationResult], - ) -> int: - """Stage 4 extended VALIDATE: causal + regime + capacity + significance. - - Runs Phase 2 validation on admitted candidates and revokes admission - for those that fail. 
Returns the number of Phase 2 rejections. - """ - if not admitted_results: - self._no_admission_streak += 1 - return 0 - - rejected = 0 - - # Collect admitted results that still have signals for extended checks - to_check = [r for r in admitted_results if r.signals is not None] - if not to_check: - self._no_admission_streak = 0 if any(r.admitted for r in admitted_results) else self._no_admission_streak + 1 - return 0 - - # -- Causal validation -- - if self._causal_config is not None: - rejected += self._validate_causal(to_check, results) - - # -- Regime validation -- - if self._regime_evaluator is not None: - rejected += self._validate_regime(to_check, results) - - # -- Capacity validation -- - if self._capacity_estimator is not None: - rejected += self._validate_capacity(to_check, results) - - # -- Significance testing (batch-level FDR) -- - if self._bootstrap_tester is not None and self._fdr_controller is not None: - rejected += self._validate_significance(to_check, results) - - if rejected > 0: - logger.info( - "Helix: Phase 2 validation rejected %d/%d admitted candidates", - rejected, - len(admitted_results), - ) - - if any(r.admitted for r in admitted_results): - self._no_admission_streak = 0 - else: - self._no_admission_streak += 1 - - return rejected - - def _validate_causal( - self, - to_check: List[EvaluationResult], - all_results: List[EvaluationResult], - ) -> int: - """Run causal validation (Granger + intervention) on admitted candidates.""" - CausalValidatorCls, _ = _try_import_causal() - if CausalValidatorCls is None: - return 0 - - # Collect library signals for controls - library_signals: Dict[str, np.ndarray] = {} - for f in self.library.list_factors(): - if f.signals is not None: - library_signals[f.name] = f.signals - - try: - validator = CausalValidatorCls( - returns=self.returns, - data_tensor=self.data_tensor, - library_signals=library_signals, - config=self._causal_config, - ) - except Exception as exc: - logger.warning("Helix: causal 
validator creation failed: %s", exc) - return 0 - - rejected = 0 - threshold = getattr( - self._causal_config, "robustness_threshold", 0.4 - ) - - for r in to_check: - if not r.admitted or r.signals is None: - continue - try: - result = validator.validate(r.factor_name, r.signals) - if not result.passes: - self._revoke_admission(r, all_results, - f"Causal: robustness_score={result.robustness_score:.3f} < {threshold}" - ) - rejected += 1 - logger.debug( - "Helix: causal rejection for '%s' (score=%.3f)", - r.factor_name, - result.robustness_score, - ) - except Exception as exc: - logger.warning( - "Helix: causal validation error for '%s': %s", - r.factor_name, - exc, - ) - - return rejected - - def _validate_regime( - self, - to_check: List[EvaluationResult], - all_results: List[EvaluationResult], - ) -> int: - """Run regime-aware IC evaluation on admitted candidates.""" - if self._regime_evaluator is None: - return 0 - - rejected = 0 - for r in to_check: - if not r.admitted or r.signals is None: - continue - try: - result = self._regime_evaluator.evaluate(r.factor_name, r.signals) - if not result.passes: - self._revoke_admission(r, all_results, - f"Regime: only {result.n_regimes_passing} regimes passing " - f"(need {getattr(self._regime_config, 'min_regimes_passing', 2)})" - ) - rejected += 1 - logger.debug( - "Helix: regime rejection for '%s' (%d regimes passing)", - r.factor_name, - result.n_regimes_passing, - ) - except Exception as exc: - logger.warning( - "Helix: regime validation error for '%s': %s", - r.factor_name, - exc, - ) - - return rejected - - def _validate_capacity( - self, - to_check: List[EvaluationResult], - all_results: List[EvaluationResult], - ) -> int: - """Run capacity-aware cost evaluation on admitted candidates.""" - if self._capacity_estimator is None: - return 0 - - rejected = 0 - net_icir_threshold = getattr( - self._capacity_config, "net_icir_threshold", 0.3 - ) - - for r in to_check: - if not r.admitted or r.signals is None: - continue 
- try: - result = self._capacity_estimator.net_cost_evaluation( - factor_name=r.factor_name, - signals=r.signals, - ) - if not result.passes_net_threshold: - self._revoke_admission(r, all_results, - f"Capacity: net_icir={result.net_icir:.3f} < {net_icir_threshold}" - ) - rejected += 1 - logger.debug( - "Helix: capacity rejection for '%s' (net_icir=%.3f)", - r.factor_name, - result.net_icir, - ) - except Exception as exc: - logger.warning( - "Helix: capacity validation error for '%s': %s", - r.factor_name, - exc, - ) - - return rejected - - def _validate_significance( - self, - to_check: List[EvaluationResult], - all_results: List[EvaluationResult], - ) -> int: - """Run bootstrap CI + batch-level FDR correction on admitted candidates.""" - if self._bootstrap_tester is None or self._fdr_controller is None: - return 0 - - # Compute IC series for each admitted candidate and gather p-values - ic_series_map: Dict[str, np.ndarray] = {} - result_map: Dict[str, EvaluationResult] = {} - - for r in to_check: - if not r.admitted or r.signals is None: - continue - try: - ic_series = compute_ic(r.signals, self.returns) - ic_series_map[r.factor_name] = ic_series - result_map[r.factor_name] = r - except Exception as exc: - logger.warning( - "Helix: IC computation error for '%s': %s", - r.factor_name, - exc, - ) - - if not ic_series_map: - return 0 - - try: - fdr_result = self._fdr_controller.batch_evaluate( - ic_series_map, self._bootstrap_tester - ) - except Exception as exc: - logger.warning("Helix: FDR batch evaluation failed: %s", exc) - return 0 - - rejected = 0 - for name, is_sig in fdr_result.significant.items(): - if not is_sig: - r = result_map.get(name) - if r is not None and r.admitted: - adj_p = fdr_result.adjusted_p_values.get(name, 1.0) - self._revoke_admission(r, all_results, - f"Significance: FDR-adjusted p={adj_p:.4f} > " - f"{getattr(self._significance_config, 'fdr_level', 0.05)}" - ) - rejected += 1 - logger.debug( - "Helix: significance rejection for '%s' 
(adj_p=%.4f)", - name, - adj_p, - ) - - return rejected - - def _revoke_admission( - self, - result: EvaluationResult, - all_results: List[EvaluationResult], - reason: str, - ) -> None: - """Revoke a previously admitted candidate from the library. - - Updates the EvaluationResult and removes the factor from the library. - """ - result.admitted = False - result.rejection_reason = reason - - # Find and remove from library by name+formula match - try: - for factor in self.library.list_factors(): - if ( - factor.name == result.factor_name - and factor.formula == result.formula - ): - self.library.remove_factor(factor.id) - self._remove_semantic_artifacts(result.factor_name) - logger.debug( - "Helix: revoked factor '%s' (id=%d): %s", - result.factor_name, - factor.id, - reason, - ) - return - except Exception as exc: - logger.warning( - "Helix: failed to revoke factor '%s': %s", - result.factor_name, - exc, - ) - - self._remove_semantic_artifacts(result.factor_name) - - # ------------------------------------------------------------------ - # Stage 5: Enhanced distillation - # ------------------------------------------------------------------ - - def _helix_distill( - self, - results: List[EvaluationResult], - admitted_results: List[EvaluationResult], - ) -> None: - """Stage 5 DISTILL: KG update + embeddings + online forgetting.""" - - # -- Knowledge graph updates -- - if self._kg is not None: - self._update_knowledge_graph(results, admitted_results) - - # -- Embed newly admitted factors -- - if self._embedder is not None: - for r in admitted_results: - if r.admitted: - try: - self._embedder.embed(r.factor_name, r.formula) - except Exception as exc: - logger.debug( - "Helix: embedding failed for '%s': %s", - r.factor_name, - exc, - ) - - # -- Online forgetting -- - self._apply_forgetting() - - def _update_knowledge_graph( - self, - results: List[EvaluationResult], - admitted_results: List[EvaluationResult], - ) -> None: - """Update the knowledge graph with new factor 
nodes and edges.""" - _, FactorNodeCls = _try_import_kg() - if FactorNodeCls is None or self._kg is None: - return - - for r in admitted_results: - if not r.admitted: - continue - - # Extract operators and features from formula - operators = self._extract_operators(r.formula) - features = self._extract_features(r.formula) - - node = FactorNodeCls( - factor_id=r.factor_name, - formula=r.formula, - ic_mean=r.ic_mean, - category=self._infer_category(r.formula), - operators=operators, - features=features, - batch_number=self.iteration, - admitted=True, - ) - if self._embedder is not None: - try: - node.embedding = self._embedder.embed(r.factor_name, r.formula) - except Exception as exc: - logger.debug( - "Helix: failed to attach embedding for '%s': %s", - r.factor_name, - exc, - ) - - try: - self._kg.add_factor(node) - except Exception as exc: - logger.debug( - "Helix: failed to add factor to KG: %s", exc - ) - continue - - # Add correlation edges with existing library factors - if r.signals is not None: - for factor in self.library.list_factors(): - if factor.name == r.factor_name: - continue - if factor.signals is not None: - try: - corr = self.library._compute_correlation_vectorized( - r.signals, factor.signals - ) - self._kg.add_correlation_edge( - r.factor_name, - factor.name, - rho=corr, - threshold=0.4, - ) - except Exception: - pass - - # Detect derivation (mutation) relationships - self._detect_derivation(r, operators) - - def _remove_semantic_artifacts(self, factor_id: str) -> None: - """Remove a factor from derived semantic stores if present.""" - if self._kg is not None: - try: - self._kg.remove_factor(factor_id) - except Exception as exc: - logger.debug( - "Helix: failed to remove factor '%s' from KG: %s", - factor_id, - exc, - ) - - if self._embedder is not None: - try: - self._embedder.remove(factor_id) - except Exception as exc: - logger.debug( - "Helix: failed to remove factor '%s' from embedder: %s", - factor_id, - exc, - ) - - def _detect_derivation( 
- self, - result: EvaluationResult, - new_operators: List[str], - ) -> None: - """Detect if a new factor is a mutation of an existing one. - - Compares operator sets: if the new factor shares >50% of operators - with an existing factor but has at least one different operator, - it is considered a derivation (mutation). - """ - if self._kg is None: - return - - new_ops = set(new_operators) - if not new_ops: - return - - for factor in self.library.list_factors(): - if factor.name == result.factor_name: - continue - - existing_ops = set(self._extract_operators(factor.formula)) - if not existing_ops: - continue - - shared = new_ops & existing_ops - if not shared: - continue - - # More than 50% shared but not identical - overlap = len(shared) / max(len(new_ops), len(existing_ops)) - if 0.5 <= overlap < 1.0: - diff_ops = (new_ops - existing_ops) | (existing_ops - new_ops) - mutation_type = f"operator_change:{','.join(sorted(diff_ops))}" - try: - self._kg.add_derivation_edge( - child=result.factor_name, - parent=factor.name, - mutation_type=mutation_type, - ) - except Exception: - pass - - def _apply_forgetting(self) -> None: - """Apply online forgetting: exponential decay on memory patterns. - - - Decay occurrence_count of all success patterns by forgetting_lambda. - - If no admissions for 20+ consecutive iterations, demote success_rate. 
- """ - lam = self._forgetting_lambda - - for pattern in self.memory.success_patterns: - # Decay occurrence count - if hasattr(pattern, "occurrence_count"): - pattern.occurrence_count = int( - pattern.occurrence_count * lam - ) - - # Demote success_rate after prolonged drought - if self._no_admission_streak >= 20: - for pattern in self.memory.success_patterns: - if hasattr(pattern, "success_rate"): - current = pattern.success_rate - if current == "High": - pattern.success_rate = "Medium" - elif current == "Medium": - pattern.success_rate = "Low" - logger.info( - "Helix: demoted success rates after %d iterations without admissions", - self._no_admission_streak, - ) - - # ------------------------------------------------------------------ - # Auto-invention - # ------------------------------------------------------------------ - - def _run_auto_invention(self) -> None: - """Periodically propose, validate, and register new operators. - - Uses the OperatorInventor to generate novel operators from - successful pattern context, then validates and registers them - via CustomOperatorStore. 
- """ - if self._auto_inventor is None: - return - - logger.info("Helix: running auto-invention at iteration %d", self.iteration) - - # Gather existing operators - try: - from factorminer.core.types import OPERATOR_REGISTRY as SPEC_REG - existing_ops = dict(SPEC_REG) - except ImportError: - existing_ops = {} - - # Gather successful pattern descriptions - patterns = [] - for pat in self.memory.success_patterns[:10]: - patterns.append(f"{pat.name}: {pat.description}") - - try: - proposals = self._auto_inventor.propose_operators( - existing_operators=existing_ops, - successful_patterns=patterns, - ) - except Exception as exc: - logger.warning("Helix: auto-invention proposal failed: %s", exc) - return - - self.budget.record_llm_call() - - validated = 0 - for proposal in proposals: - try: - val_result = self._auto_inventor.validate_operator(proposal) - if val_result.valid: - self._register_invented_operator(proposal, val_result) - validated += 1 - else: - logger.debug( - "Helix: operator '%s' failed validation: %s", - proposal.name, - val_result.error, - ) - except Exception as exc: - logger.warning( - "Helix: operator validation error for '%s': %s", - proposal.name, - exc, - ) - - logger.info( - "Helix: auto-invention: %d/%d proposals validated and registered", - validated, - len(proposals), - ) - - def _register_invented_operator( - self, - proposal: Any, - val_result: Any, - ) -> None: - """Register a validated auto-invented operator.""" - if self._custom_op_store is None: - logger.warning( - "Helix: no custom operator store; cannot register '%s'", - proposal.name, - ) - return - - try: - from factorminer.operators.custom import CustomOperator - from factorminer.core.types import OperatorSpec, OperatorType, SignatureType - - spec = OperatorSpec( - name=proposal.name, - arity=proposal.arity, - category=OperatorType.AUTO_INVENTED, - signature=SignatureType.TIME_SERIES_TO_TIME_SERIES, - param_names=proposal.param_names, - param_defaults=proposal.param_defaults, - 
param_ranges={ - k: tuple(v) for k, v in proposal.param_ranges.items() - }, - description=proposal.description, - ) - - # Compile the function - from factorminer.operators.custom import _compile_operator_code - fn = _compile_operator_code(proposal.numpy_code) - if fn is None: - logger.warning( - "Helix: failed to compile invented operator '%s'", - proposal.name, - ) - return - - custom_op = CustomOperator( - name=proposal.name, - spec=spec, - numpy_code=proposal.numpy_code, - numpy_fn=fn, - validation_ic=val_result.ic_contribution, - invention_iteration=self.iteration, - rationale=proposal.rationale, - ) - - self._custom_op_store.register(custom_op) - logger.info( - "Helix: registered auto-invented operator '%s' (IC=%.4f)", - proposal.name, - val_result.ic_contribution, - ) - except Exception as exc: - logger.warning( - "Helix: failed to register operator '%s': %s", - proposal.name, - exc, - ) - - # ------------------------------------------------------------------ - # Enhanced checkpointing - # ------------------------------------------------------------------ - - def _checkpoint(self) -> None: - """Save a periodic checkpoint including Phase 2 state.""" - try: - self.save_session() - except Exception as exc: - logger.warning("Helix: checkpoint failed: %s", exc) - - def save_session(self, path: Optional[str] = None) -> str: - """Save the full mining session state including Phase 2 components. - - Extends the base RalphLoop save with: - - Knowledge graph serialization - - Custom operator store persistence - - Parameters - ---------- - path : str, optional - Directory for the checkpoint. - - Returns - ------- - str - Path to the saved session directory. 
- """ - # Base save - checkpoint_path = super().save_session(path) - checkpoint_dir = Path(checkpoint_path) - - # Save knowledge graph - if self._kg is not None: - try: - kg_path = checkpoint_dir / "knowledge_graph.json" - self._kg.save(kg_path) - logger.debug("Helix: saved knowledge graph to %s", kg_path) - except Exception as exc: - logger.warning("Helix: failed to save knowledge graph: %s", exc) - - # Save custom operators - if self._custom_op_store is not None: - try: - self._custom_op_store.save() - logger.debug("Helix: saved custom operators") - except Exception as exc: - logger.warning("Helix: failed to save custom operators: %s", exc) - - # Save helix-specific state - helix_state = { - "no_admission_streak": self._no_admission_streak, - "forgetting_lambda": self._forgetting_lambda, - "canonicalize": self._canonicalize, - "enable_knowledge_graph": self._enable_kg, - "enable_embeddings": self._enable_embeddings, - "enable_auto_inventor": self._enable_auto_inventor, - } - try: - with open(checkpoint_dir / "helix_state.json", "w") as f: - json.dump(helix_state, f, indent=2) - except Exception as exc: - logger.warning("Helix: failed to save helix state: %s", exc) - - if self._session is not None: - self._refresh_run_manifest( - output_dir=str(checkpoint_dir.parent), - artifact_paths={ - "library": str(checkpoint_dir / "library.json"), - "memory": str(checkpoint_dir / "memory.json"), - "session": str(checkpoint_dir / "session.json"), - "run_manifest": str(checkpoint_dir / "run_manifest.json"), - "loop_state": str(checkpoint_dir / "loop_state.json"), - "helix_state": str(checkpoint_dir / "helix_state.json"), - "knowledge_graph": str(checkpoint_dir / "knowledge_graph.json"), - }, - ) - self._persist_run_manifest(checkpoint_dir / "run_manifest.json") - try: - self._session.save(checkpoint_dir / "session.json") - except Exception as exc: - logger.warning("Helix: failed to save session metadata: %s", exc) - - return checkpoint_path - - def load_session(self, path: 
str) -> None: - """Resume a mining session from a saved checkpoint. - - Extends the base RalphLoop load with Phase 2 state restoration. - - Parameters - ---------- - path : str - Path to the checkpoint directory. - """ - super().load_session(path) - checkpoint_dir = Path(path) - - # Load knowledge graph - if self._kg is not None: - kg_path = checkpoint_dir / "knowledge_graph.json" - if kg_path.exists(): - KGCls, _ = _try_import_kg() - if KGCls is not None: - try: - self._kg = KGCls.load(kg_path) - logger.info( - "Helix: loaded knowledge graph (%d factors, %d edges)", - self._kg.get_factor_count(), - self._kg.get_edge_count(), - ) - except Exception as exc: - logger.warning( - "Helix: failed to load knowledge graph: %s", exc - ) - - # Load custom operators - if self._custom_op_store is not None: - try: - self._custom_op_store.load() - except Exception as exc: - logger.warning( - "Helix: failed to load custom operators: %s", exc - ) - - # Load helix-specific state - helix_state_path = checkpoint_dir / "helix_state.json" - if helix_state_path.exists(): - try: - with open(helix_state_path) as f: - helix_state = json.load(f) - self._no_admission_streak = helix_state.get( - "no_admission_streak", 0 - ) - logger.info( - "Helix: restored helix state (streak=%d)", - self._no_admission_streak, - ) - except Exception as exc: - logger.warning( - "Helix: failed to load helix state: %s", exc - ) - - self._prime_embedder_from_library() - if self._session is not None and self._session.run_manifest: - self._run_manifest = dict(self._session.run_manifest) - else: - run_manifest_path = checkpoint_dir / "run_manifest.json" - if run_manifest_path.exists(): - try: - with open(run_manifest_path) as f: - self._run_manifest = json.load(f) - except Exception as exc: - logger.warning( - "Helix: failed to load run manifest: %s", exc - ) - - def _loop_type(self) -> str: - """Label the loop for provenance and manifests.""" - return "helix" - - def _phase2_features(self) -> List[str]: - """List 
the enabled Helix Phase 2 features.""" - features: List[str] = [] - if self._debate_generator is not None: - features.append("debate") - if self._canonicalizer is not None: - features.append("canonicalization") - if self._kg is not None: - features.append("knowledge_graph") - if self._embedder is not None: - features.append("embeddings") - if self._causal_validator is not None: - features.append("causal") - if self._regime_evaluator is not None: - features.append("regime") - if self._capacity_estimator is not None: - features.append("capacity") - if self._bootstrap_tester is not None and self._fdr_controller is not None: - features.append("significance") - if self._auto_inventor is not None: - features.append("auto_inventor") - return features - - def _generator_family(self) -> str: - """Return the active Helix generator label for provenance.""" - if self._debate_generator is not None: - return self._debate_generator.__class__.__name__ - return super()._generator_family() - - # ------------------------------------------------------------------ - # Utility helpers - # ------------------------------------------------------------------ - - @staticmethod - def _extract_operators(formula: str) -> List[str]: - """Extract operator names from a DSL formula string.""" - return re.findall(r"([A-Z][a-zA-Z]+)\(", formula) - - @staticmethod - def _extract_features(formula: str) -> List[str]: - """Extract feature names (e.g. 
$close, $volume) from a formula.""" - return re.findall(r"\$[a-zA-Z_]+", formula) - - def _prime_embedder_from_library(self) -> None: - """Seed the embedder cache from the currently admitted library.""" - if self._embedder is None: - return - - try: - self._embedder.clear() - except Exception as exc: - logger.debug("Helix: failed to clear embedder before priming: %s", exc) - return - - for factor in self.library.list_factors(): - if not factor.formula: - continue - try: - self._embedder.embed(factor.name, factor.formula) - except Exception as exc: - logger.debug( - "Helix: failed to prime embedding for '%s': %s", - factor.name, - exc, - ) - - def _semantic_duplicate_target(self, formula: str) -> Optional[str]: - """Return the matched library factor if embeddings flag a near-duplicate.""" - if self._embedder is None or self.library.size == 0: - return None - - try: - return self._embedder.is_semantic_duplicate(formula) - except Exception as exc: - logger.debug("Helix: semantic duplicate check failed: %s", exc) - return None diff --git a/src/factorminer/factorminer/core/library_io.py b/src/factorminer/factorminer/core/library_io.py deleted file mode 100644 index 4353c90..0000000 --- a/src/factorminer/factorminer/core/library_io.py +++ /dev/null @@ -1,921 +0,0 @@ -"""Serialization and I/O for the FactorLibrary. - -Provides save/load to JSON + optional binary signal cache (.npz), -CSV export, formula export, and import of the 110 factors from the paper. 
-""" - -from __future__ import annotations - -import csv -import json -import logging -from pathlib import Path -from typing import Dict, List, Optional, Union - -import numpy as np - -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary - -logger = logging.getLogger(__name__) - - -# ====================================================================== -# Save / Load -# ====================================================================== - -def save_library( - library: FactorLibrary, - path: Union[str, Path], - save_signals: bool = True, -) -> None: - """Save a FactorLibrary to disk. - - Creates two files: - - ``.json`` -- factor metadata and library configuration - - ``_signals.npz`` -- binary signal cache (if save_signals=True - and any factors have signals) - - Parameters - ---------- - library : FactorLibrary - path : str or Path - Base path (without extension). E.g. ``"output/my_library"`` produces - ``output/my_library.json`` and ``output/my_library_signals.npz``. - save_signals : bool - Whether to write the binary signal cache. 
- """ - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - # -- Metadata JSON -- - meta = { - "correlation_threshold": library.correlation_threshold, - "ic_threshold": library.ic_threshold, - "next_id": library._next_id, - "factors": [f.to_dict() for f in library.list_factors()], - } - if library.correlation_matrix is not None: - meta["correlation_matrix"] = library.correlation_matrix.tolist() - meta["id_to_index"] = {str(k): v for k, v in library._id_to_index.items()} - - json_path = path.with_suffix(".json") - with open(json_path, "w") as fp: - json.dump(meta, fp, indent=2) - logger.info("Saved library metadata to %s (%d factors)", json_path, library.size) - - # -- Binary signal cache -- - if save_signals: - signal_arrays: Dict[str, np.ndarray] = {} - for f in library.list_factors(): - if f.signals is not None: - signal_arrays[f"factor_{f.id}"] = f.signals - - if signal_arrays: - npz_path = Path(str(path) + "_signals.npz") - np.savez_compressed(npz_path, **signal_arrays) - logger.info( - "Saved signal cache to %s (%d arrays)", - npz_path, len(signal_arrays), - ) - - -def load_library(path: Union[str, Path]) -> FactorLibrary: - """Load a FactorLibrary from disk. - - Parameters - ---------- - path : str or Path - Base path (without extension). Will look for ``.json`` and - optionally ``_signals.npz``. 
- - Returns - ------- - FactorLibrary - """ - path = Path(path) - json_path = path.with_suffix(".json") - - with open(json_path, "r") as fp: - meta = json.load(fp) - - library = FactorLibrary( - correlation_threshold=meta.get("correlation_threshold", 0.5), - ic_threshold=meta.get("ic_threshold", 0.04), - ) - library._next_id = meta.get("next_id", 1) - - # Restore factors - for fd in meta.get("factors", []): - factor = Factor.from_dict(fd) - library.factors[factor.id] = factor - - # Restore correlation matrix - if "correlation_matrix" in meta and meta["correlation_matrix"] is not None: - library.correlation_matrix = np.array( - meta["correlation_matrix"], dtype=np.float64 - ) - - # Restore id-to-index mapping - if "id_to_index" in meta: - library._id_to_index = { - int(k): v for k, v in meta["id_to_index"].items() - } - - # Load signal cache if present - npz_path = Path(str(path) + "_signals.npz") - if npz_path.exists(): - data = np.load(npz_path) - for f in library.factors.values(): - key = f"factor_{f.id}" - if key in data: - f.signals = data[key] - data.close() - logger.info("Loaded signal cache from %s", npz_path) - - logger.info( - "Loaded library from %s (%d factors)", json_path, library.size - ) - return library - - -# ====================================================================== -# Export utilities -# ====================================================================== - -def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None: - """Export the factor table to CSV. 
- - Columns: ID, Name, Formula, Category, IC_Mean, ICIR, IC_Win_Rate, - Max_Correlation, Batch, Admission_Date - """ - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - fieldnames = [ - "ID", "Name", "Formula", "Category", "IC_Mean", "ICIR", - "IC_Win_Rate", "Max_Correlation", "Batch", "Admission_Date", - ] - - with open(path, "w", newline="") as fp: - writer = csv.DictWriter(fp, fieldnames=fieldnames) - writer.writeheader() - for f in library.list_factors(): - writer.writerow({ - "ID": f.id, - "Name": f.name, - "Formula": f.formula, - "Category": f.category, - "IC_Mean": f"{f.ic_mean:.6f}", - "ICIR": f"{f.icir:.6f}", - "IC_Win_Rate": f"{f.ic_win_rate:.4f}", - "Max_Correlation": f"{f.max_correlation:.4f}", - "Batch": f.batch_number, - "Admission_Date": f.admission_date, - }) - - logger.info("Exported %d factors to %s", library.size, path) - - -def export_formulas(library: FactorLibrary, path: Union[str, Path]) -> None: - """Export just the formulas for reproduction. - - One formula per line, prefixed with the factor ID and name. - Format: ``ID | Name | Formula`` - """ - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - with open(path, "w") as fp: - fp.write("# FactorMiner Library Formulas\n") - fp.write("# ID | Name | Formula\n") - fp.write(f"# Total: {library.size} factors\n") - fp.write("#" + "-" * 78 + "\n") - for f in library.list_factors(): - fp.write(f"{f.id:04d} | {f.name} | {f.formula}\n") - - logger.info("Exported %d formulas to %s", library.size, path) - - -# ====================================================================== -# Paper factor catalog (110 factors from Appendix P) -# ====================================================================== - -# Representative subset of the 110 factors discovered by FactorMiner. 
-# Each entry: (name, formula, category) -PAPER_FACTORS: List[Dict[str, str]] = [ - # Factor 001 - { - "name": "Intraday Range Position", - "formula": "Neg(CsRank(Div(Sub($close, TsMin($close, 48)), Add(Sub(TsMax($close, 48), TsMin($close, 48)), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 002 - { - "name": "Volume-Weighted Momentum", - "formula": "Neg(CsRank(Mul(Return($close, 5), Div($volume, Mean($volume, 20)))))", - "category": "Momentum", - }, - # Factor 003 - { - "name": "Residual Volatility", - "formula": "Neg(CsRank(Std(Sub($close, EMA($close, 10)), 20)))", - "category": "Volatility", - }, - # Factor 004 - { - "name": "Intraday Amplitude Ratio", - "formula": "Neg(CsRank(Div(Sub($high, $low), Add($close, 1e-8))))", - "category": "Volatility", - }, - # Factor 005 - { - "name": "Volume Surprise", - "formula": "Neg(CsRank(Div(Sub($volume, Mean($volume, 20)), Add(Std($volume, 20), 1e-8))))", - "category": "Volume", - }, - # Factor 006 - { - "name": "VWAP Deviation", - "formula": "Neg(Div(Sub($close, $vwap), $vwap))", - "category": "VWAP", - }, - # Factor 007 - { - "name": "Short-term Reversal", - "formula": "Neg(CsRank(Return($close, 3)))", - "category": "Mean-reversion", - }, - # Factor 008 - { - "name": "Turnover Momentum", - "formula": "Neg(CsRank(Delta(Div($amt, Add($volume, 1e-8)), 5)))", - "category": "Turnover", - }, - # Factor 009 - { - "name": "High-Low Midpoint Reversion", - "formula": "Neg(CsRank(Sub($close, Div(Add($high, $low), 2))))", - "category": "Mean-reversion", - }, - # Factor 010 - { - "name": "Rolling Beta Residual", - "formula": "Neg(CsRank(Resid($returns, Mean($returns, 20), 20)))", - "category": "Risk", - }, - # Factor 011 - { - "name": "VWAP Slope", - "formula": "Neg(CsRank(TsLinRegSlope(Div(Sub($close, $vwap), $vwap), 10)))", - "category": "VWAP", - }, - # Factor 012 - { - "name": "Accumulation-Distribution", - "formula": "Neg(CsRank(Sum(Mul(Div(Sub(Mul(2, $close), Add($high, $low)), Add(Sub($high, $low), 1e-8)), $volume), 
10)))", - "category": "Volume", - }, - # Factor 013 - { - "name": "Relative Strength Index Deviation", - "formula": "Neg(CsRank(Sub(Mean(Max(Delta($close, 1), 0), 14), Mean(Abs(Min(Delta($close, 1), 0)), 14))))", - "category": "Momentum", - }, - # Factor 014 - { - "name": "Price-Volume Correlation", - "formula": "Neg(Corr($close, $volume, 10))", - "category": "Volume", - }, - # Factor 015 - { - "name": "Skewness of Returns", - "formula": "Neg(CsRank(Skew($returns, 20)))", - "category": "Higher-moment", - }, - # Factor 016 - { - "name": "Kurtosis of Returns", - "formula": "Neg(CsRank(Kurt($returns, 20)))", - "category": "Higher-moment", - }, - # Factor 017 - { - "name": "Volume-Weighted Return", - "formula": "Neg(CsRank(Div(Sum(Mul($returns, $volume), 10), Add(Sum($volume, 10), 1e-8))))", - "category": "Volume", - }, - # Factor 018 - { - "name": "Close-to-High Ratio", - "formula": "Neg(CsRank(Div(Sub($high, $close), Add($high, 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 019 - { - "name": "Delayed Correlation Shift", - "formula": "Neg(CsRank(Sub(Corr($close, $volume, 10), Corr(Delay($close, 5), $volume, 10))))", - "category": "Volume", - }, - # Factor 020 - { - "name": "Exponential Momentum", - "formula": "Neg(CsRank(Sub($close, EMA($close, 20))))", - "category": "Momentum", - }, - # Factor 021 - { - "name": "Range-Adjusted Volume", - "formula": "Neg(CsRank(Div($volume, Add(Sub($high, $low), 1e-8))))", - "category": "Volume", - }, - # Factor 022 - { - "name": "Cumulative Return Rank", - "formula": "Neg(CsRank(Sum($returns, 10)))", - "category": "Momentum", - }, - # Factor 023 - { - "name": "VWAP Momentum", - "formula": "Neg(CsRank(Return($vwap, 5)))", - "category": "VWAP", - }, - # Factor 024 - { - "name": "Bollinger Band Position", - "formula": "Neg(CsRank(Div(Sub($close, Mean($close, 20)), Add(Std($close, 20), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 025 - { - "name": "Volume Decay Weighted", - "formula": "Neg(CsRank(Decay($volume, 
10)))", - "category": "Volume", - }, - # Factor 026 - { - "name": "Overnight Return", - "formula": "Neg(CsRank(Div(Sub($open, Delay($close, 1)), Add(Delay($close, 1), 1e-8))))", - "category": "Overnight", - }, - # Factor 027 - { - "name": "Intraday Return", - "formula": "Neg(CsRank(Div(Sub($close, $open), Add($open, 1e-8))))", - "category": "Intraday", - }, - # Factor 028 - { - "name": "Max Drawdown", - "formula": "Neg(CsRank(Div(Sub($close, TsMax($close, 20)), Add(TsMax($close, 20), 1e-8))))", - "category": "Risk", - }, - # Factor 029 - { - "name": "Hurst Exponent Proxy", - "formula": "Neg(CsRank(Div(Std($returns, 20), Add(Std($returns, 5), 1e-8))))", - "category": "Volatility", - }, - # Factor 030 - { - "name": "Volume Imbalance", - "formula": "Neg(CsRank(Sub(Mean($volume, 5), Mean($volume, 20))))", - "category": "Volume", - }, - # Factor 031 - { - "name": "Weighted Close Position", - "formula": "Neg(CsRank(Div(Sub(Mul(2, $close), Add($high, $low)), Add(Sub($high, $low), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 032 - { - "name": "Trend Intensity", - "formula": "Neg(CsRank(Div(Abs(Delta($close, 10)), Add(Sum(Abs(Delta($close, 1)), 10), 1e-8))))", - "category": "Trend", - }, - # Factor 033 - { - "name": "Return Dispersion", - "formula": "Neg(CsRank(Std($returns, 5)))", - "category": "Volatility", - }, - # Factor 034 - { - "name": "VWAP Relative Strength", - "formula": "Neg(CsRank(Div(Sub(Mean($close, 5), $vwap), Add($vwap, 1e-8))))", - "category": "VWAP", - }, - # Factor 035 - { - "name": "Rank Reversal", - "formula": "Neg(CsRank(Sub(TsRank($close, 10), TsRank($close, 30))))", - "category": "Mean-reversion", - }, - # Factor 036 - { - "name": "Money Flow Index", - "formula": "Neg(CsRank(Div(Sum(Mul(Max(Delta($close, 1), 0), $volume), 14), Add(Sum(Mul(Abs(Delta($close, 1)), $volume), 14), 1e-8))))", - "category": "Volume", - }, - # Factor 037 - { - "name": "Adaptive Momentum", - "formula": "Neg(CsRank(Mul(Return($close, 10), Div(Std($returns, 5), 
Add(Std($returns, 20), 1e-8)))))", - "category": "Momentum", - }, - # Factor 038 - { - "name": "Volume Trend", - "formula": "Neg(CsRank(TsLinRegSlope($volume, 10)))", - "category": "Volume", - }, - # Factor 039 - { - "name": "Price Acceleration", - "formula": "Neg(CsRank(Sub(Delta($close, 5), Delta(Delay($close, 5), 5))))", - "category": "Momentum", - }, - # Factor 040 - { - "name": "Realized Volatility Ratio", - "formula": "Neg(CsRank(Div(Std($returns, 10), Add(Std($returns, 30), 1e-8))))", - "category": "Volatility", - }, - # Factor 041 - { - "name": "Amount Concentration", - "formula": "Neg(CsRank(Div(TsMax($amt, 5), Add(Mean($amt, 20), 1e-8))))", - "category": "Turnover", - }, - # Factor 042 - { - "name": "Cross-Sectional Volume Rank", - "formula": "Neg(CsRank(Div($volume, Add(Mean($volume, 60), 1e-8))))", - "category": "Volume", - }, - # Factor 043 - { - "name": "Gap Momentum", - "formula": "Neg(CsRank(Sum(Div(Sub($open, Delay($close, 1)), Add(Delay($close, 1), 1e-8)), 5)))", - "category": "Overnight", - }, - # Factor 044 - { - "name": "VWAP Distance Decay", - "formula": "Neg(CsRank(Decay(Div(Sub($close, $vwap), Add($vwap, 1e-8)), 10)))", - "category": "VWAP", - }, - # Factor 045 - { - "name": "Tail Risk Indicator", - "formula": "Neg(CsRank(Div(TsMin($returns, 20), Add(Std($returns, 20), 1e-8))))", - "category": "Risk", - }, - # Factor 046 - { - "name": "Volatility-Regime Reversal Divergence", - "formula": "IfElse(Greater(Std($returns, 12), Mean(Std($returns, 12), 48)), Neg(CsRank(Delta($close, 3))), Neg(CsRank(Div(Sub($close, $low), Add(Sub($high, $low), 0.0001)))))", - "category": "Regime-switching", - }, - # Factor 047 - { - "name": "Regime Volume Signal", - "formula": "IfElse(Greater($volume, Mean($volume, 20)), Neg(CsRank($returns)), Neg(CsRank(Return($close, 5))))", - "category": "Regime-switching", - }, - # Factor 048 - { - "name": "Liquidity-Adjusted Reversal", - "formula": "Neg(CsRank(Mul(Return($close, 3), Div($volume, Add(Mean($volume, 20), 
1e-8)))))", - "category": "Mean-reversion", - }, - # Factor 049 - { - "name": "Cross-Sectional Volatility Rank", - "formula": "Neg(CsRank(CsRank(Std($returns, 10))))", - "category": "Volatility", - }, - # Factor 050 - { - "name": "VWAP Bollinger", - "formula": "Neg(CsRank(Div(Sub($vwap, Mean($vwap, 20)), Add(Std($vwap, 20), 1e-8))))", - "category": "VWAP", - }, - # Factor 051 - { - "name": "Smoothed Return Reversal", - "formula": "Neg(CsRank(EMA($returns, 5)))", - "category": "Mean-reversion", - }, - # Factor 052 - { - "name": "Volume-Price Divergence", - "formula": "Neg(CsRank(Sub(TsRank($volume, 10), TsRank($close, 10))))", - "category": "Volume", - }, - # Factor 053 - { - "name": "Decay Weighted Momentum", - "formula": "Neg(CsRank(Decay($returns, 20)))", - "category": "Momentum", - }, - # Factor 054 - { - "name": "Range Percentile", - "formula": "Neg(CsRank(Div(Sub($close, TsMin($close, 20)), Add(Sub(TsMax($close, 20), TsMin($close, 20)), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 055 - { - "name": "Volume Skewness", - "formula": "Neg(CsRank(Skew($volume, 20)))", - "category": "Volume", - }, - # Factor 056 - { - "name": "Residual Momentum", - "formula": "Neg(CsRank(TsLinRegResid($close, 20)))", - "category": "Momentum", - }, - # Factor 057 - { - "name": "VWAP Trend", - "formula": "Neg(CsRank(Delta(Div(Sub($close, $vwap), $vwap), 5)))", - "category": "VWAP", - }, - # Factor 058 - { - "name": "Return Autocorrelation", - "formula": "Neg(CsRank(Corr($returns, Delay($returns, 1), 10)))", - "category": "Mean-reversion", - }, - # Factor 059 - { - "name": "Price Efficiency", - "formula": "Neg(CsRank(Div(Abs(Sum($returns, 10)), Add(Sum(Abs($returns), 10), 1e-8))))", - "category": "Trend", - }, - # Factor 060 - { - "name": "Relative Volume Change", - "formula": "Neg(CsRank(Return($volume, 5)))", - "category": "Volume", - }, - # Factor 061 - { - "name": "Weighted VWAP Position", - "formula": "Neg(CsRank(WMA(Div(Sub($close, $vwap), $vwap), 10)))", - 
"category": "VWAP", - }, - # Factor 062 - { - "name": "Regime Momentum Flip", - "formula": "IfElse(Greater(Mean($returns, 5), 0), Neg(CsRank(Return($close, 10))), CsRank(Return($close, 3)))", - "category": "Regime-switching", - }, - # Factor 063 - { - "name": "High-Low Volatility", - "formula": "Neg(CsRank(Mean(Div(Sub($high, $low), Add($close, 1e-8)), 10)))", - "category": "Volatility", - }, - # Factor 064 - { - "name": "Opening Gap Reversal", - "formula": "Neg(CsRank(Div(Sub($open, Delay($close, 1)), Add(Std($returns, 10), 1e-8))))", - "category": "Overnight", - }, - # Factor 065 - { - "name": "Volume Momentum Spread", - "formula": "Neg(CsRank(Sub(Mean($volume, 5), Mean($volume, 40))))", - "category": "Volume", - }, - # Factor 066 - { - "name": "Regime Volume Reversal", - "formula": "IfElse(Greater(Div($volume, Add(Mean($volume, 20), 1e-8)), 1.5), Neg(CsRank($returns)), Neg(CsRank(Return($close, 10))))", - "category": "Regime-switching", - }, - # Factor 067 - { - "name": "Slope Reversal", - "formula": "Neg(CsRank(TsLinRegSlope($close, 5)))", - "category": "Mean-reversion", - }, - # Factor 068 - { - "name": "VWAP Momentum Decay", - "formula": "Neg(CsRank(Decay(Return($vwap, 3), 10)))", - "category": "VWAP", - }, - # Factor 069 - { - "name": "Turnover Rate Change", - "formula": "Neg(CsRank(Delta(Div($amt, Add($volume, 1e-8)), 10)))", - "category": "Turnover", - }, - # Factor 070 - { - "name": "Return Quantile Signal", - "formula": "Neg(CsRank(Quantile($returns, 20, 0.75)))", - "category": "Higher-moment", - }, - # Factor 071 - { - "name": "Double EMA Crossover", - "formula": "Neg(CsRank(Sub(EMA($close, 5), EMA($close, 20))))", - "category": "Trend", - }, - # Factor 072 - { - "name": "Conditional Volatility Return", - "formula": "Neg(CsRank(Div($returns, Add(Std($returns, 10), 1e-8))))", - "category": "Risk", - }, - # Factor 073 - { - "name": "Amplitude Trend", - "formula": "Neg(CsRank(TsLinRegSlope(Div(Sub($high, $low), Add($close, 1e-8)), 10)))", - "category": 
"Volatility", - }, - # Factor 074 - { - "name": "Volume-Weighted Range", - "formula": "Neg(CsRank(Mean(Mul(Div(Sub($high, $low), Add($close, 1e-8)), $volume), 10)))", - "category": "Volume", - }, - # Factor 075 - { - "name": "Intraday Efficiency Ratio", - "formula": "Neg(CsRank(Div(Abs(Sub($close, $open)), Add(Sub($high, $low), 1e-8))))", - "category": "Intraday", - }, - # Factor 076 - { - "name": "Cumulative Volume Signal", - "formula": "Neg(CsRank(Div(Sum(Mul($returns, $volume), 20), Add(Sum($volume, 20), 1e-8))))", - "category": "Volume", - }, - # Factor 077 - { - "name": "VWAP Cross-Sectional Momentum", - "formula": "Neg(CsRank(CsRank(Return($vwap, 10))))", - "category": "VWAP", - }, - # Factor 078 - { - "name": "Mean-Reversion Indicator", - "formula": "Neg(CsRank(Div(Sub($close, SMA($close, 10)), Add(SMA($close, 10), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 079 - { - "name": "Volume Regime Indicator", - "formula": "Neg(CsRank(Div(Std($volume, 5), Add(Std($volume, 20), 1e-8))))", - "category": "Volume", - }, - # Factor 080 - { - "name": "Return Persistence", - "formula": "Neg(CsRank(Mul(Sign(Delta($close, 1)), Sign(Delta($close, 5)))))", - "category": "Momentum", - }, - # Factor 081 - { - "name": "Regime Trend Strength", - "formula": "IfElse(Greater(Abs(TsLinRegSlope($close, 20)), Std($close, 20)), Neg(CsRank(TsLinRegSlope($close, 5))), Neg(CsRank(Return($close, 3))))", - "category": "Regime-switching", - }, - # Factor 082 - { - "name": "VWAP Dispersion", - "formula": "Neg(CsRank(Std(Div(Sub($close, $vwap), $vwap), 10)))", - "category": "VWAP", - }, - # Factor 083 - { - "name": "Smart Money Flow", - "formula": "Neg(CsRank(Sum(Mul(IfElse(Greater($close, Delay($close, 1)), $volume, Neg($volume)), Div(Sub($high, $low), Add($close, 1e-8))), 10)))", - "category": "Volume", - }, - # Factor 084 - { - "name": "Return Rank Dispersion", - "formula": "Neg(CsRank(Sub(TsRank($returns, 5), TsRank($returns, 20))))", - "category": "Mean-reversion", - }, - # 
Factor 085 - { - "name": "Volume Acceleration", - "formula": "Neg(CsRank(Sub(Delta($volume, 5), Delta(Delay($volume, 5), 5))))", - "category": "Volume", - }, - # Factor 086 - { - "name": "Close-Low Ratio Trend", - "formula": "Neg(CsRank(Mean(Div(Sub($close, $low), Add(Sub($high, $low), 1e-8)), 5)))", - "category": "Mean-reversion", - }, - # Factor 087 - { - "name": "Hull MA Deviation", - "formula": "Neg(CsRank(Div(Sub($close, HMA($close, 10)), Add(Std($close, 10), 1e-8))))", - "category": "Trend", - }, - # Factor 088 - { - "name": "DEMA Momentum Signal", - "formula": "Neg(CsRank(Sub(DEMA($close, 5), DEMA($close, 20))))", - "category": "Momentum", - }, - # Factor 089 - { - "name": "Volume Profile Skew", - "formula": "Neg(CsRank(Skew(Div($volume, Add(Mean($volume, 20), 1e-8)), 10)))", - "category": "Volume", - }, - # Factor 090 - { - "name": "Conditional VWAP Signal", - "formula": "IfElse(Greater($close, $vwap), Neg(CsRank(Div(Sub($close, $vwap), $vwap))), CsRank(Div(Sub($vwap, $close), $vwap)))", - "category": "VWAP", - }, - # Factor 091 - { - "name": "Extreme Volume Reversal", - "formula": "Neg(CsRank(Mul(IfElse(Greater($volume, Mul(2, Mean($volume, 20))), 1, 0), $returns)))", - "category": "Volume", - }, - # Factor 092 - { - "name": "Range Expansion Signal", - "formula": "Neg(CsRank(Div(Sub($high, $low), Add(Mean(Sub($high, $low), 20), 1e-8))))", - "category": "Volatility", - }, - # Factor 093 - { - "name": "Short-Term IC Momentum", - "formula": "Neg(CsRank(Sum(Mul(Sign($returns), Abs($returns)), 5)))", - "category": "Momentum", - }, - # Factor 094 - { - "name": "VWAP Curvature", - "formula": "Neg(CsRank(Sub(Div(Sub($vwap, Delay($vwap, 5)), Add(Delay($vwap, 5), 1e-8)), Div(Sub(Delay($vwap, 5), Delay($vwap, 10)), Add(Delay($vwap, 10), 1e-8)))))", - "category": "VWAP", - }, - # Factor 095 - { - "name": "Relative Strength", - "formula": "Neg(CsRank(Div(Return($close, 5), Add(Return($close, 20), 1e-8))))", - "category": "Momentum", - }, - # Factor 096 - { - "name": 
"Volume-Correlated Return", - "formula": "Neg(CsRank(Cov($returns, $volume, 10)))", - "category": "Volume", - }, - # Factor 097 - { - "name": "Regime Volatility Band", - "formula": "IfElse(Greater(Std($returns, 5), Mul(1.5, Std($returns, 20))), Neg(CsRank(Return($close, 1))), Neg(CsRank(Return($close, 10))))", - "category": "Regime-switching", - }, - # Factor 098 - { - "name": "Open-Close Spread Momentum", - "formula": "Neg(CsRank(Mean(Div(Sub($close, $open), Add($open, 1e-8)), 5)))", - "category": "Intraday", - }, - # Factor 099 - { - "name": "Volatility-Scaled Reversal", - "formula": "Neg(CsRank(Div(Return($close, 5), Add(Std($returns, 20), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 100 - { - "name": "VWAP Time-Weighted Signal", - "formula": "Neg(CsRank(WMA(Div(Sub($close, $vwap), Add($vwap, 1e-8)), 20)))", - "category": "VWAP", - }, - # Factor 101 - { - "name": "Covariance Structure Shift", - "formula": "Neg(CsRank(Sub(Cov($returns, $volume, 5), Cov($returns, $volume, 20))))", - "category": "Volume", - }, - # Factor 102 - { - "name": "Quadratic Regression Residual", - "formula": "Neg(CsRank(TsLinRegResid(Square($returns), 20)))", - "category": "Higher-moment", - }, - # Factor 103 - { - "name": "VWAP Mean-Reversion Strength", - "formula": "Neg(CsRank(Mul(Div(Sub($close, $vwap), $vwap), Div($volume, Add(Mean($volume, 20), 1e-8)))))", - "category": "VWAP", - }, - # Factor 104 - { - "name": "Multi-Scale Momentum", - "formula": "Neg(CsRank(Add(Return($close, 5), Return($close, 20))))", - "category": "Momentum", - }, - # Factor 105 - { - "name": "Relative High Position", - "formula": "Neg(CsRank(Div(Sub(TsMax($high, 20), $close), Add(TsMax($high, 20), 1e-8))))", - "category": "Mean-reversion", - }, - # Factor 106 - { - "name": "Turnover Volatility", - "formula": "Neg(CsRank(Std(Div($amt, Add($volume, 1e-8)), 10)))", - "category": "Turnover", - }, - # Factor 107 - { - "name": "Regime Correlation Signal", - "formula": "IfElse(Greater(Abs(Corr($close, 
$volume, 10)), 0.5), Neg(CsRank(Return($close, 3))), Neg(CsRank(Return($close, 10))))", - "category": "Regime-switching", - }, - # Factor 108 - { - "name": "Intraday Momentum Reversal", - "formula": "Neg(CsRank(Div(Sub($close, $open), Add(Sub($high, $low), 1e-8))))", - "category": "Intraday", - }, - # Factor 109 - { - "name": "Volume-Weighted Slope", - "formula": "Neg(CsRank(TsLinRegSlope(Mul($returns, $volume), 10)))", - "category": "Volume", - }, - # Factor 110 - { - "name": "Adaptive Range Reversal", - "formula": "IfElse(Greater(Std($returns, 10), Mean(Std($returns, 10), 40)), Neg(CsRank(Div(Sub($close, TsMin($close, 10)), Add(Sub(TsMax($close, 10), TsMin($close, 10)), 1e-8)))), Neg(CsRank(Return($close, 5))))", - "category": "Regime-switching", - }, -] - - -def import_from_paper( - path: Optional[Union[str, Path]] = None, -) -> FactorLibrary: - """Import the 110 factors from the paper's Appendix P. - - If *path* is given and points to a JSON file with a ``"factors"`` list, - those entries are loaded instead of the built-in catalog. Each entry - must have ``"name"``, ``"formula"``, and ``"category"`` keys. - - Parameters - ---------- - path : str or Path, optional - Optional JSON file to load factors from. - - Returns - ------- - FactorLibrary - A new library pre-populated with the paper's factors. Since no - market data is provided, signals are ``None`` and the correlation - matrix is not computed. 
- """ - if path is not None: - path = Path(path) - with open(path, "r") as fp: - raw = json.load(fp) - entries = raw if isinstance(raw, list) else raw.get("factors", []) - else: - entries = PAPER_FACTORS - - library = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - - for i, entry in enumerate(entries): - factor = Factor( - id=0, # Will be assigned by admit_factor - name=entry["name"], - formula=entry["formula"], - category=entry["category"], - ic_mean=entry.get("ic_mean", 0.0), - icir=entry.get("icir", 0.0), - ic_win_rate=entry.get("ic_win_rate", 0.0), - max_correlation=entry.get("max_correlation", 0.0), - batch_number=entry.get("batch_number", 0), - admission_date=entry.get("admission_date", ""), - signals=None, - ) - library.admit_factor(factor) - - logger.info( - "Imported %d factors from %s", - library.size, - path if path else "built-in paper catalog", - ) - return library diff --git a/src/factorminer/factorminer/core/parser.py b/src/factorminer/factorminer/core/parser.py deleted file mode 100644 index c71554c..0000000 --- a/src/factorminer/factorminer/core/parser.py +++ /dev/null @@ -1,374 +0,0 @@ -"""Recursive-descent parser for the FactorMiner factor DSL. - -Converts string formulas such as:: - - Neg(CsRank(Div(Sub($close, $vwap), $vwap))) - -into ``ExpressionTree`` objects backed by the operator registry defined in -:mod:`factorminer.core.types`. - -Grammar (informal) ------------------- - -:: - - expression := function_call | feature_ref | number - function_call := IDENTIFIER '(' arg_list ')' - arg_list := expression (',' expression)* - feature_ref := '$' IDENTIFIER - number := ['-'] DIGITS ['.' 
DIGITS] [('e'|'E') ['-'|'+'] DIGITS] - -Usage ------ - ->>> from factorminer.core.parser import parse ->>> tree = parse("Neg(Div(Sub($close, $vwap), $vwap))") ->>> tree.to_string() -'Neg(Div(Sub($close, $vwap), $vwap))' -""" - -from __future__ import annotations - -import re -from dataclasses import dataclass -from enum import Enum, auto -from typing import Dict, List, Optional, Tuple - -from src.factorminer.factorminer.core.expression_tree import ( - ConstantNode, - ExpressionTree, - LeafNode, - Node, - OperatorNode, -) -from src.factorminer.factorminer.core.types import FEATURE_SET, OPERATOR_REGISTRY, OperatorSpec - - -# --------------------------------------------------------------------------- -# Tokenizer -# --------------------------------------------------------------------------- - -class TokenType(Enum): - IDENT = auto() # operator / function name - FEATURE = auto() # $close, $volume, ... - NUMBER = auto() # 0.0001, -3, 1e-6, ... - LPAREN = auto() # ( - RPAREN = auto() # ) - COMMA = auto() # , - EOF = auto() - - -@dataclass -class Token: - type: TokenType - value: str - pos: int # character position in the source string - - def __repr__(self) -> str: - return f"Token({self.type.name}, {self.value!r}, pos={self.pos})" - - -# Regex fragments -_NUMBER_RE = re.compile( - r""" - -? # optional leading minus - (?:\d+\.?\d*|\.\d+) # integer or decimal - (?:[eE][+-]?\d+)? # optional exponent - """, - re.VERBOSE, -) - -_IDENT_RE = re.compile(r"[A-Za-z_]\w*") -_FEATURE_RE = re.compile(r"\$[A-Za-z_]\w*") -_WS_RE = re.compile(r"\s+") - - -def tokenize(source: str) -> List[Token]: - """Convert a formula string into a list of ``Token`` objects. - - Raises - ------ - SyntaxError - If the string contains characters that cannot be tokenized. 
- """ - tokens: List[Token] = [] - pos = 0 - length = len(source) - - while pos < length: - # Skip whitespace - m = _WS_RE.match(source, pos) - if m: - pos = m.end() - continue - - ch = source[pos] - - if ch == "(": - tokens.append(Token(TokenType.LPAREN, "(", pos)) - pos += 1 - elif ch == ")": - tokens.append(Token(TokenType.RPAREN, ")", pos)) - pos += 1 - elif ch == ",": - tokens.append(Token(TokenType.COMMA, ",", pos)) - pos += 1 - elif ch == "$": - m = _FEATURE_RE.match(source, pos) - if not m: - raise SyntaxError( - f"Invalid feature reference at position {pos}: " - f"{source[pos:pos+20]!r}" - ) - tokens.append(Token(TokenType.FEATURE, m.group(), pos)) - pos = m.end() - elif ch == "-" or ch == "." or ch.isdigit(): - # Could be a negative number or just a number. - # Disambiguate: a minus is part of a number only if - # (a) it's the very first token, OR - # (b) the preceding token is LPAREN or COMMA - if ch == "-": - prev_tok = tokens[-1] if tokens else None - is_unary_minus = ( - prev_tok is None - or prev_tok.type in (TokenType.LPAREN, TokenType.COMMA) - ) - if not is_unary_minus: - raise SyntaxError( - f"Unexpected '-' at position {pos}. " - f"Subtraction should use the Sub() operator." 
- ) - m = _NUMBER_RE.match(source, pos) - if not m: - raise SyntaxError( - f"Invalid number at position {pos}: " - f"{source[pos:pos+20]!r}" - ) - tokens.append(Token(TokenType.NUMBER, m.group(), pos)) - pos = m.end() - elif ch.isalpha() or ch == "_": - m = _IDENT_RE.match(source, pos) - if not m: - raise SyntaxError( - f"Invalid identifier at position {pos}: " - f"{source[pos:pos+20]!r}" - ) - tokens.append(Token(TokenType.IDENT, m.group(), pos)) - pos = m.end() - else: - raise SyntaxError( - f"Unexpected character {ch!r} at position {pos} in: " - f"{source!r}" - ) - - tokens.append(Token(TokenType.EOF, "", length)) - return tokens - - -# --------------------------------------------------------------------------- -# Recursive descent parser -# --------------------------------------------------------------------------- - -class Parser: - """Recursive-descent parser that converts a token stream to a ``Node``. - - The parser consumes tokens left-to-right, building the expression tree - in a single pass. 
- """ - - def __init__(self, tokens: List[Token], source: str) -> None: - self.tokens = tokens - self.source = source - self.pos = 0 - - # -- helpers ------------------------------------------------------------ - - def _peek(self) -> Token: - return self.tokens[self.pos] - - def _advance(self) -> Token: - tok = self.tokens[self.pos] - self.pos += 1 - return tok - - def _expect(self, tt: TokenType) -> Token: - tok = self._advance() - if tok.type != tt: - raise SyntaxError( - f"Expected {tt.name} but got {tok.type.name} ({tok.value!r}) " - f"at position {tok.pos} in: {self.source!r}" - ) - return tok - - # -- grammar rules ------------------------------------------------------ - - def parse_expression(self) -> Node: - """Parse a single expression (the start symbol).""" - tok = self._peek() - - if tok.type == TokenType.FEATURE: - return self._parse_feature() - - if tok.type == TokenType.NUMBER: - return self._parse_number() - - if tok.type == TokenType.IDENT: - return self._parse_function_call() - - raise SyntaxError( - f"Unexpected token {tok.type.name} ({tok.value!r}) at position " - f"{tok.pos} in: {self.source!r}" - ) - - def _parse_feature(self) -> LeafNode: - tok = self._advance() - if tok.value not in FEATURE_SET: - raise SyntaxError( - f"Unknown feature '{tok.value}' at position {tok.pos}. " - f"Expected one of {sorted(FEATURE_SET)}." - ) - return LeafNode(tok.value) - - def _parse_number(self) -> ConstantNode: - tok = self._advance() - try: - return ConstantNode(float(tok.value)) - except ValueError: - raise SyntaxError( - f"Invalid numeric literal {tok.value!r} at position {tok.pos}." - ) - - def _parse_function_call(self) -> Node: - """Parse ``Name(arg1, arg2, ..., paramN)``.""" - name_tok = self._advance() # IDENT - name = name_tok.value - - # Look up operator - spec = OPERATOR_REGISTRY.get(name) - if spec is None: - raise SyntaxError( - f"Unknown operator '{name}' at position {name_tok.pos}. 
" - f"Available operators: {sorted(OPERATOR_REGISTRY.keys())}" - ) - - self._expect(TokenType.LPAREN) - - # Collect arguments (mix of sub-expressions and trailing numeric params) - args: List[Node] = [] - raw_args: List = [] # (Node | float) to separate children from params - - if self._peek().type != TokenType.RPAREN: - raw_args.append(self._parse_arg()) - while self._peek().type == TokenType.COMMA: - self._advance() # consume comma - raw_args.append(self._parse_arg()) - - self._expect(TokenType.RPAREN) - - # Separate expression children from trailing numeric parameters. - # Strategy: the first ``spec.arity`` arguments that are Nodes are - # the children. Remaining numeric values fill param slots in order. - children: List[Node] = [] - trailing_numbers: List[float] = [] - - for arg in raw_args: - if isinstance(arg, Node) and len(children) < spec.arity: - children.append(arg) - elif isinstance(arg, (int, float)): - trailing_numbers.append(float(arg)) - elif isinstance(arg, Node): - # Extra node arguments beyond arity — could be a ConstantNode - # that the user passed as a positional param (e.g. 0.0001). - if isinstance(arg, ConstantNode): - trailing_numbers.append(arg.value) - else: - children.append(arg) - else: - trailing_numbers.append(float(arg)) - - # Validate arity - if len(children) != spec.arity: - raise SyntaxError( - f"Operator '{name}' expects {spec.arity} expression " - f"argument(s) but got {len(children)} at position " - f"{name_tok.pos}." - ) - - # Map trailing numbers to parameter names - params: Dict[str, float] = {} - for i, pname in enumerate(spec.param_names): - if i < len(trailing_numbers): - params[pname] = trailing_numbers[i] - - return OperatorNode(spec, children, params) - - def _parse_arg(self): - """Parse a single argument inside a function call. - - Returns either a ``Node`` (for sub-expressions) or a bare ``float`` - for numeric literals that might be operator parameters. 
- """ - tok = self._peek() - - if tok.type == TokenType.NUMBER: - # Peek ahead: if this number is followed by COMMA or RPAREN it - # could be a trailing parameter. We still return a ConstantNode - # and let the caller decide. - num_tok = self._advance() - val = float(num_tok.value) - # If the next token is LPAREN, that's weird — just return as - # constant. - return ConstantNode(val) - - return self.parse_expression() - - -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - -def parse(source: str) -> ExpressionTree: - """Parse a factor formula string into an ``ExpressionTree``. - - Parameters - ---------- - source : str - A formula in the FactorMiner DSL, e.g. - ``"Neg(CsRank(Div(Sub($close, $vwap), $vwap)))"``. - - Returns - ------- - ExpressionTree - - Raises - ------ - SyntaxError - If the formula is malformed or references unknown operators / features. - - Examples - -------- - >>> tree = parse("Neg($close)") - >>> tree.to_string() - 'Neg($close)' - """ - tokens = tokenize(source.strip()) - parser = Parser(tokens, source) - root = parser.parse_expression() - - # Ensure we consumed everything - remaining = parser._peek() - if remaining.type != TokenType.EOF: - raise SyntaxError( - f"Unexpected trailing content at position {remaining.pos}: " - f"{remaining.value!r} in: {source!r}" - ) - - return ExpressionTree(root) - - -def try_parse(source: str) -> Optional[ExpressionTree]: - """Like :func:`parse` but returns ``None`` on failure instead of raising.""" - try: - return parse(source) - except (SyntaxError, KeyError, ValueError): - return None diff --git a/src/factorminer/factorminer/core/provenance.py b/src/factorminer/factorminer/core/provenance.py deleted file mode 100644 index 93e4d91..0000000 --- a/src/factorminer/factorminer/core/provenance.py +++ /dev/null @@ -1,241 +0,0 @@ -"""Run and factor provenance helpers for mining sessions. 
- -This module keeps provenance data compact, JSON-safe, and stable across -save/load boundaries. -""" - -from __future__ import annotations - -from dataclasses import asdict, dataclass, field, is_dataclass -from datetime import datetime -import hashlib -import json -from typing import Any, Dict, List, Mapping, Optional, Sequence - -import numpy as np - - -def _json_safe(value: Any) -> Any: - """Recursively convert common scientific Python objects into JSON-safe data.""" - if is_dataclass(value): - return _json_safe(asdict(value)) - if isinstance(value, np.ndarray): - return value.tolist() - if isinstance(value, np.generic): - return value.item() - if isinstance(value, Mapping): - return {str(k): _json_safe(v) for k, v in value.items()} - if isinstance(value, (list, tuple)): - return [_json_safe(v) for v in value] - return value - - -def stable_digest(payload: Any) -> str: - """Compute a stable SHA256 digest for a JSON-serializable payload.""" - normalized = _json_safe(payload) - blob = json.dumps(normalized, sort_keys=True, separators=(",", ":"), default=str) - return hashlib.sha256(blob.encode("utf-8")).hexdigest() - - -def _compact_reference_list(entries: Any, limit: int = 8) -> List[str]: - """Normalize a mixed list of factor references into readable strings.""" - if not entries: - return [] - - if isinstance(entries, (str, Mapping)): - iterable: Sequence[Any] = [entries] - else: - iterable = list(entries) - - values: List[str] = [] - seen: set[str] = set() - for entry in iterable[:limit]: - text = "" - if isinstance(entry, str): - text = entry.strip() - elif isinstance(entry, Mapping): - name = str(entry.get("name", "")).strip() - formula = str(entry.get("formula", "")).strip() - category = str(entry.get("category", "")).strip() - if name and formula: - text = f"{name}: {formula}" - elif name and category: - text = f"{name} [{category}]" - elif name: - text = name - elif formula: - text = formula - elif entry is not None: - text = str(entry).strip() - - if 
text and text not in seen: - values.append(text) - seen.add(text) - return values - - -def _compact_memory_signal(memory_signal: Optional[Mapping[str, Any]]) -> Dict[str, Any]: - """Keep only the most useful pieces of memory context.""" - if not memory_signal: - return {} - - return { - "library_state": _json_safe(memory_signal.get("library_state", {})), - "recommended_directions": _compact_reference_list( - memory_signal.get("recommended_directions", []) - ), - "forbidden_directions": _compact_reference_list( - memory_signal.get("forbidden_directions", []) - ), - "insight_count": len(memory_signal.get("insights", []) or []), - "semantic_neighbors": _compact_reference_list( - memory_signal.get("semantic_neighbors", []) - ), - "semantic_duplicates": _compact_reference_list( - memory_signal.get("semantic_duplicates", []) - ), - "semantic_gaps": _compact_reference_list( - memory_signal.get("semantic_gaps", []) - ), - "complementary_patterns": _compact_reference_list( - memory_signal.get("complementary_patterns", []) - ), - } - - -@dataclass -class RunManifest: - """Serializable description of a mining run.""" - - manifest_version: str = "1.0" - run_id: str = "" - session_id: str = "" - loop_type: str = "ralph" - benchmark_mode: str = "paper" - created_at: str = "" - updated_at: str = "" - iteration: int = 0 - library_size: int = 0 - output_dir: str = "" - config_digest: str = "" - config_summary: Dict[str, Any] = field(default_factory=dict) - dataset_summary: Dict[str, Any] = field(default_factory=dict) - phase2_features: List[str] = field(default_factory=list) - target_stack: List[str] = field(default_factory=list) - artifact_paths: Dict[str, str] = field(default_factory=dict) - notes: List[str] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - return _json_safe(asdict(self)) - - -@dataclass -class FactorProvenance: - """Serializable provenance payload attached to an admitted factor.""" - - manifest_version: str = "1.0" - run_id: str = "" - 
session_id: str = "" - loop_type: str = "ralph" - created_at: str = "" - iteration: int = 0 - batch_number: int = 0 - candidate_rank: int = 0 - factor_name: str = "" - formula: str = "" - factor_category: str = "" - factor_id: int = 0 - generator_family: str = "" - memory_summary: Dict[str, Any] = field(default_factory=dict) - library_snapshot: Dict[str, Any] = field(default_factory=dict) - evaluation: Dict[str, Any] = field(default_factory=dict) - admission: Dict[str, Any] = field(default_factory=dict) - phase2: Dict[str, Any] = field(default_factory=dict) - target_stack: List[str] = field(default_factory=list) - research_metrics: Dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> Dict[str, Any]: - return _json_safe(asdict(self)) - - -def build_run_manifest( - *, - run_id: str, - session_id: str, - loop_type: str, - benchmark_mode: str, - created_at: str, - updated_at: str, - iteration: int, - library_size: int, - output_dir: str, - config_summary: Mapping[str, Any], - dataset_summary: Mapping[str, Any], - phase2_features: Sequence[str], - target_stack: Sequence[str], - artifact_paths: Optional[Mapping[str, str]] = None, - notes: Optional[Sequence[str]] = None, -) -> RunManifest: - """Build a run manifest from the live loop state.""" - return RunManifest( - run_id=run_id, - session_id=session_id, - loop_type=loop_type, - benchmark_mode=benchmark_mode, - created_at=created_at, - updated_at=updated_at, - iteration=iteration, - library_size=library_size, - output_dir=output_dir, - config_digest=stable_digest(config_summary), - config_summary=_json_safe(dict(config_summary)), - dataset_summary=_json_safe(dict(dataset_summary)), - phase2_features=list(phase2_features), - target_stack=list(target_stack), - artifact_paths=_json_safe(dict(artifact_paths or {})), - notes=list(notes or []), - ) - - -def build_factor_provenance( - *, - run_manifest: Mapping[str, Any], - factor_name: str, - formula: str, - factor_category: str, - factor_id: int, - 
iteration: int, - batch_number: int, - candidate_rank: int, - generator_family: str, - memory_signal: Optional[Mapping[str, Any]], - library_state: Optional[Mapping[str, Any]], - evaluation: Mapping[str, Any], - admission: Mapping[str, Any], - phase2: Optional[Mapping[str, Any]] = None, - target_stack: Optional[Sequence[str]] = None, - research_metrics: Optional[Mapping[str, Any]] = None, -) -> FactorProvenance: - """Build per-factor provenance from the current mining context.""" - manifest = dict(run_manifest) - return FactorProvenance( - run_id=str(manifest.get("run_id", "")), - session_id=str(manifest.get("session_id", "")), - loop_type=str(manifest.get("loop_type", "ralph")), - created_at=str(datetime.now().isoformat()), - iteration=iteration, - batch_number=batch_number, - candidate_rank=candidate_rank, - factor_name=factor_name, - formula=formula, - factor_category=factor_category, - factor_id=factor_id, - generator_family=generator_family, - memory_summary=_compact_memory_signal(memory_signal), - library_snapshot=_json_safe(dict(library_state or {})), - evaluation=_json_safe(dict(evaluation)), - admission=_json_safe(dict(admission)), - phase2=_json_safe(dict(phase2 or {})), - target_stack=list(target_stack or manifest.get("target_stack", [])), - research_metrics=_json_safe(dict(research_metrics or {})), - ) diff --git a/src/factorminer/factorminer/core/ralph_loop.py b/src/factorminer/factorminer/core/ralph_loop.py deleted file mode 100644 index 172af48..0000000 --- a/src/factorminer/factorminer/core/ralph_loop.py +++ /dev/null @@ -1,1598 +0,0 @@ -"""The Ralph Loop: self-evolving factor discovery algorithm. - -Implements Algorithm 1 from the FactorMiner paper. The loop iteratively: - 1. Retrieves memory priors from experience memory -- R(M, L) - 2. Generates candidate factors via LLM guided by memory -- G(m, L) - 3. 
Evaluates candidates through a multi-stage pipeline: - - Stage 1: Fast IC screening on M_fast assets - - Stage 2: Correlation check against library L - - Stage 2.5: Replacement check for correlated candidates - - Stage 3: Intra-batch deduplication (pairwise rho < theta) - - Stage 4: Full validation on M_full assets + trajectory collection - 4. Updates the factor library with admitted factors -- L <- L + {alpha} - 5. Evolves the experience memory with new insights -- E(M, F(M, tau)) - -The loop terminates when the library reaches the target size K or the -maximum number of iterations is exhausted. -""" - -from __future__ import annotations - -import json -import logging -import re -import time -from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import asdict, dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np - -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.library_io import save_library, load_library -from src.factorminer.factorminer.core.provenance import build_factor_provenance, build_run_manifest -from src.factorminer.factorminer.core.parser import try_parse -from src.factorminer.factorminer.core.session import MiningSession -from src.factorminer.factorminer.core.types import FEATURES -from src.factorminer.factorminer.memory.experience_memory import ExperienceMemoryManager -from src.factorminer.factorminer.memory.memory_store import ExperienceMemory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory -from src.factorminer.factorminer.memory.formation import form_memory -from src.factorminer.factorminer.memory.evolution import evolve_memory -from src.factorminer.factorminer.agent.llm_interface import LLMProvider, MockProvider -from src.factorminer.factorminer.agent.prompt_builder import PromptBuilder -from 
src.factorminer.factorminer.evaluation.metrics import ( - compute_factor_stats, - compute_ic, - compute_ic_mean, - compute_ic_win_rate, - compute_icir, -) -from src.factorminer.factorminer.evaluation.research import ( - build_score_vector, - compute_factor_geometry, - passes_research_admission, -) -from src.factorminer.factorminer.evaluation.runtime import SignalComputationError, compute_tree_signals -from src.factorminer.factorminer.utils.logging import ( - IterationRecord, - FactorRecord, - MiningSessionLogger, -) - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Budget Tracker -# --------------------------------------------------------------------------- - -@dataclass -class BudgetTracker: - """Tracks resource consumption across the mining session. - - Monitors LLM token usage, GPU compute time, and wall-clock time - so the loop can stop early when a budget is exhausted. - """ - - max_llm_calls: int = 0 # 0 = unlimited - max_wall_seconds: float = 0 # 0 = unlimited - - # Running totals - llm_calls: int = 0 - llm_prompt_tokens: int = 0 - llm_completion_tokens: int = 0 - compute_seconds: float = 0.0 - wall_start: float = field(default_factory=time.time) - - def record_llm_call( - self, - prompt_tokens: int = 0, - completion_tokens: int = 0, - ) -> None: - self.llm_calls += 1 - self.llm_prompt_tokens += prompt_tokens - self.llm_completion_tokens += completion_tokens - - def record_compute(self, seconds: float) -> None: - self.compute_seconds += seconds - - @property - def wall_elapsed(self) -> float: - return time.time() - self.wall_start - - @property - def total_tokens(self) -> int: - return self.llm_prompt_tokens + self.llm_completion_tokens - - def is_exhausted(self) -> bool: - """True if any budget limit has been reached.""" - if self.max_llm_calls > 0 and self.llm_calls >= self.max_llm_calls: - return True - if self.max_wall_seconds > 0 and self.wall_elapsed >= self.max_wall_seconds: - 
return True - return False - - def to_dict(self) -> Dict[str, Any]: - return { - "llm_calls": self.llm_calls, - "llm_prompt_tokens": self.llm_prompt_tokens, - "llm_completion_tokens": self.llm_completion_tokens, - "total_tokens": self.total_tokens, - "compute_seconds": round(self.compute_seconds, 2), - "wall_elapsed_seconds": round(self.wall_elapsed, 2), - } - - -# --------------------------------------------------------------------------- -# Candidate evaluation result -# --------------------------------------------------------------------------- - -@dataclass -class EvaluationResult: - """Result of evaluating a single candidate factor.""" - - factor_name: str - formula: str - parse_ok: bool = False - ic_mean: float = 0.0 - icir: float = 0.0 - ic_win_rate: float = 0.0 - max_correlation: float = 0.0 - correlated_with: str = "" - admitted: bool = False - replaced: Optional[int] = None # ID of replaced factor, if any - rejection_reason: str = "" - stage_passed: int = 0 # 0=parse/IC fail, 1=IC pass, 2=corr pass, 3=dedup pass, 4=admitted - signals: Optional[np.ndarray] = None - target_stats: Dict[str, dict] = field(default_factory=dict) - research_score: float = 0.0 - research_lcb: float = 0.0 - residual_ic: float = 0.0 - projection_loss: float = 0.0 - effective_rank_gain: float = 0.0 - score_vector: Optional[dict[str, Any]] = None - - -# --------------------------------------------------------------------------- -# Factor Generator: wraps LLM + prompt builder + output parser -# --------------------------------------------------------------------------- - -class FactorGenerator: - """Generates candidate factors using LLM guided by memory priors.""" - - def __init__( - self, - llm_provider: Optional[LLMProvider] = None, - prompt_builder: Optional[PromptBuilder] = None, - ) -> None: - self.llm = llm_provider or MockProvider() - self.prompt_builder = prompt_builder or PromptBuilder() - - def generate_batch( - self, - memory_signal: Dict[str, Any], - library_state: 
Dict[str, Any], - batch_size: int = 40, - ) -> List[Tuple[str, str]]: - """Generate a batch of candidate factors. - - Returns - ------- - list of (name, formula) tuples - """ - user_prompt = self.prompt_builder.build_user_prompt( - memory_signal, library_state, batch_size - ) - raw_response = self.llm.generate( - system_prompt=self.prompt_builder.system_prompt, - user_prompt=user_prompt, - ) - return self._parse_response(raw_response) - - @staticmethod - def _parse_response(raw: str) -> List[Tuple[str, str]]: - """Parse LLM output into (name, formula) pairs. - - Expected format per line: - . : - """ - candidates: List[Tuple[str, str]] = [] - for line in raw.strip().splitlines(): - line = line.strip() - if not line: - continue - # Match patterns like "1. factor_name: Formula(...)" - m = re.match( - r"^\d+\.\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.+)$", - line, - ) - if m: - name = m.group(1).strip() - formula = m.group(2).strip() - candidates.append((name, formula)) - return candidates - - -# --------------------------------------------------------------------------- -# Validation Pipeline (lightweight orchestrator) -# --------------------------------------------------------------------------- - -class ValidationPipeline: - """Multi-stage evaluation pipeline for candidate factors. 
- - Implements the full 4-stage evaluation from the paper: - Stage 1: Fast IC screening on M_fast assets -> C1 - Stage 2: Correlation check against library L -> C2 (+ replacement for C1\\C2) - Stage 3: Intra-batch deduplication (pairwise rho < theta) -> C3 - Stage 4: Full validation on M_full assets + trajectory collection - """ - - def __init__( - self, - data_tensor: np.ndarray, - returns: np.ndarray, - target_panels: Optional[Dict[str, np.ndarray]] = None, - target_horizons: Optional[Dict[str, int]] = None, - library: Optional[FactorLibrary] = None, - ic_threshold: float = 0.04, - icir_threshold: float = 0.5, - replacement_ic_min: float = 0.10, - replacement_ic_ratio: float = 1.3, - fast_screen_assets: int = 100, - num_workers: int = 1, - research_config: Any = None, - benchmark_mode: str = "paper", - ) -> None: - self.data_tensor = data_tensor # (M, T, F) - self.returns = returns # (M, T) - self.target_panels = target_panels or {"paper": returns} - self.target_horizons = target_horizons or {"paper": 1} - self.library = library or FactorLibrary( - correlation_threshold=0.5, - ic_threshold=ic_threshold, - ) - self.ic_threshold = ic_threshold - self.icir_threshold = icir_threshold - self.replacement_ic_min = replacement_ic_min - self.replacement_ic_ratio = replacement_ic_ratio - self.fast_screen_assets = fast_screen_assets - self.num_workers = num_workers - self.signal_failure_policy = "reject" - self.research_config = research_config - self.benchmark_mode = benchmark_mode - - # Pre-compute the fast-screen asset subset indices - M = returns.shape[0] - if fast_screen_assets > 0 and fast_screen_assets < M: - rng = np.random.RandomState(0) - self._fast_indices = rng.choice(M, fast_screen_assets, replace=False) - self._fast_indices.sort() - else: - self._fast_indices = np.arange(M) - - def evaluate_candidate( - self, - name: str, - formula: str, - fast_screen: bool = True, - ) -> EvaluationResult: - """Evaluate a single candidate through the full pipeline. 
- - Parameters - ---------- - name : str - Candidate factor name. - formula : str - DSL formula string. - fast_screen : bool - If True, Stage 1 uses M_fast assets only. If False, uses all. - """ - result = EvaluationResult(factor_name=name, formula=formula) - - # Stage 0: Parse - tree = try_parse(formula) - if tree is None: - result.rejection_reason = "Parse failure" - result.stage_passed = 0 - return result - result.parse_ok = True - - # Stage 1: Compute signals and fast IC screening - try: - signals = self._compute_signals(tree) - except SignalComputationError as exc: - result.rejection_reason = f"Signal computation error: {exc}" - result.stage_passed = 0 - return result - - if signals is None or np.all(np.isnan(signals)): - result.rejection_reason = "All-NaN signals" - result.stage_passed = 0 - return result - - result.signals = signals - - # Fast IC screen on M_fast asset subset - if fast_screen and len(self._fast_indices) < signals.shape[0]: - fast_signals = signals[self._fast_indices, :] - fast_returns = self.returns[self._fast_indices, :] - fast_stats = compute_factor_stats(fast_signals, fast_returns) - fast_ic = fast_stats["ic_abs_mean"] - - if fast_ic < self.ic_threshold: - result.ic_mean = fast_ic - result.rejection_reason = ( - f"Fast-screen IC {fast_ic:.4f} < threshold {self.ic_threshold}" - ) - result.stage_passed = 0 - return result - - # Full IC statistics on all assets - stats = compute_factor_stats(signals, self.returns) - result.ic_mean = stats["ic_abs_mean"] - result.icir = stats["icir"] - result.ic_win_rate = stats["ic_win_rate"] - result.target_stats = {"paper": stats} - - if self.target_panels: - for target_name, target_returns in self.target_panels.items(): - if target_name == "paper": - continue - result.target_stats[target_name] = compute_factor_stats(signals, target_returns) - - score_vector_obj = None - if self._research_enabled(): - library_signals = [factor.signals for factor in self.library.list_factors() if factor.signals is not None] 
- geometry = compute_factor_geometry(signals, self.returns, library_signals) - score_vector_obj = build_score_vector( - result.target_stats, - self.target_horizons, - self.research_config, - geometry, - ) - result.score_vector = score_vector_obj.to_dict() - result.research_score = score_vector_obj.primary_score - result.research_lcb = score_vector_obj.lower_confidence_bound - result.residual_ic = score_vector_obj.geometry.residual_ic - result.projection_loss = score_vector_obj.geometry.projection_loss - result.effective_rank_gain = score_vector_obj.geometry.effective_rank_gain - - # Stage 1 gate: IC threshold (full data) - quality_gate = result.ic_mean - quality_label = "IC" - if self._research_enabled(): - quality_gate = result.research_score - quality_label = "Research score" - - if quality_gate < self.ic_threshold: - result.rejection_reason = ( - f"{quality_label} {quality_gate:.4f} < threshold {self.ic_threshold}" - ) - result.stage_passed = 0 - return result - if result.icir < self.icir_threshold: - result.rejection_reason = ( - f"ICIR {result.icir:.4f} < threshold {self.icir_threshold}" - ) - result.stage_passed = 0 - return result - result.stage_passed = 1 - - if self._research_enabled(): - admitted, reason = passes_research_admission( - score_vector_obj, - self.research_config, - self.library.correlation_threshold, - ) - result.max_correlation = result.score_vector["geometry"]["max_abs_correlation"] - if admitted: - result.admitted = True - result.stage_passed = 3 - return result - result.stage_passed = 2 - result.rejection_reason = reason - replace_id, replace_reason = self._research_replacement(result) - if replace_id is not None: - result.admitted = True - result.replaced = replace_id - result.rejection_reason = replace_reason - result.stage_passed = 3 - return result - - # Stage 2: Correlation check against library (admission) - admitted, reason = self.library.check_admission( - result.ic_mean, signals - ) - if admitted: - result.admitted = True - 
result.stage_passed = 3 - if self.library.size > 0: - result.max_correlation = self.library._max_correlation_with_library( - signals - ) - return result - - result.stage_passed = 2 - - # Stage 2.5: Replacement check for candidates that failed admission - should_replace, replace_id, replace_reason = self.library.check_replacement( - result.ic_mean, - signals, - ic_min=self.replacement_ic_min, - ic_ratio=self.replacement_ic_ratio, - ) - if should_replace and replace_id is not None: - result.admitted = True - result.replaced = replace_id - result.max_correlation = self.library._max_correlation_with_library( - signals - ) - result.stage_passed = 3 - return result - - # Rejected by correlation - result.rejection_reason = reason - if self.library.size > 0: - result.max_correlation = self.library._max_correlation_with_library( - signals - ) - return result - - def _research_enabled(self) -> bool: - return bool( - self.research_config is not None - and getattr(self.research_config, "enabled", False) - and self.benchmark_mode == "research" - ) - - def _research_replacement(self, result: EvaluationResult) -> tuple[Optional[int], str]: - if result.score_vector is None or self.library.size == 0: - return None, result.rejection_reason - - conflicting: list[tuple[int, float]] = [] - for factor in self.library.list_factors(): - if factor.signals is None: - continue - corr = self.library._compute_correlation_vectorized(result.signals, factor.signals) - if corr >= self.library.correlation_threshold: - conflicting.append((factor.id, corr)) - if len(conflicting) != 1: - return None, result.rejection_reason - - target_id, _ = conflicting[0] - target_factor = self.library.get_factor(target_id) - target_score = float(target_factor.research_metrics.get("primary_score", target_factor.ic_mean)) - if result.research_score < max(self.replacement_ic_min, self.replacement_ic_ratio * target_score): - return None, ( - f"Research replacement score {result.research_score:.4f} " - f"not strong 
enough to replace factor {target_id} ({target_score:.4f})" - ) - return target_id, f"Research replacement over factor {target_id}" - - def evaluate_batch( - self, candidates: List[Tuple[str, str]] - ) -> List[EvaluationResult]: - """Evaluate a batch through all stages including intra-batch dedup. - - Stage 1-2.5 are run per-candidate (optionally in parallel). - Stage 3 (dedup) runs on all admitted candidates together. - """ - # Stage 1 + 2 + 2.5: per-candidate evaluation - if self.num_workers > 1: - results = self._evaluate_parallel(candidates) - else: - results = [] - for name, formula in candidates: - result = self.evaluate_candidate(name, formula) - results.append(result) - - # Stage 3: Intra-batch deduplication - results = self._deduplicate_batch(results) - - return results - - def _evaluate_parallel( - self, candidates: List[Tuple[str, str]] - ) -> List[EvaluationResult]: - """Evaluate candidates using a thread pool. - - Note: uses threads rather than processes because signals arrays - are large and sharing via processes would require serialization. - """ - from concurrent.futures import ThreadPoolExecutor - - results: List[Optional[EvaluationResult]] = [None] * len(candidates) - - def _eval(idx: int, name: str, formula: str) -> Tuple[int, EvaluationResult]: - return idx, self.evaluate_candidate(name, formula) - - with ThreadPoolExecutor(max_workers=self.num_workers) as pool: - futures = [ - pool.submit(_eval, i, name, formula) - for i, (name, formula) in enumerate(candidates) - ] - for future in as_completed(futures): - idx, result = future.result() - results[idx] = result - - return [r for r in results if r is not None] - - def _deduplicate_batch( - self, results: List[EvaluationResult] - ) -> List[EvaluationResult]: - """Stage 3: Remove intra-batch duplicates among admitted candidates. - - For candidates that passed Stages 1-2, check pairwise correlation - within the batch. 
If two admitted candidates are correlated above - theta, keep the one with higher IC and reject the other. - """ - admitted_indices = [ - i for i, r in enumerate(results) - if r.admitted and r.signals is not None - ] - - if len(admitted_indices) <= 1: - return results - - # Compute pairwise correlations among admitted candidates - admitted_signals = [results[i].signals for i in admitted_indices] - corr_threshold = self.library.correlation_threshold - - # Greedy dedup: iterate in order of descending IC, keep non-correlated - admitted_by_ic = sorted( - admitted_indices, - key=lambda i: ( - results[i].research_score if self._research_enabled() else results[i].ic_mean - ), - reverse=True, - ) - - kept_indices: List[int] = [] - kept_signals: List[np.ndarray] = [] - - for idx in admitted_by_ic: - r = results[idx] - is_correlated = False - - for kept_sig in kept_signals: - corr = self.library._compute_correlation_vectorized( - r.signals, kept_sig - ) - if corr >= corr_threshold: - is_correlated = True - break - - if is_correlated: - # Reject this candidate from the batch due to intra-batch dup - results[idx] = EvaluationResult( - factor_name=r.factor_name, - formula=r.formula, - parse_ok=r.parse_ok, - ic_mean=r.ic_mean, - icir=r.icir, - ic_win_rate=r.ic_win_rate, - max_correlation=r.max_correlation, - correlated_with=r.correlated_with, - admitted=False, - replaced=None, - rejection_reason="Intra-batch deduplication (correlated with higher-IC batch member)", - stage_passed=2, - signals=r.signals, - ) - else: - kept_indices.append(idx) - kept_signals.append(r.signals) - - dedup_rejected = len(admitted_indices) - len(kept_indices) - if dedup_rejected > 0: - logger.debug( - "Intra-batch dedup: rejected %d/%d admitted candidates", - dedup_rejected, len(admitted_indices), - ) - - return results - - def _build_data_dict(self) -> Dict[str, np.ndarray]: - """Convert data_tensor to a dict mapping feature names to (M, T) arrays. 
- - Handles two formats: - - dict: already maps ``"$close"`` etc. to ``(M, T)`` arrays. - - np.ndarray of shape ``(M, T, F)``: sliced along the last axis - using the canonical ``FEATURES`` ordering. - """ - if isinstance(self.data_tensor, dict): - return self.data_tensor - - # (M, T, F) numpy array — map each feature slice - data_dict: Dict[str, np.ndarray] = {} - n_features = self.data_tensor.shape[2] if self.data_tensor.ndim == 3 else 0 - for i, feat_name in enumerate(FEATURES): - if i < n_features: - data_dict[feat_name] = self.data_tensor[:, :, i] - return data_dict - - def _compute_signals(self, tree) -> Optional[np.ndarray]: - """Compute factor signals from expression tree on the data tensor. - - Evaluates the parsed expression tree against the market data using - the tree's own ``evaluate()`` method which dispatches through the - numpy operator implementations under the configured failure policy. - """ - data_dict = self._build_data_dict() - return compute_tree_signals( - tree, - data_dict, - self.returns.shape, - signal_failure_policy=self.signal_failure_policy, - ) - - -# --------------------------------------------------------------------------- -# Mining Reporter -# --------------------------------------------------------------------------- - -class MiningReporter: - """Lightweight reporter that logs batch results to a JSONL file.""" - - def __init__(self, output_dir: str = "./output") -> None: - self.output_dir = Path(output_dir) - self.output_dir.mkdir(parents=True, exist_ok=True) - self._log_path = self.output_dir / "mining_batches.jsonl" - - def log_batch(self, iteration: int, **stats: Any) -> None: - """Append a batch record to the JSONL log.""" - record = {"iteration": iteration, "timestamp": time.time()} - record.update(stats) - with open(self._log_path, "a") as f: - f.write(json.dumps(record, default=str) + "\n") - - def export_library( - self, library: FactorLibrary, path: Optional[str] = None - ) -> str: - """Export the factor library to 
JSON.""" - if path is None: - path = str(self.output_dir / "factor_library.json") - factors = [f.to_dict() for f in library.list_factors()] - diagnostics = library.get_diagnostics() - payload = { - "factors": factors, - "diagnostics": diagnostics, - "exported_at": datetime.now().isoformat(), - } - with open(path, "w") as f: - json.dump(payload, f, indent=2, default=str) - return path - - -# --------------------------------------------------------------------------- -# The Ralph Loop -# --------------------------------------------------------------------------- - -class RalphLoop: - """Self-Evolving Factor Discovery via the Ralph Loop paradigm. - - The Ralph Loop iteratively: - 1. Retrieves memory priors from experience memory -- R(M, L) - 2. Generates candidate factors via LLM guided by memory -- G(m, L) - 3. Evaluates candidates through multi-stage pipeline -- V(alpha) - 4. Updates the factor library with admitted factors -- L <- L + {alpha} - 5. Evolves the experience memory with new insights -- E(M, F(M, tau)) - - This implements Algorithm 1 from the FactorMiner paper. - """ - - def __init__( - self, - config: Any, - data_tensor: np.ndarray, - returns: np.ndarray, - llm_provider: Optional[LLMProvider] = None, - memory: Optional[ExperienceMemory] = None, - library: Optional[FactorLibrary] = None, - checkpoint_interval: int = 1, - ) -> None: - """Initialize the Ralph Loop. - - Parameters - ---------- - config : MiningConfig - Mining configuration (from core.config or utils.config). - data_tensor : np.ndarray - Market data tensor D in R^(M x T x F). - returns : np.ndarray - Forward returns array R in R^(M x T). - llm_provider : LLMProvider, optional - LLM provider for factor generation. Defaults to MockProvider. - memory : ExperienceMemory, optional - Pre-populated experience memory. Defaults to empty memory. - library : FactorLibrary, optional - Pre-populated factor library. Defaults to empty library. 
- checkpoint_interval : int - Save a checkpoint every N iterations. Set to 0 to disable - automatic checkpointing. Default is 1 (every iteration). - """ - self.config = config - self.data_tensor = data_tensor - self.returns = returns - self.checkpoint_interval = checkpoint_interval - - # Core components - self.library = library or FactorLibrary( - correlation_threshold=getattr(config, "correlation_threshold", 0.5), - ic_threshold=getattr(config, "ic_threshold", 0.04), - ) - self.memory = memory or ExperienceMemory() - self.memory_manager: Optional[ExperienceMemoryManager] = None - self.generator = FactorGenerator( - llm_provider=llm_provider, - prompt_builder=PromptBuilder(), - ) - self.pipeline = ValidationPipeline( - data_tensor=data_tensor, - returns=returns, - target_panels=getattr(config, "target_panels", None), - target_horizons=getattr(config, "target_horizons", None), - library=self.library, - ic_threshold=getattr(config, "ic_threshold", 0.04), - icir_threshold=getattr(config, "icir_threshold", 0.5), - replacement_ic_min=getattr(config, "replacement_ic_min", 0.10), - replacement_ic_ratio=getattr(config, "replacement_ic_ratio", 1.3), - fast_screen_assets=getattr(config, "fast_screen_assets", 100), - num_workers=getattr(config, "num_workers", 1), - research_config=getattr(config, "research", None), - benchmark_mode=getattr(config, "benchmark_mode", "paper"), - ) - self.pipeline.signal_failure_policy = getattr( - config, "signal_failure_policy", "reject" - ) - self.reporter = MiningReporter( - getattr(config, "output_dir", "./output") - ) - self.budget = BudgetTracker() - self.signal_failure_policy = getattr(config, "signal_failure_policy", "reject") - - # Session state - self.iteration = 0 - self._session: Optional[MiningSession] = None - self._session_logger: Optional[MiningSessionLogger] = None - self._run_manifest: Dict[str, Any] = {} - - # ------------------------------------------------------------------ - # Main loop - # 
------------------------------------------------------------------ - - def run( - self, - target_size: Optional[int] = None, - max_iterations: Optional[int] = None, - callback: Optional[Callable[[int, Dict[str, Any]], None]] = None, - resume: bool = False, - ) -> FactorLibrary: - """Run the complete mining loop. - - Parameters - ---------- - target_size : int, optional - Target library size K. Defaults to config value (110). - max_iterations : int, optional - Maximum iterations before stopping. Defaults to config value. - callback : callable, optional - Called after each iteration with (iteration_number, stats_dict). - resume : bool - If True, attempt to load the latest checkpoint from the output - directory before starting the loop. Default is False. - - Returns - ------- - FactorLibrary - The constructed factor library L. - """ - target_size = target_size or getattr( - self.config, "target_library_size", 110 - ) - max_iterations = max_iterations or getattr( - self.config, "max_iterations", 200 - ) - batch_size = getattr(self.config, "batch_size", 40) - output_dir = getattr(self.config, "output_dir", "./output") - - # Resume from existing checkpoint if requested - if resume: - checkpoint_dir = Path(output_dir) / "checkpoint" - if checkpoint_dir.exists(): - self.load_session(str(checkpoint_dir)) - logger.info( - "Resuming from iteration %d with %d factors", - self.iteration, - self.library.size, - ) - - # Initialize session - if self._session is None: - session_id = datetime.now().strftime("%Y%m%d_%H%M%S") - self._session = MiningSession( - session_id=session_id, - config=self._serialize_config(), - output_dir=output_dir, - ) - - self._refresh_run_manifest( - output_dir=output_dir, - artifact_paths={ - "output_dir": output_dir, - "checkpoint_dir": str(Path(output_dir) / "checkpoint"), - }, - ) - self._persist_run_manifest(Path(output_dir) / "run_manifest.json") - - # Initialize session logger - self._session_logger = MiningSessionLogger(output_dir) - 
self._session_logger.log_session_start({ - "target_library_size": target_size, - "batch_size": batch_size, - "max_iterations": max_iterations, - "resumed_from_iteration": self.iteration if resume else 0, - }) - self._session_logger.start_progress(max_iterations) - - loop_start = time.time() - - if not hasattr(self, "budget") or self.budget is None: - self.budget = BudgetTracker() - self.budget.wall_start = time.time() - - try: - while ( - self.library.size < target_size - and self.iteration < max_iterations - ): - # Check budget BEFORE starting a new iteration - if self.budget.is_exhausted(): - logger.info("Budget exhausted — stopping loop") - break - - self.iteration += 1 - stats = self._run_iteration(batch_size) - - # Record in session - self._session.record_iteration(stats) - - # Callback - if callback: - callback(self.iteration, stats) - - logger.info( - "Iteration %d: Library size=%d, Admitted=%d, " - "Yield=%.1f%%, AvgCorr=%.3f", - self.iteration, - stats["library_size"], - stats["admitted"], - stats["yield_rate"] * 100, - stats.get("avg_correlation", 0), - ) - - # Periodic checkpoint - if ( - self.checkpoint_interval > 0 - and self.iteration % self.checkpoint_interval == 0 - ): - self._checkpoint() - - if self.budget.is_exhausted(): - logger.info("Budget exhausted: %s", self.budget.to_dict()) - - except KeyboardInterrupt: - logger.warning("Mining interrupted by user at iteration %d", self.iteration) - if self._session: - self._session.status = "interrupted" - # Save checkpoint on interrupt so session can be resumed - self._checkpoint() - finally: - elapsed = time.time() - loop_start - if self._session_logger: - self._session_logger.log_session_end(self.library.size, elapsed) - self._refresh_run_manifest( - output_dir=output_dir, - artifact_paths={ - "output_dir": output_dir, - "checkpoint_dir": str(Path(output_dir) / "checkpoint"), - "library": str(Path(output_dir) / "factor_library.json"), - "session": str(Path(output_dir) / "session.json"), - 
"run_manifest": str(Path(output_dir) / "run_manifest.json"), - "session_log": str(Path(output_dir) / "session_log.json"), - }, - ) - self._persist_run_manifest(Path(output_dir) / "run_manifest.json") - if self._session: - self._session.finalize() - self._session.save() - - # Final export - lib_path = self.reporter.export_library(self.library) - logger.info("Factor library exported to %s", lib_path) - - return self.library - - # ------------------------------------------------------------------ - # Single iteration - # ------------------------------------------------------------------ - - def _run_iteration(self, batch_size: int) -> Dict[str, Any]: - """Execute one iteration of the Ralph Loop. - - Returns - ------- - dict - Iteration statistics. - """ - t0 = time.time() - - # Step 1: Memory Retrieval -- R(M, L) - library_state = self.library.get_state_summary() - memory_signal = retrieve_memory( - self.memory, - library_state=library_state, - ) - - # Step 2: Guided Generation -- G(m, L) - t_gen = time.time() - candidates = self.generator.generate_batch( - memory_signal=memory_signal, - library_state=library_state, - batch_size=batch_size, - ) - self.budget.record_llm_call() - - if not candidates: - logger.warning( - "Iteration %d: generator produced 0 candidates", self.iteration - ) - return self._empty_stats() - - # Step 3: Multi-Stage Evaluation -- V(alpha) for each candidate - results = self.pipeline.evaluate_batch(candidates) - - # Step 4: Library Update -- L <- L + admitted factors - admitted_results = self._update_library(results) - - provenance_library_state = { - **library_state, - "diagnostics": self.library.get_diagnostics(), - } - - self._attach_factor_provenance( - admitted_results, - library_state=provenance_library_state, - memory_signal=memory_signal, - phase2_summary={}, - generator_family=self._generator_family(), - ) - - # Step 5: Memory Evolution -- E(M, F(M, tau)) - trajectory = self._build_trajectory(results) - formed = form_memory(self.memory, 
trajectory, self.iteration) - self.memory = evolve_memory(self.memory, formed) - - # Build stats - elapsed = time.time() - t0 - self.budget.record_compute(elapsed) - stats = self._compute_stats(results, admitted_results, elapsed) - - # Log to reporter and session logger - # stats already contains 'iteration', so pass it without keyword arg - self.reporter.log_batch(**stats) - if self._session_logger: - ic_values = [r.ic_mean for r in results if r.parse_ok] - record = IterationRecord( - iteration=self.iteration, - candidates_generated=len(candidates), - ic_passed=stats["ic_passed"], - correlation_passed=stats["corr_passed"], - admitted=stats["admitted"], - rejected=len(candidates) - stats["admitted"], - replaced=stats["replaced"], - library_size=self.library.size, - best_ic=max(ic_values) if ic_values else 0.0, - mean_ic=float(np.mean(ic_values)) if ic_values else 0.0, - elapsed_seconds=elapsed, - ) - self._session_logger.log_iteration(record) - - # Log individual factor records - for r in results: - factor_rec = FactorRecord( - expression=r.formula, - ic=r.ic_mean if r.parse_ok else None, - icir=r.icir if r.parse_ok else None, - max_correlation=r.max_correlation if r.parse_ok else None, - admitted=r.admitted, - rejection_reason=r.rejection_reason or None, - replaced_factor=str(r.replaced) if r.replaced else None, - ) - self._session_logger.log_factor(factor_rec) - - return stats - - # ------------------------------------------------------------------ - # Library update - # ------------------------------------------------------------------ - - def _update_library( - self, results: List[EvaluationResult] - ) -> List[EvaluationResult]: - """Admit passing factors into the library and handle replacements. - - Returns the list of admitted results. 
- """ - admitted: List[EvaluationResult] = [] - - for result in results: - if not result.admitted: - continue - - # Handle replacement - if result.replaced is not None: - old_id = result.replaced - new_factor = Factor( - id=0, # Will be reassigned by library - name=result.factor_name, - formula=result.formula, - category=self._infer_category(result.formula), - ic_mean=result.ic_mean, - icir=result.icir, - ic_win_rate=result.ic_win_rate, - max_correlation=result.max_correlation, - batch_number=self.iteration, - signals=result.signals, - research_metrics=result.score_vector or {}, - ) - try: - self.library.replace_factor(old_id, new_factor) - admitted.append(result) - logger.info( - "Replaced factor %d with '%s' (IC=%.4f)", - old_id, result.factor_name, result.ic_mean, - ) - except KeyError: - logger.warning( - "Failed to replace factor %d (already removed?)", old_id - ) - else: - # Direct admission - factor = Factor( - id=0, # Will be reassigned - name=result.factor_name, - formula=result.formula, - category=self._infer_category(result.formula), - ic_mean=result.ic_mean, - icir=result.icir, - ic_win_rate=result.ic_win_rate, - max_correlation=result.max_correlation, - batch_number=self.iteration, - signals=result.signals, - research_metrics=result.score_vector or {}, - ) - self.library.admit_factor(factor) - admitted.append(result) - - return admitted - - # ------------------------------------------------------------------ - # Trajectory builder for memory formation - # ------------------------------------------------------------------ - - def _build_trajectory( - self, results: List[EvaluationResult] - ) -> List[Dict[str, Any]]: - """Build mining trajectory tau for memory formation. - - Converts evaluation results into the dict format expected by - ``form_memory``. 
- """ - trajectory: List[Dict[str, Any]] = [] - for r in results: - entry: Dict[str, Any] = { - "factor_id": r.factor_name, - "formula": r.formula, - "ic": r.ic_mean, - "icir": r.icir, - "max_correlation": r.max_correlation, - "correlated_with": r.correlated_with, - "admitted": r.admitted, - "rejection_reason": r.rejection_reason, - } - trajectory.append(entry) - return trajectory - - # ------------------------------------------------------------------ - # Statistics helpers - # ------------------------------------------------------------------ - - def _compute_stats( - self, - results: List[EvaluationResult], - admitted: List[EvaluationResult], - elapsed: float, - ) -> Dict[str, Any]: - """Compute per-iteration statistics.""" - n_candidates = len(results) - diagnostics = self.library.get_diagnostics() - - # Count dedup rejections (stage_passed==2 with dedup reason) - dedup_rejected = sum( - 1 for r in results - if not r.admitted - and "deduplication" in r.rejection_reason.lower() - ) - - return { - "iteration": self.iteration, - "candidates": n_candidates, - "parse_ok": sum(1 for r in results if r.parse_ok), - "ic_passed": sum(1 for r in results if r.stage_passed >= 1), - "corr_passed": sum(1 for r in results if r.stage_passed >= 2), - "dedup_rejected": dedup_rejected, - "admitted": len(admitted), - "replaced": sum(1 for r in admitted if r.replaced is not None), - "yield_rate": len(admitted) / max(n_candidates, 1), - "library_size": self.library.size, - "avg_correlation": diagnostics.get("avg_correlation", 0), - "max_correlation": diagnostics.get("max_correlation", 0), - "elapsed_seconds": elapsed, - "budget": self.budget.to_dict(), - } - - def _empty_stats(self) -> Dict[str, Any]: - """Return empty stats dict for iterations with no candidates.""" - return { - "iteration": self.iteration, - "candidates": 0, - "parse_ok": 0, - "ic_passed": 0, - "corr_passed": 0, - "dedup_rejected": 0, - "admitted": 0, - "replaced": 0, - "yield_rate": 0.0, - "library_size": 
self.library.size, - "avg_correlation": 0.0, - "max_correlation": 0.0, - "elapsed_seconds": 0.0, - "budget": self.budget.to_dict(), - } - - # ------------------------------------------------------------------ - # Category inference - # ------------------------------------------------------------------ - - @staticmethod - def _infer_category(formula: str) -> str: - """Infer factor category from formula structure. - - Uses operator presence heuristics to classify factors into broad - categories aligned with the paper's taxonomy. - """ - formula_upper = formula.upper() - - # Extract operators and normalize to uppercase for matching - ops_raw = re.findall(r"([A-Za-z][a-zA-Z]+)\(", formula) - ops = {o.upper() for o in ops_raw} - - if ops & {"SKEW", "KURT"}: - return "Higher-Moment" - if ops & {"CORR", "COV", "BETA"} and "$VOLUME" in formula_upper: - return "PV-Correlation" - if ops & {"IFELSE", "GREATER", "LESS", "OR", "AND"}: - return "Regime-Conditional" - if ops & {"TSLINREG", "TSLINREGSLOPE", "TSLINREGRESID", "RESID"}: - return "Regression" - if ops & {"EMA", "DEMA", "KAMA", "HMA", "WMA", "SMA"}: - return "Smoothing" - if "$VWAP" in formula_upper: - return "VWAP" - if "$AMT" in formula_upper: - return "Amount" - if ops & {"DELTA", "DELAY", "RETURN", "LOGRETURN"}: - return "Momentum" - if ops & {"STD", "VAR"}: - return "Volatility" - if ops & {"TSMAX", "TSMIN", "TSARGMAX", "TSARGMIN", "TSRANK"}: - return "Extrema" - if ops & {"CSRANK", "CSZSCORE", "CSDEMEAN"}: - return "Cross-Sectional" - - return "Other" - - # ------------------------------------------------------------------ - # Session persistence (save / resume) - # ------------------------------------------------------------------ - - def save_session(self, path: Optional[str] = None) -> str: - """Save the full mining session state for resume. 
- - Saves the factor library (via ``save_library``), experience memory, - budget tracker state, session metadata, and the loop state to a - ``checkpoint`` directory inside the output directory. - - Parameters - ---------- - path : str, optional - Directory for the checkpoint. Defaults to - ``{output_dir}/checkpoint``. - - Returns - ------- - str - Path to the saved checkpoint directory. - """ - if path is not None: - checkpoint_dir = Path(path) - # If caller passed a dir that doesn't end with "checkpoint*", - # nest inside it for backward compatibility - if not checkpoint_dir.name.startswith("checkpoint"): - checkpoint_dir = checkpoint_dir / f"checkpoint_iter{self.iteration}" - else: - output_dir = getattr(self.config, "output_dir", "./output") - checkpoint_dir = Path(output_dir) / "checkpoint" - checkpoint_dir.mkdir(parents=True, exist_ok=True) - - # Save library using library_io (JSON + optional signal cache) - lib_base = str(checkpoint_dir / "library") - save_library(self.library, lib_base, save_signals=True) - - # Save memory using ExperienceMemoryManager if available, - # otherwise fall back to raw ExperienceMemory serialization - mem_path = str(checkpoint_dir / "memory.json") - if self.memory_manager is not None: - self.memory_manager.save(mem_path) - else: - with open(mem_path, "w") as f: - json.dump(self.memory.to_dict(), f, indent=2, default=str) - - # Save session metadata - if self._session: - self._session.library_path = lib_base - self._session.memory_path = mem_path - self._refresh_run_manifest( - output_dir=str(checkpoint_dir.parent), - artifact_paths={ - "library": f"{lib_base}.json", - "memory": mem_path, - "session": str(checkpoint_dir / "session.json"), - "run_manifest": str(checkpoint_dir / "run_manifest.json"), - "loop_state": str(checkpoint_dir / "loop_state.json"), - }, - ) - self._persist_run_manifest(checkpoint_dir / "run_manifest.json") - self._session.save(checkpoint_dir / "session.json") - - # Save loop state (iteration counter + budget 
tracker) - loop_state: Dict[str, Any] = { - "iteration": self.iteration, - "library_size": self.library.size, - "memory_version": self.memory.version, - "budget": { - "llm_calls": self.budget.llm_calls, - "llm_prompt_tokens": self.budget.llm_prompt_tokens, - "llm_completion_tokens": self.budget.llm_completion_tokens, - "compute_seconds": self.budget.compute_seconds, - "max_llm_calls": self.budget.max_llm_calls, - "max_wall_seconds": self.budget.max_wall_seconds, - }, - } - with open(checkpoint_dir / "loop_state.json", "w") as f: - json.dump(loop_state, f, indent=2) - - logger.info("Session saved to %s", checkpoint_dir) - return str(checkpoint_dir) - - def load_session(self, path: str) -> None: - """Resume a mining session from a saved checkpoint. - - Restores the factor library (via ``load_library``), experience - memory, budget tracker state, iteration counter, and session - metadata from the checkpoint directory. - - Parameters - ---------- - path : str - Path to the checkpoint directory. 
- """ - checkpoint_dir = Path(path) - - # Load loop state (iteration counter + budget) - loop_state_path = checkpoint_dir / "loop_state.json" - if loop_state_path.exists(): - with open(loop_state_path) as f: - loop_state = json.load(f) - self.iteration = loop_state.get("iteration", 0) - - # Restore budget tracker state - budget_data = loop_state.get("budget", {}) - if budget_data: - self.budget.llm_calls = budget_data.get( - "llm_calls", self.budget.llm_calls - ) - self.budget.llm_prompt_tokens = budget_data.get( - "llm_prompt_tokens", self.budget.llm_prompt_tokens - ) - self.budget.llm_completion_tokens = budget_data.get( - "llm_completion_tokens", self.budget.llm_completion_tokens - ) - self.budget.compute_seconds = budget_data.get( - "compute_seconds", self.budget.compute_seconds - ) - self.budget.max_llm_calls = budget_data.get( - "max_llm_calls", self.budget.max_llm_calls - ) - self.budget.max_wall_seconds = budget_data.get( - "max_wall_seconds", self.budget.max_wall_seconds - ) - - logger.info( - "Resuming from iteration %d (library=%d)", - self.iteration, - loop_state.get("library_size", 0), - ) - - # Load memory - mem_path = checkpoint_dir / "memory.json" - if mem_path.exists(): - if self.memory_manager is not None: - self.memory_manager.load(mem_path) - self.memory = self.memory_manager.memory - else: - with open(mem_path) as f: - mem_data = json.load(f) - self.memory = ExperienceMemory.from_dict(mem_data) - logger.info( - "Loaded memory (version=%d, %d success, %d forbidden, %d insights)", - self.memory.version, - len(self.memory.success_patterns), - len(self.memory.forbidden_directions), - len(self.memory.insights), - ) - - # Load library using library_io (supports signals + correlation matrix) - lib_json_path = checkpoint_dir / "library.json" - if lib_json_path.exists(): - lib_base = str(checkpoint_dir / "library") - loaded_library = load_library(lib_base) - # Merge into current library (preserving thresholds from config) - self.library.factors = 
loaded_library.factors - self.library._next_id = loaded_library._next_id - self.library._id_to_index = loaded_library._id_to_index - self.library.correlation_matrix = loaded_library.correlation_matrix - # Update the pipeline reference so it uses the restored library - self.pipeline.library = self.library - logger.info("Loaded library with %d factors", self.library.size) - - # Load session metadata - session_path = checkpoint_dir / "session.json" - if session_path.exists(): - self._session = MiningSession.load(session_path) - self._session.status = "running" - self._run_manifest = dict(self._session.run_manifest or {}) - - if not self._run_manifest: - run_manifest_path = checkpoint_dir / "run_manifest.json" - if run_manifest_path.exists(): - with open(run_manifest_path) as f: - self._run_manifest = json.load(f) - - @classmethod - def resume_from( - cls, - checkpoint_path: str, - config: Any, - data_tensor: np.ndarray, - returns: np.ndarray, - llm_provider: Optional[LLMProvider] = None, - **kwargs: Any, - ) -> "RalphLoop": - """Create a RalphLoop and restore state from a checkpoint. - - Parameters - ---------- - checkpoint_path : str - Path to the checkpoint directory. - config, data_tensor, returns, llm_provider - Same as ``__init__``. - - Returns - ------- - RalphLoop - A loop ready to call ``run()`` that continues from the checkpoint. 
- """ - loop = cls( - config=config, - data_tensor=data_tensor, - returns=returns, - llm_provider=llm_provider, - **kwargs, - ) - loop.load_session(checkpoint_path) - return loop - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _checkpoint(self) -> None: - """Save a periodic checkpoint.""" - try: - self.save_session() - except Exception as exc: - logger.warning("Checkpoint failed: %s", exc) - - def _serialize_config(self) -> Dict[str, Any]: - """Serialize config to a JSON-compatible dict.""" - try: - if hasattr(self.config, "to_dict"): - return self.config.to_dict() - return asdict(self.config) - except (TypeError, AttributeError): - # Fallback: extract known attributes - attrs = [ - "target_library_size", "batch_size", "max_iterations", - "ic_threshold", "icir_threshold", "correlation_threshold", - "replacement_ic_min", "replacement_ic_ratio", "output_dir", - ] - return { - attr: getattr(self.config, attr, None) - for attr in attrs - if getattr(self.config, attr, None) is not None - } - - def _loop_type(self) -> str: - """Label the loop for provenance and manifests.""" - return "ralph" - - def _phase2_features(self) -> List[str]: - """Phase 2 feature flags used by the current loop.""" - return [] - - def _refresh_run_manifest( - self, - *, - output_dir: str, - artifact_paths: Optional[Dict[str, str]] = None, - ) -> Dict[str, Any]: - """Build and cache the current run manifest.""" - if self._session is None: - return {} - - config_summary = self._serialize_config() - dataset_summary = { - "data_tensor_shape": list(self.data_tensor.shape), - "returns_shape": list(self.returns.shape), - "memory_version": self.memory.version, - "library_size": self.library.size, - "library_diagnostics": self.library.get_diagnostics(), - } - if isinstance(self.config, dict): - benchmark_mode = str(self.config.get("benchmark_mode", "paper")) - target_stack = 
list(self.config.get("target_stack", [])) - else: - benchmark_mode = str(getattr(self.config, "benchmark_mode", "paper")) - target_stack = list( - getattr(self.config, "target_stack", []) - or [] - ) - - pipeline_targets = getattr(self.pipeline, "target_panels", None) or {} - if pipeline_targets: - target_stack = [ - name - for name in pipeline_targets.keys() - if name and name != "paper" - ] or target_stack - - manifest = build_run_manifest( - run_id=self._session.session_id, - session_id=self._session.session_id, - loop_type=self._loop_type(), - benchmark_mode=benchmark_mode, - created_at=self._session.start_time, - updated_at=datetime.now().isoformat(), - iteration=self.iteration, - library_size=self.library.size, - output_dir=output_dir, - config_summary=config_summary, - dataset_summary=dataset_summary, - phase2_features=self._phase2_features(), - target_stack=target_stack, - artifact_paths=artifact_paths or {}, - notes=[], - ) - self._run_manifest = manifest.to_dict() - return self._run_manifest - - def _persist_run_manifest(self, path: Path) -> None: - """Write the current run manifest to disk and mirror it into the session.""" - if self._session is None: - return - - path.parent.mkdir(parents=True, exist_ok=True) - if not self._run_manifest: - self._refresh_run_manifest( - output_dir=str(path.parent.parent), - artifact_paths={"run_manifest": str(path)}, - ) - self._run_manifest.setdefault("artifact_paths", {})["run_manifest"] = str(path) - with open(path, "w") as f: - json.dump(self._run_manifest, f, indent=2, default=str) - - self._session.run_manifest_path = str(path) - self._session.run_manifest = self._run_manifest - - def _attach_factor_provenance( - self, - admitted_results: List[EvaluationResult], - *, - library_state: Dict[str, Any], - memory_signal: Dict[str, Any], - phase2_summary: Dict[str, Any], - generator_family: Optional[str] = None, - ) -> None: - """Stamp provenance onto library factors that survived admission.""" - if not admitted_results 
or self._session is None: - return - - run_manifest = self._run_manifest or self._refresh_run_manifest( - output_dir=getattr(self.config, "output_dir", "./output"), - artifact_paths={}, - ) - - for rank, result in enumerate(admitted_results, start=1): - if not result.admitted: - continue - - factor = None - for candidate in reversed(self.library.list_factors()): - if ( - candidate.name == result.factor_name - and candidate.formula == result.formula - ): - factor = candidate - break - if factor is None: - continue - - factor.provenance = build_factor_provenance( - run_manifest=run_manifest, - factor_name=factor.name, - formula=factor.formula, - factor_category=factor.category, - factor_id=factor.id, - iteration=self.iteration, - batch_number=factor.batch_number, - candidate_rank=rank, - generator_family=generator_family or self._generator_family(), - memory_signal=memory_signal, - library_state=library_state, - evaluation={ - "ic_mean": factor.ic_mean, - "icir": factor.icir, - "ic_win_rate": factor.ic_win_rate, - "max_correlation": factor.max_correlation, - "research_metrics": factor.research_metrics, - }, - admission={ - "admitted": True, - "stage_passed": result.stage_passed, - "replaced": result.replaced, - "correlated_with": result.correlated_with, - "rejection_reason": result.rejection_reason, - }, - phase2=phase2_summary, - target_stack=run_manifest.get("target_stack", []), - research_metrics=factor.research_metrics, - ).to_dict() - - def _generator_family(self) -> str: - """Return the active candidate generator label for provenance.""" - return self.generator.__class__.__name__ diff --git a/src/factorminer/factorminer/core/session.py b/src/factorminer/factorminer/core/session.py deleted file mode 100644 index f18fede..0000000 --- a/src/factorminer/factorminer/core/session.py +++ /dev/null @@ -1,187 +0,0 @@ -"""Mining session management with persistence and resume support. 
- -A ``MiningSession`` wraps the state that must survive across process -restarts: session metadata, per-iteration statistics, timing, and paths -to serialized artifacts (library, memory). -""" - -from __future__ import annotations - -import json -import time -from dataclasses import asdict, dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional, Union - - -@dataclass -class MiningSession: - """Manages a complete mining session with persistence. - - Parameters - ---------- - session_id : str - Unique identifier for this session (e.g. timestamp or UUID). - config : dict - Serialized mining configuration (kept as dict for JSON compat). - output_dir : str - Directory for all session artifacts. - """ - - session_id: str - config: Dict[str, Any] = field(default_factory=dict) - output_dir: str = "./output" - start_time: str = "" - end_time: str = "" - iterations: List[Dict[str, Any]] = field(default_factory=list) - library_path: str = "" - memory_path: str = "" - run_manifest_path: str = "" - run_manifest: Dict[str, Any] = field(default_factory=dict) - status: str = "running" # running | completed | interrupted - - def __post_init__(self) -> None: - if not self.start_time: - self.start_time = datetime.now().isoformat() - - # ------------------------------------------------------------------ - # Iteration tracking - # ------------------------------------------------------------------ - - def record_iteration(self, stats: Dict[str, Any]) -> None: - """Append iteration statistics to the session log.""" - stats = dict(stats) - stats.setdefault("timestamp", datetime.now().isoformat()) - self.iterations.append(stats) - - @property - def total_iterations(self) -> int: - return len(self.iterations) - - @property - def last_library_size(self) -> int: - if not self.iterations: - return 0 - return self.iterations[-1].get("library_size", 0) - - # ------------------------------------------------------------------ - # 
Serialization - # ------------------------------------------------------------------ - - def to_dict(self) -> Dict[str, Any]: - """Serialize session state to a JSON-compatible dictionary.""" - return { - "session_id": self.session_id, - "config": self.config, - "output_dir": self.output_dir, - "start_time": self.start_time, - "end_time": self.end_time, - "status": self.status, - "total_iterations": self.total_iterations, - "last_library_size": self.last_library_size, - "library_path": self.library_path, - "memory_path": self.memory_path, - "run_manifest_path": self.run_manifest_path, - "run_manifest": self.run_manifest, - "iterations": self.iterations, - } - - def save(self, path: Optional[Union[str, Path]] = None) -> str: - """Save session state to a JSON file. - - Parameters - ---------- - path : str or Path, optional - Explicit save path. Defaults to ``{output_dir}/session.json``. - - Returns - ------- - str - The path the session was saved to. - """ - if path is None: - save_dir = Path(self.output_dir) - save_dir.mkdir(parents=True, exist_ok=True) - path = save_dir / "session.json" - else: - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - with open(path, "w") as f: - json.dump(self.to_dict(), f, indent=2, default=str) - return str(path) - - @classmethod - def load(cls, path: Union[str, Path]) -> "MiningSession": - """Load session from a JSON file. - - Parameters - ---------- - path : str or Path - Path to a session JSON file. 
- - Returns - ------- - MiningSession - """ - path = Path(path) - with open(path) as f: - data = json.load(f) - - return cls( - session_id=data["session_id"], - config=data.get("config", {}), - output_dir=data.get("output_dir", "./output"), - start_time=data.get("start_time", ""), - end_time=data.get("end_time", ""), - iterations=data.get("iterations", []), - library_path=data.get("library_path", ""), - memory_path=data.get("memory_path", ""), - run_manifest_path=data.get("run_manifest_path", ""), - run_manifest=data.get("run_manifest", {}), - status=data.get("status", "interrupted"), - ) - - # ------------------------------------------------------------------ - # Summary - # ------------------------------------------------------------------ - - def get_summary(self) -> Dict[str, Any]: - """Session summary statistics.""" - total_candidates = sum( - it.get("candidates", 0) for it in self.iterations - ) - total_admitted = sum( - it.get("admitted", 0) for it in self.iterations - ) - total_replaced = sum( - it.get("replaced", 0) for it in self.iterations - ) - - # Compute elapsed time - elapsed = 0.0 - if self.start_time: - start = datetime.fromisoformat(self.start_time) - end_str = self.end_time or datetime.now().isoformat() - end = datetime.fromisoformat(end_str) - elapsed = (end - start).total_seconds() - - return { - "session_id": self.session_id, - "status": self.status, - "total_iterations": self.total_iterations, - "total_candidates": total_candidates, - "total_admitted": total_admitted, - "total_replaced": total_replaced, - "overall_yield_rate": ( - total_admitted / total_candidates if total_candidates > 0 else 0.0 - ), - "final_library_size": self.last_library_size, - "elapsed_seconds": elapsed, - } - - def finalize(self) -> None: - """Mark the session as completed and record end time.""" - self.end_time = datetime.now().isoformat() - self.status = "completed" diff --git a/src/factorminer/factorminer/core/types.py b/src/factorminer/factorminer/core/types.py 
deleted file mode 100644 index 4291252..0000000 --- a/src/factorminer/factorminer/core/types.py +++ /dev/null @@ -1,269 +0,0 @@ -"""Type system for the FactorMiner operator library. - -Defines operator categories, signatures, specifications, and the canonical -set of raw market-data feature names used as leaf nodes in expression trees. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from enum import Enum, auto -from typing import Any, Dict, List, Optional, Tuple - - -# --------------------------------------------------------------------------- -# Enumerations -# --------------------------------------------------------------------------- - -class OperatorType(Enum): - """High-level category for every operator.""" - ARITHMETIC = auto() - STATISTICAL = auto() - TIMESERIES = auto() - CROSS_SECTIONAL = auto() - SMOOTHING = auto() - REGRESSION = auto() - LOGICAL = auto() - AUTO_INVENTED = auto() - - -class SignatureType(Enum): - """Describes how an operator maps inputs to outputs. - - TIME_SERIES_TO_TIME_SERIES – rolling / lookback along the time axis - CROSS_SECTION_TO_CROSS_SECTION – operates across stocks at each point - ELEMENT_WISE – pointwise on array(s), no window or cross-section logic - REDUCE_TIME – collapses the time axis (e.g. cumulative sum) - """ - TIME_SERIES_TO_TIME_SERIES = auto() - CROSS_SECTION_TO_CROSS_SECTION = auto() - ELEMENT_WISE = auto() - REDUCE_TIME = auto() - - -# --------------------------------------------------------------------------- -# Operator specification -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class OperatorSpec: - """Immutable descriptor for a single operator in the library. - - Parameters - ---------- - name : str - Canonical name used in DSL strings (e.g. ``"Add"``). - arity : int - Number of *expression* children (1 = unary, 2 = binary, 3 = ternary). - category : OperatorType - Broad category of the operator. 
- signature : SignatureType - How the operator maps inputs to outputs. - param_names : tuple[str, ...] - Names of extra numeric parameters (e.g. ``("window",)``). - param_defaults : dict[str, float] - Default value for each parameter when omitted. - param_ranges : dict[str, tuple[float, float]] - Valid (inclusive) range for each parameter. - description : str - Short human-readable description. - """ - name: str - arity: int - category: OperatorType - signature: SignatureType - param_names: Tuple[str, ...] = () - param_defaults: Dict[str, float] = field(default_factory=dict) - param_ranges: Dict[str, Tuple[float, float]] = field(default_factory=dict) - description: str = "" - - -# --------------------------------------------------------------------------- -# Canonical feature names (leaf nodes) -# --------------------------------------------------------------------------- - -FEATURES: List[str] = [ - "$open", - "$high", - "$low", - "$close", - "$volume", - "$amt", - "$vwap", - "$returns", -] - -FEATURE_SET: frozenset = frozenset(FEATURES) - - -# --------------------------------------------------------------------------- -# Complete operator library (60+ operators) -# --------------------------------------------------------------------------- - -def _window_params( - default: int = 10, - lo: int = 2, - hi: int = 250, -) -> Tuple[Tuple[str, ...], Dict[str, float], Dict[str, Tuple[float, float]]]: - """Helper returning standard (window,) parameter triple.""" - return ( - ("window",), - {"window": float(default)}, - {"window": (float(lo), float(hi))}, - ) - - -def _build_operator_registry() -> Dict[str, OperatorSpec]: - """Construct the full operator registry. - - Returns a mapping from canonical operator name to its ``OperatorSpec``. - """ - registry: Dict[str, OperatorSpec] = {} - - def _reg( - name: str, - arity: int, - cat: OperatorType, - sig: SignatureType, - param_names: Tuple[str, ...] 
= (), - param_defaults: Optional[Dict[str, float]] = None, - param_ranges: Optional[Dict[str, Tuple[float, float]]] = None, - desc: str = "", - ) -> None: - registry[name] = OperatorSpec( - name=name, - arity=arity, - category=cat, - signature=sig, - param_names=param_names, - param_defaults=param_defaults or {}, - param_ranges=param_ranges or {}, - description=desc, - ) - - EW = SignatureType.ELEMENT_WISE - TS = SignatureType.TIME_SERIES_TO_TIME_SERIES - CS = SignatureType.CROSS_SECTION_TO_CROSS_SECTION - RT = SignatureType.REDUCE_TIME - - A = OperatorType.ARITHMETIC - S = OperatorType.STATISTICAL - T = OperatorType.TIMESERIES - X = OperatorType.CROSS_SECTIONAL - SM = OperatorType.SMOOTHING - R = OperatorType.REGRESSION - L = OperatorType.LOGICAL - - wp = _window_params - - # ---- Arithmetic (element-wise) ---------------------------------------- - _reg("Add", 2, A, EW, desc="x + y") - _reg("Sub", 2, A, EW, desc="x - y") - _reg("Mul", 2, A, EW, desc="x * y") - _reg("Div", 2, A, EW, desc="x / y (safe division)") - _reg("Neg", 1, A, EW, desc="-x") - _reg("Abs", 1, A, EW, desc="|x|") - _reg("Sign", 1, A, EW, desc="sign(x)") - _reg("Log", 1, A, EW, desc="log(1 + |x|) * sign(x)") - _reg("Sqrt", 1, A, EW, desc="sqrt(|x|) * sign(x)") - _reg("Square", 1, A, EW, desc="x^2") - _reg("Pow", 2, A, EW, desc="x^y") - _reg("Max", 2, A, EW, desc="element-wise max(x, y)") - _reg("Min", 2, A, EW, desc="element-wise min(x, y)") - _reg("Clip", 1, A, EW, - param_names=("lower", "upper"), - param_defaults={"lower": -3.0, "upper": 3.0}, - param_ranges={"lower": (-10.0, 10.0), "upper": (-10.0, 10.0)}, - desc="clip(x, lower, upper)") - _reg("Inv", 1, A, EW, desc="1 / x (safe)") - - # ---- Statistical (rolling window) ------------------------------------- - _reg("Mean", 1, S, TS, *wp(10), desc="rolling mean") - _reg("Std", 1, S, TS, *wp(10), desc="rolling std dev") - _reg("Var", 1, S, TS, *wp(10), desc="rolling variance") - _reg("Skew", 1, S, TS, *wp(20), desc="rolling skewness") - 
_reg("Kurt", 1, S, TS, *wp(20), desc="rolling kurtosis") - _reg("Median", 1, S, TS, *wp(10), desc="rolling median") - _reg("Sum", 1, S, TS, *wp(10), desc="rolling sum") - _reg("Prod", 1, S, TS, *wp(10), desc="rolling product") - _reg("TsMax", 1, S, TS, *wp(10), desc="rolling max") - _reg("TsMin", 1, S, TS, *wp(10), desc="rolling min") - _reg("TsArgMax", 1, S, TS, *wp(10), desc="rolling argmax") - _reg("TsArgMin", 1, S, TS, *wp(10), desc="rolling argmin") - _reg("TsRank", 1, S, TS, *wp(10), desc="rolling rank of latest value") - _reg("Quantile", 1, S, TS, - param_names=("window", "q"), - param_defaults={"window": 10.0, "q": 0.5}, - param_ranges={"window": (2.0, 250.0), "q": (0.0, 1.0)}, - desc="rolling quantile") - _reg("CountNaN", 1, S, TS, *wp(10), desc="rolling count of NaN") - _reg("CountNotNaN", 1, S, TS, *wp(10), desc="rolling count of non-NaN") - - # ---- Time-series operators -------------------------------------------- - _reg("Delta", 1, T, TS, *wp(1, 1, 60), desc="x[t] - x[t-d]") - _reg("Delay", 1, T, TS, *wp(1, 1, 60), desc="x[t-d]") - _reg("Return", 1, T, TS, *wp(1, 1, 60), desc="x[t]/x[t-d] - 1") - _reg("LogReturn", 1, T, TS, *wp(1, 1, 60), desc="log(x[t]/x[t-d])") - _reg("Corr", 2, T, TS, *wp(10), desc="rolling correlation") - _reg("Cov", 2, T, TS, *wp(10), desc="rolling covariance") - _reg("Beta", 2, T, TS, *wp(10), desc="rolling regression beta") - _reg("Resid", 2, T, TS, *wp(10), desc="rolling regression residual") - _reg("WMA", 1, T, TS, *wp(10), desc="weighted moving average (linear)") - _reg("Decay", 1, T, TS, *wp(10), desc="exponentially decaying sum") - _reg("CumSum", 1, T, RT, desc="cumulative sum along time") - _reg("CumProd", 1, T, RT, desc="cumulative product along time") - _reg("CumMax", 1, T, RT, desc="cumulative max along time") - _reg("CumMin", 1, T, RT, desc="cumulative min along time") - - # ---- Smoothing -------------------------------------------------------- - _reg("EMA", 1, SM, TS, *wp(10), desc="exponential moving average") - 
_reg("DEMA", 1, SM, TS, *wp(10), desc="double EMA") - _reg("SMA", 1, SM, TS, *wp(10), desc="simple moving average") - _reg("KAMA", 1, SM, TS, *wp(10), desc="Kaufman adaptive moving average") - _reg("HMA", 1, SM, TS, *wp(10), desc="Hull moving average") - - # ---- Cross-sectional -------------------------------------------------- - _reg("CsRank", 1, X, CS, desc="cross-sectional rank (percentile)") - _reg("CsZScore", 1, X, CS, desc="cross-sectional z-score") - _reg("CsDemean", 1, X, CS, desc="x - cross-sectional mean") - _reg("CsScale", 1, X, CS, desc="scale to unit L1 norm cross-sectionally") - _reg("CsNeutralize", 1, X, CS, desc="industry-neutralize") - _reg("CsQuantile", 1, X, CS, - param_names=("n_bins",), - param_defaults={"n_bins": 5.0}, - param_ranges={"n_bins": (2.0, 20.0)}, - desc="cross-sectional quantile bin") - - # ---- Regression ------------------------------------------------------- - _reg("TsLinReg", 1, R, TS, *wp(20), desc="rolling linear-regression fitted value") - _reg("TsLinRegSlope", 1, R, TS, *wp(20), desc="rolling linear-regression slope") - _reg("TsLinRegIntercept", 1, R, TS, *wp(20), desc="rolling linear-regression intercept") - _reg("TsLinRegResid", 1, R, TS, *wp(20), desc="rolling linear-regression residual") - - # ---- Logical / conditional -------------------------------------------- - _reg("IfElse", 3, L, EW, desc="if cond > 0 then x else y") - _reg("Greater", 2, L, EW, desc="1.0 where x > y else 0.0") - _reg("GreaterEqual", 2, L, EW, desc="1.0 where x >= y else 0.0") - _reg("Less", 2, L, EW, desc="1.0 where x < y else 0.0") - _reg("LessEqual", 2, L, EW, desc="1.0 where x <= y else 0.0") - _reg("Equal", 2, L, EW, desc="1.0 where x == y else 0.0") - _reg("Ne", 2, L, EW, desc="1.0 where x != y else 0.0") - _reg("And", 2, L, EW, desc="logical and") - _reg("Or", 2, L, EW, desc="logical or") - _reg("Not", 1, L, EW, desc="logical not") - - return registry - - -OPERATOR_REGISTRY: Dict[str, OperatorSpec] = _build_operator_registry() -"""Global 
mapping from operator name to its ``OperatorSpec``.""" - - -def get_operator(name: str) -> OperatorSpec: - """Look up an operator by name, raising ``KeyError`` if unknown.""" - if name not in OPERATOR_REGISTRY: - raise KeyError( - f"Unknown operator '{name}'. " - f"Available: {sorted(OPERATOR_REGISTRY.keys())}" - ) - return OPERATOR_REGISTRY[name] diff --git a/src/factorminer/factorminer/data/__init__.py b/src/factorminer/factorminer/data/__init__.py deleted file mode 100644 index f06f07b..0000000 --- a/src/factorminer/factorminer/data/__init__.py +++ /dev/null @@ -1,75 +0,0 @@ -"""FactorMiner data pipeline: loading, preprocessing, and tensor construction.""" - -from src.factorminer.factorminer.data.loader import ( - OHLCV_COLUMNS, - REQUIRED_COLUMNS, - load_market_data, - load_multiple, - to_numpy, -) -from src.factorminer.factorminer.data.mock_data import ( - MockConfig, - generate_mock_data, - generate_with_halts, -) -from src.factorminer.factorminer.data.preprocessor import ( - PreprocessConfig, - compute_derived_features, - compute_returns, - compute_vwap, - cross_sectional_standardise, - fill_missing, - flag_halts, - mask_halts, - preprocess, - quality_check, - winsorise, -) -from src.factorminer.factorminer.data.tensor_builder import ( - DEFAULT_FEATURES, - TargetSpec, - TensorConfig, - TensorDataset, - build_pipeline, - build_tensor, - compute_target, - compute_targets, - sample_assets, - temporal_split, -) - -__all__ = [ - # loader - "OHLCV_COLUMNS", - "REQUIRED_COLUMNS", - "load_market_data", - "load_multiple", - "to_numpy", - # mock_data - "MockConfig", - "generate_mock_data", - "generate_with_halts", - # preprocessor - "PreprocessConfig", - "compute_derived_features", - "compute_returns", - "compute_vwap", - "cross_sectional_standardise", - "fill_missing", - "flag_halts", - "mask_halts", - "preprocess", - "quality_check", - "winsorise", - # tensor_builder - "DEFAULT_FEATURES", - "TargetSpec", - "TensorConfig", - "TensorDataset", - "build_pipeline", - 
"build_tensor", - "compute_target", - "compute_targets", - "sample_assets", - "temporal_split", -] diff --git a/src/factorminer/factorminer/data/loader.py b/src/factorminer/factorminer/data/loader.py deleted file mode 100644 index 6064ce7..0000000 --- a/src/factorminer/factorminer/data/loader.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Market data loader supporting multiple formats and asset universes. - -Loads OHLCV + amount data from CSV, Parquet, and HDF5 files. Supports -A-share universes (CSI500, CSI1000, HS300) and Binance crypto data. -Expected schema: datetime, asset_id, open, high, low, close, volume, amount. - -The loader also accepts a small set of common aliases used by broker/data-vendor -exports, such as ``code``/``ticker`` for ``asset_id`` and ``amt`` for -``amount``. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Literal, Optional, Sequence, Union - -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - -# Canonical column ordering -REQUIRED_COLUMNS = [ - "datetime", - "asset_id", - "open", - "high", - "low", - "close", - "volume", - "amount", -] - -OHLCV_COLUMNS = ["open", "high", "low", "close", "volume", "amount"] - -COLUMN_ALIASES = { - "datetime": ["timestamp", "date", "time", "trade_date"], - "asset_id": ["ticker", "symbol", "code", "stock_code", "ts_code", "instrument"], - "open": ["open_price"], - "high": ["high_price"], - "low": ["low_price"], - "close": ["close_price", "price"], - "volume": ["vol"], - "amount": ["amt", "turnover", "value", "traded_amount"], -} - -# Well-known universe identifiers -UNIVERSE_ALIASES = { - "csi500": "CSI500", - "csi1000": "CSI1000", - "hs300": "HS300", - "binance": "Binance", -} - -FileFormat = Literal["csv", "parquet", "hdf5"] - - -def _infer_format(path: Path) -> FileFormat: - suffix = path.suffix.lower() - mapping = { - ".csv": "csv", - ".parquet": "parquet", - ".pq": "parquet", - ".h5": "hdf5", - ".hdf5": "hdf5", - } - fmt 
= mapping.get(suffix) - if fmt is None: - raise ValueError(f"Cannot infer format from extension '{suffix}'. " - f"Supported: {list(mapping.keys())}") - return fmt # type: ignore[return-value] - - -def _read_file( - path: Path, - fmt: FileFormat, - hdf_key: str = "data", -) -> pd.DataFrame: - """Read a single data file into a DataFrame.""" - if fmt == "csv": - df = pd.read_csv(path) - elif fmt == "parquet": - df = pd.read_parquet(path) - elif fmt == "hdf5": - df = pd.read_hdf(path, key=hdf_key) - else: - raise ValueError(f"Unsupported format: {fmt}") - return df - - -def _validate_columns(df: pd.DataFrame, path: Path) -> pd.DataFrame: - """Ensure required columns are present and normalise names.""" - cols_lower = {c.lower().strip(): c for c in df.columns} - rename_map: dict[str, str] = {} - missing: list[str] = [] - for req in REQUIRED_COLUMNS: - if req in df.columns: - continue - candidates = [req, *COLUMN_ALIASES.get(req, [])] - matched = None - for candidate in candidates: - original = cols_lower.get(candidate.lower().strip()) - if original is not None: - matched = original - break - if matched is None: - missing.append(req) - continue - rename_map[matched] = req - if missing: - raise ValueError( - f"File {path} is missing required columns: {missing}. 
" - f"Found: {list(df.columns)}" - ) - if rename_map: - df = df.rename(columns=rename_map) - return df - - -def _coerce_types(df: pd.DataFrame) -> pd.DataFrame: - """Ensure numeric types for OHLCV columns and datetime index.""" - df["datetime"] = pd.to_datetime(df["datetime"]) - df["asset_id"] = df["asset_id"].astype(str) - for col in OHLCV_COLUMNS: - df[col] = pd.to_numeric(df[col], errors="coerce") - return df - - -def load_market_data( - path: Union[str, Path], - fmt: Optional[FileFormat] = None, - universe: Optional[str] = None, - asset_ids: Optional[Sequence[str]] = None, - start: Optional[str] = None, - end: Optional[str] = None, - hdf_key: str = "data", -) -> pd.DataFrame: - """Load market data from a single file. - - Parameters - ---------- - path : str or Path - File path to the data source. - fmt : str, optional - File format (``"csv"``, ``"parquet"``, ``"hdf5"``). Inferred from - the file extension when *None*. - universe : str, optional - Asset universe filter (e.g. ``"CSI500"``). Only assets belonging to - the universe are kept. Requires an ``"universe"`` column in the data. - asset_ids : sequence of str, optional - Explicit list of asset identifiers to retain. - start, end : str, optional - ISO-formatted datetime strings for temporal filtering. - hdf_key : str - HDF5 dataset key (default ``"data"``). - - Returns - ------- - pd.DataFrame - Sorted DataFrame with columns from :data:`REQUIRED_COLUMNS` plus any - extras present in the source file. 
- """ - path = Path(path) - if not path.exists(): - raise FileNotFoundError(f"Data file not found: {path}") - - if fmt is None: - fmt = _infer_format(path) - - logger.info("Loading %s from %s", fmt, path) - df = _read_file(path, fmt, hdf_key=hdf_key) - df = _validate_columns(df, path) - df = _coerce_types(df) - - # Universe filter - if universe is not None: - canon = UNIVERSE_ALIASES.get(universe.lower(), universe) - if "universe" in df.columns: - df = df[df["universe"] == canon] - logger.info("Filtered to universe %s: %d rows", canon, len(df)) - else: - logger.warning( - "Universe filter '%s' requested but no 'universe' column found; " - "filter skipped.", - canon, - ) - - # Explicit asset filter - if asset_ids is not None: - asset_set = set(str(a) for a in asset_ids) - df = df[df["asset_id"].isin(asset_set)] - - # Temporal filter - if start is not None: - df = df[df["datetime"] >= pd.Timestamp(start)] - if end is not None: - df = df[df["datetime"] <= pd.Timestamp(end)] - - df = df.sort_values(["datetime", "asset_id"]).reset_index(drop=True) - logger.info("Loaded %d rows, %d assets", len(df), df["asset_id"].nunique()) - return df - - -def load_multiple( - paths: Sequence[Union[str, Path]], - fmt: Optional[FileFormat] = None, - **kwargs, -) -> pd.DataFrame: - """Load and concatenate market data from multiple files. - - All keyword arguments are forwarded to :func:`load_market_data`. - """ - frames: list[pd.DataFrame] = [] - for p in paths: - frames.append(load_market_data(p, fmt=fmt, **kwargs)) - if not frames: - raise ValueError("No files provided to load_multiple") - df = pd.concat(frames, ignore_index=True) - df = df.sort_values(["datetime", "asset_id"]).reset_index(drop=True) - return df - - -def to_numpy( - df: pd.DataFrame, - columns: Optional[Sequence[str]] = None, -) -> np.ndarray: - """Convert a DataFrame to a numpy array of the specified columns. - - Parameters - ---------- - df : pd.DataFrame - Market data DataFrame. 
- columns : sequence of str, optional - Columns to include. Defaults to :data:`OHLCV_COLUMNS`. - - Returns - ------- - np.ndarray - 2-D float64 array of shape ``(n_rows, n_columns)``. - """ - if columns is None: - columns = OHLCV_COLUMNS - return df[list(columns)].to_numpy(dtype=np.float64) diff --git a/src/factorminer/factorminer/data/mock_data.py b/src/factorminer/factorminer/data/mock_data.py deleted file mode 100644 index 64a892e..0000000 --- a/src/factorminer/factorminer/data/mock_data.py +++ /dev/null @@ -1,323 +0,0 @@ -"""Generate realistic synthetic market data for testing FactorMiner. - -Produces multi-asset OHLCV data with: -- Volume clustering (GARCH-like) -- Volatility clustering -- Cross-sectional correlation via a common market factor -- Planted alpha signals for validating factor discovery -- OHLC consistency guarantees: low <= open,close <= high -""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass -from typing import Literal, Optional - -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - -Frequency = Literal["10min", "30min", "1h", "1d"] - -_FREQ_MAP = { - "10min": "10min", - "30min": "30min", - "1h": "1h", - "1d": "1D", -} - - -@dataclass -class MockConfig: - """Configuration for synthetic data generation. - - Attributes - ---------- - num_assets : int - Number of assets (M). - num_periods : int - Number of time bars (T) per asset. - frequency : str - Bar frequency: ``"10min"``, ``"30min"``, ``"1h"``, ``"1d"``. - start_date : str - Start datetime in ISO format. - base_price : float - Initial price level around which assets are generated. - annual_vol : float - Annualised volatility for the diffusion process. - market_factor_weight : float - Weight of the common market factor in returns (0-1). - Higher values increase cross-sectional correlation. - vol_persistence : float - GARCH(1,1) persistence parameter for volatility clustering (0-1). 
- volume_mean : float - Mean daily volume per asset. - volume_persistence : float - AR(1) coefficient for volume clustering (0-1). - plant_alpha : bool - Whether to inject planted alpha signals. - alpha_strength : float - Signal-to-noise ratio of the planted alpha. - alpha_assets_frac : float - Fraction of assets that carry the planted signal. - seed : int - Random seed for reproducibility. - universe : str or None - Universe label to include in the output. - """ - - num_assets: int = 50 - num_periods: int = 1000 - frequency: Frequency = "10min" - start_date: str = "2024-01-02 09:30:00" - base_price: float = 50.0 - annual_vol: float = 0.25 - market_factor_weight: float = 0.3 - vol_persistence: float = 0.9 - volume_mean: float = 1_000_000.0 - volume_persistence: float = 0.85 - plant_alpha: bool = True - alpha_strength: float = 0.02 - alpha_assets_frac: float = 0.2 - seed: int = 42 - universe: Optional[str] = None - - -def _bars_per_year(freq: Frequency) -> float: - """Approximate number of bars in a trading year.""" - trading_days = 252 - bars_per_day = { - "10min": 24, # 4h session / 10min - "30min": 8, - "1h": 4, - "1d": 1, - } - return trading_days * bars_per_day[freq] - - -def _generate_timestamps( - start: str, - num_periods: int, - freq: Frequency, -) -> pd.DatetimeIndex: - """Create a business-aware timestamp index. - - For intraday frequencies the index skips weekends and only covers - a simplified trading session (09:30 - 15:00 for 10min/30min bars). 
- """ - pd_freq = _FREQ_MAP[freq] - if freq == "1d": - ts = pd.bdate_range(start=start, periods=num_periods, freq="B") - else: - # Generate enough intraday bars, then trim to num_periods - days_needed = (num_periods // 24) + 10 # generous overestimate - day_range = pd.bdate_range(start=start, periods=days_needed, freq="B") - bars: list[pd.Timestamp] = [] - for day in day_range: - session_start = day.replace(hour=9, minute=30, second=0) - session_end = day.replace(hour=15, minute=0, second=0) - day_bars = pd.date_range(session_start, session_end, freq=pd_freq) - # Exclude the exact session end for cleaner bars - day_bars = day_bars[day_bars < session_end] - bars.extend(day_bars.tolist()) - if len(bars) >= num_periods: - break - ts = pd.DatetimeIndex(bars[:num_periods]) - return ts - - -def generate_mock_data(config: Optional[MockConfig] = None) -> pd.DataFrame: - """Generate synthetic multi-asset OHLCV + amount data. - - Parameters - ---------- - config : MockConfig, optional - Generation parameters. Uses defaults when *None*. - - Returns - ------- - pd.DataFrame - DataFrame with columns: datetime, asset_id, open, high, low, - close, volume, amount. Optionally includes ``universe``. 
- """ - if config is None: - config = MockConfig() - - rng = np.random.default_rng(config.seed) - M = config.num_assets - T = config.num_periods - - logger.info("Generating mock data: %d assets x %d periods @ %s", M, T, config.frequency) - - timestamps = _generate_timestamps(config.start_date, T, config.frequency) - T = len(timestamps) # may be shorter if we ran out of session bars - - # Per-bar volatility (annualised -> per-bar) - bar_vol = config.annual_vol / np.sqrt(_bars_per_year(config.frequency)) - - # --------------------------------------------------------------- - # Common market factor (drives cross-sectional correlation) - # --------------------------------------------------------------- - market_returns = rng.normal(0, bar_vol, size=T) - - # --------------------------------------------------------------- - # Per-asset paths - # --------------------------------------------------------------- - asset_ids = [f"ASSET_{i:04d}" for i in range(M)] - - # Storage - all_open = np.empty((M, T)) - all_high = np.empty((M, T)) - all_low = np.empty((M, T)) - all_close = np.empty((M, T)) - all_volume = np.empty((M, T)) - all_amount = np.empty((M, T)) - - # Planted alpha: select a subset of assets - n_alpha = max(1, int(M * config.alpha_assets_frac)) - alpha_assets = set(rng.choice(M, size=n_alpha, replace=False).tolist()) if config.plant_alpha else set() - - for i in range(M): - # Initial price with some dispersion - p0 = config.base_price * np.exp(rng.normal(0, 0.3)) - - # GARCH-like stochastic volatility - sigma = np.empty(T) - sigma[0] = bar_vol - for t in range(1, T): - sigma[t] = ( - bar_vol * (1 - config.vol_persistence) - + config.vol_persistence * sigma[t - 1] - + rng.normal(0, bar_vol * 0.1) - ) - sigma[t] = max(sigma[t], bar_vol * 0.2) # floor - - # Idiosyncratic returns - idio = rng.normal(0, 1, size=T) * sigma - - # Combine with market factor - w = config.market_factor_weight - returns = w * market_returns + (1 - w) * idio - - # Plant alpha signal: small 
positive drift in returns - if i in alpha_assets: - # Signal: positive drift correlated with lagged volume momentum - alpha_drift = config.alpha_strength * bar_vol - returns += alpha_drift - - # Cumulative price path (close prices) - log_price = np.log(p0) + np.cumsum(returns) - close = np.exp(log_price) - - # Generate intra-bar OHLC from close - # Open = previous close + small gap noise - open_ = np.empty(T) - open_[0] = p0 - open_[1:] = close[:-1] * np.exp(rng.normal(0, bar_vol * 0.1, size=T - 1)) - - # Intra-bar high/low - intra_range = np.abs(rng.normal(0, sigma * 0.5, size=T)) - mid = (open_ + close) / 2 - high = np.maximum(open_, close) + intra_range - low = np.minimum(open_, close) - intra_range - low = np.maximum(low, mid * 0.9) # prevent negative or absurd lows - - # Enforce OHLC consistency - high = np.maximum(high, np.maximum(open_, close)) - low = np.minimum(low, np.minimum(open_, close)) - low = np.maximum(low, 0.01) # price floor - - # Volume: AR(1) with log-normal noise - log_vol = np.empty(T) - log_vol_mean = np.log(config.volume_mean) - log_vol[0] = log_vol_mean + rng.normal(0, 0.5) - for t in range(1, T): - log_vol[t] = ( - log_vol_mean * (1 - config.volume_persistence) - + config.volume_persistence * log_vol[t - 1] - + rng.normal(0, 0.3) - ) - volume = np.exp(log_vol).astype(np.float64) - - # Amount = volume * vwap (approximate vwap as midpoint) - vwap_est = (high + low + close) / 3 - amount = volume * vwap_est - - all_open[i] = open_ - all_high[i] = high - all_low[i] = low - all_close[i] = close - all_volume[i] = np.round(volume) - all_amount[i] = amount - - # --------------------------------------------------------------- - # Assemble DataFrame - # --------------------------------------------------------------- - records = [] - for i in range(M): - asset_df = pd.DataFrame({ - "datetime": timestamps, - "asset_id": asset_ids[i], - "open": all_open[i], - "high": all_high[i], - "low": all_low[i], - "close": all_close[i], - "volume": all_volume[i], 
- "amount": all_amount[i], - }) - records.append(asset_df) - - df = pd.concat(records, ignore_index=True) - - if config.universe is not None: - df["universe"] = config.universe - - df = df.sort_values(["datetime", "asset_id"]).reset_index(drop=True) - - logger.info( - "Generated %d rows: %d assets x %d periods, planted alpha in %d assets", - len(df), - M, - T, - len(alpha_assets), - ) - return df - - -def generate_with_halts( - config: Optional[MockConfig] = None, - halt_fraction: float = 0.01, -) -> pd.DataFrame: - """Generate mock data with simulated trading halts. - - A fraction of (asset, time) pairs are converted to halt bars: - open = high = low = close = last valid close, volume = 0, amount = 0. - - Parameters - ---------- - config : MockConfig, optional - Generation parameters. - halt_fraction : float - Fraction of bars to convert to halts. - """ - df = generate_mock_data(config) - if config is None: - config = MockConfig() - rng = np.random.default_rng(config.seed + 1) - - n = len(df) - n_halt = int(n * halt_fraction) - halt_idx = rng.choice(n, size=n_halt, replace=False) - - df.loc[halt_idx, "volume"] = 0 - df.loc[halt_idx, "amount"] = 0 - # Flatten OHLC to close (simulating last traded price) - halt_price = df.loc[halt_idx, "close"] - df.loc[halt_idx, "open"] = halt_price - df.loc[halt_idx, "high"] = halt_price - df.loc[halt_idx, "low"] = halt_price - - logger.info("Injected %d halt bars (%.2f%%)", n_halt, 100 * halt_fraction) - return df diff --git a/src/factorminer/factorminer/data/preprocessor.py b/src/factorminer/factorminer/data/preprocessor.py deleted file mode 100644 index 3a2b429..0000000 --- a/src/factorminer/factorminer/data/preprocessor.py +++ /dev/null @@ -1,364 +0,0 @@ -"""Data preprocessing pipeline for FactorMiner. - -Handles derived feature computation, missing data imputation, trading halt -detection, cross-sectional standardisation, winsorisation, and quality checks. 
-""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass, field -from typing import Optional, Sequence - -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - - -@dataclass -class PreprocessConfig: - """Configuration for the preprocessing pipeline. - - Attributes - ---------- - winsor_lower : float - Lower percentile for winsorisation (0-100). - winsor_upper : float - Upper percentile for winsorisation (0-100). - min_nonnan_ratio : float - Minimum fraction of non-NaN values required per cross-section - for a time step to be kept. - ffill_limit : int or None - Maximum number of consecutive NaN values to forward-fill within - each intraday session. - cross_fill_method : str - Cross-sectional fill method after forward fill. - ``"median"`` or ``"mean"``. - standardise : bool - Whether to apply cross-sectional z-score standardisation. - halt_volume_threshold : float - Volume below this value flags a bar as a trading halt. - features_to_standardise : list of str - Column names subject to standardisation and winsorisation. - """ - - winsor_lower: float = 1.0 - winsor_upper: float = 99.0 - min_nonnan_ratio: float = 0.5 - ffill_limit: Optional[int] = None - cross_fill_method: str = "median" - standardise: bool = True - halt_volume_threshold: float = 0.0 - features_to_standardise: list[str] = field(default_factory=lambda: [ - "open", "high", "low", "close", "volume", "amount", "vwap", "returns", - ]) - - -# --------------------------------------------------------------------------- -# Derived features -# --------------------------------------------------------------------------- - -def compute_vwap(df: pd.DataFrame) -> pd.DataFrame: - """Add ``vwap`` column: amount / volume. 
NaN when volume is zero.""" - df = df.copy() - df["vwap"] = np.where( - df["volume"] > 0, - df["amount"] / df["volume"], - np.nan, - ) - return df - - -def compute_returns(df: pd.DataFrame) -> pd.DataFrame: - """Add ``returns`` column: close-to-close percentage change per asset. - - Returns are computed as ``close[t] / close[t-1] - 1`` within each asset. - The first observation per asset is NaN. - """ - df = df.copy() - df = df.sort_values(["asset_id", "datetime"]) - df["returns"] = df.groupby("asset_id")["close"].pct_change() - return df - - -def compute_derived_features(df: pd.DataFrame) -> pd.DataFrame: - """Compute all derived features (vwap and returns).""" - df = compute_vwap(df) - df = compute_returns(df) - return df - - -# --------------------------------------------------------------------------- -# Trading halt handling -# --------------------------------------------------------------------------- - -def flag_halts( - df: pd.DataFrame, - volume_threshold: float = 0.0, -) -> pd.DataFrame: - """Add boolean ``is_halt`` column. - - A bar is considered a trading halt when: - - Volume is exactly zero (or below *volume_threshold*), **and** - - open == high == low == close (no price movement). 
- """ - df = df.copy() - zero_volume = df["volume"] <= volume_threshold - flat_price = ( - (df["open"] == df["high"]) - & (df["high"] == df["low"]) - & (df["low"] == df["close"]) - ) - df["is_halt"] = zero_volume & flat_price - n_halt = df["is_halt"].sum() - if n_halt > 0: - logger.info("Flagged %d halt bars (%.2f%%)", n_halt, 100 * n_halt / len(df)) - return df - - -def mask_halts(df: pd.DataFrame) -> pd.DataFrame: - """Set OHLCV and derived columns to NaN for halted bars.""" - if "is_halt" not in df.columns: - return df - df = df.copy() - mask = df["is_halt"] - cols_to_nan = [ - c for c in ["open", "high", "low", "close", "volume", "amount", "vwap", "returns"] - if c in df.columns - ] - df.loc[mask, cols_to_nan] = np.nan - return df - - -# --------------------------------------------------------------------------- -# Missing data handling -# --------------------------------------------------------------------------- - -def _extract_date(dt_series: pd.Series) -> pd.Series: - """Return the date component of a datetime series.""" - return dt_series.dt.date - - -def fill_missing( - df: pd.DataFrame, - ffill_limit: Optional[int] = None, - cross_fill_method: str = "median", - columns: Optional[Sequence[str]] = None, -) -> pd.DataFrame: - """Fill missing values using a two-stage strategy. - - Stage 1 – Forward fill within each (asset, date) group so that NaNs - from halts / gaps are filled from the last valid intraday observation. - - Stage 2 – Cross-sectional fill: remaining NaNs in each time step are - replaced with the cross-sectional median (or mean). - - Parameters - ---------- - df : pd.DataFrame - Must contain ``datetime`` and ``asset_id`` columns. - ffill_limit : int or None - Max consecutive NaN values to forward-fill. - cross_fill_method : str - ``"median"`` or ``"mean"`` for the cross-sectional stage. - columns : sequence of str, optional - Columns to fill. Defaults to numeric columns. 
- """ - df = df.copy() - if columns is None: - columns = df.select_dtypes(include=[np.number]).columns.tolist() - columns = [c for c in columns if c in df.columns] - - # Stage 1: forward fill within (asset, date) - df["_date"] = _extract_date(df["datetime"]) - for col in columns: - df[col] = df.groupby(["asset_id", "_date"])[col].transform( - lambda s: s.ffill(limit=ffill_limit) - ) - - # Stage 2: cross-sectional fill per datetime - if cross_fill_method == "median": - agg_func = "median" - elif cross_fill_method == "mean": - agg_func = "mean" - else: - raise ValueError(f"Unknown cross_fill_method: {cross_fill_method}") - - for col in columns: - cross_vals = df.groupby("datetime")[col].transform(agg_func) - df[col] = df[col].fillna(cross_vals) - - df = df.drop(columns=["_date"]) - return df - - -# --------------------------------------------------------------------------- -# Winsorisation -# --------------------------------------------------------------------------- - -def winsorise( - df: pd.DataFrame, - columns: Sequence[str], - lower: float = 1.0, - upper: float = 99.0, -) -> pd.DataFrame: - """Clip values in *columns* to the [lower, upper] percentile range - computed cross-sectionally at each time step. - - Parameters - ---------- - df : pd.DataFrame - Must contain a ``datetime`` column. - columns : sequence of str - Columns to winsorise. - lower, upper : float - Percentile bounds (0-100). 
- """ - df = df.copy() - columns = [c for c in columns if c in df.columns] - - for col in columns: - lo = df.groupby("datetime")[col].transform( - lambda s: np.nanpercentile(s, lower) if s.notna().any() else np.nan - ) - hi = df.groupby("datetime")[col].transform( - lambda s: np.nanpercentile(s, upper) if s.notna().any() else np.nan - ) - df[col] = df[col].clip(lower=lo, upper=hi) - return df - - -# --------------------------------------------------------------------------- -# Cross-sectional standardisation -# --------------------------------------------------------------------------- - -def cross_sectional_standardise( - df: pd.DataFrame, - columns: Sequence[str], -) -> pd.DataFrame: - """Z-score standardise *columns* cross-sectionally at each time step. - - ``x_std = (x - mean) / std`` where mean and std are computed across - all assets at the same datetime. Groups with std == 0 are set to 0. - """ - df = df.copy() - columns = [c for c in columns if c in df.columns] - - for col in columns: - grp = df.groupby("datetime")[col] - mu = grp.transform("mean") - sigma = grp.transform("std") - sigma = sigma.replace(0, np.nan) - df[col] = (df[col] - mu) / sigma - df[col] = df[col].fillna(0.0) - return df - - -# --------------------------------------------------------------------------- -# Quality checks -# --------------------------------------------------------------------------- - -def quality_check( - df: pd.DataFrame, - min_nonnan_ratio: float = 0.5, - columns: Optional[Sequence[str]] = None, -) -> pd.DataFrame: - """Drop time steps where the fraction of non-NaN values across assets - is below *min_nonnan_ratio*. - - Parameters - ---------- - df : pd.DataFrame - Market data with ``datetime`` and ``asset_id``. - min_nonnan_ratio : float - Minimum fraction (0-1) of assets with valid data at each time step. - columns : sequence of str, optional - Columns to check. Defaults to OHLCV columns. 
- - Returns - ------- - pd.DataFrame - Filtered DataFrame with low-coverage time steps removed. - """ - if columns is None: - columns = [c for c in ["open", "high", "low", "close", "volume"] if c in df.columns] - - n_assets = df["asset_id"].nunique() - if n_assets == 0: - return df - - # Count non-NaN per datetime - checks = df.groupby("datetime")[list(columns)].apply( - lambda g: g.notna().all(axis=1).sum() / n_assets - ) - valid_dts = checks[checks >= min_nonnan_ratio].index - before = df["datetime"].nunique() - df = df[df["datetime"].isin(valid_dts)] - after = df["datetime"].nunique() - if before > after: - logger.info( - "Quality check removed %d/%d time steps (min_nonnan_ratio=%.2f)", - before - after, - before, - min_nonnan_ratio, - ) - return df.reset_index(drop=True) - - -# --------------------------------------------------------------------------- -# Full pipeline -# --------------------------------------------------------------------------- - -def preprocess( - df: pd.DataFrame, - config: Optional[PreprocessConfig] = None, -) -> pd.DataFrame: - """Run the full preprocessing pipeline. - - Parameters - ---------- - df : pd.DataFrame - Raw market data with at least the columns: datetime, asset_id, - open, high, low, close, volume, amount. - config : PreprocessConfig, optional - Pipeline configuration. Uses defaults when *None*. - - Returns - ------- - pd.DataFrame - Preprocessed DataFrame with derived features, cleaned and - standardised values. - """ - if config is None: - config = PreprocessConfig() - - logger.info("Preprocessing %d rows ...", len(df)) - - # 1. Derive features - df = compute_derived_features(df) - - # 2. Flag and mask trading halts - df = flag_halts(df, volume_threshold=config.halt_volume_threshold) - df = mask_halts(df) - - # 3. Fill missing data - df = fill_missing( - df, - ffill_limit=config.ffill_limit, - cross_fill_method=config.cross_fill_method, - ) - - # 4. 
Quality check - df = quality_check(df, min_nonnan_ratio=config.min_nonnan_ratio) - - # 5. Winsorise - feat_cols = [c for c in config.features_to_standardise if c in df.columns] - df = winsorise(df, columns=feat_cols, lower=config.winsor_lower, upper=config.winsor_upper) - - # 6. Cross-sectional standardisation - if config.standardise: - df = cross_sectional_standardise(df, columns=feat_cols) - - logger.info("Preprocessing complete: %d rows, %d columns", len(df), len(df.columns)) - return df diff --git a/src/factorminer/factorminer/data/tensor_builder.py b/src/factorminer/factorminer/data/tensor_builder.py deleted file mode 100644 index 03cdee8..0000000 --- a/src/factorminer/factorminer/data/tensor_builder.py +++ /dev/null @@ -1,505 +0,0 @@ -"""Build the data tensor D in R^(M x T x F) for FactorMiner. - -Converts preprocessed panel data into dense 3-D arrays indexed by -(assets, time_periods, features). Supports numpy and optional torch backends. -""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass, field -from typing import Literal, Optional, Sequence, Tuple, Union - -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - -# Default feature ordering matching the paper specification -DEFAULT_FEATURES: list[str] = [ - "open", "high", "low", "close", "volume", "amount", "vwap", "returns", -] - -Backend = Literal["numpy", "torch", "cupy"] - - -@dataclass(frozen=True) -class TargetSpec: - """Definition of one aligned forward-return target.""" - - name: str - entry_delay_bars: int - holding_bars: int - price_pair: str = "open_to_close" - return_transform: str = "simple" - - @property - def column_name(self) -> str: - return "target" if self.name == "paper" else f"target_{self.name}" - - -@dataclass -class TensorConfig: - """Configuration for tensor construction. - - Attributes - ---------- - features : list of str - Ordered feature columns to include in the tensor. 
- backend : str - ``"numpy"``, ``"torch"``, or ``"cupy"``. - dtype : str - Numeric dtype string (e.g. ``"float32"``). - train_end : str or None - Inclusive upper bound for the training period (ISO datetime). - test_start : str or None - Inclusive lower bound for the test period (ISO datetime). - m_fast : int or None - Number of assets for the fast screening subset. When *None*, - no fast subset is produced. - seed : int - Random seed for reproducible asset sampling. - target_column : str - Name of the column holding the target variable (created by - :func:`compute_target`). - """ - - features: list[str] = field(default_factory=lambda: list(DEFAULT_FEATURES)) - backend: Backend = "numpy" - dtype: str = "float32" - train_end: Optional[str] = None - test_start: Optional[str] = None - m_fast: Optional[int] = None - seed: int = 42 - target_column: str = "target" - target_columns: list[str] = field(default_factory=list) - default_target: str = "target" - - -# --------------------------------------------------------------------------- -# Target variable -# --------------------------------------------------------------------------- - -def compute_target(df: pd.DataFrame) -> pd.DataFrame: - """Compute the target: next-bar open-to-close return. - - For each asset the target at time *t* is defined as:: - - target[t] = close[t+1] / open[t+1] - 1 - - The last bar of each asset has a NaN target. 
- """ - return compute_targets( - df, - [ - TargetSpec( - name="paper", - entry_delay_bars=1, - holding_bars=1, - price_pair="open_to_close", - return_transform="simple", - ) - ], - ) - - -def compute_targets( - df: pd.DataFrame, - target_specs: Sequence[TargetSpec], -) -> pd.DataFrame: - """Compute one or more named forward-return targets on the same panel.""" - df = df.sort_values(["asset_id", "datetime"]).copy() - - for spec in target_specs: - start_col, end_col, start_offset, end_offset = _resolve_target_offsets(spec) - start_values = df.groupby("asset_id")[start_col].shift(-start_offset) - end_values = df.groupby("asset_id")[end_col].shift(-end_offset) - if spec.return_transform == "log": - df[spec.column_name] = np.log(end_values / start_values) - else: - df[spec.column_name] = end_values / start_values - 1.0 - - return df - - -def _resolve_target_offsets(spec: TargetSpec) -> tuple[str, str, int, int]: - """Map a target spec to start/end price columns and offsets.""" - if spec.entry_delay_bars < 0 or spec.holding_bars < 0: - raise ValueError("TargetSpec entry_delay_bars and holding_bars must be >= 0") - - if spec.price_pair == "open_to_close": - if spec.holding_bars < 1: - raise ValueError("open_to_close targets require holding_bars >= 1") - return ( - "open", - "close", - spec.entry_delay_bars, - spec.entry_delay_bars + spec.holding_bars - 1, - ) - if spec.price_pair == "close_to_close": - if spec.holding_bars < 1: - raise ValueError("close_to_close targets require holding_bars >= 1") - return ( - "close", - "close", - spec.entry_delay_bars, - spec.entry_delay_bars + spec.holding_bars, - ) - if spec.price_pair == "open_to_open": - if spec.holding_bars < 1: - raise ValueError("open_to_open targets require holding_bars >= 1") - return ( - "open", - "open", - spec.entry_delay_bars, - spec.entry_delay_bars + spec.holding_bars, - ) - if spec.price_pair == "close_to_open": - if spec.holding_bars < 1: - raise ValueError("close_to_open targets require holding_bars 
>= 1") - return ( - "close", - "open", - spec.entry_delay_bars, - spec.entry_delay_bars + spec.holding_bars, - ) - raise ValueError(f"Unknown TargetSpec price_pair: {spec.price_pair}") - - -# --------------------------------------------------------------------------- -# Tensor construction helpers -# --------------------------------------------------------------------------- - -def _to_backend(arr: np.ndarray, backend: Backend, dtype: str): - """Convert a numpy array to the requested backend.""" - np_dtype = getattr(np, dtype, np.float32) - arr = arr.astype(np_dtype) - - if backend == "numpy": - return arr - - if backend == "torch": - try: - import torch - except ImportError as exc: - raise ImportError( - "PyTorch is required for backend='torch'. " - "Install with: pip install torch" - ) from exc - torch_dtype = getattr(torch, dtype, torch.float32) - return torch.from_numpy(arr).to(torch_dtype) - - if backend == "cupy": - try: - import cupy # type: ignore[import-untyped] - except ImportError as exc: - raise ImportError( - "CuPy is required for backend='cupy'. 
" - "Install with: pip install cupy" - ) from exc - return cupy.asarray(arr, dtype=dtype) - - raise ValueError(f"Unknown backend: {backend}") - - -def _build_3d( - df: pd.DataFrame, - asset_ids: np.ndarray, - timestamps: np.ndarray, - columns: Sequence[str], -) -> np.ndarray: - """Pivot panel data into a dense (M, T, F) numpy array.""" - M = len(asset_ids) - T = len(timestamps) - F = len(columns) - tensor = np.full((M, T, F), np.nan, dtype=np.float64) - - asset_map = {a: i for i, a in enumerate(asset_ids)} - time_map = {t: j for j, t in enumerate(timestamps)} - - df_idx = df.copy() - df_idx["_ai"] = df_idx["asset_id"].map(asset_map) - df_idx["_ti"] = df_idx["datetime"].map(time_map) - df_idx = df_idx.dropna(subset=["_ai", "_ti"]) - df_idx["_ai"] = df_idx["_ai"].astype(int) - df_idx["_ti"] = df_idx["_ti"].astype(int) - - values = df_idx[list(columns)].to_numpy(dtype=np.float64) - tensor[df_idx["_ai"].values, df_idx["_ti"].values, :] = values - - return tensor - - -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - -@dataclass -class TensorDataset: - """Container for the built tensor and associated metadata. - - Attributes - ---------- - data : array-like - Feature tensor of shape ``(M, T, F)``. - target : array-like or None - Target array of shape ``(M, T)``. - asset_ids : np.ndarray - Asset identifier for each row in the first axis. - timestamps : np.ndarray - Datetime for each position in the second axis. - feature_names : list of str - Feature name for each slice in the third axis. 
- """ - - data: object # np.ndarray | torch.Tensor | cupy.ndarray - target: object # same type or None - asset_ids: np.ndarray - timestamps: np.ndarray - feature_names: list[str] - targets: dict[str, object] = field(default_factory=dict) - default_target: str = "target" - - -def build_tensor( - df: pd.DataFrame, - config: Optional[TensorConfig] = None, -) -> TensorDataset: - """Build a dense 3-D tensor from preprocessed panel data. - - Parameters - ---------- - df : pd.DataFrame - Preprocessed market data. Must include ``datetime``, ``asset_id``, - and all columns listed in ``config.features``. - config : TensorConfig, optional - Build configuration. Uses defaults when *None*. - - Returns - ------- - TensorDataset - Dense tensor and metadata. - """ - if config is None: - config = TensorConfig() - - # Validate required feature columns - missing = [f for f in config.features if f not in df.columns] - if missing: - raise ValueError(f"DataFrame is missing feature columns: {missing}") - - # Sorted unique axes - asset_ids = np.sort(df["asset_id"].unique()) - timestamps = np.sort(df["datetime"].unique()) - - logger.info( - "Building tensor: %d assets x %d time steps x %d features", - len(asset_ids), - len(timestamps), - len(config.features), - ) - - data_np = _build_3d(df, asset_ids, timestamps, config.features) - - # Target - resolved_target_columns = list(config.target_columns or [config.target_column]) - target_arrays_np: dict[str, np.ndarray] = {} - for target_column in resolved_target_columns: - if target_column not in df.columns: - continue - target_np = _build_3d(df, asset_ids, timestamps, [target_column]) - target_arrays_np[target_column] = target_np[:, :, 0] - - target_np: Optional[np.ndarray] = None - default_target_name = config.default_target - if target_arrays_np: - target_np = target_arrays_np.get(default_target_name) - if target_np is None: - first_target = next(iter(target_arrays_np)) - target_np = target_arrays_np[first_target] - default_target_name = 
first_target - - data = _to_backend(data_np, config.backend, config.dtype) - target = _to_backend(target_np, config.backend, config.dtype) if target_np is not None else None - targets = { - name: _to_backend(target_arr, config.backend, config.dtype) - for name, target_arr in target_arrays_np.items() - } - - return TensorDataset( - data=data, - target=target, - asset_ids=asset_ids, - timestamps=timestamps, - feature_names=list(config.features), - targets=targets, - default_target=default_target_name, - ) - - -# --------------------------------------------------------------------------- -# Temporal split -# --------------------------------------------------------------------------- - -def temporal_split( - ds: TensorDataset, - train_end: Optional[str] = None, - test_start: Optional[str] = None, -) -> Tuple[TensorDataset, TensorDataset]: - """Split a :class:`TensorDataset` into train and test sets along time. - - Parameters - ---------- - ds : TensorDataset - Full dataset. - train_end : str, optional - Inclusive upper bound for training timestamps. - test_start : str, optional - Inclusive lower bound for test timestamps. When *None* defaults to - the bar immediately after *train_end*. - - Returns - ------- - tuple of TensorDataset - ``(train, test)`` datasets. 
- """ - ts = pd.to_datetime(ds.timestamps) - - if train_end is not None: - train_mask = ts <= pd.Timestamp(train_end) - else: - # Default: first 80% - split_idx = int(len(ts) * 0.8) - train_mask = np.arange(len(ts)) < split_idx - - if test_start is not None: - test_mask = ts >= pd.Timestamp(test_start) - else: - test_mask = ~train_mask - - def _slice(mask): - idx = np.where(mask)[0] - # np arrays: index along axis 1 (time) - d = ds.data - t = ds.target - targets = ds.targets - - # Handle different backends - if hasattr(d, "numpy"): - # torch tensor - d_slice = d[:, idx, :] - t_slice = t[:, idx] if t is not None else None - elif hasattr(d, "get"): - # cupy - d_slice = d[:, idx, :] - t_slice = t[:, idx] if t is not None else None - else: - # numpy - d_slice = d[:, idx, :] - t_slice = t[:, idx] if t is not None else None - - return TensorDataset( - data=d_slice, - target=t_slice, - targets={ - name: target[:, idx] if target is not None else None - for name, target in targets.items() - }, - default_target=ds.default_target, - asset_ids=ds.asset_ids, - timestamps=ds.timestamps[idx], - feature_names=ds.feature_names, - ) - - return _slice(train_mask), _slice(test_mask) - - -# --------------------------------------------------------------------------- -# Asset subset sampling -# --------------------------------------------------------------------------- - -def sample_assets( - ds: TensorDataset, - m: int, - seed: int = 42, -) -> TensorDataset: - """Return a random subset of *m* assets from *ds*. - - Parameters - ---------- - ds : TensorDataset - Full dataset. - m : int - Number of assets to sample. - seed : int - Random seed for reproducibility. - - Returns - ------- - TensorDataset - Subset with *m* assets. 
- """ - rng = np.random.default_rng(seed) - M = len(ds.asset_ids) - if m >= M: - logger.warning("Requested m=%d >= total assets %d; returning all", m, M) - return ds - - idx = np.sort(rng.choice(M, size=m, replace=False)) - d = ds.data - t = ds.target - targets = ds.targets - - if hasattr(d, "numpy"): - d_sub = d[idx, :, :] - t_sub = t[idx, :] if t is not None else None - elif hasattr(d, "get"): - d_sub = d[idx, :, :] - t_sub = t[idx, :] if t is not None else None - else: - d_sub = d[idx, :, :] - t_sub = t[idx, :] if t is not None else None - - return TensorDataset( - data=d_sub, - target=t_sub, - targets={ - name: target[idx, :] if target is not None else None - for name, target in targets.items() - }, - default_target=ds.default_target, - asset_ids=ds.asset_ids[idx], - timestamps=ds.timestamps, - feature_names=ds.feature_names, - ) - - -def build_pipeline( - df: pd.DataFrame, - config: Optional[TensorConfig] = None, -) -> Union[TensorDataset, Tuple[TensorDataset, TensorDataset]]: - """End-to-end: compute target, build tensor, optionally split. - - Parameters - ---------- - df : pd.DataFrame - Preprocessed market data. - config : TensorConfig, optional - Configuration. - - Returns - ------- - TensorDataset or tuple - If ``config.train_end`` or ``config.test_start`` is set, returns - ``(train, test)``; otherwise the full dataset. 
- """ - if config is None: - config = TensorConfig() - - df = compute_target(df) - ds = build_tensor(df, config) - - if config.train_end is not None or config.test_start is not None: - return temporal_split(ds, train_end=config.train_end, test_start=config.test_start) - - return ds diff --git a/src/factorminer/factorminer/evaluation/__init__.py b/src/factorminer/factorminer/evaluation/__init__.py deleted file mode 100644 index 7bc9f69..0000000 --- a/src/factorminer/factorminer/evaluation/__init__.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Multi-stage factor evaluation and validation pipeline.""" - -from src.factorminer.factorminer.evaluation.admission import ( - AdmissionDecision, - StockThresholds, - check_admission, - check_replacement, -) -from src.factorminer.factorminer.evaluation.correlation import ( - IncrementalCorrelationMatrix, - batch_spearman_correlation, - batch_spearman_pairwise, - compute_correlation_batch, -) -from src.factorminer.factorminer.evaluation.metrics import ( - compute_factor_stats, - compute_ic, - compute_ic_mean, - compute_ic_vectorized, - compute_ic_win_rate, - compute_icir, - compute_pairwise_correlation, - compute_quintile_returns, - compute_turnover, -) -from src.factorminer.factorminer.evaluation.pipeline import ( - CandidateFactor, - EvaluationResult, - FactorLibraryView, - PipelineConfig, - ValidationPipeline, - run_evaluation_pipeline, -) -from src.factorminer.factorminer.evaluation.combination import FactorCombiner -from src.factorminer.factorminer.evaluation.selection import FactorSelector -from src.factorminer.factorminer.evaluation.portfolio import PortfolioBacktester -from src.factorminer.factorminer.evaluation.backtest import ( - SplitWindow, - DrawdownResult, - train_test_split, - rolling_splits, - compute_ic_series, - compute_rolling_ic, - compute_cumulative_ic, - compute_ic_stats, - factor_return_attribution, - compute_drawdown, - compute_sharpe_ratio, - compute_calmar_ratio, -) -from 
src.factorminer.factorminer.evaluation.regime import ( - MarketRegime, - RegimeConfig, - RegimeClassification, - RegimeDetector, - RegimeICResult, - RegimeAwareEvaluator, -) -from src.factorminer.factorminer.evaluation.capacity import ( - CapacityConfig, - CapacityEstimate, - CapacityEstimator, - MarketImpactEstimate, - MarketImpactModel, - NetCostResult, -) -from src.factorminer.factorminer.evaluation.causal import ( - CausalConfig, - CausalTestResult, - CausalValidator, -) -from src.factorminer.factorminer.evaluation.significance import ( - BootstrapCIResult, - BootstrapICTester, - DeflatedSharpeCalculator, - DeflatedSharpeResult, - FDRController, - FDRResult, - SignificanceConfig, - check_significance, -) -from src.factorminer.factorminer.evaluation.research import ( - FactorGeometryDiagnostics, - FactorScoreVector, - build_score_vector, - compute_factor_geometry, - passes_research_admission, - run_research_model_suite, -) - -__all__ = [ - # metrics - "compute_ic", - "compute_ic_vectorized", - "compute_icir", - "compute_ic_mean", - "compute_ic_win_rate", - "compute_pairwise_correlation", - "compute_factor_stats", - "compute_quintile_returns", - "compute_turnover", - # correlation - "batch_spearman_correlation", - "batch_spearman_pairwise", - "compute_correlation_batch", - "IncrementalCorrelationMatrix", - # admission - "check_admission", - "check_replacement", - "AdmissionDecision", - "StockThresholds", - # pipeline - "CandidateFactor", - "EvaluationResult", - "FactorLibraryView", - "PipelineConfig", - "ValidationPipeline", - "run_evaluation_pipeline", - # combination / selection / backtest - "FactorCombiner", - "FactorSelector", - "PortfolioBacktester", - "SplitWindow", - "DrawdownResult", - "train_test_split", - "rolling_splits", - "compute_ic_series", - "compute_rolling_ic", - "compute_cumulative_ic", - "compute_ic_stats", - "factor_return_attribution", - "compute_drawdown", - "compute_sharpe_ratio", - "compute_calmar_ratio", - # regime - "MarketRegime", - 
"RegimeConfig", - "RegimeClassification", - "RegimeDetector", - "RegimeICResult", - "RegimeAwareEvaluator", - # capacity - "CapacityConfig", - "CapacityEstimate", - "CapacityEstimator", - "MarketImpactEstimate", - "MarketImpactModel", - "NetCostResult", - # causal - "CausalConfig", - "CausalTestResult", - "CausalValidator", - # significance - "BootstrapCIResult", - "BootstrapICTester", - "DeflatedSharpeCalculator", - "DeflatedSharpeResult", - "FDRController", - "FDRResult", - "SignificanceConfig", - "check_significance", - # research - "FactorGeometryDiagnostics", - "FactorScoreVector", - "compute_factor_geometry", - "build_score_vector", - "passes_research_admission", - "run_research_model_suite", -] diff --git a/src/factorminer/factorminer/evaluation/admission.py b/src/factorminer/factorminer/evaluation/admission.py deleted file mode 100644 index 941de66..0000000 --- a/src/factorminer/factorminer/evaluation/admission.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Admission rules for the factor library. - -Implements the decision logic for whether a candidate factor should be -admitted to the library, replace an existing factor, or be rejected. - -Admission Rule (Eq. 10): - Admit alpha if |IC(alpha)| >= tau_IC AND max_{g in L} |rho(alpha, g)| < theta - -Replacement Rule (Eq. 
11): - Replace g with alpha if: - |IC(alpha)| >= 0.10 AND - |IC(alpha)| >= 1.3 * |IC(g)| AND - |{g in L : |rho(alpha, g)| >= theta}| == 1 -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple - - -@dataclass -class AdmissionDecision: - """Result of an admission check for a candidate factor.""" - - admitted: bool - replaced_factor_id: Optional[str] = None - rejection_reason: Optional[str] = None - max_correlation: float = 0.0 - correlated_with: Optional[str] = None - decision_type: str = "rejected" # "admitted", "replacement", "rejected" - - -def check_admission( - ic_abs: float, - max_corr: float, - correlated_with: Optional[str], - ic_threshold: float = 0.04, - correlation_threshold: float = 0.5, -) -> AdmissionDecision: - """Standard admission check (Eq. 10). - - Parameters - ---------- - ic_abs : float - Absolute IC of the candidate. - max_corr : float - Maximum absolute correlation with any library factor. - correlated_with : str or None - ID of the most correlated library factor. - ic_threshold : float - Minimum |IC| for admission (tau_IC). - correlation_threshold : float - Maximum allowed correlation (theta). 
- - Returns - ------- - AdmissionDecision - """ - if ic_abs < ic_threshold: - return AdmissionDecision( - admitted=False, - rejection_reason=f"IC too low: |IC|={ic_abs:.4f} < {ic_threshold}", - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="rejected", - ) - - if max_corr >= correlation_threshold: - return AdmissionDecision( - admitted=False, - rejection_reason=( - f"Too correlated: max|rho|={max_corr:.4f} >= {correlation_threshold} " - f"(with {correlated_with})" - ), - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="rejected", - ) - - return AdmissionDecision( - admitted=True, - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="admitted", - ) - - -def check_replacement( - candidate_ic_abs: float, - max_corr: float, - correlated_with: Optional[str], - library_ic_map: Dict[str, float], - correlation_map: Dict[str, float], - replacement_ic_min: float = 0.10, - replacement_ic_ratio: float = 1.3, - correlation_threshold: float = 0.5, -) -> AdmissionDecision: - """Replacement admission check (Eq. 11). - - A candidate that failed the standard correlation check may still - replace an existing library factor if it is sufficiently stronger - and only conflicts with exactly one factor. - - Parameters - ---------- - candidate_ic_abs : float - Absolute IC of the candidate. - max_corr : float - Max absolute correlation with any library factor. - correlated_with : str or None - ID of the most correlated library factor. - library_ic_map : dict - Mapping from library factor ID to its absolute IC. - correlation_map : dict - Mapping from library factor ID to correlation with the candidate. - replacement_ic_min : float - Minimum |IC| for replacement consideration. - replacement_ic_ratio : float - Required ratio IC(candidate) / IC(existing). - correlation_threshold : float - Correlation threshold (theta) for determining conflicts. 
- - Returns - ------- - AdmissionDecision - """ - # Must meet minimum IC for replacement - if candidate_ic_abs < replacement_ic_min: - return AdmissionDecision( - admitted=False, - rejection_reason=( - f"IC too low for replacement: |IC|={candidate_ic_abs:.4f} < {replacement_ic_min}" - ), - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="rejected", - ) - - # Find all factors above the correlation threshold - conflicting: List[str] = [ - fid for fid, corr in correlation_map.items() - if abs(corr) >= correlation_threshold - ] - - # Must conflict with exactly one factor - if len(conflicting) != 1: - return AdmissionDecision( - admitted=False, - rejection_reason=( - f"Replacement requires exactly 1 correlated factor, found {len(conflicting)}" - ), - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="rejected", - ) - - target_id = conflicting[0] - target_ic = library_ic_map.get(target_id, 0.0) - - # Candidate must be sufficiently stronger - if target_ic > 0 and candidate_ic_abs < replacement_ic_ratio * target_ic: - return AdmissionDecision( - admitted=False, - rejection_reason=( - f"Not strong enough to replace {target_id}: " - f"|IC|={candidate_ic_abs:.4f} < {replacement_ic_ratio} * {target_ic:.4f}" - ), - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="rejected", - ) - - return AdmissionDecision( - admitted=True, - replaced_factor_id=target_id, - max_correlation=max_corr, - correlated_with=correlated_with, - decision_type="replacement", - ) - - -# --------------------------------------------------------------------------- -# Stock-level thresholds (configurable) -# --------------------------------------------------------------------------- - -@dataclass -class StockThresholds: - """Default thresholds for A-share stock factor evaluation.""" - - ic_abs_min: float = 0.05 - icir_abs_min: float = 0.5 - ic_win_rate_min: float = 0.50 - max_turnover: float = 0.8 - min_monotonicity: float = 
0.0 - - def passes( - self, - ic_abs: float, - icir_abs: float, - ic_win_rate: float = 1.0, - turnover: float = 0.0, - monotonicity: float = 1.0, - ) -> Tuple[bool, Optional[str]]: - """Check if a factor meets all stock-level thresholds. - - Returns - ------- - tuple of (passes, rejection_reason) - """ - if ic_abs < self.ic_abs_min: - return False, f"|IC|={ic_abs:.4f} < {self.ic_abs_min}" - if icir_abs < self.icir_abs_min: - return False, f"|ICIR|={icir_abs:.4f} < {self.icir_abs_min}" - if ic_win_rate < self.ic_win_rate_min: - return False, f"IC win rate={ic_win_rate:.4f} < {self.ic_win_rate_min}" - if turnover > self.max_turnover: - return False, f"Turnover={turnover:.4f} > {self.max_turnover}" - if monotonicity < self.min_monotonicity: - return False, f"Monotonicity={monotonicity:.4f} < {self.min_monotonicity}" - return True, None diff --git a/src/factorminer/factorminer/evaluation/backtest.py b/src/factorminer/factorminer/evaluation/backtest.py deleted file mode 100644 index f889703..0000000 --- a/src/factorminer/factorminer/evaluation/backtest.py +++ /dev/null @@ -1,397 +0,0 @@ -"""Full backtesting utilities for factor evaluation. - -Provides time-series splitting, rolling and cumulative IC computation, -factor return attribution, and drawdown analysis. -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple - -import numpy as np -from scipy.stats import spearmanr - - -# ------------------------------------------------------------------ -# Time-series splitting -# ------------------------------------------------------------------ - -@dataclass -class SplitWindow: - """Indices for a single train/test split.""" - train_start: int - train_end: int - test_start: int - test_end: int - - -def train_test_split( - T: int, - train_ratio: float = 0.7, -) -> SplitWindow: - """Simple contiguous train/test split. - - Parameters - ---------- - T : int - Total number of time steps. 
- train_ratio : float - Fraction of data used for training (default 70%). - - Returns - ------- - SplitWindow - """ - split = int(T * train_ratio) - return SplitWindow( - train_start=0, - train_end=split, - test_start=split, - test_end=T, - ) - - -def rolling_splits( - T: int, - train_window: int, - test_window: int, - step: int = 1, -) -> List[SplitWindow]: - """Generate rolling-window train/test splits. - - Parameters - ---------- - T : int - Total number of time steps. - train_window : int - Size of training window. - test_window : int - Size of testing window. - step : int - Step size between consecutive windows. - - Returns - ------- - list of SplitWindow - """ - splits: List[SplitWindow] = [] - start = 0 - while start + train_window + test_window <= T: - splits.append(SplitWindow( - train_start=start, - train_end=start + train_window, - test_start=start + train_window, - test_end=start + train_window + test_window, - )) - start += step - return splits - - -# ------------------------------------------------------------------ -# IC computation -# ------------------------------------------------------------------ - -def compute_ic_series( - signal: np.ndarray, - returns: np.ndarray, -) -> np.ndarray: - """Compute cross-sectional Spearman IC at each time step. - - Parameters - ---------- - signal : ndarray of shape (T, N) - Factor signal values. - returns : ndarray of shape (T, N) - Forward returns. - - Returns - ------- - ndarray of shape (T,) - IC values; NaN where computation is not possible. - """ - T = signal.shape[0] - ics = np.full(T, np.nan) - for t in range(T): - x = signal[t] - y = returns[t] - valid = np.isfinite(x) & np.isfinite(y) - if valid.sum() < 5: - continue - corr, _ = spearmanr(x[valid], y[valid]) - if np.isfinite(corr): - ics[t] = corr - return ics - - -def compute_rolling_ic( - signal: np.ndarray, - returns: np.ndarray, - window: int = 20, -) -> np.ndarray: - """Compute rolling-window average IC. 
- - Parameters - ---------- - signal : ndarray of shape (T, N) - returns : ndarray of shape (T, N) - window : int - Rolling window size. - - Returns - ------- - ndarray of shape (T,) - Rolling mean IC; NaN where window is insufficient. - """ - ic_series = compute_ic_series(signal, returns) - T = len(ic_series) - rolling_ic = np.full(T, np.nan) - for t in range(window - 1, T): - window_ics = ic_series[t - window + 1: t + 1] - finite = window_ics[np.isfinite(window_ics)] - if len(finite) >= 1: - rolling_ic[t] = float(np.mean(finite)) - return rolling_ic - - -def compute_cumulative_ic( - signal: np.ndarray, - returns: np.ndarray, -) -> np.ndarray: - """Compute cumulative (expanding-window) mean IC. - - Parameters - ---------- - signal : ndarray of shape (T, N) - returns : ndarray of shape (T, N) - - Returns - ------- - ndarray of shape (T,) - Expanding-window mean IC. - """ - ic_series = compute_ic_series(signal, returns) - T = len(ic_series) - cumulative = np.full(T, np.nan) - running_sum = 0.0 - running_count = 0 - for t in range(T): - if np.isfinite(ic_series[t]): - running_sum += ic_series[t] - running_count += 1 - if running_count > 0: - cumulative[t] = running_sum / running_count - return cumulative - - -def compute_ic_stats(ic_series: np.ndarray) -> dict: - """Compute summary statistics for an IC series. - - Parameters - ---------- - ic_series : ndarray of shape (T,) - - Returns - ------- - dict with keys: ic_mean, ic_std, icir, ic_win_rate, ic_max, ic_min. 
- """ - finite = ic_series[np.isfinite(ic_series)] - if len(finite) < 2: - return { - "ic_mean": 0.0, - "ic_std": 0.0, - "icir": 0.0, - "ic_win_rate": 0.0, - "ic_max": 0.0, - "ic_min": 0.0, - } - ic_mean = float(np.mean(finite)) - ic_std = float(np.std(finite, ddof=1)) - return { - "ic_mean": ic_mean, - "ic_std": ic_std, - "icir": ic_mean / ic_std if ic_std > 1e-12 else 0.0, - "ic_win_rate": float(np.mean(finite > 0)), - "ic_max": float(np.max(finite)), - "ic_min": float(np.min(finite)), - } - - -# ------------------------------------------------------------------ -# Factor return attribution -# ------------------------------------------------------------------ - -def factor_return_attribution( - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, -) -> Dict[int, dict]: - """Attribute portfolio returns to individual factors. - - For each factor, computes the IC series, ICIR, and the mean return of - the top-quintile (Q5) minus bottom-quintile (Q1) long-short portfolio. - - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. 
- returns : ndarray of shape (T, N) - - Returns - ------- - dict mapping factor_id -> attribution dict with keys: - ic_mean, icir, ic_win_rate, ls_return - """ - results: Dict[int, dict] = {} - for fid, signal in factor_signals.items(): - ic_series = compute_ic_series(signal, returns) - stats = compute_ic_stats(ic_series) - - # Compute long-short return - T, N = signal.shape - ls_returns = np.full(T, np.nan) - for t in range(T): - sig_t = signal[t] - ret_t = returns[t] - valid = np.isfinite(sig_t) & np.isfinite(ret_t) - n_valid = valid.sum() - if n_valid < 5: - continue - valid_sigs = sig_t[valid] - valid_rets = ret_t[valid] - k = max(1, n_valid // 5) - sorted_idx = np.argsort(valid_sigs) - q1_ret = np.mean(valid_rets[sorted_idx[:k]]) - q5_ret = np.mean(valid_rets[sorted_idx[-k:]]) - ls_returns[t] = q5_ret - q1_ret - - stats["ls_return"] = float(np.nanmean(ls_returns)) - results[fid] = stats - return results - - -# ------------------------------------------------------------------ -# Drawdown analysis -# ------------------------------------------------------------------ - -@dataclass -class DrawdownResult: - """Results of drawdown analysis.""" - max_drawdown: float - max_drawdown_start: int - max_drawdown_end: int - drawdown_series: np.ndarray - recovery_periods: List[Tuple[int, int, int]] # (start, trough, end) - - -def compute_drawdown(cumulative_returns: np.ndarray) -> DrawdownResult: - """Compute drawdown statistics from a cumulative return series. - - Parameters - ---------- - cumulative_returns : ndarray of shape (T,) - Cumulative returns (can be from cumsum of period returns). 
- - Returns - ------- - DrawdownResult - """ - cumulative_returns = np.asarray(cumulative_returns, dtype=np.float64) - T = len(cumulative_returns) - - # Running maximum - running_max = np.maximum.accumulate(cumulative_returns) - drawdown_series = cumulative_returns - running_max - - # Max drawdown - max_dd_idx = np.argmin(drawdown_series) - max_dd = float(drawdown_series[max_dd_idx]) - # Find the peak before the max drawdown - peak_idx = int(np.argmax(cumulative_returns[:max_dd_idx + 1])) - - # Identify recovery periods (peak -> trough -> recovery) - recovery_periods: List[Tuple[int, int, int]] = [] - i = 0 - while i < T: - # Find start of drawdown (where dd becomes negative) - if drawdown_series[i] < -1e-12: - start = i - 1 if i > 0 else 0 - # Find trough - j = i - trough = i - while j < T and drawdown_series[j] < -1e-12: - if drawdown_series[j] < drawdown_series[trough]: - trough = j - j += 1 - end = j if j < T else T - 1 - recovery_periods.append((start, trough, end)) - i = j - else: - i += 1 - - return DrawdownResult( - max_drawdown=max_dd, - max_drawdown_start=peak_idx, - max_drawdown_end=max_dd_idx, - drawdown_series=drawdown_series, - recovery_periods=recovery_periods, - ) - - -def compute_sharpe_ratio( - returns_series: np.ndarray, - annualization_factor: float = 252.0, - risk_free_rate: float = 0.0, -) -> float: - """Compute annualized Sharpe ratio. - - Parameters - ---------- - returns_series : ndarray of shape (T,) - Period returns. - annualization_factor : float - Number of periods per year (252 for daily). - risk_free_rate : float - Annualized risk-free rate. - - Returns - ------- - float - Annualized Sharpe ratio. 
- """ - finite = returns_series[np.isfinite(returns_series)] - if len(finite) < 2: - return 0.0 - rf_period = risk_free_rate / annualization_factor - excess = finite - rf_period - mean_excess = np.mean(excess) - std_excess = np.std(excess, ddof=1) - if std_excess < 1e-12: - return 0.0 - return float(mean_excess / std_excess * np.sqrt(annualization_factor)) - - -def compute_calmar_ratio( - returns_series: np.ndarray, - annualization_factor: float = 252.0, -) -> float: - """Compute Calmar ratio (annualized return / max drawdown). - - Parameters - ---------- - returns_series : ndarray of shape (T,) - annualization_factor : float - - Returns - ------- - float - Calmar ratio; 0 if max drawdown is zero. - """ - finite = returns_series[np.isfinite(returns_series)] - if len(finite) < 2: - return 0.0 - cumulative = np.cumsum(finite) - dd = compute_drawdown(cumulative) - if abs(dd.max_drawdown) < 1e-12: - return 0.0 - annualized_return = float(np.mean(finite)) * annualization_factor - return annualized_return / abs(dd.max_drawdown) diff --git a/src/factorminer/factorminer/evaluation/capacity.py b/src/factorminer/factorminer/evaluation/capacity.py deleted file mode 100644 index 2047515..0000000 --- a/src/factorminer/factorminer/evaluation/capacity.py +++ /dev/null @@ -1,449 +0,0 @@ -"""Capacity-aware backtesting for alpha factors. - -Estimates market impact via a square-root model, evaluates net-of-cost -IC / ICIR, and determines the maximum capital that a factor can absorb -before its alpha degrades beyond acceptable limits. 
-""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Dict, List, Optional - -import numpy as np - -from src.factorminer.factorminer.evaluation.metrics import compute_ic, compute_icir - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - -@dataclass -class CapacityConfig: - """Configuration for capacity-aware backtesting. - - Parameters - ---------- - enabled : bool - Whether capacity estimation is active. - base_capital_usd : float - Default capital level used when none is specified explicitly. - capacity_levels : list[float] - Dollar capital levels to sweep when building the capacity curve. - ic_degradation_limit : float - Maximum fractional IC degradation (1 - |net_IC|/|gross_IC|) before - the factor is considered capacity-constrained. - net_icir_threshold : float - Minimum net ICIR for a factor to pass the cost-adjusted screen. - sigma_annual : float - Annualised volatility used by the square-root impact model. - participation_limit : float - Hard cap on the participation rate per asset (fraction of bar volume). - top_fraction : float - Fraction of the asset universe in the long (and short) leg. - trading_days_per_year : float - Number of trading days per calendar year. - bars_per_day : float - Number of bars (signal periods) per trading day. Default 24 assumes - 10-minute bars over a 4-hour trading session. 
- """ - - enabled: bool = True - base_capital_usd: float = 1e8 - capacity_levels: List[float] = field( - default_factory=lambda: [1e7, 5e7, 1e8, 5e8, 1e9] - ) - ic_degradation_limit: float = 0.20 - net_icir_threshold: float = 0.3 - sigma_annual: float = 0.25 - participation_limit: float = 0.10 - top_fraction: float = 0.20 - trading_days_per_year: float = 252.0 - bars_per_day: float = 24.0 - - -# --------------------------------------------------------------------------- -# Result containers -# --------------------------------------------------------------------------- - -@dataclass -class MarketImpactEstimate: - """Per-bar market impact estimate across the evaluation window. - - Attributes - ---------- - impact_bps : np.ndarray, shape (T,) - Estimated one-way market impact in basis points per bar. - participation_rate : np.ndarray, shape (T,) - Mean participation rate (fraction of bar volume) per bar. - avg_impact_bps : float - Time-averaged impact in basis points. - max_impact_bps : float - Maximum single-bar impact in basis points. - """ - - impact_bps: np.ndarray - participation_rate: np.ndarray - avg_impact_bps: float - max_impact_bps: float - - -@dataclass -class CapacityEstimate: - """Result of a capacity sweep for a single factor. - - Attributes - ---------- - factor_name : str - Identifier of the evaluated factor. - max_capacity_usd : float - Interpolated maximum capital (USD) before the IC degradation limit - is breached. ``np.inf`` if no level breaches the limit. - capacity_curve : dict[float, float] - Mapping from capital level (USD) to IC degradation fraction. - break_even_cost_bps : float - Approximate single-leg cost (bps) at which net IC drops to zero. - """ - - factor_name: str - max_capacity_usd: float - capacity_curve: Dict[float, float] - break_even_cost_bps: float - - -@dataclass -class NetCostResult: - """Net-of-cost evaluation at a specific capital level. - - Attributes - ---------- - factor_name : str - Identifier of the evaluated factor. 
- gross_icir : float - ICIR computed on unadjusted returns. - net_icir : float - ICIR computed on impact-adjusted returns. - gross_ls_return : float - Mean gross long-short return per bar. - net_ls_return : float - Mean net long-short return per bar (gross minus round-trip impact). - estimated_capacity_usd : float - Capital level at which the evaluation was performed. - impact_estimate : MarketImpactEstimate - Detailed impact statistics. - passes_net_threshold : bool - ``True`` if ``net_icir >= config.net_icir_threshold``. - """ - - factor_name: str - gross_icir: float - net_icir: float - gross_ls_return: float - net_ls_return: float - estimated_capacity_usd: float - impact_estimate: MarketImpactEstimate - passes_net_threshold: bool - - -# --------------------------------------------------------------------------- -# Square-root market impact model -# --------------------------------------------------------------------------- - -class MarketImpactModel: - """Square-root market impact model. - - The model estimates single-leg impact as:: - - impact = sigma_bar * sqrt(participation_rate) - - where ``sigma_bar`` is the per-bar volatility derived from the annualised - volatility, and the participation rate is the fraction of bar volume - consumed by the strategy. - """ - - def __init__(self, config: CapacityConfig | None = None) -> None: - self.config = config or CapacityConfig() - self._sigma_bar: float = self.config.sigma_annual / np.sqrt( - self.config.trading_days_per_year * self.config.bars_per_day - ) - - # ------------------------------------------------------------------ - def estimate_impact( - self, - signals: np.ndarray, - volume: np.ndarray, - capital: float, - ) -> MarketImpactEstimate: - """Estimate per-bar market impact for a given capital deployment. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - Factor signal matrix (used to identify quintile membership). - volume : np.ndarray, shape (M, T) - Dollar volume per asset per bar. 
Entries <= 0 are treated as - illiquid and assigned the participation limit. - capital : float - Total capital (USD) deployed by the strategy. - - Returns - ------- - MarketImpactEstimate - """ - M, T = signals.shape - cfg = self.config - - n_leg = max(int(M * cfg.top_fraction), 1) - per_asset_capital = capital / n_leg - - participation = np.full(T, np.nan, dtype=np.float64) - - for t in range(T): - sig_t = signals[:, t] - vol_t = volume[:, t] - - valid_sig = ~np.isnan(sig_t) - if valid_sig.sum() < n_leg: - participation[t] = cfg.participation_limit - continue - - # Identify top and bottom quintile assets - sig_filled = np.where(valid_sig, sig_t, -np.inf) - top_idx = np.argpartition(sig_filled, -n_leg)[-n_leg:] - - # Participation rate for each selected asset - rates = np.empty(n_leg, dtype=np.float64) - for i, idx in enumerate(top_idx): - v = vol_t[idx] - if np.isnan(v) or v <= 0: - rates[i] = cfg.participation_limit - else: - rates[i] = min(per_asset_capital / v, cfg.participation_limit) - - participation[t] = float(np.mean(rates)) - - # Impact in natural units, then convert to bps - impact = self._sigma_bar * np.sqrt(participation) - impact_bps = impact * 1e4 - - avg_impact = float(np.nanmean(impact_bps)) - max_impact = float(np.nanmax(impact_bps)) - - return MarketImpactEstimate( - impact_bps=impact_bps, - participation_rate=participation, - avg_impact_bps=avg_impact, - max_impact_bps=max_impact, - ) - - -# --------------------------------------------------------------------------- -# Capacity estimator -# --------------------------------------------------------------------------- - -class CapacityEstimator: - """Evaluate factor capacity and net-of-cost performance. - - Parameters - ---------- - returns : np.ndarray, shape (M, T) - Forward returns for M assets over T bars. - volume : np.ndarray, shape (M, T) - Dollar volume for M assets over T bars. - config : CapacityConfig, optional - Configuration; uses defaults when omitted. 
- """ - - def __init__( - self, - returns: np.ndarray, - volume: np.ndarray, - config: CapacityConfig | None = None, - ) -> None: - self.returns = returns - self.volume = volume - self.config = config or CapacityConfig() - self._impact_model = MarketImpactModel(self.config) - - # ------------------------------------------------------------------ - # helpers - # ------------------------------------------------------------------ - - @staticmethod - def _mean_ic(ic_series: np.ndarray) -> float: - """Mean IC ignoring NaN.""" - valid = ic_series[~np.isnan(ic_series)] - return float(np.mean(valid)) if len(valid) > 0 else 0.0 - - def _net_returns( - self, - signals: np.ndarray, - impact_bps: np.ndarray, - ) -> np.ndarray: - """Compute impact-adjusted returns. - - For a long-short strategy the round-trip cost is approximately - ``2 * impact`` (entry + exit on each leg). We subtract the cost - uniformly from returns as a simple first-order approximation. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - Factor signals (unused beyond shape; cost applied uniformly). - impact_bps : np.ndarray, shape (T,) - One-way impact per bar in basis points. - - Returns - ------- - np.ndarray, shape (M, T) - Adjusted returns matrix. - """ - cost = 2.0 * impact_bps / 1e4 # round-trip, fractional - return self.returns - cost[np.newaxis, :] - - # ------------------------------------------------------------------ - # public API - # ------------------------------------------------------------------ - - def estimate( - self, - factor_name: str, - signals: np.ndarray, - ) -> CapacityEstimate: - """Run a capacity sweep across configured capital levels. - - Parameters - ---------- - factor_name : str - Human-readable factor identifier. - signals : np.ndarray, shape (M, T) - Factor signal matrix. 
- - Returns - ------- - CapacityEstimate - """ - gross_ic = compute_ic(signals, self.returns) - abs_gross_mean = abs(self._mean_ic(gross_ic)) - - curve: Dict[float, float] = {} - degradations: List[float] = [] - capitals: List[float] = [] - - for cap in self.config.capacity_levels: - impact = self._impact_model.estimate_impact(signals, self.volume, cap) - net_ret = self._net_returns(signals, impact.impact_bps) - net_ic = compute_ic(signals, net_ret) - abs_net_mean = abs(self._mean_ic(net_ic)) - - if abs_gross_mean > 1e-12: - deg = 1.0 - abs_net_mean / abs_gross_mean - else: - deg = 0.0 - - curve[cap] = deg - capitals.append(cap) - degradations.append(deg) - - # Interpolate to find capacity at the degradation limit - max_cap = self._interpolate_capacity( - capitals, degradations, self.config.ic_degradation_limit - ) - - # Break-even cost: gross IC expressed in bps - # If the full round-trip cost equals the gross L-S spread the alpha - # vanishes. Approximate as gross_mean_ic * 10000 (IC ~ return spread). - break_even_bps = abs_gross_mean * 1e4 - - return CapacityEstimate( - factor_name=factor_name, - max_capacity_usd=max_cap, - capacity_curve=curve, - break_even_cost_bps=break_even_bps, - ) - - def net_cost_evaluation( - self, - factor_name: str, - signals: np.ndarray, - capital: Optional[float] = None, - ) -> NetCostResult: - """Evaluate a factor net of estimated market impact. - - Parameters - ---------- - factor_name : str - Factor identifier. - signals : np.ndarray, shape (M, T) - Factor signal matrix. - capital : float, optional - Capital to evaluate at; defaults to ``config.base_capital_usd``. 
- - Returns - ------- - NetCostResult - """ - cap = capital if capital is not None else self.config.base_capital_usd - - # Gross metrics - gross_ic = compute_ic(signals, self.returns) - gross_icir = compute_icir(gross_ic) - - # Impact - impact = self._impact_model.estimate_impact(signals, self.volume, cap) - - # Net metrics - net_ret = self._net_returns(signals, impact.impact_bps) - net_ic = compute_ic(signals, net_ret) - net_icir = compute_icir(net_ic) - - # Gross / net long-short return (mean across time of Q5-Q1 proxy) - gross_ls = float(np.nanmean(self.returns.mean(axis=0))) - # Simplified: subtract round-trip impact from L-S return - net_ls = gross_ls - 2.0 * impact.avg_impact_bps / 1e4 - - return NetCostResult( - factor_name=factor_name, - gross_icir=gross_icir, - net_icir=net_icir, - gross_ls_return=gross_ls, - net_ls_return=net_ls, - estimated_capacity_usd=cap, - impact_estimate=impact, - passes_net_threshold=net_icir >= self.config.net_icir_threshold, - ) - - # ------------------------------------------------------------------ - # internal - # ------------------------------------------------------------------ - - @staticmethod - def _interpolate_capacity( - capitals: List[float], - degradations: List[float], - limit: float, - ) -> float: - """Linearly interpolate the capital at which degradation hits *limit*. - - Returns ``np.inf`` if all tested levels are below the limit, or the - smallest tested level if even that exceeds the limit. 
- """ - if not capitals: - return 0.0 - - # Find first crossing - for i in range(len(degradations)): - if degradations[i] >= limit: - if i == 0: - return capitals[0] - # Linear interpolation between [i-1] and [i] - d0, d1 = degradations[i - 1], degradations[i] - c0, c1 = capitals[i - 1], capitals[i] - if abs(d1 - d0) < 1e-12: - return c0 - frac = (limit - d0) / (d1 - d0) - return c0 + frac * (c1 - c0) - - # Never breached the limit - return float("inf") diff --git a/src/factorminer/factorminer/evaluation/causal.py b/src/factorminer/factorminer/evaluation/causal.py deleted file mode 100644 index 32d1715..0000000 --- a/src/factorminer/factorminer/evaluation/causal.py +++ /dev/null @@ -1,580 +0,0 @@ -"""Causal validation layer for alpha factor candidates. - -Provides Granger causality testing and intervention-based robustness -analysis to verify that discovered factors have genuine predictive -relationships with forward returns rather than spurious correlations. - -Two complementary tests are combined into a single robustness score: - -1. **Granger causality**: Does the factor signal Granger-cause returns - after controlling for existing library factors? -2. **Intervention robustness**: Does factor IC remain stable under - realistic data perturbations (volume shocks, volatility shocks, - liquidity droughts)? 
-""" - -from __future__ import annotations - -import logging -import warnings -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np - -from src.factorminer.factorminer.evaluation.metrics import compute_ic - -logger = logging.getLogger(__name__) - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - - -@dataclass -class CausalConfig: - """Configuration for causal validation tests.""" - - enabled: bool = True - - # Granger causality settings - granger_max_lag: int = 5 - granger_significance: float = 0.05 - - # Intervention test settings - n_interventions: int = 3 - intervention_magnitude: float = 2.0 - intervention_ic_threshold: float = 0.5 # min IC ratio under intervention - - # Combined robustness scoring - robustness_threshold: float = 0.4 # min combined score for admission - granger_weight: float = 0.4 - intervention_weight: float = 0.6 - - seed: int = 42 - - -# --------------------------------------------------------------------------- -# Result container -# --------------------------------------------------------------------------- - - -@dataclass -class CausalTestResult: - """Result of causal validation for a single factor.""" - - factor_name: str - - # Granger test results - granger_p_value: float - granger_f_stat: float - granger_passes: bool - - # Intervention test results - intervention_ic_ratio: float - intervention_passes: bool - - # Combined - robustness_score: float # 0-1 - passes: bool - - details: Dict[str, Any] = field(default_factory=dict) - - -# --------------------------------------------------------------------------- -# Validator -# --------------------------------------------------------------------------- - - -class CausalValidator: - """Validates causal relationships between factor signals and returns. 
- - Parameters - ---------- - returns : np.ndarray, shape (M, T) - Forward returns for M assets over T periods. - data_tensor : np.ndarray or None, shape (M, T, F) - Optional raw feature tensor used for realistic intervention - perturbations. When ``None``, a noise-based fallback is used. - library_signals : dict - Mapping from factor name to its signal array (M, T). Used as - controls in the Granger test. - config : CausalConfig - Configuration parameters. - """ - - def __init__( - self, - returns: np.ndarray, - data_tensor: Optional[np.ndarray], - library_signals: Dict[str, np.ndarray], - config: CausalConfig | None = None, - ) -> None: - self.returns = returns - self.data_tensor = data_tensor - self.library_signals = library_signals - self.config = config or CausalConfig() - self._rng = np.random.RandomState(self.config.seed) - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def validate(self, factor_name: str, signals: np.ndarray) -> CausalTestResult: - """Run causal validation on a single factor. - - Parameters - ---------- - factor_name : str - Human-readable identifier for logging / result tracking. - signals : np.ndarray, shape (M, T) - Factor signal matrix. 
- - Returns - ------- - CausalTestResult - """ - cfg = self.config - details: Dict[str, Any] = {} - control_library = { - name: lib_signals - for name, lib_signals in self.library_signals.items() - if name != factor_name - } - - # --- Granger --- - g_p, g_f, g_pass = self._granger_test( - signals, self.returns, control_library - ) - details["granger"] = { - "p_value": g_p, - "f_stat": g_f, - "passes": g_pass, - } - - # --- Intervention --- - i_ratio, i_pass = self._intervention_test( - signals, self.returns, self.data_tensor - ) - details["intervention"] = { - "ic_ratio": i_ratio, - "passes": i_pass, - } - - # --- Combined score --- - score = self._compute_robustness_score(g_pass, g_p, i_ratio, i_pass) - passes = score >= cfg.robustness_threshold - details["robustness_score"] = score - - return CausalTestResult( - factor_name=factor_name, - granger_p_value=g_p, - granger_f_stat=g_f, - granger_passes=g_pass, - intervention_ic_ratio=i_ratio, - intervention_passes=i_pass, - robustness_score=score, - passes=passes, - details=details, - ) - - def validate_batch( - self, candidates: List[Tuple[str, np.ndarray]] - ) -> Dict[str, CausalTestResult]: - """Validate a batch of candidate factors. - - Parameters - ---------- - candidates : list of (name, signals) tuples - Each entry is ``(factor_name, signals_array)`` with signals - shaped ``(M, T)``. - - Returns - ------- - dict - Mapping from factor name to its :class:`CausalTestResult`. - """ - results: Dict[str, CausalTestResult] = {} - for name, signals in candidates: - results[name] = self.validate(name, signals) - return results - - # ------------------------------------------------------------------ - # Granger causality test - # ------------------------------------------------------------------ - - def _granger_test( - self, - signals: np.ndarray, - returns: np.ndarray, - library_signals: Dict[str, np.ndarray], - ) -> Tuple[float, float, bool]: - """Granger causality test for factor -> returns. 
- - Averages signals and returns across the top-20 assets (by signal - magnitude) to produce T-length time series, then applies - statsmodels Granger tests. - - Returns ``(p_value, f_stat, passes)``. - """ - cfg = self.config - M, T = signals.shape - - # Minimum series length guard - min_length = 2 * cfg.granger_max_lag + 1 - if T < min_length: - logger.warning( - "Time series too short for Granger test " - "(T=%d < %d). Passing by default.", T, min_length, - ) - return 1.0, 0.0, True - - # --- Aggregate to T-length series --- - sig_series = self._aggregate_top_assets(signals, top_k=20) - ret_series = self._aggregate_top_assets(returns, top_k=20) - - # Handle constant or all-NaN series - if self._is_degenerate(sig_series) or self._is_degenerate(ret_series): - logger.warning( - "Degenerate series detected in Granger test. Passing by default." - ) - return 1.0, 0.0, True - - # --- Attempt statsmodels-based Granger test --- - try: - from statsmodels.tsa.stattools import grangercausalitytests # noqa: F811 - - p_value, f_stat = self._run_granger_bivariate( - sig_series, ret_series, cfg.granger_max_lag - ) - - # Multivariate extension if library has enough factors - if len(library_signals) > 10: - p_multi, f_multi = self._run_granger_multivariate( - sig_series, ret_series, library_signals, cfg.granger_max_lag - ) - # Take the more conservative (higher) p-value - if p_multi is not None: - p_value = max(p_value, p_multi) - f_stat = min(f_stat, f_multi) - - passes = p_value < cfg.granger_significance - return float(p_value), float(f_stat), bool(passes) - - except ImportError: - logger.warning( - "statsmodels not available; skipping Granger test. " - "Install statsmodels for causal validation." - ) - return 1.0, 0.0, True - except Exception as exc: - logger.warning("Granger test failed: %s. 
Passing by default.", exc) - return 1.0, 0.0, True - - def _run_granger_bivariate( - self, - sig_series: np.ndarray, - ret_series: np.ndarray, - max_lag: int, - ) -> Tuple[float, float]: - """Bivariate Granger test using statsmodels.""" - from statsmodels.tsa.stattools import grangercausalitytests - - # Stack as (T, 2): [returns, signals] -- statsmodels convention - # tests if column 1 (signals) Granger-causes column 0 (returns) - data = np.column_stack([ret_series, sig_series]) - - # Clamp max_lag to available data - effective_lag = min(max_lag, len(data) // 3) - if effective_lag < 1: - return 1.0, 0.0 - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - results = grangercausalitytests(data, maxlag=effective_lag, verbose=False) - - # Find the lag with the smallest p-value (ssr_ftest) - best_p = 1.0 - best_f = 0.0 - for lag in range(1, effective_lag + 1): - if lag not in results: - continue - test_dict = results[lag][0] - f_test = test_dict.get("ssr_ftest") - if f_test is not None: - p_val = f_test[1] - f_val = f_test[0] - if p_val < best_p: - best_p = p_val - best_f = f_val - - return float(best_p), float(best_f) - - def _run_granger_multivariate( - self, - sig_series: np.ndarray, - ret_series: np.ndarray, - library_signals: Dict[str, np.ndarray], - max_lag: int, - ) -> Tuple[Optional[float], Optional[float]]: - """Multivariate Granger via VAR, controlling for library factors. - - If the library has >10 factors the controls are PCA-reduced to - 5 components. 
- """ - try: - from statsmodels.tsa.api import VAR - - # Build control matrix: average each library factor across top assets - control_series = [] - for _name, lib_sig in library_signals.items(): - cs = self._aggregate_top_assets(lib_sig, top_k=20) - if not self._is_degenerate(cs): - control_series.append(cs) - - if not control_series: - return None, None - - controls = np.column_stack(control_series) - - # PCA reduction if too many controls - if controls.shape[1] > 10: - controls = self._pca_reduce(controls, n_components=5) - - # Build VAR dataset: [returns, signals, controls...] - var_data = np.column_stack([ret_series, sig_series, controls]) - - # Drop rows with NaN - valid_mask = ~np.any(np.isnan(var_data), axis=1) - var_data = var_data[valid_mask] - - effective_lag = min(max_lag, len(var_data) // (3 * var_data.shape[1])) - if effective_lag < 1: - return None, None - - model = VAR(var_data) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - fitted = model.fit(maxlags=effective_lag, ic=None) - - # Test Granger causality: does column 1 (signals) cause column 0 (returns)? - test_result = fitted.test_causality( - caused=0, causing=1, kind="f" - ) - p_value = float(test_result.pvalue) - f_stat = float(test_result.test_statistic) - - return p_value, f_stat - - except Exception as exc: - logger.warning( - "Multivariate Granger (VAR) failed: %s. Skipping.", exc - ) - return None, None - - # ------------------------------------------------------------------ - # Intervention robustness test - # ------------------------------------------------------------------ - - def _intervention_test( - self, - signals: np.ndarray, - returns: np.ndarray, - data_tensor: Optional[np.ndarray], - ) -> Tuple[float, bool]: - """Intervention-based robustness test. - - Three perturbation scenarios are applied; the factor passes if - its IC remains above the threshold ratio in at least 2 of 3. - - Returns ``(mean_ic_ratio, passes)``. 
- """ - cfg = self.config - - # Baseline IC - ic_orig = compute_ic(signals, returns) - valid_orig = ic_orig[~np.isnan(ic_orig)] - if len(valid_orig) < 3: - logger.warning("Too few valid IC periods for intervention test.") - return 1.0, True - - mean_ic_orig = float(np.mean(np.abs(valid_orig))) - if mean_ic_orig < 1e-10: - # Zero IC baseline: interventions cannot degrade further - return 1.0, True - - ratios: List[float] = [] - pass_count = 0 - - scenarios = self._build_intervention_scenarios( - signals, returns, data_tensor - ) - - for scenario_name, perturbed_signals, perturbed_returns in scenarios: - ic_pert = compute_ic(perturbed_signals, perturbed_returns) - valid_pert = ic_pert[~np.isnan(ic_pert)] - if len(valid_pert) < 3: - # Not enough data after perturbation; count as pass - ratios.append(1.0) - pass_count += 1 - continue - - mean_ic_pert = float(np.mean(np.abs(valid_pert))) - ratio = mean_ic_pert / mean_ic_orig - ratios.append(ratio) - if ratio >= cfg.intervention_ic_threshold: - pass_count += 1 - - mean_ratio = float(np.mean(ratios)) if ratios else 1.0 - passes = pass_count >= 2 # at least 2/3 interventions pass - - return mean_ratio, passes - - def _build_intervention_scenarios( - self, - signals: np.ndarray, - returns: np.ndarray, - data_tensor: Optional[np.ndarray], - ) -> List[Tuple[str, np.ndarray, np.ndarray]]: - """Construct the three intervention scenarios. - - Returns a list of ``(name, perturbed_signals, perturbed_returns)``. 
- """ - M, T = signals.shape - cfg = self.config - rng = self._rng - - scenarios: List[Tuple[str, np.ndarray, np.ndarray]] = [] - - if data_tensor is not None and data_tensor.shape[:2] == (M, T): - # --- Volume shock: 2x on random 30% of periods --- - shock_periods = rng.choice(T, size=max(1, int(0.3 * T)), replace=False) - sig_vol = signals.copy() - sig_vol[:, shock_periods] *= cfg.intervention_magnitude - scenarios.append(("volume_shock", sig_vol, returns.copy())) - - # --- Volatility shock: 2x noise on returns --- - ret_vol = returns.copy() - noise = rng.randn(M, T) * np.nanstd(returns) * cfg.intervention_magnitude - ret_vol += noise - scenarios.append(("volatility_shock", signals.copy(), ret_vol)) - - # --- Liquidity drought: zero volume on 10% of (asset, period) pairs --- - sig_liq = signals.copy() - n_pairs = max(1, int(0.1 * M * T)) - drought_assets = rng.randint(0, M, size=n_pairs) - drought_periods = rng.randint(0, T, size=n_pairs) - sig_liq[drought_assets, drought_periods] = 0.0 - scenarios.append(("liquidity_drought", sig_liq, returns.copy())) - - else: - # --- Fallback: add noise directly to signals --- - for i, scenario_name in enumerate( - ["noise_shock_1", "noise_shock_2", "noise_shock_3"] - ): - sig_pert = signals.copy() - noise_scale = np.nanstd(signals) * cfg.intervention_magnitude - if noise_scale < 1e-12: - noise_scale = cfg.intervention_magnitude - noise = rng.randn(M, T) * noise_scale * (0.5 + 0.5 * i) - sig_pert += noise - scenarios.append((scenario_name, sig_pert, returns.copy())) - - return scenarios - - # ------------------------------------------------------------------ - # Robustness score - # ------------------------------------------------------------------ - - def _compute_robustness_score( - self, - granger_passes: bool, - granger_p: float, - intervention_ratio: float, - intervention_passes: bool, - ) -> float: - """Combine Granger and intervention results into a 0-1 score. 
- - granger_component = 1.0 - min(p_value / significance, 1.0) - intervention_component = min(ic_ratio / 1.0, 1.0) - score = w_g * granger_component + w_i * intervention_component - """ - cfg = self.config - - granger_component = 1.0 - min(granger_p / cfg.granger_significance, 1.0) - intervention_component = min(intervention_ratio / 1.0, 1.0) - - score = ( - cfg.granger_weight * granger_component - + cfg.intervention_weight * intervention_component - ) - - return float(np.clip(score, 0.0, 1.0)) - - # ------------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------------ - - @staticmethod - def _aggregate_top_assets( - matrix: np.ndarray, top_k: int = 20 - ) -> np.ndarray: - """Average across the top-k assets (by mean absolute value) to - produce a T-length series. - """ - M, T = matrix.shape - k = min(top_k, M) - - # Mean absolute value per asset, ignoring NaN - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=RuntimeWarning) - asset_means = np.nanmean(np.abs(matrix), axis=1) - - # Replace NaN means with -inf so they sort last - asset_means = np.where(np.isnan(asset_means), -np.inf, asset_means) - top_idx = np.argpartition(asset_means, -k)[-k:] - - subset = matrix[top_idx, :] - - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=RuntimeWarning) - series = np.nanmean(subset, axis=0) - - # Fill remaining NaN with 0 - series = np.where(np.isnan(series), 0.0, series) - return series - - @staticmethod - def _is_degenerate(series: np.ndarray) -> bool: - """Check if a series is constant or all-NaN.""" - valid = series[~np.isnan(series)] - if len(valid) < 3: - return True - return float(np.std(valid)) < 1e-12 - - @staticmethod - def _pca_reduce(X: np.ndarray, n_components: int = 5) -> np.ndarray: - """Reduce columns of X via truncated SVD (no sklearn dependency). 
- - Parameters - ---------- - X : np.ndarray, shape (T, K) - n_components : int - - Returns - ------- - np.ndarray, shape (T, n_components) - """ - # Center - means = np.nanmean(X, axis=0) - X_centered = X - means - X_centered = np.where(np.isnan(X_centered), 0.0, X_centered) - - # Economy SVD - n_comp = min(n_components, X_centered.shape[1], X_centered.shape[0]) - try: - U, S, Vt = np.linalg.svd(X_centered, full_matrices=False) - return U[:, :n_comp] * S[:n_comp] - except np.linalg.LinAlgError: - logger.warning("SVD failed during PCA reduction; using raw data.") - return X_centered[:, :n_comp] diff --git a/src/factorminer/factorminer/evaluation/combination.py b/src/factorminer/factorminer/evaluation/combination.py deleted file mode 100644 index 62983c2..0000000 --- a/src/factorminer/factorminer/evaluation/combination.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Factor combination strategies for building composite signals. - -Implements Equal-Weight, IC-Weighted, and Orthogonal combination methods -for merging multiple alpha factors into a single composite signal, following -the methodology described in the FactorMiner paper. -""" - -from __future__ import annotations - -from typing import Dict, Optional - -import numpy as np - - -class FactorCombiner: - """Combine multiple factor signals into a single composite signal. - - Each factor signal is a 2-D array of shape (T, N) where T is the number - of time steps and N is the number of assets. Factor IDs are arbitrary - integers used as dictionary keys. - """ - - # ------------------------------------------------------------------ - # Public combination methods - # ------------------------------------------------------------------ - - def equal_weight(self, factor_signals: Dict[int, np.ndarray]) -> np.ndarray: - """Equal-Weight (EW): simple average of cross-sectionally standardized factors. - - Paper results: IC Mean=0.1451, ICIR=1.2053, IC Win Rate=85.0%. 
- - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - - Returns - ------- - ndarray of shape (T, N) - Composite signal (average of z-scored factors). - """ - if not factor_signals: - raise ValueError("factor_signals must not be empty") - - standardized = [ - self._cross_sectional_standardize(sig) - for sig in factor_signals.values() - ] - stacked = np.stack(standardized, axis=0) # (K, T, N) - # Average over factors, ignoring NaNs - return np.nanmean(stacked, axis=0) - - def ic_weighted( - self, - factor_signals: Dict[int, np.ndarray], - ic_values: Dict[int, float], - ) -> np.ndarray: - """IC-Weighted (ICW): weight factors proportionally by their historical IC. - - Paper results: IC Mean=0.1496, ICIR=1.2430, Cumulative Return=26.67 - (12.4% over EW). - - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - ic_values : dict[int, float] - Mapping from factor ID to its historical Information Coefficient. - Factors with non-positive IC are excluded. - - Returns - ------- - ndarray of shape (T, N) - Composite signal. 
- """ - if not factor_signals: - raise ValueError("factor_signals must not be empty") - - ids = list(factor_signals.keys()) - weights: Dict[int, float] = {} - for fid in ids: - ic = ic_values.get(fid, 0.0) - if np.isfinite(ic) and ic > 0.0: - weights[fid] = ic - - if not weights: - # Fall back to equal weight if all ICs are non-positive - return self.equal_weight(factor_signals) - - total_weight = sum(weights.values()) - ref_shape = next(iter(factor_signals.values())).shape - composite = np.zeros(ref_shape, dtype=np.float64) - - for fid, w in weights.items(): - z = self._cross_sectional_standardize(factor_signals[fid]) - composite += (w / total_weight) * np.where(np.isnan(z), 0.0, z) - - return composite - - def orthogonal(self, factor_signals: Dict[int, np.ndarray]) -> np.ndarray: - """Orthogonal: Gram-Schmidt orthogonalization before averaging. - - Removes cross-factor collinearity by projecting each factor onto the - subspace orthogonal to all previously processed factors, then averages - the orthogonalized residuals. - - Paper results: IC Mean=0.1400, ICIR=1.1933. - - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - - Returns - ------- - ndarray of shape (T, N) - Composite signal (average of orthogonalized z-scored factors). - """ - if not factor_signals: - raise ValueError("factor_signals must not be empty") - - standardized = [ - self._cross_sectional_standardize(sig) - for sig in factor_signals.values() - ] - - orthogonalized = self._gram_schmidt(standardized) - stacked = np.stack(orthogonalized, axis=0) # (K, T, N) - return np.nanmean(stacked, axis=0) - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _cross_sectional_standardize(self, signals: np.ndarray) -> np.ndarray: - """Standardize signals cross-sectionally (across assets) at each time step. 
- - z_score = (x - mean) / std per cross-section (row). - - Parameters - ---------- - signals : ndarray of shape (T, N) - - Returns - ------- - ndarray of shape (T, N) - Cross-sectionally standardized values. Rows where std == 0 - are set to 0. - """ - signals = np.asarray(signals, dtype=np.float64) - cs_mean = np.nanmean(signals, axis=1, keepdims=True) - cs_std = np.nanstd(signals, axis=1, keepdims=True) - # Avoid division by zero - cs_std = np.where(cs_std == 0.0, 1.0, cs_std) - return (signals - cs_mean) / cs_std - - @staticmethod - def _gram_schmidt(factors: list[np.ndarray]) -> list[np.ndarray]: - """Modified Gram-Schmidt orthogonalization on flattened factor vectors. - - Each factor is a (T, N) array. We flatten to 1-D, orthogonalize, - then reshape back. NaN values are treated as zero during projection - and restored afterward. - - Parameters - ---------- - factors : list of ndarray, each (T, N) - - Returns - ------- - list of ndarray, each (T, N) -- orthogonalized factors. - """ - if len(factors) <= 1: - return list(factors) - - shape = factors[0].shape - # Replace NaN with 0 for linear algebra, track NaN mask - nan_masks = [np.isnan(f) for f in factors] - vecs = [np.where(m, 0.0, f).ravel() for f, m in zip(factors, nan_masks)] - - ortho: list[np.ndarray] = [] - for i, v in enumerate(vecs): - u = v.copy() - for prev in ortho: - denom = np.dot(prev, prev) - if denom > 1e-12: - u -= (np.dot(u, prev) / denom) * prev - ortho.append(u) - - result = [] - for u, mask in zip(ortho, nan_masks): - arr = u.reshape(shape) - arr[mask] = np.nan - result.append(arr) - return result diff --git a/src/factorminer/factorminer/evaluation/correlation.py b/src/factorminer/factorminer/evaluation/correlation.py deleted file mode 100644 index cef2fc7..0000000 --- a/src/factorminer/factorminer/evaluation/correlation.py +++ /dev/null @@ -1,374 +0,0 @@ -"""Efficient correlation computation for factor evaluation. 
- -Provides batch Spearman rank correlation, vectorized cross-sectional -correlation, and incremental correlation matrix updates for the -factor library. Supports both numpy and optional torch backends. -""" - -from __future__ import annotations - -from typing import Dict, List, Optional, Tuple - -import numpy as np -from scipy.stats import rankdata - - -# --------------------------------------------------------------------------- -# Batch cross-sectional Spearman rank correlation -# --------------------------------------------------------------------------- - -def _rank_columns(x: np.ndarray) -> np.ndarray: - """Rank each column of x independently, leaving NaN as NaN. - - Parameters - ---------- - x : np.ndarray, shape (M, T) - - Returns - ------- - np.ndarray, shape (M, T) - Ranks per column, NaN where input was NaN. - """ - M, T = x.shape - ranked = np.full_like(x, np.nan, dtype=np.float64) - for t in range(T): - col = x[:, t] - valid = ~np.isnan(col) - if valid.sum() < 2: - continue - ranked[valid, t] = rankdata(col[valid]) - return ranked - - -def batch_spearman_correlation( - candidate_signals: np.ndarray, - library_signals: np.ndarray, -) -> np.ndarray: - """Compute Spearman correlation between one candidate and multiple library factors. - - For each library factor g, computes: - rho = (1/|T_valid|) * sum_t Corr_rank(candidate_t, g_t) - - Parameters - ---------- - candidate_signals : np.ndarray, shape (M, T) - Signal array for the candidate factor. - library_signals : np.ndarray, shape (N, M, T) - Signal arrays for N library factors. - - Returns - ------- - np.ndarray, shape (N,) - Average cross-sectional Spearman correlation with each library factor. 
- """ - N = library_signals.shape[0] - if N == 0: - return np.array([], dtype=np.float64) - - M, T = candidate_signals.shape - correlations = np.zeros(N, dtype=np.float64) - - # Rank candidate columns once - cand_ranked = _rank_columns(candidate_signals) - - for i in range(N): - lib_ranked = _rank_columns(library_signals[i]) - corr_sum = 0.0 - count = 0 - for t in range(T): - cr = cand_ranked[:, t] - lr = lib_ranked[:, t] - valid = ~(np.isnan(cr) | np.isnan(lr)) - n = valid.sum() - if n < 5: - continue - cr_v = cr[valid] - lr_v = lr[valid] - cr_m = cr_v - cr_v.mean() - lr_m = lr_v - lr_v.mean() - denom = np.sqrt((cr_m ** 2).sum() * (lr_m ** 2).sum()) - if denom > 1e-12: - corr_sum += (cr_m * lr_m).sum() / denom - count += 1 - if count > 0: - correlations[i] = corr_sum / count - - return correlations - - -def batch_spearman_pairwise( - signals_list: List[np.ndarray], -) -> np.ndarray: - """Compute pairwise Spearman correlation matrix for a list of signal arrays. - - Parameters - ---------- - signals_list : list of np.ndarray, each shape (M, T) - Signal arrays for K candidate factors. - - Returns - ------- - np.ndarray, shape (K, K) - Symmetric correlation matrix. Diagonal is 1.0. 
- """ - K = len(signals_list) - if K == 0: - return np.array([], dtype=np.float64).reshape(0, 0) - - M, T = signals_list[0].shape - - # Pre-compute ranks for all candidates - ranked_list = [_rank_columns(s) for s in signals_list] - - corr_matrix = np.eye(K, dtype=np.float64) - - for i in range(K): - for j in range(i + 1, K): - corr_sum = 0.0 - count = 0 - for t in range(T): - ri = ranked_list[i][:, t] - rj = ranked_list[j][:, t] - valid = ~(np.isnan(ri) | np.isnan(rj)) - n = valid.sum() - if n < 5: - continue - ri_v = ri[valid] - rj_v = rj[valid] - ri_m = ri_v - ri_v.mean() - rj_m = rj_v - rj_v.mean() - denom = np.sqrt((ri_m ** 2).sum() * (rj_m ** 2).sum()) - if denom > 1e-12: - corr_sum += (ri_m * rj_m).sum() / denom - count += 1 - if count > 0: - corr_matrix[i, j] = corr_sum / count - corr_matrix[j, i] = corr_matrix[i, j] - - return corr_matrix - - -# --------------------------------------------------------------------------- -# Incremental correlation matrix update -# --------------------------------------------------------------------------- - -class IncrementalCorrelationMatrix: - """Maintains a correlation matrix that can be incrementally updated. - - Supports adding new factors and removing existing ones without - recomputing the entire matrix from scratch. 
- """ - - def __init__(self) -> None: - self._signals: Dict[str, np.ndarray] = {} - self._ranked: Dict[str, np.ndarray] = {} - self._corr_cache: Dict[Tuple[str, str], float] = {} - self._factor_ids: List[str] = [] - - @property - def size(self) -> int: - return len(self._factor_ids) - - @property - def factor_ids(self) -> List[str]: - return list(self._factor_ids) - - def _compute_pair_corr(self, id_a: str, id_b: str) -> float: - """Compute average cross-sectional Spearman between two factors.""" - ra = self._ranked[id_a] - rb = self._ranked[id_b] - M, T = ra.shape - corr_sum = 0.0 - count = 0 - for t in range(T): - a_col = ra[:, t] - b_col = rb[:, t] - valid = ~(np.isnan(a_col) | np.isnan(b_col)) - n = valid.sum() - if n < 5: - continue - a_v = a_col[valid] - b_v = b_col[valid] - a_m = a_v - a_v.mean() - b_m = b_v - b_v.mean() - denom = np.sqrt((a_m ** 2).sum() * (b_m ** 2).sum()) - if denom > 1e-12: - corr_sum += (a_m * b_m).sum() / denom - count += 1 - return corr_sum / count if count > 0 else 0.0 - - def add_factor(self, factor_id: str, signals: np.ndarray) -> Dict[str, float]: - """Add a factor and compute its correlation with all existing factors. - - Parameters - ---------- - factor_id : str - signals : np.ndarray, shape (M, T) - - Returns - ------- - dict - Mapping from existing factor_id to correlation with the new factor. 
- """ - self._signals[factor_id] = signals - self._ranked[factor_id] = _rank_columns(signals) - - correlations: Dict[str, float] = {} - for existing_id in self._factor_ids: - corr = self._compute_pair_corr(factor_id, existing_id) - key = (min(factor_id, existing_id), max(factor_id, existing_id)) - self._corr_cache[key] = corr - correlations[existing_id] = corr - - self._factor_ids.append(factor_id) - return correlations - - def remove_factor(self, factor_id: str) -> None: - """Remove a factor from the matrix.""" - if factor_id not in self._signals: - return - self._signals.pop(factor_id, None) - self._ranked.pop(factor_id, None) - self._factor_ids = [fid for fid in self._factor_ids if fid != factor_id] - # Remove cached correlations involving this factor - keys_to_remove = [ - k for k in self._corr_cache if factor_id in k - ] - for k in keys_to_remove: - del self._corr_cache[k] - - def get_correlation(self, id_a: str, id_b: str) -> float: - """Get cached correlation between two factors.""" - key = (min(id_a, id_b), max(id_a, id_b)) - if key in self._corr_cache: - return self._corr_cache[key] - if id_a == id_b: - return 1.0 - return 0.0 - - def get_max_correlation(self, factor_id: str) -> Tuple[float, Optional[str]]: - """Get the maximum absolute correlation of a factor with all others. - - Returns - ------- - tuple of (max_abs_corr, most_correlated_factor_id) - """ - max_corr = 0.0 - max_id: Optional[str] = None - for other_id in self._factor_ids: - if other_id == factor_id: - continue - corr = abs(self.get_correlation(factor_id, other_id)) - if corr > max_corr: - max_corr = corr - max_id = other_id - return max_corr, max_id - - def to_matrix(self) -> np.ndarray: - """Return the full correlation matrix as a numpy array. 
- - Returns - ------- - np.ndarray, shape (N, N) - """ - N = len(self._factor_ids) - mat = np.eye(N, dtype=np.float64) - for i in range(N): - for j in range(i + 1, N): - corr = self.get_correlation(self._factor_ids[i], self._factor_ids[j]) - mat[i, j] = corr - mat[j, i] = corr - return mat - - -# --------------------------------------------------------------------------- -# Torch backend (optional) -# --------------------------------------------------------------------------- - -def _try_torch_rank_correlation( - candidate: np.ndarray, - library: np.ndarray, -) -> Optional[np.ndarray]: - """Attempt to compute rank correlations using PyTorch for GPU acceleration. - - Falls back to None if torch is not available. - - Parameters - ---------- - candidate : np.ndarray, shape (M, T) - library : np.ndarray, shape (N, M, T) - - Returns - ------- - np.ndarray, shape (N,) or None if torch unavailable. - """ - try: - import torch - except ImportError: - return None - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - N, M, T = library.shape - cand_t = torch.from_numpy(candidate).to(device, dtype=torch.float64) - lib_t = torch.from_numpy(library).to(device, dtype=torch.float64) - - correlations = torch.zeros(N, dtype=torch.float64, device=device) - - for t in range(T): - c_col = cand_t[:, t] - l_cols = lib_t[:, :, t] # (N, M) - - # Skip if too many NaN - c_valid = ~torch.isnan(c_col) - if c_valid.sum() < 5: - continue - - # Rank the candidate column - c_sorted_idx = c_col[c_valid].argsort().argsort().float() + 1.0 - - for i in range(N): - l_col = l_cols[i] - valid = c_valid & ~torch.isnan(l_col) - n = valid.sum() - if n < 5: - continue - # Rank both - c_v = c_col[valid] - l_v = l_col[valid] - c_rank = c_v.argsort().argsort().float() + 1.0 - l_rank = l_v.argsort().argsort().float() + 1.0 - c_m = c_rank - c_rank.mean() - l_m = l_rank - l_rank.mean() - denom = torch.sqrt((c_m ** 2).sum() * (l_m ** 2).sum()) - if denom > 1e-12: - correlations[i] += (c_m * 
l_m).sum() / denom - - correlations /= max(T, 1) - return correlations.cpu().numpy() - - -def compute_correlation_batch( - candidate: np.ndarray, - library: np.ndarray, - backend: str = "numpy", -) -> np.ndarray: - """Compute correlations between candidate and library, with backend selection. - - Parameters - ---------- - candidate : np.ndarray, shape (M, T) - library : np.ndarray, shape (N, M, T) - backend : str - "numpy" or "gpu" - - Returns - ------- - np.ndarray, shape (N,) - """ - if backend == "gpu": - result = _try_torch_rank_correlation(candidate, library) - if result is not None: - return result - - return batch_spearman_correlation(candidate, library) diff --git a/src/factorminer/factorminer/evaluation/metrics.py b/src/factorminer/factorminer/evaluation/metrics.py deleted file mode 100644 index 006c75e..0000000 --- a/src/factorminer/factorminer/evaluation/metrics.py +++ /dev/null @@ -1,377 +0,0 @@ -"""Core evaluation metrics for alpha factors. - -Provides vectorized, production-quality implementations of Information -Coefficient (IC), ICIR, quintile analysis, turnover, and comprehensive -factor statistics used by the validation pipeline. -""" - -from __future__ import annotations - -import numpy as np -from scipy.stats import rankdata - - -# --------------------------------------------------------------------------- -# Information Coefficient -# --------------------------------------------------------------------------- - -def compute_ic(signals: np.ndarray, returns: np.ndarray) -> np.ndarray: - """Compute IC_t = Corr_rank(s_t, r_{t+1}) for each time period. - - Uses Spearman rank correlation computed cross-sectionally at each t. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - Factor signals for M assets over T periods. - returns : np.ndarray, shape (M, T) - Forward returns for M assets over T periods. - - Returns - ------- - np.ndarray, shape (T,) - Spearman rank correlation per period. 
NaN where fewer than 5 - valid (non-NaN) asset pairs exist. - """ - M, T = signals.shape - ic_series = np.full(T, np.nan, dtype=np.float64) - - for t in range(T): - s = signals[:, t] - r = returns[:, t] - valid = ~(np.isnan(s) | np.isnan(r)) - n = valid.sum() - if n < 5: - continue - rs = rankdata(s[valid]) - rr = rankdata(r[valid]) - # Pearson correlation on ranks = Spearman - rs_m = rs - rs.mean() - rr_m = rr - rr.mean() - denom = np.sqrt((rs_m ** 2).sum() * (rr_m ** 2).sum()) - if denom < 1e-12: - ic_series[t] = 0.0 - else: - ic_series[t] = (rs_m * rr_m).sum() / denom - - return ic_series - - -def compute_ic_vectorized(signals: np.ndarray, returns: np.ndarray) -> np.ndarray: - """Fully vectorized IC computation (faster for large M, T). - - Ranks are computed per-column, then Pearson correlation on ranks - is computed without Python-level loops over T. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - returns : np.ndarray, shape (M, T) - - Returns - ------- - np.ndarray, shape (T,) - """ - M, T = signals.shape - ic_series = np.full(T, np.nan, dtype=np.float64) - - # Mask invalid entries - invalid = np.isnan(signals) | np.isnan(returns) - - # Rank each column independently (replace NaN with very large value to push to end) - big = 1e18 - sig_filled = np.where(invalid, big, signals) - ret_filled = np.where(invalid, big, returns) - - for t in range(T): - valid = ~invalid[:, t] - n = valid.sum() - if n < 5: - continue - rs = rankdata(sig_filled[valid, t]) - rr = rankdata(ret_filled[valid, t]) - rs_m = rs - rs.mean() - rr_m = rr - rr.mean() - denom = np.sqrt((rs_m ** 2).sum() * (rr_m ** 2).sum()) - ic_series[t] = (rs_m * rr_m).sum() / denom if denom > 1e-12 else 0.0 - - return ic_series - - -# --------------------------------------------------------------------------- -# IC-derived statistics -# --------------------------------------------------------------------------- - -def compute_icir(ic_series: np.ndarray) -> float: - """Compute ICIR = mean(IC) 
/ std(IC). - - Parameters - ---------- - ic_series : np.ndarray - IC time series (may contain NaN). - - Returns - ------- - float - ICIR value. Returns 0.0 if std is near zero or too few valid points. - """ - valid = ic_series[~np.isnan(ic_series)] - if len(valid) < 3: - return 0.0 - std = float(np.std(valid, ddof=1)) - if std < 1e-12: - return 0.0 - return float(np.mean(valid)) / std - - -def compute_ic_mean(ic_series: np.ndarray) -> float: - """Compute mean absolute IC. - - Parameters - ---------- - ic_series : np.ndarray - - Returns - ------- - float - """ - valid = ic_series[~np.isnan(ic_series)] - if len(valid) == 0: - return 0.0 - return float(np.mean(np.abs(valid))) - - -def compute_ic_win_rate(ic_series: np.ndarray) -> float: - """Fraction of periods with positive IC. - - Parameters - ---------- - ic_series : np.ndarray - - Returns - ------- - float - Win rate in [0, 1]. - """ - valid = ic_series[~np.isnan(ic_series)] - if len(valid) == 0: - return 0.0 - return float(np.mean(valid > 0)) - - -# --------------------------------------------------------------------------- -# Cross-factor correlation -# --------------------------------------------------------------------------- - -def compute_pairwise_correlation( - signals_a: np.ndarray, - signals_b: np.ndarray, -) -> float: - """Time-averaged cross-sectional Spearman correlation between two factors. - - rho(a, b) = (1/|T|) * sum_t Corr_rank(s_t^a, s_t^b) - - Parameters - ---------- - signals_a : np.ndarray, shape (M, T) - signals_b : np.ndarray, shape (M, T) - - Returns - ------- - float - Average cross-sectional Spearman correlation. 
- """ - M, T = signals_a.shape - corrs = [] - - for t in range(T): - a = signals_a[:, t] - b = signals_b[:, t] - valid = ~(np.isnan(a) | np.isnan(b)) - n = valid.sum() - if n < 5: - continue - ra = rankdata(a[valid]) - rb = rankdata(b[valid]) - ra_m = ra - ra.mean() - rb_m = rb - rb.mean() - denom = np.sqrt((ra_m ** 2).sum() * (rb_m ** 2).sum()) - if denom < 1e-12: - corrs.append(0.0) - else: - corrs.append(float((ra_m * rb_m).sum() / denom)) - - if not corrs: - return 0.0 - return float(np.mean(corrs)) - - -# --------------------------------------------------------------------------- -# Quintile analysis -# --------------------------------------------------------------------------- - -def compute_quintile_returns( - signals: np.ndarray, - returns: np.ndarray, - n_quantiles: int = 5, -) -> dict: - """Sort assets into quintiles by factor signal, compute average returns. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - returns : np.ndarray, shape (M, T) - n_quantiles : int - Number of quantile buckets (default 5 for quintiles). - - Returns - ------- - dict - Keys: Q1..Q{n}, long_short, monotonicity. - Q1 is lowest signal quintile, Q{n} is highest. 
- """ - M, T = signals.shape - # Accumulate per-quintile return sums - quintile_returns = {q: [] for q in range(1, n_quantiles + 1)} - - for t in range(T): - s = signals[:, t] - r = returns[:, t] - valid = ~(np.isnan(s) | np.isnan(r)) - n = valid.sum() - if n < n_quantiles: - continue - s_valid = s[valid] - r_valid = r[valid] - # Assign quintile labels via rank - ranks = rankdata(s_valid) - # Map to quintile: ceil(rank / n * n_quantiles), clamped - q_labels = np.clip( - np.ceil(ranks / n * n_quantiles).astype(int), - 1, - n_quantiles, - ) - for q in range(1, n_quantiles + 1): - mask = q_labels == q - if mask.any(): - quintile_returns[q].append(float(np.mean(r_valid[mask]))) - - result = {} - means = {} - for q in range(1, n_quantiles + 1): - key = f"Q{q}" - if quintile_returns[q]: - means[q] = float(np.mean(quintile_returns[q])) - else: - means[q] = 0.0 - result[key] = means[q] - - # Long-short: top quintile minus bottom quintile - result["long_short"] = means[n_quantiles] - means[1] - - # Monotonicity: Spearman corr between quintile index and mean return - q_indices = np.arange(1, n_quantiles + 1, dtype=np.float64) - q_returns = np.array([means[q] for q in range(1, n_quantiles + 1)]) - if np.std(q_returns) < 1e-12: - result["monotonicity"] = 0.0 - else: - rq = rankdata(q_indices) - rr = rankdata(q_returns) - rq_m = rq - rq.mean() - rr_m = rr - rr.mean() - denom = np.sqrt((rq_m ** 2).sum() * (rr_m ** 2).sum()) - result["monotonicity"] = float((rq_m * rr_m).sum() / denom) if denom > 1e-12 else 0.0 - - return result - - -# --------------------------------------------------------------------------- -# Turnover -# --------------------------------------------------------------------------- - -def compute_turnover(signals: np.ndarray, top_fraction: float = 0.2) -> float: - """Compute average portfolio turnover rate. - - Turnover measures the fraction of top-ranked assets that change - between consecutive periods. 
- - Parameters - ---------- - signals : np.ndarray, shape (M, T) - top_fraction : float - Fraction of assets in the "top" bucket (default 0.2 = top quintile). - - Returns - ------- - float - Average turnover rate in [0, 1]. - """ - M, T = signals.shape - k = max(int(M * top_fraction), 1) - turnovers = [] - - prev_top = None - for t in range(T): - col = signals[:, t] - valid = ~np.isnan(col) - if valid.sum() < k: - prev_top = None - continue - # Get indices of top-k assets - # Use argpartition for efficiency - col_filled = np.where(valid, col, -np.inf) - top_idx = set(np.argpartition(col_filled, -k)[-k:]) - - if prev_top is not None: - overlap = len(top_idx & prev_top) - turnover = 1.0 - overlap / k - turnovers.append(turnover) - prev_top = top_idx - - if not turnovers: - return 0.0 - return float(np.mean(turnovers)) - - -# --------------------------------------------------------------------------- -# Comprehensive factor statistics -# --------------------------------------------------------------------------- - -def compute_factor_stats( - signals: np.ndarray, - returns: np.ndarray, -) -> dict: - """Compute comprehensive factor statistics. 
- - Parameters - ---------- - signals : np.ndarray, shape (M, T) - returns : np.ndarray, shape (M, T) - - Returns - ------- - dict - Keys: ic_mean, ic_abs_mean, icir, ic_win_rate, - Q1..Q5, long_short, monotonicity, turnover - """ - ic_series = compute_ic(signals, returns) - valid_ic = ic_series[~np.isnan(ic_series)] - - stats: dict = { - "ic_series": ic_series, - "ic_mean": float(np.mean(valid_ic)) if len(valid_ic) > 0 else 0.0, - "ic_abs_mean": compute_ic_mean(ic_series), - "icir": compute_icir(ic_series), - "ic_win_rate": compute_ic_win_rate(ic_series), - "ic_std": float(np.std(valid_ic, ddof=1)) if len(valid_ic) > 2 else 0.0, - "n_periods": int((~np.isnan(ic_series)).sum()), - } - - # Quintile analysis - quintile = compute_quintile_returns(signals, returns) - stats.update(quintile) - - # Turnover - stats["turnover"] = compute_turnover(signals) - - return stats diff --git a/src/factorminer/factorminer/evaluation/pipeline.py b/src/factorminer/factorminer/evaluation/pipeline.py deleted file mode 100644 index 2392b0e..0000000 --- a/src/factorminer/factorminer/evaluation/pipeline.py +++ /dev/null @@ -1,736 +0,0 @@ -"""Multi-stage factor evaluation and validation pipeline. - -Implements Algorithm 1 Step 3: the four-stage evaluation cascade that -screens, deduplicates, and validates candidate alpha factors before -admitting them to the factor library. - -Stages: - 1. Fast IC screening on a subset of assets - 2. Correlation check against the existing library - 2.5. Replacement check for rejected-but-strong candidates - 3. Intra-batch deduplication - 4. Full validation on the complete asset universe - -Supports parallel evaluation via a configurable multiprocessing worker pool. 
-""" - -from __future__ import annotations - -import logging -from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Optional, Tuple - -import numpy as np - -from src.factorminer.factorminer.evaluation.admission import ( - AdmissionDecision, - check_admission, - check_replacement, -) -from src.factorminer.factorminer.evaluation.correlation import ( - batch_spearman_correlation, - batch_spearman_pairwise, - compute_correlation_batch, -) -from src.factorminer.factorminer.evaluation.metrics import ( - compute_factor_stats, - compute_ic, - compute_ic_mean, - compute_icir, -) - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Data types -# --------------------------------------------------------------------------- - -@dataclass -class CandidateFactor: - """A candidate factor to be evaluated.""" - - name: str - formula: str - signals: Optional[np.ndarray] = None # (M, T) computed signals - metadata: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class EvaluationResult: - """Result of evaluating a single candidate through the pipeline.""" - - factor_name: str - formula: str - ic_series: Optional[np.ndarray] = None - ic_mean: float = 0.0 - icir: float = 0.0 - max_correlation: float = 0.0 - correlated_with: Optional[str] = None - stage_passed: int = 0 # Highest stage passed (1-4), 0 if failed stage 1 - rejection_reason: Optional[str] = None - admitted: bool = False - replaced: Optional[str] = None # ID of replaced factor if replacement occurred - full_stats: Optional[dict] = None # Full stats from stage 4 - - def to_trajectory_dict(self) -> dict: - """Convert to a dict compatible with the memory formation trajectory format.""" - return { - "factor_id": self.factor_name, - "formula": self.formula, - "ic": self.ic_mean, - "icir": self.icir, - "max_correlation": self.max_correlation, - 
"correlated_with": self.correlated_with or "", - "admitted": self.admitted, - "rejection_reason": self.rejection_reason or "", - "replaced": self.replaced, - "stage_passed": self.stage_passed, - } - - -@dataclass -class FactorLibraryView: - """Read-only view of the factor library for the pipeline. - - Provides the data needed for correlation checks and replacement - decisions without exposing the full library internals. - """ - - factor_ids: List[str] - signals: Dict[str, np.ndarray] # factor_id -> (M, T) - ic_map: Dict[str, float] # factor_id -> absolute IC - - @property - def size(self) -> int: - return len(self.factor_ids) - - def get_signals_tensor(self) -> np.ndarray: - """Return library signals as a (N, M, T) tensor. - - Returns - ------- - np.ndarray, shape (N, M, T) - """ - if not self.factor_ids: - return np.array([]).reshape(0, 0, 0) - return np.stack([self.signals[fid] for fid in self.factor_ids], axis=0) - - -@dataclass -class PipelineConfig: - """Configuration for the validation pipeline.""" - - # Stage 1: Fast IC screening - ic_threshold: float = 0.04 - fast_screen_assets: int = 100 - - # Stage 2: Correlation threshold - correlation_threshold: float = 0.5 - - # Stage 2.5: Replacement - replacement_ic_min: float = 0.10 - replacement_ic_ratio: float = 1.3 - - # Stage 4: Full validation - icir_threshold: float = 0.5 - - # Parallelism - num_workers: int = 4 - backend: str = "numpy" # "numpy" or "gpu" - - @classmethod - def from_config(cls, mining_cfg, eval_cfg) -> PipelineConfig: - """Build from MiningConfig and EvaluationConfig objects.""" - return cls( - ic_threshold=mining_cfg.ic_threshold, - correlation_threshold=mining_cfg.correlation_threshold, - replacement_ic_min=mining_cfg.replacement_ic_min, - replacement_ic_ratio=mining_cfg.replacement_ic_ratio, - fast_screen_assets=eval_cfg.fast_screen_assets, - num_workers=eval_cfg.num_workers, - backend=eval_cfg.backend, - ) - - -# --------------------------------------------------------------------------- 
-# Worker function for multiprocessing -# --------------------------------------------------------------------------- - -def _evaluate_single_candidate_ic( - signals: np.ndarray, - returns: np.ndarray, -) -> Tuple[np.ndarray, float, float]: - """Compute IC series, IC mean, and ICIR for a single candidate. - - Designed to be called in a worker process. - """ - ic_series = compute_ic(signals, returns) - valid_ic = ic_series[~np.isnan(ic_series)] - ic_mean_val = float(np.mean(np.abs(valid_ic))) if len(valid_ic) > 0 else 0.0 - icir_val = compute_icir(ic_series) - return ic_series, ic_mean_val, icir_val - - -# --------------------------------------------------------------------------- -# Validation Pipeline -# --------------------------------------------------------------------------- - -class ValidationPipeline: - """Multi-stage factor evaluation pipeline. - - Implements the cascade: Fast IC -> Correlation -> Replacement -> - Dedup -> Full Validation. - - Parameters - ---------- - returns : np.ndarray, shape (M, T) - Forward returns for all assets. - library : FactorLibraryView - Current state of the factor library. - config : PipelineConfig - Pipeline configuration. - compute_signals_fn : callable, optional - Function(CandidateFactor, data) -> np.ndarray to compute signals - if not pre-computed. - data : dict, optional - Market data dict for signal computation. 
- """ - - def __init__( - self, - returns: np.ndarray, - library: FactorLibraryView, - config: PipelineConfig, - compute_signals_fn: Optional[Callable] = None, - data: Optional[Dict[str, np.ndarray]] = None, - ) -> None: - self.returns = returns - self.library = library - self.config = config - self.compute_signals_fn = compute_signals_fn - self.data = data - - M, T = returns.shape - # Pre-select a random subset of assets for fast screening - if config.fast_screen_assets < M: - rng = np.random.default_rng(42) - self._fast_idx = rng.choice(M, size=config.fast_screen_assets, replace=False) - else: - self._fast_idx = np.arange(M) - - self._fast_returns = returns[self._fast_idx, :] - - def evaluate_batch( - self, - candidates: List[CandidateFactor], - ) -> List[EvaluationResult]: - """Run the full multi-stage evaluation on a batch of candidates. - - Parameters - ---------- - candidates : list of CandidateFactor - Each candidate should have signals pre-computed or provide - a compute_signals_fn. - - Returns - ------- - list of EvaluationResult - One result per candidate, including rejected ones. 
- """ - if not candidates: - return [] - - # Ensure signals are computed - self._ensure_signals(candidates) - - results: Dict[str, EvaluationResult] = {} - - logger.info( - "Starting pipeline evaluation for %d candidates", len(candidates) - ) - - # Stage 1: Fast IC screening - passed_s1, failed_s1 = self._stage1_ic_screen(candidates) - for c, result in failed_s1: - results[c.name] = result - logger.info( - "Stage 1 (IC screen): %d passed, %d failed", - len(passed_s1), len(failed_s1), - ) - - if not passed_s1: - return list(results.values()) - - # Stage 2: Correlation check against library - passed_s2, failed_s2, replacement_candidates = self._stage2_correlation_check( - passed_s1 - ) - for c, result in failed_s2: - results[c.name] = result - logger.info( - "Stage 2 (correlation): %d passed, %d failed, %d for replacement", - len(passed_s2), len(failed_s2), len(replacement_candidates), - ) - - # Stage 2.5: Replacement check - replaced = self._stage25_replacement_check(replacement_candidates) - for c, result in replaced: - results[c.name] = result - logger.info("Stage 2.5 (replacement): %d replacements", len(replaced)) - - if not passed_s2 and not replaced: - return list(results.values()) - - # Combine stage 2 passes and successful replacements - to_dedup = list(passed_s2) - for c, result in replaced: - if result.admitted: - to_dedup.append(c) - - # Stage 3: Intra-batch deduplication - passed_s3, failed_s3 = self._stage3_batch_dedup(to_dedup) - for c, result in failed_s3: - results[c.name] = result - logger.info( - "Stage 3 (dedup): %d passed, %d failed", - len(passed_s3), len(failed_s3), - ) - - # Stage 4: Full validation - validated = self._stage4_full_validation(passed_s3) - for c, result in validated: - results[c.name] = result - logger.info( - "Stage 4 (full validation): %d admitted", - sum(1 for _, r in validated if r.admitted), - ) - - return list(results.values()) - - def _ensure_signals(self, candidates: List[CandidateFactor]) -> None: - """Compute signals 
for candidates that don't have them yet.""" - if self.compute_signals_fn is None: - return - for c in candidates: - if c.signals is None and self.data is not None: - c.signals = self.compute_signals_fn(c, self.data) - - # ----- Stage 1: Fast IC Screening ----- - - def _stage1_ic_screen( - self, - candidates: List[CandidateFactor], - ) -> Tuple[ - List[CandidateFactor], - List[Tuple[CandidateFactor, EvaluationResult]], - ]: - """Stage 1: Fast IC screening on asset subset. - - C1 = {a in C : |IC(a)| >= tau_IC} - - Returns (passed, failed) where failed includes EvaluationResults. - """ - passed = [] - failed = [] - threshold = self.config.ic_threshold - - for c in candidates: - if c.signals is None: - failed.append((c, EvaluationResult( - factor_name=c.name, - formula=c.formula, - stage_passed=0, - rejection_reason="No signals computed", - ))) - continue - - # Use fast subset - fast_signals = c.signals[self._fast_idx, :] - ic_series = compute_ic(fast_signals, self._fast_returns) - valid_ic = ic_series[~np.isnan(ic_series)] - - if len(valid_ic) == 0: - failed.append((c, EvaluationResult( - factor_name=c.name, - formula=c.formula, - stage_passed=0, - rejection_reason="No valid IC values", - ))) - continue - - ic_abs_mean = float(np.mean(np.abs(valid_ic))) - - if ic_abs_mean < threshold: - failed.append((c, EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=ic_series, - ic_mean=ic_abs_mean, - stage_passed=0, - rejection_reason=f"Stage 1: |IC|={ic_abs_mean:.4f} < {threshold}", - ))) - else: - # Store fast IC for later use - c.metadata["fast_ic_series"] = ic_series - c.metadata["fast_ic_mean"] = ic_abs_mean - passed.append(c) - - return passed, failed - - # ----- Stage 2: Correlation Check ----- - - def _stage2_correlation_check( - self, - candidates: List[CandidateFactor], - ) -> Tuple[ - List[CandidateFactor], - List[Tuple[CandidateFactor, EvaluationResult]], - List[Tuple[CandidateFactor, Dict[str, float]]], - ]: - """Stage 2: Correlation check 
against the library. - - C2 = {a in C1 : max_{g in L} |rho(a,g)| < theta} - - Returns (passed, failed, replacement_candidates). - replacement_candidates contains candidates that failed correlation - but might qualify for replacement. - """ - passed = [] - failed = [] - replacement_candidates = [] - - if self.library.size == 0: - # Empty library: all pass - return candidates, failed, replacement_candidates - - theta = self.config.correlation_threshold - lib_tensor = self.library.get_signals_tensor() - - for c in candidates: - # Compute correlation with all library factors - corrs = compute_correlation_batch( - c.signals, - lib_tensor, - backend=self.config.backend, - ) - abs_corrs = np.abs(corrs) - max_idx = int(np.argmax(abs_corrs)) - max_corr = float(abs_corrs[max_idx]) - correlated_with = self.library.factor_ids[max_idx] - - if max_corr < theta: - c.metadata["max_correlation"] = max_corr - c.metadata["correlated_with"] = correlated_with - passed.append(c) - else: - ic_abs = c.metadata.get("fast_ic_mean", 0.0) - - # Check if candidate qualifies for replacement - if ic_abs >= self.config.replacement_ic_min: - # Store full correlation map for replacement check - corr_map = { - fid: float(corrs[i]) - for i, fid in enumerate(self.library.factor_ids) - } - c.metadata["max_correlation"] = max_corr - c.metadata["correlated_with"] = correlated_with - c.metadata["correlation_map"] = corr_map - replacement_candidates.append((c, corr_map)) - else: - failed.append((c, EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=c.metadata.get("fast_ic_series"), - ic_mean=ic_abs, - max_correlation=max_corr, - correlated_with=correlated_with, - stage_passed=1, - rejection_reason=( - f"Stage 2: max|rho|={max_corr:.4f} >= {theta} " - f"(with {correlated_with})" - ), - ))) - - return passed, failed, replacement_candidates - - # ----- Stage 2.5: Replacement Check ----- - - def _stage25_replacement_check( - self, - replacement_candidates: List[Tuple[CandidateFactor, 
Dict[str, float]]], - ) -> List[Tuple[CandidateFactor, EvaluationResult]]: - """Stage 2.5: Check if rejected candidates can replace library members. - - For a in C1 \\ C2, check replacement rule (Eq. 11): - |IC(a)| >= 0.10 - |IC(a)| >= 1.3 * |IC(g)| - |{g : |rho(a,g)| >= theta}| == 1 - """ - results = [] - - for c, corr_map in replacement_candidates: - ic_abs = c.metadata.get("fast_ic_mean", 0.0) - max_corr = c.metadata.get("max_correlation", 0.0) - correlated_with = c.metadata.get("correlated_with") - - decision = check_replacement( - candidate_ic_abs=ic_abs, - max_corr=max_corr, - correlated_with=correlated_with, - library_ic_map=self.library.ic_map, - correlation_map=corr_map, - replacement_ic_min=self.config.replacement_ic_min, - replacement_ic_ratio=self.config.replacement_ic_ratio, - correlation_threshold=self.config.correlation_threshold, - ) - - result = EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=c.metadata.get("fast_ic_series"), - ic_mean=ic_abs, - max_correlation=max_corr, - correlated_with=correlated_with, - admitted=decision.admitted, - replaced=decision.replaced_factor_id, - stage_passed=2 if decision.admitted else 1, - rejection_reason=decision.rejection_reason, - ) - results.append((c, result)) - - return results - - # ----- Stage 3: Batch Deduplication ----- - - def _stage3_batch_dedup( - self, - candidates: List[CandidateFactor], - ) -> Tuple[ - List[CandidateFactor], - List[Tuple[CandidateFactor, EvaluationResult]], - ]: - """Stage 3: Intra-batch deduplication. - - Remove candidates that are too correlated with each other - within the same batch, keeping the one with higher IC. 
- """ - if len(candidates) <= 1: - return candidates, [] - - theta = self.config.correlation_threshold - signals_list = [c.signals for c in candidates] - corr_matrix = batch_spearman_pairwise(signals_list) - - # Greedy dedup: sort by IC descending, keep each if not correlated - # with any already-kept candidate - ic_vals = [c.metadata.get("fast_ic_mean", 0.0) for c in candidates] - order = sorted(range(len(candidates)), key=lambda i: -ic_vals[i]) - - kept_indices = set() - removed = [] - - for idx in order: - is_correlated = False - for kept_idx in kept_indices: - if abs(corr_matrix[idx, kept_idx]) >= theta: - is_correlated = True - removed.append((candidates[idx], EvaluationResult( - factor_name=candidates[idx].name, - formula=candidates[idx].formula, - ic_mean=ic_vals[idx], - max_correlation=float(abs(corr_matrix[idx, kept_idx])), - correlated_with=candidates[kept_idx].name, - stage_passed=2, - rejection_reason=( - f"Stage 3: intra-batch dup with {candidates[kept_idx].name} " - f"(rho={corr_matrix[idx, kept_idx]:.4f})" - ), - ))) - break - if not is_correlated: - kept_indices.add(idx) - - passed = [candidates[i] for i in sorted(kept_indices)] - return passed, removed - - # ----- Stage 4: Full Validation ----- - - def _stage4_full_validation( - self, - candidates: List[CandidateFactor], - ) -> List[Tuple[CandidateFactor, EvaluationResult]]: - """Stage 4: Full validation on complete asset universe. - - Compute comprehensive statistics using all assets and apply - final quality checks. 
- """ - results = [] - threshold = self.config.ic_threshold - - use_parallel = self.config.num_workers > 1 and len(candidates) > 1 - - if use_parallel: - results = self._stage4_parallel(candidates) - else: - for c in candidates: - result = self._validate_single(c) - results.append((c, result)) - - return results - - def _validate_single(self, c: CandidateFactor) -> EvaluationResult: - """Run full validation for a single candidate.""" - stats = compute_factor_stats(c.signals, self.returns) - ic_series = stats["ic_series"] - ic_abs_mean = stats["ic_abs_mean"] - icir = stats["icir"] - - max_corr = c.metadata.get("max_correlation", 0.0) - correlated_with = c.metadata.get("correlated_with") - replaced = c.metadata.get("replaced") if "replaced" in c.metadata else None - - # Check if previously marked as replacement - if hasattr(c, "_replacement_target"): - replaced = c._replacement_target - - # Apply final threshold - if ic_abs_mean < self.config.ic_threshold: - return EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=ic_series, - ic_mean=ic_abs_mean, - icir=icir, - max_correlation=max_corr, - correlated_with=correlated_with, - stage_passed=3, - rejection_reason=( - f"Stage 4: full |IC|={ic_abs_mean:.4f} < {self.config.ic_threshold}" - ), - admitted=False, - full_stats=stats, - ) - - return EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=ic_series, - ic_mean=ic_abs_mean, - icir=icir, - max_correlation=max_corr, - correlated_with=correlated_with, - stage_passed=4, - admitted=True, - replaced=replaced, - full_stats=stats, - ) - - def _stage4_parallel( - self, - candidates: List[CandidateFactor], - ) -> List[Tuple[CandidateFactor, EvaluationResult]]: - """Run stage 4 in parallel using ProcessPoolExecutor. - - Each worker evaluates one candidate independently. Since signals - and returns are numpy arrays, they can be pickled for IPC. 
- """ - results = [] - futures_map = {} - - with ProcessPoolExecutor(max_workers=self.config.num_workers) as executor: - for c in candidates: - future = executor.submit( - _evaluate_single_candidate_ic, - c.signals, - self.returns, - ) - futures_map[future] = c - - for future in as_completed(futures_map): - c = futures_map[future] - try: - ic_series, ic_abs_mean, icir = future.result() - - max_corr = c.metadata.get("max_correlation", 0.0) - correlated_with = c.metadata.get("correlated_with") - - if ic_abs_mean < self.config.ic_threshold: - result = EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=ic_series, - ic_mean=ic_abs_mean, - icir=icir, - max_correlation=max_corr, - correlated_with=correlated_with, - stage_passed=3, - rejection_reason=( - f"Stage 4: full |IC|={ic_abs_mean:.4f} " - f"< {self.config.ic_threshold}" - ), - admitted=False, - ) - else: - result = EvaluationResult( - factor_name=c.name, - formula=c.formula, - ic_series=ic_series, - ic_mean=ic_abs_mean, - icir=icir, - max_correlation=max_corr, - correlated_with=correlated_with, - stage_passed=4, - admitted=True, - ) - results.append((c, result)) - - except Exception as e: - logger.error("Worker failed for %s: %s", c.name, e) - results.append((c, EvaluationResult( - factor_name=c.name, - formula=c.formula, - stage_passed=3, - rejection_reason=f"Stage 4 error: {e}", - admitted=False, - ))) - - return results - - -# --------------------------------------------------------------------------- -# Convenience: Run the full pipeline -# --------------------------------------------------------------------------- - -def run_evaluation_pipeline( - candidates: List[CandidateFactor], - returns: np.ndarray, - library: FactorLibraryView, - config: PipelineConfig, - compute_signals_fn: Optional[Callable] = None, - data: Optional[Dict[str, np.ndarray]] = None, -) -> List[EvaluationResult]: - """One-shot convenience function to run the full evaluation pipeline. 
- - Parameters - ---------- - candidates : list of CandidateFactor - returns : np.ndarray, shape (M, T) - library : FactorLibraryView - config : PipelineConfig - compute_signals_fn : callable, optional - data : dict, optional - - Returns - ------- - list of EvaluationResult - """ - pipeline = ValidationPipeline( - returns=returns, - library=library, - config=config, - compute_signals_fn=compute_signals_fn, - data=data, - ) - return pipeline.evaluate_batch(candidates) diff --git a/src/factorminer/factorminer/evaluation/portfolio.py b/src/factorminer/factorminer/evaluation/portfolio.py deleted file mode 100644 index 757ce45..0000000 --- a/src/factorminer/factorminer/evaluation/portfolio.py +++ /dev/null @@ -1,266 +0,0 @@ -"""Portfolio construction and quintile backtesting. - -Implements quintile-sorted long-short portfolio backtesting with -transaction cost pressure testing, following the FactorMiner paper methodology. -""" - -from __future__ import annotations - -from typing import Dict, List, Optional - -import numpy as np -from scipy.stats import spearmanr - - -class PortfolioBacktester: - """Backtest factor signals using quintile portfolios.""" - - # ------------------------------------------------------------------ - # Main backtest - # ------------------------------------------------------------------ - - def quintile_backtest( - self, - combined_signal: np.ndarray, - returns: np.ndarray, - transaction_cost_bps: float = 0, - ) -> dict: - """Run quintile portfolio backtest. - - At each time step t, sort assets into 5 quintiles by signal strength. - Q5 = highest signal (long), Q1 = lowest signal (short). - - Parameters - ---------- - combined_signal : ndarray of shape (T, N) - Composite factor signal. - returns : ndarray of shape (T, N) - Forward returns aligned with the signal. - transaction_cost_bps : float - One-way transaction cost in basis points (1 bp = 0.01%). - - Returns - ------- - dict with keys: - q1_return .. 
q5_return : float - Average annualized return per quintile. - ls_return : float - Average long-short return (Q5 - Q1). - ls_cumulative : ndarray - Cumulative long-short return series. - ic_mean : float - icir : float - ic_win_rate : float - Fraction of periods with IC > 0. - monotonicity : float - 1.0 if perfect Q1 < Q2 < ... < Q5 ordering of mean returns. - avg_turnover : float - Mean daily turnover of the long quintile. - """ - combined_signal = np.asarray(combined_signal, dtype=np.float64) - returns = np.asarray(returns, dtype=np.float64) - T, N = combined_signal.shape - cost_frac = transaction_cost_bps / 10000.0 - - # Per-period quintile returns - quintile_returns = np.full((T, 5), np.nan) - for t in range(T): - sig_t = combined_signal[t] - ret_t = returns[t] - valid = np.isfinite(sig_t) & np.isfinite(ret_t) - n_valid = valid.sum() - if n_valid < 5: - continue - ranks = _rank_array(sig_t[valid]) - boundaries = np.linspace(0, 1, 6) - for q in range(5): - mask = (ranks >= boundaries[q]) & (ranks < boundaries[q + 1]) - if q == 4: - mask = (ranks >= boundaries[q]) & (ranks <= boundaries[q + 1]) - if mask.sum() > 0: - quintile_returns[t, q] = np.mean(ret_t[valid][mask]) - - # Turnover for cost adjustment - turnover = self.compute_turnover(combined_signal, top_fraction=0.2) - avg_turnover = float(np.nanmean(turnover)) - - # Long-short return (Q5 - Q1) with transaction costs - ls_raw = quintile_returns[:, 4] - quintile_returns[:, 0] - ls_cost = 2.0 * cost_frac * turnover # both legs - ls_net = np.where( - np.isfinite(ls_raw), - ls_raw - ls_cost, - np.nan, - ) - ls_cumulative = np.nancumsum(np.where(np.isfinite(ls_net), ls_net, 0.0)) - - # IC series (cross-sectional Spearman rank correlation) - ic_series = np.full(T, np.nan) - for t in range(T): - sig_t = combined_signal[t] - ret_t = returns[t] - valid = np.isfinite(sig_t) & np.isfinite(ret_t) - if valid.sum() < 5: - continue - corr, _ = spearmanr(sig_t[valid], ret_t[valid]) - if np.isfinite(corr): - ic_series[t] = 
corr - - finite_ic = ic_series[np.isfinite(ic_series)] - if len(finite_ic) > 1: - ic_mean = float(np.mean(finite_ic)) - ic_std = float(np.std(finite_ic, ddof=1)) - icir = ic_mean / ic_std if ic_std > 1e-12 else 0.0 - ic_win_rate = float(np.mean(finite_ic > 0)) - else: - ic_mean = 0.0 - icir = 0.0 - ic_win_rate = 0.0 - - # Mean quintile returns - q_means = [float(np.nanmean(quintile_returns[:, q])) for q in range(5)] - - # Monotonicity: fraction of adjacent quintile pairs in correct order - correct_pairs = sum( - 1 for i in range(4) if q_means[i] < q_means[i + 1] - ) - monotonicity = correct_pairs / 4.0 - - return { - "q1_return": q_means[0], - "q2_return": q_means[1], - "q3_return": q_means[2], - "q4_return": q_means[3], - "q5_return": q_means[4], - "ls_return": float(np.nanmean(ls_net)), - "ls_gross_return": float(np.nanmean(ls_raw)), - "ls_cumulative": ls_cumulative, - "ls_gross_series": ls_raw, - "ls_net_series": ls_net, - "quintile_period_returns": quintile_returns, - "turnover_series": turnover, - "ic_series": ic_series, - "ic_mean": ic_mean, - "icir": icir, - "ic_win_rate": ic_win_rate, - "monotonicity": monotonicity, - "avg_turnover": avg_turnover, - } - - # ------------------------------------------------------------------ - # Cost pressure testing - # ------------------------------------------------------------------ - - def cost_pressure_test( - self, - combined_signal: np.ndarray, - returns: np.ndarray, - cost_settings: Optional[List[float]] = None, - ) -> Dict[float, dict]: - """Run backtest under multiple transaction cost settings (in bps). - - Paper Figure 9: Test at 1, 4, 7, 10, 11 bps. - - Parameters - ---------- - combined_signal : ndarray of shape (T, N) - returns : ndarray of shape (T, N) - cost_settings : list of float or None - Transaction cost levels in basis points. - Defaults to [1, 4, 7, 10, 11]. - - Returns - ------- - dict mapping cost_bps -> backtest result dict. 
- """ - if cost_settings is None: - cost_settings = [1.0, 4.0, 7.0, 10.0, 11.0] - - results: Dict[float, dict] = {} - for cost_bps in cost_settings: - results[cost_bps] = self.quintile_backtest( - combined_signal, returns, transaction_cost_bps=cost_bps, - ) - return results - - # ------------------------------------------------------------------ - # Turnover computation - # ------------------------------------------------------------------ - - def compute_turnover( - self, - signal: np.ndarray, - top_fraction: float = 0.2, - ) -> np.ndarray: - """Compute daily turnover of the top/bottom quintile portfolios. - - Turnover is defined as the fraction of assets that change between - consecutive rebalance periods in the top-quintile portfolio. - - Parameters - ---------- - signal : ndarray of shape (T, N) - top_fraction : float - Fraction of assets in each quintile (default 0.2 = top 20%). - - Returns - ------- - ndarray of shape (T,) - Per-period turnover ratios. First period is 0. - """ - signal = np.asarray(signal, dtype=np.float64) - T, N = signal.shape - turnover = np.zeros(T) - prev_top: Optional[np.ndarray] = None - - for t in range(T): - sig_t = signal[t] - valid = np.isfinite(sig_t) - n_valid = valid.sum() - if n_valid < 5: - prev_top = None - continue - - k = max(1, int(n_valid * top_fraction)) - valid_idx = np.where(valid)[0] - valid_vals = sig_t[valid_idx] - # Indices of top-k assets - top_idx = valid_idx[np.argpartition(valid_vals, -k)[-k:]] - top_set = np.zeros(N, dtype=bool) - top_set[top_idx] = True - - if prev_top is not None: - changed = np.sum(top_set != prev_top) - turnover[t] = changed / (2.0 * k) # normalize by portfolio size - prev_top = top_set - - return turnover - - -# ------------------------------------------------------------------ -# Module-level helpers -# ------------------------------------------------------------------ - -def _rank_array(x: np.ndarray) -> np.ndarray: - """Compute percentile ranks in [0, 1] for a 1-D array. 
- - Ties receive the average rank. - """ - n = len(x) - if n == 0: - return x.copy() - order = x.argsort() - ranks = np.empty(n, dtype=np.float64) - ranks[order] = np.arange(n, dtype=np.float64) - # Handle ties by averaging - sorted_x = x[order] - i = 0 - while i < n: - j = i - while j < n and sorted_x[j] == sorted_x[i]: - j += 1 - avg_rank = (i + j - 1) / 2.0 - for k in range(i, j): - ranks[order[k]] = avg_rank - i = j - return ranks / max(n - 1, 1) diff --git a/src/factorminer/factorminer/evaluation/regime.py b/src/factorminer/factorminer/evaluation/regime.py deleted file mode 100644 index 8c11995..0000000 --- a/src/factorminer/factorminer/evaluation/regime.py +++ /dev/null @@ -1,623 +0,0 @@ -"""Regime-aware factor validation. - -Classifies market periods into BULL / BEAR / SIDEWAYS regimes using -rolling return and volatility statistics, then evaluates factor IC -within each regime to ensure robustness across market conditions. -""" - -from __future__ import annotations - -import enum -from dataclasses import dataclass -from typing import Dict - -import numpy as np -from scipy.stats import rankdata - - -# --------------------------------------------------------------------------- -# Regime enum -# --------------------------------------------------------------------------- - -class MarketRegime(enum.Enum): - """Market regime labels.""" - - BULL = 0 - BEAR = 1 - SIDEWAYS = 2 - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - -@dataclass -class RegimeConfig: - """Parameters controlling regime detection and per-regime IC validation. - - Attributes - ---------- - enabled : bool - Whether regime-aware evaluation is active. - lookback_window : int - Rolling window length (periods) for mean-return and volatility - estimation. 
- bull_return_threshold : float - Minimum rolling-mean return to qualify as BULL (when volatility is - also below the threshold). - bear_return_threshold : float - Maximum rolling-mean return to qualify as BEAR. - volatility_percentile : float - Percentile (0-1) of rolling volatility used to compute the - vol_threshold separating low-vol (BULL) from high-vol environments. - min_regime_ic : float - Minimum mean |IC| required within a single regime for it to "pass". - min_regimes_passing : int - How many regimes must pass for the factor to be accepted. - """ - - enabled: bool = True - lookback_window: int = 60 - bull_return_threshold: float = 0.0 - bear_return_threshold: float = 0.0 - volatility_percentile: float = 0.7 - min_regime_ic: float = 0.03 - min_regimes_passing: int = 2 - - -# --------------------------------------------------------------------------- -# Classification result -# --------------------------------------------------------------------------- - -@dataclass -class RegimeClassification: - """Output of :class:`RegimeDetector.classify`. - - Attributes - ---------- - labels : np.ndarray, shape (T,) - Integer regime codes per period (0=BULL, 1=BEAR, 2=SIDEWAYS). - periods : Dict[MarketRegime, np.ndarray] - Boolean masks of shape (T,) indicating which periods belong to - each regime. - stats : Dict[MarketRegime, Dict[str, float]] - Descriptive statistics per regime: ``mean_return``, ``volatility``, - ``n_periods``. - """ - - labels: np.ndarray - periods: Dict[MarketRegime, np.ndarray] - stats: Dict[MarketRegime, Dict[str, float]] - - -# --------------------------------------------------------------------------- -# Regime detector -# --------------------------------------------------------------------------- - -class RegimeDetector: - """Classify time periods into market regimes. - - Parameters - ---------- - config : RegimeConfig - Regime detection parameters. 
- """ - - def __init__(self, config: RegimeConfig | None = None) -> None: - self.config = config or RegimeConfig() - - # ----- public API ----- - - def classify(self, returns: np.ndarray) -> RegimeClassification: - """Classify each period into a market regime. - - Parameters - ---------- - returns : np.ndarray, shape (M, T) - Forward returns for *M* assets over *T* periods. - - Returns - ------- - RegimeClassification - """ - cfg = self.config - M, T = returns.shape - - # Cross-sectional average return per period (handles NaN) - market_return = np.nanmean(returns, axis=0) # (T,) - - # Rolling statistics - rolling_mean = self._rolling_nanmean(market_return, cfg.lookback_window) - rolling_vol = self._rolling_nanstd(market_return, cfg.lookback_window) - - # Volatility threshold from valid (non-NaN) rolling vol values - valid_vol = rolling_vol[~np.isnan(rolling_vol)] - if len(valid_vol) > 0: - vol_threshold = float( - np.percentile(valid_vol, cfg.volatility_percentile * 100) - ) - else: - vol_threshold = np.inf # fallback: nothing qualifies as low-vol - - # Assign labels - labels = np.full(T, MarketRegime.SIDEWAYS.value, dtype=np.int64) - - # BEAR: rolling_return < bear_threshold (checked first) - bear_mask = rolling_mean < cfg.bear_return_threshold - labels[bear_mask] = MarketRegime.BEAR.value - - # BULL: rolling_return > bull_threshold AND rolling_vol < vol_threshold - bull_mask = (rolling_mean > cfg.bull_return_threshold) & ( - rolling_vol < vol_threshold - ) - labels[bull_mask] = MarketRegime.BULL.value - - # First lookback_window periods default to SIDEWAYS - labels[: cfg.lookback_window] = MarketRegime.SIDEWAYS.value - - # Build boolean masks & stats - periods: Dict[MarketRegime, np.ndarray] = {} - stats: Dict[MarketRegime, Dict[str, float]] = {} - - for regime in MarketRegime: - mask = labels == regime.value - periods[regime] = mask - regime_returns = market_return[mask] - valid = regime_returns[~np.isnan(regime_returns)] - stats[regime] = { - "mean_return": 
float(np.mean(valid)) if len(valid) > 0 else 0.0, - "volatility": float(np.std(valid, ddof=1)) if len(valid) > 1 else 0.0, - "n_periods": int(mask.sum()), - } - - return RegimeClassification(labels=labels, periods=periods, stats=stats) - - # ----- helpers ----- - - @staticmethod - def _rolling_nanmean(arr: np.ndarray, window: int) -> np.ndarray: - """Rolling mean that ignores NaN, returning NaN for the first *window-1* entries.""" - T = len(arr) - out = np.full(T, np.nan, dtype=np.float64) - for t in range(window - 1, T): - chunk = arr[t - window + 1 : t + 1] - valid = chunk[~np.isnan(chunk)] - if len(valid) > 0: - out[t] = float(np.mean(valid)) - return out - - @staticmethod - def _rolling_nanstd(arr: np.ndarray, window: int) -> np.ndarray: - """Rolling std (ddof=1) that ignores NaN.""" - T = len(arr) - out = np.full(T, np.nan, dtype=np.float64) - for t in range(window - 1, T): - chunk = arr[t - window + 1 : t + 1] - valid = chunk[~np.isnan(chunk)] - if len(valid) > 1: - out[t] = float(np.std(valid, ddof=1)) - return out - - -# --------------------------------------------------------------------------- -# Per-regime IC result -# --------------------------------------------------------------------------- - -@dataclass -class RegimeICResult: - """Evaluation result for a single factor across market regimes. - - Attributes - ---------- - factor_name : str - Human-readable factor identifier. - regime_ic : Dict[MarketRegime, float] - Mean |IC| per regime. - regime_icir : Dict[MarketRegime, float] - ICIR per regime. - regime_n_periods : Dict[MarketRegime, int] - Number of valid IC periods per regime. - n_regimes_passing : int - How many regimes met the ``min_regime_ic`` threshold. - passes : bool - Whether ``n_regimes_passing >= config.min_regimes_passing``. - overall_regime_score : float - Weighted average |IC| across regimes (weighted by n_periods). 
- """ - - factor_name: str - regime_ic: Dict[MarketRegime, float] - regime_icir: Dict[MarketRegime, float] - regime_n_periods: Dict[MarketRegime, int] - n_regimes_passing: int - passes: bool - overall_regime_score: float - - -# --------------------------------------------------------------------------- -# Regime-aware evaluator -# --------------------------------------------------------------------------- - -class RegimeAwareEvaluator: - """Evaluate factor IC within each market regime. - - Parameters - ---------- - returns : np.ndarray, shape (M, T) - Forward returns. - regime : RegimeClassification - Pre-computed regime classification. - config : RegimeConfig - Thresholds and evaluation parameters. - """ - - def __init__( - self, - returns: np.ndarray, - regime: RegimeClassification, - config: RegimeConfig | None = None, - ) -> None: - self.returns = returns - self.regime = regime - self.config = config or RegimeConfig() - - # ----- public API ----- - - def evaluate(self, factor_name: str, signals: np.ndarray) -> RegimeICResult: - """Evaluate a single factor across regimes. - - Parameters - ---------- - factor_name : str - Identifier for reporting. - signals : np.ndarray, shape (M, T) - Factor signal matrix. 
- - Returns - ------- - RegimeICResult - """ - cfg = self.config - min_periods = cfg.lookback_window * 2 - - regime_ic: Dict[MarketRegime, float] = {} - regime_icir: Dict[MarketRegime, float] = {} - regime_n_periods: Dict[MarketRegime, int] = {} - - for regime in MarketRegime: - mask = self.regime.periods[regime] - n_regime = int(mask.sum()) - - if n_regime < min_periods: - regime_ic[regime] = 0.0 - regime_icir[regime] = 0.0 - regime_n_periods[regime] = n_regime - continue - - # Extract time-sliced sub-arrays - indices = np.where(mask)[0] - sig_sub = signals[:, indices] - ret_sub = self.returns[:, indices] - - ic_series = self._compute_ic(sig_sub, ret_sub) - valid_ic = ic_series[~np.isnan(ic_series)] - - mean_abs_ic = float(np.mean(np.abs(valid_ic))) if len(valid_ic) > 0 else 0.0 - icir = self._compute_icir(valid_ic) - - regime_ic[regime] = mean_abs_ic - regime_icir[regime] = icir - regime_n_periods[regime] = int(len(valid_ic)) - - # Count passing regimes - n_passing = sum( - 1 - for r in MarketRegime - if regime_n_periods[r] >= min_periods and regime_ic[r] >= cfg.min_regime_ic - ) - - # Weighted average IC - total_weight = sum(regime_n_periods[r] for r in MarketRegime) - if total_weight > 0: - overall_score = sum( - regime_ic[r] * regime_n_periods[r] for r in MarketRegime - ) / total_weight - else: - overall_score = 0.0 - - return RegimeICResult( - factor_name=factor_name, - regime_ic=regime_ic, - regime_icir=regime_icir, - regime_n_periods=regime_n_periods, - n_regimes_passing=n_passing, - passes=n_passing >= cfg.min_regimes_passing, - overall_regime_score=overall_score, - ) - - def evaluate_batch( - self, - candidates: Dict[str, np.ndarray], - ) -> Dict[str, RegimeICResult]: - """Evaluate multiple factors. - - Parameters - ---------- - candidates : Dict[str, np.ndarray] - Mapping of factor name to signal matrix (M, T). 
- - Returns - ------- - Dict[str, RegimeICResult] - """ - return {name: self.evaluate(name, sig) for name, sig in candidates.items()} - - # ----- IC helpers (mirrors metrics.py conventions) ----- - - @staticmethod - def _compute_ic(signals: np.ndarray, returns: np.ndarray) -> np.ndarray: - """Cross-sectional Spearman IC per period. - - Replicates the logic in ``metrics.compute_ic`` to keep this module - self-contained while matching the project convention. - """ - M, T = signals.shape - ic_series = np.full(T, np.nan, dtype=np.float64) - - for t in range(T): - s = signals[:, t] - r = returns[:, t] - valid = ~(np.isnan(s) | np.isnan(r)) - n = valid.sum() - if n < 5: - continue - rs = rankdata(s[valid]) - rr = rankdata(r[valid]) - rs_m = rs - rs.mean() - rr_m = rr - rr.mean() - denom = np.sqrt((rs_m ** 2).sum() * (rr_m ** 2).sum()) - if denom < 1e-12: - ic_series[t] = 0.0 - else: - ic_series[t] = (rs_m * rr_m).sum() / denom - - return ic_series - - @staticmethod - def _compute_icir(valid_ic: np.ndarray) -> float: - """ICIR = mean(IC) / std(IC).""" - if len(valid_ic) < 3: - return 0.0 - std = float(np.std(valid_ic, ddof=1)) - if std < 1e-12: - return 0.0 - return float(np.mean(valid_ic)) / std - - -# --------------------------------------------------------------------------- -# Phase 2: Streaming regime detection (added for HelixFactor) -# --------------------------------------------------------------------------- - -class TrendRegime(enum.Enum): - BULL = "bull" - BEAR = "bear" - NEUTRAL = "neutral" - - -class VolRegime(enum.Enum): - HIGH_VOL = "high_vol" - LOW_VOL = "low_vol" - NORMAL_VOL = "normal_vol" - - -class MeanRevRegime(enum.Enum): - TRENDING = "trending" - MEAN_REVERTING = "mean_reverting" - RANDOM_WALK = "random_walk" - - -@dataclass -class RegimeState: - """Composite regime state: trend + vol + mean-reversion classification.""" - trend: TrendRegime = TrendRegime.NEUTRAL - vol: VolRegime = VolRegime.NORMAL_VOL - mean_rev: MeanRevRegime = 
MeanRevRegime.RANDOM_WALK - - def to_dict(self) -> dict: - return { - "trend": self.trend.value, - "vol": self.vol.value, - "mean_rev": self.mean_rev.value, - } - - @classmethod - def from_dict(cls, d: dict) -> "RegimeState": - return cls( - trend=TrendRegime(d.get("trend", "neutral")), - vol=VolRegime(d.get("vol", "normal_vol")), - mean_rev=MeanRevRegime(d.get("mean_rev", "random_walk")), - ) - - def __str__(self) -> str: - return f"{self.trend.value}/{self.vol.value}/{self.mean_rev.value}" - - def label(self) -> str: - return str(self) - - -@dataclass -class StreamingRegimeConfig: - """Configuration for StreamingRegimeDetector.""" - fast_alpha: float = 0.1 # EW decay for fast stats - slow_alpha: float = 0.02 # EW decay for slow (baseline) stats - trend_sigma_threshold: float = 1.0 # sigmas above/below zero for BULL/BEAR - vol_high_quantile: float = 0.75 # quantile threshold for HIGH_VOL - vol_low_quantile: float = 0.25 # quantile threshold for LOW_VOL - hurst_lags: tuple = (2, 4, 8, 16) # lags for variance-ratio Hurst estimate - hmm_smoothing: float = 0.3 # sticky-state weight (0 = no smoothing) - history_maxlen: int = 500 # max regime history records - - -class StreamingRegimeDetector: - """Bar-by-bar O(1) regime classifier using exponentially-weighted stats. 
- - Detects three independent regime axes: - - Trend: BULL / BEAR / NEUTRAL (EW mean vs threshold) - - Volatility: HIGH / LOW / NORMAL (EW std vs quantile buffer) - - Mean-reversion: TRENDING / MEAN_REVERTING / RANDOM_WALK (Hurst via variance ratio) - """ - - def __init__(self, config: StreamingRegimeConfig | None = None) -> None: - self.config = config or StreamingRegimeConfig() - # Exponentially-weighted moments - self._ew_mean: float = 0.0 - self._ew_var: float = 0.0 # fast (for current vol) - self._ew_var_slow: float = 0.0 # slow (baseline) - self._n: int = 0 - # Rolling buffers for variance-ratio Hurst - self._return_buffer: list = [] - self._vol_buffer: list = [] # rolling realized vol samples - # Regime history - from collections import deque - self._history: deque = deque(maxlen=self.config.history_maxlen) - self._transition_counts: dict = {} - self._current: RegimeState = RegimeState() - import threading - self._lock = threading.RLock() - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def update( - self, - returns: np.ndarray, # (M,) cross-sectional returns at this bar - volumes: np.ndarray | None = None, # (M,) optional — unused currently - ) -> RegimeState: - """Process one bar and return updated RegimeState.""" - with self._lock: - r = float(np.nanmean(returns)) - vol = float(np.nanstd(returns)) if len(returns) > 1 else 0.0 - self._update_moments(r, vol) - new_state = self._classify() - self._apply_smoothing(new_state) - self._record_transition(self._current, new_state) - self._current = new_state - self._history.append(new_state) - return new_state - - def get_current_regime(self) -> RegimeState: - with self._lock: - return self._current - - def get_regime_history(self, lookback: int = 20) -> list: - with self._lock: - hist = list(self._history) - return hist[-lookback:] if lookback else hist - - def regime_transition_probability(self) -> 
dict: - """Return dict of 'from/to' → empirical probability.""" - with self._lock: - total = sum(self._transition_counts.values()) - if total == 0: - return {} - return {k: v / total for k, v in self._transition_counts.items()} - - def reset(self) -> None: - with self._lock: - self.__init__(config=self.config) - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _update_moments(self, r: float, vol: float) -> None: - fa, sa = self.config.fast_alpha, self.config.slow_alpha - if self._n == 0: - self._ew_mean = r - self._ew_var = vol ** 2 - self._ew_var_slow = vol ** 2 - else: - self._ew_mean = fa * r + (1 - fa) * self._ew_mean - self._ew_var = fa * (r - self._ew_mean) ** 2 + (1 - fa) * self._ew_var - self._ew_var_slow = sa * vol ** 2 + (1 - sa) * self._ew_var_slow - self._n += 1 - self._return_buffer.append(r) - self._vol_buffer.append(vol) - if len(self._return_buffer) > 200: - self._return_buffer.pop(0) - self._vol_buffer.pop(0) - - def _classify(self) -> RegimeState: - return RegimeState( - trend=self._classify_trend(), - vol=self._classify_vol(), - mean_rev=self._classify_mean_rev(), - ) - - def _classify_trend(self) -> TrendRegime: - sigma = float(np.sqrt(max(self._ew_var, 1e-16))) - n = max(self._n, 1) - se = sigma / (n ** 0.5) - thresh = self.config.trend_sigma_threshold * se - if self._ew_mean > thresh: - return TrendRegime.BULL - elif self._ew_mean < -thresh: - return TrendRegime.BEAR - return TrendRegime.NEUTRAL - - def _classify_vol(self) -> VolRegime: - if len(self._vol_buffer) < 10: - return VolRegime.NORMAL_VOL - arr = np.array(self._vol_buffer) - cur = float(np.sqrt(max(self._ew_var, 0.0))) - hi = float(np.quantile(arr, self.config.vol_high_quantile)) - lo = float(np.quantile(arr, self.config.vol_low_quantile)) - if cur > hi: - return VolRegime.HIGH_VOL - elif cur < lo: - return VolRegime.LOW_VOL - return VolRegime.NORMAL_VOL - - def 
_classify_mean_rev(self) -> MeanRevRegime: - buf = self._return_buffer - if len(buf) < 32: - return MeanRevRegime.RANDOM_WALK - arr = np.array(buf) - lags = [l for l in self.config.hurst_lags if l < len(arr)] - if not lags: - return MeanRevRegime.RANDOM_WALK - ratios = [] - for lag in lags: - var_lag = float(np.var(arr[lag:] - arr[:-lag])) - var_1 = float(np.var(np.diff(arr))) if len(arr) > 1 else 1e-16 - if var_1 > 1e-16: - ratios.append(var_lag / (lag * var_1)) - if not ratios: - return MeanRevRegime.RANDOM_WALK - hurst_proxy = float(np.mean(ratios)) - if hurst_proxy > 1.1: - return MeanRevRegime.TRENDING - elif hurst_proxy < 0.9: - return MeanRevRegime.MEAN_REVERTING - return MeanRevRegime.RANDOM_WALK - - def _apply_smoothing(self, new_state: RegimeState) -> None: - """HMM-inspired: resist single-bar flips via smoothing weight.""" - w = self.config.hmm_smoothing - if w <= 0 or self._current is None: - return - # If smoothing weight is high and current state differs, revert in-place - # (simple sticky-state: only update if change is "strong enough") - # We achieve this by probabilistic rejection — deterministic version: - # keep current if random draw < smoothing weight (approximate) - import random - if (new_state.trend != self._current.trend or - new_state.vol != self._current.vol): - if random.random() < w: - new_state.trend = self._current.trend - new_state.vol = self._current.vol - - def _record_transition(self, old: RegimeState, new: RegimeState) -> None: - key = f"{old.label()}->{new.label()}" - self._transition_counts[key] = self._transition_counts.get(key, 0) + 1 diff --git a/src/factorminer/factorminer/evaluation/research.py b/src/factorminer/factorminer/evaluation/research.py deleted file mode 100644 index e4152bc..0000000 --- a/src/factorminer/factorminer/evaluation/research.py +++ /dev/null @@ -1,518 +0,0 @@ -"""Research-first multi-horizon scoring and model evaluation.""" - -from __future__ import annotations - -from dataclasses import asdict, 
dataclass, field -from typing import Dict, Iterable, Sequence - -import numpy as np - -from src.factorminer.factorminer.evaluation.backtest import rolling_splits -from src.factorminer.factorminer.evaluation.metrics import compute_factor_stats, compute_pairwise_correlation -from src.factorminer.factorminer.evaluation.portfolio import PortfolioBacktester -from src.factorminer.factorminer.evaluation.regime import RegimeAwareEvaluator, RegimeConfig, RegimeDetector -from src.factorminer.factorminer.evaluation.selection import FactorSelector -from src.factorminer.factorminer.evaluation.significance import BootstrapICTester, SignificanceConfig - - -@dataclass -class FactorGeometryDiagnostics: - """How much new information a factor adds beyond the current library.""" - - max_abs_correlation: float = 0.0 - mean_abs_correlation: float = 0.0 - projection_loss: float = 0.0 - marginal_span_gain: float = 1.0 - effective_rank_gain: float = 1.0 - residual_ic: float = 0.0 - - -@dataclass -class FactorScoreVector: - """Multi-horizon quality summary used in research mode.""" - - primary_objective: str - primary_score: float - lower_confidence_bound: float - weighted_score: float - decay_slope: float - cross_horizon_consistency: float - average_turnover: float - geometry: FactorGeometryDiagnostics - per_horizon_ic_mean: Dict[str, float] = field(default_factory=dict) - per_horizon_icir: Dict[str, float] = field(default_factory=dict) - per_horizon_shrunk_ic: Dict[str, float] = field(default_factory=dict) - per_horizon_se: Dict[str, float] = field(default_factory=dict) - per_horizon_lcb: Dict[str, float] = field(default_factory=dict) - per_horizon_turnover: Dict[str, float] = field(default_factory=dict) - pareto_dominant: bool = True - - def to_dict(self) -> dict: - payload = asdict(self) - payload["geometry"] = asdict(self.geometry) - return payload - - -def compute_factor_geometry( - candidate_signals: np.ndarray, - returns: np.ndarray, - library_signals: Sequence[np.ndarray] | None = 
None, -) -> FactorGeometryDiagnostics: - """Compute soft library geometry metrics for a candidate.""" - library_signals = list(library_signals or []) - if not library_signals: - return FactorGeometryDiagnostics( - max_abs_correlation=0.0, - mean_abs_correlation=0.0, - projection_loss=0.0, - marginal_span_gain=1.0, - effective_rank_gain=1.0, - residual_ic=float(compute_factor_stats(candidate_signals, returns)["ic_abs_mean"]), - ) - - corrs = [ - abs(compute_pairwise_correlation(candidate_signals, lib_signal)) - for lib_signal in library_signals - ] - flattened_candidate, valid_mask = _flatten_panel(candidate_signals) - library_vectors = [] - for signal in library_signals: - flattened_signal, _ = _flatten_panel(signal, valid_mask=valid_mask) - library_vectors.append(flattened_signal) - - if not library_vectors: - return FactorGeometryDiagnostics( - max_abs_correlation=max(corrs, default=0.0), - mean_abs_correlation=float(np.mean(corrs)) if corrs else 0.0, - residual_ic=float(compute_factor_stats(candidate_signals, returns)["ic_abs_mean"]), - ) - - design = np.column_stack(library_vectors) - response = flattened_candidate - if design.size == 0 or response.size == 0 or np.nanstd(response) < 1e-12: - projection_loss = 0.0 - marginal_span_gain = 1.0 - residual_matrix = candidate_signals - else: - beta, *_ = np.linalg.lstsq(design, response, rcond=None) - fitted = design @ beta - residual = response - fitted - response_var = float(np.var(response)) - residual_var = float(np.var(residual)) - marginal_span_gain = residual_var / response_var if response_var > 1e-12 else 0.0 - projection_loss = 1.0 - marginal_span_gain - residual_matrix = _unflatten_panel(residual, valid_mask, candidate_signals.shape) - - before_rank = _effective_rank(design) - after_rank = _effective_rank(np.column_stack([design, response])) - residual_ic = float(compute_factor_stats(residual_matrix, returns)["ic_abs_mean"]) - - return FactorGeometryDiagnostics( - max_abs_correlation=max(corrs, default=0.0), 
- mean_abs_correlation=float(np.mean(corrs)) if corrs else 0.0, - projection_loss=float(projection_loss), - marginal_span_gain=float(max(marginal_span_gain, 0.0)), - effective_rank_gain=float(after_rank - before_rank), - residual_ic=residual_ic, - ) - - -def build_score_vector( - target_stats: Dict[str, dict], - target_horizons: Dict[str, int], - research_cfg, - geometry: FactorGeometryDiagnostics, -) -> FactorScoreVector: - """Aggregate per-target metrics into one research-mode score vector.""" - weights = _normalized_weights( - target_stats.keys(), - explicit_weights=getattr(research_cfg, "horizon_weights", {}), - ) - uncertainty_cfg = research_cfg.uncertainty - admission_cfg = research_cfg.admission - - per_horizon_ic_mean: Dict[str, float] = {} - per_horizon_icir: Dict[str, float] = {} - per_horizon_shrunk_ic: Dict[str, float] = {} - per_horizon_se: Dict[str, float] = {} - per_horizon_lcb: Dict[str, float] = {} - per_horizon_turnover: Dict[str, float] = {} - - for target_name, stats in target_stats.items(): - ic_series = np.asarray(stats.get("ic_series", np.array([])), dtype=np.float64) - se = _bootstrap_standard_error(ic_series, uncertainty_cfg) - ic_abs_mean = float(stats.get("ic_abs_mean", 0.0)) - shrunk_ic = max(ic_abs_mean - uncertainty_cfg.shrinkage_strength * se, 0.0) - lcb = ic_abs_mean - uncertainty_cfg.lcb_zscore * se - - per_horizon_ic_mean[target_name] = float(stats.get("ic_mean", 0.0)) - per_horizon_icir[target_name] = float(stats.get("icir", 0.0)) - per_horizon_shrunk_ic[target_name] = float(shrunk_ic) - per_horizon_se[target_name] = float(se) - per_horizon_lcb[target_name] = float(lcb) - per_horizon_turnover[target_name] = float(stats.get("turnover", 0.0)) - - weighted_quality = float( - sum(weights[name] * per_horizon_shrunk_ic.get(name, 0.0) for name in weights) - ) - average_turnover = float( - np.mean(list(per_horizon_turnover.values())) if per_horizon_turnover else 0.0 - ) - lower_confidence_bound = float( - min(per_horizon_lcb.values()) if 
per_horizon_lcb else 0.0 - ) - redundancy_penalty = admission_cfg.redundancy_penalty * geometry.max_abs_correlation - turnover_penalty = admission_cfg.turnover_penalty * average_turnover - geometry_bonus = 0.0 - if admission_cfg.use_residual_ic: - geometry_bonus += 0.5 * geometry.residual_ic - if admission_cfg.use_effective_rank_gain: - geometry_bonus += 0.05 * max(geometry.effective_rank_gain, 0.0) - - weighted_score = weighted_quality - redundancy_penalty - turnover_penalty + geometry_bonus - decay_slope = _decay_slope(target_horizons, per_horizon_shrunk_ic) - consistency = _cross_horizon_consistency(per_horizon_ic_mean) - - return FactorScoreVector( - primary_objective=research_cfg.primary_objective, - primary_score=weighted_score, - lower_confidence_bound=lower_confidence_bound, - weighted_score=weighted_score, - decay_slope=decay_slope, - cross_horizon_consistency=consistency, - average_turnover=average_turnover, - geometry=geometry, - per_horizon_ic_mean=per_horizon_ic_mean, - per_horizon_icir=per_horizon_icir, - per_horizon_shrunk_ic=per_horizon_shrunk_ic, - per_horizon_se=per_horizon_se, - per_horizon_lcb=per_horizon_lcb, - per_horizon_turnover=per_horizon_turnover, - ) - - -def passes_research_admission( - score_vector: FactorScoreVector, - research_cfg, - correlation_threshold: float, -) -> tuple[bool, str]: - """Apply research-mode admission rules on top of paper-style correlation.""" - admission_cfg = research_cfg.admission - if score_vector.primary_score < admission_cfg.min_score: - return False, ( - f"Research score {score_vector.primary_score:.4f} " - f"< {admission_cfg.min_score:.4f}" - ) - if score_vector.lower_confidence_bound < admission_cfg.min_lcb: - return False, ( - f"Research LCB {score_vector.lower_confidence_bound:.4f} " - f"< {admission_cfg.min_lcb:.4f}" - ) - if score_vector.geometry.max_abs_correlation < correlation_threshold: - return True, "Research score passes direct admission" - if ( - admission_cfg.use_residual_ic - and 
score_vector.geometry.residual_ic >= admission_cfg.min_score - and score_vector.geometry.marginal_span_gain >= admission_cfg.min_span_gain - and ( - (not admission_cfg.use_effective_rank_gain) - or score_vector.geometry.effective_rank_gain >= admission_cfg.min_effective_rank_gain - ) - ): - return True, "Research geometry passes residual-span admission" - return False, ( - "Too redundant under research geometry: " - f"max|rho|={score_vector.geometry.max_abs_correlation:.4f}, " - f"residual_ic={score_vector.geometry.residual_ic:.4f}, " - f"span_gain={score_vector.geometry.marginal_span_gain:.4f}" - ) - - -def run_research_model_suite( - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, - research_cfg, -) -> Dict[str, dict]: - """Fit research-mode models on rolling windows and report net IR/stability.""" - if not factor_signals: - return {} - - selector = FactorSelector() - backtester = PortfolioBacktester() - splits = rolling_splits( - returns.shape[0], - train_window=research_cfg.selection.rolling_train_window, - test_window=research_cfg.selection.rolling_test_window, - step=research_cfg.selection.rolling_step, - ) - if not splits: - return {} - - reports: Dict[str, dict] = {} - for model_name in research_cfg.selection.models: - fold_reports = [] - selected_sets = [] - for split in splits: - train_returns = returns[split.train_start:split.train_end] - test_returns = returns[split.test_start:split.test_end] - train_signals = { - fid: signal[split.train_start:split.train_end] - for fid, signal in factor_signals.items() - } - test_signals = { - fid: signal[split.test_start:split.test_end] - for fid, signal in factor_signals.items() - } - try: - selected, weights = _fit_research_model( - selector, - model_name, - train_signals, - train_returns, - ) - except ImportError as exc: - reports[model_name] = {"available": False, "error": str(exc)} - fold_reports = [] - break - if not selected: - continue - selected_sets.append(set(selected)) - composite = 
_weighted_composite(test_signals, weights) - stats = backtester.quintile_backtest( - composite, - test_returns, - transaction_cost_bps=research_cfg.execution.cost_bps, - ) - regime_report = None - if research_cfg.regimes.enabled: - regime_report = _composite_regime_report(composite, test_returns) - fold_reports.append( - { - "selected_ids": selected, - "weights": weights, - "test_ic_mean": float(stats["ic_mean"]), - "test_icir": float(stats["icir"]), - "test_net_ir": _series_ir(stats["ls_net_series"]), - "avg_turnover": float(stats["avg_turnover"]), - "regimes": regime_report, - } - ) - - if not fold_reports: - reports.setdefault(model_name, {"available": True, "folds": []}) - continue - - reports[model_name] = { - "available": True, - "folds": fold_reports, - "mean_test_ic_mean": float(np.mean([fold["test_ic_mean"] for fold in fold_reports])), - "mean_test_icir": float(np.mean([fold["test_icir"] for fold in fold_reports])), - "mean_test_net_ir": float(np.mean([fold["test_net_ir"] for fold in fold_reports])), - "mean_turnover": float(np.mean([fold["avg_turnover"] for fold in fold_reports])), - "selection_stability": _selection_stability(selected_sets), - } - - return reports - - -def _fit_research_model( - selector: FactorSelector, - model_name: str, - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, -) -> tuple[list[int], Dict[int, float]]: - if model_name == "ridge": - from sklearn.linear_model import RidgeCV - - ids, X, y = selector._prepare_panel(factor_signals, returns) # noqa: SLF001 - if len(ids) == 0: - return [], {} - model = RidgeCV(alphas=np.logspace(-4, 2, 12)) - model.fit(X, y) - weights = {ids[idx]: float(coef) for idx, coef in enumerate(model.coef_)} - selected = [factor_id for factor_id, weight in weights.items() if abs(weight) > 1e-10] - return selected, {factor_id: weights[factor_id] for factor_id in selected} - - if model_name == "elastic_net": - from sklearn.linear_model import ElasticNetCV - - ids, X, y = 
selector._prepare_panel(factor_signals, returns) # noqa: SLF001 - if len(ids) == 0: - return [], {} - model = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=3, max_iter=10000) - model.fit(X, y) - weights = {ids[idx]: float(coef) for idx, coef in enumerate(model.coef_)} - selected = [factor_id for factor_id, weight in weights.items() if abs(weight) > 1e-10] - return selected, {factor_id: weights[factor_id] for factor_id in selected} - - if model_name == "lasso": - results = selector.lasso_selection(factor_signals, returns) - selected = [factor_id for factor_id, _ in results] - return selected, {factor_id: score for factor_id, score in results} - - if model_name == "stepwise": - results = selector.forward_stepwise(factor_signals, returns) - selected = [factor_id for factor_id, _ in results] - return selected, {factor_id: 1.0 for factor_id in selected} - - if model_name == "xgboost": - results = selector.xgboost_selection(factor_signals, returns) - selected = [factor_id for factor_id, _ in results[: max(1, min(10, len(results)))]] - return selected, {factor_id: score for factor_id, score in results if factor_id in selected} - - raise ValueError(f"Unknown research model: {model_name}") - - -def _weighted_composite( - factor_signals: Dict[int, np.ndarray], - weights: Dict[int, float], -) -> np.ndarray: - selected_signals = {fid: factor_signals[fid] for fid in weights if fid in factor_signals} - if not selected_signals: - raise ValueError("No selected signals available for composite") - raw_weights = np.array([abs(weights[fid]) for fid in selected_signals], dtype=np.float64) - if raw_weights.sum() < 1e-12: - raw_weights = np.ones_like(raw_weights) - normalized_weights = raw_weights / raw_weights.sum() - - composite = np.zeros_like(next(iter(selected_signals.values())), dtype=np.float64) - for idx, fid in enumerate(selected_signals): - signal = selected_signals[fid].astype(np.float64) - cs_mean = np.nanmean(signal, axis=1, keepdims=True) - cs_std = np.nanstd(signal, axis=1, 
keepdims=True) - cs_std = np.where(cs_std == 0.0, 1.0, cs_std) - standardized = (signal - cs_mean) / cs_std - composite += normalized_weights[idx] * np.where(np.isnan(standardized), 0.0, standardized) - return composite - - -def _bootstrap_standard_error(ic_series: np.ndarray, uncertainty_cfg) -> float: - valid = ic_series[np.isfinite(ic_series)] - if len(valid) < 3: - return 0.0 - tester = BootstrapICTester( - SignificanceConfig( - bootstrap_n_samples=uncertainty_cfg.bootstrap_samples, - bootstrap_block_size=uncertainty_cfg.block_size, - seed=42, - ) - ) - result = tester.compute_ci("research", valid) - return float(result.ic_std_boot) - - -def _normalized_weights( - target_names: Iterable[str], - explicit_weights: Dict[str, float], -) -> Dict[str, float]: - target_names = list(target_names) - if not target_names: - return {} - if explicit_weights: - weights = np.array([max(float(explicit_weights.get(name, 0.0)), 0.0) for name in target_names]) - if weights.sum() > 1e-12: - normalized = weights / weights.sum() - return {name: float(normalized[idx]) for idx, name in enumerate(target_names)} - equal = 1.0 / len(target_names) - return {name: equal for name in target_names} - - -def _decay_slope(target_horizons: Dict[str, int], shrunk_ic: Dict[str, float]) -> float: - aligned = [ - (target_horizons[name], value) - for name, value in shrunk_ic.items() - if name in target_horizons - ] - if len(aligned) < 2: - return 0.0 - horizons = np.array([item[0] for item in aligned], dtype=np.float64) - scores = np.array([item[1] for item in aligned], dtype=np.float64) - if np.std(horizons) < 1e-12: - return 0.0 - slope, _ = np.polyfit(horizons, scores, 1) - return float(slope) - - -def _cross_horizon_consistency(per_horizon_ic_mean: Dict[str, float]) -> float: - values = [value for value in per_horizon_ic_mean.values() if abs(value) > 1e-12] - if not values: - return 0.0 - signs = np.sign(values) - majority = np.sign(np.sum(signs)) - if majority == 0: - return 0.0 - return 
float(np.mean(signs == majority)) - - -def _flatten_panel(panel: np.ndarray, valid_mask: np.ndarray | None = None) -> tuple[np.ndarray, np.ndarray]: - matrix = np.asarray(panel, dtype=np.float64) - if valid_mask is None: - valid_mask = np.isfinite(matrix) - centered = np.where(valid_mask, matrix, np.nan) - cs_mean = np.nanmean(centered, axis=0, keepdims=True) - cs_std = np.nanstd(centered, axis=0, keepdims=True) - cs_std = np.where(cs_std < 1e-12, 1.0, cs_std) - standardized = (centered - cs_mean) / cs_std - filled = np.where(np.isfinite(standardized), standardized, 0.0) - return filled.reshape(-1), valid_mask - - -def _unflatten_panel(flat: np.ndarray, valid_mask: np.ndarray, shape: tuple[int, int]) -> np.ndarray: - matrix = np.full(shape, np.nan, dtype=np.float64) - matrix[valid_mask] = flat.reshape(shape)[valid_mask] - return matrix - - -def _effective_rank(matrix: np.ndarray) -> float: - if matrix.ndim != 2 or min(matrix.shape) == 0: - return 0.0 - cov = matrix.T @ matrix - singular_values = np.linalg.svd(cov, compute_uv=False) - singular_values = singular_values[singular_values > 1e-12] - if len(singular_values) == 0: - return 0.0 - probs = singular_values / singular_values.sum() - entropy = -np.sum(probs * np.log(probs)) - return float(np.exp(entropy)) - - -def _selection_stability(selected_sets: Sequence[set[int]]) -> float: - if len(selected_sets) < 2: - return 1.0 if selected_sets else 0.0 - overlaps = [] - for idx in range(len(selected_sets) - 1): - left = selected_sets[idx] - right = selected_sets[idx + 1] - union = left | right - overlaps.append(len(left & right) / len(union) if union else 1.0) - return float(np.mean(overlaps)) - - -def _series_ir(series: np.ndarray) -> float: - valid = np.asarray(series, dtype=np.float64) - valid = valid[np.isfinite(valid)] - if len(valid) < 2: - return 0.0 - std = float(np.std(valid, ddof=1)) - if std < 1e-12: - return 0.0 - return float(np.mean(valid) / std) - - -def _composite_regime_report(composite: np.ndarray, 
returns: np.ndarray) -> dict: - detector = RegimeDetector(RegimeConfig()) - classification = detector.classify(returns.T) - evaluator = RegimeAwareEvaluator(returns.T, classification, RegimeConfig()) - regime_result = evaluator.evaluate("composite", composite.T) - regime_net_ir = {} - backtester = PortfolioBacktester() - stats = backtester.quintile_backtest(composite, returns) - for regime, mask in classification.periods.items(): - regime_net_ir[regime.name] = _series_ir(stats["ls_net_series"][mask]) - return { - "regime_score": regime_result.overall_regime_score, - "n_regimes_passing": regime_result.n_regimes_passing, - "regime_ic": {regime.name: value for regime, value in regime_result.regime_ic.items()}, - "regime_icir": {regime.name: value for regime, value in regime_result.regime_icir.items()}, - "regime_net_ir": regime_net_ir, - } diff --git a/src/factorminer/factorminer/evaluation/runtime.py b/src/factorminer/factorminer/evaluation/runtime.py deleted file mode 100644 index 9024238..0000000 --- a/src/factorminer/factorminer/evaluation/runtime.py +++ /dev/null @@ -1,480 +0,0 @@ -"""Shared runtime evaluation helpers for strict factor recomputation.""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass, field -from pathlib import Path -from typing import Dict, Iterable, List, Optional, Sequence - -import numpy as np -import pandas as pd - -from src.factorminer.factorminer.core.factor_library import Factor -from src.factorminer.factorminer.core.parser import try_parse -from src.factorminer.factorminer.data.tensor_builder import TargetSpec, compute_targets -from src.factorminer.factorminer.evaluation.metrics import ( - compute_factor_stats, - compute_pairwise_correlation, -) - -logger = logging.getLogger(__name__) - -FEATURE_TO_COLUMN = { - "$open": "open", - "$high": "high", - "$low": "low", - "$close": "close", - "$volume": "volume", - "$amt": "amount", - "$vwap": "vwap", - "$returns": "returns", -} - -COLUMN_TO_FEATURE = 
{value: key for key, value in FEATURE_TO_COLUMN.items()} - - -class SignalComputationError(RuntimeError): - """Raised when a factor cannot be recomputed under strict policies.""" - - -@dataclass -class DatasetSplit: - """One temporal view into the evaluation dataset.""" - - name: str - indices: np.ndarray - timestamps: np.ndarray - returns: np.ndarray - target_returns: Dict[str, np.ndarray] = field(default_factory=dict) - default_target: str = "target" - - @property - def size(self) -> int: - return int(len(self.indices)) - - def get_target(self, name: str | None = None) -> np.ndarray: - target_name = name or self.default_target - if target_name in self.target_returns: - return self.target_returns[target_name] - return self.returns - - -@dataclass -class EvaluationDataset: - """Canonical dataset used for analysis commands.""" - - data_dict: Dict[str, np.ndarray] - data_tensor: np.ndarray - returns: np.ndarray - timestamps: np.ndarray - asset_ids: np.ndarray - splits: Dict[str, DatasetSplit] - processed_df: pd.DataFrame = field(repr=False) - target_panels: Dict[str, np.ndarray] = field(default_factory=dict) - target_specs: Dict[str, TargetSpec] = field(default_factory=dict) - default_target: str = "target" - - def get_split(self, name: str) -> DatasetSplit: - if name not in self.splits: - raise KeyError(f"Unknown split: {name}") - return self.splits[name] - - def get_target(self, name: str | None = None) -> np.ndarray: - target_name = name or self.default_target - if target_name in self.target_panels: - return self.target_panels[target_name] - return self.returns - - -@dataclass -class FactorEvaluationArtifact: - """Recomputed signals and metrics for one factor.""" - - factor_id: int - name: str - formula: str - category: str - parse_ok: bool - signals_full: Optional[np.ndarray] = None - split_signals: Dict[str, np.ndarray] = field(default_factory=dict) - split_stats: Dict[str, dict] = field(default_factory=dict) - target_stats: Dict[str, Dict[str, dict]] = 
field(default_factory=dict) - score_vector: Optional[dict] = None - research_metrics: Dict[str, float] = field(default_factory=dict) - error: str = "" - - @property - def succeeded(self) -> bool: - return self.parse_ok and self.signals_full is not None and not self.error - - -def load_runtime_dataset( - raw_df: pd.DataFrame, - cfg, -) -> EvaluationDataset: - """Load raw market data into a canonical evaluation dataset.""" - from factorminer.data.preprocessor import preprocess - from factorminer.data.tensor_builder import TensorConfig, build_tensor - - raw_df = raw_df.copy() - raw_df["datetime"] = pd.to_datetime(raw_df["datetime"]) - - target_specs = _resolve_target_specs(cfg) - target_df = compute_targets(raw_df, target_specs) - target_columns = [spec.column_name for spec in target_specs] - merge_columns = ["datetime", "asset_id", *target_columns] - processed_df = preprocess(raw_df) - processed_df = processed_df.merge( - target_df[merge_columns], - on=["datetime", "asset_id"], - how="left", - ) - processed_df = processed_df.sort_values(["datetime", "asset_id"]).reset_index(drop=True) - - feature_columns = _resolve_feature_columns(getattr(cfg.data, "features", [])) - tensor_cfg = TensorConfig( - features=feature_columns, - backend="numpy", - dtype="float64", - target_columns=target_columns, - default_target=_target_column_for_name(cfg.data.default_target, target_specs), - ) - dataset = build_tensor(processed_df, tensor_cfg) - - data_tensor = np.asarray(dataset.data, dtype=np.float64) - returns = np.asarray(dataset.target, dtype=np.float64) - target_panels = { - spec.name: np.asarray(dataset.targets[spec.column_name], dtype=np.float64) - for spec in target_specs - if spec.column_name in dataset.targets - } - timestamps = pd.to_datetime(dataset.timestamps).to_numpy() - asset_ids = np.asarray(dataset.asset_ids) - - if returns.ndim != 2: - raise ValueError("Runtime dataset target must be a 2-D (M, T) array") - - data_dict = { - COLUMN_TO_FEATURE[column]: data_tensor[:, 
:, idx] - for idx, column in enumerate(dataset.feature_names) - if column in COLUMN_TO_FEATURE - } - - splits = { - "train": _build_named_split( - "train", - timestamps, - returns, - target_panels, - cfg.data.default_target, - start=cfg.data.train_period[0], - end=cfg.data.train_period[1], - ), - "test": _build_named_split( - "test", - timestamps, - returns, - target_panels, - cfg.data.default_target, - start=cfg.data.test_period[0], - end=cfg.data.test_period[1], - ), - "full": DatasetSplit( - name="full", - indices=np.arange(len(timestamps)), - timestamps=timestamps, - returns=returns, - target_returns=target_panels, - default_target=cfg.data.default_target, - ), - } - - for split_name in ("train", "test"): - if splits[split_name].size == 0: - raise ValueError( - f"{split_name} split is empty for configured period " - f"{getattr(cfg.data, f'{split_name}_period')}" - ) - - return EvaluationDataset( - data_dict=data_dict, - data_tensor=data_tensor, - returns=returns, - timestamps=timestamps, - asset_ids=asset_ids, - splits=splits, - processed_df=processed_df, - target_panels=target_panels, - target_specs={spec.name: spec for spec in target_specs}, - default_target=cfg.data.default_target, - ) - - -def evaluate_factors( - factors: Sequence[Factor], - dataset: EvaluationDataset, - signal_failure_policy: str = "reject", - target_name: str | None = None, -) -> List[FactorEvaluationArtifact]: - """Recompute factor signals and metrics across all dataset splits.""" - artifacts: List[FactorEvaluationArtifact] = [] - active_target_name = target_name or dataset.default_target - active_returns = dataset.get_target(active_target_name) - - for factor in factors: - artifact = FactorEvaluationArtifact( - factor_id=factor.id, - name=factor.name, - formula=factor.formula, - category=factor.category, - parse_ok=False, - ) - - tree = try_parse(factor.formula) - if tree is None: - artifact.error = "Parse failure" - artifacts.append(artifact) - continue - - artifact.parse_ok = True - - 
try: - signals = compute_tree_signals( - tree, - dataset.data_dict, - active_returns.shape, - signal_failure_policy=signal_failure_policy, - ) - except Exception as exc: - artifact.error = str(exc) - artifacts.append(artifact) - continue - - if signals is None or np.all(np.isnan(signals)): - artifact.error = "Signal computation produced only NaN values" - artifacts.append(artifact) - continue - - artifact.signals_full = np.asarray(signals, dtype=np.float64) - - for split_name, split in dataset.splits.items(): - split_signals = artifact.signals_full[:, split.indices] - artifact.split_signals[split_name] = split_signals - active_split_target = split.get_target(active_target_name) - active_stats = compute_factor_stats(split_signals, active_split_target) - artifact.split_stats[split_name] = active_stats - artifact.target_stats[split_name] = {} - for available_target_name, split_target in split.target_returns.items(): - artifact.target_stats[split_name][available_target_name] = ( - active_stats - if available_target_name == active_target_name - else compute_factor_stats(split_signals, split_target) - ) - - artifacts.append(artifact) - - return artifacts - - -def compute_tree_signals( - tree, - data_dict: Dict[str, np.ndarray], - returns_shape: tuple[int, int], - signal_failure_policy: str = "reject", -) -> np.ndarray: - """Evaluate an expression tree under an explicit failure policy.""" - formula_str = tree.to_string() - - try: - signals = tree.evaluate(data_dict) - except Exception as exc: - return _handle_signal_failure( - formula_str=formula_str, - returns_shape=returns_shape, - signal_failure_policy=signal_failure_policy, - cause=exc, - ) - - if signals is None or np.all(np.isnan(signals)): - return _handle_signal_failure( - formula_str=formula_str, - returns_shape=returns_shape, - signal_failure_policy=signal_failure_policy, - cause=SignalComputationError("Signal computation produced only NaN values"), - ) - - return np.asarray(signals, dtype=np.float64) - - -def 
compute_correlation_matrix( - artifacts: Sequence[FactorEvaluationArtifact], - split_name: str, -) -> np.ndarray: - """Compute a true pairwise factor correlation matrix on one split.""" - selected = [a for a in artifacts if a.succeeded] - n = len(selected) - matrix = np.zeros((n, n), dtype=np.float64) - - for i in range(n): - for j in range(i + 1, n): - corr = compute_pairwise_correlation( - selected[i].split_signals[split_name], - selected[j].split_signals[split_name], - ) - matrix[i, j] = corr - matrix[j, i] = corr - - return matrix - - -def select_top_k( - artifacts: Sequence[FactorEvaluationArtifact], - split_name: str, - top_k: Optional[int] = None, -) -> List[FactorEvaluationArtifact]: - """Sort succeeded artifacts by split abs-IC and return the top-k subset.""" - succeeded = [a for a in artifacts if a.succeeded] - succeeded.sort( - key=lambda artifact: abs( - artifact.split_stats[split_name].get("ic_abs_mean", 0.0) - ), - reverse=True, - ) - if top_k is None or top_k >= len(succeeded): - return succeeded - return succeeded[:top_k] - - -def summarize_failures( - artifacts: Sequence[FactorEvaluationArtifact], -) -> List[str]: - """Return human-readable failure summaries.""" - return [ - f"{artifact.name or artifact.factor_id}: {artifact.error}" - for artifact in artifacts - if not artifact.succeeded - ] - - -def resolve_split_for_fit_eval(period: str) -> str: - """Map fit/eval CLI period values to runtime split names.""" - return "full" if period == "both" else period - - -def analysis_split_names(period: str) -> List[str]: - """Map analysis CLI period values to one or two runtime split names.""" - if period == "both": - return ["train", "test"] - return [period] - - -def _resolve_feature_columns(config_features: Sequence[str]) -> List[str]: - if not config_features: - return list(COLUMN_TO_FEATURE.keys()) - - resolved: List[str] = [] - for feature in config_features: - if feature in FEATURE_TO_COLUMN: - resolved.append(FEATURE_TO_COLUMN[feature]) - continue - 
stripped = feature.lstrip("$") - if stripped == "amt": - stripped = "amount" - resolved.append(stripped) - return resolved - - -def _build_named_split( - name: str, - timestamps: np.ndarray, - returns: np.ndarray, - target_panels: Dict[str, np.ndarray], - default_target: str, - start: str, - end: str, -) -> DatasetSplit: - ts = pd.to_datetime(timestamps) - mask = (ts >= pd.Timestamp(start)) & (ts <= pd.Timestamp(end)) - indices = np.where(mask)[0] - return DatasetSplit( - name=name, - indices=indices, - timestamps=timestamps[indices], - returns=returns[:, indices], - target_returns={ - target_name: panel[:, indices] - for target_name, panel in target_panels.items() - }, - default_target=default_target, - ) - - -def _resolve_target_specs(cfg) -> List[TargetSpec]: - raw_targets = getattr(cfg.data, "targets", None) or [ - { - "name": "paper", - "entry_delay_bars": 1, - "holding_bars": 1, - "price_pair": "open_to_close", - "return_transform": "simple", - } - ] - return [ - TargetSpec( - name=str(target["name"]), - entry_delay_bars=int(target.get("entry_delay_bars", 0)), - holding_bars=int(target.get("holding_bars", 1)), - price_pair=str(target.get("price_pair", "open_to_close")), - return_transform=str(target.get("return_transform", "simple")), - ) - for target in raw_targets - ] - - -def _target_column_for_name(target_name: str, specs: Sequence[TargetSpec]) -> str: - for spec in specs: - if spec.name == target_name: - return spec.column_name - return "target" - - -def _handle_signal_failure( - formula_str: str, - returns_shape: tuple[int, int], - signal_failure_policy: str, - cause: Exception, -) -> np.ndarray: - if signal_failure_policy == "raise": - raise cause - - if signal_failure_policy == "reject": - raise SignalComputationError( - f"Expression evaluation failed for '{formula_str}': {cause}" - ) from cause - - if signal_failure_policy != "synthetic": - raise ValueError( - "signal_failure_policy must be one of: reject, synthetic, raise" - ) - - logger.warning( - 
"Expression evaluation failed for '%s': %s — falling back to synthetic signals", - formula_str, - cause, - ) - return generate_synthetic_signals(formula_str, returns_shape) - - -def generate_synthetic_signals( - formula_str: str, - returns_shape: tuple[int, int], -) -> np.ndarray: - """Deterministic pseudo-signals for demo/mock workflows.""" - m, t = returns_shape - seed = hash(formula_str) % (2**31) - rng = np.random.RandomState(seed) - signals = rng.randn(m, t).astype(np.float64) - nan_mask = rng.random((m, t)) < 0.02 - signals[nan_mask] = np.nan - return signals diff --git a/src/factorminer/factorminer/evaluation/selection.py b/src/factorminer/factorminer/evaluation/selection.py deleted file mode 100644 index c5af6cd..0000000 --- a/src/factorminer/factorminer/evaluation/selection.py +++ /dev/null @@ -1,280 +0,0 @@ -"""Factor selection methods for identifying sparse, high-value subsets. - -Implements Lasso (L1), Forward Stepwise, and XGBoost-based selection -strategies for choosing an optimal subset of factors from the mined library. -""" - -from __future__ import annotations - -import logging -from typing import Dict, List, Optional, Tuple - -import numpy as np -from scipy.stats import spearmanr - -logger = logging.getLogger(__name__) - - -class FactorSelector: - """Select optimal factor subsets from the factor library. - - All methods accept factor signals as (T, N) arrays and forward returns - as a (T, N) array, then return ordered lists of (factor_id, score) tuples. - """ - - # ------------------------------------------------------------------ - # Lasso (L1-regularized) selection - # ------------------------------------------------------------------ - - def lasso_selection( - self, - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, - alpha: Optional[float] = None, - ) -> List[Tuple[int, float]]: - """Lasso: L1-regularized linear regression for factor selection. - - Paper: Only 8 factors capture 95% of IC improvement. 
- - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - returns : ndarray of shape (T, N) - Forward returns aligned with factor signals. - alpha : float or None - L1 regularization strength. If None, selected via cross-validation - using LassoCV with 5 folds. - - Returns - ------- - list of (factor_id, coefficient) - Non-zero factors sorted by absolute coefficient (descending). - """ - from sklearn.linear_model import Lasso, LassoCV - - ids, X, y = self._prepare_panel(factor_signals, returns) - if len(ids) == 0: - return [] - - if alpha is None: - model = LassoCV(cv=5, max_iter=10000, n_jobs=-1) - model.fit(X, y) - alpha = model.alpha_ - logger.info("LassoCV selected alpha=%.6f", alpha) - - lasso = Lasso(alpha=alpha, max_iter=10000) - lasso.fit(X, y) - - results: List[Tuple[int, float]] = [] - for idx, coef in enumerate(lasso.coef_): - if abs(coef) > 1e-10: - results.append((ids[idx], float(coef))) - - results.sort(key=lambda x: abs(x[1]), reverse=True) - return results - - # ------------------------------------------------------------------ - # Forward stepwise selection - # ------------------------------------------------------------------ - - def forward_stepwise( - self, - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, - max_factors: int = 20, - ) -> List[Tuple[int, float]]: - """Forward Stepwise: greedy selection maximizing combined ICIR. - - Paper: 18 factors, ICIR=1.38. - - At each step, add the factor that yields the largest improvement in - ICIR of the equal-weight composite of selected factors. - - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - returns : ndarray of shape (T, N) - Forward returns. - max_factors : int - Maximum number of factors to select. - - Returns - ------- - list of (factor_id, delta_ICIR) - Factors in selection order with the ICIR improvement each contributed. 
- """ - if not factor_signals: - return [] - - remaining = set(factor_signals.keys()) - selected: List[int] = [] - result: List[Tuple[int, float]] = [] - current_icir = 0.0 - - for _ in range(min(max_factors, len(factor_signals))): - best_fid: Optional[int] = None - best_icir = current_icir - best_delta = 0.0 - - for fid in remaining: - candidate = selected + [fid] - icir = self._composite_icir(factor_signals, candidate, returns) - delta = icir - current_icir - if icir > best_icir: - best_fid = fid - best_icir = icir - best_delta = delta - - if best_fid is None: - break - - selected.append(best_fid) - remaining.discard(best_fid) - result.append((best_fid, float(best_delta))) - current_icir = best_icir - logger.info( - "Step %d: added factor %d, ICIR=%.4f (+%.4f)", - len(selected), best_fid, current_icir, best_delta, - ) - - return result - - # ------------------------------------------------------------------ - # XGBoost importance-based selection - # ------------------------------------------------------------------ - - def xgboost_selection( - self, - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, - ) -> List[Tuple[int, float]]: - """XGBoost: gradient boosting for nonlinear factor interactions. - - Paper: Best performance with ICIR=1.49, 92.6% win rate. - - Parameters - ---------- - factor_signals : dict[int, ndarray] - Mapping from factor ID to (T, N) signal array. - returns : ndarray of shape (T, N) - Forward returns. - - Returns - ------- - list of (factor_id, importance) - All factors sorted by gain importance (descending). 
- """ - import xgboost as xgb - - ids, X, y = self._prepare_panel(factor_signals, returns) - if len(ids) == 0: - return [] - - model = xgb.XGBRegressor( - n_estimators=200, - max_depth=5, - learning_rate=0.05, - subsample=0.8, - colsample_bytree=0.8, - reg_alpha=0.1, - reg_lambda=1.0, - n_jobs=-1, - verbosity=0, - ) - model.fit(X, y) - - importance = model.feature_importances_ # gain-based by default - results: List[Tuple[int, float]] = [ - (ids[i], float(importance[i])) for i in range(len(ids)) - ] - results.sort(key=lambda x: x[1], reverse=True) - return results - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - @staticmethod - def _prepare_panel( - factor_signals: Dict[int, np.ndarray], - returns: np.ndarray, - ) -> Tuple[List[int], np.ndarray, np.ndarray]: - """Flatten panel data to (samples, features) for sklearn-style models. - - Stacks all (T, N) arrays into (T*N, K) feature matrix and (T*N,) - target vector, dropping rows with any NaN. - - Returns - ------- - ids : list of int - Factor IDs in column order. - X : ndarray (n_samples, n_factors) - y : ndarray (n_samples,) - """ - if not factor_signals: - return [], np.empty((0, 0)), np.empty(0) - - ids = sorted(factor_signals.keys()) - T, N = next(iter(factor_signals.values())).shape - K = len(ids) - - # Build (T*N, K) matrix - X = np.column_stack([ - factor_signals[fid].ravel() for fid in ids - ]) # (T*N, K) - y = returns.ravel() # (T*N,) - - # Drop NaN rows - valid = np.all(np.isfinite(X), axis=1) & np.isfinite(y) - return ids, X[valid], y[valid] - - @staticmethod - def _composite_icir( - factor_signals: Dict[int, np.ndarray], - selected_ids: List[int], - returns: np.ndarray, - ) -> float: - """Compute ICIR of the equal-weight composite of selected factors. - - IC is the cross-sectional Spearman rank correlation between the - composite signal and forward returns at each time step. 
ICIR is - mean(IC) / std(IC). - - Returns 0.0 if computation fails or std is zero. - """ - if not selected_ids: - return 0.0 - - signals = [] - for fid in selected_ids: - sig = factor_signals[fid].astype(np.float64) - cs_mean = np.nanmean(sig, axis=1, keepdims=True) - cs_std = np.nanstd(sig, axis=1, keepdims=True) - cs_std = np.where(cs_std == 0.0, 1.0, cs_std) - signals.append((sig - cs_mean) / cs_std) - - composite = np.nanmean(np.stack(signals, axis=0), axis=0) # (T, N) - - T = composite.shape[0] - ics = np.full(T, np.nan) - for t in range(T): - x = composite[t] - y = returns[t] - valid = np.isfinite(x) & np.isfinite(y) - if valid.sum() < 5: - continue - corr, _ = spearmanr(x[valid], y[valid]) - if np.isfinite(corr): - ics[t] = corr - - finite_ics = ics[np.isfinite(ics)] - if len(finite_ics) < 2: - return 0.0 - - ic_std = np.std(finite_ics, ddof=1) - if ic_std < 1e-12: - return 0.0 - - return float(np.mean(finite_ics) / ic_std) diff --git a/src/factorminer/factorminer/evaluation/significance.py b/src/factorminer/factorminer/evaluation/significance.py deleted file mode 100644 index 70b2a38..0000000 --- a/src/factorminer/factorminer/evaluation/significance.py +++ /dev/null @@ -1,495 +0,0 @@ -"""Statistical significance testing for alpha factors. - -Provides block bootstrap confidence intervals, Benjamini-Hochberg FDR -control, and Deflated Sharpe Ratio (Bailey & López de Prado, 2014) to -guard against data-snooping and multiple-testing bias in factor research. 
-""" - -from __future__ import annotations - -import math -from dataclasses import dataclass, field -from typing import Dict, Optional, Tuple - -import numpy as np -from scipy.stats import norm, skew, kurtosis - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - -@dataclass -class SignificanceConfig: - """Configuration for all significance tests.""" - - enabled: bool = True - bootstrap_n_samples: int = 1000 - bootstrap_block_size: int = 20 - bootstrap_confidence: float = 0.95 - fdr_level: float = 0.05 - deflated_sharpe_enabled: bool = True - min_deflated_sharpe: float = 0.0 - seed: int = 42 - - -# --------------------------------------------------------------------------- -# Bootstrap CI -# --------------------------------------------------------------------------- - -@dataclass -class BootstrapCIResult: - """Result of a block bootstrap confidence interval for mean |IC|.""" - - factor_name: str - ic_mean: float - ci_lower: float - ci_upper: float - ic_std_boot: float - ci_excludes_zero: bool - - -class BootstrapICTester: - """Block bootstrap tester for IC series significance. - - Uses circular block bootstrap to preserve time-series autocorrelation - when constructing confidence intervals for mean |IC|. - - Parameters - ---------- - config : SignificanceConfig - Bootstrap parameters (n_samples, block_size, confidence, seed). - """ - - def __init__(self, config: SignificanceConfig) -> None: - self._config = config - self._rng = np.random.RandomState(config.seed) - - # ----- public API ----- - - def compute_ci( - self, factor_name: str, ic_series: np.ndarray - ) -> BootstrapCIResult: - """Compute block-bootstrap CI for mean |IC|. - - Parameters - ---------- - factor_name : str - Human-readable factor identifier. - ic_series : np.ndarray, shape (T,) - IC time series (NaN entries are dropped before resampling). 
- - Returns - ------- - BootstrapCIResult - """ - valid = ic_series[~np.isnan(ic_series)] - T = len(valid) - if T == 0: - return BootstrapCIResult( - factor_name=factor_name, - ic_mean=0.0, - ci_lower=0.0, - ci_upper=0.0, - ic_std_boot=0.0, - ci_excludes_zero=False, - ) - - abs_valid = np.abs(valid) - ic_mean = float(np.mean(abs_valid)) - - boot_means = self._block_bootstrap_means(abs_valid) - - alpha = 1.0 - self._config.bootstrap_confidence - ci_lower = float(np.percentile(boot_means, 100 * alpha / 2)) - ci_upper = float(np.percentile(boot_means, 100 * (1 - alpha / 2))) - ic_std_boot = float(np.std(boot_means, ddof=1)) - - return BootstrapCIResult( - factor_name=factor_name, - ic_mean=ic_mean, - ci_lower=ci_lower, - ci_upper=ci_upper, - ic_std_boot=ic_std_boot, - ci_excludes_zero=ci_lower > 0, - ) - - def compute_p_value(self, ic_series: np.ndarray) -> float: - """Estimate a two-sided p-value for non-zero mean IC. - - Uses a sign-flip randomization test on the observed IC series. - Under the null of no predictive signal, flipping the sign of each - period's IC leaves the distribution unchanged while preserving the - magnitude structure of the observed sample. 
- """ - valid = ic_series[~np.isnan(ic_series)] - T = len(valid) - if T == 0: - return 1.0 - - observed = float(abs(np.mean(valid))) - if observed < 1e-15: - return 1.0 - - null_means = np.empty(self._config.bootstrap_n_samples, dtype=np.float64) - for i in range(self._config.bootstrap_n_samples): - signs = self._rng.choice((-1.0, 1.0), size=T) - null_means[i] = abs(float(np.mean(valid * signs))) - - exceedances = int(np.sum(null_means >= observed)) - return float((exceedances + 1) / (len(null_means) + 1)) - - # ----- internals ----- - - def _effective_block_size(self, T: int) -> int: - """Adaptive block size: min(configured, T // 10), at least 1.""" - bs = self._config.bootstrap_block_size - adaptive = max(T // 10, 1) - return min(bs, adaptive) - - def _block_bootstrap_means(self, series: np.ndarray) -> np.ndarray: - """Generate bootstrap distribution of the sample mean. - - Parameters - ---------- - series : np.ndarray, shape (T,) - Already cleaned (no NaN) series values. - - Returns - ------- - np.ndarray, shape (n_samples,) - Bootstrap sample means. 
- """ - T = len(series) - block_size = self._effective_block_size(T) - n_blocks = int(math.ceil(T / block_size)) - n_samples = self._config.bootstrap_n_samples - - boot_means = np.empty(n_samples, dtype=np.float64) - max_start = T - block_size # last valid block start - - for i in range(n_samples): - # Sample block start indices with replacement - starts = self._rng.randint(0, max_start + 1, size=n_blocks) - # Concatenate blocks and truncate to length T - indices = np.concatenate( - [np.arange(s, s + block_size) for s in starts] - )[:T] - boot_means[i] = series[indices].mean() - - return boot_means - - -# --------------------------------------------------------------------------- -# FDR Control (Benjamini-Hochberg) -# --------------------------------------------------------------------------- - -@dataclass -class FDRResult: - """Result of Benjamini-Hochberg FDR correction.""" - - raw_p_values: Dict[str, float] - adjusted_p_values: Dict[str, float] - significant: Dict[str, bool] - n_discoveries: int - fdr_level: float - - -class FDRController: - """Benjamini-Hochberg FDR correction for multiple factor testing. - - Parameters - ---------- - config : SignificanceConfig - """ - - def __init__(self, config: SignificanceConfig) -> None: - self._config = config - - def apply_fdr(self, p_values: Dict[str, float]) -> FDRResult: - """Apply Benjamini-Hochberg procedure. - - Parameters - ---------- - p_values : Dict[str, float] - Mapping of factor_name -> raw p-value. 
- - Returns - ------- - FDRResult - """ - if not p_values: - return FDRResult( - raw_p_values={}, - adjusted_p_values={}, - significant={}, - n_discoveries=0, - fdr_level=self._config.fdr_level, - ) - - names = list(p_values.keys()) - raw = np.array([p_values[n] for n in names], dtype=np.float64) - m = len(raw) - - # Sort ascending - order = np.argsort(raw) - sorted_raw = raw[order] - - # BH adjusted p-values: p_adj[i] = min(p[i] * m / (i+1), 1.0) - adjusted = np.empty(m, dtype=np.float64) - for idx in range(m): - rank = idx + 1 # 1-indexed rank - adjusted[idx] = min(sorted_raw[idx] * m / rank, 1.0) - - # Enforce monotonicity from bottom up - for idx in range(m - 2, -1, -1): - adjusted[idx] = min(adjusted[idx], adjusted[idx + 1]) - - # Map back to original order - inv_order = np.empty(m, dtype=int) - inv_order[order] = np.arange(m) - adjusted_orig = adjusted[inv_order] - - adjusted_dict: Dict[str, float] = {} - significant_dict: Dict[str, bool] = {} - for i, name in enumerate(names): - adjusted_dict[name] = float(adjusted_orig[i]) - significant_dict[name] = adjusted_orig[i] <= self._config.fdr_level - - return FDRResult( - raw_p_values=dict(p_values), - adjusted_p_values=adjusted_dict, - significant=significant_dict, - n_discoveries=sum(significant_dict.values()), - fdr_level=self._config.fdr_level, - ) - - def batch_evaluate( - self, - ic_series_map: Dict[str, np.ndarray], - bootstrap_tester: BootstrapICTester, - ) -> FDRResult: - """Compute bootstrap p-values for all factors, then apply BH. - - Parameters - ---------- - ic_series_map : Dict[str, np.ndarray] - Mapping of factor_name -> IC series (T,). 
- bootstrap_tester : BootstrapICTester - - Returns - ------- - FDRResult - """ - p_values: Dict[str, float] = {} - for name, ic_series in ic_series_map.items(): - p_values[name] = bootstrap_tester.compute_p_value(ic_series) - return self.apply_fdr(p_values) - - -# --------------------------------------------------------------------------- -# Deflated Sharpe Ratio -# --------------------------------------------------------------------------- - -@dataclass -class DeflatedSharpeResult: - """Result of Deflated Sharpe Ratio test.""" - - factor_name: str - raw_sharpe: float - deflated_sharpe: float - haircut: float - p_value: float - n_trials: int - passes: bool - - -class DeflatedSharpeCalculator: - """Deflated Sharpe Ratio (Bailey & López de Prado, 2014). - - Adjusts the observed Sharpe Ratio for multiple testing by estimating - the expected maximum Sharpe under the null hypothesis of zero skill - across *n_trials* independent strategies. - - Parameters - ---------- - config : SignificanceConfig - """ - - _EULER_GAMMA = 0.5772156649015329 - - def __init__(self, config: SignificanceConfig) -> None: - self._config = config - - def compute( - self, - factor_name: str, - ls_returns: np.ndarray, - n_trials: int, - annualization_factor: float = 252.0, - ) -> DeflatedSharpeResult: - """Compute the Deflated Sharpe Ratio for a factor's L/S returns. - - Parameters - ---------- - factor_name : str - ls_returns : np.ndarray, shape (T,) - Long-short portfolio return series (NaN-free expected). - n_trials : int - Total number of strategy trials (including this one). - annualization_factor : float - Trading periods per year (default 252). 
- - Returns - ------- - DeflatedSharpeResult - """ - valid = ls_returns[~np.isnan(ls_returns)] - T = len(valid) - - if T < 10 or n_trials < 1: - return DeflatedSharpeResult( - factor_name=factor_name, - raw_sharpe=0.0, - deflated_sharpe=0.0, - haircut=0.0, - p_value=1.0, - n_trials=n_trials, - passes=False, - ) - - # Annualised Sharpe - mean_r = float(np.mean(valid)) - std_r = float(np.std(valid, ddof=1)) - if std_r < 1e-15: - return DeflatedSharpeResult( - factor_name=factor_name, - raw_sharpe=0.0, - deflated_sharpe=0.0, - haircut=0.0, - p_value=1.0, - n_trials=n_trials, - passes=False, - ) - - SR = (mean_r / std_r) * math.sqrt(annualization_factor) - - # Expected maximum SR under the null (Bailey & LdP, 2014) - e_max_sr = self._expected_max_sr(n_trials) - - # Higher moments of returns - gamma3 = float(skew(valid, bias=False)) - gamma4 = float(kurtosis(valid, fisher=True, bias=False)) # excess kurtosis - - # Variance correction incorporating skewness and kurtosis - var_correction = (1.0 - gamma3 * SR + (gamma4 - 1.0) / 4.0 * SR ** 2) / T - - if var_correction <= 0: - deflated_sr = 0.0 - else: - deflated_sr = (SR - e_max_sr) / math.sqrt(var_correction) - - p_value = 1.0 - float(norm.cdf(deflated_sr)) - haircut = SR - deflated_sr - - passes = ( - deflated_sr > self._config.min_deflated_sharpe and p_value < 0.05 - ) - - return DeflatedSharpeResult( - factor_name=factor_name, - raw_sharpe=SR, - deflated_sharpe=deflated_sr, - haircut=haircut, - p_value=p_value, - n_trials=n_trials, - passes=passes, - ) - - @classmethod - def _expected_max_sr(cls, n_trials: int) -> float: - """E[max(SR)] approximation from Bailey & López de Prado (2014). 
- - E[max(SR)] ~ sqrt(2*ln(N)) * (1 - gamma / (2*ln(N))) + gamma / sqrt(2*ln(N)) - """ - if n_trials <= 1: - return 0.0 - log_n = math.log(n_trials) - sqrt_2log = math.sqrt(2.0 * log_n) - g = cls._EULER_GAMMA - return sqrt_2log * (1.0 - g / (2.0 * log_n)) + g / sqrt_2log - - -# --------------------------------------------------------------------------- -# Convenience entry point -# --------------------------------------------------------------------------- - -def check_significance( - factor_name: str, - ic_series: np.ndarray, - ls_returns: np.ndarray, - n_total_trials: int, - config: Optional[SignificanceConfig] = None, -) -> Tuple[bool, Optional[str], Dict]: - """Run all significance checks on a single factor. - - Executes bootstrap CI, bootstrap p-value, and (optionally) the - Deflated Sharpe Ratio test. Returns an overall pass/fail verdict - with a human-readable rejection reason. - - Parameters - ---------- - factor_name : str - ic_series : np.ndarray, shape (T,) - ls_returns : np.ndarray, shape (T,) - n_total_trials : int - Total number of factor trials (for DSR correction). - config : SignificanceConfig, optional - If *None*, defaults are used. - - Returns - ------- - Tuple[bool, Optional[str], Dict] - (passes, rejection_reason, details) - *passes* is True when all enabled tests succeed. - *rejection_reason* is None when *passes* is True. - *details* contains per-test result objects. 
- """ - if config is None: - config = SignificanceConfig() - - if not config.enabled: - return True, None, {"skipped": True} - - details: Dict = {} - - # -- Bootstrap IC CI / p-value -- - bt = BootstrapICTester(config) - ci_result = bt.compute_ci(factor_name, ic_series) - details["bootstrap_ci"] = ci_result - p_value = bt.compute_p_value(ic_series) - details["bootstrap_p_value"] = p_value - - if p_value > config.fdr_level: - return ( - False, - f"Bootstrap p-value {p_value:.4f} exceeds alpha {config.fdr_level:.4f}", - details, - ) - - # -- Deflated Sharpe Ratio -- - if config.deflated_sharpe_enabled: - dsr = DeflatedSharpeCalculator(config) - dsr_result = dsr.compute(factor_name, ls_returns, n_total_trials) - details["deflated_sharpe"] = dsr_result - - if not dsr_result.passes: - return ( - False, - f"Deflated Sharpe test failed: DSR={dsr_result.deflated_sharpe:.3f}, " - f"p={dsr_result.p_value:.4f}, haircut={dsr_result.haircut:.3f} " - f"(n_trials={n_total_trials})", - details, - ) - - return True, None, details diff --git a/src/factorminer/factorminer/evaluation/transaction_costs.py b/src/factorminer/factorminer/evaluation/transaction_costs.py deleted file mode 100644 index b13a6f1..0000000 --- a/src/factorminer/factorminer/evaluation/transaction_costs.py +++ /dev/null @@ -1,539 +0,0 @@ -"""Transaction cost models for realistic P&L computation. - -Implements the Almgren-Chriss (2001) market impact framework, bid-ask -slippage, commissions, and A-share specific taxes. All costs are expressed -in basis points (bps) unless explicitly noted. - -References ----------- -Almgren, R. & Chriss, N. (2001). Optimal execution of portfolio transactions. - Journal of Risk, 3(2), 5-39. -Kissell, R. (2013). The Science of Algorithmic Trading and Portfolio Management. - Academic Press. 
-""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Dict, List, Optional - -import numpy as np - - -# --------------------------------------------------------------------------- -# Result containers -# --------------------------------------------------------------------------- - -@dataclass -class TradingCosts: - """Aggregated transaction costs for a single rebalance event. - - All monetary values are in basis points (1 bps = 0.01%) of notional. - - Attributes - ---------- - market_impact_bps : float - Almgren-Chriss permanent + temporary impact, portfolio-level weighted - average in bps. - slippage_bps : float - Bid-ask spread crossing cost (half-spread * urgency) in bps. - commission_bps : float - Broker commission in bps (round-trip, both legs included). - stamp_duty_bps : float - Stamp duty levied on the sell leg only (A-shares: 1 bps). - total_bps : float - Sum of all cost components. - turnover : float - Fraction of portfolio traded this rebalance, in [0, 2]. - 0 = no trading, 1 = full one-way rebalance, 2 = full round-trip. - details : dict - Per-asset breakdown and intermediate quantities. - """ - - market_impact_bps: float - slippage_bps: float - commission_bps: float - stamp_duty_bps: float - total_bps: float - turnover: float - details: Dict = field(default_factory=dict) - - -# --------------------------------------------------------------------------- -# Market impact model (Almgren-Chriss) -# --------------------------------------------------------------------------- - -class MarketImpactModel: - """Almgren-Chriss (2001) market impact model. - - The model decomposes total market impact into: - - Permanent impact (price pressure lasting beyond the trade window):: - - g(v) = lambda_perm * sigma * sign(v) * |v/ADV|^alpha - - where alpha = 0.5 (square-root law, empirically supported for equities). 
- - Temporary impact (within-trade reversion; instantaneous cost):: - - h(v) = eta_temp * sigma * (v / ADV) - - Both components are expressed as fractions of price. Multiply by 1e4 - to convert to basis points. - - Parameters - ---------- - lambda_perm : float - Permanent impact coefficient. Default 0.1 (market-standard calibration - for liquid A-shares; Almgren et al. 2005, "Direct Estimation of Equity - Market Impact"). - eta_temp : float - Temporary impact coefficient. Default 0.01. - alpha : float - Power-law exponent for permanent impact. Default 0.5 (square-root). - """ - - def __init__( - self, - lambda_perm: float = 0.1, - eta_temp: float = 0.01, - alpha: float = 0.5, - ) -> None: - if lambda_perm < 0: - raise ValueError("lambda_perm must be >= 0") - if eta_temp < 0: - raise ValueError("eta_temp must be >= 0") - if not (0.0 < alpha <= 1.0): - raise ValueError("alpha must be in (0, 1]") - - self.lambda_perm = float(lambda_perm) - self.eta_temp = float(eta_temp) - self.alpha = float(alpha) - - # ------------------------------------------------------------------ - def compute_impact( - self, - trade_size: np.ndarray, - adv: np.ndarray, - volatility: np.ndarray, - direction: np.ndarray, - ) -> np.ndarray: - """Compute total Almgren-Chriss impact for a batch of trades. - - Parameters - ---------- - trade_size : ndarray of shape (M,) - Absolute trade size for each asset (shares or notional units). - adv : ndarray of shape (M,) - Average daily volume (same units as trade_size). - volatility : ndarray of shape (M,) - Per-asset annualized volatility (e.g., 0.25 = 25%). - direction : ndarray of shape (M,) - +1.0 for buys, -1.0 for sells. Absolute values used for - magnitude; sign determines direction for permanent component. - - Returns - ------- - ndarray of shape (M,) - Total market impact in basis points per asset. Zero for assets - with zero trade size or zero ADV. 
- """ - trade_size = np.asarray(trade_size, dtype=np.float64) - adv = np.asarray(adv, dtype=np.float64) - volatility = np.asarray(volatility, dtype=np.float64) - direction = np.asarray(direction, dtype=np.float64) - - # Participation rate: what fraction of ADV are we trading - participation = np.where(adv > 0, trade_size / adv, 0.0) - - # Permanent impact: lambda * sigma * participation^alpha - # Ref: Almgren (2001) eq. (2.4) – permanent impact g(v) = lambda * sigma * |x|^alpha - permanent = ( - self.lambda_perm - * volatility - * np.sign(direction) - * np.power(np.abs(participation), self.alpha) - ) - - # Temporary impact: eta * sigma * participation - # Ref: Almgren (2001) eq. (2.5) – temporary impact h(v) = eta * v/ADV - temporary = self.eta_temp * volatility * np.abs(participation) - - # Total impact (fractional), convert to bps - total_bps = (np.abs(permanent) + temporary) * 1e4 - - return total_bps - - -# --------------------------------------------------------------------------- -# Slippage model (bid-ask spread) -# --------------------------------------------------------------------------- - -class SlippageModel: - """Bid-ask spread slippage model. - - Each trade crosses the bid-ask spread. For a patient order (urgency=0) - we assume the order rests on the book and earns half-spread; for an - aggressive market order (urgency=1) we pay the full spread. In practice, - intraday algo execution sits around urgency=0.5. - - Per-trade slippage cost:: - - slippage = spread_bps * urgency - - For a round-trip (buy + sell) this doubles. The caller is responsible - for applying round-trip scaling. - - Default spreads for 10-min A-share bars: - - Liquid large-caps (CSI 300): 2-3 bps - - Mid-cap (CSI 500): 3-5 bps - - Small-cap: 5-10 bps - """ - - def __init__(self, default_spread_bps: float = 3.0) -> None: - """ - Parameters - ---------- - default_spread_bps : float - Fallback spread used when per-asset spreads are not supplied. 
- """ - self.default_spread_bps = float(default_spread_bps) - - # ------------------------------------------------------------------ - def compute_slippage( - self, - trade_size: np.ndarray, - spread_bps: Optional[np.ndarray] = None, - urgency: float = 0.5, - ) -> np.ndarray: - """Compute one-way slippage for a set of trades. - - Parameters - ---------- - trade_size : ndarray of shape (M,) - Trade sizes (used to identify which assets are actively traded; - zero-size trades incur no slippage). - spread_bps : ndarray of shape (M,) or None - Per-asset effective bid-ask spread in bps. Falls back to - ``default_spread_bps`` if None. - urgency : float - Urgency scalar in [0, 1]. 0 = fully patient (resting orders), - 1 = aggressive (market orders). Default 0.5. - - Returns - ------- - ndarray of shape (M,) - One-way slippage cost in bps per asset. - """ - if not (0.0 <= urgency <= 1.0): - raise ValueError("urgency must be in [0, 1]") - - trade_size = np.asarray(trade_size, dtype=np.float64) - M = len(trade_size) - - if spread_bps is None: - sp = np.full(M, self.default_spread_bps) - else: - sp = np.asarray(spread_bps, dtype=np.float64) - - # Only traded assets incur slippage - traded = trade_size > 0 - cost = np.where(traded, sp * urgency, 0.0) - - return cost - - -# --------------------------------------------------------------------------- -# Aggregated transaction cost calculator -# --------------------------------------------------------------------------- - -class TransactionCostCalculator: - """Aggregate all transaction cost components for a portfolio rebalance. - - Components modelled - ------------------- - 1. **Market impact** – Almgren-Chriss permanent + temporary impact. - 2. **Bid-ask slippage** – half-spread crossing cost. - 3. **Commission** – fixed per-side brokerage fee. - 4. **Stamp duty** – sell-side stamp duty (A-shares only). - 5. **Financing cost** – overnight leverage cost (when applicable). 
- - Market defaults (A-shares, 10-min bars) - ---------------------------------------- - * Commission: 2 bps per side (4 bps round-trip for institutional). - * Stamp duty: 1 bps on the sell side only. - * Spread: 3 bps (CSI 500 universe average). - * All-in round-trip cost at modest size: ~8 bps (consistent with - HelixFactor benchmark config ``benchmark.cost_bps`` sweep). - - Crypto defaults - --------------- - * Commission: 0.5 bps maker / 1.5 bps taker → 2 bps per side. - * No stamp duty. - * Spread: 1-2 bps for top-20 pairs. - - Parameters - ---------- - impact_model : MarketImpactModel, optional - Custom Almgren-Chriss model. Defaults to standard parameterisation. - slippage_model : SlippageModel, optional - Custom slippage model. Defaults to standard parameterisation. - commission_bps : float - One-way broker commission in bps. Default 2 bps. - stamp_duty_bps : float - Sell-side stamp duty in bps. Default 1 bps (A-shares). - overnight_rate_annual : float - Annualised financing rate for leveraged positions. Default 0.0 - (no leverage). - bars_per_year : float - Number of bars per year used to convert overnight rate to per-bar. - Default 252 * 24 = 6048 (10-min bars, 4-hour A-share session). 
- """ - - def __init__( - self, - impact_model: Optional[MarketImpactModel] = None, - slippage_model: Optional[SlippageModel] = None, - commission_bps: float = 2.0, - stamp_duty_bps: float = 1.0, - overnight_rate_annual: float = 0.0, - bars_per_year: float = 252.0 * 24.0, - ) -> None: - self.impact_model = impact_model or MarketImpactModel() - self.slippage_model = slippage_model or SlippageModel() - self.commission_bps = float(commission_bps) - self.stamp_duty_bps = float(stamp_duty_bps) - self.overnight_rate_annual = float(overnight_rate_annual) - self.bars_per_year = float(bars_per_year) - - # ------------------------------------------------------------------ - def compute_total_cost( - self, - old_weights: np.ndarray, - new_weights: np.ndarray, - adv: np.ndarray, - volatility: np.ndarray, - portfolio_value: float, - market: str = 'ashare', - spread_bps: Optional[np.ndarray] = None, - urgency: float = 0.5, - ) -> TradingCosts: - """Compute all-in transaction costs for a single rebalance event. - - The portfolio transitions from ``old_weights`` to ``new_weights``. - Weights are signed: positive = long, negative = short. Their sum - need not be 1 (allows cash + leverage). - - Parameters - ---------- - old_weights : ndarray of shape (M,) - Current (pre-trade) portfolio weights per asset. - new_weights : ndarray of shape (M,) - Target (post-trade) portfolio weights per asset. - adv : ndarray of shape (M,) - Average daily volume per asset in notional (same currency as - ``portfolio_value``). - volatility : ndarray of shape (M,) - Per-asset annualized volatility (e.g. 0.30 = 30%). - portfolio_value : float - Total portfolio NAV in notional currency. - market : str - ``'ashare'`` or ``'crypto'``. Controls stamp duty defaults. - spread_bps : ndarray of shape (M,), optional - Per-asset bid-ask spread in bps. Falls back to model default. - urgency : float - Execution urgency in [0, 1]. - - Returns - ------- - TradingCosts - Fully decomposed cost object. 
- """ - old_weights = np.asarray(old_weights, dtype=np.float64) - new_weights = np.asarray(new_weights, dtype=np.float64) - adv = np.asarray(adv, dtype=np.float64) - volatility = np.asarray(volatility, dtype=np.float64) - - # Weight deltas and trade notional - delta_weights = new_weights - old_weights # signed - trade_notional = np.abs(delta_weights) * portfolio_value # always >= 0 - trade_direction = np.sign(delta_weights) # +1 buy, -1 sell - - # One-way turnover: sum of absolute weight changes, divided by 2 to - # avoid double-counting buys and sells for a fully-funded portfolio. - # Convention: turnover in [0, 1] for a single rebalance (0=no trade, - # 1=100% of portfolio turned over on one side). - turnover = float(np.sum(np.abs(delta_weights)) / 2.0) - - # ---------------------------------------------------------------- - # 1. Market impact (Almgren-Chriss) - # ---------------------------------------------------------------- - impact_bps_per_asset = self.impact_model.compute_impact( - trade_size=trade_notional, - adv=adv, - volatility=volatility, - direction=trade_direction, - ) - # Portfolio-level impact = notional-weighted average across traded assets - total_trade_notional = float(np.sum(trade_notional)) - if total_trade_notional > 1e-12: - impact_bps = float( - np.sum(impact_bps_per_asset * trade_notional) / total_trade_notional - ) - else: - impact_bps = 0.0 - - # ---------------------------------------------------------------- - # 2. Bid-ask slippage - # ---------------------------------------------------------------- - slippage_bps_per_asset = self.slippage_model.compute_slippage( - trade_size=trade_notional, - spread_bps=spread_bps, - urgency=urgency, - ) - if total_trade_notional > 1e-12: - slippage_bps = float( - np.sum(slippage_bps_per_asset * trade_notional) / total_trade_notional - ) - else: - slippage_bps = 0.0 - - # ---------------------------------------------------------------- - # 3. 
Commission (both buy and sell legs) - # ---------------------------------------------------------------- - # Commission applies to both sides of each trade (enter + exit). - # Here we apply it per side (once now + once on exit = round-trip). - # For a rebalance we pay commission on the traded notional. - commission_bps = self.commission_bps # per-side, applied to traded notional - - # ---------------------------------------------------------------- - # 4. Stamp duty (sell side only) - # ---------------------------------------------------------------- - effective_stamp = 0.0 - if market == 'ashare': - # Identify sell trades: delta_weight < 0 (reducing long) or - # delta_weight > 0 but old position was short (increasing short sell). - # Simplified: stamp duty on any reduction of long exposure. - sell_notional = np.sum(trade_notional[delta_weights < 0]) - if total_trade_notional > 1e-12: - sell_fraction = sell_notional / total_trade_notional - effective_stamp = self.stamp_duty_bps * sell_fraction - # crypto: no stamp duty - - # ---------------------------------------------------------------- - # 5. Financing cost (one bar's worth of overnight carry) - # ---------------------------------------------------------------- - # Per-bar financing cost on leveraged portion. 
For a bar-length h: - # financing_cost = leverage * overnight_rate_annual / bars_per_year - if self.overnight_rate_annual > 0: - leverage = max( - float(np.sum(np.abs(new_weights))) - 1.0, 0.0 - ) # excess over 1x - financing_bps = ( - leverage - * self.overnight_rate_annual - / self.bars_per_year - * 1e4 - ) - else: - financing_bps = 0.0 - - # ---------------------------------------------------------------- - # Aggregate - # ---------------------------------------------------------------- - total_bps = ( - impact_bps - + slippage_bps - + commission_bps - + effective_stamp - + financing_bps - ) - - details = { - "impact_bps_per_asset": impact_bps_per_asset, - "slippage_bps_per_asset": slippage_bps_per_asset, - "delta_weights": delta_weights, - "trade_notional": trade_notional, - "financing_bps": financing_bps, - "sell_stamp_bps": effective_stamp, - } - - return TradingCosts( - market_impact_bps=impact_bps, - slippage_bps=slippage_bps, - commission_bps=commission_bps, - stamp_duty_bps=effective_stamp, - total_bps=total_bps, - turnover=turnover, - details=details, - ) - - # ------------------------------------------------------------------ - @classmethod - def for_ashare( - cls, - lambda_perm: float = 0.1, - eta_temp: float = 0.01, - commission_bps: float = 2.0, - stamp_duty_bps: float = 1.0, - default_spread_bps: float = 3.0, - ) -> "TransactionCostCalculator": - """Convenience constructor with A-share defaults. - - All-in round-trip cost at low turnover ≈ 7-9 bps, consistent with - the HelixFactor benchmark sweep range. - - Parameters - ---------- - lambda_perm : float - Permanent impact coefficient (default 0.1). - eta_temp : float - Temporary impact coefficient (default 0.01). - commission_bps : float - One-way commission (default 2 bps). - stamp_duty_bps : float - Sell-side stamp duty (default 1 bps; CSRC mandated since 2023). - default_spread_bps : float - Default spread for assets without explicit spread data. 
- """ - return cls( - impact_model=MarketImpactModel( - lambda_perm=lambda_perm, - eta_temp=eta_temp, - ), - slippage_model=SlippageModel(default_spread_bps=default_spread_bps), - commission_bps=commission_bps, - stamp_duty_bps=stamp_duty_bps, - ) - - @classmethod - def for_crypto( - cls, - lambda_perm: float = 0.05, - eta_temp: float = 0.005, - commission_bps: float = 1.0, - default_spread_bps: float = 1.5, - ) -> "TransactionCostCalculator": - """Convenience constructor with crypto exchange defaults. - - Parameters - ---------- - lambda_perm : float - Permanent impact coefficient. Lower than A-shares due to - continuous 24/7 liquidity provision. - eta_temp : float - Temporary impact coefficient. - commission_bps : float - Maker/taker blended commission (default 1 bps). - default_spread_bps : float - Default effective spread for top-20 pairs (default 1.5 bps). - """ - return cls( - impact_model=MarketImpactModel( - lambda_perm=lambda_perm, - eta_temp=eta_temp, - ), - slippage_model=SlippageModel(default_spread_bps=default_spread_bps), - commission_bps=commission_bps, - stamp_duty_bps=0.0, # no stamp duty on crypto - ) diff --git a/src/factorminer/factorminer/memory/__init__.py b/src/factorminer/factorminer/memory/__init__.py deleted file mode 100644 index 0dfe551..0000000 --- a/src/factorminer/factorminer/memory/__init__.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Experience memory system for mining loop feedback. 
- -Implements the memory M = {S, P_succ, P_fail, I} with operators: -- F(M, tau): Memory Formation - extract experience from mining trajectory -- E(M, M_form): Memory Evolution - consolidate and prune memory -- R(M, L): Memory Retrieval - context-dependent retrieval for LLM prompts - -Phase 2 additions: -- Knowledge Graph: factor lineage and structural analysis -- Embeddings: semantic formula similarity and deduplication -- Enhanced Retrieval: KG + embedding augmented retrieval -""" - -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) -from src.factorminer.factorminer.memory.formation import form_memory -from src.factorminer.factorminer.memory.evolution import evolve_memory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory -from src.factorminer.factorminer.memory.experience_memory import ExperienceMemoryManager - -# Phase 2: Optional imports (graceful if dependencies missing) -try: - from factorminer.memory.knowledge_graph import ( - FactorKnowledgeGraph, - FactorNode, - EdgeType, - ) -except ImportError: - pass - -try: - from factorminer.memory.embeddings import FormulaEmbedder -except ImportError: - pass - -try: - from factorminer.memory.kg_retrieval import retrieve_memory_enhanced -except ImportError: - pass - -try: - from factorminer.memory.online_regime_memory import ( - OnlineRegimeMemory, - OnlineMemoryUpdater, - RegimeSpecificPatternStore, - RegimeTransitionForecaster, - MemoryForgetCurve, - ) -except ImportError: - pass - -__all__ = [ - # Data structures - "ExperienceMemory", - "MiningState", - "SuccessPattern", - "ForbiddenDirection", - "StrategicInsight", - # Operators - "form_memory", - "evolve_memory", - "retrieve_memory", - # Manager - "ExperienceMemoryManager", - # Phase 2: Knowledge Graph - "FactorKnowledgeGraph", - "FactorNode", - "EdgeType", - # Phase 2: Embeddings - "FormulaEmbedder", - # Phase 2: Enhanced 
Retrieval - "retrieve_memory_enhanced", - # Phase 2: Online Regime Memory - "OnlineRegimeMemory", - "OnlineMemoryUpdater", - "RegimeSpecificPatternStore", - "RegimeTransitionForecaster", - "MemoryForgetCurve", -] diff --git a/src/factorminer/factorminer/memory/embeddings.py b/src/factorminer/factorminer/memory/embeddings.py deleted file mode 100644 index d0d6073..0000000 --- a/src/factorminer/factorminer/memory/embeddings.py +++ /dev/null @@ -1,392 +0,0 @@ -"""Semantic formula embeddings for factor similarity and deduplication. - -Converts DSL formulas into natural language descriptions and encodes -them as dense vectors. Supports: -- sentence-transformers for high-quality embeddings (optional) -- FAISS for fast k-NN search (optional) -- TF-IDF fallback when sentence-transformers is unavailable -- Brute-force cosine fallback when FAISS is unavailable -""" - -from __future__ import annotations - -import re -from typing import Dict, List, Optional, Tuple - -import numpy as np - -# Optional dependency flags -- resolved at runtime -_has_sentence_transformers = False -_has_faiss = False -_has_sklearn = False - -try: - from sentence_transformers import SentenceTransformer # type: ignore[import-untyped] - - _has_sentence_transformers = True -except ImportError: - SentenceTransformer = None # type: ignore[assignment,misc] - -try: - import faiss # type: ignore[import-untyped] - - _has_faiss = True -except ImportError: - faiss = None # type: ignore[assignment] - -try: - from sklearn.feature_extraction.text import TfidfVectorizer # type: ignore[import-untyped] - - _has_sklearn = True -except ImportError: - TfidfVectorizer = None # type: ignore[assignment] - - -# --------------------------------------------------------------------------- -# Operator name -> natural-language expansion table -# --------------------------------------------------------------------------- - -_OPERATOR_EXPANSIONS: Dict[str, str] = { - # Arithmetic - "Add": "addition", - "Sub": "subtraction", - 
"Mul": "multiplication", - "Div": "division", - "Neg": "negation", - "Abs": "absolute value", - "Log": "logarithm", - "Sqrt": "square root", - "Power": "power", - "Sign": "sign", - "Max": "maximum", - "Min": "minimum", - # Rolling / time-series - "Mean": "rolling mean", - "Median": "rolling median", - "Std": "rolling standard deviation", - "Var": "rolling variance", - "Skew": "rolling skewness", - "Kurt": "rolling kurtosis", - "Sum": "rolling sum", - "TsMax": "time-series maximum", - "TsMin": "time-series minimum", - "TsRank": "time-series rank", - "TsArgMax": "time-series argmax", - "TsArgMin": "time-series argmin", - "Delta": "change over period", - "Delay": "lagged value", - "Return": "return over period", - "Corr": "rolling correlation", - "Cov": "rolling covariance", - "TsLinRegSlope": "linear regression slope", - "TsLinRegResid": "linear regression residual", - "TsLinRegIntercept": "linear regression intercept", - # Smoothing - "EMA": "exponential moving average", - "WMA": "weighted moving average", - "SMA": "simple moving average", - "DEMA": "double exponential moving average", - # Cross-sectional - "CsRank": "cross-sectional rank", - "CsZScore": "cross-sectional z-score", - "CsDemean": "cross-sectional demeaning", - "CsScale": "cross-sectional scaling", - # Logical / conditional - "IfElse": "conditional selection", - "Greater": "greater-than comparison", - "Less": "less-than comparison", - "Equal": "equality comparison", - "And": "logical and", - "Or": "logical or", - "Not": "logical not", -} - -# Feature name -> natural-language -_FEATURE_EXPANSIONS: Dict[str, str] = { - "$close": "close price", - "$open": "open price", - "$high": "high price", - "$low": "low price", - "$volume": "volume", - "$amt": "turnover amount", - "$vwap": "volume-weighted average price", - "$returns": "returns", -} - - -class FormulaEmbedder: - """Embed DSL formulas as dense vectors for similarity search. 
- - Parameters - ---------- - model_name : str - Name of a sentence-transformers model (used only when the - library is installed). - use_faiss : bool - Whether to use FAISS for approximate nearest-neighbour search. - Falls back to brute-force cosine similarity if unavailable. - """ - - def __init__( - self, - model_name: str = "all-MiniLM-L6-v2", - use_faiss: bool = True, - ) -> None: - self._model_name = model_name - self._use_faiss = use_faiss and _has_faiss - - # Lazy-loaded model / vectoriser - self._model: Optional[SentenceTransformer] = None # type: ignore[type-arg] - self._tfidf: Optional[TfidfVectorizer] = None # type: ignore[type-arg] - self._tfidf_dirty: bool = False # whether TF-IDF needs refit - - # Cache: factor_id -> (embedding, text) - self._cache: Dict[str, Tuple[np.ndarray, str]] = {} - # Ordered list mirroring cache for FAISS index alignment - self._ids: List[str] = [] - - # FAISS index (rebuilt lazily) - self._index: Optional[object] = None - self._index_dirty: bool = True - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def embed(self, factor_id: str, formula: str) -> np.ndarray: - """Compute (or retrieve cached) embedding for a formula. - - Parameters - ---------- - factor_id : str - Unique identifier used for caching. - formula : str - DSL formula to embed. - - Returns - ------- - ndarray - Embedding vector (float32). 
- """ - if factor_id in self._cache: - return self._cache[factor_id][0] - - text = self._formula_to_text(formula) - vec = self._encode(text) - self._cache[factor_id] = (vec, text) - self._ids.append(factor_id) - self._index_dirty = True - self._tfidf_dirty = True - return vec - - def remove(self, factor_id: str) -> bool: - """Remove a cached embedding by factor id.""" - if factor_id not in self._cache: - return False - - self._cache.pop(factor_id, None) - self._ids = [fid for fid in self._ids if fid != factor_id] - self._index = None - self._index_dirty = True - self._tfidf_dirty = True - return True - - def clear(self) -> None: - """Clear all cached embeddings and search state.""" - self._cache.clear() - self._ids.clear() - self._index = None - self._index_dirty = True - self._tfidf = None - self._tfidf_dirty = False - - @property - def cache_size(self) -> int: - """Return the number of cached factor embeddings.""" - return len(self._cache) - - def find_nearest( - self, - formula: str, - k: int = 5, - ) -> List[Tuple[str, float]]: - """Find the *k* most similar cached formulas. - - Parameters - ---------- - formula : str - Query formula (does not need to be cached). - k : int - Number of neighbours to return. - - Returns - ------- - list of (factor_id, similarity) - Sorted by descending similarity. - """ - if not self._cache: - return [] - - query_vec = self._encode(self._formula_to_text(formula)) - k = min(k, len(self._cache)) - - if self._use_faiss and _has_faiss: - return self._faiss_search(query_vec, k) - return self._brute_force_search(query_vec, k) - - def is_semantic_duplicate( - self, - formula: str, - threshold: float = 0.92, - ) -> Optional[str]: - """Check if *formula* is a near-duplicate of a cached factor. - - Returns the factor_id of the most similar cached factor if the - cosine similarity exceeds *threshold*, or ``None``. 
- """ - results = self.find_nearest(formula, k=1) - if results and results[0][1] >= threshold: - return results[0][0] - return None - - # ------------------------------------------------------------------ - # Formula -> text conversion - # ------------------------------------------------------------------ - - @staticmethod - def _formula_to_text(formula: str) -> str: - """Convert a DSL formula into a natural-language description. - - Expands operator and feature names for better semantic matching. - """ - text = formula - - # Expand operators (longest-first to avoid partial matches) - for op in sorted(_OPERATOR_EXPANSIONS, key=len, reverse=True): - text = text.replace(op, _OPERATOR_EXPANSIONS[op]) - - # Expand features - for feat in _FEATURE_EXPANSIONS: - text = text.replace(feat, _FEATURE_EXPANSIONS[feat]) - - # Clean up punctuation into spaces - text = re.sub(r"[(),]+", " ", text) - text = re.sub(r"\s+", " ", text).strip() - return text.lower() - - # ------------------------------------------------------------------ - # Encoding backends - # ------------------------------------------------------------------ - - def _encode(self, text: str) -> np.ndarray: - """Encode a single text string into a unit-norm vector.""" - if _has_sentence_transformers: - return self._encode_transformer(text) - if _has_sklearn: - return self._encode_tfidf(text) - # Absolute fallback: hash-based bag of words - return self._encode_hash(text) - - def _encode_transformer(self, text: str) -> np.ndarray: - if self._model is None: - self._model = SentenceTransformer(self._model_name) - vec = self._model.encode(text, convert_to_numpy=True) - vec = np.asarray(vec, dtype=np.float32).flatten() - norm = np.linalg.norm(vec) - if norm > 0: - vec /= norm - return vec - - def _encode_tfidf(self, text: str) -> np.ndarray: - """Encode using TF-IDF over all cached texts + query. - - Because TF-IDF vocabulary can change when new documents are - added, we refit when dirty. 
This is cheap for the expected - document counts (hundreds to low thousands). - """ - if self._tfidf is None: - self._tfidf = TfidfVectorizer(max_features=512) - self._tfidf_dirty = True - - # Collect all known texts + this one - corpus = [t for _, t in self._cache.values()] - query_idx = len(corpus) - corpus.append(text) - - # Always refit because vocab may have grown - matrix = self._tfidf.fit_transform(corpus) - vec = np.asarray(matrix[query_idx].toarray(), dtype=np.float32).flatten() - - # Re-encode cached entries with updated vocab - for i, fid in enumerate(self._ids): - updated = np.asarray(matrix[i].toarray(), dtype=np.float32).flatten() - norm = np.linalg.norm(updated) - if norm > 0: - updated /= norm - self._cache[fid] = (updated, self._cache[fid][1]) - - norm = np.linalg.norm(vec) - if norm > 0: - vec /= norm - self._tfidf_dirty = False - self._index_dirty = True - return vec - - @staticmethod - def _encode_hash(text: str, dim: int = 128) -> np.ndarray: - """Ultra-simple hash-based embedding fallback.""" - vec = np.zeros(dim, dtype=np.float32) - for token in text.split(): - idx = hash(token) % dim - vec[idx] += 1.0 - norm = np.linalg.norm(vec) - if norm > 0: - vec /= norm - return vec - - # ------------------------------------------------------------------ - # Search backends - # ------------------------------------------------------------------ - - def _rebuild_index(self) -> None: - """Rebuild the FAISS ``IndexFlatIP`` from cached embeddings.""" - if not self._cache or not _has_faiss: - return - vecs = np.stack([self._cache[fid][0] for fid in self._ids]) - dim = vecs.shape[1] - self._index = faiss.IndexFlatIP(dim) - self._index.add(vecs) # type: ignore[union-attr] - self._index_dirty = False - - def _faiss_search( - self, - query: np.ndarray, - k: int, - ) -> List[Tuple[str, float]]: - if self._index_dirty: - self._rebuild_index() - if self._index is None: - return self._brute_force_search(query, k) - - distances, indices = self._index.search( # type: 
ignore[union-attr] - query.reshape(1, -1), k - ) - results: List[Tuple[str, float]] = [] - for dist, idx in zip(distances[0], indices[0]): - if idx < 0 or idx >= len(self._ids): - continue - results.append((self._ids[idx], float(dist))) - return results - - def _brute_force_search( - self, - query: np.ndarray, - k: int, - ) -> List[Tuple[str, float]]: - sims: List[Tuple[str, float]] = [] - for fid in self._ids: - vec = self._cache[fid][0] - sim = float(np.dot(query, vec)) - sims.append((fid, sim)) - sims.sort(key=lambda x: x[1], reverse=True) - return sims[:k] diff --git a/src/factorminer/factorminer/memory/evolution.py b/src/factorminer/factorminer/memory/evolution.py deleted file mode 100644 index 18ba05a..0000000 --- a/src/factorminer/factorminer/memory/evolution.py +++ /dev/null @@ -1,482 +0,0 @@ -"""Memory Evolution operator E(M, M_form). - -Consolidates newly formed experience into the existing memory: -- Merges redundant success/failure patterns -- Discards low-utility entries -- Reclassifies patterns that have changed behavior -- Caps memory size according to configuration limits -""" - -from __future__ import annotations - -from typing import Dict, List, Optional - -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) - - -def _merge_success_patterns( - existing: List[SuccessPattern], - new: List[SuccessPattern], -) -> List[SuccessPattern]: - """Merge new success patterns into existing ones. - - Patterns with the same name are consolidated by combining examples - and updating occurrence counts. Novel patterns are appended. 
- """ - merged: Dict[str, SuccessPattern] = {} - - for pat in existing: - merged[pat.name] = SuccessPattern( - name=pat.name, - description=pat.description, - template=pat.template, - success_rate=pat.success_rate, - example_factors=list(pat.example_factors), - occurrence_count=pat.occurrence_count, - ) - - for pat in new: - if pat.name in merged: - existing_pat = merged[pat.name] - existing_pat.occurrence_count += pat.occurrence_count - # Merge example factors, dedup - seen = set(existing_pat.example_factors) - for ex in pat.example_factors: - if ex not in seen: - existing_pat.example_factors.append(ex) - seen.add(ex) - # Cap examples - if len(existing_pat.example_factors) > 10: - existing_pat.example_factors = existing_pat.example_factors[-10:] - # Update description if new one is more informative - if len(pat.description) > len(existing_pat.description): - existing_pat.description = pat.description - # Promote success rate based on accumulated evidence - if existing_pat.occurrence_count >= 10: - existing_pat.success_rate = "High" - elif existing_pat.occurrence_count >= 5: - existing_pat.success_rate = "Medium" - else: - merged[pat.name] = SuccessPattern( - name=pat.name, - description=pat.description, - template=pat.template, - success_rate=pat.success_rate, - example_factors=list(pat.example_factors), - occurrence_count=pat.occurrence_count, - ) - - return list(merged.values()) - - -def _merge_forbidden_directions( - existing: List[ForbiddenDirection], - new: List[ForbiddenDirection], -) -> List[ForbiddenDirection]: - """Merge new forbidden directions into existing ones.""" - merged: Dict[str, ForbiddenDirection] = {} - - for fd in existing: - merged[fd.name] = ForbiddenDirection( - name=fd.name, - description=fd.description, - correlated_factors=list(fd.correlated_factors), - typical_correlation=fd.typical_correlation, - reason=fd.reason, - occurrence_count=fd.occurrence_count, - ) - - for fd in new: - if fd.name in merged: - existing_fd = merged[fd.name] - 
existing_fd.occurrence_count += fd.occurrence_count - # Merge correlated factors - seen = set(existing_fd.correlated_factors) - for cf in fd.correlated_factors: - if cf not in seen: - existing_fd.correlated_factors.append(cf) - seen.add(cf) - if len(existing_fd.correlated_factors) > 10: - existing_fd.correlated_factors = existing_fd.correlated_factors[-10:] - # Update correlation as weighted average - total_count = existing_fd.occurrence_count - if total_count > 0 and fd.typical_correlation > 0: - old_weight = (total_count - fd.occurrence_count) / total_count - new_weight = fd.occurrence_count / total_count - existing_fd.typical_correlation = ( - old_weight * existing_fd.typical_correlation - + new_weight * fd.typical_correlation - ) - if len(fd.reason) > len(existing_fd.reason): - existing_fd.reason = fd.reason - else: - merged[fd.name] = ForbiddenDirection( - name=fd.name, - description=fd.description, - correlated_factors=list(fd.correlated_factors), - typical_correlation=fd.typical_correlation, - reason=fd.reason, - occurrence_count=fd.occurrence_count, - ) - - return list(merged.values()) - - -def _merge_insights( - existing: List[StrategicInsight], - new: List[StrategicInsight], -) -> List[StrategicInsight]: - """Merge new insights into existing, deduplicating similar ones. - - Insights with substantially overlapping text are consolidated. 
- """ - merged: List[StrategicInsight] = list(existing) - - for new_insight in new: - is_duplicate = False - new_lower = new_insight.insight.lower() - for i, existing_insight in enumerate(merged): - existing_lower = existing_insight.insight.lower() - # Simple similarity: check if core words overlap significantly - new_words = set(new_lower.split()) - existing_words = set(existing_lower.split()) - if len(new_words) > 0 and len(existing_words) > 0: - overlap = len(new_words & existing_words) - max_len = max(len(new_words), len(existing_words)) - if overlap / max_len > 0.6: - # Keep the one from the more recent batch - if new_insight.batch_source > existing_insight.batch_source: - merged[i] = new_insight - is_duplicate = True - break - if not is_duplicate: - merged.append(new_insight) - - return merged - - -def _reclassify_patterns( - success_patterns: List[SuccessPattern], - forbidden_directions: List[ForbiddenDirection], - failure_threshold: int = 5, -) -> tuple[List[SuccessPattern], List[ForbiddenDirection]]: - """Reclassify patterns that have changed behavior. - - If a success pattern consistently appears in forbidden directions - (e.g., VWAP variant with rho=0.82), move it from success to forbidden. 
- """ - forbidden_names = {fd.name for fd in forbidden_directions} - - remaining_success: List[SuccessPattern] = [] - new_forbidden: List[ForbiddenDirection] = [] - - for pat in success_patterns: - # Check if this pattern name overlaps with forbidden directions - should_reclassify = False - matching_forbidden: Optional[ForbiddenDirection] = None - - for fd in forbidden_directions: - # Check for name overlap or keyword overlap - if _names_overlap(pat.name, fd.name): - if fd.occurrence_count >= failure_threshold: - should_reclassify = True - matching_forbidden = fd - break - - if should_reclassify and matching_forbidden is not None: - # Demote: success -> forbidden - new_forbidden.append(ForbiddenDirection( - name=pat.name, - description=f"Reclassified from success: {pat.description}", - correlated_factors=matching_forbidden.correlated_factors, - typical_correlation=matching_forbidden.typical_correlation, - reason=f"Initially promising but consistently produces correlated factors " - f"(rho={matching_forbidden.typical_correlation:.2f})", - occurrence_count=matching_forbidden.occurrence_count, - )) - else: - remaining_success.append(pat) - - all_forbidden = forbidden_directions + new_forbidden - return remaining_success, all_forbidden - - -def _names_overlap(name_a: str, name_b: str) -> bool: - """Check if two pattern names refer to the same concept.""" - a_words = set(name_a.lower().replace("/", " ").replace("_", " ").split()) - b_words = set(name_b.lower().replace("/", " ").replace("_", " ").split()) - # Remove common filler words - filler = {"the", "a", "an", "of", "in", "with", "for", "and", "or"} - a_words -= filler - b_words -= filler - if not a_words or not b_words: - return False - overlap = len(a_words & b_words) - return overlap >= min(2, min(len(a_words), len(b_words))) - - -def _prune_low_utility( - success_patterns: List[SuccessPattern], - forbidden_directions: List[ForbiddenDirection], - insights: List[StrategicInsight], - min_occurrences: int = 1, -) -> 
tuple[List[SuccessPattern], List[ForbiddenDirection], List[StrategicInsight]]: - """Remove entries with too few occurrences to be reliable. - - Initial knowledge base entries (occurrence_count=0) are preserved. - """ - pruned_success = [ - p for p in success_patterns - if p.occurrence_count >= min_occurrences or p.occurrence_count == 0 - ] - pruned_forbidden = [ - f for f in forbidden_directions - if f.occurrence_count >= min_occurrences or f.occurrence_count == 0 - ] - # Insights are lightweight, keep all - return pruned_success, pruned_forbidden, insights - - -def _cap_memory_size( - success_patterns: List[SuccessPattern], - forbidden_directions: List[ForbiddenDirection], - insights: List[StrategicInsight], - max_success: int = 50, - max_forbidden: int = 100, - max_insights: int = 30, -) -> tuple[List[SuccessPattern], List[ForbiddenDirection], List[StrategicInsight]]: - """Enforce maximum memory sizes by keeping the most useful entries.""" - # Sort success patterns by occurrence count (most useful first) - if len(success_patterns) > max_success: - success_patterns = sorted( - success_patterns, key=lambda p: p.occurrence_count, reverse=True - )[:max_success] - - # Sort forbidden directions by occurrence count - if len(forbidden_directions) > max_forbidden: - forbidden_directions = sorted( - forbidden_directions, key=lambda f: f.occurrence_count, reverse=True - )[:max_forbidden] - - # Keep most recent insights - if len(insights) > max_insights: - insights = sorted( - insights, key=lambda i: i.batch_source, reverse=True - )[:max_insights] - - return success_patterns, forbidden_directions, insights - - -# --------------------------------------------------------------------------- -# Public API: Memory Evolution -# --------------------------------------------------------------------------- - -def evolve_memory( - memory: ExperienceMemory, - formed_memory: ExperienceMemory, - max_success_patterns: int = 50, - max_failure_patterns: int = 100, - max_insights: int = 30, 
-) -> ExperienceMemory: - """Memory Evolution operator E(M, M_form). - - Consolidates newly formed experience into the existing memory. - - Parameters - ---------- - memory : ExperienceMemory - Current persistent memory. - formed_memory : ExperienceMemory - Newly formed memory from the latest batch (output of form_memory). - max_success_patterns : int - Maximum number of success patterns to retain. - max_failure_patterns : int - Maximum number of forbidden directions to retain. - max_insights : int - Maximum number of strategic insights to retain. - - Returns - ------- - ExperienceMemory - Updated memory with consolidated experience. - """ - # 1. Merge patterns - merged_success = _merge_success_patterns( - memory.success_patterns, formed_memory.success_patterns - ) - merged_forbidden = _merge_forbidden_directions( - memory.forbidden_directions, formed_memory.forbidden_directions - ) - merged_insights = _merge_insights(memory.insights, formed_memory.insights) - - # 2. Reclassify patterns that have changed behavior - merged_success, merged_forbidden = _reclassify_patterns( - merged_success, merged_forbidden - ) - - # 3. Prune low-utility entries - merged_success, merged_forbidden, merged_insights = _prune_low_utility( - merged_success, merged_forbidden, merged_insights - ) - - # 4. Cap memory size - merged_success, merged_forbidden, merged_insights = _cap_memory_size( - merged_success, merged_forbidden, merged_insights, - max_success=max_success_patterns, - max_forbidden=max_failure_patterns, - max_insights=max_insights, - ) - - # 5. 
Update state - new_state = formed_memory.state - - return ExperienceMemory( - state=new_state, - success_patterns=merged_success, - forbidden_directions=merged_forbidden, - insights=merged_insights, - version=memory.version + 1, - ) - - -# --------------------------------------------------------------------------- -# Phase 2: Online confidence decay helpers (added for HelixFactor) -# --------------------------------------------------------------------------- - -def apply_confidence_decay( - memory: "ExperienceMemory", - decay_factor: float = 0.99, - min_confidence: float = 0.05, -) -> "ExperienceMemory": - """Return new ExperienceMemory with decayed pattern confidences. - - Seed patterns (occurrence_count == 0) are immune to decay. - Patterns below min_confidence are pruned. - - Parameters - ---------- - memory: - Input ExperienceMemory (not mutated — immutable-style). - decay_factor: - Multiplicative decay per call (e.g. 0.99 = 1% decay per iteration). - min_confidence: - Patterns with confidence < this threshold after decay are removed. - - Returns - ------- - ExperienceMemory - New instance with decayed/pruned patterns. - """ - import dataclasses - - new_patterns = [] - for p in memory.success_patterns: - if p.occurrence_count == 0: - # seed pattern — never decay - new_patterns.append(p) - continue - new_conf = getattr(p, "confidence", 1.0) * decay_factor - if new_conf >= min_confidence: - try: - new_patterns.append(dataclasses.replace(p, confidence=new_conf)) - except TypeError: - new_patterns.append(p) - - return dataclasses.replace(memory, success_patterns=new_patterns) - - -def bump_pattern_confidence( - memory: "ExperienceMemory", - keywords: list, - boost: float = 0.05, - max_confidence: float = 1.0, -) -> "ExperienceMemory": - """Return new ExperienceMemory with confidence boosted for matching patterns. - - Patterns whose description or template contain any of the keywords receive - a confidence boost. 
- - Parameters - ---------- - memory: - Input ExperienceMemory (not mutated). - keywords: - List of strings to match against pattern descriptions. - boost: - Additive confidence increase for matching patterns. - max_confidence: - Confidence cap. - - Returns - ------- - ExperienceMemory - New instance with boosted pattern confidences. - """ - import dataclasses - - new_patterns = [] - for p in memory.success_patterns: - desc = (getattr(p, "description", "") or "").lower() - tmpl = (getattr(p, "template", "") or "").lower() - matched = any(kw.lower() in desc or kw.lower() in tmpl for kw in keywords) - if matched: - new_conf = min(getattr(p, "confidence", 1.0) + boost, max_confidence) - try: - new_patterns.append(dataclasses.replace(p, confidence=new_conf)) - except TypeError: - new_patterns.append(p) - else: - new_patterns.append(p) - - return dataclasses.replace(memory, success_patterns=new_patterns) - - -def penalise_pattern_confidence( - memory: "ExperienceMemory", - keywords: list, - penalty: float = 0.15, - min_confidence: float = 0.05, -) -> "ExperienceMemory": - """Return new ExperienceMemory with confidence penalised for matching patterns. - - Parameters - ---------- - memory: - Input ExperienceMemory (not mutated). - keywords: - List of strings to match against pattern descriptions. - penalty: - Multiplicative penalty factor (confidence *= (1 - penalty)). - min_confidence: - Patterns below this threshold after penalty are pruned. - - Returns - ------- - ExperienceMemory - New instance with penalised/pruned pattern confidences. 
- """ - import dataclasses - - new_patterns = [] - for p in memory.success_patterns: - desc = (getattr(p, "description", "") or "").lower() - tmpl = (getattr(p, "template", "") or "").lower() - matched = any(kw.lower() in desc or kw.lower() in tmpl for kw in keywords) - if matched: - new_conf = getattr(p, "confidence", 1.0) * (1.0 - penalty) - if new_conf < min_confidence: - continue # prune - try: - new_patterns.append(dataclasses.replace(p, confidence=new_conf)) - except TypeError: - new_patterns.append(p) - else: - new_patterns.append(p) - - return dataclasses.replace(memory, success_patterns=new_patterns) diff --git a/src/factorminer/factorminer/memory/experience_memory.py b/src/factorminer/factorminer/memory/experience_memory.py deleted file mode 100644 index 04cfb71..0000000 --- a/src/factorminer/factorminer/memory/experience_memory.py +++ /dev/null @@ -1,594 +0,0 @@ -"""Main ExperienceMemory manager class. - -Provides the high-level API for the experience memory system: -- Initializes with default patterns from the paper (Tables 4 and 5) -- Persists to/from JSON -- update(trajectory) orchestrates formation + evolution -- retrieve(library_state) performs context-dependent retrieval -- Optional knowledge graph and embedding support for Phase 2 -""" - -from __future__ import annotations - -import json -import logging -from pathlib import Path -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) -from src.factorminer.factorminer.memory.formation import form_memory -from src.factorminer.factorminer.memory.evolution import evolve_memory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory - -# Optional Phase 2 imports -try: - from factorminer.memory.knowledge_graph import FactorKnowledgeGraph, FactorNode -except ImportError: - FactorKnowledgeGraph = None # type: ignore[assignment,misc] - 
FactorNode = None # type: ignore[assignment,misc] - -try: - from factorminer.memory.embeddings import FormulaEmbedder -except ImportError: - FormulaEmbedder = None # type: ignore[assignment,misc] - -try: - from factorminer.memory.kg_retrieval import retrieve_memory_enhanced -except ImportError: - retrieve_memory_enhanced = None # type: ignore[assignment] - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Default knowledge base from the paper -# --------------------------------------------------------------------------- - -def _default_success_patterns() -> List[SuccessPattern]: - """Initial success patterns from FactorMiner Table 4.""" - return [ - SuccessPattern( - name="Higher Moment Regimes", - description=( - "Use Skew/Kurt as IfElse conditions to route between different " - "factor computations. High-moment regime switching captures " - "non-linear market states effectively." - ), - template="IfElse(Skew($close, 20), , )", - success_rate="High", - example_factors=["HMR_001", "HMR_002"], - occurrence_count=0, - ), - SuccessPattern( - name="PV Corr Interaction", - description=( - "Price-volume correlation interaction: use rolling Corr($close, $volume) " - "as a signal or conditioning variable. Captures supply-demand imbalance " - "through price-volume divergence." - ), - template="CsRank(Corr($close, $volume, 20))", - success_rate="High", - example_factors=["PVC_001", "PVC_002"], - occurrence_count=0, - ), - SuccessPattern( - name="Robust Efficiency", - description=( - "Use Median for noise filtering instead of Mean. Rolling median " - "is more robust to outliers in intraday data, producing factors " - "with higher ICIR." 
- ), - template="CsRank(Div(Median($close, 10), Median($close, 60)))", - success_rate="High", - example_factors=["RE_001"], - occurrence_count=0, - ), - SuccessPattern( - name="Smoothed Efficiency Rank", - description=( - "Combine EMA smoothing with CsRank cross-sectional normalization. " - "EMA reduces noise while CsRank ensures cross-sectional comparability." - ), - template="CsRank(EMA(Div($close, Mean($close, 20)), 10))", - success_rate="High", - example_factors=["SER_001", "SER_002"], - occurrence_count=0, - ), - SuccessPattern( - name="Trend Regression Adaptive", - description=( - "Use TsLinRegSlope, TsLinRegResid, or rolling R-squared to capture " - "trend strength and mean reversion. Regression residuals identify " - "deviations from local trends." - ), - template="CsRank(TsLinRegSlope($close, 20))", - success_rate="High", - example_factors=["TRA_001", "TRA_002"], - occurrence_count=0, - ), - SuccessPattern( - name="Logical Or Extreme Regimes", - description=( - "Use Or/And with Greater/Less to combine multiple extreme-value " - "conditions. Captures compound regime states that single indicators miss." - ), - template="IfElse(Or(Greater(Skew($returns, 20), 1), Less(Kurt($returns, 20), -1)), , )", - success_rate="Medium", - example_factors=["LOR_001"], - occurrence_count=0, - ), - SuccessPattern( - name="Kurtosis Regime", - description=( - "Use rolling kurtosis to detect fat-tail regimes and switch " - "factor behavior accordingly. High kurtosis indicates regime " - "changes and trend breaks." - ), - template="IfElse(Kurt($returns, 20), CsRank(Std($returns, 10)), CsRank(Mean($returns, 10)))", - success_rate="Medium", - example_factors=["KR_001"], - occurrence_count=0, - ), - SuccessPattern( - name="Amt Efficiency Rank Interaction", - description=( - "Combine $amt (turnover) with efficiency ratios and CsRank. " - "Amount-weighted efficiency captures liquidity-adjusted momentum." 
- ), - template="CsRank(Div(EMA($amt, 5), EMA($amt, 20)))", - success_rate="Medium", - example_factors=["AER_001"], - occurrence_count=0, - ), - ] - - -def _default_forbidden_directions() -> List[ForbiddenDirection]: - """Initial forbidden directions from FactorMiner Table 5.""" - return [ - ForbiddenDirection( - name="Standardized Returns/Amount", - description=( - "CsZScore or Std-normalized $returns and $amt variants. " - "These produce a cluster of highly correlated factors." - ), - correlated_factors=["std_ret_cluster"], - typical_correlation=0.6, - reason="Standardized return/amount variants cluster with rho > 0.6", - occurrence_count=0, - ), - ForbiddenDirection( - name="VWAP Deviation variants", - description=( - "Factors based on deviation from VWAP (Sub($close, $vwap) or " - "Delta($vwap)). All VWAP deviation variants converge to the " - "same signal." - ), - correlated_factors=["vwap_dev_cluster"], - typical_correlation=0.5, - reason="VWAP deviation variants produce highly correlated factors (rho > 0.5)", - occurrence_count=0, - ), - ForbiddenDirection( - name="Simple Delta Reversal", - description=( - "Simple price-change reversal factors using Delta($close) or " - "Neg(Return($close)). These are well-known and already " - "saturated in most factor libraries." - ), - correlated_factors=["delta_rev_cluster"], - typical_correlation=0.5, - reason="Simple delta-based reversal factors are redundant (rho > 0.5)", - occurrence_count=0, - ), - ForbiddenDirection( - name="WMA/EMA Smoothed Efficiency", - description=( - "Smoothing the same base signal with WMA, EMA, SMA, DEMA " - "produces nearly identical factors. Different smoothing methods " - "on the same input do not add diversity." 
- ), - correlated_factors=["smoothed_eff_cluster"], - typical_correlation=0.9, - reason="WMA/EMA/SMA smoothed efficiency variants nearly identical (rho > 0.9)", - occurrence_count=0, - ), - ] - - -def _default_insights() -> List[StrategicInsight]: - """Initial strategic insights from the paper.""" - return [ - StrategicInsight( - insight="Non-linear transformations (IfElse, Skew, Kurt) outperform linear ones", - evidence="Paper finding: regime-switching factors consistently achieve higher IC", - batch_source=0, - ), - StrategicInsight( - insight="Cross-sectional ranking (CsRank) as final layer improves factor stability", - evidence="CsRank normalization reduces outlier sensitivity and improves ICIR", - batch_source=0, - ), - StrategicInsight( - insight="Combining operators from different categories produces more diverse factors", - evidence="Multi-category composition (e.g., Statistical + Logical + CrossSectional) " - "reduces correlation with existing library members", - batch_source=0, - ), - ] - - -# --------------------------------------------------------------------------- -# Manager class -# --------------------------------------------------------------------------- - -class ExperienceMemoryManager: - """High-level manager for the experience memory system. - - Orchestrates formation, evolution, retrieval, and persistence of the - experience memory M across mining sessions. - - Parameters - ---------- - max_success_patterns : int - Maximum number of success patterns to retain. - max_failure_patterns : int - Maximum number of forbidden directions to retain. - max_insights : int - Maximum number of strategic insights to retain. 
- """ - - def __init__( - self, - max_success_patterns: int = 50, - max_failure_patterns: int = 100, - max_insights: int = 30, - enable_knowledge_graph: bool = False, - enable_embeddings: bool = False, - ) -> None: - self.max_success_patterns = max_success_patterns - self.max_failure_patterns = max_failure_patterns - self.max_insights = max_insights - self._batch_counter = 0 - - # Initialize with default knowledge base - self.memory = ExperienceMemory( - state=MiningState(), - success_patterns=_default_success_patterns(), - forbidden_directions=_default_forbidden_directions(), - insights=_default_insights(), - version=0, - ) - - # Phase 2: Optional knowledge graph - self.kg: Optional[FactorKnowledgeGraph] = None # type: ignore[type-arg] - if enable_knowledge_graph: - if FactorKnowledgeGraph is not None: - self.kg = FactorKnowledgeGraph() - else: - logger.warning( - "Knowledge graph requested but networkx is not installed. " - "Install with: pip install networkx" - ) - - # Phase 2: Optional formula embedder - self.embedder: Optional[FormulaEmbedder] = None # type: ignore[type-arg] - if enable_embeddings: - if FormulaEmbedder is not None: - self.embedder = FormulaEmbedder() - else: - logger.warning( - "Embeddings requested but required packages are not installed. " - "Install with: pip install sentence-transformers" - ) - - @property - def version(self) -> int: - return self.memory.version - - def update(self, trajectory: List[dict]) -> Dict[str, Any]: - """Process a batch trajectory: formation + evolution. - - Parameters - ---------- - trajectory : list[dict] - Batch of evaluated candidates. 
Each dict should contain: - - formula: str - the DSL formula - - factor_id: str - unique identifier - - ic: float - information coefficient - - icir: float - IC information ratio - - max_correlation: float - max correlation with existing factors - - correlated_with: str - ID of most correlated existing factor - - admitted: bool - whether the factor was admitted - - rejection_reason: str - reason for rejection (if rejected) - - Returns - ------- - dict - Summary of the update: admitted_count, rejected_count, - new_patterns, new_forbidden, new_insights, version. - """ - self._batch_counter += 1 - - # Formation: extract experience from trajectory - formed = form_memory(self.memory, trajectory, self._batch_counter) - - # Evolution: merge formed experience into persistent memory - self.memory = evolve_memory( - self.memory, - formed, - max_success_patterns=self.max_success_patterns, - max_failure_patterns=self.max_failure_patterns, - max_insights=self.max_insights, - ) - - admitted_count = sum(1 for c in trajectory if c.get("admitted", False)) - rejected_count = len(trajectory) - admitted_count - - # Phase 2: Update knowledge graph with new factors - if self.kg is not None and FactorNode is not None: - self._update_knowledge_graph(trajectory) - - return { - "batch": self._batch_counter, - "admitted_count": admitted_count, - "rejected_count": rejected_count, - "success_patterns": len(self.memory.success_patterns), - "forbidden_directions": len(self.memory.forbidden_directions), - "insights": len(self.memory.insights), - "version": self.memory.version, - } - - def retrieve( - self, - library_state: Optional[Dict[str, Any]] = None, - max_success: int = 8, - max_forbidden: int = 10, - max_insights: int = 10, - ) -> Dict[str, Any]: - """Retrieve context-dependent memory signal for LLM prompt. - - Parameters - ---------- - library_state : dict, optional - Current library diagnostics. Keys: library_size, - domain_saturation, etc. 
- max_success : int - Maximum number of success patterns to include. - max_forbidden : int - Maximum number of forbidden directions to include. - max_insights : int - Maximum number of insights to include. - - Returns - ------- - dict - Memory signal m with keys: recommended_directions, - forbidden_directions, insights, library_state, prompt_text. - """ - # Use enhanced retrieval if KG or embedder is available - if (self.kg is not None or self.embedder is not None) and retrieve_memory_enhanced is not None: - return retrieve_memory_enhanced( - self.memory, - library_state=library_state, - max_success=max_success, - max_forbidden=max_forbidden, - max_insights=max_insights, - kg=self.kg, - embedder=self.embedder, - ) - - return retrieve_memory( - self.memory, - library_state=library_state, - max_success=max_success, - max_forbidden=max_forbidden, - max_insights=max_insights, - ) - - def save(self, path: str | Path) -> None: - """Persist memory to a JSON file. - - Also saves the knowledge graph to a sibling file - ``_kg.json`` if enabled. - - Parameters - ---------- - path : str or Path - Output file path (will be created/overwritten). - """ - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - data = self.memory.to_dict() - data["_batch_counter"] = self._batch_counter - data["_config"] = { - "max_success_patterns": self.max_success_patterns, - "max_failure_patterns": self.max_failure_patterns, - "max_insights": self.max_insights, - "enable_knowledge_graph": self.kg is not None, - "enable_embeddings": self.embedder is not None, - } - - with open(path, "w") as f: - json.dump(data, f, indent=2, ensure_ascii=False) - - # Phase 2: Save knowledge graph alongside - if self.kg is not None: - kg_path = path.with_name(f"{path.stem}_kg.json") - self.kg.save(kg_path) - - def load(self, path: str | Path) -> None: - """Load memory from a JSON file. - - Also loads the knowledge graph from ``_kg.json`` if - the file exists and the KG feature is enabled. 
- - Parameters - ---------- - path : str or Path - Path to a previously saved memory file. - """ - path = Path(path) - with open(path) as f: - data = json.load(f) - - self.memory = ExperienceMemory.from_dict(data) - self._batch_counter = data.get("_batch_counter", 0) - - config = data.get("_config", {}) - if config: - self.max_success_patterns = config.get( - "max_success_patterns", self.max_success_patterns - ) - self.max_failure_patterns = config.get( - "max_failure_patterns", self.max_failure_patterns - ) - self.max_insights = config.get( - "max_insights", self.max_insights - ) - - # Phase 2: Load knowledge graph if available - kg_path = path.with_name(f"{path.stem}_kg.json") - if kg_path.exists() and FactorKnowledgeGraph is not None: - if self.kg is None: - # Enable KG if saved config says so, or if file exists - if config.get("enable_knowledge_graph", False): - self.kg = FactorKnowledgeGraph.load(kg_path) - else: - self.kg = FactorKnowledgeGraph.load(kg_path) - - # Re-enable embedder if config says so - if config.get("enable_embeddings", False) and self.embedder is None: - if FormulaEmbedder is not None: - self.embedder = FormulaEmbedder() - - def get_stats(self) -> Dict[str, Any]: - """Return summary statistics about the current memory state. - - Returns - ------- - dict - Keys: version, batch_counter, library_size, success_patterns, - forbidden_directions, insights, domain_saturation, - recent_admission_rate, plus kg_* keys when KG is enabled. 
- """ - recent_logs = self.memory.state.admission_log[-5:] - avg_rate = 0.0 - if recent_logs: - avg_rate = sum( - log.get("admission_rate", 0) for log in recent_logs - ) / len(recent_logs) - - stats: Dict[str, Any] = { - "version": self.memory.version, - "batch_counter": self._batch_counter, - "library_size": self.memory.state.library_size, - "success_patterns": len(self.memory.success_patterns), - "forbidden_directions": len(self.memory.forbidden_directions), - "insights": len(self.memory.insights), - "domain_saturation": dict(self.memory.state.domain_saturation), - "recent_admission_rate": round(avg_rate, 4), - "top_success_patterns": [ - {"name": p.name, "rate": p.success_rate, "count": p.occurrence_count} - for p in sorted( - self.memory.success_patterns, - key=lambda p: p.occurrence_count, - reverse=True, - )[:5] - ], - "top_forbidden_directions": [ - {"name": f.name, "corr": f.typical_correlation, "count": f.occurrence_count} - for f in sorted( - self.memory.forbidden_directions, - key=lambda f: f.occurrence_count, - reverse=True, - )[:5] - ], - } - - # Phase 2: KG stats - if self.kg is not None: - stats["kg_factor_count"] = self.kg.get_factor_count() - stats["kg_edge_count"] = self.kg.get_edge_count() - saturated = self.kg.find_saturated_regions() - stats["kg_saturated_clusters"] = len(saturated) - - return stats - - def reset(self) -> None: - """Reset memory to initial state with default knowledge base.""" - self._batch_counter = 0 - self.memory = ExperienceMemory( - state=MiningState(), - success_patterns=_default_success_patterns(), - forbidden_directions=_default_forbidden_directions(), - insights=_default_insights(), - version=0, - ) - - # Phase 2: Reset KG and embedder - if self.kg is not None and FactorKnowledgeGraph is not None: - self.kg = FactorKnowledgeGraph() - if self.embedder is not None and FormulaEmbedder is not None: - self.embedder = FormulaEmbedder() - - # ------------------------------------------------------------------ - # Phase 2: 
Knowledge graph helpers - # ------------------------------------------------------------------ - - def _update_knowledge_graph(self, trajectory: List[dict]) -> None: - """Add factors from a trajectory to the knowledge graph. - - Extracts operators from formulas, creates FactorNode instances, - and registers correlation edges between co-evaluated candidates. - """ - import re - - if self.kg is None or FactorNode is None: - return - - op_pattern = re.compile(r"\b([A-Z][a-zA-Z]+)\(") - feat_pattern = re.compile(r"\$\w+") - - factor_ids: List[str] = [] - - for candidate in trajectory: - fid = candidate.get("factor_id", "") - formula = candidate.get("formula", "") - if not fid or not formula: - continue - - # Parse operators and features from formula - operators = op_pattern.findall(formula) - features = feat_pattern.findall(formula) - - node = FactorNode( - factor_id=fid, - formula=formula, - ic_mean=candidate.get("ic", 0.0), - category=candidate.get("category", ""), - operators=operators, - features=features, - batch_number=self._batch_counter, - admitted=candidate.get("admitted", False), - ) - - # Embed if embedder is available - if self.embedder is not None: - node.embedding = self.embedder.embed(fid, formula) - - self.kg.add_factor(node) - factor_ids.append(fid) - - # Add correlation edge to existing library member - correlated_with = candidate.get("correlated_with", "") - max_corr = candidate.get("max_correlation", 0.0) - if correlated_with and max_corr > 0: - self.kg.add_correlation_edge(fid, correlated_with, max_corr) diff --git a/src/factorminer/factorminer/memory/formation.py b/src/factorminer/factorminer/memory/formation.py deleted file mode 100644 index 3fc35f7..0000000 --- a/src/factorminer/factorminer/memory/formation.py +++ /dev/null @@ -1,446 +0,0 @@ -"""Memory Formation operator F(M, tau). 
- -Analyzes a mining trajectory tau (batch of evaluated candidates with IC, -correlation, admission results) and extracts new experience: -- Successful patterns from admitted factors -- Forbidden directions from high-correlation rejections -- Strategic insights about what works across the batch -""" - -from __future__ import annotations - -import re -from collections import Counter, defaultdict -from typing import Any, Dict, List, Optional, Tuple - -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) - - -# --------------------------------------------------------------------------- -# Operator-pattern detection helpers -# --------------------------------------------------------------------------- - -# Maps operator substrings to pattern categories -_PATTERN_SIGNATURES: Dict[str, List[str]] = { - "Higher Moment Regimes": ["Skew", "Kurt", "IfElse"], - "PV Corr Interaction": ["Corr", "$close", "$volume"], - "Robust Efficiency": ["Med", "Median"], - "Smoothed Efficiency Rank": ["EMA", "CsRank"], - "Trend Regression Adaptive": ["Rsquare", "Slope", "Resi", "TsLinReg"], - "Logical Or Extreme Regimes": ["Or", "Greater", "Less"], - "Kurtosis Regime": ["Kurt", "IfElse"], - "Amt Efficiency Rank Interaction": ["$amt", "CsRank"], -} - -_FORBIDDEN_SIGNATURES: Dict[str, Dict[str, Any]] = { - "Standardized Returns/Amount": { - "keywords": ["CsZScore", "$returns", "$amt", "Std"], - "typical_corr": 0.6, - "reason": "Standardized return/amount variants cluster with rho > 0.6", - }, - "VWAP Deviation variants": { - "keywords": ["$vwap", "Delta", "Sub", "$close"], - "typical_corr": 0.5, - "reason": "VWAP deviation variants produce highly correlated factors (rho > 0.5)", - }, - "Simple Delta Reversal": { - "keywords": ["Delta", "$close", "Neg", "Return"], - "typical_corr": 0.5, - "reason": "Simple delta-based reversal factors are redundant (rho > 0.5)", - }, - "WMA/EMA Smoothed 
Efficiency": { - "keywords": ["WMA", "EMA", "SMA"], - "typical_corr": 0.9, - "reason": "WMA/EMA smoothed efficiency variants nearly identical (rho > 0.9)", - }, -} - - -def _extract_operators(formula: str) -> List[str]: - """Extract operator names from a DSL formula string.""" - return re.findall(r"([A-Z][a-zA-Z]+)\(", formula) - - -def _extract_features(formula: str) -> List[str]: - """Extract feature references from a DSL formula string.""" - return re.findall(r"\$[a-z]+", formula) - - -def _matches_pattern(formula: str, signature_keywords: List[str]) -> bool: - """Check if a formula matches a pattern based on keyword presence.""" - formula_upper = formula.upper() - ops = _extract_operators(formula) - feats = _extract_features(formula) - all_tokens = [o.upper() for o in ops] + [f.upper() for f in feats] - match_count = sum( - 1 for kw in signature_keywords - if any(kw.upper() in token for token in all_tokens) - or kw.upper() in formula_upper - ) - # Require at least 2 keyword matches (or all if fewer than 2 keywords) - threshold = min(2, len(signature_keywords)) - return match_count >= threshold - - -def _classify_success_pattern(formula: str) -> Optional[str]: - """Try to classify a formula into a known success pattern category.""" - for pattern_name, keywords in _PATTERN_SIGNATURES.items(): - if _matches_pattern(formula, keywords): - return pattern_name - return None - - -def _classify_forbidden_direction(formula: str) -> Optional[str]: - """Try to classify a formula into a known forbidden direction.""" - for direction_name, info in _FORBIDDEN_SIGNATURES.items(): - if _matches_pattern(formula, info["keywords"]): - return direction_name - return None - - -# --------------------------------------------------------------------------- -# Trajectory analysis -# --------------------------------------------------------------------------- - -def _analyze_admissions( - trajectory: List[dict], -) -> Tuple[List[dict], List[dict]]: - """Split trajectory into admitted and 
rejected candidates.""" - admitted = [] - rejected = [] - for candidate in trajectory: - if candidate.get("admitted", False): - admitted.append(candidate) - else: - rejected.append(candidate) - return admitted, rejected - - -def _extract_success_patterns( - admitted: List[dict], - existing_patterns: List[SuccessPattern], -) -> List[SuccessPattern]: - """Extract new or reinforced success patterns from admitted factors.""" - pattern_map: Dict[str, SuccessPattern] = { - p.name: SuccessPattern( - name=p.name, - description=p.description, - template=p.template, - success_rate=p.success_rate, - example_factors=list(p.example_factors), - occurrence_count=p.occurrence_count, - ) - for p in existing_patterns - } - - for candidate in admitted: - formula = candidate.get("formula", "") - factor_id = candidate.get("factor_id", formula[:60]) - ic = candidate.get("ic", 0.0) - - pattern_name = _classify_success_pattern(formula) - if pattern_name is None: - # Novel pattern: create a generic entry based on operators used - ops = _extract_operators(formula) - if len(ops) >= 2: - pattern_name = f"Novel: {'+'.join(ops[:3])}" - else: - continue - - if pattern_name in pattern_map: - pat = pattern_map[pattern_name] - pat.occurrence_count += 1 - if factor_id not in pat.example_factors: - pat.example_factors.append(factor_id) - # Keep example list bounded - if len(pat.example_factors) > 10: - pat.example_factors = pat.example_factors[-10:] - # Upgrade success rate if consistently passing - if pat.occurrence_count >= 5 and pat.success_rate == "Medium": - pat.success_rate = "High" - else: - pattern_map[pattern_name] = SuccessPattern( - name=pattern_name, - description=f"Pattern derived from admitted factor with IC={ic:.4f}", - template=formula, - success_rate="Low", - example_factors=[factor_id], - occurrence_count=1, - ) - - return list(pattern_map.values()) - - -def _extract_forbidden_directions( - rejected: List[dict], - existing_forbidden: List[ForbiddenDirection], -) -> 
List[ForbiddenDirection]: - """Extract new or reinforced forbidden directions from rejections.""" - direction_map: Dict[str, ForbiddenDirection] = { - f.name: ForbiddenDirection( - name=f.name, - description=f.description, - correlated_factors=list(f.correlated_factors), - typical_correlation=f.typical_correlation, - reason=f.reason, - occurrence_count=f.occurrence_count, - ) - for f in existing_forbidden - } - - for candidate in rejected: - formula = candidate.get("formula", "") - factor_id = candidate.get("factor_id", formula[:60]) - rejection_reason = candidate.get("rejection_reason", "") - max_corr = candidate.get("max_correlation", 0.0) - correlated_with = candidate.get("correlated_with", "") - - # Only track correlation-based rejections - if max_corr < 0.4 and "correlation" not in rejection_reason.lower(): - continue - - direction_name = _classify_forbidden_direction(formula) - if direction_name is None: - # Detect generic high-correlation cluster - if max_corr >= 0.5: - ops = _extract_operators(formula) - feats = _extract_features(formula) - direction_name = f"HighCorr: {'+'.join(ops[:2])}({','.join(feats[:2])})" - else: - continue - - if direction_name in direction_map: - d = direction_map[direction_name] - d.occurrence_count += 1 - if correlated_with and correlated_with not in d.correlated_factors: - d.correlated_factors.append(correlated_with) - if len(d.correlated_factors) > 10: - d.correlated_factors = d.correlated_factors[-10:] - # Update typical correlation as running average - if max_corr > 0: - d.typical_correlation = ( - d.typical_correlation * (d.occurrence_count - 1) + max_corr - ) / d.occurrence_count - else: - direction_map[direction_name] = ForbiddenDirection( - name=direction_name, - description=f"Rejected due to: {rejection_reason}", - correlated_factors=[correlated_with] if correlated_with else [], - typical_correlation=max_corr, - reason=rejection_reason or f"High correlation (rho={max_corr:.2f})", - occurrence_count=1, - ) - - return 
list(direction_map.values()) - - -def _derive_insights( - admitted: List[dict], - rejected: List[dict], - batch_number: int, -) -> List[StrategicInsight]: - """Derive higher-level strategic insights from a batch.""" - insights: List[StrategicInsight] = [] - if not admitted and not rejected: - return insights - - total = len(admitted) + len(rejected) - admission_rate = len(admitted) / total if total > 0 else 0.0 - - # Insight: overall batch success rate - if total >= 5: - if admission_rate > 0.3: - insights.append(StrategicInsight( - insight="Current direction is productive with high admission rate", - evidence=f"Batch {batch_number}: {len(admitted)}/{total} admitted ({admission_rate:.0%})", - batch_source=batch_number, - )) - elif admission_rate < 0.05: - insights.append(StrategicInsight( - insight="Current direction is exhausted, need to pivot to new operator combinations", - evidence=f"Batch {batch_number}: only {len(admitted)}/{total} admitted ({admission_rate:.0%})", - batch_source=batch_number, - )) - - # Insight: operator frequency analysis - admitted_ops = Counter() - rejected_ops = Counter() - for c in admitted: - for op in _extract_operators(c.get("formula", "")): - admitted_ops[op] += 1 - for c in rejected: - for op in _extract_operators(c.get("formula", "")): - rejected_ops[op] += 1 - - # Find operators that appear disproportionately in admitted vs rejected - for op, count in admitted_ops.most_common(5): - rej_count = rejected_ops.get(op, 0) - if count >= 3 and (rej_count == 0 or count / max(rej_count, 1) > 2.0): - insights.append(StrategicInsight( - insight=f"Operator '{op}' is highly productive in current search", - evidence=f"Appeared in {count} admitted vs {rej_count} rejected factors", - batch_source=batch_number, - )) - - # Insight: feature analysis - admitted_feats = Counter() - for c in admitted: - for feat in _extract_features(c.get("formula", "")): - admitted_feats[feat] += 1 - - if admitted_feats: - top_feat, top_count = 
admitted_feats.most_common(1)[0] - if top_count >= 3: - insights.append(StrategicInsight( - insight=f"Feature '{top_feat}' appears frequently in successful factors", - evidence=f"Present in {top_count}/{len(admitted)} admitted factors", - batch_source=batch_number, - )) - - # Insight: non-linear vs linear - nonlinear_ops = {"IfElse", "Skew", "Kurt", "Square", "Pow", "Log", "Or", "And"} - admitted_nonlinear = sum( - 1 for c in admitted - if any(op in nonlinear_ops for op in _extract_operators(c.get("formula", ""))) - ) - if len(admitted) >= 3 and admitted_nonlinear / len(admitted) > 0.6: - insights.append(StrategicInsight( - insight="Non-linear transformations outperform linear ones in current regime", - evidence=f"{admitted_nonlinear}/{len(admitted)} admitted factors use non-linear operators", - batch_source=batch_number, - )) - - return insights - - -# --------------------------------------------------------------------------- -# Public API: Memory Formation -# --------------------------------------------------------------------------- - -def form_memory( - memory: ExperienceMemory, - trajectory: List[dict], - batch_number: int = 0, -) -> ExperienceMemory: - """Memory Formation operator F(M, tau). - - Analyzes the mining trajectory tau and forms new experience memories - to be merged into the existing memory via the evolution operator. - - Parameters - ---------- - memory : ExperienceMemory - Current memory state (used for context, not modified in place). - trajectory : list[dict] - Batch of evaluated candidates. 
Each dict should contain: - - formula: str - the DSL formula - - factor_id: str - unique identifier - - ic: float - information coefficient - - icir: float - IC information ratio - - max_correlation: float - max correlation with existing factors - - correlated_with: str - ID of most correlated existing factor - - admitted: bool - whether the factor was admitted to the library - - rejection_reason: str - reason for rejection (if rejected) - batch_number : int - Current batch/iteration number. - - Returns - ------- - ExperienceMemory - A *new* ExperienceMemory containing only the newly formed entries - (to be merged by the evolution operator). - """ - admitted, rejected = _analyze_admissions(trajectory) - - # Extract patterns - new_success = _extract_success_patterns(admitted, memory.success_patterns) - new_forbidden = _extract_forbidden_directions(rejected, memory.forbidden_directions) - new_insights = _derive_insights(admitted, rejected, batch_number) - - # Build updated mining state - new_state = MiningState( - library_size=memory.state.library_size + len(admitted), - recent_admissions=[ - { - "factor_id": c.get("factor_id", ""), - "formula": c.get("formula", ""), - "ic": c.get("ic", 0.0), - "batch": batch_number, - } - for c in admitted - ], - recent_rejections=[ - { - "factor_id": c.get("factor_id", ""), - "formula": c.get("formula", ""), - "reason": c.get("rejection_reason", ""), - "max_correlation": c.get("max_correlation", 0.0), - "batch": batch_number, - } - for c in rejected[-20:] # Keep only last 20 rejections - ], - domain_saturation=_compute_domain_saturation( - memory.state.domain_saturation, admitted, rejected - ), - admission_log=memory.state.admission_log + [ - { - "batch": batch_number, - "admitted": len(admitted), - "rejected": len(rejected), - "admission_rate": len(admitted) / max(len(trajectory), 1), - } - ], - ) - - return ExperienceMemory( - state=new_state, - success_patterns=new_success, - forbidden_directions=new_forbidden, - 
insights=new_insights, - version=memory.version, - ) - - -def _compute_domain_saturation( - existing_saturation: Dict[str, float], - admitted: List[dict], - rejected: List[dict], -) -> Dict[str, float]: - """Compute per-category domain saturation metrics. - - Saturation increases when many candidates in a category are rejected - due to high correlation (the domain is "full"). - """ - saturation = dict(existing_saturation) - - # Count category attempts and rejections - category_attempts: Dict[str, int] = defaultdict(int) - category_rejections: Dict[str, int] = defaultdict(int) - - for candidate in admitted + rejected: - formula = candidate.get("formula", "") - category = _classify_success_pattern(formula) or "Other" - category_attempts[category] += 1 - - for candidate in rejected: - formula = candidate.get("formula", "") - max_corr = candidate.get("max_correlation", 0.0) - if max_corr >= 0.4: - category = _classify_success_pattern(formula) or "Other" - category_rejections[category] += 1 - - # Update saturation with exponential moving average - alpha = 0.3 - for category, attempts in category_attempts.items(): - if attempts > 0: - batch_saturation = category_rejections.get(category, 0) / attempts - old = saturation.get(category, 0.0) - saturation[category] = (1 - alpha) * old + alpha * batch_saturation - - return saturation diff --git a/src/factorminer/factorminer/memory/kg_retrieval.py b/src/factorminer/factorminer/memory/kg_retrieval.py deleted file mode 100644 index 5006091..0000000 --- a/src/factorminer/factorminer/memory/kg_retrieval.py +++ /dev/null @@ -1,336 +0,0 @@ -"""Enhanced memory retrieval combining Knowledge Graph + Embeddings + flat memory.""" - -from __future__ import annotations - -from typing import Any, Dict, List, Optional, Set, Tuple - -from src.factorminer.factorminer.memory.memory_store import ExperienceMemory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory - -# Optional imports -- presence checked at call time -try: - 
from factorminer.memory.knowledge_graph import FactorKnowledgeGraph -except ImportError: - FactorKnowledgeGraph = None # type: ignore[assignment,misc] - -try: - from factorminer.memory.embeddings import FormulaEmbedder -except ImportError: - FormulaEmbedder = None # type: ignore[assignment,misc] - - -def retrieve_memory_enhanced( - memory: ExperienceMemory, - library_state: Optional[Dict[str, Any]] = None, - max_success: int = 8, - max_forbidden: int = 10, - max_insights: int = 10, - kg: Optional[FactorKnowledgeGraph] = None, # type: ignore[type-arg] - embedder: Optional[FormulaEmbedder] = None, # type: ignore[type-arg] -) -> Dict[str, Any]: - """Enhanced memory retrieval operator R+(M, L, KG, E). - - Calls the base :func:`retrieve_memory` first, then augments the - returned dict with additional prompt-oriented keys derived from the - knowledge graph and embedder. - - Parameters - ---------- - memory : ExperienceMemory - The flat experience memory. - library_state : dict, optional - Current library diagnostics. - max_success, max_forbidden, max_insights : int - Limits forwarded to the base retrieval. - kg : FactorKnowledgeGraph, optional - Knowledge graph instance. - embedder : FormulaEmbedder, optional - Formula embedder instance. - - Returns - ------- - dict - Base memory signal plus the four additional keys above. 
- """ - # Base retrieval - result = retrieve_memory( - memory, - library_state=library_state, - max_success=max_success, - max_forbidden=max_forbidden, - max_insights=max_insights, - ) - - # Default augmented keys - result["complementary_patterns"] = [] - result["conflict_warnings"] = [] - result["operator_cooccurrence"] = [] - result["semantic_neighbors"] = [] - result["semantic_duplicates"] = [] - result["semantic_gaps"] = [] - - # ---------------------------------------------------------------- - # Knowledge-graph augmentations - # ---------------------------------------------------------------- - if kg is not None: - # Complementary patterns: for each recently admitted factor, - # find structurally complementary neighbours. - complementary: List[str] = [] - seen: Set[str] = set() - for admission in memory.state.recent_admissions[-5:]: - fid = admission.get("factor_id", "") - if not fid: - continue - for comp in kg.find_complementary_patterns(fid, max_hops=2): - if comp not in seen: - seen.add(comp) - complementary.append(_describe_factor_node(kg, comp)) - result["complementary_patterns"] = complementary - - # Conflict warnings: saturated regions - saturated_regions = kg.find_saturated_regions(threshold=0.5) - result["conflict_warnings"] = [ - _describe_conflict_cluster(kg, region) for region in saturated_regions - ] - - # Operator co-occurrence - cooc = kg.get_operator_cooccurrence() - # Sort by count descending, take top 20 - top_cooc = sorted(cooc.items(), key=lambda x: x[1], reverse=True)[:20] - result["operator_cooccurrence"] = [ - f"{a} + {b} (seen {count} times)" for (a, b), count in top_cooc - ] - - # ---------------------------------------------------------------- - # Embedding-based augmentations - # ---------------------------------------------------------------- - if embedder is not None: - _seed_embedder_from_memory(memory, kg, embedder) - semantic_neighbors, semantic_duplicates = _collect_semantic_context( - memory=memory, - kg=kg, - 
embedder=embedder, - ) - result["semantic_neighbors"] = semantic_neighbors - result["semantic_duplicates"] = semantic_duplicates - result["semantic_gaps"] = _find_semantic_gaps(memory, kg, embedder) - - # ---------------------------------------------------------------- - # Augment prompt text - # ---------------------------------------------------------------- - extra_sections: List[str] = [] - - if result["complementary_patterns"]: - extra_sections.append("=== COMPLEMENTARY PATTERNS (explore) ===") - extra_sections.append( - "Factors structurally complementary to recent admissions:" - ) - for fid in result["complementary_patterns"][:8]: - extra_sections.append(f" - {fid}") - extra_sections.append("") - - if result["conflict_warnings"]: - extra_sections.append("=== SATURATION WARNINGS ===") - extra_sections.append( - "The following factor clusters are highly correlated -- " - "avoid generating variants:" - ) - for cluster in result["conflict_warnings"][:5]: - extra_sections.append(f" Cluster: {', '.join(cluster[:6])}") - extra_sections.append("") - - if result["semantic_gaps"]: - extra_sections.append("=== SEMANTIC GAPS (underexplored) ===") - extra_sections.append( - "Operators present in success patterns but underused in the library:" - ) - for op in result["semantic_gaps"][:10]: - extra_sections.append(f" - {op}") - extra_sections.append("") - - if result["semantic_neighbors"]: - extra_sections.append("=== SEMANTIC NEIGHBORS (similar library factors) ===") - for item in result["semantic_neighbors"][:8]: - extra_sections.append(f" - {item}") - extra_sections.append("") - - if result["semantic_duplicates"]: - extra_sections.append("=== SEMANTIC DUPLICATES (near-duplicate risk) ===") - for item in result["semantic_duplicates"][:5]: - extra_sections.append(f" - {item}") - extra_sections.append("") - - if extra_sections: - result["prompt_text"] += "\n" + "\n".join(extra_sections) - - return result - - -# 
--------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _find_semantic_gaps( - memory: ExperienceMemory, - kg: Optional[FactorKnowledgeGraph], # type: ignore[type-arg] - embedder: Optional[FormulaEmbedder], # type: ignore[type-arg] -) -> List[str]: - """Identify success-pattern operators with poor semantic coverage.""" - import re - - template_ops: Set[str] = set() - op_pattern = re.compile(r"\b([A-Z][a-zA-Z]+)\(") - - for pat in memory.success_patterns: - for match in op_pattern.finditer(pat.template): - template_ops.add(match.group(1)) - - if not template_ops: - return [] - - if embedder is None: - return sorted(template_ops) - - # A pattern is considered underexplored when it has no close semantic - # neighbors in the current library representation. - uncovered_ops: Set[str] = set() - anchors = list(memory.success_patterns[:10]) - if not anchors: - return sorted(template_ops) - - for pat in anchors: - nearest = embedder.find_nearest(pat.template, k=1) - best_similarity = nearest[0][1] if nearest else 0.0 - if best_similarity < 0.72: - for match in op_pattern.finditer(pat.template): - uncovered_ops.add(match.group(1)) - - if not uncovered_ops and kg is None: - return sorted(template_ops) - - if not uncovered_ops: - # Fall back to the operators that are entirely absent from the admitted set. 
- used_ops: Set[str] = set() - if kg is not None: - for node in kg.list_factor_nodes(admitted_only=True): - used_ops.update(node.operators) - uncovered_ops = template_ops - used_ops - - return sorted(uncovered_ops or template_ops) - - -def _seed_embedder_from_memory( - memory: ExperienceMemory, - kg: Optional[FactorKnowledgeGraph], # type: ignore[type-arg] - embedder: FormulaEmbedder, # type: ignore[type-arg] -) -> None: - """Ensure the embedder cache reflects the current known factors.""" - seen: Set[str] = set() - - if kg is not None: - for node in kg.list_factor_nodes(admitted_only=True): - if node.factor_id and node.formula and node.factor_id not in seen: - embedder.embed(node.factor_id, node.formula) - seen.add(node.factor_id) - - for admission in memory.state.recent_admissions[-10:]: - fid = admission.get("factor_id", "") - formula = admission.get("formula", "") - if fid and formula and fid not in seen: - embedder.embed(fid, formula) - seen.add(fid) - - -def _collect_semantic_context( - memory: ExperienceMemory, - kg: Optional[FactorKnowledgeGraph], # type: ignore[type-arg] - embedder: FormulaEmbedder, # type: ignore[type-arg] - max_neighbors: int = 8, - similarity_threshold: float = 0.72, -) -> Tuple[List[str], List[str]]: - """Collect semantically similar neighbors and duplicate warnings.""" - anchors: List[Tuple[str, str, str]] = [] - for admission in memory.state.recent_admissions[-5:]: - fid = admission.get("factor_id", "") - formula = admission.get("formula", "") - if fid and formula: - anchors.append(("recent admission", fid, formula)) - - if not anchors: - for pattern in memory.success_patterns[:5]: - if pattern.template: - anchors.append(("success pattern", pattern.name, pattern.template)) - - semantic_neighbors: List[str] = [] - semantic_duplicates: List[str] = [] - seen_matches: Set[Tuple[str, str]] = set() - - if embedder.cache_size == 0: - return semantic_neighbors, semantic_duplicates - - for anchor_kind, anchor_id, formula in anchors: - nearest 
= embedder.find_nearest(formula, k=min(5, embedder.cache_size)) - for match_id, similarity in nearest: - if anchor_id == match_id: - continue - if similarity < similarity_threshold: - continue - match_key = (anchor_id, match_id) - if match_key in seen_matches: - continue - seen_matches.add(match_key) - match_desc = _describe_factor_node(kg, match_id) - if match_desc == match_id: - semantic_neighbors.append( - f"{anchor_kind} {anchor_id} -> {match_id} (sim={similarity:.2f})" - ) - else: - semantic_neighbors.append( - f"{anchor_kind} {anchor_id} -> {match_desc} (sim={similarity:.2f})" - ) - if similarity >= 0.90: - semantic_duplicates.append( - f"{anchor_kind} {anchor_id} is very close to {match_id} " - f"(sim={similarity:.2f})" - ) - if len(semantic_neighbors) >= max_neighbors: - return semantic_neighbors, semantic_duplicates - - return semantic_neighbors, semantic_duplicates - - -def _describe_factor_node( - kg: FactorKnowledgeGraph, # type: ignore[type-arg] - factor_id: str, -) -> str: - """Render a factor node into short prompt-friendly text.""" - if kg is None: - return factor_id - - node = kg.get_factor_node(factor_id) - if node is None: - return factor_id - - category = node.category or "unknown" - ic_mean = node.ic_mean - formula = node.formula - summary = factor_id - if category: - summary += f" [{category}]" - if ic_mean is not None: - summary += f" IC={float(ic_mean):.4f}" - if formula: - summary += f": {formula[:80]}" - if len(formula) > 80: - summary += "..." 
- return summary - - -def _describe_conflict_cluster( - kg: FactorKnowledgeGraph, # type: ignore[type-arg] - cluster: Set[str], -) -> str: - """Render one saturated cluster into short text.""" - described = [_describe_factor_node(kg, factor_id) for factor_id in sorted(cluster)] - return " | ".join(described[:3]) diff --git a/src/factorminer/factorminer/memory/knowledge_graph.py b/src/factorminer/factorminer/memory/knowledge_graph.py deleted file mode 100644 index b782135..0000000 --- a/src/factorminer/factorminer/memory/knowledge_graph.py +++ /dev/null @@ -1,418 +0,0 @@ -"""Factor Knowledge Graph for lineage tracking and structural analysis. - -Uses a NetworkX DiGraph to model relationships between factors, operators, -and feature inputs. Supports: -- Factor derivation lineage (parent -> child mutations) -- Correlation-based edges for saturation detection -- Operator co-occurrence analysis for diversity guidance -- Complementary pattern discovery via BFS -""" - -from __future__ import annotations - -import json -from collections import defaultdict -from dataclasses import dataclass, field, asdict -from enum import Enum -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple - -import numpy as np - -try: - import networkx as nx -except ImportError: - nx = None # type: ignore[assignment] - - -class EdgeType(Enum): - """Types of edges in the factor knowledge graph.""" - - DERIVED_FROM = "derived_from" - CORRELATED_WITH = "correlated_with" - USES_OPERATOR = "uses_operator" - COMPLEMENTARY = "complementary" - CONFLICTS = "conflicts" - - -@dataclass -class FactorNode: - """A node in the factor knowledge graph representing a single factor. - - Attributes - ---------- - factor_id : str - Unique identifier for the factor. - formula : str - DSL formula string. - ic_mean : float - Mean information coefficient. - category : str - Factor category (e.g., "momentum", "mean_reversion"). 
- operators : list[str] - List of operator names used in the formula. - features : list[str] - List of input features (e.g., "$close", "$volume"). - batch_number : int - Batch in which the factor was generated. - admitted : bool - Whether the factor was admitted to the library. - embedding : ndarray or None - Optional semantic embedding vector. - """ - - factor_id: str - formula: str - ic_mean: float = 0.0 - category: str = "" - operators: List[str] = field(default_factory=list) - features: List[str] = field(default_factory=list) - batch_number: int = 0 - admitted: bool = False - embedding: Optional[np.ndarray] = None - - def to_dict(self) -> Dict[str, Any]: - d = asdict(self) - if self.embedding is not None: - d["embedding"] = self.embedding.tolist() - else: - d["embedding"] = None - return d - - @classmethod - def from_dict(cls, d: Dict[str, Any]) -> FactorNode: - embedding = d.get("embedding") - if embedding is not None: - embedding = np.array(embedding, dtype=np.float32) - return cls( - factor_id=d["factor_id"], - formula=d.get("formula", ""), - ic_mean=d.get("ic_mean", 0.0), - category=d.get("category", ""), - operators=d.get("operators", []), - features=d.get("features", []), - batch_number=d.get("batch_number", 0), - admitted=d.get("admitted", False), - embedding=embedding, - ) - - -def _ensure_networkx() -> None: - """Raise a clear error if networkx is not installed.""" - if nx is None: - raise ImportError( - "networkx is required for FactorKnowledgeGraph. " - "Install it with: pip install networkx" - ) - - -class FactorKnowledgeGraph: - """Directed graph tracking factor lineage and relationships. - - Uses ``networkx.DiGraph`` internally. Factor nodes store a - :class:`FactorNode` dataclass; operator nodes are prefixed with - ``op:``. Factor metadata retains the declared features even though - the graph currently materializes operator structure explicitly. 
- """ - - def __init__(self) -> None: - _ensure_networkx() - self._graph: nx.DiGraph = nx.DiGraph() - - # ------------------------------------------------------------------ - # Node operations - # ------------------------------------------------------------------ - - def add_factor(self, node: FactorNode) -> None: - """Add or replace a factor node and auto-create USES_OPERATOR edges. - - For each operator in ``node.operators``, an ``op:{name}`` node - is created (if absent) and a USES_OPERATOR edge is drawn from - the factor to that operator node. - """ - if self._graph.has_node(node.factor_id): - self.remove_factor(node.factor_id) - - self._graph.add_node( - node.factor_id, - node_type="factor", - data=node.to_dict(), - ) - - for op in node.operators: - op_id = f"op:{op}" - if not self._graph.has_node(op_id): - self._graph.add_node(op_id, node_type="operator") - self._graph.add_edge( - node.factor_id, - op_id, - edge_type=EdgeType.USES_OPERATOR.value, - ) - - def get_factor_node(self, factor_id: str) -> Optional[FactorNode]: - """Return a factor node by id, or ``None`` if missing.""" - attrs = self._graph.nodes.get(factor_id) - if not attrs or attrs.get("node_type") != "factor": - return None - data = attrs.get("data", {}) - if not isinstance(data, dict): - return None - try: - return FactorNode.from_dict(data) - except Exception: - return None - - def iter_factor_nodes( - self, - admitted_only: bool = False, - ) -> Iterable[FactorNode]: - """Yield factor nodes currently present in the graph.""" - for node_id, attrs in self._graph.nodes(data=True): - if attrs.get("node_type") != "factor": - continue - data = attrs.get("data", {}) - if not isinstance(data, dict): - continue - if admitted_only and not data.get("admitted", False): - continue - try: - yield FactorNode.from_dict(data) - except Exception: - continue - - def list_factor_nodes(self, admitted_only: bool = False) -> List[FactorNode]: - """Return all factor nodes as a list.""" - return 
list(self.iter_factor_nodes(admitted_only=admitted_only)) - - # ------------------------------------------------------------------ - # Edge operations - # ------------------------------------------------------------------ - - def add_correlation_edge( - self, - a: str, - b: str, - rho: float, - threshold: float = 0.4, - ) -> None: - """Add a CORRELATED_WITH edge if ``|rho| >= threshold``.""" - if abs(rho) >= threshold: - self._graph.add_edge( - a, - b, - edge_type=EdgeType.CORRELATED_WITH.value, - rho=rho, - ) - self._graph.add_edge( - b, - a, - edge_type=EdgeType.CORRELATED_WITH.value, - rho=rho, - ) - - def add_derivation_edge( - self, - child: str, - parent: str, - mutation_type: str = "", - ) -> None: - """Add a DERIVED_FROM edge from *child* to *parent*.""" - self._graph.add_edge( - child, - parent, - edge_type=EdgeType.DERIVED_FROM.value, - mutation_type=mutation_type, - ) - - def remove_factor(self, factor_id: str) -> bool: - """Remove a factor and prune orphaned auxiliary nodes. - - Returns ``True`` when the factor was present. - """ - if not self._graph.has_node(factor_id): - return False - - self._graph.remove_node(factor_id) - self._prune_orphan_aux_nodes() - return True - - # ------------------------------------------------------------------ - # Query operations - # ------------------------------------------------------------------ - - def find_complementary_patterns( - self, - factor_id: str, - max_hops: int = 2, - ) -> List[str]: - """Find factors complementary to *factor_id* via BFS. - - A complementary factor is one that: - 1. Is reachable within *max_hops* in the undirected view, and - 2. Is NOT directly correlated with the source factor, and - 3. Uses at least one different operator. - - Returns a list of factor IDs. 
- """ - if not self._graph.has_node(factor_id): - return [] - - # Collect correlated neighbours (direct CORRELATED_WITH edges) - correlated: Set[str] = set() - for _, nbr, data in self._graph.edges(factor_id, data=True): - if data.get("edge_type") == EdgeType.CORRELATED_WITH.value: - correlated.add(nbr) - for pred, _, data in self._graph.in_edges(factor_id, data=True): - if data.get("edge_type") == EdgeType.CORRELATED_WITH.value: - correlated.add(pred) - - # Source operators - source_ops = self._get_operators(factor_id) - - # BFS on undirected view - undirected = self._graph.to_undirected() - visited: Set[str] = {factor_id} - frontier: List[str] = [factor_id] - complementary: List[str] = [] - - for _ in range(max_hops): - next_frontier: List[str] = [] - for node in frontier: - for nbr in undirected.neighbors(node): - if nbr in visited: - continue - visited.add(nbr) - next_frontier.append(nbr) - - # Only consider factor nodes - if self._graph.nodes[nbr].get("node_type") != "factor": - continue - # Skip if correlated - if nbr in correlated: - continue - # Must use at least one different operator - nbr_ops = self._get_operators(nbr) - if nbr_ops and source_ops and not nbr_ops.issubset(source_ops): - complementary.append(nbr) - frontier = next_frontier - - return complementary - - def find_saturated_regions( - self, - threshold: float = 0.5, - ) -> List[Set[str]]: - """Find clusters of highly correlated factors. - - Builds a subgraph of CORRELATED_WITH edges where - ``|rho| > threshold``, then returns connected components. - Each component is a set of factor IDs. 
- """ - sub = nx.Graph() - for u, v, data in self._graph.edges(data=True): - if data.get("edge_type") != EdgeType.CORRELATED_WITH.value: - continue - rho = abs(data.get("rho", 0.0)) - if rho > threshold: - # Only include factor nodes - if ( - self._graph.nodes.get(u, {}).get("node_type") == "factor" - and self._graph.nodes.get(v, {}).get("node_type") == "factor" - ): - sub.add_edge(u, v) - - components = list(nx.connected_components(sub)) - # Filter out singletons - return [c for c in components if len(c) > 1] - - def get_operator_cooccurrence(self) -> Dict[Tuple[str, str], int]: - """Count operator pair co-occurrences across admitted factors. - - Returns a dict mapping ``(op_a, op_b)`` (sorted tuple) to count. - """ - cooccurrence: Dict[Tuple[str, str], int] = defaultdict(int) - - for node_id, attrs in self._graph.nodes(data=True): - if attrs.get("node_type") != "factor": - continue - node_data = attrs.get("data", {}) - if not node_data.get("admitted", False): - continue - - ops = sorted(set(node_data.get("operators", []))) - for i in range(len(ops)): - for j in range(i + 1, len(ops)): - pair = (ops[i], ops[j]) - cooccurrence[pair] += 1 - - return dict(cooccurrence) - - # ------------------------------------------------------------------ - # Stats - # ------------------------------------------------------------------ - - def get_factor_count(self) -> int: - """Return the number of factor nodes in the graph.""" - return sum( - 1 - for _, d in self._graph.nodes(data=True) - if d.get("node_type") == "factor" - ) - - def get_edge_count(self) -> int: - """Return total number of edges in the graph.""" - return self._graph.number_of_edges() - - # ------------------------------------------------------------------ - # Serialization - # ------------------------------------------------------------------ - - def to_dict(self) -> Dict[str, Any]: - """Serialize to a JSON-compatible dict via ``nx.node_link_data``.""" - return nx.node_link_data(self._graph, edges="links") - - 
@classmethod - def from_dict(cls, data: Dict[str, Any]) -> FactorKnowledgeGraph: - """Deserialize from a dict produced by :meth:`to_dict`.""" - kg = cls() - kg._graph = nx.node_link_graph(data, edges="links") - return kg - - def save(self, path: str | Path) -> None: - """Persist the graph to a JSON file.""" - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: - json.dump(self.to_dict(), f, indent=2, ensure_ascii=False) - - @classmethod - def load(cls, path: str | Path) -> FactorKnowledgeGraph: - """Load a graph from a JSON file.""" - path = Path(path) - with open(path) as f: - data = json.load(f) - return cls.from_dict(data) - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _get_operators(self, factor_id: str) -> Set[str]: - """Return the set of operator names used by a factor.""" - ops: Set[str] = set() - for _, nbr, data in self._graph.edges(factor_id, data=True): - if data.get("edge_type") == EdgeType.USES_OPERATOR.value: - # Strip "op:" prefix - ops.add(nbr.removeprefix("op:")) - return ops - - def _prune_orphan_aux_nodes(self) -> None: - """Remove operator nodes that are no longer referenced.""" - orphan_nodes = [ - node_id - for node_id, attrs in self._graph.nodes(data=True) - if attrs.get("node_type") in {"operator", "feature"} - and self._graph.degree(node_id) == 0 - ] - if orphan_nodes: - self._graph.remove_nodes_from(orphan_nodes) diff --git a/src/factorminer/factorminer/memory/memory_store.py b/src/factorminer/factorminer/memory/memory_store.py deleted file mode 100644 index 12615a1..0000000 --- a/src/factorminer/factorminer/memory/memory_store.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Data structures for the FactorMiner experience memory system. 
- -Implements the experience memory M = {S, P_succ, P_fail, I} where: -- S: Mining state tracking global evolution of the factor library -- P_succ: Success patterns (recommended mining directions) -- P_fail: Forbidden directions (directions to avoid) -- I: Strategic insights (high-level lessons) -""" - -from __future__ import annotations - -from dataclasses import dataclass, field, asdict -from typing import Any, Dict, List, Optional - - -@dataclass -class MiningState: - """Tracks the global evolution of the factor library (S). - - Captures a snapshot of the current library status including size, - recent admission/rejection history, and per-category saturation. - """ - - library_size: int = 0 - recent_admissions: List[dict] = field(default_factory=list) - recent_rejections: List[dict] = field(default_factory=list) - domain_saturation: Dict[str, float] = field(default_factory=dict) - admission_log: List[dict] = field(default_factory=list) - - def to_dict(self) -> dict: - return asdict(self) - - @classmethod - def from_dict(cls, d: dict) -> MiningState: - return cls( - library_size=d.get("library_size", 0), - recent_admissions=d.get("recent_admissions", []), - recent_rejections=d.get("recent_rejections", []), - domain_saturation=d.get("domain_saturation", {}), - admission_log=d.get("admission_log", []), - ) - - -@dataclass -class SuccessPattern: - """A recommended mining direction (P_succ). - - Encodes a known-effective pattern for factor construction, including - a canonical formula template and tracked success rate. 
- """ - - name: str - description: str - template: str - success_rate: str # "High", "Medium", "Low" - example_factors: List[str] = field(default_factory=list) - occurrence_count: int = 0 - - def to_dict(self) -> dict: - return asdict(self) - - @classmethod - def from_dict(cls, d: dict) -> SuccessPattern: - return cls( - name=d["name"], - description=d["description"], - template=d["template"], - success_rate=d.get("success_rate", "Medium"), - example_factors=d.get("example_factors", []), - occurrence_count=d.get("occurrence_count", 0), - ) - - -@dataclass -class ForbiddenDirection: - """A forbidden mining direction (P_fail). - - Encodes a pattern that consistently produces factors too correlated - with existing library members or that fail quality thresholds. - """ - - name: str - description: str - correlated_factors: List[str] = field(default_factory=list) - typical_correlation: float = 0.0 - reason: str = "" - occurrence_count: int = 0 - - def to_dict(self) -> dict: - return asdict(self) - - @classmethod - def from_dict(cls, d: dict) -> ForbiddenDirection: - return cls( - name=d["name"], - description=d["description"], - correlated_factors=d.get("correlated_factors", []), - typical_correlation=d.get("typical_correlation", 0.0), - reason=d.get("reason", ""), - occurrence_count=d.get("occurrence_count", 0), - ) - - -@dataclass -class StrategicInsight: - """High-level lesson from mining (I). - - Captures abstract observations about what works and what doesn't, - derived from accumulated mining experience across batches. - """ - - insight: str - evidence: str - batch_source: int = 0 - - def to_dict(self) -> dict: - return asdict(self) - - @classmethod - def from_dict(cls, d: dict) -> StrategicInsight: - return cls( - insight=d["insight"], - evidence=d["evidence"], - batch_source=d.get("batch_source", 0), - ) - - -@dataclass -class ExperienceMemory: - """The complete experience memory M = {S, P_succ, P_fail, I}. 
- - Persists across mining sessions and evolves with each batch of - evaluated factor candidates. - """ - - state: MiningState = field(default_factory=MiningState) - success_patterns: List[SuccessPattern] = field(default_factory=list) - forbidden_directions: List[ForbiddenDirection] = field(default_factory=list) - insights: List[StrategicInsight] = field(default_factory=list) - version: int = 0 - - def to_dict(self) -> dict: - return { - "state": self.state.to_dict(), - "success_patterns": [p.to_dict() for p in self.success_patterns], - "forbidden_directions": [f.to_dict() for f in self.forbidden_directions], - "insights": [i.to_dict() for i in self.insights], - "version": self.version, - } - - @classmethod - def from_dict(cls, d: dict) -> ExperienceMemory: - return cls( - state=MiningState.from_dict(d.get("state", {})), - success_patterns=[ - SuccessPattern.from_dict(p) for p in d.get("success_patterns", []) - ], - forbidden_directions=[ - ForbiddenDirection.from_dict(f) for f in d.get("forbidden_directions", []) - ], - insights=[ - StrategicInsight.from_dict(i) for i in d.get("insights", []) - ], - version=d.get("version", 0), - ) diff --git a/src/factorminer/factorminer/memory/online_regime_memory.py b/src/factorminer/factorminer/memory/online_regime_memory.py deleted file mode 100644 index e8cf19a..0000000 --- a/src/factorminer/factorminer/memory/online_regime_memory.py +++ /dev/null @@ -1,1625 +0,0 @@ -"""Online regime-aware memory system for FactorMiner. - -Addresses FactorMiner's core limitation: static, offline-only memory that -ignores regime changes. 
This module provides: - -- ``RegimeSpecificPattern`` / ``RegimeSpecificPatternStore`` - — per-regime success/failure pattern storage with IC-based scoring - -- ``OnlineMemoryUpdater`` - — streaming memory update with exponential forgetting and regime-change hooks - -- ``RegimeTransitionForecaster`` - — logistic-regression-based next-regime predictor for proactive memory prep - -- ``OnlineRegimeMemory`` - — top-level orchestrator integrating all components - -- ``MemoryForgetCurve`` - — snapshot tracker for visualising and analysing memory decay - -All components are: - * Thread-safe (``threading.RLock``) - * Serialisable (``to_dict`` / ``from_dict`` + ``pickle`` compatible) - * Streaming-fast (< 1 ms per ``update`` call with normal loads) - * Pure Python + NumPy + scikit-learn (no additional dependencies) -""" - -from __future__ import annotations - -import copy -import json -import logging -import math -import pickle -import threading -import time -from collections import defaultdict, deque -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, FrozenSet, List, Optional, Tuple - -import numpy as np - -from src.factorminer.factorminer.evaluation.regime import RegimeState, StreamingRegimeDetector, StreamingRegimeConfig -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - StrategicInsight, - SuccessPattern, -) -from src.factorminer.factorminer.memory.evolution import ( - apply_confidence_decay, - bump_pattern_confidence, - penalise_pattern_confidence, -) -from src.factorminer.factorminer.memory.retrieval import retrieve_memory - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# MemorySignal — returned by OnlineRegimeMemory.retrieve() -# --------------------------------------------------------------------------- - -@dataclass -class MemorySignal: - """Structured memory signal for 
LLM prompt injection. - - Wraps the standard retrieval result with regime-specific additions. - """ - recommended_directions: List[dict] - forbidden_directions: List[dict] - insights: List[dict] - library_state: dict - prompt_text: str - # Regime-specific additions - current_regime: RegimeState = field(default_factory=RegimeState) - regime_patterns: List[dict] = field(default_factory=list) - cross_regime_patterns: List[dict] = field(default_factory=list) - forecasted_regime: Optional[RegimeState] = None - forecast_confidence: float = 0.0 - - def to_dict(self) -> dict: - return { - "recommended_directions": self.recommended_directions, - "forbidden_directions": self.forbidden_directions, - "insights": self.insights, - "library_state": self.library_state, - "prompt_text": self.prompt_text, - "current_regime": self.current_regime.to_dict(), - "regime_patterns": self.regime_patterns, - "cross_regime_patterns": self.cross_regime_patterns, - "forecasted_regime": self.forecasted_regime.to_dict() - if self.forecasted_regime else None, - "forecast_confidence": self.forecast_confidence, - } - - -# --------------------------------------------------------------------------- -# RegimeSpecificPattern & RegimeSpecificPatternStore -# --------------------------------------------------------------------------- - -@dataclass -class RegimeSpecificPattern: - """A formula pattern with per-regime performance statistics. - - Attributes - ---------- - formula_template : str - DSL formula template (may contain ``{w}`` style placeholders). - regime : RegimeState - The regime context in which this pattern was discovered. - ic_in_regime : float - Mean IC when the current market regime matches ``self.regime``. - ic_out_of_regime : float - Mean IC when the current regime does not match. - regime_specificity : float - ``ic_in_regime / (|ic_out_of_regime| + 1e-8)``. Values >> 1 indicate - strong regime specialisation. - discovery_date : datetime - UTC timestamp of first observation. 
- confidence : float - Normalised confidence in [0, 1] based on sample count. Decays via - forgetting. - n_observations : int - Number of times this pattern has been observed. - n_in_regime : int - Observations when regime matched. - """ - formula_template: str - regime: RegimeState - ic_in_regime: float = 0.0 - ic_out_of_regime: float = 0.0 - regime_specificity: float = 1.0 - discovery_date: datetime = field( - default_factory=lambda: datetime.now(tz=timezone.utc) - ) - confidence: float = 1.0 - n_observations: int = 1 - n_in_regime: int = 0 - - def update_ic(self, ic: float, in_regime: bool) -> None: - """Online update of IC statistics using an EW running mean.""" - self.n_observations += 1 - alpha = 2.0 / (min(self.n_observations, 50) + 1) - if in_regime: - self.n_in_regime += 1 - self.ic_in_regime = (1 - alpha) * self.ic_in_regime + alpha * ic - else: - self.ic_out_of_regime = (1 - alpha) * self.ic_out_of_regime + alpha * ic - # Recompute specificity - self.regime_specificity = abs(self.ic_in_regime) / ( - abs(self.ic_out_of_regime) + 1e-8 - ) - - def to_dict(self) -> dict: - return { - "formula_template": self.formula_template, - "regime": self.regime.to_dict(), - "ic_in_regime": round(self.ic_in_regime, 6), - "ic_out_of_regime": round(self.ic_out_of_regime, 6), - "regime_specificity": round(self.regime_specificity, 4), - "discovery_date": self.discovery_date.isoformat(), - "confidence": round(self.confidence, 6), - "n_observations": self.n_observations, - "n_in_regime": self.n_in_regime, - } - - @classmethod - def from_dict(cls, d: dict) -> "RegimeSpecificPattern": - discovery_date = datetime.fromisoformat( - d.get("discovery_date", datetime.now(tz=timezone.utc).isoformat()) - ) - if discovery_date.tzinfo is None: - discovery_date = discovery_date.replace(tzinfo=timezone.utc) - return cls( - formula_template=d["formula_template"], - regime=RegimeState.from_dict(d["regime"]), - ic_in_regime=d.get("ic_in_regime", 0.0), - 
ic_out_of_regime=d.get("ic_out_of_regime", 0.0), - regime_specificity=d.get("regime_specificity", 1.0), - discovery_date=discovery_date, - confidence=d.get("confidence", 1.0), - n_observations=d.get("n_observations", 1), - n_in_regime=d.get("n_in_regime", 0), - ) - - -class RegimeSpecificPatternStore: - """Thread-safe store for regime-specific formula patterns. - - Patterns are keyed by ``(formula_template, regime_str)`` and indexed - for fast retrieval by regime similarity. - - Parameters - ---------- - max_patterns : int - Maximum total patterns retained. When full, lowest-confidence - patterns are evicted. - min_ic : float - Minimum |IC| threshold; patterns consistently below this are pruned. - cross_regime_specificity_threshold : float - A pattern with ``regime_specificity < threshold`` is classified as - a cross-regime (general) pattern. - """ - - def __init__( - self, - max_patterns: int = 500, - min_ic: float = 0.02, - cross_regime_specificity_threshold: float = 1.5, - ) -> None: - self.max_patterns = max_patterns - self.min_ic = min_ic - self.cross_regime_threshold = cross_regime_specificity_threshold - self._lock = threading.RLock() - # key: (formula_template, regime_str) - self._patterns: Dict[Tuple[str, str], RegimeSpecificPattern] = {} - - # --- public API --- - - def add_pattern( - self, - formula: str, - regime: RegimeState, - ic: float, - ) -> None: - """Add or update a pattern observation. - - If the (formula, regime) pair already exists, the IC statistics - are updated online. Otherwise a new entry is created. - - Parameters - ---------- - formula : str - regime : RegimeState - The regime active when this IC was measured. - ic : float - Observed IC (signed). 
- """ - with self._lock: - key = (formula, str(regime)) - if key in self._patterns: - pat = self._patterns[key] - pat.update_ic(ic, in_regime=True) - pat.confidence = min(1.0, pat.confidence + 0.05) - else: - # Also update out-of-regime IC for all *existing* patterns - # with a different regime tag - for existing_key, existing_pat in self._patterns.items(): - if existing_key[0] == formula and existing_key[1] != str(regime): - existing_pat.update_ic(ic, in_regime=False) - - # Create new pattern - pat = RegimeSpecificPattern( - formula_template=formula, - regime=regime, - ic_in_regime=ic, - ic_out_of_regime=0.0, - confidence=1.0, - n_observations=1, - n_in_regime=1, - ) - pat.regime_specificity = abs(ic) / (abs(0.0) + 1e-8) - self._patterns[key] = pat - - # Evict if over capacity - if len(self._patterns) > self.max_patterns: - self._evict_weakest() - - def retrieve_for_regime( - self, - current_regime: RegimeState, - top_k: int = 10, - min_confidence: float = 0.1, - ) -> List[RegimeSpecificPattern]: - """Retrieve patterns most relevant to the current regime. - - Patterns are scored as: - score = confidence * ic_in_regime * regime_similarity - - where ``regime_similarity`` is the Jaccard similarity between the - pattern's tagged regime and ``current_regime``. - - Parameters - ---------- - current_regime : RegimeState - top_k : int - min_confidence : float - Minimum confidence to include. - - Returns - ------- - list[RegimeSpecificPattern] - Sorted by descending relevance score. 
- """ - with self._lock: - scored: List[Tuple[float, RegimeSpecificPattern]] = [] - for pat in self._patterns.values(): - if pat.confidence < min_confidence: - continue - sim = pat.regime.similarity(current_regime) - score = pat.confidence * abs(pat.ic_in_regime) * (0.2 + 0.8 * sim) - scored.append((score, pat)) - scored.sort(key=lambda x: -x[0]) - return [p for _, p in scored[:top_k]] - - def get_cross_regime_patterns( - self, - top_k: int = 10, - min_confidence: float = 0.1, - ) -> List[RegimeSpecificPattern]: - """Return patterns that generalise well across regimes. - - A pattern qualifies as cross-regime if its ``regime_specificity`` - is below ``cross_regime_specificity_threshold`` *and* its absolute - IC is meaningfully positive (>= ``min_ic``). - - Returns - ------- - list[RegimeSpecificPattern] - """ - with self._lock: - cross: List[Tuple[float, RegimeSpecificPattern]] = [] - for pat in self._patterns.values(): - if pat.confidence < min_confidence: - continue - if pat.regime_specificity < self.cross_regime_threshold: - avg_ic = (abs(pat.ic_in_regime) + abs(pat.ic_out_of_regime)) / 2.0 - if avg_ic >= self.min_ic: - cross.append((avg_ic * pat.confidence, pat)) - cross.sort(key=lambda x: -x[0]) - return [p for _, p in cross[:top_k]] - - def apply_decay(self, decay_factor: float) -> None: - """Multiply all pattern confidences by ``decay_factor`` and prune weak ones.""" - with self._lock: - to_delete = [] - for key, pat in self._patterns.items(): - pat.confidence = max(0.0, pat.confidence * decay_factor) - if pat.confidence < 0.01 and pat.n_observations > 3: - to_delete.append(key) - for key in to_delete: - del self._patterns[key] - - def boost_regime_patterns(self, regime: RegimeState, boost: float = 0.1) -> None: - """Increase confidence of patterns tagged for ``regime``.""" - with self._lock: - for pat in self._patterns.values(): - if pat.regime == regime: - pat.confidence = min(1.0, pat.confidence + boost) - - def penalise_regime_patterns(self, regime: 
RegimeState, penalty: float = 0.3) -> None: - """Decrease confidence of patterns tagged for ``regime``.""" - with self._lock: - for pat in self._patterns.values(): - if pat.regime == regime: - pat.confidence = max(0.0, pat.confidence - penalty) - - def get_stats(self) -> dict: - """Return aggregate statistics.""" - with self._lock: - n = len(self._patterns) - if n == 0: - return { - "total_patterns": 0, - "avg_confidence": 0.0, - "avg_ic_in_regime": 0.0, - "cross_regime_count": 0, - } - confs = [p.confidence for p in self._patterns.values()] - ics = [p.ic_in_regime for p in self._patterns.values()] - cross = len(self.get_cross_regime_patterns(top_k=n)) - return { - "total_patterns": n, - "avg_confidence": float(np.mean(confs)), - "avg_ic_in_regime": float(np.mean(np.abs(ics))), - "cross_regime_count": cross, - } - - def to_dict(self) -> dict: - with self._lock: - return { - "max_patterns": self.max_patterns, - "min_ic": self.min_ic, - "cross_regime_threshold": self.cross_regime_threshold, - "patterns": [p.to_dict() for p in self._patterns.values()], - } - - @classmethod - def from_dict(cls, d: dict) -> "RegimeSpecificPatternStore": - store = cls( - max_patterns=d.get("max_patterns", 500), - min_ic=d.get("min_ic", 0.02), - cross_regime_specificity_threshold=d.get("cross_regime_threshold", 1.5), - ) - for pd in d.get("patterns", []): - pat = RegimeSpecificPattern.from_dict(pd) - key = (pat.formula_template, str(pat.regime)) - store._patterns[key] = pat - return store - - # --- internals --- - - def _evict_weakest(self) -> None: - """Remove the single weakest (lowest confidence * ic) pattern.""" - if not self._patterns: - return - worst_key = min( - self._patterns, - key=lambda k: ( - self._patterns[k].confidence - * (abs(self._patterns[k].ic_in_regime) + 1e-8) - ), - ) - del self._patterns[worst_key] - - -# --------------------------------------------------------------------------- -# OnlineMemoryUpdater -# 
--------------------------------------------------------------------------- - -class OnlineMemoryUpdater: - """Streaming experience-memory updater with exponential forgetting. - - Integrates with the base ``ExperienceMemory`` and the - ``RegimeSpecificPatternStore`` to maintain an up-to-date picture of - what works in the current market regime. - - Thread safety - ------------- - All mutating operations acquire ``self._lock`` (``threading.RLock``). - The ``base_memory`` is replaced atomically so readers always see a - consistent snapshot. - - Parameters - ---------- - base_memory : ExperienceMemory - The underlying experience memory (will be mutated in place via - evolution helpers). - forgetting_rate : float - Per-iteration exponential decay rate applied to pattern confidence. - regime_sensitivity : float - Weight given to regime-specific IC boosts vs generic boosts. - 0 = ignore regime, 1 = fully regime-sensitive. - min_confidence : float - Patterns with normalised confidence below this are pruned during - forgetting. - regime_boost : float - Confidence increment when a pattern's regime matches the current one. - regime_penalty : float - Confidence decrement when the regime changes away from a pattern's home. 
- """ - - def __init__( - self, - base_memory: ExperienceMemory, - forgetting_rate: float = 0.01, - regime_sensitivity: float = 0.5, - min_confidence: float = 0.05, - regime_boost: float = 0.1, - regime_penalty: float = 0.3, - ) -> None: - self.forgetting_rate = forgetting_rate - self.regime_sensitivity = regime_sensitivity - self.min_confidence = min_confidence - self.regime_boost = regime_boost - self.regime_penalty = regime_penalty - - self._lock = threading.RLock() - self._base_memory: ExperienceMemory = base_memory - - # Counters - self._iteration: int = 0 - self._last_decay_iteration: int = 0 - - # Per-regime IC accumulators: regime_str -> deque of ICs - self._regime_ic_history: Dict[str, deque] = defaultdict( - lambda: deque(maxlen=200) - ) - - # Outcome stats - self._outcome_counts: Dict[str, int] = defaultdict(int) - self._formula_regime_map: Dict[str, RegimeState] = {} - - # --- public API --- - - @property - def base_memory(self) -> ExperienceMemory: - """Thread-safe read of the current base memory snapshot.""" - with self._lock: - return self._base_memory - - def on_factor_evaluated( - self, - formula: str, - ic: float, - regime: RegimeState, - outcome: str, - ) -> None: - """Called immediately after each factor evaluation. - - Parameters - ---------- - formula : str - DSL formula of the evaluated candidate. - ic : float - Observed IC (signed). - regime : RegimeState - Active market regime at evaluation time. - outcome : str - One of: ``'admitted'``, ``'rejected_ic'``, - ``'rejected_correlation'``, ``'replaced'``. 
- """ - t0 = time.perf_counter() - with self._lock: - self._iteration += 1 - self._outcome_counts[outcome] += 1 - self._formula_regime_map[formula] = regime - regime_key = str(regime) - self._regime_ic_history[regime_key].append(ic) - - # Boost success patterns that match admitted factors - if outcome == "admitted" and abs(ic) >= 0.03: - boost_factor = 1 + int( - self.regime_sensitivity * 2 * abs(ic) / 0.1 - ) - # Try to match formula against existing success pattern templates - for pat in self._base_memory.success_patterns: - if _formula_matches_template(formula, pat.template): - self._base_memory = bump_pattern_confidence( - self._base_memory, pat.name, boost=boost_factor - ) - - elapsed_ms = (time.perf_counter() - t0) * 1000 - if elapsed_ms > 1.0: - logger.debug( - "on_factor_evaluated took %.2f ms (target < 1 ms)", elapsed_ms - ) - - def apply_forgetting(self, iterations_elapsed: int = 1) -> None: - """Exponentially decay pattern confidence and prune stale entries. - - Parameters - ---------- - iterations_elapsed : int - Number of mining iterations since last call to this method. - """ - with self._lock: - self._base_memory = apply_confidence_decay( - self._base_memory, - forgetting_rate=self.forgetting_rate, - iterations_elapsed=iterations_elapsed, - min_confidence=self.min_confidence, - ) - self._last_decay_iteration = self._iteration - - def on_regime_change( - self, - old_regime: RegimeState, - new_regime: RegimeState, - ) -> None: - """React to a detected regime transition. - - Actions performed: - 1. Boost confidence of success patterns tagged for ``new_regime``. - 2. Down-weight success patterns tagged for ``old_regime``. - 3. Insert a regime-transition ``StrategicInsight`` into base memory. 
- - Parameters - ---------- - old_regime : RegimeState - new_regime : RegimeState - """ - with self._lock: - # Boost / penalise patterns in base memory by tag matching - for pat in self._base_memory.success_patterns: - tag_new = str(new_regime) - tag_old = str(old_regime) - # We tag patterns heuristically via their description keywords - desc_lower = pat.description.lower() - name_lower = pat.name.lower() - new_labels_lower = {lbl.lower() for lbl in new_regime.labels} - old_labels_lower = {lbl.lower() for lbl in old_regime.labels} - - if any(lbl in desc_lower or lbl in name_lower for lbl in new_labels_lower): - self._base_memory = bump_pattern_confidence( - self._base_memory, pat.name, - boost=int(self.regime_boost * 10) - ) - elif any(lbl in desc_lower or lbl in name_lower for lbl in old_labels_lower): - self._base_memory = penalise_pattern_confidence( - self._base_memory, pat.name, - penalty=self.regime_penalty, - ) - - # Add a strategic insight about the regime transition - insight_text = ( - f"Regime transition detected: {old_regime} -> {new_regime} " - f"at iteration {self._iteration}" - ) - evidence = ( - f"Based on EW streaming statistics. New regime labels: " - f"{new_regime.labels}. Old: {old_regime.labels}." - ) - new_insight = StrategicInsight( - insight=insight_text, - evidence=evidence, - batch_source=self._iteration, - ) - # Avoid duplicate back-to-back transition insights - if not self._base_memory.insights or ( - self._base_memory.insights[-1].insight != insight_text - ): - self._base_memory.insights.append(new_insight) - # Cap insights at 50 to avoid unbounded growth - if len(self._base_memory.insights) > 50: - self._base_memory.insights = self._base_memory.insights[-50:] - - def get_memory_health_stats(self) -> dict: - """Return comprehensive health statistics for the memory system. 
- - Returns - ------- - dict - Keys: ``active_patterns``, ``avg_confidence``, - ``regime_distribution``, ``staleness_score``, - ``outcome_counts``, ``total_iterations``. - """ - with self._lock: - mem = self._base_memory - all_counts = [ - p.occurrence_count for p in mem.success_patterns - ] + [ - f.occurrence_count for f in mem.forbidden_directions - ] - max_c = max(all_counts) if all_counts else 1 - if max_c == 0: - max_c = 1 - norm_confs = [c / max_c for c in all_counts] - avg_conf = float(np.mean(norm_confs)) if norm_confs else 0.0 - - # Regime distribution from IC history - regime_dist = { - k: len(v) for k, v in self._regime_ic_history.items() - } - - # Staleness: fraction of patterns with count 0 (never updated) - n_patterns = len(mem.success_patterns) + len(mem.forbidden_directions) - n_zero = sum(1 for c in all_counts if c == 0) - staleness = n_zero / max(n_patterns, 1) - - return { - "active_patterns": n_patterns, - "avg_confidence": round(avg_conf, 4), - "regime_distribution": regime_dist, - "staleness_score": round(staleness, 4), - "outcome_counts": dict(self._outcome_counts), - "total_iterations": self._iteration, - "last_decay_iteration": self._last_decay_iteration, - "version": mem.version, - } - - def to_dict(self) -> dict: - with self._lock: - return { - "forgetting_rate": self.forgetting_rate, - "regime_sensitivity": self.regime_sensitivity, - "min_confidence": self.min_confidence, - "regime_boost": self.regime_boost, - "regime_penalty": self.regime_penalty, - "iteration": self._iteration, - "last_decay_iteration": self._last_decay_iteration, - "outcome_counts": dict(self._outcome_counts), - "base_memory": self._base_memory.to_dict(), - # Regime IC history stores last N ICs per regime - "regime_ic_history": { - k: list(v) for k, v in self._regime_ic_history.items() - }, - } - - @classmethod - def from_dict(cls, d: dict) -> "OnlineMemoryUpdater": - mem = ExperienceMemory.from_dict(d["base_memory"]) - updater = cls( - base_memory=mem, - 
forgetting_rate=d.get("forgetting_rate", 0.01), - regime_sensitivity=d.get("regime_sensitivity", 0.5), - min_confidence=d.get("min_confidence", 0.05), - regime_boost=d.get("regime_boost", 0.1), - regime_penalty=d.get("regime_penalty", 0.3), - ) - updater._iteration = d.get("iteration", 0) - updater._last_decay_iteration = d.get("last_decay_iteration", 0) - updater._outcome_counts.update(d.get("outcome_counts", {})) - for regime_key, ic_list in d.get("regime_ic_history", {}).items(): - updater._regime_ic_history[regime_key] = deque(ic_list, maxlen=200) - return updater - - -# --------------------------------------------------------------------------- -# RegimeTransitionForecaster -# --------------------------------------------------------------------------- - -class RegimeTransitionForecaster: - """Logistic-regression forecaster for regime transitions. - - Trains on the sequence of (feature_vector, next_regime_label) pairs - accumulated during live trading / mining. Used to proactively load - regime-specific patterns *before* a transition occurs. - - The feature vector is constructed inside ``_build_feature_vector`` and - encodes recent EW statistics (mean, vol, Hurst proxy) concatenated with - a one-hot encoding of the current regime dimensions. - - Parameters - ---------- - n_regime_classes : int - Number of distinct regime label combinations tracked. Set to a - small number (e.g. 8 or 16) to keep the model tractable. - min_samples_to_fit : int - Minimum labelled samples before the model is fitted. - refit_every : int - Re-train every N calls to ``predict_next_regime``. 
- """ - - # Feature dimension: 3 (ew stats) + 3 (trend one-hot) + 3 (vol one-hot) - # + 3 (mean_rev one-hot) = 12 - _FEATURE_DIM = 12 - - def __init__( - self, - min_samples_to_fit: int = 30, - refit_every: int = 20, - ) -> None: - self.min_samples_to_fit = min_samples_to_fit - self.refit_every = refit_every - - self._lock = threading.RLock() - self._feature_history: List[np.ndarray] = [] - self._regime_history: List[RegimeState] = [] - self._next_regime_labels: List[str] = [] # shifted by 1 - - self._model = None # sklearn LogisticRegression, lazy init - self._label_encoder: Dict[str, int] = {} - self._inv_label_encoder: Dict[int, str] = {} - self._predict_call_count: int = 0 - self._fitted: bool = False - - # Cache of unique regime states seen during training - self._known_regimes: Dict[str, RegimeState] = {} - - def record_observation( - self, - regime: RegimeState, - features: np.ndarray, - ) -> None: - """Append one (features, regime) observation to the training buffer. - - Should be called once per bar/update with the current streaming - feature vector and the corresponding regime. - - Parameters - ---------- - regime : RegimeState - features : np.ndarray, shape (``_FEATURE_DIM``,) - """ - with self._lock: - self._feature_history.append(features.copy()) - self._regime_history.append(regime) - regime_str = str(regime) - self._known_regimes[regime_str] = regime - - # Build (X, y) where y[t] = regime_str[t+1] - if len(self._regime_history) >= 2: - # The label for the *previous* observation is the current regime - self._next_regime_labels.append(regime_str) - - def fit( - self, - regime_history: Optional[List[RegimeState]] = None, - feature_history: Optional[np.ndarray] = None, - ) -> None: - """Fit (or re-fit) the logistic regression model. - - Can be called with external data (for back-testing) or with no - arguments to use the internally accumulated buffer. 
- - Parameters - ---------- - regime_history : list[RegimeState] or None - Optional external regime sequence (length T). - feature_history : np.ndarray or None, shape (T, _FEATURE_DIM) - Optional external feature matrix. - """ - with self._lock: - if regime_history is not None and feature_history is not None: - assert len(regime_history) == len(feature_history) - feats = feature_history - regimes = regime_history - labels = [str(r) for r in regimes[1:]] - X = feats[:-1] - else: - if len(self._next_regime_labels) < self.min_samples_to_fit: - return - X = np.array(self._feature_history[:-1]) - labels = self._next_regime_labels - - unique_labels = list(set(labels)) - if len(unique_labels) < 2: - return # Cannot fit with only one class - - self._label_encoder = {lbl: i for i, lbl in enumerate(unique_labels)} - self._inv_label_encoder = {i: lbl for lbl, i in self._label_encoder.items()} - - y = np.array([self._label_encoder[lbl] for lbl in labels]) - - try: - from sklearn.linear_model import LogisticRegression - from sklearn.preprocessing import StandardScaler - - scaler = StandardScaler() - X_scaled = scaler.fit_transform(X) - - model = LogisticRegression( - max_iter=500, - solver="lbfgs", - C=1.0, - random_state=42, - ) - model.fit(X_scaled, y) - self._model = (scaler, model) - self._fitted = True - except Exception as e: - logger.warning("RegimeTransitionForecaster fit failed: %s", e) - self._fitted = False - - def predict_next_regime( - self, - current_features: np.ndarray, - ) -> Tuple[RegimeState, float]: - """Predict the most probable next regime. - - Parameters - ---------- - current_features : np.ndarray, shape (``_FEATURE_DIM``,) - - Returns - ------- - (RegimeState, float) - Predicted regime and probability. Returns (current regime, 0.0) - if the model is not yet fitted. 
- """ - with self._lock: - self._predict_call_count += 1 - if self._predict_call_count % self.refit_every == 0: - self.fit() - - if not self._fitted or self._model is None: - # Fall back to current regime - current = ( - self._regime_history[-1] - if self._regime_history - else RegimeState() - ) - return current, 0.0 - - scaler, model = self._model - try: - X = scaler.transform(current_features.reshape(1, -1)) - proba = model.predict_proba(X)[0] - best_class = int(np.argmax(proba)) - best_prob = float(proba[best_class]) - best_label = self._inv_label_encoder.get(best_class, "") - best_regime = self._known_regimes.get(best_label, RegimeState()) - return best_regime, best_prob - except Exception as e: - logger.warning("RegimeTransitionForecaster predict failed: %s", e) - return RegimeState(), 0.0 - - def prepare_memory_for_transition( - self, - predicted_regime: RegimeState, - pattern_store: RegimeSpecificPatternStore, - boost: float = 0.15, - ) -> None: - """Pre-load (boost confidence of) patterns for the predicted regime. - - Parameters - ---------- - predicted_regime : RegimeState - pattern_store : RegimeSpecificPatternStore - boost : float - Confidence boost applied to matching patterns. - """ - pattern_store.boost_regime_patterns(predicted_regime, boost=boost) - - @staticmethod - def build_feature_vector( - ew_mean: float, - ew_std: float, - hurst_proxy: float, - regime: RegimeState, - ) -> np.ndarray: - """Build a fixed-length feature vector from streaming statistics. 
- - Layout (12 elements): - [0] ew_mean - [1] ew_std - [2] hurst_proxy - [3-5] trend one-hot (BULL, BEAR, NEUTRAL) - [6-8] vol one-hot (HIGH_VOL, LOW_VOL, NORMAL_VOL) - [9-11] mean_rev one-hot (TRENDING, MEAN_REVERTING, RANDOM_WALK) - - Parameters - ---------- - ew_mean, ew_std, hurst_proxy : float - regime : RegimeState - - Returns - ------- - np.ndarray, shape (12,) - """ - from factorminer.evaluation.regime import TrendRegime, VolRegime, MeanRevRegime - - trend_oh = [ - float(regime.trend == TrendRegime.BULL), - float(regime.trend == TrendRegime.BEAR), - float(regime.trend == TrendRegime.NEUTRAL), - ] - vol_oh = [ - float(regime.vol == VolRegime.HIGH_VOL), - float(regime.vol == VolRegime.LOW_VOL), - float(regime.vol == VolRegime.NORMAL_VOL), - ] - mr_oh = [ - float(regime.mean_rev == MeanRevRegime.TRENDING), - float(regime.mean_rev == MeanRevRegime.MEAN_REVERTING), - float(regime.mean_rev == MeanRevRegime.RANDOM_WALK), - ] - return np.array( - [ew_mean, ew_std, hurst_proxy] + trend_oh + vol_oh + mr_oh, - dtype=np.float64, - ) - - def to_dict(self) -> dict: - with self._lock: - return { - "min_samples_to_fit": self.min_samples_to_fit, - "refit_every": self.refit_every, - "predict_call_count": self._predict_call_count, - "fitted": self._fitted, - "feature_history": [f.tolist() for f in self._feature_history[-500:]], - "regime_history": [r.to_dict() for r in self._regime_history[-500:]], - "next_regime_labels": self._next_regime_labels[-500:], - "known_regimes": {k: v.to_dict() for k, v in self._known_regimes.items()}, - } - - @classmethod - def from_dict(cls, d: dict) -> "RegimeTransitionForecaster": - forecaster = cls( - min_samples_to_fit=d.get("min_samples_to_fit", 30), - refit_every=d.get("refit_every", 20), - ) - forecaster._feature_history = [ - np.array(f, dtype=np.float64) for f in d.get("feature_history", []) - ] - forecaster._regime_history = [ - RegimeState.from_dict(r) for r in d.get("regime_history", []) - ] - forecaster._next_regime_labels = 
d.get("next_regime_labels", []) - forecaster._known_regimes = { - k: RegimeState.from_dict(v) - for k, v in d.get("known_regimes", {}).items() - } - forecaster._predict_call_count = d.get("predict_call_count", 0) - if d.get("fitted", False): - forecaster.fit() - return forecaster - - -# --------------------------------------------------------------------------- -# OnlineRegimeMemory — main orchestrator -# --------------------------------------------------------------------------- - -class OnlineRegimeMemory: - """Full online regime-aware memory system. - - Integrates: - - ``StreamingRegimeDetector`` for bar-by-bar regime classification - - ``RegimeSpecificPatternStore`` for per-regime IC tracking - - ``OnlineMemoryUpdater`` for streaming forgetting and regime-change hooks - - ``RegimeTransitionForecaster`` for proactive memory preparation - - Usage - ----- - :: - - from factorminer.memory.online_regime_memory import OnlineRegimeMemory - from factorminer.memory.memory_store import ExperienceMemory - - mem = OnlineRegimeMemory(base_memory=ExperienceMemory(), config={}) - - # In the mining loop, after each bar of market data: - mem.update_market(returns=bar_returns) - - # After each factor evaluation: - mem.update(formula, signals, ic, market_data, outcome) - - # At generation time: - signal = mem.retrieve(library_state, market_data) - print(signal.prompt_text) - - Parameters - ---------- - base_memory : ExperienceMemory - config : dict - Optional configuration overrides. 
Keys and defaults: - - - ``forgetting_rate`` (0.01): per-iteration decay - - ``regime_sensitivity`` (0.5): how much to weight regime-specific patterns - - ``min_confidence`` (0.05): pruning threshold - - ``forget_every_n_iterations`` (10): call ``apply_forgetting`` every N evals - - ``max_regime_patterns`` (500): capacity of regime pattern store - - ``streaming_config`` ({}): forwarded to ``StreamingRegimeConfig`` - """ - - def __init__( - self, - base_memory: Optional[ExperienceMemory] = None, - config: Optional[dict] = None, - ) -> None: - cfg = config or {} - if base_memory is None: - base_memory = ExperienceMemory() - - streaming_cfg = StreamingRegimeConfig( - **{k: v for k, v in cfg.get("streaming_config", {}).items() - if k in StreamingRegimeConfig.__dataclass_fields__} - ) - self._detector = StreamingRegimeDetector(config=streaming_cfg) - self._pattern_store = RegimeSpecificPatternStore( - max_patterns=cfg.get("max_regime_patterns", 500), - ) - self._updater = OnlineMemoryUpdater( - base_memory=base_memory, - forgetting_rate=cfg.get("forgetting_rate", 0.01), - regime_sensitivity=cfg.get("regime_sensitivity", 0.5), - min_confidence=cfg.get("min_confidence", 0.05), - ) - self._forecaster = RegimeTransitionForecaster() - self._forget_every = cfg.get("forget_every_n_iterations", 10) - self._iteration_count: int = 0 - self._current_regime: RegimeState = RegimeState() - self._lock = threading.RLock() - - # Track last regime for change detection - self._prev_regime: RegimeState = RegimeState() - - # ------------------------------------------------------------------ - # Primary API - # ------------------------------------------------------------------ - - def update_market( - self, - returns: np.ndarray, - volumes: Optional[np.ndarray] = None, - ) -> RegimeState: - """Process one bar of market data and update the regime state. - - Call this *before* ``update()`` on any factors evaluated at this bar. 
- - Parameters - ---------- - returns : np.ndarray, shape (M,) - volumes : np.ndarray or None - - Returns - ------- - RegimeState - Updated current regime. - """ - with self._lock: - new_regime = self._detector.update(returns, volumes) - prev = self._current_regime - - if new_regime != prev: - self._updater.on_regime_change(prev, new_regime) - self._pattern_store.boost_regime_patterns(new_regime, boost=0.1) - self._pattern_store.penalise_regime_patterns(prev, penalty=0.15) - - # Prepare memory proactively - feat = self._build_feature_vector(new_regime) - predicted, prob = self._forecaster.predict_next_regime(feat) - if prob > 0.5: - self._forecaster.prepare_memory_for_transition( - predicted, self._pattern_store - ) - - self._prev_regime = prev - self._current_regime = new_regime - - # Record for forecaster - feat = self._build_feature_vector(new_regime) - self._forecaster.record_observation(new_regime, feat) - - return new_regime - - def update( - self, - formula: str, - signals: np.ndarray, - ic: float, - market_data: Optional[dict] = None, - outcome: str = "admitted", - ) -> None: - """Single update call: detect regime from market_data, update patterns. - - This is the main hook called inside the mining loop after each factor - evaluation. It orchestrates: - 1. Regime detection from ``market_data`` (if provided) - 2. Regime-specific pattern update - 3. Base memory update (online updater) - 4. Periodic forgetting - - Parameters - ---------- - formula : str - DSL formula string. - signals : np.ndarray - Factor signal matrix (used only for future extension). - ic : float - Observed IC. - market_data : dict or None - Optional dict with key ``'returns'`` (np.ndarray). 
- outcome : str - """ - with self._lock: - regime = self._current_regime - - # If market_data provided, do an inline regime update - if market_data is not None and "returns" in market_data: - regime = self.update_market( - market_data["returns"], - market_data.get("volumes"), - ) - - # Update regime-specific pattern store - if abs(ic) >= 0.02: - self._pattern_store.add_pattern(formula, regime, ic) - - # Notify online updater - self._updater.on_factor_evaluated(formula, ic, regime, outcome) - - self._iteration_count += 1 - - # Periodic forgetting - if self._iteration_count % self._forget_every == 0: - self._updater.apply_forgetting( - iterations_elapsed=self._forget_every - ) - decay = (1.0 - self._updater.forgetting_rate) ** self._forget_every - self._pattern_store.apply_decay(decay) - - def retrieve( - self, - library_state: Optional[dict] = None, - market_data: Optional[dict] = None, - max_success: int = 8, - max_forbidden: int = 10, - max_insights: int = 10, - top_regime_patterns: int = 5, - ) -> MemorySignal: - """Regime-aware memory retrieval. - - Combines the standard base-memory retrieval with regime-specific - pattern selection and a next-regime forecast. - - Parameters - ---------- - library_state : dict or None - market_data : dict or None - max_success : int - max_forbidden : int - max_insights : int - top_regime_patterns : int - - Returns - ------- - MemorySignal - """ - with self._lock: - current_regime = self._current_regime - - # Update regime if market data provided - if market_data is not None and "returns" in market_data: - current_regime = self.update_market( - market_data["returns"], market_data.get("volumes") - ) - - # 1. Base retrieval - base_result = retrieve_memory( - self._updater.base_memory, - library_state=library_state, - max_success=max_success, - max_forbidden=max_forbidden, - max_insights=max_insights, - ) - - # 2. 
Regime-specific patterns - regime_pats = self._pattern_store.retrieve_for_regime( - current_regime, top_k=top_regime_patterns - ) - cross_pats = self._pattern_store.get_cross_regime_patterns( - top_k=top_regime_patterns // 2 + 1 - ) - - # 3. Forecast next regime - feat = self._build_feature_vector(current_regime) - predicted_regime, forecast_conf = self._forecaster.predict_next_regime(feat) - - # 4. Build enriched prompt text - regime_section = self._format_regime_section( - current_regime, regime_pats, cross_pats, predicted_regime, forecast_conf - ) - prompt_text = base_result["prompt_text"] + "\n" + regime_section - - return MemorySignal( - recommended_directions=base_result["recommended_directions"], - forbidden_directions=base_result["forbidden_directions"], - insights=base_result["insights"], - library_state=base_result["library_state"], - prompt_text=prompt_text, - current_regime=current_regime, - regime_patterns=[p.to_dict() for p in regime_pats], - cross_regime_patterns=[p.to_dict() for p in cross_pats], - forecasted_regime=predicted_regime if forecast_conf > 0.0 else None, - forecast_confidence=forecast_conf, - ) - - def get_full_status(self) -> dict: - """Comprehensive status: regime, patterns, health, forecasts. - - Returns - ------- - dict - Keys: ``current_regime``, ``regime_history``, ``transition_probs``, - ``pattern_store_stats``, ``memory_health``, ``forecasted_regime``, - ``forecast_confidence``, ``iteration_count``. 
- """ - with self._lock: - current = self._current_regime - feat = self._build_feature_vector(current) - predicted, conf = self._forecaster.predict_next_regime(feat) - history = self._detector.get_regime_history(lookback=20) - return { - "current_regime": current.to_dict(), - "regime_history": [r.to_dict() for r in history], - "transition_probs": self._detector.regime_transition_probability(), - "pattern_store_stats": self._pattern_store.get_stats(), - "memory_health": self._updater.get_memory_health_stats(), - "forecasted_regime": predicted.to_dict() if conf > 0.0 else None, - "forecast_confidence": round(conf, 4), - "iteration_count": self._iteration_count, - } - - # ------------------------------------------------------------------ - # Persistence - # ------------------------------------------------------------------ - - def save(self, path: str | Path) -> None: - """Serialise to JSON. - - Parameters - ---------- - path : str or Path - """ - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - with self._lock: - data = self.to_dict() - with open(path, "w") as f: - json.dump(data, f, indent=2, ensure_ascii=False) - - def load(self, path: str | Path) -> None: - """Deserialise from JSON. 
- - Parameters - ---------- - path : str or Path - """ - with open(path) as f: - data = json.load(f) - with self._lock: - self._from_dict_inplace(data) - - def to_dict(self) -> dict: - with self._lock: - return { - "_version": 1, - "iteration_count": self._iteration_count, - "current_regime": self._current_regime.to_dict(), - "prev_regime": self._prev_regime.to_dict(), - "forget_every": self._forget_every, - "updater": self._updater.to_dict(), - "pattern_store": self._pattern_store.to_dict(), - "forecaster": self._forecaster.to_dict(), - } - - @classmethod - def from_dict(cls, d: dict) -> "OnlineRegimeMemory": - mem_data = d["updater"]["base_memory"] - base_mem = ExperienceMemory.from_dict(mem_data) - cfg = {"forget_every_n_iterations": d.get("forget_every", 10)} - obj = cls(base_memory=base_mem, config=cfg) - obj._from_dict_inplace(d) - return obj - - def _from_dict_inplace(self, d: dict) -> None: - self._iteration_count = d.get("iteration_count", 0) - self._current_regime = RegimeState.from_dict( - d.get("current_regime", {}) - ) - self._prev_regime = RegimeState.from_dict( - d.get("prev_regime", {}) - ) - self._forget_every = d.get("forget_every", 10) - self._updater = OnlineMemoryUpdater.from_dict(d["updater"]) - self._pattern_store = RegimeSpecificPatternStore.from_dict( - d["pattern_store"] - ) - self._forecaster = RegimeTransitionForecaster.from_dict( - d["forecaster"] - ) - - # pickle support - def __getstate__(self) -> dict: - return self.to_dict() - - def __setstate__(self, state: dict) -> None: - # Minimal init to avoid __init__ side effects - self._lock = threading.RLock() - self._from_dict_inplace(state) - # Rebuild detector (streaming state is not persisted) - self._detector = StreamingRegimeDetector() - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _build_feature_vector(self, regime: RegimeState) -> np.ndarray: - """Build a 
12-element feature vector from the detector's EW state.""" - ew_mean = self._detector._ew_mean - ew_std = float(np.sqrt(max(self._detector._ew_var, 0.0))) - # Use the ratio of fast/slow variance as a Hurst proxy - slow_var = max(self._detector._ew_var_slow, 1e-16) - fast_var = max(self._detector._ew_var, 1e-16) - hurst_proxy = float(np.clip( - 0.5 + 0.5 * math.log(fast_var / slow_var + 1e-10) / (math.log(20) + 1e-10), - 0.0, 1.0 - )) - return RegimeTransitionForecaster.build_feature_vector( - ew_mean, ew_std, hurst_proxy, regime - ) - - @staticmethod - def _format_regime_section( - current: RegimeState, - regime_patterns: List[RegimeSpecificPattern], - cross_patterns: List[RegimeSpecificPattern], - predicted: RegimeState, - forecast_conf: float, - ) -> str: - lines = [ - "=== REGIME-AWARE MEMORY ===", - f"Current regime: {current}", - ] - if forecast_conf > 0.3: - lines.append( - f"Forecasted next regime: {predicted} " - f"(confidence {forecast_conf:.1%})" - ) - if regime_patterns: - lines.append("\nTop patterns for current regime:") - for i, p in enumerate(regime_patterns, 1): - lines.append( - f" {i}. {p.formula_template[:80]} " - f"[IC={p.ic_in_regime:.3f}, " - f"spec={p.regime_specificity:.2f}, " - f"conf={p.confidence:.2f}]" - ) - if cross_patterns: - lines.append("\nCross-regime (universal) patterns:") - for i, p in enumerate(cross_patterns, 1): - lines.append( - f" {i}. 
{p.formula_template[:80]} " - f"[avg_IC={abs(p.ic_in_regime):.3f}, " - f"conf={p.confidence:.2f}]" - ) - lines.append("") - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# MemoryForgetCurve -# --------------------------------------------------------------------------- - -@dataclass -class _MemorySnapshot: - """Internal snapshot used by MemoryForgetCurve.""" - iteration: int - timestamp: float - active_patterns: int - avg_confidence: float - n_regime_patterns: int - staleness_score: float - pattern_confidences: List[float] - - -class MemoryForgetCurve: - """Track and visualise how memory evolves (and forgets) over mining iterations. - - Parameters - ---------- - max_snapshots : int - Maximum snapshots to retain in memory. - """ - - def __init__(self, max_snapshots: int = 1000) -> None: - self.max_snapshots = max_snapshots - self._snapshots: List[_MemorySnapshot] = [] - self._lock = threading.RLock() - - def record_snapshot( - self, - memory: OnlineRegimeMemory, - iteration: int, - ) -> None: - """Record a snapshot of the current memory state. - - Parameters - ---------- - memory : OnlineRegimeMemory - iteration : int - Current mining iteration number (used as x-axis). 
- """ - status = memory.get_full_status() - health = status["memory_health"] - ps = status["pattern_store_stats"] - - # Collect per-pattern confidences from the regime pattern store - with memory._lock: - confs = [ - p.confidence - for p in memory._pattern_store._patterns.values() - ] - - snap = _MemorySnapshot( - iteration=iteration, - timestamp=time.time(), - active_patterns=health["active_patterns"], - avg_confidence=health["avg_confidence"], - n_regime_patterns=ps["total_patterns"], - staleness_score=health["staleness_score"], - pattern_confidences=confs, - ) - with self._lock: - self._snapshots.append(snap) - if len(self._snapshots) > self.max_snapshots: - self._snapshots = self._snapshots[-self.max_snapshots:] - - def get_pattern_lifetimes(self) -> List[float]: - """Estimate pattern lifetimes (iterations survived) from snapshot series. - - Returns - ------- - list[float] - One entry per 'pattern birth' estimated from count increases. - Uses the number of iterations between when a pattern first appears - (count > 0) and drops below min_confidence. - - Note: this is an approximation based on the active count trajectory. - """ - with self._lock: - if len(self._snapshots) < 2: - return [] - counts = [s.n_regime_patterns for s in self._snapshots] - iterations = [s.iteration for s in self._snapshots] - lifetimes = [] - # Simple heuristic: measure spans between count peaks and troughs - for i in range(1, len(counts) - 1): - if counts[i] > counts[i - 1] and counts[i] > counts[i + 1]: - # Local peak: estimate lifetime as distance to next trough - for j in range(i + 1, len(counts)): - if counts[j] < counts[i] * 0.5: - lifetimes.append(float(iterations[j] - iterations[i])) - break - return lifetimes - - def plot_confidence_decay(self) -> None: - """Plot confidence decay and pattern count over iterations. - - Requires matplotlib to be installed. If not available, prints a - summary table instead. 
- """ - with self._lock: - snapshots = list(self._snapshots) - - if not snapshots: - print("No snapshots recorded yet.") - return - - iterations = [s.iteration for s in snapshots] - avg_confs = [s.avg_confidence for s in snapshots] - active = [s.active_patterns for s in snapshots] - regime_counts = [s.n_regime_patterns for s in snapshots] - staleness = [s.staleness_score for s in snapshots] - - try: - import matplotlib.pyplot as plt - - fig, axes = plt.subplots(2, 2, figsize=(12, 8)) - fig.suptitle("Memory Forget Curve", fontsize=14) - - ax = axes[0, 0] - ax.plot(iterations, avg_confs, "b-o", markersize=3) - ax.set_title("Average Pattern Confidence") - ax.set_xlabel("Iteration") - ax.set_ylabel("Confidence") - ax.grid(True, alpha=0.3) - - ax = axes[0, 1] - ax.plot(iterations, active, "g-o", markersize=3) - ax.set_title("Active Patterns (base memory)") - ax.set_xlabel("Iteration") - ax.set_ylabel("Count") - ax.grid(True, alpha=0.3) - - ax = axes[1, 0] - ax.plot(iterations, regime_counts, "r-o", markersize=3) - ax.set_title("Regime-Specific Patterns") - ax.set_xlabel("Iteration") - ax.set_ylabel("Count") - ax.grid(True, alpha=0.3) - - ax = axes[1, 1] - ax.plot(iterations, staleness, "m-o", markersize=3) - ax.set_title("Staleness Score (fraction of zero-count patterns)") - ax.set_xlabel("Iteration") - ax.set_ylabel("Staleness") - ax.grid(True, alpha=0.3) - - plt.tight_layout() - plt.show() - - except ImportError: - # Fallback: ASCII table - print( - f"{'Iter':>8} {'AvgConf':>10} {'Active':>8} " - f"{'RegimePats':>12} {'Staleness':>10}" - ) - print("-" * 52) - for s in snapshots[::max(1, len(snapshots) // 20)]: - print( - f"{s.iteration:>8} {s.avg_confidence:>10.4f} " - f"{s.active_patterns:>8} {s.n_regime_patterns:>12} " - f"{s.staleness_score:>10.4f}" - ) - - def to_dict(self) -> dict: - with self._lock: - return { - "max_snapshots": self.max_snapshots, - "snapshots": [ - { - "iteration": s.iteration, - "timestamp": s.timestamp, - "active_patterns": 
s.active_patterns, - "avg_confidence": s.avg_confidence, - "n_regime_patterns": s.n_regime_patterns, - "staleness_score": s.staleness_score, - } - for s in self._snapshots - ], - } - - @classmethod - def from_dict(cls, d: dict) -> "MemoryForgetCurve": - curve = cls(max_snapshots=d.get("max_snapshots", 1000)) - for sd in d.get("snapshots", []): - snap = _MemorySnapshot( - iteration=sd["iteration"], - timestamp=sd["timestamp"], - active_patterns=sd["active_patterns"], - avg_confidence=sd["avg_confidence"], - n_regime_patterns=sd["n_regime_patterns"], - staleness_score=sd["staleness_score"], - pattern_confidences=[], - ) - curve._snapshots.append(snap) - return curve - - -# --------------------------------------------------------------------------- -# Utility helpers -# --------------------------------------------------------------------------- - -def _formula_matches_template(formula: str, template: str) -> bool: - """Heuristic check: does a formula share structural operators with a template? - - Extracts capitalised operator names from both strings and tests for - meaningful overlap (>= 1 shared operator, or substring containment). - """ - import re - op_re = re.compile(r"\b([A-Z][a-zA-Z]+)\(") - f_ops = set(op_re.findall(formula)) - t_ops = set(op_re.findall(template)) - if not f_ops or not t_ops: - return False - overlap = f_ops & t_ops - # At least 1 operator shared AND at least half of template ops present - return ( - len(overlap) >= 1 - and len(overlap) / max(len(t_ops), 1) >= 0.4 - ) diff --git a/src/factorminer/factorminer/memory/retrieval.py b/src/factorminer/factorminer/memory/retrieval.py deleted file mode 100644 index 2fd8519..0000000 --- a/src/factorminer/factorminer/memory/retrieval.py +++ /dev/null @@ -1,288 +0,0 @@ -"""Memory Retrieval operator R(M, L). - -Context-dependent retrieval of experience memory, producing a structured -memory signal m for injection into the LLM generation prompt. 
- -The retrieval considers the current library state (domain saturation, -recent rejections) to select the most relevant patterns and insights. -""" - -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) - - -def _score_success_pattern( - pattern: SuccessPattern, - domain_saturation: Dict[str, float], - saturated_threshold: float = 0.7, -) -> float: - """Score a success pattern for relevance given current library state. - - Patterns in saturated domains score lower; high success-rate patterns - with many occurrences score higher. - """ - base_score = 1.0 - - # Success rate bonus - rate_bonus = {"High": 2.0, "Medium": 1.0, "Low": 0.5} - base_score *= rate_bonus.get(pattern.success_rate, 1.0) - - # Occurrence count bonus (log scale to avoid runaway) - if pattern.occurrence_count > 0: - import math - base_score *= 1.0 + math.log1p(pattern.occurrence_count) - - # Domain saturation penalty - saturation = domain_saturation.get(pattern.name, 0.0) - if saturation >= saturated_threshold: - base_score *= 0.2 # Heavily penalize saturated domains - elif saturation >= 0.5: - base_score *= 0.6 - - return base_score - - -def _score_forbidden_direction( - direction: ForbiddenDirection, - recent_rejection_reasons: List[str], -) -> float: - """Score a forbidden direction for relevance. - - Directions matching recent rejection reasons score higher (more - important to communicate to the LLM). 
- """ - base_score = 1.0 - - # Higher correlation = more important to avoid - base_score *= 1.0 + direction.typical_correlation - - # Occurrence count: frequently encountered = important warning - if direction.occurrence_count > 0: - import math - base_score *= 1.0 + math.log1p(direction.occurrence_count) - - # Boost if matching recent rejections - direction_lower = direction.name.lower() - for reason in recent_rejection_reasons: - if any( - word in reason.lower() - for word in direction_lower.split() - if len(word) > 3 - ): - base_score *= 1.5 - break - - return base_score - - -def _select_relevant_success( - patterns: List[SuccessPattern], - domain_saturation: Dict[str, float], - max_patterns: int = 8, -) -> List[SuccessPattern]: - """Select the most relevant success patterns for the current context.""" - if not patterns: - return [] - - scored = [ - (pat, _score_success_pattern(pat, domain_saturation)) - for pat in patterns - ] - scored.sort(key=lambda x: x[1], reverse=True) - return [pat for pat, _ in scored[:max_patterns]] - - -def _select_relevant_forbidden( - directions: List[ForbiddenDirection], - recent_rejections: List[dict], - max_directions: int = 10, -) -> List[ForbiddenDirection]: - """Select the most relevant forbidden directions for the current context.""" - if not directions: - return [] - - recent_reasons = [ - r.get("reason", "") for r in recent_rejections - ] - scored = [ - (d, _score_forbidden_direction(d, recent_reasons)) - for d in directions - ] - scored.sort(key=lambda x: x[1], reverse=True) - return [d for d, _ in scored[:max_directions]] - - -def _format_library_state(state: MiningState) -> Dict[str, Any]: - """Format mining state as structured context for LLM prompt.""" - # Identify saturated domains - saturated = { - domain: sat - for domain, sat in state.domain_saturation.items() - if sat >= 0.5 - } - - # Recent admission rate trend - recent_logs = state.admission_log[-5:] if state.admission_log else [] - avg_rate = 0.0 - if 
recent_logs: - avg_rate = sum(log.get("admission_rate", 0) for log in recent_logs) / len(recent_logs) - - return { - "library_size": state.library_size, - "recent_admission_rate": round(avg_rate, 3), - "saturated_domains": saturated, - "recent_admissions_count": len(state.recent_admissions), - "recent_rejections_count": len(state.recent_rejections), - } - - -def _format_for_prompt( - success_patterns: List[SuccessPattern], - forbidden_directions: List[ForbiddenDirection], - insights: List[StrategicInsight], - library_state: Dict[str, Any], -) -> str: - """Format the memory signal as structured text for LLM injection. - - Produces a human-readable prompt section that can be inserted into - the factor generation prompt to guide the LLM. - """ - sections = [] - - # Library state - sections.append("=== CURRENT LIBRARY STATE ===") - sections.append(f"Library size: {library_state['library_size']} factors") - sections.append(f"Recent admission rate: {library_state['recent_admission_rate']:.1%}") - if library_state.get("saturated_domains"): - sections.append("Saturated domains (avoid):") - for domain, sat in library_state["saturated_domains"].items(): - sections.append(f" - {domain}: {sat:.0%} saturated") - sections.append("") - - # Recommended directions - if success_patterns: - sections.append("=== RECOMMENDED DIRECTIONS (P_succ) ===") - for i, pat in enumerate(success_patterns, 1): - sections.append(f"{i}. {pat.name} [{pat.success_rate}]") - sections.append(f" {pat.description}") - sections.append(f" Template: {pat.template}") - if pat.example_factors: - sections.append(f" Examples: {', '.join(pat.example_factors[:3])}") - sections.append("") - - # Forbidden directions - if forbidden_directions: - sections.append("=== FORBIDDEN DIRECTIONS (P_fail) ===") - sections.append("DO NOT generate factors using these patterns:") - for i, fd in enumerate(forbidden_directions, 1): - sections.append(f"{i}. 
{fd.name} (rho > {fd.typical_correlation:.2f})") - sections.append(f" Reason: {fd.reason}") - if fd.correlated_factors: - sections.append(f" Correlated with: {', '.join(fd.correlated_factors[:3])}") - sections.append("") - - # Strategic insights - if insights: - sections.append("=== STRATEGIC INSIGHTS ===") - for insight in insights: - sections.append(f"- {insight.insight}") - sections.append(f" Evidence: {insight.evidence}") - sections.append("") - - return "\n".join(sections) - - -# --------------------------------------------------------------------------- -# Public API: Memory Retrieval -# --------------------------------------------------------------------------- - -def retrieve_memory( - memory: ExperienceMemory, - library_state: Optional[Dict[str, Any]] = None, - max_success: int = 8, - max_forbidden: int = 10, - max_insights: int = 10, -) -> Dict[str, Any]: - """Memory Retrieval operator R(M, L). - - Performs context-dependent retrieval matching against the current - library state, returning a memory signal m suitable for LLM prompt - injection. - - Parameters - ---------- - memory : ExperienceMemory - The experience memory to retrieve from. - library_state : dict, optional - Current library diagnostics. If None, uses the state from memory. - Expected keys: library_size, domain_saturation, etc. - max_success : int - Maximum number of success patterns to include. - max_forbidden : int - Maximum number of forbidden directions to include. - max_insights : int - Maximum number of insights to include. 
- - Returns - ------- - dict - Memory signal m with keys: - - recommended_directions: list of success pattern dicts - - forbidden_directions: list of forbidden direction dicts - - insights: list of insight dicts - - library_state: dict of library state info - - prompt_text: str - formatted text for LLM prompt injection - """ - # Use provided library state or fall back to memory's state - if library_state is not None: - # Update memory state with external library info - state = MiningState( - library_size=library_state.get("library_size", memory.state.library_size), - recent_admissions=memory.state.recent_admissions, - recent_rejections=memory.state.recent_rejections, - domain_saturation=library_state.get( - "domain_saturation", memory.state.domain_saturation - ), - admission_log=memory.state.admission_log, - ) - else: - state = memory.state - - # Select relevant patterns - relevant_success = _select_relevant_success( - memory.success_patterns, state.domain_saturation, max_success - ) - relevant_forbidden = _select_relevant_forbidden( - memory.forbidden_directions, state.recent_rejections, max_forbidden - ) - - # Select most recent insights (up to limit) - sorted_insights = sorted( - memory.insights, key=lambda i: i.batch_source, reverse=True - ) - relevant_insights = sorted_insights[:max_insights] - - # Format library state - lib_state_info = _format_library_state(state) - - # Format as prompt text - prompt_text = _format_for_prompt( - relevant_success, relevant_forbidden, relevant_insights, lib_state_info - ) - - return { - "recommended_directions": [p.to_dict() for p in relevant_success], - "forbidden_directions": [f.to_dict() for f in relevant_forbidden], - "insights": [i.to_dict() for i in relevant_insights], - "library_state": lib_state_info, - "prompt_text": prompt_text, - } diff --git a/src/factorminer/factorminer/operators/__init__.py b/src/factorminer/factorminer/operators/__init__.py deleted file mode 100644 index 75b9f56..0000000 --- 
a/src/factorminer/factorminer/operators/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Financial operators for factor expression evaluation. - -Exports the central registry and all operator category modules. -""" - -from src.factorminer.factorminer.operators.registry import ( - OPERATOR_REGISTRY, - execute_operator, - get_impl, - get_operator, - implemented_operators, - list_operators, -) -from src.factorminer.factorminer.operators.gpu_backend import ( - DeviceManager, - batch_execute, - device_manager, - to_numpy, - to_tensor, - torch_available, -) -from src.factorminer.factorminer.operators.auto_inventor import ( - OperatorInventor, - ProposedOperator, - ValidationResult, -) -from src.factorminer.factorminer.operators.custom import ( - CustomOperator, - CustomOperatorStore, -) - -__all__ = [ - # Registry - "OPERATOR_REGISTRY", - "execute_operator", - "get_impl", - "get_operator", - "implemented_operators", - "list_operators", - # GPU - "DeviceManager", - "batch_execute", - "device_manager", - "to_numpy", - "to_tensor", - "torch_available", - # Auto-inventor - "OperatorInventor", - "ProposedOperator", - "ValidationResult", - # Custom operators - "CustomOperator", - "CustomOperatorStore", -] diff --git a/src/factorminer/factorminer/operators/arithmetic.py b/src/factorminer/factorminer/operators/arithmetic.py deleted file mode 100644 index de9bc7b..0000000 --- a/src/factorminer/factorminer/operators/arithmetic.py +++ /dev/null @@ -1,223 +0,0 @@ -"""Element-wise arithmetic operators (unary and binary). - -Every function accepts arrays of shape ``(M, T)`` and returns the same shape. -Both NumPy and PyTorch implementations are provided. 
-""" - -from __future__ import annotations - -from typing import Union - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - -Array = Union[np.ndarray, "torch.Tensor"] - -# ---- helpers --------------------------------------------------------------- - -_EPS_NP = np.float32(1e-10) - - -def _eps(x: Array) -> float: - return 1e-10 - - -# ---- NumPy implementations ------------------------------------------------ - -def add_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - return np.add(x, y) - - -def sub_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - return np.subtract(x, y) - - -def mul_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - return np.multiply(x, y) - - -def div_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.full_like(x, np.nan, dtype=np.float64) - mask = np.abs(y) > _EPS_NP - out[mask] = x[mask] / y[mask] - return out - - -def neg_np(x: np.ndarray) -> np.ndarray: - return np.negative(x) - - -def abs_np(x: np.ndarray) -> np.ndarray: - return np.abs(x) - - -def sign_np(x: np.ndarray) -> np.ndarray: - return np.sign(x) - - -def log_np(x: np.ndarray) -> np.ndarray: - """log(1 + |x|) * sign(x) -- safe log that handles negatives.""" - return np.log1p(np.abs(x)) * np.sign(x) - - -def sqrt_np(x: np.ndarray) -> np.ndarray: - """sqrt(|x|) * sign(x).""" - return np.sqrt(np.abs(x)) * np.sign(x) - - -def square_np(x: np.ndarray) -> np.ndarray: - return np.square(x) - - -def inv_np(x: np.ndarray) -> np.ndarray: - out = np.full_like(x, np.nan, dtype=np.float64) - mask = np.abs(x) > _EPS_NP - out[mask] = 1.0 / x[mask] - return out - - -def pow_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - """x^y with safe handling.""" - with np.errstate(invalid="ignore", divide="ignore"): - return np.where(np.isnan(x) | np.isnan(y), np.nan, np.power(np.abs(x), y) * np.sign(x)) - - -def max_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - return np.fmax(x, y) - - -def min_np(x: np.ndarray, y: np.ndarray) -> 
np.ndarray: - return np.fmin(x, y) - - -def clip_np(x: np.ndarray, lower: float = -3.0, upper: float = 3.0) -> np.ndarray: - return np.clip(x, lower, upper) - - -def exp_np(x: np.ndarray) -> np.ndarray: - """Clamped exp to avoid overflow.""" - return np.exp(np.clip(x, -50.0, 50.0)) - - -def tanh_np(x: np.ndarray) -> np.ndarray: - return np.tanh(x) - - -def signed_power_np(x: np.ndarray, e: float = 2.0) -> np.ndarray: - return np.sign(x) * np.power(np.abs(x), e) - - -def power_np(x: np.ndarray, e: float = 2.0) -> np.ndarray: - with np.errstate(invalid="ignore"): - return np.power(x, e) - - -# ---- PyTorch (GPU) implementations ---------------------------------------- - -def add_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return x + y - - -def sub_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return x - y - - -def mul_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return x * y - - -def div_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - mask = y.abs() > 1e-10 - out = torch.full_like(x, float("nan")) - out[mask] = x[mask] / y[mask] - return out - - -def neg_torch(x: "torch.Tensor") -> "torch.Tensor": - return -x - - -def abs_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.abs() - - -def sign_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.sign() - - -def log_torch(x: "torch.Tensor") -> "torch.Tensor": - return torch.log1p(x.abs()) * x.sign() - - -def sqrt_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.abs().sqrt() * x.sign() - - -def square_torch(x: "torch.Tensor") -> "torch.Tensor": - return x * x - - -def inv_torch(x: "torch.Tensor") -> "torch.Tensor": - mask = x.abs() > 1e-10 - out = torch.full_like(x, float("nan")) - out[mask] = 1.0 / x[mask] - return out - - -def pow_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - safe = x.abs().pow(y) * x.sign() - return torch.where(torch.isnan(x) | torch.isnan(y), torch.tensor(float("nan"), device=x.device), safe) - 
- -def max_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return torch.fmax(x, y) - - -def min_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return torch.fmin(x, y) - - -def clip_torch(x: "torch.Tensor", lower: float = -3.0, upper: float = 3.0) -> "torch.Tensor": - return x.clamp(lower, upper) - - -def exp_torch(x: "torch.Tensor") -> "torch.Tensor": - return torch.exp(x.clamp(-50.0, 50.0)) - - -def tanh_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.tanh() - - -def signed_power_torch(x: "torch.Tensor", e: float = 2.0) -> "torch.Tensor": - return x.sign() * x.abs().pow(e) - - -def power_torch(x: "torch.Tensor", e: float = 2.0) -> "torch.Tensor": - return x.pow(e) - - -# ---- Registration table ---------------------------------------------------- -# Maps operator name -> (numpy_fn, torch_fn) - -ARITHMETIC_OPS = { - "Add": (add_np, add_torch), - "Sub": (sub_np, sub_torch), - "Mul": (mul_np, mul_torch), - "Div": (div_np, div_torch), - "Neg": (neg_np, neg_torch), - "Abs": (abs_np, abs_torch), - "Sign": (sign_np, sign_torch), - "Log": (log_np, log_torch), - "Sqrt": (sqrt_np, sqrt_torch), - "Square": (square_np, square_torch), - "Inv": (inv_np, inv_torch), - "Pow": (pow_np, pow_torch), - "Max": (max_np, max_torch), - "Min": (min_np, min_torch), - "Clip": (clip_np, clip_torch), -} diff --git a/src/factorminer/factorminer/operators/auto_inventor.py b/src/factorminer/factorminer/operators/auto_inventor.py deleted file mode 100644 index 41f90e1..0000000 --- a/src/factorminer/factorminer/operators/auto_inventor.py +++ /dev/null @@ -1,547 +0,0 @@ -"""Automated operator invention via LLM-guided proposal and validation. - -Uses an LLM to propose novel operator definitions (as NumPy functions), -validates them in a sandboxed environment, and checks for differentiation -from existing operators and IC contribution. 
-""" - -from __future__ import annotations - -import json -import logging -import re -from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Optional, Tuple - -import numpy as np - -from src.factorminer.factorminer.agent.llm_interface import LLMProvider -from src.factorminer.factorminer.core.types import OPERATOR_REGISTRY, OperatorSpec - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Data classes -# --------------------------------------------------------------------------- - -@dataclass -class ProposedOperator: - """A single operator proposal generated by the LLM. - - Attributes - ---------- - name : str - Canonical name for the operator (e.g. ``"ExpDecayDiff"``). - arity : int - Number of expression children (1 = unary, 2 = binary). - description : str - Short human-readable description. - numpy_code : str - Python source defining a function called ``compute``. - The function signature must accept (M, T)-shaped ndarrays and - return an (M, T)-shaped ndarray. - param_names : tuple of str - Names of extra numeric parameters. - param_defaults : dict - Default value for each parameter. - param_ranges : dict - Valid (inclusive) range for each parameter. - rationale : str - Why this operator might be useful for alpha factor construction. - based_on : list of str - Existing operators that inspired this proposal. - """ - - name: str - arity: int - description: str - numpy_code: str - param_names: Tuple[str, ...] = () - param_defaults: Dict[str, float] = field(default_factory=dict) - param_ranges: Dict[str, Tuple[float, float]] = field(default_factory=dict) - rationale: str = "" - based_on: List[str] = field(default_factory=list) - - -@dataclass -class ValidationResult: - """Result of validating a proposed operator. - - Attributes - ---------- - valid : bool - True if the operator passed all validation checks. 
- error : str - Error message if validation failed; empty string on success. - output_shape_ok : bool - True if the operator output has the correct (M, T) shape. - nan_ratio : float - Fraction of NaN values in the operator output. - differentiates_from_existing : bool - True if the operator is sufficiently different from all existing operators. - ic_contribution : float - Information coefficient of a simple factor using this operator. - """ - - valid: bool - error: str = "" - output_shape_ok: bool = False - nan_ratio: float = 1.0 - differentiates_from_existing: bool = False - ic_contribution: float = 0.0 - - -# --------------------------------------------------------------------------- -# Sandbox security: allowed names in exec() -# --------------------------------------------------------------------------- - -_SAFE_GLOBALS: Dict[str, Any] = { - "np": np, - "numpy": np, - "__builtins__": {}, -} - -# Explicitly blocked tokens in submitted code. If any of these appear in the -# raw source string, the code is rejected *before* exec(). -_BLOCKED_TOKENS: Tuple[str, ...] = ( - "import ", - "__import__", - "os.", - "sys.", - "subprocess", - "open(", - "exec(", - "eval(", - "compile(", - "getattr(", - "setattr(", - "delattr(", - "globals(", - "locals(", - "__class__", - "__subclasses__", - "__bases__", - "__mro__", - "breakpoint(", - "exit(", - "quit(", -) - - -# --------------------------------------------------------------------------- -# OperatorInventor -# --------------------------------------------------------------------------- - -class OperatorInventor: - """Proposes and validates new operators using an LLM. - - Parameters - ---------- - llm_provider : LLMProvider - LLM backend used to generate proposals. - data_tensor : np.ndarray - Shape ``(M, T, F)`` -- M stocks, T time steps, F features. - Used as test data for validation. - returns : np.ndarray - Shape ``(M, T)`` -- forward returns for IC measurement. 
- max_proposals_per_round : int - Maximum number of proposals to request per LLM call. - """ - - _SYSTEM_PROMPT = ( - "You are an expert in quantitative finance operator design. " - "Your task is to invent novel numerical operators that transform " - "stock market time-series data (shape: stocks x time) into alpha " - "signals. Each operator is a pure NumPy function.\n\n" - "RULES:\n" - "1. Each operator must define a function called `compute`.\n" - "2. The function receives ndarray inputs of shape (M, T) and must " - "return an ndarray of shape (M, T).\n" - "3. You may ONLY use numpy (imported as `np`). No other imports.\n" - "4. Handle NaN values gracefully (use np.nan, np.nanmean, etc.).\n" - "5. Avoid division by zero -- use np.where or add epsilon.\n" - "6. Operators should be economically meaningful for alpha factor " - "construction.\n" - "7. Do NOT use os, sys, subprocess, open, exec, eval, or any " - "filesystem/network access.\n" - ) - - def __init__( - self, - llm_provider: LLMProvider, - data_tensor: np.ndarray, - returns: np.ndarray, - max_proposals_per_round: int = 5, - ) -> None: - if data_tensor.ndim != 3: - raise ValueError( - f"data_tensor must be 3-D (M, T, F), got shape {data_tensor.shape}" - ) - if returns.ndim != 2: - raise ValueError( - f"returns must be 2-D (M, T), got shape {returns.shape}" - ) - self.llm = llm_provider - self.data_tensor = data_tensor - self.returns = returns - self.max_proposals = max_proposals_per_round - # Pre-compute test slices for validation - self._test_x = data_tensor[:, :, 0] # first feature as default input - self._test_y = ( - data_tensor[:, :, 1] if data_tensor.shape[2] > 1 else data_tensor[:, :, 0] - ) - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def propose_operators( - self, - existing_operators: Dict[str, OperatorSpec], - successful_patterns: Optional[List[str]] = None, - ) -> 
List[ProposedOperator]: - """Ask the LLM to propose new operators. - - Parameters - ---------- - existing_operators : dict - Mapping of name -> OperatorSpec for already-registered operators. - successful_patterns : list of str, optional - Natural-language descriptions of patterns that have worked well. - - Returns - ------- - list of ProposedOperator - """ - successful_patterns = successful_patterns or [] - prompt = self._build_proposal_prompt(existing_operators, successful_patterns) - logger.info( - "Requesting %d operator proposals from %s", - self.max_proposals, - self.llm.provider_name, - ) - raw = self.llm.generate( - system_prompt=self._SYSTEM_PROMPT, - user_prompt=prompt, - temperature=0.9, - max_tokens=8192, - ) - proposals = self._parse_proposals(raw) - logger.info("Parsed %d proposals from LLM output", len(proposals)) - return proposals - - def validate_operator(self, proposal: ProposedOperator) -> ValidationResult: - """Validate a proposed operator through compilation, execution, and IC check. 
- - Parameters - ---------- - proposal : ProposedOperator - - Returns - ------- - ValidationResult - """ - # Step 1: compile safely - fn = self._compile_safely(proposal.numpy_code) - if fn is None: - return ValidationResult(valid=False, error="Compilation failed or blocked code detected") - - # Step 2: execute on test data, check output shape - try: - if proposal.arity == 1: - output = fn(self._test_x) - elif proposal.arity >= 2: - output = fn(self._test_x, self._test_y) - else: - output = fn(self._test_x) - except Exception as exc: - return ValidationResult(valid=False, error=f"Execution error: {exc}") - - if not isinstance(output, np.ndarray): - return ValidationResult(valid=False, error="Output is not an ndarray") - - M, T = self._test_x.shape - if output.shape != (M, T): - return ValidationResult( - valid=False, - error=f"Shape mismatch: expected ({M}, {T}), got {output.shape}", - output_shape_ok=False, - ) - - # Step 3: NaN ratio - nan_ratio = float(np.isnan(output).sum()) / output.size if output.size > 0 else 1.0 - if nan_ratio > 0.5: - return ValidationResult( - valid=False, - error=f"NaN ratio too high: {nan_ratio:.2%}", - output_shape_ok=True, - nan_ratio=nan_ratio, - ) - - # Step 4: differentiation from existing operators - differentiates = self._check_differentiation(fn, proposal) - - # Step 5: IC contribution - ic = self._measure_ic_contribution(fn, proposal) - - valid = differentiates # must differentiate; IC is informational - error = "" if valid else "Too correlated with existing operators (r > 0.9)" - - return ValidationResult( - valid=valid, - error=error, - output_shape_ok=True, - nan_ratio=nan_ratio, - differentiates_from_existing=differentiates, - ic_contribution=ic, - ) - - # ------------------------------------------------------------------ - # Internal: prompt building & parsing - # ------------------------------------------------------------------ - - def _build_proposal_prompt( - self, - existing_ops: Dict[str, OperatorSpec], - patterns: 
List[str], - ) -> str: - """Format the user prompt for operator proposals.""" - lines: List[str] = [] - - # Existing operators summary - lines.append("## EXISTING OPERATORS (do NOT duplicate these)") - for name, spec in sorted(existing_ops.items()): - lines.append(f"- {name} (arity={spec.arity}): {spec.description}") - - # Successful patterns - if patterns: - lines.append("\n## SUCCESSFUL PATTERNS (build on these themes)") - for p in patterns: - lines.append(f" * {p}") - - # Request - lines.append(f"\n## REQUEST") - lines.append( - f"Propose exactly {self.max_proposals} new operators. " - f"For each operator, output a JSON object with these fields:" - ) - lines.append( - ' {"name": "OpName", "arity": 1, "description": "...", ' - '"numpy_code": "def compute(x):\\n ...", ' - '"param_names": [], "param_defaults": {}, "param_ranges": {}, ' - '"rationale": "...", "based_on": ["ExistingOp1"]}' - ) - lines.append( - "\nOutput each JSON object on a separate line, preceded by " - "the line number (e.g., '1. {...}')." - ) - lines.append( - "\nIMPORTANT: The `numpy_code` field must define a function " - "called `compute` that accepts ndarray(s) of shape (M, T) " - "and returns an ndarray of shape (M, T). Use only numpy (as np)." 
- ) - return "\n".join(lines) - - def _parse_proposals(self, raw: str) -> List[ProposedOperator]: - """Parse LLM output into ProposedOperator objects.""" - proposals: List[ProposedOperator] = [] - - # Try to find JSON objects in the text - # Pattern: optional number prefix, then a JSON object - json_pattern = re.compile(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}') - matches = json_pattern.findall(raw) - - for match in matches: - try: - data = json.loads(match) - except json.JSONDecodeError: - logger.debug("Failed to parse JSON: %s", match[:100]) - continue - - name = data.get("name", "") - if not name: - continue - - # Normalize numpy_code: replace \\n with actual newlines - numpy_code = data.get("numpy_code", "") - if "\\n" in numpy_code: - numpy_code = numpy_code.replace("\\n", "\n") - - proposal = ProposedOperator( - name=name, - arity=int(data.get("arity", 1)), - description=data.get("description", ""), - numpy_code=numpy_code, - param_names=tuple(data.get("param_names", [])), - param_defaults=data.get("param_defaults", {}), - param_ranges={ - k: tuple(v) for k, v in data.get("param_ranges", {}).items() - }, - rationale=data.get("rationale", ""), - based_on=data.get("based_on", []), - ) - proposals.append(proposal) - - if len(proposals) >= self.max_proposals: - break - - return proposals - - # ------------------------------------------------------------------ - # Internal: safe compilation & validation helpers - # ------------------------------------------------------------------ - - def _compile_safely(self, code: str) -> Optional[Callable]: - """Compile operator code in a restricted sandbox. - - SECURITY: Only numpy is available. No filesystem, network, - subprocess, or introspection access is permitted. - - Parameters - ---------- - code : str - Python source defining a function called ``compute``. - - Returns - ------- - Callable or None - The compiled ``compute`` function, or None if compilation - failed or the code was rejected for security reasons. 
- """ - # Pre-scan for blocked tokens - code_lower = code.lower() - for token in _BLOCKED_TOKENS: - if token.lower() in code_lower: - logger.warning("Blocked token '%s' found in operator code", token) - return None - - # Restricted exec - safe_ns: Dict[str, Any] = dict(_SAFE_GLOBALS) - try: - exec(code, safe_ns) # noqa: S102 -- intentional sandboxed exec - except Exception as exc: - logger.warning("Operator compilation failed: %s", exc) - return None - - fn = safe_ns.get("compute") - if fn is None or not callable(fn): - logger.warning("No callable 'compute' found in operator code") - return None - - return fn - - def _check_differentiation(self, fn: Callable, proposal: ProposedOperator) -> bool: - """Check that the operator output is not too correlated with existing operators. - - Computes the new operator on test data and correlates the result with - outputs from a sample of existing operators. If max |correlation| > 0.9, - the operator is considered redundant. - - Returns - ------- - bool - True if the operator differentiates sufficiently. 
- """ - try: - if proposal.arity == 1: - new_output = fn(self._test_x) - else: - new_output = fn(self._test_x, self._test_y) - except Exception: - return False - - new_flat = new_output.flatten() - valid_mask = ~np.isnan(new_flat) - if valid_mask.sum() < 10: - return False - - # Compare against a sample of existing operator implementations - from factorminer.operators.registry import OPERATOR_REGISTRY as RUNTIME_REG - - sample_names = list(RUNTIME_REG.keys())[:20] # sample for efficiency - for name in sample_names: - entry = RUNTIME_REG.get(name) - if entry is None: - continue - spec, np_fn, _ = entry - if np_fn is None: - continue - try: - if spec.arity == 1: - existing_output = np_fn(self._test_x) - elif spec.arity == 2: - existing_output = np_fn(self._test_x, self._test_y) - else: - continue - except Exception: - continue - - if existing_output.shape != new_output.shape: - continue - - ex_flat = existing_output.flatten() - both_valid = valid_mask & ~np.isnan(ex_flat) - if both_valid.sum() < 10: - continue - - corr = np.corrcoef(new_flat[both_valid], ex_flat[both_valid])[0, 1] - if abs(corr) > 0.9: - logger.info( - "Proposed operator '%s' too correlated with '%s' (r=%.3f)", - proposal.name, - name, - corr, - ) - return False - - return True - - def _measure_ic_contribution(self, fn: Callable, proposal: ProposedOperator) -> float: - """Measure the Information Coefficient of a simple factor using the operator. - - Constructs a basic factor as CsRank(NewOp(data)) and computes - the rank IC against forward returns. - - Returns - ------- - float - Mean rank IC across time steps. 
- """ - try: - if proposal.arity == 1: - raw = fn(self._test_x) - else: - raw = fn(self._test_x, self._test_y) - except Exception: - return 0.0 - - if raw.shape != self.returns.shape: - return 0.0 - - # Cross-sectional rank at each time step - M, T = raw.shape - ranked = np.full_like(raw, np.nan) - for t in range(T): - col = raw[:, t] - valid = ~np.isnan(col) - if valid.sum() < 3: - continue - order = col[valid].argsort().argsort() - ranked[valid, t] = order / (valid.sum() - 1) # percentile rank - - # Rank IC per time step - ics: List[float] = [] - for t in range(T): - factor_col = ranked[:, t] - ret_col = self.returns[:, t] - both_valid = ~np.isnan(factor_col) & ~np.isnan(ret_col) - if both_valid.sum() < 5: - continue - corr = np.corrcoef(factor_col[both_valid], ret_col[both_valid])[0, 1] - if not np.isnan(corr): - ics.append(corr) - - if not ics: - return 0.0 - return float(np.mean(ics)) diff --git a/src/factorminer/factorminer/operators/crosssectional.py b/src/factorminer/factorminer/operators/crosssectional.py deleted file mode 100644 index 84ebf12..0000000 --- a/src/factorminer/factorminer/operators/crosssectional.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Cross-sectional operators (across M assets at each time step t). - -Input shape: ``(M, T)`` -> output shape ``(M, T)``. -Operations are performed along axis=0 (the asset dimension) for every column. -""" - -from __future__ import annotations - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def cs_rank_np(x: np.ndarray) -> np.ndarray: - """Cross-sectional percentile rank -- key GPU target (26x speedup). - - For each time step, rank assets from 0 to 1. NaN inputs get NaN rank. 
- """ - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(T): - col = x[:, t] - valid = ~np.isnan(col) - n = valid.sum() - if n < 2: - continue - order = col[valid].argsort().argsort().astype(np.float64) - out[valid, t] = order / (n - 1) - return out - - -def cs_zscore_np(x: np.ndarray) -> np.ndarray: - """Cross-sectional z-score.""" - m = np.nanmean(x, axis=0, keepdims=True) - s = np.nanstd(x, axis=0, keepdims=True, ddof=0) - with np.errstate(invalid="ignore", divide="ignore"): - return np.where(s > 1e-10, (x - m) / s, np.nan) - - -def cs_demean_np(x: np.ndarray) -> np.ndarray: - """Subtract cross-sectional mean.""" - return x - np.nanmean(x, axis=0, keepdims=True) - - -def cs_scale_np(x: np.ndarray) -> np.ndarray: - """Scale to unit L1 norm cross-sectionally.""" - l1 = np.nansum(np.abs(x), axis=0, keepdims=True) - with np.errstate(invalid="ignore", divide="ignore"): - return np.where(l1 > 1e-10, x / l1, np.nan) - - -def cs_neutralize_np(x: np.ndarray) -> np.ndarray: - """Industry-neutralize (simplified: demean).""" - return cs_demean_np(x) - - -def cs_quantile_np(x: np.ndarray, n_bins: int = 5) -> np.ndarray: - """Assign each asset to a quantile bin (0 .. 
n_bins-1) cross-sectionally.""" - n_bins = int(n_bins) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - for t in range(T): - col = x[:, t] - valid = ~np.isnan(col) - n = valid.sum() - if n < 2: - continue - order = col[valid].argsort().argsort().astype(np.float64) - out[valid, t] = np.floor(order / n * n_bins).clip(0, n_bins - 1) - return out - - -# =========================================================================== -# PyTorch implementations -# =========================================================================== - -def cs_rank_torch(x: "torch.Tensor") -> "torch.Tensor": - """Cross-sectional percentile rank -- fully vectorized for GPU.""" - M, T = x.shape - not_nan = ~torch.isnan(x) - # Replace NaN with very large value so they sort last - filled = x.clone() - filled[~not_nan] = float("inf") - # argsort twice gives rank - ranks = filled.argsort(dim=0).argsort(dim=0).float() - # Count valid per column - n_valid = not_nan.sum(dim=0, keepdim=True).float() - result = ranks / (n_valid - 1).clamp(min=1) - result[~not_nan] = float("nan") - # Clamp ranks for entries that got inf-sorted - result = result.clamp(0.0, 1.0) - result[~not_nan] = float("nan") - return result - - -def cs_zscore_torch(x: "torch.Tensor") -> "torch.Tensor": - m = x.nanmean(dim=0, keepdim=True) - d = x - m - not_nan = ~torch.isnan(x) - n = not_nan.sum(dim=0, keepdim=True).float() - s = (d.nan_to_num(0.0).pow(2).sum(dim=0, keepdim=True) / n.clamp(min=1)).sqrt() - result = torch.where(s > 1e-10, d / s, torch.tensor(float("nan"), device=x.device)) - result[~not_nan] = float("nan") - return result - - -def cs_demean_torch(x: "torch.Tensor") -> "torch.Tensor": - return x - x.nanmean(dim=0, keepdim=True) - - -def cs_scale_torch(x: "torch.Tensor") -> "torch.Tensor": - l1 = x.abs().nansum(dim=0, keepdim=True) - return torch.where(l1 > 1e-10, x / l1, torch.tensor(float("nan"), device=x.device)) - - -def cs_neutralize_torch(x: "torch.Tensor") -> "torch.Tensor": - return 
cs_demean_torch(x) - - -def cs_quantile_torch(x: "torch.Tensor", n_bins: int = 5) -> "torch.Tensor": - n_bins = int(n_bins) - M, T = x.shape - not_nan = ~torch.isnan(x) - filled = x.clone() - filled[~not_nan] = float("inf") - ranks = filled.argsort(dim=0).argsort(dim=0).float() - n_valid = not_nan.sum(dim=0, keepdim=True).float() - result = (ranks / n_valid * n_bins).floor().clamp(0, n_bins - 1) - result[~not_nan] = float("nan") - return result - - -# =========================================================================== -# Registration table -# =========================================================================== - -CROSSSECTIONAL_OPS = { - "CsRank": (cs_rank_np, cs_rank_torch), - "CsZScore": (cs_zscore_np, cs_zscore_torch), - "CsDemean": (cs_demean_np, cs_demean_torch), - "CsScale": (cs_scale_np, cs_scale_torch), - "CsNeutralize": (cs_neutralize_np, cs_neutralize_torch), - "CsQuantile": (cs_quantile_np, cs_quantile_torch), -} diff --git a/src/factorminer/factorminer/operators/custom.py b/src/factorminer/factorminer/operators/custom.py deleted file mode 100644 index ba93b78..0000000 --- a/src/factorminer/factorminer/operators/custom.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Custom operator storage, registration, and persistence. - -Manages operators invented by the auto-inventor: registers them into the -global operator registry at runtime, and persists them to disk as JSON -metadata plus Python source files for reload across sessions. 
-""" - -from __future__ import annotations - -import json -import logging -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple - -import numpy as np - -from src.factorminer.factorminer.core.types import ( - OPERATOR_REGISTRY as SPEC_REGISTRY, - OperatorSpec, - OperatorType, - SignatureType, -) -from src.factorminer.factorminer.operators.registry import OPERATOR_REGISTRY as RUNTIME_REGISTRY - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Safe compilation (shared with auto_inventor.py) -# --------------------------------------------------------------------------- - -_SAFE_GLOBALS: Dict[str, Any] = { - "np": np, - "numpy": np, - "__builtins__": {}, -} - - -def _compile_operator_code(code: str) -> Optional[Callable]: - """Compile operator code in a restricted sandbox. - - Returns the ``compute`` function or None on failure. - """ - safe_ns: Dict[str, Any] = dict(_SAFE_GLOBALS) - try: - exec(code, safe_ns) # noqa: S102 -- sandboxed exec - except Exception as exc: - logger.warning("Failed to compile custom operator code: %s", exc) - return None - fn = safe_ns.get("compute") - if fn is None or not callable(fn): - return None - return fn - - -# --------------------------------------------------------------------------- -# CustomOperator -# --------------------------------------------------------------------------- - -@dataclass -class CustomOperator: - """A validated, auto-invented operator ready for registration. - - Attributes - ---------- - name : str - Canonical operator name. - spec : OperatorSpec - Immutable specification matching the type system. - numpy_code : str - Python source defining ``compute``. - numpy_fn : Callable - Compiled compute function (not persisted; recompiled on load). - validation_ic : float - Information coefficient measured during validation. 
- invention_iteration : int - The search iteration in which this operator was invented. - rationale : str - Why this operator was proposed. - """ - - name: str - spec: OperatorSpec - numpy_code: str - numpy_fn: Callable - validation_ic: float = 0.0 - invention_iteration: int = 0 - rationale: str = "" - - -# --------------------------------------------------------------------------- -# CustomOperatorStore -# --------------------------------------------------------------------------- - -class CustomOperatorStore: - """Manages custom operator lifecycle: register, persist, and reload. - - Parameters - ---------- - store_dir : str - Directory for persisting operator metadata and source files. - """ - - def __init__(self, store_dir: str = "./output/custom_operators") -> None: - self._store_dir = Path(store_dir) - self._operators: Dict[str, CustomOperator] = {} - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def register(self, op: CustomOperator) -> None: - """Register a custom operator into both global registries. - - Adds the operator to: - 1. ``core.types.OPERATOR_REGISTRY`` (spec-only registry) - 2. ``operators.registry.OPERATOR_REGISTRY`` (runtime registry with impl) - - Parameters - ---------- - op : CustomOperator - """ - # Add to spec registry - SPEC_REGISTRY[op.name] = op.spec - - # Add to runtime registry (spec, numpy_fn, torch_fn=None) - RUNTIME_REGISTRY[op.name] = (op.spec, op.numpy_fn, None) - - # Track internally - self._operators[op.name] = op - logger.info( - "Registered custom operator '%s' (IC=%.4f, iteration=%d)", - op.name, - op.validation_ic, - op.invention_iteration, - ) - - def save(self) -> None: - """Persist all custom operators to disk. 
- - Creates ``store_dir/`` with: - - ``index.json``: metadata for all operators - - ``.py``: Python source for each operator - """ - self._store_dir.mkdir(parents=True, exist_ok=True) - - index: List[Dict[str, Any]] = [] - for name, op in self._operators.items(): - # Save Python source - src_path = self._store_dir / f"{name}.py" - src_path.write_text(op.numpy_code, encoding="utf-8") - - # Build metadata entry - entry = { - "name": op.name, - "arity": op.spec.arity, - "category": op.spec.category.name, - "signature": op.spec.signature.name, - "param_names": list(op.spec.param_names), - "param_defaults": op.spec.param_defaults, - "param_ranges": { - k: list(v) for k, v in op.spec.param_ranges.items() - }, - "description": op.spec.description, - "validation_ic": op.validation_ic, - "invention_iteration": op.invention_iteration, - "rationale": op.rationale, - } - index.append(entry) - - index_path = self._store_dir / "index.json" - index_path.write_text( - json.dumps(index, indent=2, ensure_ascii=False), - encoding="utf-8", - ) - logger.info( - "Saved %d custom operators to %s", len(index), self._store_dir - ) - - def load(self) -> None: - """Load custom operators from disk, recompile, and re-register. - - Reads ``store_dir/index.json`` and corresponding ``.py`` source files. - Operators that fail recompilation are skipped with a warning. 
- """ - index_path = self._store_dir / "index.json" - if not index_path.exists(): - logger.debug("No custom operator index at %s", index_path) - return - - with open(index_path, "r", encoding="utf-8") as f: - index: List[Dict[str, Any]] = json.load(f) - - loaded = 0 - for entry in index: - name = entry["name"] - src_path = self._store_dir / f"{name}.py" - if not src_path.exists(): - logger.warning("Source file missing for custom operator '%s'", name) - continue - - numpy_code = src_path.read_text(encoding="utf-8") - fn = _compile_operator_code(numpy_code) - if fn is None: - logger.warning( - "Failed to recompile custom operator '%s'; skipping", name - ) - continue - - spec = OperatorSpec( - name=name, - arity=entry["arity"], - category=OperatorType[entry["category"]], - signature=SignatureType[entry["signature"]], - param_names=tuple(entry.get("param_names", [])), - param_defaults=entry.get("param_defaults", {}), - param_ranges={ - k: tuple(v) - for k, v in entry.get("param_ranges", {}).items() - }, - description=entry.get("description", ""), - ) - - op = CustomOperator( - name=name, - spec=spec, - numpy_code=numpy_code, - numpy_fn=fn, - validation_ic=entry.get("validation_ic", 0.0), - invention_iteration=entry.get("invention_iteration", 0), - rationale=entry.get("rationale", ""), - ) - self.register(op) - loaded += 1 - - logger.info("Loaded %d / %d custom operators from %s", loaded, len(index), self._store_dir) - - def list_operators(self) -> List[str]: - """Return names of all registered custom operators.""" - return sorted(self._operators.keys()) - - def get_operator(self, name: str) -> Optional[CustomOperator]: - """Look up a custom operator by name. 
- - Returns - ------- - CustomOperator or None - """ - return self._operators.get(name) diff --git a/src/factorminer/factorminer/operators/gpu_backend.py b/src/factorminer/factorminer/operators/gpu_backend.py deleted file mode 100644 index d28e3cb..0000000 --- a/src/factorminer/factorminer/operators/gpu_backend.py +++ /dev/null @@ -1,110 +0,0 @@ -"""GPU acceleration utilities for FactorMiner operators. - -Provides device management, tensor conversion helpers, and batch execution -for parallel factor evaluation on CUDA GPUs with automatic CPU fallback. -""" - -from __future__ import annotations - -from typing import Optional, Union - -import numpy as np - -try: - import torch - - _TORCH_AVAILABLE = True -except ImportError: - torch = None # type: ignore[assignment] - _TORCH_AVAILABLE = False - - -# --------------------------------------------------------------------------- -# Device management -# --------------------------------------------------------------------------- - -class DeviceManager: - """Singleton-style helper that picks the best available device.""" - - def __init__(self) -> None: - self._device: Optional["torch.device"] = None - - @property - def device(self) -> "torch.device": - if self._device is None: - self._device = self._select_device() - return self._device - - @device.setter - def device(self, dev: Union[str, "torch.device"]) -> None: - if not _TORCH_AVAILABLE: - raise RuntimeError("PyTorch is not installed") - self._device = torch.device(dev) - - @staticmethod - def _select_device() -> "torch.device": - if not _TORCH_AVAILABLE: - raise RuntimeError("PyTorch is not installed") - if torch.cuda.is_available(): - return torch.device("cuda") - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): - return torch.device("mps") - return torch.device("cpu") - - @property - def is_gpu(self) -> bool: - return self.device.type in ("cuda", "mps") - - def reset(self) -> None: - self._device = None - - -device_manager = DeviceManager() - - 
-# --------------------------------------------------------------------------- -# Conversion helpers -# --------------------------------------------------------------------------- - -def to_tensor( - arr: np.ndarray, - device: Optional["torch.device"] = None, - dtype: Optional["torch.dtype"] = None, -) -> "torch.Tensor": - """Convert a NumPy array to a PyTorch tensor on the target device.""" - if not _TORCH_AVAILABLE: - raise RuntimeError("PyTorch is not installed") - dev = device or device_manager.device - dt = dtype or torch.float32 - return torch.as_tensor(np.ascontiguousarray(arr), dtype=dt, device=torch.device("cpu")).to(dev) - - -def to_numpy(tensor: "torch.Tensor") -> np.ndarray: - """Convert a PyTorch tensor back to a NumPy array.""" - return tensor.detach().cpu().numpy() - - -# --------------------------------------------------------------------------- -# Batch execution helper -# --------------------------------------------------------------------------- - -def batch_execute( - fn, - inputs: list, - params_list: list[dict], - backend: str = "numpy", -) -> list: - """Execute a function over multiple parameter sets. - - Useful for evaluating many factors in parallel on the GPU by batching - the inputs into a single large tensor operation. - """ - results = [] - for params in params_list: - results.append(fn(*inputs, **params)) - return results - - -def torch_available() -> bool: - """Return True if PyTorch is importable.""" - return _TORCH_AVAILABLE diff --git a/src/factorminer/factorminer/operators/logical.py b/src/factorminer/factorminer/operators/logical.py deleted file mode 100644 index c6d16f1..0000000 --- a/src/factorminer/factorminer/operators/logical.py +++ /dev/null @@ -1,185 +0,0 @@ -"""Conditional and comparison operators (element-wise). - -All operators are element-wise on ``(M, T)`` arrays. -Boolean-like outputs use ``1.0`` / ``0.0`` (float), not Python bool. 
-""" - -from __future__ import annotations - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def if_else_np(cond: np.ndarray, x: np.ndarray, y: np.ndarray) -> np.ndarray: - """Where cond > 0 return x, else y. NaN in cond -> NaN.""" - result = np.where(cond > 0, x, y) - result[np.isnan(cond)] = np.nan - return result - - -def greater_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(x > y, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def less_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(x < y, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def greater_equal_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(x >= y, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def less_equal_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(x <= y, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def equal_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(np.abs(x - y) < 1e-10, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def and_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where((x > 0) & (y > 0), 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def or_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where((x > 0) | (y > 0), 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -def not_np(x: np.ndarray) -> np.ndarray: - out = np.where(x > 0, 0.0, 1.0) - out[np.isnan(x)] = np.nan - return out - - -def sign_np(x: np.ndarray) -> np.ndarray: - return np.sign(x) - - -def max2_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - return np.fmax(x, y) - - -def min2_np(x: np.ndarray, y: 
np.ndarray) -> np.ndarray: - return np.fmin(x, y) - - -def ne_np(x: np.ndarray, y: np.ndarray) -> np.ndarray: - out = np.where(np.abs(x - y) >= 1e-10, 1.0, 0.0) - out[np.isnan(x) | np.isnan(y)] = np.nan - return out - - -# =========================================================================== -# PyTorch implementations -# =========================================================================== - -def if_else_torch(cond: "torch.Tensor", x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - result = torch.where(cond > 0, x, y) - result[torch.isnan(cond)] = float("nan") - return result - - -def greater_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where(x > y, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def less_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where(x < y, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def greater_equal_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where(x >= y, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def less_equal_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where(x <= y, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def equal_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where((x - y).abs() < 1e-10, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def and_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where((x > 0) & (y > 0), 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def or_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where((x > 0) | (y > 0), 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -def not_torch(x: "torch.Tensor") -> "torch.Tensor": - out = 
torch.where(x > 0, 0.0, 1.0) - out[torch.isnan(x)] = float("nan") - return out - - -def sign_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.sign() - - -def max2_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return torch.fmax(x, y) - - -def min2_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - return torch.fmin(x, y) - - -def ne_torch(x: "torch.Tensor", y: "torch.Tensor") -> "torch.Tensor": - out = torch.where((x - y).abs() >= 1e-10, 1.0, 0.0) - out[torch.isnan(x) | torch.isnan(y)] = float("nan") - return out - - -# =========================================================================== -# Registration table -# =========================================================================== - -LOGICAL_OPS = { - "IfElse": (if_else_np, if_else_torch), - "Greater": (greater_np, greater_torch), - "GreaterEqual": (greater_equal_np, greater_equal_torch), - "Less": (less_np, less_torch), - "LessEqual": (less_equal_np, less_equal_torch), - "Equal": (equal_np, equal_torch), - "Ne": (ne_np, ne_torch), - "And": (and_np, and_torch), - "Or": (or_np, or_torch), - "Not": (not_np, not_torch), -} diff --git a/src/factorminer/factorminer/operators/neuro_symbolic.py b/src/factorminer/factorminer/operators/neuro_symbolic.py deleted file mode 100644 index 95f58c5..0000000 --- a/src/factorminer/factorminer/operators/neuro_symbolic.py +++ /dev/null @@ -1,1614 +0,0 @@ -"""Hybrid neural-symbolic operators for HelixFactor. - -WHY THIS MODULE EXISTS ----------------------- -Symbolic expression trees give us interpretability and generalizability, but -they are limited by the vocabulary of hand-coded operators. Neural leaves -bridge that gap: a tiny MLP trained on historical market data can discover -non-linear interaction patterns (e.g. volume-price divergence under high -intraday volatility) that no single hand-written formula captures. - -The workflow is: - 1. Train a NeuralLeaf on historical data to maximise IC with next-period - returns. 
The leaf sees a rolling window of all available features. - 2. Insert the trained leaf into an expression tree as a NeuralLeafNode. - It behaves like any other operator: (M, T) in -> (M, T) out. - 3. After validation, run distill_to_symbolic() to find the symbolic - formula from the existing operator library that best approximates - the neural leaf. This restores interpretability while keeping the - discovered signal. - 4. Replace NeuralLeafNode with the distilled formula for production. - -Architecture constraints ------------------------- -- Each NeuralLeaf has < 5 000 parameters (fits on CPU, fast inference). -- 2-layer MLP: input -> 32 hidden -> 1, with LayerNorm and GELU. -- Input: flattened rolling window of F features over the last W time steps. -- Output: scalar signal per (asset, time) pair, shape (M, T). -- Training uses a differentiable Pearson-IC proxy loss. -""" - -from __future__ import annotations - -import logging -import os -import warnings -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np - -logger = logging.getLogger(__name__) - -# --------------------------------------------------------------------------- -# Optional PyTorch import — graceful degradation -# --------------------------------------------------------------------------- - -try: - import torch - import torch.nn as nn - import torch.optim as optim - - _TORCH_AVAILABLE = True -except ImportError: # pragma: no cover - torch = None # type: ignore[assignment] - nn = None # type: ignore[assignment] - optim = None # type: ignore[assignment] - _TORCH_AVAILABLE = False - warnings.warn( - "PyTorch is not installed. NeuralLeaf training and inference will be " - "unavailable. 
Install torch to enable neuro-symbolic operators.", - ImportWarning, - stacklevel=1, - ) - - -# --------------------------------------------------------------------------- -# Constants -# --------------------------------------------------------------------------- - -# Canonical feature order — must match factorminer.core.types.FEATURES -_DEFAULT_FEATURES: List[str] = [ - "$open", - "$high", - "$low", - "$close", - "$volume", - "$amt", - "$vwap", - "$returns", -] - -_HIDDEN_DIM: int = 32 # keeps param count ~2 000 for window=10, F=8 - - -# =========================================================================== -# NeuralLeaf — the learnable micro-model -# =========================================================================== - -if _TORCH_AVAILABLE: - - class NeuralLeaf(nn.Module): - """Tiny MLP operating on a rolling window of market features. - - Parameters - ---------- - window_size : int - Number of look-back time steps fed to the model. - n_features : int - Number of input feature channels (e.g. 8 for the standard OHLCV set). - hidden_dim : int - Width of the single hidden layer (default: 32). - name : str - Human-readable identifier used in DSL strings and logging. - - Input / Output shapes - --------------------- - ``forward`` expects a tensor of shape ``(M * T_valid, window_size * n_features)`` - where rows where the window is fully available have been pre-selected. - It returns a tensor of shape ``(M * T_valid,)``. - - The public ``evaluate()`` method handles the full (M, T) -> (M, T) pipeline - including NaN masking and output assembly. 
- - Parameter count - --------------- - With defaults (window=10, F=8, hidden=32): - input_dim = 10 * 8 = 80 - layer 1 = 80 * 32 + 32 = 2 592 - layer 2 = 32 * 1 + 1 = 33 - LayerNorm = 2 * 80 + 2 * 32 = 224 - total ≈ 2 849 (well under 5 000) - """ - - def __init__( - self, - window_size: int = 10, - n_features: int = 8, - hidden_dim: int = _HIDDEN_DIM, - name: str = "NeuralLeaf", - ) -> None: - super().__init__() - self.window_size = window_size - self.n_features = n_features - self.hidden_dim = hidden_dim - self.name = name - - input_dim = window_size * n_features - - self.net = nn.Sequential( - nn.LayerNorm(input_dim), - nn.Linear(input_dim, hidden_dim), - nn.GELU(), - nn.LayerNorm(hidden_dim), - nn.Linear(hidden_dim, 1), - ) - # Xavier init for stability - for module in self.net.modules(): - if isinstance(module, nn.Linear): - nn.init.xavier_uniform_(module.weight) - nn.init.zeros_(module.bias) - - # ------------------------------------------------------------------ - # Core PyTorch forward - # ------------------------------------------------------------------ - - def forward(self, x: "torch.Tensor") -> "torch.Tensor": - """Map ``(N, window_size * n_features)`` -> ``(N,)``. - - Parameters - ---------- - x : torch.Tensor, shape (N, window_size * n_features) - - Returns - ------- - torch.Tensor, shape (N,) - """ - return self.net(x).squeeze(-1) - - # ------------------------------------------------------------------ - # High-level evaluation: (M, T, F) -> (M, T) with NaN handling - # ------------------------------------------------------------------ - - def evaluate( - self, - features_3d: np.ndarray, - device: Optional["torch.device"] = None, - ) -> np.ndarray: - """Evaluate the leaf on a full (M, T, F) market tensor. - - For each (asset, time) pair where a full window is available, - the flattened window is fed through the MLP. Positions where - the window is not yet complete (the first ``window_size - 1`` - time steps) are filled with NaN. 
- - Parameters - ---------- - features_3d : np.ndarray, shape (M, T, F) - Stack of feature arrays, F channels, in the order given at - construction time. - device : torch.device, optional - Where to place tensors. Defaults to CPU. - - Returns - ------- - np.ndarray, shape (M, T) - """ - if not _TORCH_AVAILABLE: - return np.full(features_3d.shape[:2], np.nan) - - device = device or torch.device("cpu") - M, T, F = features_3d.shape - W = self.window_size - - out = np.full((M, T), np.nan, dtype=np.float64) - - if T < W: - return out - - # Build input matrix: (M, T - W + 1, W * F) - # stride-trick to avoid copies - X_windows = _build_windows_np(features_3d, W) # (M, T-W+1, W*F) - - # Reshape to (M * (T-W+1), W*F) - n_windows = T - W + 1 - X_flat = X_windows.reshape(M * n_windows, W * F).astype(np.float32) - - # Mask out rows that contain any NaN - nan_mask = np.isnan(X_flat).any(axis=1) # (M * n_windows,) - - X_valid = X_flat[~nan_mask] - if X_valid.shape[0] == 0: - return out - - self.eval() - with torch.no_grad(): - x_tensor = torch.from_numpy(X_valid).to(device) - preds = self.forward(x_tensor).cpu().numpy().astype(np.float64) - - # Scatter predictions back - result_flat = np.full(M * n_windows, np.nan, dtype=np.float64) - result_flat[~nan_mask] = preds - result_2d = result_flat.reshape(M, n_windows) - - # Place into the last T - W + 1 columns of the output - out[:, W - 1 :] = result_2d - - return out - - # ------------------------------------------------------------------ - # Utilities - # ------------------------------------------------------------------ - - def param_count(self) -> int: - """Return the total number of trainable parameters.""" - return sum(p.numel() for p in self.parameters() if p.requires_grad) - - def __repr__(self) -> str: - return ( - f"NeuralLeaf(name={self.name!r}, window={self.window_size}, " - f"features={self.n_features}, params={self.param_count()})" - ) - -else: - # Stub when torch is unavailable so type annotations still resolve. 
- class NeuralLeaf: # type: ignore[no-redef] - """Stub NeuralLeaf (PyTorch unavailable).""" - - def __init__(self, *args: Any, **kwargs: Any) -> None: - self.window_size = kwargs.get("window_size", 10) - self.n_features = kwargs.get("n_features", 8) - self.name = kwargs.get("name", "NeuralLeaf") - - def evaluate(self, features_3d: np.ndarray, **kwargs: Any) -> np.ndarray: - return np.full(features_3d.shape[:2], np.nan) - - def param_count(self) -> int: - return 0 - - -# =========================================================================== -# Window construction helper (NumPy, no copy when F is contiguous) -# =========================================================================== - -def _build_windows_np(x: np.ndarray, window: int) -> np.ndarray: - """Create sliding windows from a (M, T, F) array. - - Returns - ------- - np.ndarray, shape (M, T - window + 1, window * F) - Each row is the flattened window of shape (window, F). - """ - M, T, F = x.shape - n = T - window + 1 - # Use stride tricks for zero-copy view - s_m, s_t, s_f = x.strides - shape = (M, n, window, F) - strides = (s_m, s_t, s_t, s_f) - windows = np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides) - return windows.reshape(M, n, window * F) - - -# =========================================================================== -# IC loss helpers (differentiable Pearson proxy) -# =========================================================================== - -def _pearson_ic_loss( - pred: "torch.Tensor", - target: "torch.Tensor", - eps: float = 1e-8, -) -> "torch.Tensor": - """Negative Pearson cross-sectional IC averaged over time steps. - - Both tensors must be shape ``(M,)`` (one time slice) or ``(N,)`` - (flattened batch). The loss is ``-IC`` so gradient descent maximises IC. - - Parameters - ---------- - pred : torch.Tensor, shape (N,) - target : torch.Tensor, shape (N,) - eps : float - Denominator stabiliser. 
- - Returns - ------- - torch.Tensor scalar - """ - pred_m = pred - pred.mean() - tgt_m = target - target.mean() - cov = (pred_m * tgt_m).mean() - denom = pred_m.std(unbiased=False).clamp(min=eps) * tgt_m.std(unbiased=False).clamp(min=eps) - ic = cov / denom - return -ic - - -def _l2_regularisation(model: "NeuralLeaf", lam: float = 1e-4) -> "torch.Tensor": - """Compute L2 weight penalty (excludes bias and LayerNorm params).""" - reg = torch.tensor(0.0) - for name, param in model.named_parameters(): - if "weight" in name and "norm" not in name: - reg = reg + param.pow(2).sum() - return lam * reg - - -# =========================================================================== -# Training procedure -# =========================================================================== - -def train_neural_leaf( - name: str, - features: np.ndarray, - returns: np.ndarray, - window_size: int = 10, - n_epochs: int = 100, - lr: float = 1e-3, - hidden_dim: int = _HIDDEN_DIM, - val_fraction: float = 0.2, - l2_lambda: float = 1e-4, - batch_size: int = 2048, - patience: int = 15, - device: Optional["torch.device"] = None, - verbose: bool = False, -) -> Optional["NeuralLeaf"]: - """Train a NeuralLeaf to maximise cross-sectional IC with next-period returns. - - The leaf receives a rolling window of F features per (asset, time) pair - and learns to output a signal that is cross-sectionally correlated with - next-period returns. Training uses time-based train/validation splits - (no look-ahead: validation set = later time steps). - - Parameters - ---------- - name : str - Human-readable name for the leaf (e.g. ``"NeuralMomentum"``). - features : np.ndarray, shape (M, T, F) - Market feature tensor. F must match the ``_DEFAULT_FEATURES`` list - or be explicitly sized for the model. - returns : np.ndarray, shape (M, T) - Forward returns aligned to the same (M, T) grid. - window_size : int - Number of look-back bars for the rolling window. - n_epochs : int - Maximum training epochs. 
- lr : float - Adam learning rate. - hidden_dim : int - Width of the hidden layer. - val_fraction : float - Fraction of time steps reserved for validation (tail of the series). - l2_lambda : float - L2 regularisation coefficient. - batch_size : int - Mini-batch size over the flattened (asset, time) dimension. - patience : int - Early stopping patience (epochs without val IC improvement). - device : torch.device, optional - Computation device. Defaults to CPU. - verbose : bool - Whether to log training progress at DEBUG level. - - Returns - ------- - NeuralLeaf or None - The trained leaf, or None if torch is unavailable or training fails. - """ - if not _TORCH_AVAILABLE: - logger.warning("train_neural_leaf: PyTorch unavailable, returning None.") - return None - - device = device or torch.device("cpu") - M, T, F = features.shape - - if T <= window_size: - logger.warning( - "train_neural_leaf(%s): T=%d <= window=%d, cannot train.", name, T, window_size - ) - return None - - # ------------------------------------------------------------------ - # Build full flat dataset: (M * n_windows, window * F) and target (M * n_windows,) - # ------------------------------------------------------------------ - n_windows = T - window_size + 1 - X_all = _build_windows_np(features, window_size) # (M, n_windows, W*F) - X_flat = X_all.reshape(M * n_windows, window_size * F).astype(np.float32) - - # Target: forward return at the LAST time step of each window (t = W-1 + k) - # features[:, k : k+W, :] -> return at time k + W - 1 - # We align the return index to the last step in the window. 
- ret_aligned = returns[:, window_size - 1 :] # (M, n_windows) - y_flat = ret_aligned.reshape(M * n_windows).astype(np.float32) - - # ------------------------------------------------------------------ - # Remove NaN rows (both in X and y) - # ------------------------------------------------------------------ - valid_mask = (~np.isnan(X_flat).any(axis=1)) & (~np.isnan(y_flat)) - X_flat = X_flat[valid_mask] - y_flat = y_flat[valid_mask] - - N = X_flat.shape[0] - if N < 100: - logger.warning( - "train_neural_leaf(%s): only %d valid samples after NaN removal.", name, N - ) - return None - - # ------------------------------------------------------------------ - # Temporal train / val split: preserve time ordering. - # The valid_mask does not preserve temporal ordering in general, so - # we use a simple head/tail split on the original time dimension. - # ------------------------------------------------------------------ - # We rebuild from scratch with explicit temporal indexing to ensure - # the val set is always strictly later in time. 
- - T_val_start = int(T * (1.0 - val_fraction)) - T_val_start = max(T_val_start, window_size) # ensure at least one val window - - # Train windows: windows whose last time index < T_val_start - # Last time index of window k = window_size - 1 + k (0-indexed over n_windows) - # => k < T_val_start - window_size + 1 - k_split = T_val_start - window_size + 1 # exclusive upper bound for train - k_split = max(1, min(k_split, n_windows - 1)) - - X_train_raw = _build_windows_np(features[:, :T_val_start, :], window_size) - X_train_raw = X_train_raw.reshape(-1, window_size * F).astype(np.float32) - y_train_raw = returns[:, window_size - 1 : T_val_start].reshape(-1).astype(np.float32) - - X_val_raw = _build_windows_np(features[:, T_val_start - window_size + 1 :, :], window_size) - X_val_raw = X_val_raw.reshape(-1, window_size * F).astype(np.float32) - y_val_raw = returns[:, T_val_start:].reshape(-1).astype(np.float32) - - def _clean(X: np.ndarray, y: np.ndarray): - mask = (~np.isnan(X).any(axis=1)) & (~np.isnan(y)) - return X[mask], y[mask] - - X_train, y_train = _clean(X_train_raw, y_train_raw) - X_val, y_val = _clean(X_val_raw, y_val_raw) - - if X_train.shape[0] < 50: - logger.warning( - "train_neural_leaf(%s): too few training samples (%d).", name, X_train.shape[0] - ) - return None - - # ------------------------------------------------------------------ - # Model, optimiser, scheduler - # ------------------------------------------------------------------ - leaf = NeuralLeaf( - window_size=window_size, - n_features=F, - hidden_dim=hidden_dim, - name=name, - ).to(device) - - optimizer = optim.Adam(leaf.parameters(), lr=lr) - scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=lr * 0.01) - - X_train_t = torch.from_numpy(X_train).to(device) - y_train_t = torch.from_numpy(y_train).to(device) - X_val_t = torch.from_numpy(X_val).to(device) - y_val_t = torch.from_numpy(y_val).to(device) - - N_train = X_train_t.shape[0] - - best_val_ic: float = 
-np.inf - best_state: Optional[Dict[str, Any]] = None - no_improve: int = 0 - - # ------------------------------------------------------------------ - # Training loop - # ------------------------------------------------------------------ - for epoch in range(n_epochs): - leaf.train() - # Shuffle each epoch - perm = torch.randperm(N_train, device=device) - X_shuf = X_train_t[perm] - y_shuf = y_train_t[perm] - - epoch_loss = 0.0 - n_batches = 0 - for start in range(0, N_train, batch_size): - xb = X_shuf[start : start + batch_size] - yb = y_shuf[start : start + batch_size] - if xb.shape[0] < 4: - continue # skip tiny last batch - - optimizer.zero_grad() - pred = leaf(xb) - ic_loss = _pearson_ic_loss(pred, yb) - reg = _l2_regularisation(leaf, l2_lambda) - loss = ic_loss + reg - loss.backward() - torch.nn.utils.clip_grad_norm_(leaf.parameters(), max_norm=1.0) - optimizer.step() - epoch_loss += loss.item() - n_batches += 1 - - scheduler.step() - - # ------------------------------------------------------------------ - # Validation IC (no gradient) - # ------------------------------------------------------------------ - leaf.eval() - with torch.no_grad(): - val_pred = leaf(X_val_t) - val_ic = -_pearson_ic_loss(val_pred, y_val_t).item() # positive = good - - if val_ic > best_val_ic + 1e-5: - best_val_ic = val_ic - best_state = {k: v.clone() for k, v in leaf.state_dict().items()} - no_improve = 0 - else: - no_improve += 1 - - if verbose: - avg_loss = epoch_loss / max(n_batches, 1) - logger.debug( - "Epoch %d/%d train_loss=%.5f val_IC=%.4f best_val_IC=%.4f", - epoch + 1, - n_epochs, - avg_loss, - val_ic, - best_val_ic, - ) - - if no_improve >= patience: - logger.info( - "train_neural_leaf(%s): early stopping at epoch %d (val_IC=%.4f).", - name, - epoch + 1, - best_val_ic, - ) - break - - # Restore best weights - if best_state is not None: - leaf.load_state_dict(best_state) - - logger.info( - "Trained NeuralLeaf '%s': params=%d, best_val_IC=%.4f", - name, - leaf.param_count(), 
- best_val_ic, - ) - leaf.eval() - return leaf - - -# =========================================================================== -# Symbolic Distillation -# =========================================================================== - -@dataclass -class DistillationResult: - """Result of distilling a neural leaf to a symbolic approximation. - - Attributes - ---------- - formula : str - The best-matching symbolic formula string (DSL notation). - correlation : float - Pearson correlation between the neural leaf output and the - best symbolic approximation (over all valid positions). - rank_correlation : float - Spearman rank correlation (more relevant for factor quality). - candidate_scores : dict - Full mapping of formula -> correlation for all candidates tried. - """ - - formula: str - correlation: float - rank_correlation: float - candidate_scores: Dict[str, float] = field(default_factory=dict) - - def __str__(self) -> str: - return ( - f"DistillationResult(formula={self.formula!r}, " - f"r={self.correlation:.4f}, rho={self.rank_correlation:.4f})" - ) - - -def _spearman_corr(a: np.ndarray, b: np.ndarray) -> float: - """Spearman rank correlation between two flat arrays, ignoring NaN.""" - from scipy.stats import spearmanr as _spearman # local import to keep scipy optional - - mask = ~(np.isnan(a) | np.isnan(b)) - if mask.sum() < 10: - return 0.0 - rho, _ = _spearman(a[mask], b[mask]) - return float(rho) - - -def _pearson_corr(a: np.ndarray, b: np.ndarray) -> float: - """Pearson correlation between two flat arrays, ignoring NaN.""" - mask = ~(np.isnan(a) | np.isnan(b)) - if mask.sum() < 10: - return 0.0 - am, bm = a[mask], b[mask] - num = np.mean((am - am.mean()) * (bm - bm.mean())) - denom = am.std() * bm.std() - if denom < 1e-10: - return 0.0 - return float(num / denom) - - -def _evaluate_symbolic_candidate(formula_fn, data: Dict[str, np.ndarray]) -> Optional[np.ndarray]: - """Safely evaluate a symbolic candidate, returning None on failure.""" - try: - result = 
formula_fn(data) - if not isinstance(result, np.ndarray): - return None - if result.shape != next(iter(data.values())).shape: - return None - return result - except Exception as exc: # noqa: BLE001 - logger.debug("Symbolic candidate failed: %s", exc) - return None - - -def _build_symbolic_candidates(data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: - """Generate all symbolic candidate outputs from the hand-coded operator library. - - Returns a dict mapping formula string -> (M, T) array. - """ - # Import operators lazily to avoid circular imports - from factorminer.core.expression_tree import _ema, _wma, _rolling_apply # type: ignore[attr-defined] - from factorminer.core.expression_tree import _ts_rank, _ts_mean, _ts_std # type: ignore[attr-defined] - - candidates: Dict[str, np.ndarray] = {} - - close = data.get("$close") - volume = data.get("$volume") - returns = data.get("$returns") - high = data.get("$high") - low = data.get("$low") - amt = data.get("$amt") - vwap = data.get("$vwap") - - def _safe_add(name: str, arr: Optional[np.ndarray]) -> None: - if arr is not None and isinstance(arr, np.ndarray): - candidates[name] = arr - - # EMA variants - if close is not None: - for w in (3, 5, 10, 20): - _safe_add(f"EMA($close, {w})", _ema(close, w)) - # Delta (momentum) - for w in (1, 3, 5, 10): - M, T = close.shape - out = np.full_like(close, np.nan, dtype=np.float64) - if w < T: - out[:, w:] = close[:, w:] - close[:, :-w] - _safe_add(f"Delta($close, {w})", out) - # Rolling return - for w in (1, 3, 5, 10): - M, T = close.shape - out = np.full_like(close, np.nan, dtype=np.float64) - if w < T: - prev = close[:, :-w] - ok = np.abs(prev) > 1e-10 - out[: , w:][ok] = close[:, w:][ok] / prev[ok] - 1.0 - _safe_add(f"Return($close, {w})", out) - # TsRank - for w in (5, 10, 20): - _safe_add(f"TsRank($close, {w})", _rolling_apply(close, w, _ts_rank)) - # Rolling std - for w in (5, 10, 20): - _safe_add(f"Std($close, {w})", _rolling_apply(close, w, _ts_std)) - # Rolling mean - 
for w in (5, 10, 20): - _safe_add(f"Mean($close, {w})", _rolling_apply(close, w, _ts_mean)) - - if volume is not None: - for w in (5, 10, 20): - _safe_add(f"TsRank($volume, {w})", _rolling_apply(volume, w, _ts_rank)) - _safe_add(f"EMA($volume, {w})", _ema(volume, w)) - for w in (1, 3, 5): - M, T = volume.shape - out = np.full_like(volume, np.nan, dtype=np.float64) - if w < T: - out[:, w:] = volume[:, w:] - volume[:, :-w] - _safe_add(f"Delta($volume, {w})", out) - - if returns is not None: - for w in (5, 10, 20): - _safe_add(f"Std($returns, {w})", _rolling_apply(returns, w, _ts_std)) - _safe_add(f"Mean($returns, {w})", _rolling_apply(returns, w, _ts_mean)) - _safe_add(f"TsRank($returns, {w})", _rolling_apply(returns, w, _ts_rank)) - - # VWAP-close spread (price-quality signal) - if close is not None and vwap is not None: - spread = close - vwap - _safe_add("Sub($close, $vwap)", spread) - for w in (5, 10): - _safe_add(f"EMA(Sub($close,$vwap),{w})", _ema(spread, w)) - - # High-low range (volatility proxy) - if high is not None and low is not None: - hl_range = high - low - _safe_add("Sub($high, $low)", hl_range) - for w in (5, 10, 20): - _safe_add(f"Mean(Sub($high,$low),{w})", _rolling_apply(hl_range, w, _ts_mean)) - - return candidates - - -def distill_to_symbolic( - leaf: "NeuralLeaf", - data: Dict[str, np.ndarray], - feature_order: Optional[List[str]] = None, -) -> DistillationResult: - """Find the symbolic formula that best approximates the neural leaf. - - Evaluates the leaf on *data*, then computes the Pearson and Spearman - correlation between the leaf output and every formula in a curated - candidate set. The candidate with the highest absolute Pearson - correlation is chosen as the distillation target. - - Parameters - ---------- - leaf : NeuralLeaf - A trained neural leaf. - data : dict[str, np.ndarray] - Market data dict mapping feature name -> (M, T) array. 
- feature_order : list of str, optional - Order of features in the (M, T, F) stack passed to the leaf. - Defaults to ``_DEFAULT_FEATURES``. - - Returns - ------- - DistillationResult - """ - feature_order = feature_order or _DEFAULT_FEATURES - - # Build feature tensor (M, T, F) - ref_arr = next(iter(data.values())) - M, T = ref_arr.shape - F = len(feature_order) - features_3d = np.stack( - [data.get(f, np.full((M, T), np.nan)) for f in feature_order], - axis=-1, - ) # (M, T, F) - - # Evaluate neural leaf -> (M, T) - leaf_output = leaf.evaluate(features_3d) - - # Flatten for correlation computation - leaf_flat = leaf_output.ravel() - - # Build symbolic candidates - candidates = _build_symbolic_candidates(data) - - scores: Dict[str, float] = {} - for formula, arr in candidates.items(): - r = _pearson_corr(leaf_flat, arr.ravel()) - scores[formula] = abs(r) # rank by |r| - - if not scores: - logger.warning("distill_to_symbolic: no symbolic candidates available.") - return DistillationResult( - formula="NeuralLeaf(no_candidates)", - correlation=0.0, - rank_correlation=0.0, - candidate_scores={}, - ) - - best_formula = max(scores, key=lambda k: scores[k]) - best_arr = candidates[best_formula] - best_r = _pearson_corr(leaf_flat, best_arr.ravel()) - - try: - best_rho = _spearman_corr(leaf_flat, best_arr.ravel()) - except ImportError: - best_rho = 0.0 - logger.debug("distill_to_symbolic: scipy not available, Spearman correlation skipped.") - - logger.info( - "Distillation: best formula='%s', Pearson r=%.4f, Spearman rho=%.4f", - best_formula, - best_r, - best_rho, - ) - - return DistillationResult( - formula=best_formula, - correlation=best_r, - rank_correlation=best_rho, - candidate_scores={k: v for k, v in sorted(scores.items(), key=lambda x: -x[1])}, - ) - - -# =========================================================================== -# Expression Tree Integration -# =========================================================================== - -class NeuralLeafNode: - 
"""A node that wraps a NeuralLeaf for use inside expression trees. - - Implements the same interface as ``factorminer.core.expression_tree.Node`` - so it can be dropped into any tree position that expects a (M, T) output. - - Crucially, this node does NOT inherit from ``Node`` to avoid coupling to - the abstract base class, but it exposes the same public methods so that - ExpressionTree machinery works without modification. - - Parameters - ---------- - leaf : NeuralLeaf - The trained (or untrained) neural leaf. - feature_order : list of str, optional - Feature channels fed to the leaf, in order. - Defaults to ``_DEFAULT_FEATURES``. - distilled_formula : str, optional - If set, ``to_string()`` returns this formula instead of the neural - leaf name. Used after distillation for interpretable serialisation. - """ - - def __init__( - self, - leaf: "NeuralLeaf", - feature_order: Optional[List[str]] = None, - distilled_formula: Optional[str] = None, - ) -> None: - self._leaf = leaf - self._feature_order = feature_order or _DEFAULT_FEATURES - self._distilled_formula = distilled_formula - - # ------------------------------------------------------------------ - # Node interface - # ------------------------------------------------------------------ - - def evaluate(self, data: Dict[str, np.ndarray]) -> np.ndarray: - """Compute the leaf signal on market data. - - Parameters - ---------- - data : dict[str, np.ndarray] - Maps feature names to (M, T) arrays. - - Returns - ------- - np.ndarray, shape (M, T) - """ - ref = next(iter(data.values())) - M, T = ref.shape - F = len(self._feature_order) - features_3d = np.stack( - [data.get(f, np.full((M, T), np.nan)) for f in self._feature_order], - axis=-1, - ) - return self._leaf.evaluate(features_3d) - - def to_string(self) -> str: - """DSL serialisation. 
Returns distilled formula when available.""" - if self._distilled_formula: - return self._distilled_formula - return f"NeuralLeaf({self._leaf.name})" - - def depth(self) -> int: - return 1 - - def size(self) -> int: - return 1 - - def clone(self) -> "NeuralLeafNode": - return NeuralLeafNode( - leaf=self._leaf, # shared reference — leaf weights are shared - feature_order=list(self._feature_order), - distilled_formula=self._distilled_formula, - ) - - def leaf_features(self) -> List[str]: - return sorted(self._feature_order) - - def __repr__(self) -> str: - return self.to_string() - - # ------------------------------------------------------------------ - # Extra helpers - # ------------------------------------------------------------------ - - @property - def neural_leaf(self) -> "NeuralLeaf": - return self._leaf - - def set_distilled_formula(self, formula: str) -> None: - """Pin the distilled formula used by ``to_string()``.""" - self._distilled_formula = formula - - -# =========================================================================== -# SymbolicShell — presents a neural leaf as a typed operator -# =========================================================================== - -class SymbolicShell: - """Wraps a NeuralLeaf as a callable operator compatible with the DSL. - - After distillation, the internal NeuralLeafNode can be replaced with - its symbolic approximation by calling ``replace_with_symbolic()``. - - Parameters - ---------- - name : str - Operator name used in the registry and DSL strings. - leaf_node : NeuralLeafNode - The node wrapping the trained leaf. 
- - Usage - ----- - :: - - shell = SymbolicShell("NeuralMomentum", leaf_node) - signal = shell(data) # (M, T) array - distilled = shell.distill(data) # DistillationResult - shell.replace_with_symbolic(distilled.formula) - print(shell.formula_string) # "EMA($close, 10)" - """ - - def __init__(self, name: str, leaf_node: NeuralLeafNode) -> None: - self.name = name - self._node = leaf_node - self._is_distilled = False - - def __call__(self, data: Dict[str, np.ndarray]) -> np.ndarray: - """Evaluate the operator on market data.""" - return self._node.evaluate(data) - - @property - def formula_string(self) -> str: - """Current DSL formula (neural or distilled).""" - return self._node.to_string() - - @property - def is_distilled(self) -> bool: - return self._is_distilled - - def distill( - self, - data: Dict[str, np.ndarray], - feature_order: Optional[List[str]] = None, - ) -> DistillationResult: - """Run distillation and return the result without modifying state.""" - return distill_to_symbolic( - self._node.neural_leaf, - data, - feature_order=feature_order, - ) - - def replace_with_symbolic(self, formula: str) -> None: - """Pin a distilled symbolic formula to this shell. - - After calling this, ``formula_string`` and ``to_string()`` return - *formula* instead of the neural leaf name. - - Parameters - ---------- - formula : str - Symbolic formula string (DSL notation). - """ - self._node.set_distilled_formula(formula) - self._is_distilled = True - logger.info("SymbolicShell '%s' replaced with symbolic formula: %s", self.name, formula) - - def __repr__(self) -> str: - state = "distilled" if self._is_distilled else "neural" - return f"SymbolicShell({self.name!r}, {state}, formula={self.formula_string!r})" - - -# =========================================================================== -# NeuralLeafRegistry -# =========================================================================== - -class NeuralLeafRegistry: - """Registry of named, trained NeuralLeaf models. 
- - Provides named storage, persistence, and lookup of NeuralLeaf instances. - Trained weights are persisted via ``torch.save`` / ``torch.load``. - - Parameters - ---------- - storage_dir : str, optional - Directory where weights are saved. Defaults to the system temp dir. - - Example - ------- - :: - - registry = NeuralLeafRegistry(storage_dir="/tmp/neural_leaves") - leaf = train_neural_leaf("NeuralMomentum", features, returns) - registry.register("NeuralMomentum", leaf) - registry.save("NeuralMomentum") - - # Later: - registry.load("NeuralMomentum") - leaf = registry.get("NeuralMomentum") - """ - - def __init__(self, storage_dir: Optional[str] = None) -> None: - import tempfile - - self._storage_dir = storage_dir or os.path.join(tempfile.gettempdir(), "neural_leaves") - os.makedirs(self._storage_dir, exist_ok=True) - self._leaves: Dict[str, NeuralLeaf] = {} - - # ------------------------------------------------------------------ - # CRUD - # ------------------------------------------------------------------ - - def register(self, name: str, leaf: "NeuralLeaf") -> None: - """Register a trained leaf under *name*.""" - self._leaves[name] = leaf - logger.info("NeuralLeafRegistry: registered '%s'.", name) - - def get(self, name: str) -> Optional["NeuralLeaf"]: - """Return the leaf registered under *name*, or None.""" - return self._leaves.get(name) - - def remove(self, name: str) -> None: - """Remove a leaf from the in-memory registry.""" - self._leaves.pop(name, None) - - def available(self) -> List[str]: - """Return sorted list of registered leaf names.""" - return sorted(self._leaves.keys()) - - # ------------------------------------------------------------------ - # Persistence - # ------------------------------------------------------------------ - - def _path(self, name: str) -> str: - safe_name = name.replace("/", "_").replace("\\", "_") - return os.path.join(self._storage_dir, f"{safe_name}.pt") - - def save(self, name: str) -> str: - """Save a registered 
leaf's weights to disk. - - Returns - ------- - str - Path where the file was saved. - - Raises - ------ - KeyError - If *name* is not registered. - RuntimeError - If PyTorch is unavailable. - """ - if not _TORCH_AVAILABLE: - raise RuntimeError("PyTorch not available; cannot save NeuralLeaf.") - leaf = self._leaves.get(name) - if leaf is None: - raise KeyError(f"NeuralLeafRegistry: no leaf named '{name}'.") - path = self._path(name) - torch.save( - { - "name": leaf.name, - "window_size": leaf.window_size, - "n_features": leaf.n_features, - "hidden_dim": leaf.hidden_dim, - "state_dict": leaf.state_dict(), - }, - path, - ) - logger.info("Saved NeuralLeaf '%s' to %s", name, path) - return path - - def load(self, name: str, path: Optional[str] = None) -> "NeuralLeaf": - """Load a leaf from disk and register it. - - Parameters - ---------- - name : str - Registry name to assign (may differ from the file's embedded name). - path : str, optional - Explicit file path. If omitted, uses the default storage path. - - Returns - ------- - NeuralLeaf - """ - if not _TORCH_AVAILABLE: - raise RuntimeError("PyTorch not available; cannot load NeuralLeaf.") - file_path = path or self._path(name) - if not os.path.exists(file_path): - raise FileNotFoundError(f"NeuralLeaf weights not found at '{file_path}'.") - ckpt = torch.load(file_path, map_location="cpu", weights_only=True) - leaf = NeuralLeaf( - window_size=ckpt["window_size"], - n_features=ckpt["n_features"], - hidden_dim=ckpt["hidden_dim"], - name=ckpt["name"], - ) - leaf.load_state_dict(ckpt["state_dict"]) - leaf.eval() - self._leaves[name] = leaf - logger.info("Loaded NeuralLeaf '%s' from %s", name, file_path) - return leaf - - def save_all(self) -> Dict[str, str]: - """Save all registered leaves. Returns name -> path mapping.""" - return {name: self.save(name) for name in self._leaves} - - def load_all(self) -> List[str]: - """Load all .pt files from the storage directory. 
Returns loaded names.""" - loaded = [] - for fname in os.listdir(self._storage_dir): - if fname.endswith(".pt"): - name = fname[:-3] - try: - self.load(name) - loaded.append(name) - except Exception as exc: # noqa: BLE001 - logger.warning("Failed to load '%s': %s", name, exc) - return loaded - - -# =========================================================================== -# NeuralOperatorIntegration — high-level orchestration -# =========================================================================== - -@dataclass -class NeuralLeafConfig: - """Configuration for a single named neural leaf. - - Attributes - ---------- - name : str - Registry name (e.g. ``"NeuralMomentum"``). - window_size : int - Rolling window size. - n_epochs : int - Training epochs. - lr : float - Adam learning rate. - hidden_dim : int - Hidden layer width. - description : str - Human-readable description. - """ - - name: str - window_size: int = 10 - n_epochs: int = 100 - lr: float = 1e-3 - hidden_dim: int = _HIDDEN_DIM - description: str = "" - - -class NeuralOperatorIntegration: - """Orchestrates training, distillation, and persistence of neural leaves. - - This is the main entry point for integrating neural leaves into a - HelixFactor workflow. - - Parameters - ---------- - registry : NeuralLeafRegistry, optional - Shared registry. A new one is created if not provided. - feature_order : list of str, optional - Feature channels expected in the (M, T, F) input tensor. 
- - Example - ------- - :: - - integration = NeuralOperatorIntegration() - configs = [ - NeuralLeafConfig("NeuralMomentum", window_size=10), - NeuralLeafConfig("NeuralReversal", window_size=5), - NeuralLeafConfig("NeuralVolume", window_size=10), - ] - integration.train_all_leaves(features_3d, returns, configs) - distilled = integration.distill_all(data_dict) - integration.save("/tmp/my_leaves") - """ - - def __init__( - self, - registry: Optional[NeuralLeafRegistry] = None, - feature_order: Optional[List[str]] = None, - ) -> None: - self._registry = registry or NeuralLeafRegistry() - self._feature_order = feature_order or _DEFAULT_FEATURES - self._distillation_results: Dict[str, DistillationResult] = {} - - # ------------------------------------------------------------------ - # Training - # ------------------------------------------------------------------ - - def train_all_leaves( - self, - features: np.ndarray, - returns: np.ndarray, - leaf_configs: List[NeuralLeafConfig], - device: Optional["torch.device"] = None, - verbose: bool = False, - ) -> None: - """Train all listed neural leaves and register them. - - Parameters - ---------- - features : np.ndarray, shape (M, T, F) - Market feature tensor in the order given by ``feature_order``. - returns : np.ndarray, shape (M, T) - Forward returns for training targets. - leaf_configs : list of NeuralLeafConfig - One entry per leaf to train. - device : torch.device, optional - verbose : bool - Pass through to training loop for debug logging. 
- """ - for cfg in leaf_configs: - logger.info("Training NeuralLeaf '%s'…", cfg.name) - leaf = train_neural_leaf( - name=cfg.name, - features=features, - returns=returns, - window_size=cfg.window_size, - n_epochs=cfg.n_epochs, - lr=cfg.lr, - hidden_dim=cfg.hidden_dim, - device=device, - verbose=verbose, - ) - if leaf is not None: - self._registry.register(cfg.name, leaf) - else: - logger.warning("Training failed for '%s', skipping.", cfg.name) - - # ------------------------------------------------------------------ - # Distillation - # ------------------------------------------------------------------ - - def distill_all( - self, - data: Dict[str, np.ndarray], - ) -> Dict[str, str]: - """Distill all registered leaves and return name -> best formula. - - Parameters - ---------- - data : dict[str, np.ndarray] - Market data dict (same format used for expression tree evaluation). - - Returns - ------- - dict - Maps leaf name to its best symbolic approximation formula string. - """ - results: Dict[str, str] = {} - for name in self._registry.available(): - leaf = self._registry.get(name) - if leaf is None: - continue - distilled = distill_to_symbolic( - leaf, data, feature_order=self._feature_order - ) - self._distillation_results[name] = distilled - results[name] = distilled.formula - logger.info( - "Distilled '%s' -> '%s' (r=%.4f, rho=%.4f)", - name, - distilled.formula, - distilled.correlation, - distilled.rank_correlation, - ) - return results - - # ------------------------------------------------------------------ - # Registry accessors - # ------------------------------------------------------------------ - - def get_available_leaves(self) -> List[str]: - """Return names of all registered leaves.""" - return self._registry.available() - - def get_leaf(self, name: str) -> Optional["NeuralLeaf"]: - """Return the NeuralLeaf registered under *name*, or None.""" - return self._registry.get(name) - - def get_distillation_result(self, name: str) -> 
Optional[DistillationResult]: - """Return the stored DistillationResult for *name*, or None.""" - return self._distillation_results.get(name) - - def as_node(self, name: str) -> Optional[NeuralLeafNode]: - """Return a NeuralLeafNode ready for use in an expression tree. - - If distillation has been run, the formula string is automatically set - on the returned node. - - Parameters - ---------- - name : str - - Returns - ------- - NeuralLeafNode or None - """ - leaf = self._registry.get(name) - if leaf is None: - return None - distilled_formula = None - if name in self._distillation_results: - distilled_formula = self._distillation_results[name].formula - return NeuralLeafNode( - leaf=leaf, - feature_order=self._feature_order, - distilled_formula=distilled_formula, - ) - - def as_shell(self, name: str) -> Optional[SymbolicShell]: - """Return a SymbolicShell for *name*, or None if unknown.""" - node = self.as_node(name) - if node is None: - return None - shell = SymbolicShell(name=name, leaf_node=node) - if name in self._distillation_results: - shell.replace_with_symbolic(self._distillation_results[name].formula) - return shell - - # ------------------------------------------------------------------ - # Persistence - # ------------------------------------------------------------------ - - def save(self, path: str) -> None: - """Save all registered leaves to *path* (directory). - - Parameters - ---------- - path : str - Target directory. Will be created if it does not exist. - """ - os.makedirs(path, exist_ok=True) - old_dir = self._registry._storage_dir - self._registry._storage_dir = path - self._registry.save_all() - self._registry._storage_dir = old_dir - logger.info("Saved %d neural leaves to %s", len(self._registry.available()), path) - - def load(self, path: str) -> None: - """Load all .pt files from *path* into the registry. - - Parameters - ---------- - path : str - Directory containing .pt weight files. 
- """ - if not os.path.isdir(path): - raise FileNotFoundError(f"NeuralOperatorIntegration.load: '{path}' is not a directory.") - old_dir = self._registry._storage_dir - self._registry._storage_dir = path - loaded = self._registry.load_all() - self._registry._storage_dir = old_dir - logger.info("Loaded %d neural leaves from %s", len(loaded), path) - - -# =========================================================================== -# Registry hook — exposes neural leaves to the operator registry -# =========================================================================== - -# Global singleton, populated lazily when neural leaves are trained/loaded. -_GLOBAL_REGISTRY: Optional[NeuralLeafRegistry] = None - - -def get_global_neural_registry() -> NeuralLeafRegistry: - """Return (and lazily create) the global NeuralLeafRegistry.""" - global _GLOBAL_REGISTRY - if _GLOBAL_REGISTRY is None: - _GLOBAL_REGISTRY = NeuralLeafRegistry() - return _GLOBAL_REGISTRY - - -def register_neural_leaves_in_operator_registry() -> None: - """Expose registered neural leaves to the main operator OPERATOR_REGISTRY. - - This function should be called AFTER leaves have been trained / loaded. - Each leaf is added to the registry with: - - A synthetic OperatorSpec (category AUTO_INVENTED, arity 0 — the leaf - takes the full data dict rather than individual array inputs). - - A numpy_fn that calls ``leaf.evaluate(features_3d)`` after assembling - the feature tensor from the data dict. - - No PyTorch fn (the leaf already uses PyTorch internally). - - This allows the broader HelixFactor system to treat neural leaves as - first-class operators that can appear in search spaces and fitness - evaluation loops. 
- """ - try: - from factorminer.operators.registry import OPERATOR_REGISTRY # type: ignore[attr-defined] - from factorminer.core.types import OperatorSpec, OperatorType, SignatureType - except ImportError: - logger.debug("register_neural_leaves_in_operator_registry: operator registry not available.") - return - - registry = get_global_neural_registry() - for name in registry.available(): - if name in OPERATOR_REGISTRY: - continue # already registered - - leaf = registry.get(name) - if leaf is None: - continue - - feature_order = _DEFAULT_FEATURES - - # Capture leaf in closure - def _make_np_fn(captured_leaf, captured_order): - def _np_fn(data: Dict[str, np.ndarray]) -> np.ndarray: - ref = next(iter(data.values())) - M, T = ref.shape - F = len(captured_order) - features_3d = np.stack( - [data.get(f, np.full((M, T), np.nan)) for f in captured_order], - axis=-1, - ) - return captured_leaf.evaluate(features_3d) - - return _np_fn - - np_fn = _make_np_fn(leaf, feature_order) - - spec = OperatorSpec( - name=name, - arity=0, # special: takes data dict, not individual arrays - category=OperatorType.AUTO_INVENTED, - signature=SignatureType.TIME_SERIES_TO_TIME_SERIES, - description=f"NeuralLeaf: {name}", - ) - OPERATOR_REGISTRY[name] = (spec, np_fn, None) - logger.info("Registered neural leaf '%s' in OPERATOR_REGISTRY.", name) - - -# =========================================================================== -# Convenience: build standard leaves from mock data -# =========================================================================== - -def build_default_neural_leaves( - num_assets: int = 20, - num_periods: int = 500, - window_size: int = 10, - n_epochs: int = 50, - seed: int = 42, - verbose: bool = False, -) -> NeuralOperatorIntegration: - """Train the three standard neural leaves on synthetic mock data. - - Intended for quick experimentation and testing. Uses - ``factorminer.data.mock_data.generate_mock_data`` internally. 
- - The three leaves are: - - ``NeuralMomentum``: captures price trend and momentum patterns. - - ``NeuralReversal``: captures short-term mean-reversion signals. - - ``NeuralVolume``: captures volume-price interaction signals. - - Parameters - ---------- - num_assets : int - num_periods : int - window_size : int - n_epochs : int - seed : int - verbose : bool - - Returns - ------- - NeuralOperatorIntegration - Fully initialised integration with trained leaves. - """ - from factorminer.data.mock_data import MockConfig, generate_mock_data - - config = MockConfig( - num_assets=num_assets, - num_periods=num_periods, - seed=seed, - plant_alpha=True, - alpha_strength=0.03, - ) - df = generate_mock_data(config) - - # Pivot to (M, T) arrays - df_sorted = df.sort_values(["asset_id", "datetime"]) - assets = sorted(df_sorted["asset_id"].unique()) - M = len(assets) - T = df_sorted.groupby("asset_id").size().min() - - def _pivot(col: str) -> np.ndarray: - return np.array( - [df_sorted[df_sorted["asset_id"] == a][col].values[:T] for a in assets], - dtype=np.float64, - ) - - close = _pivot("close") - high = _pivot("high") - low = _pivot("low") - open_ = _pivot("open") - volume = _pivot("volume") - amount = _pivot("amount") - # Derive returns and vwap - ret = np.full_like(close, np.nan) - ret[:, 1:] = close[:, 1:] / np.where(close[:, :-1] > 1e-10, close[:, :-1], np.nan) - 1.0 - vwap = (high + low + close) / 3.0 - - data_dict: Dict[str, np.ndarray] = { - "$open": open_, - "$high": high, - "$low": low, - "$close": close, - "$volume": volume, - "$amt": amount, - "$vwap": vwap, - "$returns": ret, - } - - # Stack features in the canonical order - features_3d = np.stack( - [data_dict[f] for f in _DEFAULT_FEATURES], - axis=-1, - ) # (M, T, F) - - # Forward returns: shift by 1 - fwd_returns = np.full_like(close, np.nan) - fwd_returns[:, :-1] = ret[:, 1:] - - configs = [ - NeuralLeafConfig( - "NeuralMomentum", - window_size=window_size, - n_epochs=n_epochs, - description="Learns 
price-momentum patterns from OHLCV windows", - ), - NeuralLeafConfig( - "NeuralReversal", - window_size=max(5, window_size // 2), - n_epochs=n_epochs, - description="Learns short-term mean-reversion signals", - ), - NeuralLeafConfig( - "NeuralVolume", - window_size=window_size, - n_epochs=n_epochs, - description="Learns volume-price interaction patterns", - ), - ] - - integration = NeuralOperatorIntegration(feature_order=_DEFAULT_FEATURES) - integration.train_all_leaves( - features=features_3d, - returns=fwd_returns, - leaf_configs=configs, - verbose=verbose, - ) - - # Distill to symbolic - integration.distill_all(data_dict) - - return integration - - -# =========================================================================== -# Public API -# =========================================================================== - -__all__ = [ - # Core classes - "NeuralLeaf", - "NeuralLeafNode", - "NeuralLeafRegistry", - "SymbolicShell", - # Training - "train_neural_leaf", - "NeuralLeafConfig", - # Distillation - "distill_to_symbolic", - "DistillationResult", - # Orchestration - "NeuralOperatorIntegration", - # Registry integration - "get_global_neural_registry", - "register_neural_leaves_in_operator_registry", - # Convenience - "build_default_neural_leaves", - # Constants - "_DEFAULT_FEATURES", - "_TORCH_AVAILABLE", -] diff --git a/src/factorminer/factorminer/operators/registry.py b/src/factorminer/factorminer/operators/registry.py deleted file mode 100644 index ca9cff7..0000000 --- a/src/factorminer/factorminer/operators/registry.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Central operator registry mapping names to implementations and specs. - -Combines the ``OperatorSpec`` definitions from ``core.types`` with the concrete -NumPy / PyTorch function implementations from each category module. 
-""" - -from __future__ import annotations - -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np - -from src.factorminer.factorminer.core.types import OPERATOR_REGISTRY as SPEC_REGISTRY -from src.factorminer.factorminer.core.types import OperatorSpec, OperatorType - -from src.factorminer.factorminer.operators.arithmetic import ARITHMETIC_OPS -from src.factorminer.factorminer.operators.statistical import STATISTICAL_OPS -from src.factorminer.factorminer.operators.timeseries import TIMESERIES_OPS -from src.factorminer.factorminer.operators.crosssectional import CROSSSECTIONAL_OPS -from src.factorminer.factorminer.operators.smoothing import SMOOTHING_OPS -from src.factorminer.factorminer.operators.regression import REGRESSION_OPS -from src.factorminer.factorminer.operators.logical import LOGICAL_OPS - -try: - import torch - - _TORCH = True -except ImportError: - torch = None # type: ignore[assignment] - _TORCH = False - -# --------------------------------------------------------------------------- -# Build unified registry: name -> (OperatorSpec, np_fn, torch_fn) -# --------------------------------------------------------------------------- - -_ALL_IMPL_TABLES: List[Dict[str, Tuple[Callable, Callable]]] = [ - ARITHMETIC_OPS, - STATISTICAL_OPS, - TIMESERIES_OPS, - CROSSSECTIONAL_OPS, - SMOOTHING_OPS, - REGRESSION_OPS, - LOGICAL_OPS, -] - -# Merge implementation tables -_IMPL: Dict[str, Tuple[Callable, Callable]] = {} -for table in _ALL_IMPL_TABLES: - _IMPL.update(table) - -# The full registry: name -> (spec, numpy_fn, torch_fn) -OPERATOR_REGISTRY: Dict[str, Tuple[OperatorSpec, Callable, Optional[Callable]]] = {} - -for name, spec in SPEC_REGISTRY.items(): - if name in _IMPL: - np_fn, torch_fn = _IMPL[name] - OPERATOR_REGISTRY[name] = (spec, np_fn, torch_fn) - else: - # Spec exists but no implementation yet -- register with None fns - OPERATOR_REGISTRY[name] = (spec, None, None) # type: ignore[assignment] - - -# 
--------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - -def get_operator(name: str) -> OperatorSpec: - """Look up an operator spec by name.""" - if name not in OPERATOR_REGISTRY: - raise KeyError( - f"Unknown operator '{name}'. " - f"Available: {sorted(OPERATOR_REGISTRY.keys())}" - ) - return OPERATOR_REGISTRY[name][0] - - -def get_impl(name: str, backend: str = "numpy") -> Callable: - """Return the implementation function for a given operator and backend.""" - if name not in OPERATOR_REGISTRY: - raise KeyError(f"Unknown operator '{name}'") - spec, np_fn, torch_fn = OPERATOR_REGISTRY[name] - if backend == "torch" or backend == "gpu": - if torch_fn is None: - raise NotImplementedError(f"No PyTorch implementation for '{name}'") - return torch_fn - if np_fn is None: - raise NotImplementedError(f"No NumPy implementation for '{name}'") - return np_fn - - -def execute_operator( - name: str, - *inputs: Any, - params: Optional[Dict[str, Any]] = None, - backend: str = "numpy", -) -> Union[np.ndarray, "torch.Tensor"]: - """Execute an operator by name. - - Parameters - ---------- - name : str - Operator name (e.g. ``"Add"``, ``"Mean"``). - *inputs : array-like - Positional data inputs (1, 2, or 3 depending on arity). - params : dict, optional - Extra keyword parameters (e.g. ``{"window": 20}``). - backend : str - ``"numpy"`` or ``"torch"`` / ``"gpu"``. - - Returns - ------- - np.ndarray or torch.Tensor - """ - fn = get_impl(name, backend) - kw = params or {} - return fn(*inputs, **kw) - - -def list_operators(grouped: bool = True) -> Union[List[str], Dict[str, List[str]]]: - """List all registered operator names. - - Parameters - ---------- - grouped : bool - If True, return a dict mapping category name -> list of op names. - If False, return a flat sorted list. 
- """ - if not grouped: - return sorted(OPERATOR_REGISTRY.keys()) - - groups: Dict[str, List[str]] = {} - for name, (spec, _, _) in OPERATOR_REGISTRY.items(): - cat = spec.category.name - groups.setdefault(cat, []).append(name) - for cat in groups: - groups[cat].sort() - return groups - - -def implemented_operators() -> List[str]: - """Return names of operators that have at least a NumPy implementation.""" - return sorted(name for name, (_, np_fn, _) in OPERATOR_REGISTRY.items() if np_fn is not None) diff --git a/src/factorminer/factorminer/operators/regression.py b/src/factorminer/factorminer/operators/regression.py deleted file mode 100644 index a0e8dfc..0000000 --- a/src/factorminer/factorminer/operators/regression.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Rolling linear-regression operators. - -Each function regresses x against a simple time index [0, 1, ..., window-1] -within a rolling window along axis=1. Input/output shape: ``(M, T)``. -""" - -from __future__ import annotations - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def _linreg_components_np(x: np.ndarray, window: int): - """Compute slope, intercept, and fitted values for rolling OLS vs time index.""" - window = int(window) - M, T = x.shape - - from factorminer.operators.statistical import _rolling_np, _pad_front - - w = _rolling_np(x, window) - if w is None: - nan = np.full_like(x, np.nan) - return nan, nan, nan, nan - - t_idx = np.arange(window, dtype=np.float64) # (window,) - t_mean = t_idx.mean() - t_var = ((t_idx - t_mean) ** 2).sum() - - x_mean = np.nanmean(w, axis=2, keepdims=True) # (M, T-w+1, 1) - # covariance of x with t_idx - cov_xt = np.nansum((w - x_mean) * (t_idx - t_mean), axis=2) # (M, T-w+1) - - slope = cov_xt / t_var # (M, T-w+1) - 
intercept = x_mean.squeeze(2) - slope * t_mean - - # Fitted value at the last time step in window (t = window - 1) - fitted = slope * (window - 1) + intercept - - # Residual at last time step - residual = w[:, :, -1] - fitted - - # R-squared - ss_res_all = w - (slope[:, :, np.newaxis] * t_idx + intercept[:, :, np.newaxis]) - ss_res = np.nansum(ss_res_all ** 2, axis=2) - ss_tot = np.nansum((w - x_mean) ** 2, axis=2) - with np.errstate(invalid="ignore", divide="ignore"): - r2 = np.where(ss_tot > 1e-10, 1.0 - ss_res / ss_tot, np.nan) - - slope = _pad_front(slope, window, T) - intercept = _pad_front(intercept, window, T) - fitted = _pad_front(fitted, window, T) - residual = _pad_front(residual, window, T) - r2 = _pad_front(r2, window, T) - - return slope, intercept, fitted, residual, r2 - - -def ts_linreg_np(x: np.ndarray, window: int = 20) -> np.ndarray: - """Rolling linear-regression fitted value.""" - _, _, fitted, _, _ = _linreg_components_np(x, window) - return fitted - - -def ts_linreg_slope_np(x: np.ndarray, window: int = 20) -> np.ndarray: - """Rolling linear-regression slope.""" - slope, _, _, _, _ = _linreg_components_np(x, window) - return slope - - -def ts_linreg_intercept_np(x: np.ndarray, window: int = 20) -> np.ndarray: - """Rolling linear-regression intercept.""" - _, intercept, _, _, _ = _linreg_components_np(x, window) - return intercept - - -def ts_linreg_resid_np(x: np.ndarray, window: int = 20) -> np.ndarray: - """Rolling linear-regression residual at the last time step.""" - _, _, _, residual, _ = _linreg_components_np(x, window) - return residual - - -# =========================================================================== -# PyTorch implementations -# =========================================================================== - -def _linreg_components_torch(x: "torch.Tensor", window: int): - """Vectorized rolling OLS on GPU.""" - window = int(window) - M, T = x.shape - - from factorminer.operators.statistical import _unfold_torch, 
_pad_front_torch - - w = _unfold_torch(x, window) # (M, T-w+1, window) - - t_idx = torch.arange(window, dtype=x.dtype, device=x.device) - t_mean = t_idx.mean() - t_var = ((t_idx - t_mean) ** 2).sum() - - x_mean = w.nanmean(dim=2, keepdim=True) - # Handle NaN: replace with 0 for summation - w_filled = w.nan_to_num(0.0) - not_nan = ~torch.isnan(w) - n = not_nan.sum(dim=2, keepdim=True).float() - - # Recompute mean with nan handling - cov_xt = ((w_filled - x_mean.nan_to_num(0.0)) * (t_idx - t_mean) * not_nan).sum(dim=2) - - slope = cov_xt / t_var - intercept = x_mean.squeeze(2) - slope * t_mean - - fitted = slope * (window - 1) + intercept - residual = w[:, :, -1] - fitted - - # R-squared - fitted_all = slope.unsqueeze(2) * t_idx + intercept.unsqueeze(2) - ss_res = ((w_filled - fitted_all) ** 2 * not_nan).sum(dim=2) - ss_tot = ((w_filled - x_mean.nan_to_num(0.0)) ** 2 * not_nan).sum(dim=2) - r2 = torch.where(ss_tot > 1e-10, 1.0 - ss_res / ss_tot, - torch.tensor(float("nan"), device=x.device)) - - slope = _pad_front_torch(slope, window, T) - intercept = _pad_front_torch(intercept, window, T) - fitted = _pad_front_torch(fitted, window, T) - residual = _pad_front_torch(residual, window, T) - r2 = _pad_front_torch(r2, window, T) - - return slope, intercept, fitted, residual, r2 - - -def ts_linreg_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - _, _, fitted, _, _ = _linreg_components_torch(x, window) - return fitted - - -def ts_linreg_slope_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - slope, _, _, _, _ = _linreg_components_torch(x, window) - return slope - - -def ts_linreg_intercept_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - _, intercept, _, _, _ = _linreg_components_torch(x, window) - return intercept - - -def ts_linreg_resid_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - _, _, _, residual, _ = _linreg_components_torch(x, window) - return residual - - -# 
=========================================================================== -# Registration table -# =========================================================================== - -REGRESSION_OPS = { - "TsLinReg": (ts_linreg_np, ts_linreg_torch), - "TsLinRegSlope": (ts_linreg_slope_np, ts_linreg_slope_torch), - "TsLinRegIntercept": (ts_linreg_intercept_np, ts_linreg_intercept_torch), - "TsLinRegResid": (ts_linreg_resid_np, ts_linreg_resid_torch), -} diff --git a/src/factorminer/factorminer/operators/smoothing.py b/src/factorminer/factorminer/operators/smoothing.py deleted file mode 100644 index 2e990e3..0000000 --- a/src/factorminer/factorminer/operators/smoothing.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Moving average / smoothing operators. - -Input shape: ``(M, T)`` -> output shape ``(M, T)``. -All operate along the time axis (axis=1) per asset row. -""" - -from __future__ import annotations - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def sma_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Simple moving average (identical to Mean).""" - window = int(window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - # Cumsum trick for O(1) per element - cs = np.nancumsum(x, axis=1) - out[:, window - 1:] = cs[:, window - 1:] - if window > 1: - out[:, window - 1:] -= np.concatenate( - [np.zeros((M, 1), dtype=np.float64), cs[:, :-window]], axis=1 - )[:, :T - window + 1] # fix: just subtract shifted cumsum - out[:, window - 1:] = (cs[:, window - 1:] - np.concatenate( - [np.zeros((M, 1), dtype=np.float64), cs[:, :-1]], axis=1 - )[:, :T - window + 1]) - out[:, window - 1:] /= window - out[:, :window - 1] = np.nan - return out - - -def ema_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Exponential 
moving average with span = window.""" - window = int(window) - alpha = 2.0 / (window + 1.0) - M, T = x.shape - out = np.copy(x).astype(np.float64) - for t in range(1, T): - prev = out[:, t - 1] - curr = x[:, t] - both_valid = ~np.isnan(prev) & ~np.isnan(curr) - only_prev = ~np.isnan(prev) & np.isnan(curr) - out[both_valid, t] = alpha * curr[both_valid] + (1 - alpha) * prev[both_valid] - out[only_prev, t] = prev[only_prev] - return out - - -def dema_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Double EMA: 2 * EMA(x) - EMA(EMA(x)).""" - e1 = ema_np(x, window) - e2 = ema_np(e1, window) - return 2.0 * e1 - e2 - - -def kama_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Kaufman Adaptive Moving Average.""" - window = int(window) - fast_sc = 2.0 / (2.0 + 1.0) - slow_sc = 2.0 / (30.0 + 1.0) - M, T = x.shape - out = np.copy(x).astype(np.float64) - - for t in range(window, T): - direction = np.abs(x[:, t] - x[:, t - window]) - volatility = np.nansum(np.abs(np.diff(x[:, t - window:t + 1], axis=1)), axis=1) - with np.errstate(invalid="ignore", divide="ignore"): - er = np.where(volatility > 1e-10, direction / volatility, 0.0) - sc = (er * (fast_sc - slow_sc) + slow_sc) ** 2 - prev = out[:, t - 1] - curr = x[:, t] - valid = ~np.isnan(prev) & ~np.isnan(curr) - out[valid, t] = prev[valid] + sc[valid] * (curr[valid] - prev[valid]) - return out - - -def hma_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Hull Moving Average: WMA(2*WMA(x, w/2) - WMA(x, w), sqrt(w)).""" - window = int(window) - from factorminer.operators.timeseries import wma_np - half = max(int(window / 2), 1) - sqrt_w = max(int(np.sqrt(window)), 1) - w1 = wma_np(x, half) - w2 = wma_np(x, window) - diff = 2.0 * w1 - w2 - return wma_np(diff, sqrt_w) - - -# =========================================================================== -# PyTorch implementations -# =========================================================================== - -def sma_torch(x: "torch.Tensor", window: int = 10) -> 
"torch.Tensor": - """Simple moving average using conv1d for GPU efficiency.""" - window = int(window) - M, T = x.shape - # Use unfold-based approach - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - w = _unfold_torch(x, window) - result = w.nanmean(dim=2) - return _pad_front_torch(result, window, T) - - -def ema_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - """EMA -- sequential by nature, but batch across assets.""" - window = int(window) - alpha = 2.0 / (window + 1.0) - M, T = x.shape - out = x.clone() - for t in range(1, T): - prev = out[:, t - 1] - curr = x[:, t] - both = ~torch.isnan(prev) & ~torch.isnan(curr) - only_prev = ~torch.isnan(prev) & torch.isnan(curr) - out[both, t] = alpha * curr[both] + (1 - alpha) * prev[both] - out[only_prev, t] = prev[only_prev] - return out - - -def dema_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - e1 = ema_torch(x, window) - e2 = ema_torch(e1, window) - return 2.0 * e1 - e2 - - -def kama_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - fast_sc = 2.0 / (2.0 + 1.0) - slow_sc = 2.0 / (30.0 + 1.0) - M, T = x.shape - out = x.clone() - for t in range(window, T): - direction = (x[:, t] - x[:, t - window]).abs() - vol = x[:, t - window:t + 1].diff(dim=1).abs().nansum(dim=1) - er = torch.where(vol > 1e-10, direction / vol, torch.zeros_like(direction)) - sc = (er * (fast_sc - slow_sc) + slow_sc) ** 2 - prev = out[:, t - 1] - curr = x[:, t] - valid = ~torch.isnan(prev) & ~torch.isnan(curr) - out[valid, t] = prev[valid] + sc[valid] * (curr[valid] - prev[valid]) - return out - - -def hma_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - from factorminer.operators.timeseries import wma_torch - half = max(int(window / 2), 1) - sqrt_w = max(int(window ** 0.5), 1) - w1 = wma_torch(x, half) - w2 = wma_torch(x, window) - diff = 2.0 * w1 - w2 - return wma_torch(diff, sqrt_w) - - -# 
=========================================================================== -# Registration table -# =========================================================================== - -SMOOTHING_OPS = { - "EMA": (ema_np, ema_torch), - "DEMA": (dema_np, dema_torch), - "SMA": (sma_np, sma_torch), - "KAMA": (kama_np, kama_torch), - "HMA": (hma_np, hma_torch), -} diff --git a/src/factorminer/factorminer/operators/statistical.py b/src/factorminer/factorminer/operators/statistical.py deleted file mode 100644 index d903889..0000000 --- a/src/factorminer/factorminer/operators/statistical.py +++ /dev/null @@ -1,452 +0,0 @@ -"""Rolling-window statistical operators. - -Each function operates along the **time** axis (axis=1) independently for -every asset row. Input shape: ``(M, T)`` -> output shape ``(M, T)``. -The first ``(window - 1)`` values in each row are set to ``NaN``. -""" - -from __future__ import annotations - -import numpy as np - -try: - import torch - import torch.nn.functional as F -except ImportError: - torch = None # type: ignore[assignment] - F = None # type: ignore[assignment] - - -# =========================================================================== -# Helpers -# =========================================================================== - -def _rolling_np(x: np.ndarray, window: int): - """Yield views of shape (M, T-w+1, w) using stride tricks.""" - M, T = x.shape - if T < window: - return None - strides = (x.strides[0], x.strides[1], x.strides[1]) - shape = (M, T - window + 1, window) - return np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides) - - -def _pad_front(result: np.ndarray, window: int, total_T: int) -> np.ndarray: - """Pad front of time axis with NaN to restore original length.""" - M = result.shape[0] - pad_len = total_T - result.shape[1] - if pad_len > 0: - pad = np.full((M, pad_len), np.nan, dtype=result.dtype) - return np.concatenate([pad, result], axis=1) - return result - - -def _unfold_torch(x: "torch.Tensor", window: int) 
-> "torch.Tensor": - """Unfold last dimension to get sliding windows: (M, T) -> (M, T-w+1, w).""" - return x.unfold(dimension=1, size=window, step=1) - - -def _pad_front_torch(result: "torch.Tensor", window: int, total_T: int) -> "torch.Tensor": - M = result.shape[0] - pad_len = total_T - result.shape[1] - if pad_len > 0: - pad = torch.full((M, pad_len), float("nan"), device=result.device, dtype=result.dtype) - return torch.cat([pad, result], dim=1) - return result - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def mean_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanmean(w, axis=2) - return _pad_front(result, window, T) - - -def std_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanstd(w, axis=2, ddof=1) - return _pad_front(result, window, T) - - -def var_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanvar(w, axis=2, ddof=1) - return _pad_front(result, window, T) - - -def skew_np(x: np.ndarray, window: int = 20) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - m = np.nanmean(w, axis=2, keepdims=True) - d = w - m - n = np.sum(~np.isnan(w), axis=2, keepdims=True).astype(np.float64) - m2 = np.nanmean(d ** 2, axis=2, keepdims=True) - m3 = np.nanmean(d ** 3, axis=2, keepdims=True) - with np.errstate(invalid="ignore", divide="ignore"): - sk = m3 / np.power(m2, 1.5) - result = sk.squeeze(2) - return _pad_front(result, window, T) - - -def 
kurt_np(x: np.ndarray, window: int = 20) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - m = np.nanmean(w, axis=2, keepdims=True) - d = w - m - m2 = np.nanmean(d ** 2, axis=2, keepdims=True) - m4 = np.nanmean(d ** 4, axis=2, keepdims=True) - with np.errstate(invalid="ignore", divide="ignore"): - kt = m4 / np.power(m2, 2.0) - 3.0 - result = kt.squeeze(2) - return _pad_front(result, window, T) - - -def median_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanmedian(w, axis=2) - return _pad_front(result, window, T) - - -def sum_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nansum(w, axis=2) - # If all NaN in a window, nansum returns 0; fix that - all_nan = np.all(np.isnan(w), axis=2) - result[all_nan] = np.nan - return _pad_front(result, window, T) - - -def prod_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanprod(w, axis=2) - all_nan = np.all(np.isnan(w), axis=2) - result[all_nan] = np.nan - return _pad_front(result, window, T) - - -def ts_max_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanmax(w, axis=2) - return _pad_front(result, window, T) - - -def ts_min_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanmin(w, axis=2) - return _pad_front(result, window, T) - - -def ts_argmax_np(x: np.ndarray, window: int 
= 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanargmax(w, axis=2).astype(np.float64) - return _pad_front(result, window, T) - - -def ts_argmin_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanargmin(w, axis=2).astype(np.float64) - return _pad_front(result, window, T) - - -def ts_rank_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Rolling percentile rank of the latest value within its window.""" - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - latest = w[:, :, -1:] # (M, T-w+1, 1) - count_less = np.nansum(w < latest, axis=2).astype(np.float64) - count_valid = np.sum(~np.isnan(w), axis=2).astype(np.float64) - with np.errstate(invalid="ignore", divide="ignore"): - result = count_less / (count_valid - 1.0) - result[count_valid <= 1] = np.nan - return _pad_front(result, window, T) - - -def quantile_np(x: np.ndarray, window: int = 10, q: float = 0.5) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.nanquantile(w, q, axis=2) - return _pad_front(result, window, T) - - -def count_nan_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.sum(np.isnan(w), axis=2).astype(np.float64) - return _pad_front(result, window, T) - - -def count_not_nan_np(x: np.ndarray, window: int = 10) -> np.ndarray: - window = int(window) - M, T = x.shape - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - result = np.sum(~np.isnan(w), axis=2).astype(np.float64) - return _pad_front(result, window, T) - - -# 
=========================================================================== -# PyTorch (GPU) implementations -# =========================================================================== - -def mean_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) # (M, T-w+1, w) - result = w.nanmean(dim=2) - return _pad_front_torch(result, window, T) - - -def std_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - m = w.nanmean(dim=2, keepdim=True) - d = w - m - not_nan = ~torch.isnan(w) - d = d.nan_to_num(0.0) - n = not_nan.sum(dim=2, keepdim=True).float() - var = (d ** 2).sum(dim=2, keepdim=True) / (n - 1).clamp(min=1) - result = var.sqrt().squeeze(2) - result[n.squeeze(2) < 2] = float("nan") - return _pad_front_torch(result, window, T) - - -def var_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - m = w.nanmean(dim=2, keepdim=True) - d = w - m - not_nan = ~torch.isnan(w) - d = d.nan_to_num(0.0) - n = not_nan.sum(dim=2, keepdim=True).float() - result = ((d ** 2).sum(dim=2, keepdim=True) / (n - 1).clamp(min=1)).squeeze(2) - result[n.squeeze(2) < 2] = float("nan") - return _pad_front_torch(result, window, T) - - -def skew_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - m = w.nanmean(dim=2, keepdim=True) - d = (w - m).nan_to_num(0.0) - not_nan = ~torch.isnan(w) - n = not_nan.sum(dim=2, keepdim=True).float() - m2 = (d ** 2).sum(dim=2, keepdim=True) / n.clamp(min=1) - m3 = (d ** 3).sum(dim=2, keepdim=True) / n.clamp(min=1) - result = (m3 / m2.pow(1.5)).squeeze(2) - result[n.squeeze(2) < 3] = float("nan") - return _pad_front_torch(result, window, T) - - -def kurt_torch(x: "torch.Tensor", window: int = 20) -> "torch.Tensor": - window = int(window) - M, 
T = x.shape - w = _unfold_torch(x, window) - m = w.nanmean(dim=2, keepdim=True) - d = (w - m).nan_to_num(0.0) - not_nan = ~torch.isnan(w) - n = not_nan.sum(dim=2, keepdim=True).float() - m2 = (d ** 2).sum(dim=2, keepdim=True) / n.clamp(min=1) - m4 = (d ** 4).sum(dim=2, keepdim=True) / n.clamp(min=1) - result = (m4 / m2.pow(2.0) - 3.0).squeeze(2) - result[n.squeeze(2) < 4] = float("nan") - return _pad_front_torch(result, window, T) - - -def median_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - result = w.nanmedian(dim=2).values - return _pad_front_torch(result, window, T) - - -def sum_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - result = w.nansum(dim=2) - all_nan = torch.isnan(w).all(dim=2) - result[all_nan] = float("nan") - return _pad_front_torch(result, window, T) - - -def prod_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - filled = w.nan_to_num(1.0) - result = filled.prod(dim=2) - all_nan = torch.isnan(w).all(dim=2) - result[all_nan] = float("nan") - return _pad_front_torch(result, window, T) - - -def ts_max_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - filled = w.nan_to_num(float("-inf")) - result = filled.max(dim=2).values - all_nan = torch.isnan(w).all(dim=2) - result[all_nan] = float("nan") - return _pad_front_torch(result, window, T) - - -def ts_min_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - filled = w.nan_to_num(float("inf")) - result = filled.min(dim=2).values - all_nan = torch.isnan(w).all(dim=2) - result[all_nan] = float("nan") - return _pad_front_torch(result, window, T) - - -def ts_argmax_torch(x: "torch.Tensor", 
window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - filled = w.nan_to_num(float("-inf")) - result = filled.argmax(dim=2).float() - return _pad_front_torch(result, window, T) - - -def ts_argmin_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - filled = w.nan_to_num(float("inf")) - result = filled.argmin(dim=2).float() - return _pad_front_torch(result, window, T) - - -def ts_rank_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - """Rolling percentile rank -- key GPU acceleration target (17x speedup).""" - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) # (M, T-w+1, w) - latest = w[:, :, -1:] # (M, T-w+1, 1) - not_nan = ~torch.isnan(w) - # Count values strictly less than latest (NaN-safe) - less = ((w < latest) & not_nan).sum(dim=2).float() - count_valid = not_nan.sum(dim=2).float() - result = less / (count_valid - 1).clamp(min=1) - result[count_valid <= 1] = float("nan") - return _pad_front_torch(result, window, T) - - -def quantile_torch(x: "torch.Tensor", window: int = 10, q: float = 0.5) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - result = w.nanmedian(dim=2).values # approximation; true quantile below - # Use sorting for proper quantile - sorted_w, _ = w.sort(dim=2) - n = (~torch.isnan(w)).sum(dim=2).float() - idx = ((n - 1) * q).long().clamp(min=0) - # Gather the quantile value - result = sorted_w.gather(2, idx.unsqueeze(2)).squeeze(2) - return _pad_front_torch(result, window, T) - - -def count_nan_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - w = _unfold_torch(x, window) - result = torch.isnan(w).sum(dim=2).float() - return _pad_front_torch(result, window, T) - - -def count_not_nan_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - 
w = _unfold_torch(x, window) - result = (~torch.isnan(w)).sum(dim=2).float() - return _pad_front_torch(result, window, T) - - -# =========================================================================== -# Registration table -# =========================================================================== - -STATISTICAL_OPS = { - "Mean": (mean_np, mean_torch), - "Std": (std_np, std_torch), - "Var": (var_np, var_torch), - "Skew": (skew_np, skew_torch), - "Kurt": (kurt_np, kurt_torch), - "Median": (median_np, median_torch), - "Sum": (sum_np, sum_torch), - "Prod": (prod_np, prod_torch), - "TsMax": (ts_max_np, ts_max_torch), - "TsMin": (ts_min_np, ts_min_torch), - "TsArgMax": (ts_argmax_np, ts_argmax_torch), - "TsArgMin": (ts_argmin_np, ts_argmin_torch), - "TsRank": (ts_rank_np, ts_rank_torch), - "Quantile": (quantile_np, quantile_torch), - "CountNaN": (count_nan_np, count_nan_torch), - "CountNotNaN": (count_not_nan_np, count_not_nan_torch), -} diff --git a/src/factorminer/factorminer/operators/timeseries.py b/src/factorminer/factorminer/operators/timeseries.py deleted file mode 100644 index ced08c8..0000000 --- a/src/factorminer/factorminer/operators/timeseries.py +++ /dev/null @@ -1,395 +0,0 @@ -"""Time-series operators along the T axis for each asset row. - -Input shape: ``(M, T)`` -> output shape ``(M, T)``. 
-""" - -from __future__ import annotations - -import numpy as np - -try: - import torch -except ImportError: - torch = None # type: ignore[assignment] - - -# =========================================================================== -# NumPy implementations -# =========================================================================== - -def delta_np(x: np.ndarray, window: int = 1) -> np.ndarray: - """x[t] - x[t - period].""" - window = int(window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - if window < T: - out[:, window:] = x[:, window:] - x[:, :-window] - return out - - -def delay_np(x: np.ndarray, window: int = 1) -> np.ndarray: - """x[t - period] (lag operator).""" - window = int(window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - if window < T: - out[:, window:] = x[:, :-window] - return out - - -def return_np(x: np.ndarray, window: int = 1) -> np.ndarray: - """x[t] / x[t-d] - 1.""" - window = int(window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - if window < T: - prev = x[:, :-window] - mask = np.abs(prev) > 1e-10 - out_slice = np.full_like(prev, np.nan) - out_slice[mask] = x[:, window:][mask] / prev[mask] - 1.0 - out[:, window:] = out_slice - return out - - -def log_return_np(x: np.ndarray, window: int = 1) -> np.ndarray: - """log(x[t] / x[t-d]).""" - window = int(window) - M, T = x.shape - out = np.full_like(x, np.nan, dtype=np.float64) - if window < T: - prev = x[:, :-window] - curr = x[:, window:] - with np.errstate(invalid="ignore", divide="ignore"): - ratio = np.where(np.abs(prev) > 1e-10, curr / prev, np.nan) - out[:, window:] = np.where(ratio > 0, np.log(ratio), np.nan) - return out - - -def corr_np(x: np.ndarray, y: np.ndarray, window: int = 10) -> np.ndarray: - """Rolling Pearson correlation.""" - window = int(window) - M, T = x.shape - if T < window: - return np.full_like(x, np.nan) - - from factorminer.operators.statistical import _rolling_np, _pad_front - - wx = 
_rolling_np(x, window) - wy = _rolling_np(y, window) - if wx is None or wy is None: - return np.full_like(x, np.nan) - - mx = np.nanmean(wx, axis=2, keepdims=True) - my = np.nanmean(wy, axis=2, keepdims=True) - dx = wx - mx - dy = wy - my - with np.errstate(invalid="ignore", divide="ignore"): - cov = np.nanmean(dx * dy, axis=2) - sx = np.sqrt(np.nanmean(dx ** 2, axis=2)) - sy = np.sqrt(np.nanmean(dy ** 2, axis=2)) - result = np.where((sx > 1e-10) & (sy > 1e-10), cov / (sx * sy), np.nan) - return _pad_front(result, window, T) - - -def cov_np(x: np.ndarray, y: np.ndarray, window: int = 10) -> np.ndarray: - """Rolling covariance.""" - window = int(window) - M, T = x.shape - if T < window: - return np.full_like(x, np.nan) - - from factorminer.operators.statistical import _rolling_np, _pad_front - - wx = _rolling_np(x, window) - wy = _rolling_np(y, window) - if wx is None or wy is None: - return np.full_like(x, np.nan) - - mx = np.nanmean(wx, axis=2, keepdims=True) - my = np.nanmean(wy, axis=2, keepdims=True) - result = np.nanmean((wx - mx) * (wy - my), axis=2) - return _pad_front(result, window, T) - - -def beta_np(x: np.ndarray, y: np.ndarray, window: int = 10) -> np.ndarray: - """Rolling regression beta: slope of x regressed on y.""" - window = int(window) - M, T = x.shape - if T < window: - return np.full_like(x, np.nan) - - from factorminer.operators.statistical import _rolling_np, _pad_front - - wx = _rolling_np(x, window) - wy = _rolling_np(y, window) - if wx is None or wy is None: - return np.full_like(x, np.nan) - - my = np.nanmean(wy, axis=2, keepdims=True) - mx = np.nanmean(wx, axis=2, keepdims=True) - dy = wy - my - dx = wx - mx - with np.errstate(invalid="ignore", divide="ignore"): - var_y = np.nanmean(dy ** 2, axis=2) - cov_xy = np.nanmean(dx * dy, axis=2) - result = np.where(var_y > 1e-10, cov_xy / var_y, np.nan) - return _pad_front(result, window, T) - - -def resid_np(x: np.ndarray, y: np.ndarray, window: int = 10) -> np.ndarray: - """Rolling regression 
residual: x - beta * y - alpha, evaluated at last point.""" - window = int(window) - M, T = x.shape - if T < window: - return np.full_like(x, np.nan) - - from factorminer.operators.statistical import _rolling_np, _pad_front - - wx = _rolling_np(x, window) - wy = _rolling_np(y, window) - if wx is None or wy is None: - return np.full_like(x, np.nan) - - mx = np.nanmean(wx, axis=2, keepdims=True) - my = np.nanmean(wy, axis=2, keepdims=True) - dx = wx - mx - dy = wy - my - with np.errstate(invalid="ignore", divide="ignore"): - var_y = np.nanmean(dy ** 2, axis=2, keepdims=True) - cov_xy = np.nanmean(dx * dy, axis=2, keepdims=True) - b = np.where(var_y > 1e-10, cov_xy / var_y, 0.0) - a = mx - b * my - # Residual at last time step in each window - result = (wx[:, :, -1:] - b * wy[:, :, -1:] - a).squeeze(2) - return _pad_front(result, window, T) - - -def wma_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Linearly weighted moving average.""" - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _rolling_np, _pad_front - - w = _rolling_np(x, window) - if w is None: - return np.full_like(x, np.nan) - weights = np.arange(1, window + 1, dtype=np.float64) - weights = weights / weights.sum() - result = np.nansum(w * weights[np.newaxis, np.newaxis, :], axis=2) - return _pad_front(result, window, T) - - -def decay_np(x: np.ndarray, window: int = 10) -> np.ndarray: - """Exponentially decaying sum (linearly decaying weighted average).""" - return wma_np(x, window) - - -def cumsum_np(x: np.ndarray) -> np.ndarray: - return np.nancumsum(x, axis=1) - - -def cumprod_np(x: np.ndarray) -> np.ndarray: - filled = np.where(np.isnan(x), 1.0, x) - return np.cumprod(filled, axis=1) - - -def cummax_np(x: np.ndarray) -> np.ndarray: - out = np.copy(x) - for t in range(1, x.shape[1]): - out[:, t] = np.fmax(out[:, t - 1], x[:, t]) - return out - - -def cummin_np(x: np.ndarray) -> np.ndarray: - out = np.copy(x) - for t in range(1, x.shape[1]): - out[:, t] = 
np.fmin(out[:, t - 1], x[:, t]) - return out - - -# =========================================================================== -# PyTorch implementations -# =========================================================================== - -def delta_torch(x: "torch.Tensor", window: int = 1) -> "torch.Tensor": - window = int(window) - M, T = x.shape - out = torch.full_like(x, float("nan")) - if window < T: - out[:, window:] = x[:, window:] - x[:, :-window] - return out - - -def delay_torch(x: "torch.Tensor", window: int = 1) -> "torch.Tensor": - window = int(window) - M, T = x.shape - out = torch.full_like(x, float("nan")) - if window < T: - out[:, window:] = x[:, :-window] - return out - - -def return_torch(x: "torch.Tensor", window: int = 1) -> "torch.Tensor": - window = int(window) - M, T = x.shape - out = torch.full_like(x, float("nan")) - if window < T: - prev = x[:, :-window] - mask = prev.abs() > 1e-10 - r = torch.full_like(prev, float("nan")) - r[mask] = x[:, window:][mask] / prev[mask] - 1.0 - out[:, window:] = r - return out - - -def log_return_torch(x: "torch.Tensor", window: int = 1) -> "torch.Tensor": - window = int(window) - M, T = x.shape - out = torch.full_like(x, float("nan")) - if window < T: - prev = x[:, :-window] - curr = x[:, window:] - mask = prev.abs() > 1e-10 - ratio = torch.full_like(prev, float("nan")) - ratio[mask] = curr[mask] / prev[mask] - lr = torch.full_like(prev, float("nan")) - pos = ratio > 0 - lr[pos] = ratio[pos].log() - out[:, window:] = lr - return out - - -def corr_torch(x: "torch.Tensor", y: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - - wx = _unfold_torch(x, window) - wy = _unfold_torch(y, window) - mx = wx.nanmean(dim=2, keepdim=True) - my = wy.nanmean(dim=2, keepdim=True) - dx = (wx - mx).nan_to_num(0.0) - dy = (wy - my).nan_to_num(0.0) - not_nan = ~(torch.isnan(wx) | torch.isnan(wy)) - n = 
not_nan.sum(dim=2).float() - cov = (dx * dy * not_nan).sum(dim=2) / n.clamp(min=1) - sx = ((dx ** 2 * not_nan).sum(dim=2) / n.clamp(min=1)).sqrt() - sy = ((dy ** 2 * not_nan).sum(dim=2) / n.clamp(min=1)).sqrt() - result = torch.where((sx > 1e-10) & (sy > 1e-10), cov / (sx * sy), - torch.tensor(float("nan"), device=x.device)) - return _pad_front_torch(result, window, T) - - -def cov_torch(x: "torch.Tensor", y: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - - wx = _unfold_torch(x, window) - wy = _unfold_torch(y, window) - mx = wx.nanmean(dim=2, keepdim=True) - my = wy.nanmean(dim=2, keepdim=True) - dx = (wx - mx).nan_to_num(0.0) - dy = (wy - my).nan_to_num(0.0) - not_nan = ~(torch.isnan(wx) | torch.isnan(wy)) - n = not_nan.sum(dim=2).float() - result = (dx * dy * not_nan).sum(dim=2) / n.clamp(min=1) - return _pad_front_torch(result, window, T) - - -def beta_torch(x: "torch.Tensor", y: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - - wx = _unfold_torch(x, window) - wy = _unfold_torch(y, window) - mx = wx.nanmean(dim=2, keepdim=True) - my = wy.nanmean(dim=2, keepdim=True) - dx = (wx - mx).nan_to_num(0.0) - dy = (wy - my).nan_to_num(0.0) - not_nan = ~(torch.isnan(wx) | torch.isnan(wy)) - n = not_nan.sum(dim=2).float() - var_y = (dy ** 2 * not_nan).sum(dim=2) / n.clamp(min=1) - cov_xy = (dx * dy * not_nan).sum(dim=2) / n.clamp(min=1) - result = torch.where(var_y > 1e-10, cov_xy / var_y, - torch.tensor(float("nan"), device=x.device)) - return _pad_front_torch(result, window, T) - - -def resid_torch(x: "torch.Tensor", y: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - - wx = _unfold_torch(x, 
window) - wy = _unfold_torch(y, window) - mx = wx.nanmean(dim=2, keepdim=True) - my = wy.nanmean(dim=2, keepdim=True) - dx = (wx - mx).nan_to_num(0.0) - dy = (wy - my).nan_to_num(0.0) - not_nan = ~(torch.isnan(wx) | torch.isnan(wy)) - n = not_nan.sum(dim=2, keepdim=True).float() - var_y = (dy ** 2 * not_nan).sum(dim=2, keepdim=True) / n.clamp(min=1) - cov_xy = (dx * dy * not_nan).sum(dim=2, keepdim=True) / n.clamp(min=1) - b = torch.where(var_y > 1e-10, cov_xy / var_y, torch.zeros_like(var_y)) - a = mx - b * my - result = (wx[:, :, -1:] - b * wy[:, :, -1:] - a).squeeze(2) - return _pad_front_torch(result, window, T) - - -def wma_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - window = int(window) - M, T = x.shape - from factorminer.operators.statistical import _unfold_torch, _pad_front_torch - - w = _unfold_torch(x, window) - weights = torch.arange(1, window + 1, dtype=x.dtype, device=x.device).float() - weights = weights / weights.sum() - filled = w.nan_to_num(0.0) - result = (filled * weights.unsqueeze(0).unsqueeze(0)).sum(dim=2) - return _pad_front_torch(result, window, T) - - -def decay_torch(x: "torch.Tensor", window: int = 10) -> "torch.Tensor": - return wma_torch(x, window) - - -def cumsum_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.nan_to_num(0.0).cumsum(dim=1) - - -def cumprod_torch(x: "torch.Tensor") -> "torch.Tensor": - return x.nan_to_num(1.0).cumprod(dim=1) - - -def cummax_torch(x: "torch.Tensor") -> "torch.Tensor": - filled = x.nan_to_num(float("-inf")) - return filled.cummax(dim=1).values - - -def cummin_torch(x: "torch.Tensor") -> "torch.Tensor": - filled = x.nan_to_num(float("inf")) - return filled.cummin(dim=1).values - - -# =========================================================================== -# Registration table -# =========================================================================== - -TIMESERIES_OPS = { - "Delta": (delta_np, delta_torch), - "Delay": (delay_np, delay_torch), - "Return": (return_np, 
return_torch), - "LogReturn": (log_return_np, log_return_torch), - "Corr": (corr_np, corr_torch), - "Cov": (cov_np, cov_torch), - "Beta": (beta_np, beta_torch), - "Resid": (resid_np, resid_torch), - "WMA": (wma_np, wma_torch), - "Decay": (decay_np, decay_torch), - "CumSum": (cumsum_np, cumsum_torch), - "CumProd": (cumprod_np, cumprod_torch), - "CumMax": (cummax_np, cummax_torch), - "CumMin": (cummin_np, cummin_torch), -} diff --git a/src/factorminer/factorminer/tests/__init__.py b/src/factorminer/factorminer/tests/__init__.py deleted file mode 100644 index b66dd9f..0000000 --- a/src/factorminer/factorminer/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Test suite for FactorMiner.""" diff --git a/src/factorminer/factorminer/tests/conftest.py b/src/factorminer/factorminer/tests/conftest.py deleted file mode 100644 index 2a6791a..0000000 --- a/src/factorminer/factorminer/tests/conftest.py +++ /dev/null @@ -1,163 +0,0 @@ -"""Shared pytest fixtures for FactorMiner test suite.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.memory.experience_memory import ExperienceMemoryManager - - -# --------------------------------------------------------------------------- -# Mock data fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def rng(): - """Seeded random generator for reproducibility.""" - return np.random.default_rng(42) - - -@pytest.fixture -def small_data(rng): - """Small (M=10, T=50) synthetic dataset dict mapping feature names to arrays.""" - M, T = 10, 50 - close = 100.0 + np.cumsum(rng.normal(0, 0.5, (M, T)), axis=1) - open_ = close + rng.normal(0, 0.1, (M, T)) - high = np.maximum(close, open_) + np.abs(rng.normal(0, 0.2, (M, T))) - low = np.minimum(close, open_) - np.abs(rng.normal(0, 0.2, (M, T))) - low = np.maximum(low, 1.0) - volume = np.abs(rng.normal(1e6, 
1e5, (M, T))) - vwap = (high + low + close) / 3 - amt = volume * vwap - returns = np.zeros((M, T)) - returns[:, 1:] = np.diff(close, axis=1) / close[:, :-1] - - return { - "$open": open_, - "$high": high, - "$low": low, - "$close": close, - "$volume": volume, - "$amt": amt, - "$vwap": vwap, - "$returns": returns, - } - - -@pytest.fixture -def medium_data(rng): - """Medium (M=20, T=100) synthetic dataset for evaluation tests.""" - M, T = 20, 100 - close = 50.0 + np.cumsum(rng.normal(0, 0.3, (M, T)), axis=1) - open_ = close + rng.normal(0, 0.05, (M, T)) - high = np.maximum(close, open_) + np.abs(rng.normal(0, 0.1, (M, T))) - low = np.minimum(close, open_) - np.abs(rng.normal(0, 0.1, (M, T))) - low = np.maximum(low, 1.0) - volume = np.abs(rng.normal(1e6, 1e5, (M, T))) - vwap = (high + low + close) / 3 - amt = volume * vwap - returns = np.zeros((M, T)) - returns[:, 1:] = np.diff(close, axis=1) / close[:, :-1] - - return { - "$open": open_, - "$high": high, - "$low": low, - "$close": close, - "$volume": volume, - "$amt": amt, - "$vwap": vwap, - "$returns": returns, - } - - -# --------------------------------------------------------------------------- -# Library fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def mock_library(rng): - """Small FactorLibrary pre-loaded with 3 known factors.""" - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - - M, T = 20, 60 - for i in range(3): - signals = rng.normal(0, 1, (M, T)) - factor = Factor( - id=0, - name=f"test_factor_{i}", - formula=f"Neg($close)" if i == 0 else f"CsRank(Mean($close, {10 + i * 5}))", - category="test", - ic_mean=0.05 + i * 0.01, - icir=0.8 + i * 0.1, - ic_win_rate=0.55 + i * 0.05, - max_correlation=0.1 * i, - batch_number=1, - signals=signals, - ) - lib.admit_factor(factor) - - return lib - - -# --------------------------------------------------------------------------- -# Memory fixtures -# 
--------------------------------------------------------------------------- - -@pytest.fixture -def mock_memory(): - """ExperienceMemoryManager with default patterns initialized.""" - return ExperienceMemoryManager( - max_success_patterns=20, - max_failure_patterns=30, - max_insights=15, - ) - - -@pytest.fixture -def sample_trajectory(): - """Sample batch trajectory for memory update tests.""" - return [ - { - "formula": "CsRank(Corr($close, $volume, 20))", - "factor_id": "f001", - "ic": 0.08, - "icir": 1.2, - "max_correlation": 0.15, - "correlated_with": "", - "admitted": True, - "rejection_reason": "", - }, - { - "formula": "Neg(Div(Sub($close, $vwap), $vwap))", - "factor_id": "f002", - "ic": 0.06, - "icir": 0.9, - "max_correlation": 0.65, - "correlated_with": "existing_factor_3", - "admitted": False, - "rejection_reason": "Max correlation 0.65 >= threshold 0.5", - }, - { - "formula": "IfElse(Skew($close, 20), CsRank($returns), Neg($returns))", - "factor_id": "f003", - "ic": 0.10, - "icir": 1.5, - "max_correlation": 0.20, - "correlated_with": "", - "admitted": True, - "rejection_reason": "", - }, - { - "formula": "CsZScore(Std($returns, 10))", - "factor_id": "f004", - "ic": 0.03, - "icir": 0.4, - "max_correlation": 0.70, - "correlated_with": "existing_factor_1", - "admitted": False, - "rejection_reason": "IC 0.03 below threshold 0.04", - }, - ] diff --git a/src/factorminer/factorminer/tests/test_auto_inventor.py b/src/factorminer/factorminer/tests/test_auto_inventor.py deleted file mode 100644 index 76dead4..0000000 --- a/src/factorminer/factorminer/tests/test_auto_inventor.py +++ /dev/null @@ -1,130 +0,0 @@ -"""Tests for auto-operator invention (operators/auto_inventor.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.operators.auto_inventor import ( - OperatorInventor, - ProposedOperator, - _BLOCKED_TOKENS, -) -from src.factorminer.factorminer.operators.custom import CustomOperatorStore - - -# 
----------------------------------------------------------------------- -# _compile_safely: valid numpy code -> callable -# ----------------------------------------------------------------------- - -def test_compile_safely_valid_code(): - """Valid numpy code defining compute() should return a callable.""" - code = "def compute(x):\n return np.nanmean(x, axis=1, keepdims=True) * np.ones_like(x)" - - # Use OperatorInventor._compile_safely as a static-like test - data = np.random.default_rng(42).normal(0, 1, (10, 50)) - inventor = OperatorInventor( - llm_provider=_mock_provider(), - data_tensor=data.reshape(10, 50, 1), - returns=data, - ) - fn = inventor._compile_safely(code) - assert fn is not None - assert callable(fn) - result = fn(data) - assert isinstance(result, np.ndarray) - - -# ----------------------------------------------------------------------- -# _compile_safely: os.system -> returns None (SECURITY) -# ----------------------------------------------------------------------- - -def test_compile_safely_blocks_os_system(): - """Code containing os.system should be blocked.""" - code = "import os\ndef compute(x):\n os.system('echo hacked')\n return x" - inventor = _make_inventor() - fn = inventor._compile_safely(code) - assert fn is None - - -# ----------------------------------------------------------------------- -# _compile_safely: import os -> returns None (SECURITY) -# ----------------------------------------------------------------------- - -def test_compile_safely_blocks_import_os(): - """Code with 'import ' token should be blocked.""" - code = "import os\ndef compute(x):\n return x" - inventor = _make_inventor() - fn = inventor._compile_safely(code) - assert fn is None - - -def test_compile_safely_blocks_eval(): - """Code with eval() should be blocked.""" - code = "def compute(x):\n return eval('x + 1')" - inventor = _make_inventor() - fn = inventor._compile_safely(code) - assert fn is None - - -# 
----------------------------------------------------------------------- -# CustomOperatorStore: register and list -# ----------------------------------------------------------------------- - -def test_custom_operator_store_register_and_list(tmp_path): - store = CustomOperatorStore(store_dir=str(tmp_path / "ops")) - - from factorminer.core.types import OperatorSpec, OperatorType, SignatureType - spec = OperatorSpec( - name="TestOp", - arity=1, - category=OperatorType.AUTO_INVENTED, - signature=SignatureType.ELEMENT_WISE, - description="test operator", - ) - from factorminer.operators.custom import CustomOperator - op = CustomOperator( - name="TestOp", - spec=spec, - numpy_code="def compute(x): return x * 2", - numpy_fn=lambda x: x * 2, - validation_ic=0.05, - ) - store.register(op) - assert "TestOp" in store.list_operators() - assert store.get_operator("TestOp") is not None - - -# ----------------------------------------------------------------------- -# ProposedOperator dataclass -# ----------------------------------------------------------------------- - -def test_proposed_operator_dataclass(): - op = ProposedOperator( - name="TestOp", - arity=1, - description="A test operator", - numpy_code="def compute(x): return x", - ) - assert op.name == "TestOp" - assert op.arity == 1 - assert op.param_names == () - assert op.based_on == [] - - -# ----------------------------------------------------------------------- -# Helpers -# ----------------------------------------------------------------------- - -def _mock_provider(): - from factorminer.agent.llm_interface import MockProvider - return MockProvider() - - -def _make_inventor(): - data = np.random.default_rng(42).normal(0, 1, (10, 50)) - return OperatorInventor( - llm_provider=_mock_provider(), - data_tensor=data.reshape(10, 50, 1), - returns=data, - ) diff --git a/src/factorminer/factorminer/tests/test_benchmark.py b/src/factorminer/factorminer/tests/test_benchmark.py deleted file mode 100644 index f6cd1a8..0000000 
--- a/src/factorminer/factorminer/tests/test_benchmark.py +++ /dev/null @@ -1,484 +0,0 @@ -"""Benchmark-runtime and CLI coverage.""" - -from __future__ import annotations - -import json -from types import SimpleNamespace - -from click.testing import CliRunner -import numpy as np - -from src.factorminer.factorminer.benchmark.runtime import ( - build_benchmark_library, - run_table1_benchmark, - select_frozen_top_k, -) -from src.factorminer.factorminer.benchmark.helix_benchmark import StatisticalComparisonTests, _json_safe -from src.factorminer.factorminer.cli import main -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.library_io import save_library -from src.factorminer.factorminer.core.session import MiningSession -from src.factorminer.factorminer.evaluation.runtime import FactorEvaluationArtifact -from src.factorminer.factorminer.utils.config import load_config -from run_phase2_benchmark import ( - _build_phase2_manifest, - _collect_runtime_manifest_refs, - _generate_markdown_report, - _write_markdown_table, -) - - -def _artifact( - factor_id: int, - formula: str, - train_ic: float, - train_icir: float, - signal_scale: float, -) -> FactorEvaluationArtifact: - signal = np.array( - [ - [1.0, 2.0, 3.0], - [2.0, 1.0, 0.0], - [0.5, 0.3, 0.1], - ], - dtype=np.float64, - ) * signal_scale - return FactorEvaluationArtifact( - factor_id=factor_id, - name=f"factor_{factor_id}", - formula=formula, - category="test", - parse_ok=True, - signals_full=signal, - split_signals={"train": signal, "test": signal, "full": signal}, - split_stats={ - "train": { - "ic_mean": train_ic, - "ic_abs_mean": abs(train_ic), - "icir": train_icir, - "ic_win_rate": 0.6, - }, - "test": { - "ic_mean": train_ic / 2.0, - "ic_abs_mean": abs(train_ic / 2.0), - "icir": train_icir / 2.0, - "ic_win_rate": 0.5, - }, - "full": { - "ic_mean": train_ic, - "ic_abs_mean": abs(train_ic), - "icir": train_icir, - "ic_win_rate": 0.6, - }, - }, - 
) - - -def test_select_frozen_top_k_prefers_thresholded_admitted_then_fills(): - cfg = load_config() - artifacts = [ - _artifact(1, "Neg($close)", 0.07, 0.8, 1.0), - _artifact(2, "Neg($open)", 0.06, 0.7, 0.7), - _artifact(3, "Neg($high)", 0.049, 0.9, 0.2), - ] - library, _ = build_benchmark_library(artifacts, cfg, split_name="train") - - frozen = select_frozen_top_k( - artifacts, - library, - top_k=3, - split_name="train", - min_ic=0.05, - min_icir=0.5, - ) - - assert [artifact.formula for artifact in frozen[:2]] == ["Neg($close)", "Neg($open)"] - assert frozen[2].formula == "Neg($high)" - - -def test_build_benchmark_library_rejects_low_ic_candidates(): - cfg = load_config() - artifacts = [ - _artifact(1, "Neg($close)", 0.07, 0.8, 1.0), - _artifact(2, "Neg($open)", 0.01, 0.6, 0.9), - ] - - library, stats = build_benchmark_library(artifacts, cfg, split_name="train") - - assert library.size == 1 - assert stats["threshold_rejections"] == 1 - assert stats["admitted"] == 1 - - -def test_benchmark_table1_cli_invokes_runtime(monkeypatch, tmp_path): - captured = {} - - def _fake_run(*args, **kwargs): - captured["called"] = True - return { - "factor_miner": { - "freeze_library_size": 12, - "frozen_top_k": [{"name": "f1"}], - "universes": { - "CSI500": { - "library": {"ic": 0.08, "icir": 0.9, "avg_abs_rho": 0.2} - } - }, - } - } - - monkeypatch.setattr("src.factorminer.factorminer.benchmark.runtime.run_table1_benchmark", _fake_run) - - runner = CliRunner() - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(tmp_path / "out"), - "benchmark", - "table1", - "--mock", - ], - ) - - assert result.exit_code == 0, result.output - assert captured.get("called") is True - assert "Benchmark Table 1" in result.output - assert "Baseline: factor_miner" in result.output - assert "CSI500: library IC=0.0800" in result.output - - -def test_table1_manifest_includes_saved_library_provenance(monkeypatch, tmp_path): - saved_root = tmp_path / "saved" - library_base = saved_root 
/ "factor_miner_library" - - library = FactorLibrary() - factor = Factor( - id=0, - name="saved_factor", - formula="Neg($close)", - category="test", - ic_mean=0.07, - icir=0.8, - ic_win_rate=0.6, - max_correlation=0.1, - batch_number=1, - signals=np.array( - [ - [1.0, 2.0, 3.0], - [0.5, 0.4, 0.3], - [0.2, 0.3, 0.4], - ], - dtype=np.float64, - ), - ) - library.admit_factor(factor) - save_library(library, library_base) - - session = MiningSession( - session_id="session-001", - output_dir=str(saved_root), - library_path=str(library_base), - ) - session.record_iteration({"candidates": 3, "admitted": 1, "replaced": 0, "library_size": 1}) - session.record_iteration({"candidates": 2, "admitted": 1, "replaced": 0, "library_size": 1}) - session.finalize() - session.save(saved_root / "session.json") - with open(saved_root / "session_log.json", "w") as fp: - json.dump({"summary": session.get_summary(), "iterations": session.iterations}, fp) - - cfg = load_config() - output_dir = tmp_path / "results" - artifact = _artifact(1, "Neg($close)", 0.07, 0.8, 1.0) - - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.load_benchmark_dataset", - lambda *args, **kwargs: (SimpleNamespace(), "freeze-hash"), - ) - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.evaluate_factors", - lambda *args, **kwargs: [artifact], - ) - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.evaluate_frozen_set", - lambda frozen, dataset, **kwargs: { - "factor_count": len(frozen), - "library": {"ic": 0.1, "icir": 1.0, "avg_abs_rho": 0.2}, - "combinations": {}, - "selections": {}, - }, - ) - - run_table1_benchmark( - cfg, - output_dir, - baseline_names=["factor_miner"], - factor_miner_library_path=str(library_base), - ) - - result_path = output_dir / "benchmark" / "table1" / "factor_miner.json" - manifest_path = output_dir / "benchmark" / "table1" / "factor_miner_manifest.json" - result = json.loads(result_path.read_text()) - manifest = 
json.loads(manifest_path.read_text()) - - provenance = manifest["baseline_provenance"]["factor_miner"] - assert provenance["kind"] == "saved_library" - assert provenance["library_summary"]["factor_count"] == 1 - assert provenance["session_summary"]["total_iterations"] == 2 - assert provenance["source_files"]["library_json"]["path"].endswith("factor_miner_library.json") - assert provenance["source_files"]["signal_cache"]["path"].endswith("factor_miner_library_signals.npz") - assert manifest["artifact_paths"]["result"] == str(result_path) - assert manifest["artifact_paths"]["manifest"] == str(manifest_path) - assert result["provenance"]["kind"] == "saved_library" - - -def test_phase2_manifest_references_runtime_manifest_and_sanitizes_stats(tmp_path): - runtime_root = tmp_path / "runtime" - manifest_path = runtime_root / "benchmark" / "table1" / "factor_miner_manifest.json" - manifest_path.parent.mkdir(parents=True, exist_ok=True) - manifest_path.write_text( - json.dumps( - { - "benchmark_name": "table1", - "baseline": "factor_miner", - "mode": "paper", - "artifact_paths": {"result": "result.json", "manifest": str(manifest_path)}, - "baseline_provenance": { - "factor_miner": { - "kind": "saved_library", - "source": "factor_miner", - } - }, - } - ) - ) - - refs = _collect_runtime_manifest_refs(runtime_root) - assert len(refs) == 1 - assert refs[0]["path"] == str(manifest_path) - assert refs[0]["baseline_provenance"]["factor_miner"]["kind"] == "saved_library" - - phase2_manifest = _build_phase2_manifest( - output_dir=tmp_path / "phase2", - methods=["ralph_loop", "helix_phase2"], - seed=7, - n_factors=40, - mock=True, - data_path=None, - full_ablation=False, - skip_ablation=True, - artifact_paths={"html_report": str(tmp_path / "phase2" / "benchmark_report.html")}, - statistical_tests={ - "diebold_mariano": {"dm_stat": np.nan, "p_value": np.inf}, - "bootstrap_ci_95": {"lower": -np.inf, "upper": np.nan}, - }, - ablation_configs=["full"], - 
runtime_manifest_root=runtime_root, - ) - - assert phase2_manifest["runtime_manifest_refs"][0]["path"] == str(manifest_path) - assert phase2_manifest["statistical_tests"]["diebold_mariano"]["dm_stat"] is None - assert phase2_manifest["statistical_tests"]["diebold_mariano"]["p_value"] is None - assert phase2_manifest["statistical_tests"]["bootstrap_ci_95"]["lower"] is None - dumped = json.dumps(_json_safe(phase2_manifest), allow_nan=False) - assert "NaN" not in dumped - - -def test_diebold_mariano_handles_identical_series_without_nan_direction(): - tests = StatisticalComparisonTests(seed=42) - series = np.array([0.05, 0.05, 0.05, 0.05, 0.05], dtype=np.float64) - - result = tests.diebold_mariano_test(series, series.copy()) - - assert result.direction == "no_difference" - assert result.p_value == 1.0 - assert np.isfinite(result.dm_statistic) - - -def test_json_safe_removes_non_finite_values(): - payload = { - "finite": 1.5, - "nan": float("nan"), - "nested": [np.float64(np.inf), {"value": -np.inf}], - } - - cleaned = _json_safe(payload) - - assert cleaned == {"finite": 1.5, "nan": None, "nested": [None, {"value": None}]} - dumped = json.dumps(cleaned, allow_nan=False) - assert "NaN" not in dumped - - -def test_markdown_artifacts_use_expected_paths(tmp_path): - table_stub = SimpleNamespace( - to_markdown=lambda **kwargs: "| a | b |\n|---|---|\n| 1 | 2 |\n" - ) - bench_result = SimpleNamespace( - factor_library_metrics=table_stub, - combination_metrics=table_stub, - selection_metrics=table_stub, - speed_metrics=table_stub, - statistical_tests={"diebold_mariano": {"dm_stat": 0.0, "p_value": 1.0}}, - to_markdown_table=lambda: "| a | b |\n|---|---|\n| 1 | 2 |\n", - ) - - table_path = _write_markdown_table(bench_result, tmp_path) - report_path = _generate_markdown_report(bench_result, None, tmp_path) - - assert table_path.endswith("benchmark_report.md") - assert report_path.endswith("benchmark_report_full.md") - assert (tmp_path / "benchmark_report.md").exists() - assert 
(tmp_path / "benchmark_report_full.md").exists() - - -def _runtime_dataset_stub(): - data_tensor = np.ones((2, 6, 8), dtype=np.float64) - returns = np.array( - [ - [0.01, 0.02, 0.01, 0.03, 0.02, 0.01], - [0.02, 0.01, 0.03, 0.02, 0.01, 0.02], - ], - dtype=np.float64, - ) - splits = { - "train": SimpleNamespace( - indices=np.array([0, 1, 2]), - returns=returns[:, :3], - timestamps=np.arange(3), - ), - "test": SimpleNamespace( - indices=np.array([3, 4, 5]), - returns=returns[:, 3:], - timestamps=np.arange(3, 6), - ), - "full": SimpleNamespace( - indices=np.arange(6), - returns=returns, - timestamps=np.arange(6), - ), - } - - return SimpleNamespace( - data_tensor=data_tensor, - returns=returns, - data_dict={ - "$open": data_tensor[:, :, 0], - "$high": data_tensor[:, :, 1], - "$low": data_tensor[:, :, 2], - "$close": data_tensor[:, :, 3], - "$volume": data_tensor[:, :, 4], - "$amt": data_tensor[:, :, 5], - "$vwap": data_tensor[:, :, 6], - "$returns": data_tensor[:, :, 7], - }, - target_panels={"paper": returns}, - target_specs={"paper": SimpleNamespace(holding_bars=1)}, - get_split=lambda name: splits[name], - ) - - -def _single_factor_library(): - library = FactorLibrary() - library.admit_factor( - Factor( - id=0, - name="runtime_factor", - formula="Neg($close)", - category="test", - ic_mean=0.08, - icir=0.9, - ic_win_rate=0.6, - max_correlation=0.0, - batch_number=1, - signals=np.ones((2, 3), dtype=np.float64), - ) - ) - return library - - -def test_table1_runtime_methods_instantiate_live_loops(monkeypatch, tmp_path): - cfg = load_config() - calls = [] - - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.load_benchmark_dataset", - lambda *args, **kwargs: (_runtime_dataset_stub(), "dataset-hash"), - ) - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime._get_baseline_entries", - lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("catalog fallback used")), - ) - monkeypatch.setattr( - 
"src.factorminer.factorminer.benchmark.runtime.evaluate_factors", - lambda *args, **kwargs: [_artifact(1, "Neg($close)", 0.08, 0.9, 1.0)], - ) - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.evaluate_frozen_set", - lambda frozen, dataset, **kwargs: { - "factor_count": len(frozen), - "library": {"ic": 0.1, "icir": 1.0, "avg_abs_rho": 0.2}, - "combinations": { - "equal_weight": {"ic": 0.12, "icir": 1.1, "turnover": 0.3}, - "ic_weighted": {"ic": 0.13, "icir": 1.2, "turnover": 0.25}, - }, - "selections": {"lasso": {"ic": 0.09, "icir": 0.8}}, - }, - ) - - def _fake_ralph_run(self, *args, **kwargs): - calls.append("ralph") - return _single_factor_library() - - def _fake_helix_run(self, *args, **kwargs): - calls.append("helix") - return _single_factor_library() - - monkeypatch.setattr("src.factorminer.factorminer.core.ralph_loop.RalphLoop.run", _fake_ralph_run) - monkeypatch.setattr("src.factorminer.factorminer.core.helix_loop.HelixLoop.run", _fake_helix_run) - - payload = run_table1_benchmark( - cfg, - tmp_path, - mock=True, - baseline_names=["ralph_loop", "helix_phase2"], - use_runtime_loops=True, - ) - - assert calls == ["ralph", "helix"] - assert payload["ralph_loop"]["provenance"]["kind"] == "runtime_loop" - assert payload["helix_phase2"]["provenance"]["kind"] == "runtime_loop" - - -def test_table1_runtime_methods_fail_loudly_without_catalog_fallback(monkeypatch, tmp_path): - cfg = load_config() - fallback_called = {"value": False} - - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime.load_benchmark_dataset", - lambda *args, **kwargs: (_runtime_dataset_stub(), "dataset-hash"), - ) - - def _forbidden_catalog(*args, **kwargs): - fallback_called["value"] = True - raise AssertionError("catalog fallback used") - - monkeypatch.setattr("src.factorminer.factorminer.benchmark.runtime._get_baseline_entries", _forbidden_catalog) - monkeypatch.setattr( - "src.factorminer.factorminer.benchmark.runtime._run_runtime_mining_loop", - lambda 
*args, **kwargs: (_ for _ in ()).throw(RuntimeError("runtime loop failed")), - ) - - try: - run_table1_benchmark( - cfg, - tmp_path, - mock=True, - baseline_names=["ralph_loop"], - use_runtime_loops=True, - ) - assert False, "expected runtime loop failure" - except RuntimeError as exc: - assert "runtime loop failed" in str(exc) - - assert fallback_called["value"] is False diff --git a/src/factorminer/factorminer/tests/test_canonicalizer.py b/src/factorminer/factorminer/tests/test_canonicalizer.py deleted file mode 100644 index d680b2b..0000000 --- a/src/factorminer/factorminer/tests/test_canonicalizer.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Tests for the SymPy-based formula canonicalizer (core/canonicalizer.py).""" - -from __future__ import annotations - -import time - -import pytest - -from src.factorminer.factorminer.core.canonicalizer import FormulaCanonicalizer -from src.factorminer.factorminer.core.parser import parse - - -@pytest.fixture -def canon(): - return FormulaCanonicalizer() - - -# ----------------------------------------------------------------------- -# Double negation: Neg(Neg($close)) == $close -# ----------------------------------------------------------------------- - -def test_double_negation(canon): - tree_a = parse("Neg(Neg($close))") - tree_b = parse("$close") - assert canon.is_duplicate(tree_a, tree_b) - - -# ----------------------------------------------------------------------- -# Commutativity: Add($close, $open) == Add($open, $close) -# ----------------------------------------------------------------------- - -def test_commutativity_add(canon): - tree_a = parse("Add($close, $open)") - tree_b = parse("Add($open, $close)") - assert canon.is_duplicate(tree_a, tree_b) - - -# ----------------------------------------------------------------------- -# Non-algebraic preserved: CsRank(Neg($close)) != Neg(CsRank($close)) -# ----------------------------------------------------------------------- - -def test_non_algebraic_not_simplified(canon): - 
tree_a = parse("CsRank(Neg($close))") - tree_b = parse("Neg(CsRank($close))") - assert not canon.is_duplicate(tree_a, tree_b) - - -# ----------------------------------------------------------------------- -# is_duplicate method -# ----------------------------------------------------------------------- - -def test_is_duplicate_same_formula(canon): - tree = parse("CsRank($close)") - assert canon.is_duplicate(tree, tree) - - -def test_is_duplicate_different_formulas(canon): - tree_a = parse("CsRank($close)") - tree_b = parse("CsRank($volume)") - assert not canon.is_duplicate(tree_a, tree_b) - - -# ----------------------------------------------------------------------- -# Cache: second call should be faster (or at least not slower) -# ----------------------------------------------------------------------- - -def test_cache_works(canon): - tree = parse("Add(Mul($close, $open), Neg($volume))") - - # First call populates cache - h1 = canon.canonicalize(tree) - - # Second call should hit cache and return same hash - h2 = canon.canonicalize(tree) - assert h1 == h2 - - # Verify cache is populated - key = tree.to_string() - assert key in canon._cache diff --git a/src/factorminer/factorminer/tests/test_capacity.py b/src/factorminer/factorminer/tests/test_capacity.py deleted file mode 100644 index 0d5fd3a..0000000 --- a/src/factorminer/factorminer/tests/test_capacity.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Tests for capacity-aware backtesting (evaluation/capacity.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.capacity import ( - CapacityConfig, - CapacityEstimator, - MarketImpactModel, - NetCostResult, -) - - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -@pytest.fixture -def market_data(rng): - """Synthetic returns and volume for capacity tests.""" - M, T = 20, 100 - returns = rng.normal(0, 0.01, (M, T)) - volume = np.abs(rng.normal(1e6, 1e5, (M, T))) - signals = 
rng.normal(0, 1, (M, T)) - return returns, volume, signals - - -# ----------------------------------------------------------------------- -# MarketImpactModel: higher capital -> higher impact_bps -# ----------------------------------------------------------------------- - -def test_impact_increases_with_capital(rng): - """Higher capital should result in higher average impact.""" - M, T = 20, 100 - signals = rng.normal(0, 1, (M, T)) - # Use very high volume so low capital stays below participation limit - volume = np.abs(rng.normal(1e9, 1e8, (M, T))) - model = MarketImpactModel() - - low_cap = model.estimate_impact(signals, volume, capital=1e6) - high_cap = model.estimate_impact(signals, volume, capital=1e9) - - assert high_cap.avg_impact_bps > low_cap.avg_impact_bps - - -def test_impact_result_shape(market_data): - """Impact arrays should match T dimension.""" - returns, volume, signals = market_data - T = signals.shape[1] - model = MarketImpactModel() - result = model.estimate_impact(signals, volume, capital=1e8) - - assert result.impact_bps.shape == (T,) - assert result.participation_rate.shape == (T,) - assert result.avg_impact_bps >= 0 - - -# ----------------------------------------------------------------------- -# CapacityEstimator: low capital -> net_icir ~ gross_icir -# ----------------------------------------------------------------------- - -def test_low_capital_minimal_degradation(market_data): - """At very low capital, net ICIR should be close to gross ICIR.""" - returns, volume, signals = market_data - estimator = CapacityEstimator( - returns=returns, - volume=volume, - config=CapacityConfig(base_capital_usd=1e4), - ) - result = estimator.net_cost_evaluation("test", signals, capital=1e4) - assert isinstance(result, NetCostResult) - # At very low capital, impact is tiny, so net ~ gross - diff = abs(result.gross_icir - result.net_icir) - assert diff < abs(result.gross_icir) + 0.5 # generous tolerance - - -# 
----------------------------------------------------------------------- -# CapacityEstimator: high capital -> significant IC degradation -# ----------------------------------------------------------------------- - -def test_high_capital_degrades_ic(market_data): - """At very high capital, the net ICIR should be meaningfully lower.""" - returns, volume, signals = market_data - config = CapacityConfig( - capacity_levels=[1e4, 1e6, 1e8, 1e10], - ) - estimator = CapacityEstimator( - returns=returns, - volume=volume, - config=config, - ) - cap_est = estimator.estimate("test", signals) - # The capacity curve should show increasing degradation - degradations = list(cap_est.capacity_curve.values()) - assert degradations[-1] >= degradations[0] - - -# ----------------------------------------------------------------------- -# Edge case: zero volume -# ----------------------------------------------------------------------- - -def test_zero_volume_handling(rng): - """Zero volume should be handled gracefully (participation_limit used).""" - M, T = 10, 50 - returns = rng.normal(0, 0.01, (M, T)) - volume = np.zeros((M, T)) # all zero volume - signals = rng.normal(0, 1, (M, T)) - - model = MarketImpactModel() - result = model.estimate_impact(signals, volume, capital=1e8) - - # Should not crash; participation rate should be capped at limit - assert not np.any(np.isnan(result.impact_bps)) - cfg = CapacityConfig() - assert np.allclose(result.participation_rate, cfg.participation_limit) diff --git a/src/factorminer/factorminer/tests/test_causal.py b/src/factorminer/factorminer/tests/test_causal.py deleted file mode 100644 index 0f03278..0000000 --- a/src/factorminer/factorminer/tests/test_causal.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Tests for the causal validation layer (evaluation/causal.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.causal import CausalConfig, CausalTestResult, CausalValidator - - 
-@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -# ----------------------------------------------------------------------- -# CausalConfig defaults -# ----------------------------------------------------------------------- - -def test_causal_config_defaults(): - cfg = CausalConfig() - assert cfg.enabled is True - assert cfg.granger_max_lag == 5 - assert cfg.granger_significance == 0.05 - assert cfg.n_interventions == 3 - assert cfg.robustness_threshold == 0.4 - - -# ----------------------------------------------------------------------- -# CausalTestResult dataclass -# ----------------------------------------------------------------------- - -def test_causal_test_result_fields(): - r = CausalTestResult( - factor_name="test", - granger_p_value=0.01, - granger_f_stat=5.0, - granger_passes=True, - intervention_ic_ratio=0.8, - intervention_passes=True, - robustness_score=0.7, - passes=True, - ) - assert r.factor_name == "test" - assert r.passes is True - assert isinstance(r.details, dict) - - -# ----------------------------------------------------------------------- -# Granger test: planted causal signal should pass -# ----------------------------------------------------------------------- - -def test_granger_causal_signal_passes(rng): - """A signal that IS lag-1 predictive of returns should produce low p.""" - M, T = 20, 200 - noise = rng.normal(0, 0.01, (M, T)) - signal = rng.normal(0, 1, (M, T)) - # Returns are a lagged copy of the signal + small noise - returns = np.zeros((M, T)) - returns[:, 1:] = signal[:, :-1] * 0.5 + noise[:, 1:] - - validator = CausalValidator( - returns=returns, - data_tensor=None, - library_signals={}, - config=CausalConfig(granger_max_lag=3, seed=42), - ) - result = validator.validate("planted_signal", signal) - # The Granger test should detect causality (low p-value) - assert result.granger_p_value < 0.10 or result.granger_passes - - -# ----------------------------------------------------------------------- -# Granger 
test: random noise should fail (high p-value) -# ----------------------------------------------------------------------- - -def test_granger_random_noise_high_pvalue(rng): - """Pure noise signal should have high p-value.""" - M, T = 20, 200 - signal = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - - validator = CausalValidator( - returns=returns, - data_tensor=None, - library_signals={}, - config=CausalConfig(granger_max_lag=3, seed=42), - ) - result = validator.validate("noise_signal", signal) - # High p-value expected (not necessarily >0.05 due to random chance, - # but the test is about the API working correctly) - assert isinstance(result.granger_p_value, float) - assert 0.0 <= result.granger_p_value <= 1.0 - - -# ----------------------------------------------------------------------- -# Intervention robustness: robust signal retains IC -# ----------------------------------------------------------------------- - -def test_intervention_robust_signal(rng): - """A signal strongly correlated with returns should be robust.""" - M, T = 20, 100 - returns = rng.normal(0, 0.01, (M, T)) - # Signal is nearly identical to returns -> high IC, robust - signal = returns * 10 + rng.normal(0, 0.001, (M, T)) - - validator = CausalValidator( - returns=returns, - data_tensor=None, - library_signals={}, - config=CausalConfig(seed=42), - ) - result = validator.validate("robust_factor", signal) - assert result.intervention_ic_ratio > 0.0 - assert isinstance(result.intervention_passes, bool) - assert isinstance(result.robustness_score, float) - - -def test_validate_excludes_candidate_from_control_library(rng): - """A factor under test should not be used as its own Granger control.""" - M, T = 8, 40 - signal = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - control = rng.normal(0, 1, (M, T)) - - validator = CausalValidator( - returns=returns, - data_tensor=None, - library_signals={ - "candidate_factor": signal.copy(), - "control_factor": control, 
- }, - config=CausalConfig(seed=42), - ) - - captured: dict[str, np.ndarray] = {} - - def _capture_controls(signals_arg, returns_arg, library_signals_arg): - captured.update(library_signals_arg) - return 1.0, 0.0, True - - validator._granger_test = _capture_controls # type: ignore[method-assign] - result = validator.validate("candidate_factor", signal) - - assert result.granger_passes is True - assert "candidate_factor" not in captured - assert "control_factor" in captured diff --git a/src/factorminer/factorminer/tests/test_cli_analysis.py b/src/factorminer/factorminer/tests/test_cli_analysis.py deleted file mode 100644 index 33c2c5c..0000000 --- a/src/factorminer/factorminer/tests/test_cli_analysis.py +++ /dev/null @@ -1,312 +0,0 @@ -"""Focused CLI analysis tests for evaluate, combine, and visualize.""" - -from __future__ import annotations - -from dataclasses import dataclass - -from click.testing import CliRunner -import numpy as np - -from src.factorminer.factorminer.cli import main -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.library_io import save_library -from src.factorminer.factorminer.evaluation.runtime import DatasetSplit, FactorEvaluationArtifact - - -@dataclass -class _FakeDataset: - """Small runtime dataset stub sufficient for analysis CLI commands.""" - - asset_ids: np.ndarray - timestamps: np.ndarray - splits: dict[str, DatasetSplit] - - def get_split(self, name: str) -> DatasetSplit: - return self.splits[name] - - -def _make_stats( - ic_mean: float, - ic_abs_mean: float, - icir: float, - ic_win_rate: float, - turnover: float, -) -> dict: - return { - "ic_mean": ic_mean, - "ic_abs_mean": ic_abs_mean, - "icir": icir, - "ic_win_rate": ic_win_rate, - "turnover": turnover, - "ic_series": np.array([ic_mean, -ic_mean / 2.0, ic_mean / 3.0], dtype=np.float64), - "Q1": -0.02, - "Q2": -0.01, - "Q3": 0.0, - "Q4": 0.01, - "Q5": 0.02, - "long_short": 0.04, - "monotonicity": 1.0, - } - - 
-def _make_artifact( - factor_id: int, - name: str, - train_abs_ic: float, - test_abs_ic: float, -) -> FactorEvaluationArtifact: - train_signal = np.full((2, 3), float(factor_id), dtype=np.float64) - test_signal = np.full((2, 3), float(factor_id) * 10.0, dtype=np.float64) - full_signal = np.concatenate([train_signal, test_signal], axis=1) - - return FactorEvaluationArtifact( - factor_id=factor_id, - name=name, - formula="Neg($close)", - category="test", - parse_ok=True, - signals_full=full_signal, - split_signals={ - "train": train_signal, - "test": test_signal, - "full": full_signal, - }, - split_stats={ - "train": _make_stats(0.05 * factor_id, train_abs_ic, 1.0 + factor_id, 0.6, 0.1), - "test": _make_stats(-0.04 * factor_id, test_abs_ic, 0.8 + factor_id, 0.4, 0.2), - "full": _make_stats(0.01 * factor_id, max(train_abs_ic, test_abs_ic), 0.9, 0.5, 0.15), - }, - ) - - -def _make_dataset() -> _FakeDataset: - timestamps = np.array( - [ - np.datetime64("2025-01-01"), - np.datetime64("2025-01-02"), - np.datetime64("2025-01-03"), - np.datetime64("2025-01-04"), - np.datetime64("2025-01-05"), - np.datetime64("2025-01-06"), - ] - ) - returns = np.zeros((2, 3), dtype=np.float64) - return _FakeDataset( - asset_ids=np.array(["A", "B"]), - timestamps=timestamps, - splits={ - "train": DatasetSplit( - name="train", - indices=np.array([0, 1, 2]), - timestamps=timestamps[:3], - returns=returns, - ), - "test": DatasetSplit( - name="test", - indices=np.array([3, 4, 5]), - timestamps=timestamps[3:], - returns=returns, - ), - "full": DatasetSplit( - name="full", - indices=np.array([0, 1, 2, 3, 4, 5]), - timestamps=timestamps, - returns=np.zeros((2, 6), dtype=np.float64), - ), - }, - ) - - -def _save_test_library(tmp_path) -> str: - library = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - library.admit_factor( - Factor( - id=0, - name="factor_one", - formula="Neg($close)", - category="test", - ic_mean=9.99, - icir=8.88, - ic_win_rate=0.99, - max_correlation=0.0, - 
batch_number=1, - ) - ) - library.admit_factor( - Factor( - id=0, - name="factor_two", - formula="Neg($open)", - category="test", - ic_mean=7.77, - icir=6.66, - ic_win_rate=0.95, - max_correlation=0.0, - batch_number=1, - ) - ) - base_path = tmp_path / "factor_library" - save_library(library, base_path, save_signals=False) - return str(base_path.with_suffix(".json")) - - -def test_evaluate_recomputes_and_selects_top_k_by_train_split(tmp_path, monkeypatch): - """`evaluate --period both` should use recomputed train metrics for top-k.""" - library_path = _save_test_library(tmp_path) - dataset = _make_dataset() - artifacts = [ - _make_artifact(1, "factor_one", train_abs_ic=0.20, test_abs_ic=0.90), - _make_artifact(2, "factor_two", train_abs_ic=0.70, test_abs_ic=0.10), - ] - - monkeypatch.setattr( - "src.factorminer.factorminer.cli._load_runtime_dataset_for_analysis", - lambda cfg, data_path, mock: dataset, - ) - monkeypatch.setattr( - "src.factorminer.factorminer.cli._recompute_analysis_artifacts", - lambda library, dataset_arg, signal_failure_policy: artifacts, - ) - - runner = CliRunner() - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(tmp_path / "out"), - "evaluate", - library_path, - "--mock", - "--period", - "both", - "--top-k", - "1", - ], - ) - - assert result.exit_code == 0, result.output - assert "Evaluating top 1 factors by train |IC| for train/test comparison" in result.output - assert "factor_two" in result.output - assert "factor_one" not in result.output - assert "0.7000" in result.output - assert "9.9900" not in result.output - assert "Decay summary (train -> test)" in result.output - - -def test_combine_uses_fit_split_for_factor_preselection(tmp_path, monkeypatch): - """`combine` should pre-select factors by fit split rather than eval split.""" - library_path = _save_test_library(tmp_path) - dataset = _make_dataset() - artifacts = [ - _make_artifact(1, "factor_one", train_abs_ic=0.20, test_abs_ic=0.90), - _make_artifact(2, 
"factor_two", train_abs_ic=0.70, test_abs_ic=0.10), - ] - captured_factor_ids: list[int] = [] - - monkeypatch.setattr( - "src.factorminer.factorminer.cli._load_runtime_dataset_for_analysis", - lambda cfg, data_path, mock: dataset, - ) - monkeypatch.setattr( - "src.factorminer.factorminer.cli._recompute_analysis_artifacts", - lambda library, dataset_arg, signal_failure_policy: artifacts, - ) - - def _capture_equal_weight(self, factor_signals): - captured_factor_ids.extend(sorted(factor_signals.keys())) - return next(iter(factor_signals.values())) - - monkeypatch.setattr( - "src.factorminer.factorminer.evaluation.combination.FactorCombiner.equal_weight", - _capture_equal_weight, - ) - monkeypatch.setattr( - "src.factorminer.factorminer.evaluation.portfolio.PortfolioBacktester.quintile_backtest", - lambda self, combined_signal, returns, transaction_cost_bps=0: { - "ic_mean": 0.12, - "icir": 1.23, - "ls_return": 0.04, - "monotonicity": 1.0, - "avg_turnover": 0.10, - }, - ) - - runner = CliRunner() - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(tmp_path / "out"), - "combine", - library_path, - "--mock", - "--fit-period", - "train", - "--eval-period", - "test", - "--method", - "equal-weight", - "--top-k", - "1", - ], - ) - - assert result.exit_code == 0, result.output - assert "Pre-selected top 1 factors by train |IC|" in result.output - assert "Fit split: train" in result.output - assert "Eval split: test" in result.output - assert captured_factor_ids == [2] - - -def test_visualize_defaults_factor_specific_plots_to_split_top_factor(tmp_path, monkeypatch): - """`visualize` should default factor-specific plots to the split top factor.""" - library_path = _save_test_library(tmp_path) - dataset = _make_dataset() - artifacts = [ - _make_artifact(1, "factor_one", train_abs_ic=0.80, test_abs_ic=0.20), - _make_artifact(2, "factor_two", train_abs_ic=0.30, test_abs_ic=0.90), - ] - ic_paths: list[str] = [] - quintile_paths: list[str] = [] - - 
monkeypatch.setattr( - "src.factorminer.factorminer.cli._load_runtime_dataset_for_analysis", - lambda cfg, data_path, mock: dataset, - ) - monkeypatch.setattr( - "src.factorminer.factorminer.cli._recompute_analysis_artifacts", - lambda library, dataset_arg, signal_failure_policy: artifacts, - ) - monkeypatch.setattr( - "src.factorminer.factorminer.utils.visualization.plot_ic_timeseries", - lambda ic_series, dates, rolling_window=21, title="", save_path=None: ic_paths.append(save_path), - ) - monkeypatch.setattr( - "src.factorminer.factorminer.utils.visualization.plot_quintile_returns", - lambda quintile_returns, title="", save_path=None: quintile_paths.append(save_path), - ) - - runner = CliRunner() - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(tmp_path / "viz"), - "visualize", - library_path, - "--mock", - "--period", - "test", - "--ic-timeseries", - "--quintile", - ], - ) - - assert result.exit_code == 0, result.output - assert "Defaulted to factor #2 factor_two for factor-specific plots." 
in result.output - assert ic_paths and all("factor_2" in path for path in ic_paths) - assert quintile_paths and all("factor_2" in path for path in quintile_paths) - assert not any("factor_1" in path for path in ic_paths + quintile_paths) diff --git a/src/factorminer/factorminer/tests/test_cli_helix.py b/src/factorminer/factorminer/tests/test_cli_helix.py deleted file mode 100644 index 761a1dc..0000000 --- a/src/factorminer/factorminer/tests/test_cli_helix.py +++ /dev/null @@ -1,142 +0,0 @@ -"""CLI tests for the Helix command.""" - -from __future__ import annotations - -import json - -from click.testing import CliRunner -import numpy as np -import pandas as pd - -from src.factorminer.factorminer.cli import _build_core_mining_config, _prepare_data_arrays, main -from src.factorminer.factorminer.utils.config import load_config - - -def test_helix_cli_runs_with_mock_data(tmp_path): - """The helix command should execute end-to-end and save a library.""" - output_dir = tmp_path / "helix-output" - runner = CliRunner() - - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(output_dir), - "helix", - "--mock", - "-n", - "1", - "-b", - "5", - "-t", - "3", - ], - ) - - assert result.exit_code == 0, result.output - assert "Starting Helix Loop..." in result.output - assert "Helix mining complete!" 
in result.output - - library_path = output_dir / "factor_library.json" - assert library_path.exists() - - payload = json.loads(library_path.read_text()) - assert "factors" in payload - - -def test_helix_cli_reports_enabled_features(tmp_path): - """Explicit feature flags should be reflected in the CLI output.""" - output_dir = tmp_path / "helix-flags" - runner = CliRunner() - - result = runner.invoke( - main, - [ - "--cpu", - "--output-dir", - str(output_dir), - "helix", - "--mock", - "--debate", - "--canonicalize", - "-n", - "1", - "-b", - "4", - "-t", - "2", - ], - ) - - assert result.exit_code == 0, result.output - assert "Active Phase 2 features: debate, canonicalization" in result.output - - -def test_prepare_data_arrays_builds_full_factor_feature_surface(): - """The CLI tensor builder should expose the paper's canonical features.""" - df = pd.DataFrame( - [ - { - "datetime": "2025-01-01 09:30:00", - "asset_id": "A", - "open": 10.0, - "high": 11.0, - "low": 9.0, - "close": 10.0, - "volume": 2.0, - "amount": 20.0, - }, - { - "datetime": "2025-01-01 09:40:00", - "asset_id": "A", - "open": 10.0, - "high": 12.0, - "low": 9.5, - "close": 11.0, - "volume": 2.0, - "amount": 22.0, - }, - { - "datetime": "2025-01-01 09:30:00", - "asset_id": "B", - "open": 20.0, - "high": 21.0, - "low": 19.0, - "close": 20.0, - "volume": 4.0, - "amount": 80.0, - }, - { - "datetime": "2025-01-01 09:40:00", - "asset_id": "B", - "open": 20.0, - "high": 22.0, - "low": 19.5, - "close": 18.0, - "volume": 4.0, - "amount": 72.0, - }, - ] - ) - df["datetime"] = pd.to_datetime(df["datetime"]) - - data_tensor, forward_returns = _prepare_data_arrays(df) - - assert data_tensor.shape == (2, 2, 8) - np.testing.assert_allclose(data_tensor[:, :, 6], np.array([[10.0, 11.0], [20.0, 18.0]])) - assert np.isnan(data_tensor[0, 0, 7]) - np.testing.assert_allclose(data_tensor[:, 1, 7], np.array([0.1, -0.1])) - assert np.isnan(forward_returns[0, 1]) - np.testing.assert_allclose(forward_returns[:, 0], 
np.array([0.1, -0.1])) - - -def test_mock_mining_config_uses_synthetic_signal_failures(tmp_path): - """Mock mining flows should bypass strict benchmark recomputation defaults.""" - cfg = load_config() - - normal_config = _build_core_mining_config(cfg, tmp_path / "normal", mock=False) - mock_config = _build_core_mining_config(cfg, tmp_path / "mock", mock=True) - - assert normal_config.signal_failure_policy == "reject" - assert mock_config.signal_failure_policy == "synthetic" diff --git a/src/factorminer/factorminer/tests/test_combination.py b/src/factorminer/factorminer/tests/test_combination.py deleted file mode 100644 index ebf2d8f..0000000 --- a/src/factorminer/factorminer/tests/test_combination.py +++ /dev/null @@ -1,531 +0,0 @@ -"""Tests for factor combination and selection strategies.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.combination import FactorCombiner -from src.factorminer.factorminer.evaluation.selection import FactorSelector - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def combiner(): - return FactorCombiner() - - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -@pytest.fixture -def simple_signals(rng): - """Three factor signals of shape (T=50, N=20).""" - T, N = 50, 20 - return { - 1: rng.normal(0, 1, (T, N)), - 2: rng.normal(0, 1, (T, N)), - 3: rng.normal(0, 1, (T, N)), - } - - -@pytest.fixture -def identical_signals(rng): - """Two identical factor signals.""" - T, N = 30, 10 - sig = rng.normal(0, 1, (T, N)) - return {1: sig.copy(), 2: sig.copy()} - - -# --------------------------------------------------------------------------- -# Equal weight -# --------------------------------------------------------------------------- - -class TestEqualWeight: - """Test equal-weight combination.""" - - def 
test_output_shape(self, combiner, simple_signals): - result = combiner.equal_weight(simple_signals) - T, N = next(iter(simple_signals.values())).shape - assert result.shape == (T, N) - - def test_single_factor_is_zscore(self, combiner, rng): - T, N = 30, 10 - sig = rng.normal(5, 2, (T, N)) - signals = {1: sig} - result = combiner.equal_weight(signals) - # Should be z-scored: mean ~0 per row - row_means = np.nanmean(result, axis=1) - np.testing.assert_array_almost_equal(row_means, np.zeros(T), decimal=10) - - def test_two_identical_factors(self, combiner, identical_signals): - result = combiner.equal_weight(identical_signals) - # Average of two identical z-scored signals = same z-scored signal - single = combiner.equal_weight({1: identical_signals[1]}) - np.testing.assert_array_almost_equal(result, single, decimal=10) - - def test_empty_raises(self, combiner): - with pytest.raises(ValueError, match="not be empty"): - combiner.equal_weight({}) - - def test_result_is_average(self, combiner, rng): - """EW of multiple factors should be the average of their z-scores.""" - T, N = 20, 10 - s1 = np.ones((T, N)) # Constant -> z-score = 0 - s2 = np.tile(np.arange(N, dtype=np.float64), (T, 1)) # Variable - signals = {1: s1, 2: s2} - result = combiner.equal_weight(signals) - # s1 z-score = 0 everywhere (constant), s2 z-score is not 0 - # Average should be s2_zscore / 2 - s2_zscore = combiner._cross_sectional_standardize(s2) - # s1_zscore is 0 (constant cross-section, std=0 -> std=1 fallback) - s1_zscore = combiner._cross_sectional_standardize(s1) - expected = np.nanmean(np.stack([s1_zscore, s2_zscore]), axis=0) - np.testing.assert_array_almost_equal(result, expected) - - -# --------------------------------------------------------------------------- -# IC-weighted -# --------------------------------------------------------------------------- - -class TestICWeighted: - """Test IC-weighted combination.""" - - def test_output_shape(self, combiner, simple_signals): - ic_values = {1: 
0.05, 2: 0.08, 3: 0.03} - result = combiner.ic_weighted(simple_signals, ic_values) - T, N = next(iter(simple_signals.values())).shape - assert result.shape == (T, N) - - def test_higher_ic_gets_more_weight(self, combiner, rng): - T, N = 30, 15 - s1 = rng.normal(0, 1, (T, N)) - s2 = rng.normal(0, 1, (T, N)) - signals = {1: s1, 2: s2} - - # Give all weight to factor 1 - ic_values_1 = {1: 1.0, 2: 0.0001} - result_1 = combiner.ic_weighted(signals, ic_values_1) - - # Give all weight to factor 2 - ic_values_2 = {1: 0.0001, 2: 1.0} - result_2 = combiner.ic_weighted(signals, ic_values_2) - - # Results should be different (weighted differently) - assert not np.allclose(result_1, result_2) - - def test_fallback_to_ew_with_nonpositive_ic(self, combiner, simple_signals): - ic_values = {1: -0.01, 2: 0.0, 3: -0.05} - result = combiner.ic_weighted(simple_signals, ic_values) - # Should fall back to equal weight - ew_result = combiner.equal_weight(simple_signals) - np.testing.assert_array_almost_equal(result, ew_result) - - def test_empty_raises(self, combiner): - with pytest.raises(ValueError, match="not be empty"): - combiner.ic_weighted({}, {}) - - -# --------------------------------------------------------------------------- -# Orthogonal -# --------------------------------------------------------------------------- - -class TestOrthogonal: - """Test orthogonal (Gram-Schmidt) combination.""" - - def test_output_shape(self, combiner, simple_signals): - result = combiner.orthogonal(simple_signals) - T, N = next(iter(simple_signals.values())).shape - assert result.shape == (T, N) - - def test_single_factor(self, combiner, rng): - T, N = 20, 10 - sig = rng.normal(0, 1, (T, N)) - result = combiner.orthogonal({1: sig}) - # Single factor orthogonalized = z-scored version - zscore = combiner._cross_sectional_standardize(sig) - np.testing.assert_array_almost_equal(result, zscore) - - def test_orthogonal_different_from_ew(self, combiner, simple_signals): - ew = 
combiner.equal_weight(simple_signals) - ortho = combiner.orthogonal(simple_signals) - # They should generally differ (unless signals are already orthogonal) - # Check that the operation at least runs without error - assert ortho.shape == ew.shape - - def test_empty_raises(self, combiner): - with pytest.raises(ValueError, match="not be empty"): - combiner.orthogonal({}) - - -# --------------------------------------------------------------------------- -# Cross-sectional standardization helper -# --------------------------------------------------------------------------- - -class TestCrossSectionalStandardize: - """Test the internal _cross_sectional_standardize method.""" - - def test_zero_mean_per_row(self, combiner, rng): - T, N = 20, 15 - signals = rng.normal(5.0, 2.0, (T, N)) - result = combiner._cross_sectional_standardize(signals) - row_means = np.nanmean(result, axis=1) - np.testing.assert_array_almost_equal(row_means, np.zeros(T), decimal=10) - - def test_unit_std_per_row(self, combiner, rng): - T, N = 20, 30 - signals = rng.normal(10.0, 3.0, (T, N)) - result = combiner._cross_sectional_standardize(signals) - row_stds = np.nanstd(result, axis=1) - np.testing.assert_array_almost_equal(row_stds, np.ones(T), decimal=5) - - def test_constant_row_handled(self, combiner): - signals = np.ones((5, 10)) - result = combiner._cross_sectional_standardize(signals) - # Constant row: std=0, should be 0 after standardization - np.testing.assert_array_almost_equal(result, np.zeros((5, 10))) - - def test_nan_handling(self, combiner, rng): - T, N = 10, 10 - signals = rng.normal(0, 1, (T, N)) - signals[0, 0] = np.nan - result = combiner._cross_sectional_standardize(signals) - assert np.isnan(result[0, 0]) - - -# --------------------------------------------------------------------------- -# Gram-Schmidt helper -# --------------------------------------------------------------------------- - -class TestGramSchmidt: - """Test the Gram-Schmidt orthogonalization helper.""" - - def 
test_orthogonal_output(self, rng): - T, N = 20, 10 - factors = [rng.normal(0, 1, (T, N)) for _ in range(3)] - ortho = FactorCombiner._gram_schmidt(factors) - assert len(ortho) == 3 - - # Check approximate orthogonality of flattened vectors - for i in range(len(ortho)): - for j in range(i + 1, len(ortho)): - vi = np.where(np.isnan(ortho[i]), 0, ortho[i]).ravel() - vj = np.where(np.isnan(ortho[j]), 0, ortho[j]).ravel() - denom = np.sqrt(np.dot(vi, vi) * np.dot(vj, vj)) - if denom > 1e-10: - cos_sim = abs(np.dot(vi, vj) / denom) - assert cos_sim < 0.01, f"Factors {i} and {j} not orthogonal: cos={cos_sim}" - - def test_single_factor(self, rng): - T, N = 10, 5 - f = [rng.normal(0, 1, (T, N))] - ortho = FactorCombiner._gram_schmidt(f) - assert len(ortho) == 1 - np.testing.assert_array_almost_equal(ortho[0], f[0]) - - def test_nan_preserved(self, rng): - T, N = 10, 5 - f1 = rng.normal(0, 1, (T, N)) - f2 = rng.normal(0, 1, (T, N)) - f1[0, 0] = np.nan - ortho = FactorCombiner._gram_schmidt([f1, f2]) - assert np.isnan(ortho[0][0, 0]) - - -# =========================================================================== -# Factor Selection Tests -# =========================================================================== - -# --------------------------------------------------------------------------- -# Fixtures for selection tests -# --------------------------------------------------------------------------- - -@pytest.fixture -def selector(): - return FactorSelector() - - -@pytest.fixture -def synthetic_factors(rng): - """Synthetic factor signals for selection tests. - - Creates 5 factors of shape (T=80, N=30) where factor 0 is predictive - (correlated with returns) and the rest are noise. 
- """ - T, N = 80, 30 - returns = rng.normal(0, 0.02, (T, N)) - - signals = {} - # Factor 0: predictive (signal ~ returns + noise) - signals[0] = returns + rng.normal(0, 0.01, (T, N)) - # Factors 1-4: pure noise - for i in range(1, 5): - signals[i] = rng.normal(0, 1, (T, N)) - - return signals, returns - - -@pytest.fixture -def uniform_factors(rng): - """5 factors that are all weakly predictive.""" - T, N = 60, 25 - returns = rng.normal(0, 0.02, (T, N)) - - signals = {} - for i in range(5): - signals[i] = returns * (0.5 + 0.1 * i) + rng.normal(0, 0.05, (T, N)) - - return signals, returns - - -# --------------------------------------------------------------------------- -# _prepare_panel helper tests -# --------------------------------------------------------------------------- - -class TestPreparePanel: - """Test the _prepare_panel static helper.""" - - def test_empty_returns_empty(self, selector): - ids, X, y = selector._prepare_panel({}, np.empty((10, 5))) - assert ids == [] - assert X.shape == (0, 0) - assert y.shape == (0,) - - def test_output_shapes(self, selector, rng): - T, N = 20, 10 - signals = { - 1: rng.normal(0, 1, (T, N)), - 2: rng.normal(0, 1, (T, N)), - } - returns = rng.normal(0, 1, (T, N)) - - ids, X, y = selector._prepare_panel(signals, returns) - assert ids == [1, 2] - # X should be (n_valid_samples, 2), y should be (n_valid_samples,) - assert X.shape[1] == 2 - assert X.shape[0] == y.shape[0] - assert X.shape[0] <= T * N - - def test_nan_rows_dropped(self, selector, rng): - T, N = 10, 5 - signals = {1: np.ones((T, N))} - returns = np.ones((T, N)) - # Inject NaN into one position - signals[1][0, 0] = np.nan - - ids, X, y = selector._prepare_panel(signals, returns) - assert X.shape[0] == T * N - 1 # One row dropped - - def test_ids_sorted(self, selector, rng): - T, N = 5, 3 - signals = { - 3: rng.normal(0, 1, (T, N)), - 1: rng.normal(0, 1, (T, N)), - 2: rng.normal(0, 1, (T, N)), - } - returns = rng.normal(0, 1, (T, N)) - ids, _, _ = 
selector._prepare_panel(signals, returns) - assert ids == [1, 2, 3] - - -# --------------------------------------------------------------------------- -# _composite_icir helper tests -# --------------------------------------------------------------------------- - -class TestCompositeICIR: - """Test the _composite_icir static helper.""" - - def test_empty_returns_zero(self, selector, rng): - T, N = 20, 10 - signals = {1: rng.normal(0, 1, (T, N))} - returns = rng.normal(0, 1, (T, N)) - assert selector._composite_icir(signals, [], returns) == 0.0 - - def test_single_factor(self, selector, rng): - T, N = 50, 20 - returns = rng.normal(0, 0.02, (T, N)) - signals = {1: returns + rng.normal(0, 0.01, (T, N))} - icir = selector._composite_icir(signals, [1], returns) - assert isinstance(icir, float) - # Predictive signal should have positive ICIR - assert icir > 0 - - def test_noise_factor_low_icir(self, selector, rng): - T, N = 50, 20 - returns = rng.normal(0, 0.02, (T, N)) - signals = {1: rng.normal(0, 1, (T, N))} - icir = selector._composite_icir(signals, [1], returns) - # Pure noise should have ICIR near zero (much lower than predictive) - assert abs(icir) < 2.0 # Loose bound, noise can have some correlation - - -# --------------------------------------------------------------------------- -# Lasso selection tests -# --------------------------------------------------------------------------- - -class TestLassoSelection: - """Test L1-regularized Lasso factor selection.""" - - def test_empty_signals_returns_empty(self, selector): - result = selector.lasso_selection({}, np.empty((10, 5))) - assert result == [] - - def test_returns_list_of_tuples(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.lasso_selection(signals, returns, alpha=0.001) - assert isinstance(result, list) - for item in result: - assert isinstance(item, tuple) - assert len(item) == 2 - fid, coef = item - assert isinstance(fid, int) - assert isinstance(coef, 
float) - - def test_sorted_by_abs_coefficient(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.lasso_selection(signals, returns, alpha=0.001) - if len(result) >= 2: - abs_coefs = [abs(c) for _, c in result] - assert abs_coefs == sorted(abs_coefs, reverse=True) - - def test_selects_predictive_factor(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.lasso_selection(signals, returns, alpha=0.0001) - if result: - selected_ids = [fid for fid, _ in result] - # Factor 0 is correlated with returns; it should be selected - assert 0 in selected_ids - - def test_sparsity_with_high_alpha(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result_low = selector.lasso_selection(signals, returns, alpha=0.0001) - result_high = selector.lasso_selection(signals, returns, alpha=1.0) - # Higher alpha should select fewer (or equal) factors - assert len(result_high) <= len(result_low) - - def test_auto_alpha_via_cv(self, selector, synthetic_factors): - signals, returns = synthetic_factors - # alpha=None triggers LassoCV - result = selector.lasso_selection(signals, returns, alpha=None) - assert isinstance(result, list) - - def test_nonzero_coefficients_only(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.lasso_selection(signals, returns, alpha=0.001) - for _, coef in result: - assert abs(coef) > 1e-10 - - -# --------------------------------------------------------------------------- -# Forward stepwise selection tests -# --------------------------------------------------------------------------- - -class TestForwardStepwise: - """Test greedy forward stepwise factor selection.""" - - def test_empty_signals_returns_empty(self, selector): - result = selector.forward_stepwise({}, np.empty((10, 5))) - assert result == [] - - def test_returns_list_of_tuples(self, selector, synthetic_factors): - signals, returns = synthetic_factors - 
result = selector.forward_stepwise(signals, returns, max_factors=3) - assert isinstance(result, list) - for item in result: - assert isinstance(item, tuple) - assert len(item) == 2 - - def test_respects_max_factors(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.forward_stepwise(signals, returns, max_factors=2) - assert len(result) <= 2 - - def test_selection_order(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.forward_stepwise(signals, returns, max_factors=5) - # Each entry should have positive delta (ICIR improvement) - for _, delta in result: - assert delta > 0 - - def test_no_duplicate_selections(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.forward_stepwise(signals, returns, max_factors=5) - selected_ids = [fid for fid, _ in result] - assert len(selected_ids) == len(set(selected_ids)) - - def test_predictive_factor_selected_first(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.forward_stepwise(signals, returns, max_factors=5) - if result: - # Factor 0 is highly predictive; should likely be selected first - first_id = result[0][0] - assert first_id == 0 - - def test_single_factor_available(self, selector, rng): - T, N = 40, 15 - returns = rng.normal(0, 0.02, (T, N)) - signals = {42: returns + rng.normal(0, 0.01, (T, N))} - result = selector.forward_stepwise(signals, returns, max_factors=5) - assert len(result) <= 1 - if result: - assert result[0][0] == 42 - - -# --------------------------------------------------------------------------- -# XGBoost selection tests -# --------------------------------------------------------------------------- - -class TestXGBoostSelection: - """Test XGBoost importance-based factor selection.""" - - def test_empty_signals_returns_empty(self, selector): - result = selector.xgboost_selection({}, np.empty((10, 5))) - assert result == [] - - def 
test_returns_all_factors(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - # XGBoost returns importance for all factors - assert len(result) == len(signals) - - def test_returns_list_of_tuples(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - for item in result: - assert isinstance(item, tuple) - assert len(item) == 2 - fid, importance = item - assert isinstance(fid, int) - assert isinstance(importance, float) - - def test_sorted_by_importance(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - importances = [imp for _, imp in result] - assert importances == sorted(importances, reverse=True) - - def test_importances_nonnegative(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - for _, importance in result: - assert importance >= 0.0 - - def test_predictive_factor_has_high_importance(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - if result: - # Factor 0 is predictive; it should have the highest importance - top_id = result[0][0] - assert top_id == 0 - - def test_importances_sum_to_one(self, selector, synthetic_factors): - signals, returns = synthetic_factors - result = selector.xgboost_selection(signals, returns) - total = sum(imp for _, imp in result) - # Gain importances from XGBoost should sum to ~1 - assert total == pytest.approx(1.0, abs=0.05) diff --git a/src/factorminer/factorminer/tests/test_data.py b/src/factorminer/factorminer/tests/test_data.py deleted file mode 100644 index a294f41..0000000 --- a/src/factorminer/factorminer/tests/test_data.py +++ /dev/null @@ -1,258 +0,0 @@ -"""Tests for the data pipeline: mock data generation, preprocessing, 
tensor building.""" - -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest - -from src.factorminer.factorminer.data.loader import load_market_data -from src.factorminer.factorminer.data.mock_data import MockConfig, generate_mock_data, generate_with_halts - - -# --------------------------------------------------------------------------- -# Mock data generation -# --------------------------------------------------------------------------- - -class TestMockDataGeneration: - """Test the synthetic market data generator.""" - - @pytest.fixture - def small_config(self): - return MockConfig( - num_assets=10, - num_periods=100, - frequency="1d", - seed=42, - ) - - @pytest.fixture - def small_df(self, small_config): - return generate_mock_data(small_config) - - def test_returns_dataframe(self, small_df): - assert isinstance(small_df, pd.DataFrame) - - def test_required_columns(self, small_df): - for col in ["datetime", "asset_id", "open", "high", "low", "close", "volume", "amount"]: - assert col in small_df.columns, f"Missing column: {col}" - - def test_correct_shape(self, small_config, small_df): - expected_rows = small_config.num_assets * small_config.num_periods - assert len(small_df) == expected_rows - - def test_unique_assets(self, small_config, small_df): - n_unique = small_df["asset_id"].nunique() - assert n_unique == small_config.num_assets - - def test_periods_per_asset(self, small_config, small_df): - counts = small_df.groupby("asset_id").size() - assert (counts == small_config.num_periods).all() - - -# --------------------------------------------------------------------------- -# OHLC consistency -# --------------------------------------------------------------------------- - -class TestOHLCConsistency: - """Test that generated data maintains OHLC invariants.""" - - @pytest.fixture - def df(self): - config = MockConfig(num_assets=20, num_periods=200, seed=123) - return generate_mock_data(config) - - def 
test_low_le_high(self, df): - assert (df["low"] <= df["high"] + 1e-8).all(), "Found low > high" - - def test_open_within_range(self, df): - assert (df["open"] >= df["low"] - 1e-8).all(), "Found open < low" - assert (df["open"] <= df["high"] + 1e-8).all(), "Found open > high" - - def test_close_within_range(self, df): - assert (df["close"] >= df["low"] - 1e-8).all(), "Found close < low" - assert (df["close"] <= df["high"] + 1e-8).all(), "Found close > high" - - def test_positive_prices(self, df): - for col in ["open", "high", "low", "close"]: - assert (df[col] > 0).all(), f"Found non-positive {col}" - - def test_positive_volume(self, df): - assert (df["volume"] >= 0).all(), "Found negative volume" - - def test_positive_amount(self, df): - assert (df["amount"] >= 0).all(), "Found negative amount" - - -# --------------------------------------------------------------------------- -# Trading halts -# --------------------------------------------------------------------------- - -class TestHaltGeneration: - """Test synthetic data with trading halts.""" - - def test_generate_with_halts(self): - config = MockConfig(num_assets=10, num_periods=100, seed=42) - df = generate_with_halts(config, halt_fraction=0.05) - assert isinstance(df, pd.DataFrame) - # Should have some zero-volume bars - assert (df["volume"] == 0).any() - - def test_halt_bars_have_flat_ohlc(self): - config = MockConfig(num_assets=10, num_periods=100, seed=42) - df = generate_with_halts(config, halt_fraction=0.05) - halted = df[df["volume"] == 0] - if len(halted) > 0: - # Open = High = Low = Close for halted bars - np.testing.assert_array_almost_equal(halted["open"], halted["close"]) - np.testing.assert_array_almost_equal(halted["high"], halted["close"]) - np.testing.assert_array_almost_equal(halted["low"], halted["close"]) - - -# --------------------------------------------------------------------------- -# Different frequencies -# --------------------------------------------------------------------------- - 
-class TestFrequencies: - """Test data generation at different frequencies.""" - - @pytest.mark.parametrize("freq", ["10min", "30min", "1h", "1d"]) - def test_frequency(self, freq): - config = MockConfig(num_assets=5, num_periods=50, frequency=freq, seed=42) - df = generate_mock_data(config) - assert len(df) > 0 - assert "datetime" in df.columns - - -# --------------------------------------------------------------------------- -# MockConfig defaults -# --------------------------------------------------------------------------- - -class TestMockConfig: - """Test MockConfig defaults and overrides.""" - - def test_default_config(self): - config = MockConfig() - assert config.num_assets == 50 - assert config.num_periods == 1000 - assert config.frequency == "10min" - assert config.seed == 42 - - def test_config_with_universe(self): - config = MockConfig(num_assets=5, num_periods=20, universe="CSI300") - df = generate_mock_data(config) - assert "universe" in df.columns - assert (df["universe"] == "CSI300").all() - - def test_config_no_planted_alpha(self): - config = MockConfig(num_assets=5, num_periods=20, plant_alpha=False) - df = generate_mock_data(config) - assert len(df) > 0 - - -# --------------------------------------------------------------------------- -# Feature computation (basic checks with preprocessor if available) -# --------------------------------------------------------------------------- - -class TestFeatureComputation: - """Test derived feature computation.""" - - def test_vwap_computable(self): - config = MockConfig(num_assets=5, num_periods=50, seed=42) - df = generate_mock_data(config) - # VWAP can be approximated from high, low, close - vwap = (df["high"] + df["low"] + df["close"]) / 3 - assert len(vwap) == len(df) - assert (vwap > 0).all() - - def test_returns_computable(self): - config = MockConfig(num_assets=5, num_periods=50, seed=42) - df = generate_mock_data(config) - # Returns per asset - df = df.sort_values(["asset_id", "datetime"]) - 
df["returns"] = df.groupby("asset_id")["close"].pct_change() - # First bar per asset should be NaN - first_bar_per_asset = df.groupby("asset_id").head(1) - assert first_bar_per_asset["returns"].isna().all() - # Rest should be finite - rest = df.dropna(subset=["returns"]) - assert np.isfinite(rest["returns"]).all() - - -# --------------------------------------------------------------------------- -# Tensor builder integration -# --------------------------------------------------------------------------- - -class TestTensorBuilder: - """Test tensor construction from mock data (if modules available).""" - - def test_build_pipeline_import(self): - """Verify we can import the tensor builder.""" - from factorminer.data.tensor_builder import TensorConfig, build_tensor - config = TensorConfig() - assert config.backend == "numpy" - assert "close" in config.features - - def test_temporal_split_import(self): - """Verify temporal_split is importable.""" - from factorminer.data.tensor_builder import temporal_split - assert callable(temporal_split) - - -# --------------------------------------------------------------------------- -# Loader schema compatibility -# --------------------------------------------------------------------------- - -class TestLoaderSchemaCompatibility: - """Test common market-data schema variants accepted by the loader.""" - - def test_accepts_common_column_aliases(self, tmp_path): - path = tmp_path / "alias_data.csv" - df = pd.DataFrame( - { - "timestamp": pd.to_datetime( - ["2025-01-01 09:30:00", "2025-01-01 09:40:00"] - ), - "code": ["600519.SH", "600519.SH"], - "open": [10.0, 10.2], - "high": [10.3, 10.4], - "low": [9.9, 10.1], - "close": [10.1, 10.3], - "volume": [1000.0, 1200.0], - "amt": [10100.0, 12360.0], - } - ) - df.to_csv(path, index=False) - - loaded = load_market_data(path) - - assert list(loaded.columns[:8]) == [ - "datetime", - "asset_id", - "open", - "high", - "low", - "close", - "volume", - "amount", - ] - assert loaded.loc[0, 
"asset_id"] == "600519.SH" - assert loaded.loc[1, "amount"] == 12360.0 - - def test_missing_asset_id_still_raises_clear_error(self, tmp_path): - path = tmp_path / "missing_asset_id.csv" - df = pd.DataFrame( - { - "datetime": pd.to_datetime(["2025-01-01 09:30:00"]), - "open": [10.0], - "high": [10.3], - "low": [9.9], - "close": [10.1], - "volume": [1000.0], - "amount": [10100.0], - } - ) - df.to_csv(path, index=False) - - with pytest.raises(ValueError, match="missing required columns: \\['asset_id'\\]"): - load_market_data(path) diff --git a/src/factorminer/factorminer/tests/test_debate.py b/src/factorminer/factorminer/tests/test_debate.py deleted file mode 100644 index e6fee03..0000000 --- a/src/factorminer/factorminer/tests/test_debate.py +++ /dev/null @@ -1,229 +0,0 @@ -"""Tests for the multi-agent debate orchestrator (agent/debate.py).""" - -from __future__ import annotations - -import pytest - -from src.factorminer.factorminer.agent.critic import CriticAgent -from src.factorminer.factorminer.agent.debate import DebateConfig, DebateGenerator -from src.factorminer.factorminer.agent.llm_interface import MockProvider -from src.factorminer.factorminer.agent.output_parser import CandidateFactor -from src.factorminer.factorminer.agent.prompt_builder import PromptBuilder -from src.factorminer.factorminer.agent.specialists import ( - SpecialistConfig, - SpecialistPromptBuilder, -) - - -# ----------------------------------------------------------------------- -# SpecialistConfig and SpecialistPromptBuilder -# ----------------------------------------------------------------------- - -def test_specialist_config_creation(): - cfg = SpecialistConfig( - name="test_spec", - domain="testing domain", - preferred_operators=["CsRank", "Neg"], - preferred_features=["$close"], - temperature=0.7, - ) - assert cfg.name == "test_spec" - assert "CsRank" in cfg.preferred_operators - - -def test_specialist_prompt_builder_inherits(): - """SpecialistPromptBuilder should be a subclass of 
PromptBuilder.""" - assert issubclass(SpecialistPromptBuilder, PromptBuilder) - - -def test_specialist_prompt_builder_creates(): - cfg = SpecialistConfig( - name="momentum", - domain="trend-following", - preferred_operators=["Delta"], - preferred_features=["$close"], - system_prompt_suffix="Focus on momentum.", - ) - pb = SpecialistPromptBuilder(specialist_config=cfg) - assert "SPECIALIST DOMAIN DIRECTIVE" in pb.system_prompt - assert "Focus on momentum." in pb.system_prompt - - -@pytest.fixture -def helix_memory_signal(): - return { - "prompt_text": ( - "Prefer library-adjacent structures.\n" - "Avoid saturated price-only motifs." - ), - "complementary_patterns": [ - "Combine TsRank momentum with liquidity normalization.", - ], - "conflict_warnings": [ - "Price-volume reversal cluster is saturated.", - ], - "operator_cooccurrence": [ - "TsRank + CsRank", - ], - "semantic_gaps": [ - "VWAP-driven dispersion factors", - ], - } - - -@pytest.fixture -def prompt_library_state(): - return { - "size": 12, - "target_size": 110, - } - - -def _assert_helix_retrieval_sections(prompt: str) -> None: - assert "## HELIX RETRIEVAL SUMMARY" in prompt - assert "Prefer library-adjacent structures." in prompt - assert "Avoid saturated price-only motifs." in prompt - assert "## COMPLEMENTARY PATTERNS" in prompt - assert "Combine TsRank momentum with liquidity normalization." in prompt - assert "## SATURATION WARNINGS" in prompt - assert "Price-volume reversal cluster is saturated." 
in prompt - assert "## OPERATOR CO-OCCURRENCE PRIORS" in prompt - assert "TsRank + CsRank" in prompt - assert "## SEMANTIC GAPS" in prompt - assert "Underused but promising: VWAP-driven dispersion factors" in prompt - - -def test_prompt_builder_renders_helix_retrieval_fields( - helix_memory_signal, - prompt_library_state, -): - pb = PromptBuilder() - - prompt = pb.build_user_prompt( - memory_signal=helix_memory_signal, - library_state=prompt_library_state, - batch_size=5, - ) - - _assert_helix_retrieval_sections(prompt) - - -def test_specialist_prompt_builder_renders_helix_retrieval_fields( - helix_memory_signal, - prompt_library_state, -): - cfg = SpecialistConfig( - name="momentum", - domain="trend-following", - preferred_operators=["Delta", "TsRank"], - preferred_features=["$close", "$returns"], - system_prompt_suffix="Focus on momentum.", - ) - pb = SpecialistPromptBuilder(specialist_config=cfg) - - prompt = pb.build_user_prompt( - memory_signal=helix_memory_signal, - library_state=prompt_library_state, - batch_size=5, - ) - - _assert_helix_retrieval_sections(prompt) - assert "## SPECIALIST FOCUS" in prompt - assert "trend-following specialist" in prompt - - -# ----------------------------------------------------------------------- -# CriticAgent with MockProvider -# ----------------------------------------------------------------------- - -def test_critic_agent_with_mock(): - """CriticAgent should produce scores when given proposals.""" - provider = MockProvider() - critic = CriticAgent(llm_provider=provider) - - candidates = [ - CandidateFactor(name="f1", formula="Neg($close)", category="test"), - CandidateFactor(name="f2", formula="CsRank($volume)", category="test"), - ] - proposals = {"test_specialist": candidates} - - scores = critic.review_candidates( - proposals=proposals, - library_state={"size": 0}, - memory_signal={}, - ) - # Should return scores (fallback uniform if parsing fails) - assert len(scores) >= 2 - assert all(hasattr(s, "final_score") for s 
in scores) - - -# ----------------------------------------------------------------------- -# DebateGenerator.generate_batch returns List[CandidateFactor] -# ----------------------------------------------------------------------- - -def test_debate_generator_returns_candidates(): - provider = MockProvider() - gen = DebateGenerator( - llm_provider=provider, - debate_config=DebateConfig( - enable_critic=False, - candidates_per_specialist=5, - ), - ) - result = gen.generate_batch(batch_size=10) - assert isinstance(result, list) - # Should have some candidates (specialists produce them) - assert len(result) > 0 - assert all(isinstance(c, CandidateFactor) for c in result) - - -# ----------------------------------------------------------------------- -# DebateGenerator with critic produces non-empty results -# ----------------------------------------------------------------------- - -def test_debate_generator_with_critic(): - provider = MockProvider() - gen = DebateGenerator( - llm_provider=provider, - debate_config=DebateConfig( - enable_critic=True, - candidates_per_specialist=5, - top_k_after_critic=10, - ), - ) - result = gen.generate_batch(batch_size=10) - assert isinstance(result, list) - assert len(result) > 0 - - -def test_debate_generator_accepts_dict_recent_admissions(): - provider = MockProvider() - gen = DebateGenerator( - llm_provider=provider, - debate_config=DebateConfig( - enable_critic=True, - candidates_per_specialist=2, - top_k_after_critic=6, - ), - ) - - result = gen.generate_batch( - batch_size=6, - library_state={ - "recent_admissions": [ - { - "id": 7, - "name": "volatilityminer_factor_2", - "category": "VWAP", - }, - { - "id": 8, - "name": "regimeminer_factor_2", - "category": "Amount", - }, - ] - }, - ) - - assert isinstance(result, list) - assert len(result) > 0 diff --git a/src/factorminer/factorminer/tests/test_evaluation.py b/src/factorminer/factorminer/tests/test_evaluation.py deleted file mode 100644 index 0030398..0000000 --- 
a/src/factorminer/factorminer/tests/test_evaluation.py +++ /dev/null @@ -1,287 +0,0 @@ -"""Tests for the evaluation metrics pipeline.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.metrics import ( - compute_factor_stats, - compute_ic, - compute_ic_mean, - compute_ic_win_rate, - compute_icir, - compute_pairwise_correlation, - compute_quintile_returns, - compute_turnover, -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -@pytest.fixture -def rng(): - return np.random.default_rng(123) - - -@pytest.fixture -def perfect_signal(rng): - """Signal perfectly correlated with returns -> IC should be ~1.0.""" - M, T = 50, 60 - returns = rng.normal(0, 0.01, (M, T)) - signals = returns.copy() # Perfect correlation - return signals, returns - - -@pytest.fixture -def random_signal(rng): - """Random signal independent of returns -> IC should be ~0.""" - M, T = 50, 80 - returns = rng.normal(0, 0.01, (M, T)) - signals = rng.normal(0, 1.0, (M, T)) # Independent - return signals, returns - - -@pytest.fixture -def known_quintile_signal(rng): - """Signal where high-signal assets have high returns.""" - M, T = 100, 50 - signals = np.tile(np.arange(M, dtype=np.float64).reshape(M, 1), (1, T)) - # Returns correlated with signal rank - returns = signals * 0.001 + rng.normal(0, 0.001, (M, T)) - return signals, returns - - -# --------------------------------------------------------------------------- -# IC computation -# --------------------------------------------------------------------------- - -class TestIC: - """Test Information Coefficient computation.""" - - def test_perfect_signal_ic_near_one(self, perfect_signal): - signals, returns = perfect_signal - ic_series = compute_ic(signals, returns) - valid = ic_series[~np.isnan(ic_series)] - assert len(valid) > 0 - # Perfect correlation 
should give IC close to 1.0 - mean_ic = np.mean(valid) - assert mean_ic > 0.9, f"Expected IC > 0.9, got {mean_ic}" - - def test_random_signal_ic_near_zero(self, random_signal): - signals, returns = random_signal - ic_series = compute_ic(signals, returns) - valid = ic_series[~np.isnan(ic_series)] - assert len(valid) > 0 - # Random signal should give IC near 0 - mean_ic = np.mean(np.abs(valid)) - assert mean_ic < 0.2, f"Expected |IC| < 0.2, got {mean_ic}" - - def test_ic_shape(self, perfect_signal): - signals, returns = perfect_signal - ic_series = compute_ic(signals, returns) - assert ic_series.shape == (signals.shape[1],) - - def test_ic_with_nans(self, rng): - M, T = 30, 20 - signals = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - # Inject NaNs - signals[0, :] = np.nan - signals[:, 0] = np.nan - ic_series = compute_ic(signals, returns) - assert ic_series.shape == (T,) - - def test_ic_too_few_assets_returns_nan(self): - # Only 3 assets (below threshold of 5) - signals = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64) - returns = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], dtype=np.float64) - ic_series = compute_ic(signals, returns) - assert np.all(np.isnan(ic_series)) - - -# --------------------------------------------------------------------------- -# ICIR computation -# --------------------------------------------------------------------------- - -class TestICIR: - """Test ICIR = mean(IC) / std(IC).""" - - def test_icir_positive_for_good_signal(self, rng): - # Use a signal that is correlated but not perfectly, so IC has variance - M, T = 50, 80 - returns = rng.normal(0, 0.01, (M, T)) - signals = returns + rng.normal(0, 0.005, (M, T)) # Noisy correlation - ic_series = compute_ic(signals, returns) - icir = compute_icir(ic_series) - assert icir > 0, f"Expected positive ICIR, got {icir}" - - def test_icir_near_zero_for_random(self, random_signal): - signals, returns = random_signal - ic_series = compute_ic(signals, returns) - icir = 
compute_icir(ic_series) - # Random signal: ICIR should be small in magnitude - assert abs(icir) < 2.0, f"Expected small ICIR, got {icir}" - - def test_icir_with_few_valid_points(self): - ic_series = np.array([np.nan, np.nan, 0.05]) - icir = compute_icir(ic_series) - # Only 1 valid point -> returns 0.0 - assert icir == 0.0 - - def test_icir_constant_ic_returns_zero(self): - ic_series = np.array([0.05, 0.05, 0.05, 0.05]) - icir = compute_icir(ic_series) - # std = 0 -> returns 0.0 - assert icir == 0.0 - - -# --------------------------------------------------------------------------- -# IC-derived statistics -# --------------------------------------------------------------------------- - -class TestICStats: - """Test IC mean and win rate.""" - - def test_ic_mean_absolute(self): - ic_series = np.array([0.1, -0.05, 0.08, -0.03, np.nan]) - result = compute_ic_mean(ic_series) - expected = np.mean(np.abs([0.1, 0.05, 0.08, 0.03])) - np.testing.assert_almost_equal(result, expected) - - def test_ic_win_rate(self): - ic_series = np.array([0.1, -0.05, 0.08, -0.03, 0.02, np.nan]) - result = compute_ic_win_rate(ic_series) - # 3 positive out of 5 valid - np.testing.assert_almost_equal(result, 0.6) - - def test_ic_mean_all_nan(self): - ic_series = np.array([np.nan, np.nan, np.nan]) - assert compute_ic_mean(ic_series) == 0.0 - - def test_ic_win_rate_all_nan(self): - ic_series = np.array([np.nan, np.nan]) - assert compute_ic_win_rate(ic_series) == 0.0 - - -# --------------------------------------------------------------------------- -# Pairwise correlation -# --------------------------------------------------------------------------- - -class TestPairwiseCorrelation: - """Test pairwise cross-sectional correlation.""" - - def test_identical_signals_correlation_one(self, rng): - M, T = 30, 40 - signals = rng.normal(0, 1, (M, T)) - corr = compute_pairwise_correlation(signals, signals) - assert corr > 0.95, f"Expected corr > 0.95 for identical, got {corr}" - - def 
test_independent_signals_low_correlation(self, rng): - M, T = 50, 60 - a = rng.normal(0, 1, (M, T)) - b = rng.normal(0, 1, (M, T)) - corr = compute_pairwise_correlation(a, b) - assert abs(corr) < 0.3, f"Expected low corr, got {corr}" - - def test_negatively_correlated(self, rng): - M, T = 30, 40 - a = rng.normal(0, 1, (M, T)) - b = -a # Perfectly negatively correlated - corr = compute_pairwise_correlation(a, b) - assert corr < -0.95, f"Expected corr < -0.95, got {corr}" - - def test_correlation_with_nans(self, rng): - M, T = 30, 20 - a = rng.normal(0, 1, (M, T)) - b = rng.normal(0, 1, (M, T)) - a[:5, :] = np.nan - corr = compute_pairwise_correlation(a, b) - # Should still produce a valid number - assert np.isfinite(corr) - - -# --------------------------------------------------------------------------- -# Quintile returns -# --------------------------------------------------------------------------- - -class TestQuintileReturns: - """Test quintile return computation.""" - - def test_quintile_keys(self, known_quintile_signal): - signals, returns = known_quintile_signal - result = compute_quintile_returns(signals, returns) - assert "Q1" in result - assert "Q5" in result - assert "long_short" in result - assert "monotonicity" in result - - def test_quintile_monotonic_for_known_signal(self, known_quintile_signal): - signals, returns = known_quintile_signal - result = compute_quintile_returns(signals, returns) - # With positively correlated signal, Q5 > Q1 - assert result["long_short"] > 0, ( - f"Expected positive long_short, got {result['long_short']}" - ) - # Monotonicity should be positive - assert result["monotonicity"] > 0.5, ( - f"Expected high monotonicity, got {result['monotonicity']}" - ) - - def test_quintile_returns_shape(self, rng): - M, T = 20, 30 - signals = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - result = compute_quintile_returns(signals, returns, n_quantiles=5) - # Should have Q1..Q5 plus long_short and monotonicity - assert 
len(result) == 7 - - -# --------------------------------------------------------------------------- -# Turnover -# --------------------------------------------------------------------------- - -class TestTurnover: - """Test portfolio turnover computation.""" - - def test_constant_signal_zero_turnover(self): - M, T = 20, 10 - signals = np.tile(np.arange(M, dtype=np.float64).reshape(M, 1), (1, T)) - turnover = compute_turnover(signals, top_fraction=0.2) - assert turnover == 0.0 - - def test_random_signal_positive_turnover(self, rng): - M, T = 30, 50 - signals = rng.normal(0, 1, (M, T)) - turnover = compute_turnover(signals, top_fraction=0.2) - assert 0 <= turnover <= 1.0 - - -# --------------------------------------------------------------------------- -# Comprehensive factor stats -# --------------------------------------------------------------------------- - -class TestFactorStats: - """Test the compute_factor_stats wrapper.""" - - def test_factor_stats_keys(self, rng): - M, T = 30, 40 - signals = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - stats = compute_factor_stats(signals, returns) - assert "ic_mean" in stats - assert "icir" in stats - assert "ic_win_rate" in stats - assert "Q1" in stats - assert "long_short" in stats - assert "turnover" in stats - assert "ic_series" in stats - - def test_factor_stats_ic_series_shape(self, rng): - M, T = 20, 30 - signals = rng.normal(0, 1, (M, T)) - returns = rng.normal(0, 0.01, (M, T)) - stats = compute_factor_stats(signals, returns) - assert stats["ic_series"].shape == (T,) diff --git a/src/factorminer/factorminer/tests/test_expression_tree.py b/src/factorminer/factorminer/tests/test_expression_tree.py deleted file mode 100644 index 1462bb7..0000000 --- a/src/factorminer/factorminer/tests/test_expression_tree.py +++ /dev/null @@ -1,307 +0,0 @@ -"""Tests for the expression tree and parser modules.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from 
src.factorminer.factorminer.core.parser import parse, try_parse, tokenize -from src.factorminer.factorminer.core.expression_tree import ( - ConstantNode, - ExpressionTree, - LeafNode, - OperatorNode, -) -from src.factorminer.factorminer.core.types import OPERATOR_REGISTRY, get_operator - - -# --------------------------------------------------------------------------- -# Parsing simple formulas -# --------------------------------------------------------------------------- - -class TestParseSimple: - """Test parsing of basic single-operator formulas.""" - - def test_parse_neg_close(self): - tree = parse("Neg($close)") - assert tree.to_string() == "Neg($close)" - - def test_parse_add_open_close(self): - tree = parse("Add($open, $close)") - assert tree.to_string() == "Add($open, $close)" - - def test_parse_leaf_only(self): - tree = parse("$close") - assert tree.to_string() == "$close" - assert tree.depth() == 1 - assert tree.size() == 1 - - def test_parse_constant(self): - tree = parse("0.0001") - assert tree.depth() == 1 - - def test_parse_div_with_two_features(self): - tree = parse("Div($high, $low)") - assert tree.to_string() == "Div($high, $low)" - - def test_parse_sub(self): - tree = parse("Sub($close, $open)") - assert tree.to_string() == "Sub($close, $open)" - - def test_parse_operator_with_window(self): - tree = parse("Mean($close, 20)") - assert tree.to_string() == "Mean($close, 20)" - - def test_parse_ema_with_window(self): - tree = parse("EMA($close, 10)") - assert tree.to_string() == "EMA($close, 10)" - - -# --------------------------------------------------------------------------- -# Parsing complex nested formulas from the paper -# --------------------------------------------------------------------------- - -class TestParseComplex: - """Test parsing of complex nested formulas (paper factors).""" - - def test_factor_006(self): - """Neg(Div(Sub($close, $vwap), $vwap))""" - formula = "Neg(Div(Sub($close, $vwap), $vwap))" - tree = parse(formula) - assert 
tree.to_string() == formula - - def test_factor_002(self): - """Neg(Div(Sub($close, EMA($close, 10)), EMA($close, 18)))""" - formula = "Neg(Div(Sub($close, EMA($close, 10)), EMA($close, 18)))" - tree = parse(formula) - assert tree.to_string() == formula - - def test_factor_046_ifelse(self): - """Complex IfElse with Greater, Std, Mean, Neg, CsRank, Delta, Div, Sub, Add.""" - formula = ( - "IfElse(Greater(Std($returns, 12), Mean(Std($returns, 12), 48)), " - "Neg(CsRank(Delta($close, 3))), " - "Neg(CsRank(Div(Sub($close, $low), Add(Sub($high, $low), 0.0001)))))" - ) - tree = parse(formula) - roundtrip = tree.to_string() - # Parse roundtrip should also succeed - tree2 = parse(roundtrip) - assert tree2.to_string() == roundtrip - - def test_nested_csrank_corr(self): - formula = "CsRank(Corr($close, $volume, 20))" - tree = parse(formula) - assert tree.to_string() == formula - - def test_deeply_nested(self): - formula = "CsRank(Neg(Div(Sub($close, Mean($close, 20)), Std($close, 20))))" - tree = parse(formula) - assert tree.to_string() == formula - - -# --------------------------------------------------------------------------- -# Roundtrip: parse -> to_string -> parse -# --------------------------------------------------------------------------- - -class TestRoundtrip: - """Test that parse -> to_string -> parse produces identical trees.""" - - @pytest.mark.parametrize( - "formula", - [ - "Neg($close)", - "Add($open, $close)", - "Div(Sub($close, $vwap), $vwap)", - "Mean($close, 20)", - "EMA($close, 10)", - "CsRank(Std($returns, 12))", - "IfElse(Greater($close, $open), $high, $low)", - ], - ) - def test_roundtrip(self, formula): - tree1 = parse(formula) - s1 = tree1.to_string() - tree2 = parse(s1) - s2 = tree2.to_string() - assert s1 == s2 - - -# --------------------------------------------------------------------------- -# Expression tree evaluation with mock data -# --------------------------------------------------------------------------- - -class TestEvaluate: - """Test 
evaluate on known inputs.""" - - def test_neg_evaluate(self, small_data): - tree = parse("Neg($close)") - result = tree.evaluate(small_data) - np.testing.assert_array_almost_equal(result, -small_data["$close"]) - - def test_add_evaluate(self, small_data): - tree = parse("Add($open, $close)") - result = tree.evaluate(small_data) - expected = small_data["$open"] + small_data["$close"] - np.testing.assert_array_almost_equal(result, expected) - - def test_sub_evaluate(self, small_data): - tree = parse("Sub($close, $open)") - result = tree.evaluate(small_data) - expected = small_data["$close"] - small_data["$open"] - np.testing.assert_array_almost_equal(result, expected) - - def test_div_evaluate(self, small_data): - tree = parse("Div($high, $low)") - result = tree.evaluate(small_data) - assert result.shape == small_data["$high"].shape - # Should be positive since high > low - valid = ~np.isnan(result) & (result != 0) - assert np.all(result[valid] > 0) - - def test_constant_in_formula(self, small_data): - tree = parse("Add($close, 0.0001)") - result = tree.evaluate(small_data) - # The constant becomes a ConstantNode, which is treated as a trailing - # parameter if arity is 2 and it becomes the second child. 
- assert result.shape == small_data["$close"].shape - - def test_nested_evaluate_shape(self, small_data): - tree = parse("Neg(Div(Sub($close, $vwap), $vwap))") - result = tree.evaluate(small_data) - assert result.shape == small_data["$close"].shape - - -# --------------------------------------------------------------------------- -# Tree depth and size -# --------------------------------------------------------------------------- - -class TestTreeStructure: - """Test depth() and size() computations.""" - - def test_leaf_depth(self): - tree = parse("$close") - assert tree.depth() == 1 - - def test_leaf_size(self): - tree = parse("$close") - assert tree.size() == 1 - - def test_unary_depth(self): - tree = parse("Neg($close)") - assert tree.depth() == 2 - - def test_unary_size(self): - tree = parse("Neg($close)") - assert tree.size() == 2 - - def test_binary_depth(self): - tree = parse("Add($open, $close)") - assert tree.depth() == 2 - - def test_binary_size(self): - tree = parse("Add($open, $close)") - assert tree.size() == 3 # Add + $open + $close - - def test_nested_depth(self): - tree = parse("Neg(Div(Sub($close, $vwap), $vwap))") - assert tree.depth() == 4 # Neg -> Div -> Sub -> $close - - def test_nested_size(self): - tree = parse("Neg(Div(Sub($close, $vwap), $vwap))") - assert tree.size() == 6 # Neg, Div, Sub, $close, $vwap, $vwap - - def test_leaf_features(self): - tree = parse("Neg(Div(Sub($close, $vwap), $vwap))") - feats = tree.leaf_features() - assert feats == ["$close", "$vwap"] - - def test_clone_preserves_structure(self): - tree = parse("Add($open, $close)") - cloned = tree.clone() - assert cloned.to_string() == tree.to_string() - assert cloned.depth() == tree.depth() - assert cloned.size() == tree.size() - - -# --------------------------------------------------------------------------- -# Error handling -# --------------------------------------------------------------------------- - -class TestErrorHandling: - """Test that invalid inputs raise 
appropriate errors.""" - - def test_unknown_operator(self): - with pytest.raises(SyntaxError, match="Unknown operator"): - parse("FooBar($close)") - - def test_unknown_feature(self): - with pytest.raises(SyntaxError, match="Unknown feature"): - parse("Neg($foobar)") - - def test_wrong_arity_too_few(self): - with pytest.raises(SyntaxError, match="expects"): - parse("Add($close)") - - def test_wrong_arity_too_many_nodes(self): - # Neg expects 1 expression arg; passing 2 should fail - with pytest.raises(SyntaxError): - parse("Neg($close, $open)") - - def test_empty_string(self): - with pytest.raises((SyntaxError, IndexError)): - parse("") - - def test_unmatched_paren(self): - with pytest.raises(SyntaxError): - parse("Neg($close") - - def test_trailing_content(self): - with pytest.raises(SyntaxError, match="Unexpected trailing"): - parse("Neg($close) extra") - - def test_try_parse_returns_none_on_failure(self): - assert try_parse("InvalidOp($close)") is None - assert try_parse("") is None - - def test_try_parse_returns_tree_on_success(self): - result = try_parse("Neg($close)") - assert result is not None - assert result.to_string() == "Neg($close)" - - def test_missing_feature_in_data(self, small_data): - tree = parse("Neg($close)") - data_missing = {k: v for k, v in small_data.items() if k != "$close"} - with pytest.raises(KeyError, match="\\$close"): - tree.evaluate(data_missing) - - -# --------------------------------------------------------------------------- -# Tokenizer -# --------------------------------------------------------------------------- - -class TestTokenizer: - """Test the tokenizer separately.""" - - def test_simple_tokens(self): - tokens = tokenize("Neg($close)") - types = [t.type.name for t in tokens] - assert types == ["IDENT", "LPAREN", "FEATURE", "RPAREN", "EOF"] - - def test_number_token(self): - tokens = tokenize("0.0001") - assert tokens[0].type.name == "NUMBER" - assert tokens[0].value == "0.0001" - - def test_negative_number_token(self): - 
tokens = tokenize("Mean($close, -3)") - # -3 should be a number token after comma - num_tokens = [t for t in tokens if t.type.name == "NUMBER"] - assert len(num_tokens) == 1 - assert num_tokens[0].value == "-3" - - def test_whitespace_handling(self): - tokens = tokenize(" Add( $open , $close ) ") - ident_tokens = [t for t in tokens if t.type.name == "IDENT"] - assert len(ident_tokens) == 1 - assert ident_tokens[0].value == "Add" diff --git a/src/factorminer/factorminer/tests/test_helix_loop.py b/src/factorminer/factorminer/tests/test_helix_loop.py deleted file mode 100644 index 54ffe10..0000000 --- a/src/factorminer/factorminer/tests/test_helix_loop.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Tests for the Helix Loop (core/helix_loop.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -try: - from factorminer.core.helix_loop import HelixLoop - HAS_HELIX = True -except ImportError: - HAS_HELIX = False - -from src.factorminer.factorminer.agent.llm_interface import MockProvider -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.config import MiningConfig -from src.factorminer.factorminer.core.ralph_loop import EvaluationResult - - -pytestmark = pytest.mark.skipif(not HAS_HELIX, reason="helix_loop not yet built") - - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -@pytest.fixture -def small_tensor(rng): - """Small data tensor and returns for HelixLoop tests.""" - M, T, F = 10, 50, 3 - data = rng.normal(0, 1, (M, T, F)) - close = 100.0 + np.cumsum(rng.normal(0, 0.5, (M, T)), axis=1) - returns = np.zeros((M, T)) - returns[:, 1:] = np.diff(close, axis=1) / close[:, :-1] - return data, returns - - -# ----------------------------------------------------------------------- -# HelixLoop can be instantiated with all defaults -# ----------------------------------------------------------------------- - -def 
test_helix_loop_instantiates_with_defaults(small_tensor): - """HelixLoop with all features off should be instantiable.""" - data, returns = small_tensor - config = MiningConfig(target_library_size=5, max_iterations=1) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=False, - enable_knowledge_graph=False, - enable_auto_inventor=False, - ) - assert loop is not None - - -# ----------------------------------------------------------------------- -# HelixLoop with canonicalize=True -# ----------------------------------------------------------------------- - -def test_helix_loop_canonicalize_flag(small_tensor): - """HelixLoop with canonicalize=True should initialize the canonicalizer.""" - data, returns = small_tensor - config = MiningConfig(target_library_size=5, max_iterations=1) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=True, - ) - assert loop._canonicalize is True - - -# ----------------------------------------------------------------------- -# HelixLoop with MockProvider runs 1 iteration -# ----------------------------------------------------------------------- - -def test_helix_loop_runs_one_iteration(small_tensor): - """HelixLoop should complete 1 iteration without error using MockProvider.""" - data, returns = small_tensor - config = MiningConfig( - target_library_size=3, - max_iterations=1, - batch_size=5, - ) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=False, - enable_knowledge_graph=False, - enable_auto_inventor=False, - ) - # Run the loop -- should not raise - loop.run() - assert loop.library is not None - - -def test_phase2_revocation_updates_stats_and_library_state(small_tensor): - """Post-admission revocation should keep stats aligned with library state.""" - 
data, returns = small_tensor - config = MiningConfig( - target_library_size=3, - max_iterations=1, - batch_size=5, - ic_threshold=0.0001, - correlation_threshold=0.95, - ) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=False, - enable_knowledge_graph=False, - enable_auto_inventor=False, - ) - - original_validate = loop._helix_validate - - def force_one_revocation(results, admitted_results): - rejected = original_validate(results, admitted_results) - for admitted in admitted_results: - if admitted.admitted: - loop._revoke_admission(admitted, results, "forced test revocation") - return rejected + 1 - return rejected - - loop._helix_validate = force_one_revocation - - stats = loop._run_iteration(batch_size=5) - - assert stats["admitted"] == loop.library.size - if loop.library.correlation_matrix is not None: - assert loop.library.correlation_matrix.shape[0] == loop.library.size - - -def test_revoke_admission_rebuilds_library_indices(small_tensor): - """Revoking a factor should rebuild the library correlation bookkeeping.""" - data, returns = small_tensor - config = MiningConfig(target_library_size=5, max_iterations=1) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=False, - enable_knowledge_graph=False, - enable_auto_inventor=False, - ) - - factor_a = Factor( - id=0, - name="factor_a", - formula="Mean($close, 5)", - category="test", - ic_mean=0.1, - icir=1.0, - ic_win_rate=0.6, - max_correlation=0.0, - batch_number=1, - signals=np.ones_like(returns), - ) - factor_b = Factor( - id=0, - name="factor_b", - formula="Std($close, 5)", - category="test", - ic_mean=0.08, - icir=0.9, - ic_win_rate=0.55, - max_correlation=0.1, - batch_number=1, - signals=np.full_like(returns, 2.0), - ) - - loop.library.admit_factor(factor_a) - loop.library.admit_factor(factor_b) - - result = 
EvaluationResult( - factor_name="factor_a", - formula="Mean($close, 5)", - admitted=True, - ) - loop._revoke_admission(result, [], "forced test revocation") - - assert loop.library.size == 1 - assert list(loop.library.factors.values())[0].name == "factor_b" - assert loop.library._id_to_index == {list(loop.library.factors.keys())[0]: 0} - assert loop.library.correlation_matrix is not None - assert loop.library.correlation_matrix.shape == (1, 1) - - -def test_helix_embedding_screen_filters_library_duplicates(small_tensor): - """Embedding-aware synthesis should drop near-duplicates of admitted factors.""" - data, returns = small_tensor - config = MiningConfig(target_library_size=5, max_iterations=1) - provider = MockProvider() - - library = FactorLibrary(correlation_threshold=0.95, ic_threshold=0.0001) - library.admit_factor( - Factor( - id=0, - name="existing_factor", - formula="Mean($close, 5)", - category="test", - ic_mean=0.1, - icir=1.0, - ic_win_rate=0.6, - max_correlation=0.0, - batch_number=0, - signals=np.ones_like(returns), - ) - ) - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - library=library, - canonicalize=False, - enable_embeddings=True, - enable_knowledge_graph=False, - enable_auto_inventor=False, - ) - - deduped, canon_dupes, semantic_dupes = loop._canonicalize_and_dedup( - [ - ("dup_factor", "Mean($close, 5)"), - ("novel_factor", "Std($close, 5)"), - ] - ) - - assert canon_dupes == 0 - assert semantic_dupes == 1 - assert deduped == [("novel_factor", "Std($close, 5)")] diff --git a/src/factorminer/factorminer/tests/test_knowledge_graph.py b/src/factorminer/factorminer/tests/test_knowledge_graph.py deleted file mode 100644 index adce2db..0000000 --- a/src/factorminer/factorminer/tests/test_knowledge_graph.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Tests for the factor knowledge graph (memory/knowledge_graph.py).""" - -from __future__ import annotations - -import pytest - -from 
src.factorminer.factorminer.memory.knowledge_graph import ( - EdgeType, - FactorKnowledgeGraph, - FactorNode, -) - - -# ----------------------------------------------------------------------- -# Basic node and edge operations -# ----------------------------------------------------------------------- - -def test_add_factor(): - kg = FactorKnowledgeGraph() - node = FactorNode( - factor_id="f1", - formula="CsRank($close)", - ic_mean=0.05, - operators=["CsRank"], - features=["$close"], - admitted=True, - ) - kg.add_factor(node) - assert kg.get_factor_count() == 1 - # Operator node should also be created - assert kg.get_edge_count() >= 1 - assert kg.get_factor_node("f1") is not None - - -def test_list_factor_nodes_filters_admitted(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="f1", formula="CsRank($close)", operators=["CsRank"], admitted=True, - )) - kg.add_factor(FactorNode( - factor_id="f2", formula="Neg($volume)", operators=["Neg"], admitted=False, - )) - - admitted_ids = [node.factor_id for node in kg.list_factor_nodes(admitted_only=True)] - all_ids = [node.factor_id for node in kg.list_factor_nodes()] - - assert admitted_ids == ["f1"] - assert set(all_ids) == {"f1", "f2"} - - -def test_add_correlation_edge(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode(factor_id="f1", formula="A", operators=["CsRank"])) - kg.add_factor(FactorNode(factor_id="f2", formula="B", operators=["Neg"])) - - # Below threshold -> no edge - kg.add_correlation_edge("f1", "f2", rho=0.3, threshold=0.4) - initial_edges = kg.get_edge_count() - - # Above threshold -> edge added (bidirectional = 2 edges) - kg.add_correlation_edge("f1", "f2", rho=0.6, threshold=0.4) - assert kg.get_edge_count() >= initial_edges + 2 - - -# ----------------------------------------------------------------------- -# find_saturated_regions -# ----------------------------------------------------------------------- - -def test_find_saturated_regions(): - kg = FactorKnowledgeGraph() - 
for i in range(3): - kg.add_factor(FactorNode( - factor_id=f"f{i}", formula=f"Op{i}($close)", operators=[f"Op{i}"], - )) - # High correlation between f0 and f1 - kg.add_correlation_edge("f0", "f1", rho=0.8, threshold=0.4) - # Low correlation with f2 - kg.add_correlation_edge("f0", "f2", rho=0.2, threshold=0.4) - - regions = kg.find_saturated_regions(threshold=0.5) - assert len(regions) >= 1 - # f0 and f1 should be in the same cluster - found = any({"f0", "f1"}.issubset(r) for r in regions) - assert found - - -# ----------------------------------------------------------------------- -# find_complementary_patterns -# ----------------------------------------------------------------------- - -def test_find_complementary_patterns(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="f1", formula="CsRank($close)", operators=["CsRank"], - )) - kg.add_factor(FactorNode( - factor_id="f2", formula="Neg($volume)", operators=["Neg"], - )) - # Connect them via a shared operator node (indirectly) - # f1 uses CsRank, f2 uses Neg -- different operators - # Add a derivation edge so they are reachable - kg.add_derivation_edge("f2", "f1", mutation_type="test") - - complementary = kg.find_complementary_patterns("f1", max_hops=2) - # f2 uses a different operator set and is not correlated -> complementary - assert "f2" in complementary - - -# ----------------------------------------------------------------------- -# Serialization roundtrip -# ----------------------------------------------------------------------- - -def test_save_load_roundtrip(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="f1", formula="CsRank($close)", - ic_mean=0.05, operators=["CsRank"], admitted=True, - )) - kg.add_factor(FactorNode( - factor_id="f2", formula="Neg($volume)", - operators=["Neg"], admitted=True, - )) - kg.add_correlation_edge("f1", "f2", rho=0.5) - - data = kg.to_dict() - kg2 = FactorKnowledgeGraph.from_dict(data) - assert kg2.get_factor_count() == 2 - 
assert kg2.get_edge_count() == kg.get_edge_count() - - -def test_remove_factor_prunes_graph_state(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="f1", - formula="CsRank(Neg($close))", - operators=["CsRank", "Neg"], - features=["$close"], - admitted=True, - )) - - assert kg.remove_factor("f1") is True - assert kg.get_factor_count() == 0 - assert kg.get_factor_node("f1") is None - assert kg.get_edge_count() == 0 - assert kg.remove_factor("f1") is False - - -# ----------------------------------------------------------------------- -# get_operator_cooccurrence -# ----------------------------------------------------------------------- - -def test_get_operator_cooccurrence(): - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="f1", formula="CsRank(Neg($close))", - operators=["CsRank", "Neg"], admitted=True, - )) - kg.add_factor(FactorNode( - factor_id="f2", formula="CsRank(Mean($close, 10))", - operators=["CsRank", "Mean"], admitted=True, - )) - - cooc = kg.get_operator_cooccurrence() - assert ("CsRank", "Neg") in cooc - assert ("CsRank", "Mean") in cooc - assert cooc[("CsRank", "Neg")] == 1 diff --git a/src/factorminer/factorminer/tests/test_library.py b/src/factorminer/factorminer/tests/test_library.py deleted file mode 100644 index bc9eab9..0000000 --- a/src/factorminer/factorminer/tests/test_library.py +++ /dev/null @@ -1,356 +0,0 @@ -"""Tests for the factor library management system.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -@pytest.fixture -def empty_library(): - return FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - - -def _make_factor( - 
name="test", - formula="Neg($close)", - ic=0.06, - signals=None, - rng=None, - M=20, - T=60, -): - """Helper to create a Factor with random signals.""" - if signals is None and rng is not None: - signals = rng.normal(0, 1, (M, T)) - return Factor( - id=0, - name=name, - formula=formula, - category="test", - ic_mean=ic, - icir=1.0, - ic_win_rate=0.6, - max_correlation=0.0, - batch_number=1, - signals=signals, - ) - - -# --------------------------------------------------------------------------- -# Admission -# --------------------------------------------------------------------------- - -class TestAdmission: - """Test factor admission rules.""" - - def test_admit_first_factor(self, empty_library, rng): - factor = _make_factor(name="f1", ic=0.05, rng=rng) - fid = empty_library.admit_factor(factor) - assert fid == 1 - assert empty_library.size == 1 - assert factor.id == 1 - - def test_admit_assigns_incremental_ids(self, empty_library, rng): - f1 = _make_factor(name="f1", rng=rng) - f2 = _make_factor(name="f2", rng=rng) - id1 = empty_library.admit_factor(f1) - id2 = empty_library.admit_factor(f2) - assert id1 == 1 - assert id2 == 2 - - def test_check_admission_ic_below_threshold(self, empty_library, rng): - signals = rng.normal(0, 1, (20, 60)) - admitted, reason = empty_library.check_admission(0.03, signals) - assert not admitted - assert "below threshold" in reason - - def test_check_admission_first_factor(self, empty_library, rng): - signals = rng.normal(0, 1, (20, 60)) - admitted, reason = empty_library.check_admission(0.05, signals) - assert admitted - assert "First factor" in reason - - def test_check_admission_rejects_high_correlation(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - # Add a factor - f1 = _make_factor(name="f1", rng=rng) - lib.admit_factor(f1) - - # Try to admit same signals (correlation = 1.0) - admitted, reason = lib.check_admission(0.05, f1.signals) - assert not admitted - assert "correlation" in reason.lower() 
- - def test_check_admission_accepts_low_correlation(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - f1 = _make_factor(name="f1", rng=rng) - lib.admit_factor(f1) - - # Independent signals - independent_signals = rng.normal(0, 1, (20, 60)) - admitted, reason = lib.check_admission(0.05, independent_signals) - assert admitted - - def test_factor_in_library_after_admission(self, empty_library, rng): - factor = _make_factor(name="f1", rng=rng) - fid = empty_library.admit_factor(factor) - assert fid in empty_library.factors - retrieved = empty_library.get_factor(fid) - assert retrieved.name == "f1" - - -# --------------------------------------------------------------------------- -# Replacement -# --------------------------------------------------------------------------- - -class TestReplacement: - """Test the replacement mechanism (Eq. 11).""" - - def test_replacement_ic_below_floor(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - f1 = _make_factor(name="f1", ic=0.06, rng=rng) - lib.admit_factor(f1) - - signals = rng.normal(0, 1, (20, 60)) - should, fid, reason = lib.check_replacement(0.05, signals) # Below 0.10 - assert not should - assert "below replacement floor" in reason - - def test_replacement_needs_exactly_one_correlated(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - f1 = _make_factor(name="f1", ic=0.06, rng=rng) - lib.admit_factor(f1) - - # Independent signals -> 0 correlated factors - signals = rng.normal(0, 1, (20, 60)) - should, fid, reason = lib.check_replacement(0.15, signals) - assert not should - assert "0 correlated" in reason - - def test_replacement_success(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - f1_signals = rng.normal(0, 1, (20, 60)) - f1 = _make_factor(name="f1", ic=0.06, signals=f1_signals) - lib.admit_factor(f1) - - # Candidate highly correlated with f1 but much better IC - candidate_signals = f1_signals 
+ rng.normal(0, 0.1, (20, 60)) - should, old_id, reason = lib.check_replacement( - 0.15, candidate_signals, ic_min=0.10, ic_ratio=1.3 - ) - assert should - assert old_id == 1 - - def test_replace_factor(self, rng): - lib = FactorLibrary(correlation_threshold=0.5, ic_threshold=0.04) - f1 = _make_factor(name="old_factor", ic=0.06, rng=rng) - fid = lib.admit_factor(f1) - - new_factor = _make_factor(name="new_factor", ic=0.15, rng=rng) - lib.replace_factor(fid, new_factor) - - assert fid not in lib.factors - assert lib.size == 1 - remaining = lib.list_factors() - assert remaining[0].name == "new_factor" - - def test_replace_nonexistent_raises(self, empty_library, rng): - new_factor = _make_factor(name="new", rng=rng) - with pytest.raises(KeyError): - empty_library.replace_factor(999, new_factor) - - -# --------------------------------------------------------------------------- -# Correlation matrix -# --------------------------------------------------------------------------- - -class TestCorrelationMatrix: - """Test correlation matrix management.""" - - def test_matrix_initialized_on_first_admit(self, empty_library, rng): - f = _make_factor(name="f1", rng=rng) - empty_library.admit_factor(f) - assert empty_library.correlation_matrix is not None - assert empty_library.correlation_matrix.shape == (1, 1) - - def test_matrix_grows_with_admissions(self, empty_library, rng): - for i in range(3): - f = _make_factor(name=f"f{i}", rng=rng) - empty_library.admit_factor(f) - assert empty_library.correlation_matrix.shape == (3, 3) - - def test_matrix_symmetric(self, rng): - lib = FactorLibrary() - for i in range(4): - f = _make_factor(name=f"f{i}", rng=rng) - lib.admit_factor(f) - mat = lib.correlation_matrix - np.testing.assert_array_almost_equal(mat, mat.T) - - def test_update_correlation_matrix_full(self, rng): - lib = FactorLibrary() - for i in range(3): - f = _make_factor(name=f"f{i}", rng=rng) - lib.admit_factor(f) - # Full recompute - lib.update_correlation_matrix() - 
assert lib.correlation_matrix.shape == (3, 3) - np.testing.assert_array_almost_equal( - lib.correlation_matrix, lib.correlation_matrix.T - ) - - def test_compute_correlation_same_signals(self, rng): - lib = FactorLibrary() - signals = rng.normal(0, 1, (20, 60)) - corr = lib.compute_correlation(signals, signals) - assert corr > 0.95 - - -# --------------------------------------------------------------------------- -# Queries and diagnostics -# --------------------------------------------------------------------------- - -class TestQueries: - """Test library query methods.""" - - def test_size_property(self, mock_library): - assert mock_library.size == 3 - - def test_list_factors(self, mock_library): - factors = mock_library.list_factors() - assert len(factors) == 3 - # Should be sorted by ID - ids = [f.id for f in factors] - assert ids == sorted(ids) - - def test_get_factor(self, mock_library): - factors = mock_library.list_factors() - fid = factors[0].id - f = mock_library.get_factor(fid) - assert f.id == fid - - def test_get_factor_nonexistent_raises(self, mock_library): - with pytest.raises(KeyError): - mock_library.get_factor(9999) - - def test_get_factors_by_category(self, mock_library): - result = mock_library.get_factors_by_category("test") - assert len(result) == 3 - - def test_get_factors_by_nonexistent_category(self, mock_library): - result = mock_library.get_factors_by_category("nonexistent") - assert len(result) == 0 - - def test_get_diagnostics(self, mock_library): - diag = mock_library.get_diagnostics() - assert "size" in diag - assert diag["size"] == 3 - assert "avg_correlation" in diag - assert "max_correlation" in diag - assert "category_counts" in diag - assert "saturation" in diag - - def test_get_state_summary(self, mock_library): - summary = mock_library.get_state_summary() - assert "library_size" in summary - assert summary["library_size"] == 3 - assert "categories" in summary - assert "recent_admissions" in summary - - -# 
--------------------------------------------------------------------------- -# Factor serialization -# --------------------------------------------------------------------------- - -class TestFactorSerialization: - """Test Factor to_dict / from_dict.""" - - def test_factor_to_dict(self): - f = Factor( - id=1, - name="test", - formula="Neg($close)", - category="momentum", - ic_mean=0.06, - icir=1.0, - ic_win_rate=0.6, - max_correlation=0.1, - batch_number=1, - admission_date="2024-01-01 00:00:00", - ) - d = f.to_dict() - assert d["id"] == 1 - assert d["name"] == "test" - assert d["formula"] == "Neg($close)" - assert "signals" not in d - - def test_factor_from_dict(self): - d = { - "id": 2, - "name": "restored", - "formula": "Add($open, $close)", - "category": "arithmetic", - "ic_mean": 0.08, - "icir": 1.2, - "ic_win_rate": 0.65, - "max_correlation": 0.2, - "batch_number": 3, - "admission_date": "2024-06-15 12:00:00", - } - f = Factor.from_dict(d) - assert f.id == 2 - assert f.name == "restored" - assert f.formula == "Add($open, $close)" - - def test_factor_roundtrip(self): - f = Factor( - id=5, - name="roundtrip", - formula="CsRank($close)", - category="cross_sectional", - ic_mean=0.07, - icir=0.9, - ic_win_rate=0.58, - max_correlation=0.15, - batch_number=2, - ) - restored = Factor.from_dict(f.to_dict()) - assert restored.name == f.name - assert restored.ic_mean == f.ic_mean - assert restored.formula == f.formula - - def test_factor_roundtrip_preserves_provenance(self): - f = Factor( - id=6, - name="with_provenance", - formula="Neg($close)", - category="test", - ic_mean=0.05, - icir=0.9, - ic_win_rate=0.55, - max_correlation=0.1, - batch_number=2, - provenance={ - "run_id": "run_001", - "generator_family": "MockProvider", - "candidate_rank": 2, - }, - ) - - restored = Factor.from_dict(f.to_dict()) - - assert restored.provenance["run_id"] == "run_001" - assert restored.provenance["generator_family"] == "MockProvider" - assert restored.provenance["candidate_rank"] == 
2 diff --git a/src/factorminer/factorminer/tests/test_memory.py b/src/factorminer/factorminer/tests/test_memory.py deleted file mode 100644 index 6e75676..0000000 --- a/src/factorminer/factorminer/tests/test_memory.py +++ /dev/null @@ -1,405 +0,0 @@ -"""Tests for the experience memory system.""" - -from __future__ import annotations - -import json -import tempfile -from pathlib import Path - -import pytest - -from src.factorminer.factorminer.memory.experience_memory import ExperienceMemoryManager -from src.factorminer.factorminer.memory.embeddings import FormulaEmbedder -from src.factorminer.factorminer.memory.kg_retrieval import retrieve_memory_enhanced -from src.factorminer.factorminer.memory.knowledge_graph import FactorKnowledgeGraph, FactorNode -from src.factorminer.factorminer.memory.memory_store import ( - ExperienceMemory, - ForbiddenDirection, - MiningState, - StrategicInsight, - SuccessPattern, -) -from src.factorminer.factorminer.memory.formation import form_memory -from src.factorminer.factorminer.memory.evolution import evolve_memory -from src.factorminer.factorminer.memory.retrieval import retrieve_memory - - -# --------------------------------------------------------------------------- -# Initialization -# --------------------------------------------------------------------------- - -class TestInitialization: - """Test memory manager initialization with default patterns.""" - - def test_default_success_patterns_loaded(self, mock_memory): - assert len(mock_memory.memory.success_patterns) > 0 - - def test_default_forbidden_directions_loaded(self, mock_memory): - assert len(mock_memory.memory.forbidden_directions) > 0 - - def test_default_insights_loaded(self, mock_memory): - assert len(mock_memory.memory.insights) > 0 - - def test_initial_version_is_zero(self, mock_memory): - assert mock_memory.version == 0 - - def test_default_pattern_names(self, mock_memory): - names = [p.name for p in mock_memory.memory.success_patterns] - assert "Higher Moment 
Regimes" in names - assert "PV Corr Interaction" in names - - def test_default_forbidden_names(self, mock_memory): - names = [f.name for f in mock_memory.memory.forbidden_directions] - assert "Standardized Returns/Amount" in names - assert "VWAP Deviation variants" in names - - def test_memory_reset(self, mock_memory): - # Modify state - mock_memory.memory.version = 99 - mock_memory.reset() - assert mock_memory.version == 0 - assert len(mock_memory.memory.success_patterns) > 0 - - -# --------------------------------------------------------------------------- -# Formation -# --------------------------------------------------------------------------- - -class TestFormation: - """Test memory formation from trajectory.""" - - def test_form_memory_creates_new_memory(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - assert isinstance(formed, ExperienceMemory) - - def test_form_memory_updates_state(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - # Should count 2 admitted factors - assert formed.state.library_size == mock_memory.memory.state.library_size + 2 - - def test_form_memory_recent_admissions(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - assert len(formed.state.recent_admissions) == 2 - - def test_form_memory_recent_rejections(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - assert len(formed.state.recent_rejections) == 2 - - def test_form_memory_admission_log(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - assert len(formed.state.admission_log) >= 1 - last_log = formed.state.admission_log[-1] - assert last_log["batch"] == 1 - assert last_log["admitted"] == 2 - assert last_log["rejected"] == 2 - - def 
test_form_memory_empty_trajectory(self, mock_memory): - formed = form_memory(mock_memory.memory, [], batch_number=1) - assert formed.state.library_size == mock_memory.memory.state.library_size - - def test_form_memory_extracts_success_patterns(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - # Should have some patterns (at least the defaults) - assert len(formed.success_patterns) >= len(mock_memory.memory.success_patterns) - - -# --------------------------------------------------------------------------- -# Evolution -# --------------------------------------------------------------------------- - -class TestEvolution: - """Test memory evolution (merge + consolidate).""" - - def test_evolve_increments_version(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - evolved = evolve_memory(mock_memory.memory, formed) - assert evolved.version == mock_memory.memory.version + 1 - - def test_evolve_merges_success_patterns(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - evolved = evolve_memory(mock_memory.memory, formed) - # Should have at least as many patterns as before - assert len(evolved.success_patterns) >= len(mock_memory.memory.success_patterns) - - def test_evolve_merges_forbidden_directions(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - evolved = evolve_memory(mock_memory.memory, formed) - assert len(evolved.forbidden_directions) >= len(mock_memory.memory.forbidden_directions) - - def test_evolve_caps_memory_size(self, mock_memory, sample_trajectory): - formed = form_memory(mock_memory.memory, sample_trajectory, batch_number=1) - evolved = evolve_memory( - mock_memory.memory, formed, - max_success_patterns=5, - max_failure_patterns=5, - max_insights=5, - ) - assert 
len(evolved.success_patterns) <= 5 - assert len(evolved.forbidden_directions) <= 5 - assert len(evolved.insights) <= 5 - - -# --------------------------------------------------------------------------- -# Retrieval -# --------------------------------------------------------------------------- - -class TestRetrieval: - """Test context-dependent memory retrieval.""" - - def test_retrieve_returns_dict(self, mock_memory): - result = mock_memory.retrieve() - assert isinstance(result, dict) - - def test_retrieve_has_required_keys(self, mock_memory): - result = mock_memory.retrieve() - assert "recommended_directions" in result - assert "forbidden_directions" in result - assert "insights" in result - assert "library_state" in result - assert "prompt_text" in result - - def test_retrieve_prompt_text_is_string(self, mock_memory): - result = mock_memory.retrieve() - assert isinstance(result["prompt_text"], str) - assert len(result["prompt_text"]) > 0 - - def test_retrieve_with_library_state(self, mock_memory): - lib_state = { - "library_size": 50, - "domain_saturation": {"Momentum": 0.8, "VWAP": 0.3}, - } - result = mock_memory.retrieve(library_state=lib_state) - assert result["library_state"]["library_size"] == 50 - - def test_retrieve_respects_max_limits(self, mock_memory): - result = mock_memory.retrieve(max_success=2, max_forbidden=2, max_insights=1) - assert len(result["recommended_directions"]) <= 2 - assert len(result["forbidden_directions"]) <= 2 - assert len(result["insights"]) <= 1 - - def test_retrieve_deprioritizes_saturated_patterns(self, mock_memory): - # Set high domain saturation - mock_memory.memory.state.domain_saturation = { - "Higher Moment Regimes": 0.9, - "PV Corr Interaction": 0.9, - } - result = mock_memory.retrieve(max_success=3) - # Saturated patterns should be scored lower - names = [p["name"] for p in result["recommended_directions"]] - # There should still be patterns, but saturated ones ranked lower - assert len(names) > 0 - - def 
test_enhanced_retrieval_uses_semantic_similarity_and_removals(self): - memory = ExperienceMemory() - memory.state.recent_admissions = [ - { - "factor_id": "query_factor", - "formula": "CsRank(Corr($close, $volume, 20))", - } - ] - - kg = FactorKnowledgeGraph() - kg.add_factor(FactorNode( - factor_id="neighbor_factor", - formula="CsRank(Corr($close, $volume, 20))", - operators=["CsRank", "Corr"], - features=["$close", "$volume"], - admitted=True, - )) - kg.add_factor(FactorNode( - factor_id="distant_factor", - formula="Neg(Std($returns, 10))", - operators=["Neg", "Std"], - features=["$returns"], - admitted=True, - )) - - embedder = FormulaEmbedder(use_faiss=False) - - result = retrieve_memory_enhanced( - memory, - kg=kg, - embedder=embedder, - ) - - assert result["semantic_neighbors"] - assert any("neighbor_factor" in item for item in result["semantic_neighbors"]) - - kg.remove_factor("neighbor_factor") - embedder.remove("neighbor_factor") - - refreshed = retrieve_memory_enhanced( - memory, - kg=kg, - embedder=embedder, - ) - - assert all("neighbor_factor" not in item for item in refreshed["semantic_neighbors"]) - - -# --------------------------------------------------------------------------- -# Full update cycle -# --------------------------------------------------------------------------- - -class TestUpdateCycle: - """Test the full update (formation + evolution) via the manager.""" - - def test_update_returns_summary(self, mock_memory, sample_trajectory): - summary = mock_memory.update(sample_trajectory) - assert "batch" in summary - assert "admitted_count" in summary - assert "rejected_count" in summary - assert summary["admitted_count"] == 2 - assert summary["rejected_count"] == 2 - - def test_update_increments_version(self, mock_memory, sample_trajectory): - assert mock_memory.version == 0 - mock_memory.update(sample_trajectory) - assert mock_memory.version == 1 - - def test_multiple_updates(self, mock_memory, sample_trajectory): - for i in range(3): - 
mock_memory.update(sample_trajectory) - assert mock_memory.version == 3 - - -# --------------------------------------------------------------------------- -# Save / load roundtrip -# --------------------------------------------------------------------------- - -class TestPersistence: - """Test save and load roundtrip.""" - - def test_save_load_roundtrip(self, mock_memory, sample_trajectory): - # Update memory with some data - mock_memory.update(sample_trajectory) - - with tempfile.TemporaryDirectory() as tmpdir: - path = Path(tmpdir) / "memory.json" - mock_memory.save(path) - - # Verify file exists and is valid JSON - assert path.exists() - with open(path) as f: - data = json.load(f) - assert "version" in data - assert "success_patterns" in data - - # Load into new manager - new_manager = ExperienceMemoryManager() - new_manager.load(path) - - assert new_manager.version == mock_memory.version - assert len(new_manager.memory.success_patterns) == len( - mock_memory.memory.success_patterns - ) - assert len(new_manager.memory.forbidden_directions) == len( - mock_memory.memory.forbidden_directions - ) - - def test_save_creates_directory(self, mock_memory): - with tempfile.TemporaryDirectory() as tmpdir: - path = Path(tmpdir) / "subdir" / "deep" / "memory.json" - mock_memory.save(path) - assert path.exists() - - -# --------------------------------------------------------------------------- -# Memory store serialization -# --------------------------------------------------------------------------- - -class TestMemoryStoreSerialization: - """Test data class to_dict / from_dict methods.""" - - def test_success_pattern_roundtrip(self): - pat = SuccessPattern( - name="Test Pattern", - description="A test", - template="CsRank($close)", - success_rate="High", - example_factors=["f1", "f2"], - occurrence_count=5, - ) - d = pat.to_dict() - restored = SuccessPattern.from_dict(d) - assert restored.name == pat.name - assert restored.occurrence_count == pat.occurrence_count - assert 
restored.success_rate == pat.success_rate - - def test_forbidden_direction_roundtrip(self): - fd = ForbiddenDirection( - name="Bad Direction", - description="Avoid this", - correlated_factors=["f1"], - typical_correlation=0.7, - reason="Too correlated", - occurrence_count=3, - ) - d = fd.to_dict() - restored = ForbiddenDirection.from_dict(d) - assert restored.name == fd.name - assert restored.typical_correlation == fd.typical_correlation - - def test_strategic_insight_roundtrip(self): - insight = StrategicInsight( - insight="Test insight", - evidence="Some evidence", - batch_source=5, - ) - d = insight.to_dict() - restored = StrategicInsight.from_dict(d) - assert restored.insight == insight.insight - assert restored.batch_source == 5 - - def test_mining_state_roundtrip(self): - state = MiningState( - library_size=42, - domain_saturation={"Momentum": 0.5}, - ) - d = state.to_dict() - restored = MiningState.from_dict(d) - assert restored.library_size == 42 - assert restored.domain_saturation["Momentum"] == 0.5 - - def test_full_memory_roundtrip(self): - mem = ExperienceMemory( - state=MiningState(library_size=10), - success_patterns=[ - SuccessPattern(name="P1", description="d1", template="t1", success_rate="High") - ], - forbidden_directions=[ - ForbiddenDirection(name="F1", description="d1") - ], - insights=[ - StrategicInsight(insight="I1", evidence="E1") - ], - version=3, - ) - d = mem.to_dict() - restored = ExperienceMemory.from_dict(d) - assert restored.version == 3 - assert len(restored.success_patterns) == 1 - assert len(restored.forbidden_directions) == 1 - assert len(restored.insights) == 1 - - -# --------------------------------------------------------------------------- -# Stats -# --------------------------------------------------------------------------- - -class TestStats: - """Test memory manager statistics.""" - - def test_get_stats_keys(self, mock_memory): - stats = mock_memory.get_stats() - assert "version" in stats - assert "batch_counter" in 
stats - assert "success_patterns" in stats - assert "forbidden_directions" in stats - assert "insights" in stats - - def test_get_stats_after_update(self, mock_memory, sample_trajectory): - mock_memory.update(sample_trajectory) - stats = mock_memory.get_stats() - assert stats["batch_counter"] == 1 - assert stats["version"] == 1 diff --git a/src/factorminer/factorminer/tests/test_operators.py b/src/factorminer/factorminer/tests/test_operators.py deleted file mode 100644 index 7a23e22..0000000 --- a/src/factorminer/factorminer/tests/test_operators.py +++ /dev/null @@ -1,500 +0,0 @@ -"""Tests for all operator categories via the registry.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.operators.registry import execute_operator, get_operator, list_operators, implemented_operators - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _arr(*rows): - """Build a (M, T) float64 array from nested lists.""" - return np.array(rows, dtype=np.float64) - - -@pytest.fixture -def x_simple(): - """Simple 2x10 input for operator tests.""" - return _arr( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], - ) - - -@pytest.fixture -def y_simple(): - return _arr( - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - ) - - -# --------------------------------------------------------------------------- -# Arithmetic operators -# --------------------------------------------------------------------------- - -class TestArithmeticOps: - """Test element-wise arithmetic operators.""" - - def test_add(self, x_simple, y_simple): - result = execute_operator("Add", x_simple, y_simple) - expected = x_simple + y_simple - np.testing.assert_array_almost_equal(result, expected) - - def test_sub(self, x_simple, y_simple): - result = execute_operator("Sub", x_simple, y_simple) - 
expected = x_simple - y_simple - np.testing.assert_array_almost_equal(result, expected) - - def test_mul(self, x_simple, y_simple): - result = execute_operator("Mul", x_simple, y_simple) - expected = x_simple * y_simple - np.testing.assert_array_almost_equal(result, expected) - - def test_neg_negates(self, x_simple): - result = execute_operator("Neg", x_simple) - np.testing.assert_array_almost_equal(result, -x_simple) - - def test_neg_double_neg(self, x_simple): - result = execute_operator("Neg", execute_operator("Neg", x_simple)) - np.testing.assert_array_almost_equal(result, x_simple) - - def test_abs(self): - x = _arr([-1, -2, 3, 0], [5, -6, 0, -8]) - result = execute_operator("Abs", x) - np.testing.assert_array_almost_equal(result, np.abs(x)) - - def test_sign(self): - x = _arr([-3, 0, 5], [7, -2, 0]) - result = execute_operator("Sign", x) - np.testing.assert_array_almost_equal(result, np.sign(x)) - - def test_div_by_zero_returns_nan(self): - x = _arr([1, 2, 3], [4, 5, 6]) - y = _arr([0, 0, 0], [1, 0, 2]) - result = execute_operator("Div", x, y) - # Where y is 0, result should be NaN - assert np.isnan(result[0, 0]) - assert np.isnan(result[0, 1]) - assert np.isnan(result[1, 1]) - # Where y is non-zero, should be correct - np.testing.assert_almost_equal(result[1, 0], 4.0) - np.testing.assert_almost_equal(result[1, 2], 3.0) - - def test_log_handles_negative(self): - x = _arr([-1, 0, 1, np.e - 1], [2, -3, 0.5, 10]) - result = execute_operator("Log", x) - # Log is defined as log(1+|x|)*sign(x) - expected = np.log1p(np.abs(x)) * np.sign(x) - np.testing.assert_array_almost_equal(result, expected) - - def test_sqrt_handles_negative(self): - x = _arr([-4, 0, 9, 16], [1, -1, 4, 25]) - result = execute_operator("Sqrt", x) - expected = np.sqrt(np.abs(x)) * np.sign(x) - np.testing.assert_array_almost_equal(result, expected) - - def test_square(self, x_simple): - result = execute_operator("Square", x_simple) - np.testing.assert_array_almost_equal(result, x_simple ** 2) - - 
def test_inv_zero_returns_nan(self): - x = _arr([0, 1, 2], [3, 0, 5]) - result = execute_operator("Inv", x) - assert np.isnan(result[0, 0]) - assert np.isnan(result[1, 1]) - np.testing.assert_almost_equal(result[0, 1], 1.0) - - def test_max_elementwise(self, x_simple, y_simple): - result = execute_operator("Max", x_simple, y_simple) - np.testing.assert_array_almost_equal(result, np.fmax(x_simple, y_simple)) - - def test_min_elementwise(self, x_simple, y_simple): - result = execute_operator("Min", x_simple, y_simple) - np.testing.assert_array_almost_equal(result, np.fmin(x_simple, y_simple)) - - def test_clip(self): - x = _arr([-5, -1, 0, 2, 5], [10, -10, 3, -3, 0]) - result = execute_operator("Clip", x, params={"lower": -3.0, "upper": 3.0}) - np.testing.assert_array_almost_equal(result, np.clip(x, -3.0, 3.0)) - - -# --------------------------------------------------------------------------- -# Statistical operators (rolling window) -# --------------------------------------------------------------------------- - -class TestStatisticalOps: - """Test rolling-window statistical operators.""" - - def test_mean_window3(self): - x = _arr([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - result = execute_operator("Mean", x, params={"window": 3}) - assert result.shape == (1, 10) - # First 2 values should be NaN - assert np.isnan(result[0, 0]) - assert np.isnan(result[0, 1]) - # Mean of [1,2,3] = 2.0 - np.testing.assert_almost_equal(result[0, 2], 2.0) - # Mean of [2,3,4] = 3.0 - np.testing.assert_almost_equal(result[0, 3], 3.0) - # Mean of [8,9,10] = 9.0 - np.testing.assert_almost_equal(result[0, 9], 9.0) - - def test_std_window3(self): - x = _arr([1, 2, 3, 4, 5]) - result = execute_operator("Std", x, params={"window": 3}) - assert result.shape == (1, 5) - # First 2 values NaN - assert np.isnan(result[0, 0]) - assert np.isnan(result[0, 1]) - # std of [1,2,3] with ddof=1 = 1.0 - np.testing.assert_almost_equal(result[0, 2], 1.0) - - def test_sum_window3(self): - x = _arr([1, 2, 3, 4, 5]) - 
result = execute_operator("Sum", x, params={"window": 3}) - np.testing.assert_almost_equal(result[0, 2], 6.0) # 1+2+3 - np.testing.assert_almost_equal(result[0, 4], 12.0) # 3+4+5 - - def test_ts_max_window3(self): - x = _arr([3, 1, 4, 1, 5, 9, 2, 6]) - result = execute_operator("TsMax", x, params={"window": 3}) - np.testing.assert_almost_equal(result[0, 2], 4.0) # max(3,1,4) - np.testing.assert_almost_equal(result[0, 5], 9.0) # max(1,5,9) - - def test_ts_min_window3(self): - x = _arr([3, 1, 4, 1, 5, 9, 2, 6]) - result = execute_operator("TsMin", x, params={"window": 3}) - np.testing.assert_almost_equal(result[0, 2], 1.0) # min(3,1,4) - np.testing.assert_almost_equal(result[0, 5], 1.0) # min(1,5,9) - - def test_ts_rank_basic(self): - # Ascending series: latest should have high rank - x = _arr([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - result = execute_operator("TsRank", x, params={"window": 5}) - # At index 4, window=[1,2,3,4,5], latest=5 is the largest - # count_less = 4 values less than 5, count_valid = 5 - # rank = 4 / (5-1) = 1.0 - np.testing.assert_almost_equal(result[0, 4], 1.0) - - def test_median_window3(self): - x = _arr([1, 5, 3, 4, 2]) - result = execute_operator("Median", x, params={"window": 3}) - np.testing.assert_almost_equal(result[0, 2], 3.0) # median(1,5,3) - - -# --------------------------------------------------------------------------- -# Time-series operators -# --------------------------------------------------------------------------- - -class TestTimeseriesOps: - """Test time-series operators like Delta, Delay, Return.""" - - def test_delta_period1_is_diff(self): - x = _arr([1, 3, 6, 10, 15]) - result = execute_operator("Delta", x, params={"window": 1}) - assert np.isnan(result[0, 0]) - np.testing.assert_almost_equal(result[0, 1], 2.0) # 3-1 - np.testing.assert_almost_equal(result[0, 2], 3.0) # 6-3 - np.testing.assert_almost_equal(result[0, 3], 4.0) # 10-6 - np.testing.assert_almost_equal(result[0, 4], 5.0) # 15-10 - - def 
test_delay_lags_by_period(self): - x = _arr([10, 20, 30, 40, 50]) - result = execute_operator("Delay", x, params={"window": 2}) - assert np.isnan(result[0, 0]) - assert np.isnan(result[0, 1]) - np.testing.assert_almost_equal(result[0, 2], 10.0) - np.testing.assert_almost_equal(result[0, 3], 20.0) - np.testing.assert_almost_equal(result[0, 4], 30.0) - - def test_return_period1(self): - x = _arr([100, 110, 99, 105]) - result = execute_operator("Return", x, params={"window": 1}) - assert np.isnan(result[0, 0]) - np.testing.assert_almost_equal(result[0, 1], 0.10) # 110/100 - 1 - np.testing.assert_almost_equal(result[0, 2], -0.1, decimal=2) # 99/110 - 1 - - def test_cumsum(self): - x = _arr([1, 2, 3, 4, 5]) - result = execute_operator("CumSum", x) - np.testing.assert_array_almost_equal(result[0], [1, 3, 6, 10, 15]) - - def test_cummax(self): - x = _arr([3, 1, 4, 1, 5, 9, 2]) - result = execute_operator("CumMax", x) - np.testing.assert_array_almost_equal(result[0], [3, 3, 4, 4, 5, 9, 9]) - - def test_cummin(self): - x = _arr([5, 3, 4, 1, 2, 6, 0]) - result = execute_operator("CumMin", x) - np.testing.assert_array_almost_equal(result[0], [5, 3, 3, 1, 1, 1, 0]) - - -# --------------------------------------------------------------------------- -# Cross-sectional operators -# --------------------------------------------------------------------------- - -class TestCrossSectionalOps: - """Test cross-sectional operators.""" - - def test_csrank_produces_percentiles(self): - # 5 assets, 1 time step; values 1..5 - x = _arr([1], [2], [3], [4], [5]) - result = execute_operator("CsRank", x) - assert result.shape == (5, 1) - # Ranks should be [0, 0.25, 0.5, 0.75, 1.0] - expected = _arr([0], [0.25], [0.5], [0.75], [1.0]) - np.testing.assert_array_almost_equal(result, expected) - - def test_csrank_nan_handling(self): - x = _arr([np.nan], [2], [3], [np.nan], [5]) - result = execute_operator("CsRank", x) - assert np.isnan(result[0, 0]) - assert np.isnan(result[3, 0]) - # Valid ranks for 
[2, 3, 5] = [0, 0.5, 1.0] - valid = result[~np.isnan(result)] - assert len(valid) == 3 - - def test_cszscore_zero_mean(self): - x = _arr([1], [2], [3], [4], [5]) - result = execute_operator("CsZScore", x) - # Mean of z-scores should be ~0 - np.testing.assert_almost_equal(np.nanmean(result[:, 0]), 0.0, decimal=10) - - def test_csdemean(self): - x = _arr([10], [20], [30]) - result = execute_operator("CsDemean", x) - expected = _arr([-10], [0], [10]) - np.testing.assert_array_almost_equal(result, expected) - - def test_csscale_unit_l1(self): - x = _arr([1], [2], [3]) - result = execute_operator("CsScale", x) - l1_norm = np.nansum(np.abs(result[:, 0])) - np.testing.assert_almost_equal(l1_norm, 1.0) - - -# --------------------------------------------------------------------------- -# Smoothing operators -# --------------------------------------------------------------------------- - -class TestSmoothingOps: - """Test smoothing / moving average operators.""" - - def test_sma_equals_mean(self): - x = _arr([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - sma = execute_operator("SMA", x, params={"window": 3}) - mean = execute_operator("Mean", x, params={"window": 3}) - # SMA should equal Mean for non-NaN data - valid = ~(np.isnan(sma) | np.isnan(mean)) - np.testing.assert_array_almost_equal(sma[valid], mean[valid]) - - def test_ema_convergence(self): - # Constant series: EMA should converge to that constant - x = _arr([5, 5, 5, 5, 5, 5, 5, 5, 5, 5]) - result = execute_operator("EMA", x, params={"window": 3}) - # Should all be 5 (for constant input, EMA = constant) - np.testing.assert_array_almost_equal(result[0], np.full(10, 5.0)) - - def test_ema_output_shape(self, x_simple): - result = execute_operator("EMA", x_simple, params={"window": 5}) - assert result.shape == x_simple.shape - - -# --------------------------------------------------------------------------- -# Regression operators -# --------------------------------------------------------------------------- - -class 
TestRegressionOps: - """Test rolling regression operators.""" - - def test_slope_of_linear_data(self): - # Perfectly linear: y = 2*t for each asset - t_vals = np.arange(20, dtype=np.float64) - x = np.stack([2 * t_vals, 3 * t_vals], axis=0) # (2, 20) - result = execute_operator("TsLinRegSlope", x, params={"window": 5}) - # After window-1 NaNs, slope should be ~2.0 for first asset - valid_idx = ~np.isnan(result[0]) - if valid_idx.any(): - np.testing.assert_almost_equal(result[0, valid_idx][-1], 2.0, decimal=3) - np.testing.assert_almost_equal(result[1, valid_idx][-1], 3.0, decimal=3) - - def test_resid_of_linear_is_near_zero(self): - # Perfectly linear: residuals should be ~0 - t_vals = np.arange(20, dtype=np.float64) - x = np.stack([2 * t_vals + 1, t_vals + 5], axis=0) - result = execute_operator("TsLinRegResid", x, params={"window": 5}) - valid = ~np.isnan(result) - if valid.any(): - np.testing.assert_almost_equal(np.abs(result[valid]).max(), 0.0, decimal=3) - - -# --------------------------------------------------------------------------- -# Logical operators -# --------------------------------------------------------------------------- - -class TestLogicalOps: - """Test conditional and comparison operators.""" - - def test_ifelse_branching(self): - cond = _arr([1, -1, 1, -1, 0]) - x = _arr([10, 20, 30, 40, 50]) - y = _arr([100, 200, 300, 400, 500]) - result = execute_operator("IfElse", cond, x, y) - # cond > 0 -> x, else y - np.testing.assert_almost_equal(result[0, 0], 10) - np.testing.assert_almost_equal(result[0, 1], 200) - np.testing.assert_almost_equal(result[0, 2], 30) - np.testing.assert_almost_equal(result[0, 3], 400) - np.testing.assert_almost_equal(result[0, 4], 500) # 0 is not > 0 - - def test_greater(self): - x = _arr([1, 5, 3], [4, 2, 6]) - y = _arr([2, 3, 3], [4, 5, 1]) - result = execute_operator("Greater", x, y) - expected = _arr([0, 1, 0], [0, 0, 1]) - np.testing.assert_array_almost_equal(result, expected) - - def test_less(self): - x = _arr([1, 
5, 3]) - y = _arr([2, 3, 3]) - result = execute_operator("Less", x, y) - expected = _arr([1, 0, 0]) - np.testing.assert_array_almost_equal(result, expected) - - def test_and(self): - x = _arr([1, 1, -1, -1]) - y = _arr([1, -1, 1, -1]) - result = execute_operator("And", x, y) - expected = _arr([1, 0, 0, 0]) - np.testing.assert_array_almost_equal(result, expected) - - def test_or(self): - x = _arr([1, 1, -1, -1]) - y = _arr([1, -1, 1, -1]) - result = execute_operator("Or", x, y) - expected = _arr([1, 1, 1, 0]) - np.testing.assert_array_almost_equal(result, expected) - - def test_not(self): - x = _arr([1, -1, 0, 5]) - result = execute_operator("Not", x) - expected = _arr([0, 1, 1, 0]) - np.testing.assert_array_almost_equal(result, expected) - - -# --------------------------------------------------------------------------- -# NaN propagation -# --------------------------------------------------------------------------- - -class TestNaNPropagation: - """Test NaN handling across operators.""" - - def test_add_nan_propagation(self): - x = _arr([1, np.nan, 3]) - y = _arr([4, 5, np.nan]) - result = execute_operator("Add", x, y) - assert np.isnan(result[0, 1]) - assert np.isnan(result[0, 2]) - np.testing.assert_almost_equal(result[0, 0], 5.0) - - def test_neg_nan_propagation(self): - x = _arr([1, np.nan, 3]) - result = execute_operator("Neg", x) - assert np.isnan(result[0, 1]) - - def test_greater_nan_propagation(self): - x = _arr([1, np.nan, 3]) - y = _arr([0, 1, np.nan]) - result = execute_operator("Greater", x, y) - assert np.isnan(result[0, 1]) - assert np.isnan(result[0, 2]) - - -# --------------------------------------------------------------------------- -# GPU (torch) vs CPU equivalence -# --------------------------------------------------------------------------- - -class TestGPUCPUEquivalence: - """Test that torch and numpy implementations produce similar results.""" - - @pytest.fixture - def torch_available(self): - try: - import torch - return True - except 
ImportError: - pytest.skip("PyTorch not available") - - @pytest.mark.parametrize("op_name", ["Add", "Sub", "Mul", "Neg", "Abs", "Sign"]) - def test_arithmetic_equivalence(self, torch_available, x_simple, y_simple, op_name): - import torch as th - - spec = get_operator(op_name) - if spec.arity == 1: - np_result = execute_operator(op_name, x_simple, backend="numpy") - torch_result = execute_operator( - op_name, th.tensor(x_simple), backend="torch" - ) - else: - np_result = execute_operator(op_name, x_simple, y_simple, backend="numpy") - torch_result = execute_operator( - op_name, th.tensor(x_simple), th.tensor(y_simple), backend="torch" - ) - np.testing.assert_array_almost_equal( - np_result, torch_result.numpy(), decimal=5 - ) - - @pytest.mark.parametrize("op_name", ["Mean", "Std", "TsMax", "TsMin"]) - def test_statistical_equivalence(self, torch_available, x_simple, op_name): - import torch as th - - np_result = execute_operator(op_name, x_simple, params={"window": 3}, backend="numpy") - torch_result = execute_operator( - op_name, th.tensor(x_simple, dtype=th.float64), params={"window": 3}, backend="torch" - ) - valid = ~(np.isnan(np_result) | np.isnan(torch_result.numpy())) - if valid.any(): - np.testing.assert_array_almost_equal( - np_result[valid], torch_result.numpy()[valid], decimal=4 - ) - - -# --------------------------------------------------------------------------- -# Registry -# --------------------------------------------------------------------------- - -class TestRegistry: - """Test operator registry functions.""" - - def test_list_operators_flat(self): - ops = list_operators(grouped=False) - assert isinstance(ops, list) - assert "Add" in ops - assert "Neg" in ops - assert "CsRank" in ops - - def test_list_operators_grouped(self): - groups = list_operators(grouped=True) - assert isinstance(groups, dict) - assert "ARITHMETIC" in groups - assert "STATISTICAL" in groups - - def test_implemented_operators(self): - impl = implemented_operators() - assert 
len(impl) > 0 - assert "Add" in impl - - def test_get_operator_unknown_raises(self): - with pytest.raises(KeyError): - get_operator("FooBarBaz") - - def test_execute_unknown_raises(self): - with pytest.raises(KeyError): - execute_operator("UnknownOp", np.ones((2, 3))) diff --git a/src/factorminer/factorminer/tests/test_provenance.py b/src/factorminer/factorminer/tests/test_provenance.py deleted file mode 100644 index fa4f326..0000000 --- a/src/factorminer/factorminer/tests/test_provenance.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Tests for mining run manifests and factor provenance.""" - -from __future__ import annotations - -import json - -import numpy as np - -from src.factorminer.factorminer.agent.llm_interface import MockProvider -from src.factorminer.factorminer.core.factor_library import Factor -from src.factorminer.factorminer.core.library_io import load_library -from src.factorminer.factorminer.core.config import MiningConfig -from src.factorminer.factorminer.core.helix_loop import HelixLoop -from src.factorminer.factorminer.core.ralph_loop import EvaluationResult -from src.factorminer.factorminer.core.session import MiningSession - - -def test_factor_provenance_roundtrip(): - factor = Factor( - id=7, - name="alpha_7", - formula="Neg($close)", - category="test", - ic_mean=0.12, - icir=1.4, - ic_win_rate=0.6, - max_correlation=0.1, - batch_number=3, - provenance={ - "run_id": "run_123", - "loop_type": "helix", - "memory_summary": {"insight_count": 2}, - }, - ) - - restored = Factor.from_dict(factor.to_dict()) - - assert restored.provenance["run_id"] == "run_123" - assert restored.provenance["loop_type"] == "helix" - assert restored.provenance["memory_summary"]["insight_count"] == 2 - - -def test_helix_run_writes_manifest_and_factor_provenance(tmp_path, small_data, monkeypatch): - data = np.stack( - [ - small_data["$open"], - small_data["$high"], - small_data["$low"], - small_data["$close"], - small_data["$volume"], - small_data["$amt"], - small_data["$vwap"], - 
], - axis=2, - ) - returns = small_data["$returns"] - config = MiningConfig( - target_library_size=1, - max_iterations=1, - batch_size=1, - output_dir=str(tmp_path / "helix-output"), - ) - provider = MockProvider() - - loop = HelixLoop( - config=config, - data_tensor=data, - returns=returns, - llm_provider=provider, - canonicalize=False, - enable_knowledge_graph=False, - enable_embeddings=False, - enable_auto_inventor=False, - ) - - monkeypatch.setattr( - loop.generator, - "generate_batch", - lambda *args, **kwargs: [("alpha_1", "Neg($close)")], - ) - monkeypatch.setattr( - loop.pipeline, - "evaluate_batch", - lambda candidates: [ - EvaluationResult( - factor_name="alpha_1", - formula="Neg($close)", - parse_ok=True, - ic_mean=0.12, - icir=1.3, - ic_win_rate=0.6, - max_correlation=0.0, - admitted=True, - stage_passed=3, - signals=np.ones_like(returns), - score_vector={"primary_score": 0.12}, - ) - ], - ) - - library = loop.run(target_size=1, max_iterations=1) - - output_dir = tmp_path / "helix-output" - run_manifest_path = output_dir / "run_manifest.json" - checkpoint_manifest_path = output_dir / "checkpoint" / "run_manifest.json" - session_path = output_dir / "session.json" - library_path = output_dir / "factor_library.json" - checkpoint_library_path = output_dir / "checkpoint" / "library.json" - - assert run_manifest_path.exists() - assert checkpoint_manifest_path.exists() - assert session_path.exists() - assert library_path.exists() - assert checkpoint_library_path.exists() - - manifest = json.loads(run_manifest_path.read_text()) - assert manifest["loop_type"] == "helix" - assert manifest["library_size"] >= 1 - assert manifest["artifact_paths"]["run_manifest"] == str(run_manifest_path) - - session = MiningSession.load(session_path) - assert session.run_manifest_path == str(run_manifest_path) - assert session.run_manifest["loop_type"] == "helix" - - loaded_library = load_library(output_dir / "factor_library") - factor = loaded_library.list_factors()[0] - assert 
factor.provenance["run_id"] == manifest["run_id"] - assert factor.provenance["loop_type"] == "helix" - assert factor.provenance["admission"]["admitted"] is True - assert factor.provenance["evaluation"]["ic_mean"] == 0.12 - assert library.size == 1 diff --git a/src/factorminer/factorminer/tests/test_ralph_loop.py b/src/factorminer/factorminer/tests/test_ralph_loop.py deleted file mode 100644 index dd75da1..0000000 --- a/src/factorminer/factorminer/tests/test_ralph_loop.py +++ /dev/null @@ -1,1076 +0,0 @@ -"""Integration tests for the Ralph Loop end-to-end mining pipeline. - -Tests the full pipeline using MockProvider for deterministic factor generation -and synthetic market data, covering: - - BudgetTracker resource monitoring - - FactorGenerator response parsing - - ValidationPipeline multi-stage evaluation - - RalphLoop end-to-end mining iterations - - Category inference from formula structure - - Session persistence (save / load) -""" - -from __future__ import annotations - -import json -import os -import shutil -import tempfile -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional - -import numpy as np -import pytest - -from src.factorminer.factorminer.agent.llm_interface import MockProvider -from src.factorminer.factorminer.core.factor_library import Factor, FactorLibrary -from src.factorminer.factorminer.core.ralph_loop import ( - BudgetTracker, - EvaluationResult, - FactorGenerator, - MiningReporter, - RalphLoop, - ValidationPipeline, -) -from src.factorminer.factorminer.memory.memory_store import ExperienceMemory - - -# --------------------------------------------------------------------------- -# Minimal config for tests -# --------------------------------------------------------------------------- - -@dataclass -class _TestConfig: - target_library_size: int = 10 - batch_size: int = 5 - max_iterations: int = 3 - ic_threshold: float = 0.02 - icir_threshold: float = 0.3 - correlation_threshold: float = 0.7 - 
replacement_ic_min: float = 0.10 - replacement_ic_ratio: float = 1.3 - fast_screen_assets: int = 0 # No fast screening for deterministic tests - num_workers: int = 1 - output_dir: str = "" - - def to_dict(self) -> Dict[str, Any]: - return { - "target_library_size": self.target_library_size, - "batch_size": self.batch_size, - "max_iterations": self.max_iterations, - "ic_threshold": self.ic_threshold, - "icir_threshold": self.icir_threshold, - "correlation_threshold": self.correlation_threshold, - "replacement_ic_min": self.replacement_ic_min, - "replacement_ic_ratio": self.replacement_ic_ratio, - } - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -@pytest.fixture -def tmp_dir(): - d = tempfile.mkdtemp(prefix="ralph_test_") - yield d - shutil.rmtree(d, ignore_errors=True) - - -@pytest.fixture -def test_config(tmp_dir): - return _TestConfig(output_dir=tmp_dir) - - -@pytest.fixture -def synthetic_data(rng): - """Synthetic (M=15, T=60, F=8) data tensor and returns.""" - M, T, F = 15, 60, 8 - data_tensor = rng.normal(0, 1, (M, T, F)).astype(np.float64) - returns = rng.normal(0, 0.02, (M, T)).astype(np.float64) - return data_tensor, returns - - -@pytest.fixture -def mock_provider(): - return MockProvider(cycle=True) - - -@pytest.fixture -def empty_library(): - return FactorLibrary(correlation_threshold=0.7, ic_threshold=0.02) - - -@pytest.fixture -def empty_memory(): - return ExperienceMemory() - - -# =========================================================================== -# BudgetTracker tests -# =========================================================================== - -class TestBudgetTracker: - - def test_initial_state(self): - bt = BudgetTracker() - assert bt.llm_calls == 0 - assert bt.total_tokens == 0 - assert bt.compute_seconds == 0.0 - assert not bt.is_exhausted() 
- - def test_record_llm_call(self): - bt = BudgetTracker() - bt.record_llm_call(prompt_tokens=100, completion_tokens=50) - assert bt.llm_calls == 1 - assert bt.llm_prompt_tokens == 100 - assert bt.llm_completion_tokens == 50 - assert bt.total_tokens == 150 - - def test_record_compute(self): - bt = BudgetTracker() - bt.record_compute(1.5) - bt.record_compute(2.5) - assert bt.compute_seconds == pytest.approx(4.0) - - def test_exhausted_by_llm_calls(self): - bt = BudgetTracker(max_llm_calls=2) - assert not bt.is_exhausted() - bt.record_llm_call() - assert not bt.is_exhausted() - bt.record_llm_call() - assert bt.is_exhausted() - - def test_exhausted_by_wall_time(self): - bt = BudgetTracker(max_wall_seconds=0.01) - import time - time.sleep(0.02) - assert bt.is_exhausted() - - def test_unlimited_budgets(self): - bt = BudgetTracker(max_llm_calls=0, max_wall_seconds=0) - for _ in range(100): - bt.record_llm_call() - assert not bt.is_exhausted() - - def test_to_dict_keys(self): - bt = BudgetTracker() - bt.record_llm_call(10, 20) - d = bt.to_dict() - expected_keys = { - "llm_calls", "llm_prompt_tokens", "llm_completion_tokens", - "total_tokens", "compute_seconds", "wall_elapsed_seconds", - } - assert set(d.keys()) == expected_keys - - def test_wall_elapsed_positive(self): - bt = BudgetTracker() - assert bt.wall_elapsed >= 0 - - -# =========================================================================== -# EvaluationResult tests -# =========================================================================== - -class TestEvaluationResult: - - def test_defaults(self): - r = EvaluationResult(factor_name="test", formula="Neg($close)") - assert not r.parse_ok - assert r.ic_mean == 0.0 - assert r.icir == 0.0 - assert not r.admitted - assert r.replaced is None - assert r.rejection_reason == "" - assert r.stage_passed == 0 - assert r.signals is None - - def test_admitted_result(self): - r = EvaluationResult( - factor_name="good", - formula="CsRank($close)", - parse_ok=True, - 
ic_mean=0.08, - icir=1.2, - admitted=True, - stage_passed=3, - ) - assert r.admitted - assert r.stage_passed == 3 - - -# =========================================================================== -# FactorGenerator tests -# =========================================================================== - -class TestFactorGenerator: - - def test_generate_batch(self, mock_provider): - gen = FactorGenerator(llm_provider=mock_provider) - candidates = gen.generate_batch( - memory_signal={}, - library_state={"size": 0}, - batch_size=5, - ) - assert len(candidates) > 0 - for name, formula in candidates: - assert isinstance(name, str) - assert isinstance(formula, str) - assert len(name) > 0 - assert len(formula) > 0 - - def test_parse_response_numbered_format(self): - raw = ( - "1. factor_a: Neg($close)\n" - "2. factor_b: CsRank(Mean($close, 10))\n" - "3. factor_c: Div($high, $low)\n" - ) - result = FactorGenerator._parse_response(raw) - assert len(result) == 3 - assert result[0] == ("factor_a", "Neg($close)") - assert result[1] == ("factor_b", "CsRank(Mean($close, 10))") - - def test_parse_response_empty(self): - assert FactorGenerator._parse_response("") == [] - assert FactorGenerator._parse_response("\n\n") == [] - - def test_parse_response_ignores_bad_lines(self): - raw = ( - "Some random text\n" - "1. valid_factor: Neg($close)\n" - "Not a factor line\n" - "2. 
another: CsRank($volume)\n" - ) - result = FactorGenerator._parse_response(raw) - assert len(result) == 2 - - def test_mock_provider_deterministic(self): - p1 = MockProvider(cycle=False) - p2 = MockProvider(cycle=False) - r1 = p1.generate("sys", "user", 0.8, 4096) - r2 = p2.generate("sys", "user", 0.8, 4096) - assert r1 == r2 - - def test_mock_provider_cycling(self): - p = MockProvider(cycle=True) - r1 = p.generate("sys", "user") - r2 = p.generate("sys", "user") - # Second call should produce different factors (cycled offset) - # unless batch_size == len(MOCK_FACTORS) - assert isinstance(r1, str) - assert isinstance(r2, str) - - -# =========================================================================== -# ValidationPipeline tests -# =========================================================================== - -class TestValidationPipeline: - - @pytest.fixture - def pipeline(self, synthetic_data, empty_library): - data_tensor, returns = synthetic_data - return ValidationPipeline( - data_tensor=data_tensor, - returns=returns, - library=empty_library, - ic_threshold=0.02, - fast_screen_assets=0, # Use all assets - ) - - def test_parse_failure(self, pipeline): - result = pipeline.evaluate_candidate("bad", "NotAnOperator($close)") - assert not result.parse_ok - assert result.stage_passed == 0 - assert "Parse failure" in result.rejection_reason - - def test_valid_formula_parses(self, pipeline): - result = pipeline.evaluate_candidate("neg_close", "Neg($close)") - assert result.parse_ok - - def test_signals_computed(self, pipeline): - result = pipeline.evaluate_candidate("neg_close", "Neg($close)") - assert result.signals is not None - # Signals should be (M, T) shaped - M, T = pipeline.returns.shape - assert result.signals.shape == (M, T) - - def test_ic_computed(self, pipeline): - result = pipeline.evaluate_candidate("neg_close", "Neg($close)") - # IC should be a number (may or may not pass threshold) - assert isinstance(result.ic_mean, float) - - def 
test_batch_evaluation(self, pipeline): - candidates = [ - ("f1", "Neg($close)"), - ("f2", "CsRank(Mean($close, 10))"), - ("f3", "InvalidFormula!!!"), - ] - results = pipeline.evaluate_batch(candidates) - assert len(results) == 3 - # Third should fail parse - assert not results[2].parse_ok - - def test_deduplication_keeps_highest_ic(self, synthetic_data, empty_library): - data_tensor, returns = synthetic_data - # Use very low threshold and high correlation threshold to admit most - pipeline = ValidationPipeline( - data_tensor=data_tensor, - returns=returns, - library=empty_library, - ic_threshold=0.0001, - fast_screen_assets=0, - ) - - # Create two results with identical signals but different IC - M, T = returns.shape - signals = np.random.RandomState(99).randn(M, T) - - r1 = EvaluationResult( - factor_name="low_ic", formula="Neg($close)", - parse_ok=True, ic_mean=0.05, admitted=True, - stage_passed=3, signals=signals.copy(), - ) - r2 = EvaluationResult( - factor_name="high_ic", formula="CsRank($close)", - parse_ok=True, ic_mean=0.10, admitted=True, - stage_passed=3, signals=signals.copy(), - ) - results = pipeline._deduplicate_batch([r1, r2]) - - # The higher-IC one should be kept; the lower deduped - admitted = [r for r in results if r.admitted] - assert len(admitted) == 1 - assert admitted[0].factor_name == "high_ic" - - def test_deduplication_uncorrelated_kept(self, synthetic_data, empty_library): - data_tensor, returns = synthetic_data - pipeline = ValidationPipeline( - data_tensor=data_tensor, - returns=returns, - library=empty_library, - ic_threshold=0.0001, - fast_screen_assets=0, - ) - - M, T = returns.shape - rng = np.random.RandomState(42) - - r1 = EvaluationResult( - factor_name="f1", formula="Neg($close)", - parse_ok=True, ic_mean=0.05, admitted=True, - stage_passed=3, signals=rng.randn(M, T), - ) - r2 = EvaluationResult( - factor_name="f2", formula="CsRank($volume)", - parse_ok=True, ic_mean=0.07, admitted=True, - stage_passed=3, signals=rng.randn(M, 
T), - ) - results = pipeline._deduplicate_batch([r1, r2]) - admitted = [r for r in results if r.admitted] - # Both should survive (independent random signals -> low correlation) - assert len(admitted) == 2 - - -# =========================================================================== -# MiningReporter tests -# =========================================================================== - -class TestMiningReporter: - - def test_log_batch(self, tmp_dir): - reporter = MiningReporter(output_dir=tmp_dir) - reporter.log_batch(1, admitted=3, rejected=7) - log_path = os.path.join(tmp_dir, "mining_batches.jsonl") - assert os.path.exists(log_path) - with open(log_path) as f: - lines = f.readlines() - assert len(lines) == 1 - record = json.loads(lines[0]) - assert record["iteration"] == 1 - assert record["admitted"] == 3 - - def test_export_library(self, tmp_dir, empty_library): - reporter = MiningReporter(output_dir=tmp_dir) - path = reporter.export_library(empty_library) - assert os.path.exists(path) - with open(path) as f: - data = json.load(f) - assert "factors" in data - assert "diagnostics" in data - assert "exported_at" in data - - -# =========================================================================== -# Category inference tests -# =========================================================================== - -class TestCategoryInference: - - def test_momentum(self): - assert RalphLoop._infer_category("Delta($close, 5)") == "Momentum" - - def test_volatility(self): - assert RalphLoop._infer_category("Std($returns, 10)") == "Volatility" - - def test_higher_moment(self): - assert RalphLoop._infer_category("Skew($returns, 20)") == "Higher-Moment" - - def test_pv_correlation(self): - assert RalphLoop._infer_category("Corr($close, $volume, 10)") == "PV-Correlation" - - def test_regime_conditional(self): - cat = RalphLoop._infer_category( - "IfElse(Greater($returns, 0), $volume, Neg($volume))" - ) - assert cat == "Regime-Conditional" - - def test_regression(self): 
- assert RalphLoop._infer_category("TsLinRegSlope($close, 20)") == "Regression" - - def test_smoothing(self): - assert RalphLoop._infer_category("EMA($close, 10)") == "Smoothing" - - def test_vwap(self): - assert RalphLoop._infer_category("Div(Sub($close, $vwap), $vwap)") == "VWAP" - - def test_amount(self): - assert RalphLoop._infer_category("CsRank($amt)") == "Amount" - - def test_extrema(self): - assert RalphLoop._infer_category("TsMax($close, 20)") == "Extrema" - - def test_cross_sectional(self): - assert RalphLoop._infer_category("CsRank($close)") == "Cross-Sectional" - - def test_other_fallback(self): - assert RalphLoop._infer_category("Add($close, $open)") == "Other" - - -# =========================================================================== -# End-to-end RalphLoop tests -# =========================================================================== - -class TestRalphLoopEndToEnd: - - def test_single_iteration(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 1 - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - - library = loop.run(max_iterations=1) - assert isinstance(library, FactorLibrary) - assert loop.iteration == 1 - assert loop.budget.llm_calls >= 1 - - def test_multiple_iterations(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 3 - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - - library = loop.run(max_iterations=3) - assert loop.iteration <= 3 - assert loop.budget.llm_calls <= 3 - - def test_library_grows(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 5 - test_config.target_library_size = 100 # High target so we don't 
stop early - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - - library = loop.run(max_iterations=5, target_size=100) - # With mock provider and low IC threshold, some factors should be admitted - # (exact count depends on pseudo-signal randomness) - assert isinstance(library.size, int) - - def test_callback_invoked(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 2 - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - - callback_calls = [] - - def cb(iteration: int, stats: Dict[str, Any]) -> None: - callback_calls.append((iteration, stats)) - - loop.run(max_iterations=2, callback=cb) - assert len(callback_calls) == 2 - assert callback_calls[0][0] == 1 - assert callback_calls[1][0] == 2 - # Stats should have standard keys - for _, stats in callback_calls: - assert "candidates" in stats - assert "admitted" in stats - assert "library_size" in stats - assert "yield_rate" in stats - - def test_budget_stops_loop(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 100 - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop.budget = BudgetTracker(max_llm_calls=2) - - library = loop.run(max_iterations=100, target_size=1000) - assert loop.budget.llm_calls == 2 - assert loop.iteration == 2 - - def test_target_size_stops_loop(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.output_dir = tmp_dir - test_config.ic_threshold = 0.0001 # Very low to admit most - test_config.correlation_threshold = 0.99 # Very high to avoid dedup - 
data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - # Request tiny library - library = loop.run(max_iterations=50, target_size=2) - # Either reached target or exhausted iterations - assert library.size >= 0 - - def test_memory_evolves(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.max_iterations = 2 - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - - loop.run(max_iterations=2) - # Memory should have been updated at least once - assert loop.memory is not None - - def test_output_files_created(self, test_config, synthetic_data, mock_provider, tmp_dir): - test_config.output_dir = tmp_dir - test_config.max_iterations = 1 - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop.run(max_iterations=1) - - # Check that library JSON was exported - lib_path = os.path.join(tmp_dir, "factor_library.json") - assert os.path.exists(lib_path) - - def test_run_with_prepopulated_library( - self, test_config, synthetic_data, mock_provider, tmp_dir, rng - ): - test_config.output_dir = tmp_dir - test_config.max_iterations = 1 - data_tensor, returns = synthetic_data - M, T = returns.shape - - lib = FactorLibrary( - correlation_threshold=0.7, ic_threshold=0.02, - ) - # Add one factor - factor = Factor( - id=0, name="seed_factor", formula="Neg($close)", - category="test", ic_mean=0.06, icir=1.0, - ic_win_rate=0.6, max_correlation=0.0, - batch_number=0, signals=rng.normal(0, 1, (M, T)), - ) - lib.admit_factor(factor) - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - library=lib, - ) - result_lib = 
loop.run(max_iterations=1) - assert result_lib.size >= 1 # At least the seed factor - - def test_empty_stats_on_no_candidates(self, test_config, synthetic_data, tmp_dir): - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # Provider that returns empty response - class EmptyProvider(MockProvider): - def generate(self, *args, **kwargs): - return "" - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=EmptyProvider(), - ) - stats = loop._run_iteration(batch_size=5) - assert stats["candidates"] == 0 - assert stats["admitted"] == 0 - - -# =========================================================================== -# Session persistence tests -# =========================================================================== - -class TestSessionPersistence: - - def test_save_creates_checkpoint( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - test_config.output_dir = tmp_dir - test_config.max_iterations = 1 - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop.run(max_iterations=1) - - checkpoint_path = loop.save_session(tmp_dir) - assert os.path.isdir(checkpoint_path) - - # Should contain key files - checkpoint_files = os.listdir(checkpoint_path) - assert "library.json" in checkpoint_files - assert "memory.json" in checkpoint_files - assert "loop_state.json" in checkpoint_files - - def test_load_restores_iteration( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # Run 2 iterations - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2) - checkpoint_path = loop1.save_session(tmp_dir) - - # Load into new loop - loop2 = RalphLoop( - config=test_config, - 
data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - loop2.load_session(checkpoint_path) - assert loop2.iteration == loop1.iteration - - def test_load_restores_memory( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2) - checkpoint_path = loop1.save_session(tmp_dir) - - loop2 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - loop2.load_session(checkpoint_path) - # Memory should have been restored - assert loop2.memory is not None - - -# =========================================================================== -# Checkpoint / Resume tests (Phase 1f) -# =========================================================================== - -class TestCheckpointResume: - """Tests for the checkpoint/resume functionality.""" - - def test_checkpoint_creates_files( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify that save_session creates all expected checkpoint files.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop.run(max_iterations=2) - checkpoint_path = loop.save_session() - - checkpoint_dir = Path(checkpoint_path) - assert checkpoint_dir.exists() - assert (checkpoint_dir / "library.json").exists() - assert (checkpoint_dir / "memory.json").exists() - assert (checkpoint_dir / "loop_state.json").exists() - assert (checkpoint_dir / "session.json").exists() - - # Verify loop_state.json contains expected keys - with open(checkpoint_dir / "loop_state.json") as f: - loop_state = json.load(f) - assert "iteration" in loop_state - assert 
"library_size" in loop_state - assert "memory_version" in loop_state - assert "budget" in loop_state - assert loop_state["iteration"] == loop.iteration - assert loop_state["library_size"] == loop.library.size - assert loop_state["budget"]["llm_calls"] == loop.budget.llm_calls - - def test_resume_continues_from_checkpoint( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify that resuming continues from the saved iteration.""" - test_config.output_dir = tmp_dir - test_config.target_library_size = 200 # High target so loop doesn't stop early - data_tensor, returns = synthetic_data - - # Run 2 iterations, then save - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2, target_size=200) - saved_iteration = loop1.iteration - saved_library_size = loop1.library.size - loop1.save_session() - - # Create a new loop and resume from checkpoint - loop2 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - assert loop2.iteration == 0 # Starts fresh - - # Resume should load the saved state and continue - library = loop2.run(max_iterations=4, target_size=200, resume=True) - - # loop2 should have continued from iteration 2, running up to 4 - assert loop2.iteration > saved_iteration - assert loop2.iteration <= 4 - - def test_resume_preserves_library( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify that library factors are preserved across resume.""" - test_config.output_dir = tmp_dir - test_config.ic_threshold = 0.0001 - test_config.correlation_threshold = 0.99 - data_tensor, returns = synthetic_data - - # Run and save - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=3, target_size=100) - saved_factors = { - fid: f.to_dict() for fid, f in 
loop1.library.factors.items() - } - saved_size = loop1.library.size - loop1.save_session() - - # Load into a new loop - loop2 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - checkpoint_dir = os.path.join(tmp_dir, "checkpoint") - loop2.load_session(checkpoint_dir) - - # Library should have the same factors - assert loop2.library.size == saved_size - for fid, f_dict in saved_factors.items(): - assert fid in loop2.library.factors - restored = loop2.library.factors[fid].to_dict() - assert restored["name"] == f_dict["name"] - assert restored["formula"] == f_dict["formula"] - assert restored["ic_mean"] == pytest.approx(f_dict["ic_mean"]) - - def test_resume_preserves_memory( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify that experience memory is preserved across resume.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # Run and save - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2) - saved_version = loop1.memory.version - saved_patterns = len(loop1.memory.success_patterns) - saved_forbidden = len(loop1.memory.forbidden_directions) - saved_insights = len(loop1.memory.insights) - loop1.save_session() - - # Load into a new loop - loop2 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - checkpoint_dir = os.path.join(tmp_dir, "checkpoint") - loop2.load_session(checkpoint_dir) - - # Memory state should match - assert loop2.memory.version == saved_version - assert len(loop2.memory.success_patterns) == saved_patterns - assert len(loop2.memory.forbidden_directions) == saved_forbidden - assert len(loop2.memory.insights) == saved_insights - - def test_checkpoint_interval_controls_frequency( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - 
"""Verify checkpoint_interval controls how often checkpoints are saved.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # With interval=2, checkpoint should be written at iterations 2 and 4 - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - checkpoint_interval=2, - ) - loop.run(max_iterations=3) - - checkpoint_dir = Path(tmp_dir) / "checkpoint" - # After 3 iterations with interval=2, checkpoint at iteration 2 - # should have created the directory - assert checkpoint_dir.exists() - - # Verify the checkpoint was written at least once - with open(checkpoint_dir / "loop_state.json") as f: - state = json.load(f) - # The last checkpoint should be at iteration 2 (since 3 is not - # divisible by 2, the checkpoint at iter 2 is the latest one) - assert state["iteration"] == 2 - - def test_checkpoint_disabled( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify checkpoint_interval=0 disables automatic checkpointing.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - checkpoint_interval=0, - ) - loop.run(max_iterations=2) - - checkpoint_dir = Path(tmp_dir) / "checkpoint" - # No automatic checkpoint should have been created - assert not checkpoint_dir.exists() - - def test_resume_from_classmethod( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify the resume_from classmethod works correctly.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # Run and save - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2) - checkpoint_path = loop1.save_session() - - # Use classmethod to resume - loop2 = RalphLoop.resume_from( - checkpoint_path=checkpoint_path, - 
config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - - assert loop2.iteration == loop1.iteration - assert loop2.library.size == loop1.library.size - - def test_resume_restores_budget( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify that budget tracker state is preserved across resume.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop1 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - loop1.run(max_iterations=2) - saved_llm_calls = loop1.budget.llm_calls - saved_compute = loop1.budget.compute_seconds - loop1.save_session() - - loop2 = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=MockProvider(cycle=True), - ) - checkpoint_dir = os.path.join(tmp_dir, "checkpoint") - loop2.load_session(checkpoint_dir) - - assert loop2.budget.llm_calls == saved_llm_calls - assert loop2.budget.compute_seconds == pytest.approx( - saved_compute, abs=0.1 - ) - - def test_backward_compatible_no_checkpoint( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify run() works without checkpoint/resume (backward compat).""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - # Disable checkpointing entirely - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - checkpoint_interval=0, - ) - library = loop.run(max_iterations=2) - - assert isinstance(library, FactorLibrary) - assert loop.iteration == 2 - - def test_resume_no_checkpoint_is_noop( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Verify resume=True with no existing checkpoint just starts fresh.""" - test_config.output_dir = tmp_dir - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - 
llm_provider=mock_provider, - ) - # resume=True but no checkpoint exists -- should work normally - library = loop.run(max_iterations=1, resume=True) - assert isinstance(library, FactorLibrary) - assert loop.iteration == 1 - - def test_run_exports_manifest_and_factor_provenance( - self, test_config, synthetic_data, mock_provider, tmp_dir - ): - """Completed runs should export a manifest and persist factor provenance.""" - test_config.output_dir = tmp_dir - test_config.ic_threshold = 0.0 - test_config.icir_threshold = -1.0 - test_config.correlation_threshold = 1.1 - data_tensor, returns = synthetic_data - - loop = RalphLoop( - config=test_config, - data_tensor=data_tensor, - returns=returns, - llm_provider=mock_provider, - ) - library = loop.run(max_iterations=2, target_size=2) - - manifest_path = Path(tmp_dir) / "run_manifest.json" - assert manifest_path.exists() - - manifest = json.loads(manifest_path.read_text()) - assert manifest["loop_type"] == "ralph" - assert manifest["artifact_paths"]["run_manifest"] == str(manifest_path) - assert manifest["dataset_summary"]["data_tensor_shape"] == list(data_tensor.shape) - - assert library.size > 0 - exported_library = json.loads((Path(tmp_dir) / "factor_library.json").read_text()) - factor_payload = exported_library["factors"][0] - assert "provenance" in factor_payload - assert factor_payload["provenance"]["run_id"] == loop._session.session_id - assert factor_payload["provenance"]["loop_type"] == "ralph" - assert factor_payload["provenance"]["generator_family"] diff --git a/src/factorminer/factorminer/tests/test_regime.py b/src/factorminer/factorminer/tests/test_regime.py deleted file mode 100644 index 0a4833e..0000000 --- a/src/factorminer/factorminer/tests/test_regime.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Tests for regime-aware factor validation (evaluation/regime.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.regime import ( - 
MarketRegime, - RegimeAwareEvaluator, - RegimeConfig, - RegimeDetector, -) - - -@pytest.fixture -def rng(): - return np.random.default_rng(42) - - -# ----------------------------------------------------------------------- -# RegimeDetector: synthetic bull/bear phases -# ----------------------------------------------------------------------- - -def test_regime_detector_bull_bear_phases(rng): - """Clear positive first half, negative second half should produce - BULL and BEAR labels after the lookback window.""" - M, T = 20, 300 - returns = np.zeros((M, T)) - # First half: strongly positive - returns[:, :150] = rng.normal(0.02, 0.005, (M, 150)) - # Second half: strongly negative - returns[:, 150:] = rng.normal(-0.02, 0.005, (M, 150)) - - cfg = RegimeConfig(lookback_window=30, bull_return_threshold=0.0, - bear_return_threshold=0.0) - detector = RegimeDetector(config=cfg) - result = detector.classify(returns) - - # After the lookback window, first half should contain BULL periods - bull_periods = result.periods[MarketRegime.BULL] - bear_periods = result.periods[MarketRegime.BEAR] - assert bull_periods[50:140].sum() > 0, "Should have BULL in first half" - assert bear_periods[180:].sum() > 0, "Should have BEAR in second half" - - -def test_regime_detector_labels_shape(rng): - M, T = 10, 100 - returns = rng.normal(0, 0.01, (M, T)) - detector = RegimeDetector() - result = detector.classify(returns) - assert result.labels.shape == (T,) - assert set(result.labels).issubset({0, 1, 2}) - - -# ----------------------------------------------------------------------- -# RegimeAwareEvaluator: signal works in all regimes -# ----------------------------------------------------------------------- - -def test_regime_evaluator_all_regimes_pass(rng): - """A signal correlated with returns across all regimes should pass.""" - M, T = 20, 400 - returns = rng.normal(0, 0.01, (M, T)) - signal = returns * 5 + rng.normal(0, 0.001, (M, T)) - - cfg = RegimeConfig(lookback_window=20, 
min_regime_ic=0.01, - min_regimes_passing=1) - detector = RegimeDetector(config=cfg) - regime = detector.classify(returns) - - evaluator = RegimeAwareEvaluator(returns, regime, config=cfg) - result = evaluator.evaluate("strong_factor", signal) - assert result.passes is True - - -# ----------------------------------------------------------------------- -# RegimeAwareEvaluator: signal only works in bull -# ----------------------------------------------------------------------- - -def test_regime_evaluator_bull_only_fails(rng): - """A signal that only works in positive-return periods should fail - if min_regimes_passing=2.""" - M, T = 20, 400 - returns = np.zeros((M, T)) - returns[:, :200] = rng.normal(0.02, 0.005, (M, 200)) - returns[:, 200:] = rng.normal(-0.02, 0.005, (M, 200)) - - # Signal only correlates with returns in first half - signal = np.zeros((M, T)) - signal[:, :200] = returns[:, :200] * 5 - signal[:, 200:] = rng.normal(0, 1, (M, 200)) # noise in bear - - cfg = RegimeConfig(lookback_window=20, min_regime_ic=0.03, - min_regimes_passing=2) - detector = RegimeDetector(config=cfg) - regime = detector.classify(returns) - - evaluator = RegimeAwareEvaluator(returns, regime, config=cfg) - result = evaluator.evaluate("bull_only", signal) - # May or may not pass depending on how many regimes are detected, - # but the structure is correct - assert isinstance(result.n_regimes_passing, int) - assert isinstance(result.passes, bool) - - -# ----------------------------------------------------------------------- -# Edge case: very short data -# ----------------------------------------------------------------------- - -def test_regime_detector_short_data(rng): - """Data shorter than lookback_window should still work (all SIDEWAYS).""" - M, T = 10, 20 - returns = rng.normal(0, 0.01, (M, T)) - cfg = RegimeConfig(lookback_window=60) - detector = RegimeDetector(config=cfg) - result = detector.classify(returns) - # All periods should be SIDEWAYS since T < lookback_window - 
assert np.all(result.labels == MarketRegime.SIDEWAYS.value) diff --git a/src/factorminer/factorminer/tests/test_research.py b/src/factorminer/factorminer/tests/test_research.py deleted file mode 100644 index a404a4f..0000000 --- a/src/factorminer/factorminer/tests/test_research.py +++ /dev/null @@ -1,237 +0,0 @@ -"""Research-mode target, scoring, and model-suite coverage.""" - -from __future__ import annotations - -from pathlib import Path - -import numpy as np -import pandas as pd -import pytest - -from src.factorminer.factorminer.core.factor_library import Factor -from src.factorminer.factorminer.data.tensor_builder import TargetSpec, compute_targets -from src.factorminer.factorminer.evaluation.portfolio import PortfolioBacktester -from src.factorminer.factorminer.evaluation.research import ( - FactorGeometryDiagnostics, - build_score_vector, - passes_research_admission, - run_research_model_suite, -) -from src.factorminer.factorminer.evaluation.runtime import DatasetSplit, EvaluationDataset, evaluate_factors -from src.factorminer.factorminer.utils.config import load_config - - -def test_compute_targets_supports_multiple_horizons(): - df = pd.DataFrame( - { - "datetime": pd.date_range("2024-01-01", periods=5, freq="D").tolist() * 2, - "asset_id": ["A"] * 5 + ["B"] * 5, - "open": [10, 11, 12, 13, 14, 20, 21, 22, 23, 24], - "high": [11, 12, 13, 14, 15, 21, 22, 23, 24, 25], - "low": [9, 10, 11, 12, 13, 19, 20, 21, 22, 23], - "close": [10.5, 11.5, 12.5, 13.5, 14.5, 20.5, 21.5, 22.5, 23.5, 24.5], - "volume": [1] * 10, - "amount": [10] * 10, - } - ) - out = compute_targets( - df, - [ - TargetSpec("paper", 1, 1, "open_to_close", "simple"), - TargetSpec("h2_close_to_close", 0, 2, "close_to_close", "simple"), - ], - ) - - a0 = out[(out["asset_id"] == "A")].sort_values("datetime").reset_index(drop=True) - assert a0.loc[0, "target"] == pytest.approx(11.5 / 11.0 - 1.0) - assert a0.loc[0, "target_h2_close_to_close"] == pytest.approx(12.5 / 10.5 - 1.0) - - -def 
test_evaluate_factors_records_all_target_stats(small_data): - timestamps = np.array( - [np.datetime64("2024-01-01") + np.timedelta64(i, "D") for i in range(50)] - ) - returns = small_data["$returns"] - data_tensor = np.stack( - [ - small_data["$open"], - small_data["$high"], - small_data["$low"], - small_data["$close"], - small_data["$volume"], - small_data["$amt"], - small_data["$vwap"], - small_data["$returns"], - ], - axis=-1, - ) - alt_returns = -returns - splits = { - "train": DatasetSplit( - name="train", - indices=np.arange(25), - timestamps=timestamps[:25], - returns=returns[:, :25], - target_returns={"paper": returns[:, :25], "alt": alt_returns[:, :25]}, - default_target="paper", - ), - "test": DatasetSplit( - name="test", - indices=np.arange(25, 50), - timestamps=timestamps[25:], - returns=returns[:, 25:], - target_returns={"paper": returns[:, 25:], "alt": alt_returns[:, 25:]}, - default_target="paper", - ), - "full": DatasetSplit( - name="full", - indices=np.arange(50), - timestamps=timestamps, - returns=returns, - target_returns={"paper": returns, "alt": alt_returns}, - default_target="paper", - ), - } - dataset = EvaluationDataset( - data_dict=small_data, - data_tensor=data_tensor, - returns=returns, - timestamps=timestamps, - asset_ids=np.array([f"A{i:02d}" for i in range(returns.shape[0])]), - splits=splits, - processed_df=pd.DataFrame(), - target_panels={"paper": returns, "alt": alt_returns}, - default_target="paper", - ) - factor = Factor( - id=1, - name="close_neg", - formula="Neg($close)", - category="test", - ic_mean=0.0, - icir=0.0, - ic_win_rate=0.0, - max_correlation=0.0, - batch_number=1, - ) - - artifact = evaluate_factors([factor], dataset, signal_failure_policy="reject")[0] - - assert artifact.succeeded - assert set(artifact.target_stats["train"]) == {"paper", "alt"} - assert artifact.target_stats["train"]["paper"]["ic_mean"] == pytest.approx( - -artifact.target_stats["train"]["alt"]["ic_mean"] - ) - - -def 
test_research_score_vector_and_admission(): - cfg = load_config( - overrides={ - "benchmark": {"mode": "research"}, - "research": { - "enabled": True, - "horizon_weights": {"h1": 0.7, "h3": 0.3}, - }, - } - ) - score = build_score_vector( - target_stats={ - "h1": { - "ic_mean": 0.08, - "ic_abs_mean": 0.08, - "icir": 1.1, - "turnover": 0.2, - "ic_series": np.array([0.07, 0.08, 0.09, 0.08, 0.07]), - }, - "h3": { - "ic_mean": 0.05, - "ic_abs_mean": 0.05, - "icir": 0.8, - "turnover": 0.1, - "ic_series": np.array([0.03, 0.05, 0.06, 0.05, 0.04]), - }, - }, - target_horizons={"h1": 1, "h3": 3}, - research_cfg=cfg.research, - geometry=FactorGeometryDiagnostics( - max_abs_correlation=0.2, - mean_abs_correlation=0.1, - projection_loss=0.25, - marginal_span_gain=0.75, - effective_rank_gain=0.4, - residual_ic=0.06, - ), - ) - - assert score.primary_score > 0.0 - assert score.lower_confidence_bound >= 0.0 - admitted, reason = passes_research_admission( - score, - cfg.research, - correlation_threshold=0.5, - ) - assert admitted is True - assert "admission" in reason.lower() - - -def test_research_model_suite_reports_net_ir(): - cfg = load_config( - overrides={ - "benchmark": {"mode": "research"}, - "research": { - "enabled": True, - "selection": { - "models": ["ridge", "lasso"], - "rolling_train_window": 20, - "rolling_test_window": 10, - "rolling_step": 10, - }, - "regimes": {"enabled": False}, - "execution": {"cost_bps": 0.0}, - }, - } - ) - rng = np.random.default_rng(42) - t, n = 60, 8 - base = rng.normal(size=(t, n)) - factor_signals = { - 1: base, - 2: rng.normal(size=(t, n)), - 3: 0.5 * base + 0.1 * rng.normal(size=(t, n)), - } - returns = 0.03 * base + 0.01 * rng.normal(size=(t, n)) - - reports = run_research_model_suite(factor_signals, returns, cfg.research) - - assert "ridge" in reports - assert reports["ridge"]["available"] is True - assert "mean_test_net_ir" in reports["ridge"] - assert reports["ridge"]["selection_stability"] >= 0.0 - - -def 
test_portfolio_backtest_exposes_raw_series(): - backtester = PortfolioBacktester() - signal = np.array( - [ - [1.0, 0.5, -0.2, -1.0, 0.2], - [1.1, 0.2, -0.3, -0.8, 0.0], - [0.9, 0.1, -0.5, -1.1, 0.3], - [1.2, 0.4, -0.1, -0.9, 0.1], - [1.0, 0.3, -0.4, -1.2, 0.2], - ] - ) - returns = np.array( - [ - [0.03, 0.01, -0.01, -0.02, 0.00], - [0.02, 0.00, -0.01, -0.03, 0.01], - [0.01, 0.02, -0.02, -0.01, 0.00], - [0.03, 0.01, -0.01, -0.02, 0.00], - [0.02, 0.00, -0.03, -0.01, 0.01], - ] - ) - - stats = backtester.quintile_backtest(signal, returns, transaction_cost_bps=4.0) - - assert stats["ls_net_series"].shape[0] == signal.shape[0] - assert stats["turnover_series"].shape[0] == signal.shape[0] - assert stats["quintile_period_returns"].shape == (signal.shape[0], 5) diff --git a/src/factorminer/factorminer/tests/test_runtime_analysis.py b/src/factorminer/factorminer/tests/test_runtime_analysis.py deleted file mode 100644 index 440dfb8..0000000 --- a/src/factorminer/factorminer/tests/test_runtime_analysis.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Unit tests for strict runtime recomputation helpers.""" - -from __future__ import annotations - -from pathlib import Path -from types import SimpleNamespace - -import numpy as np -import pandas as pd -import pytest - -from src.factorminer.factorminer.core.factor_library import Factor -from src.factorminer.factorminer.core.parser import try_parse -from src.factorminer.factorminer.evaluation.metrics import compute_factor_stats -from src.factorminer.factorminer.evaluation.runtime import ( - DatasetSplit, - EvaluationDataset, - SignalComputationError, - compute_tree_signals, - evaluate_factors, -) - - -def _build_dataset(data_dict: dict[str, np.ndarray]) -> EvaluationDataset: - timestamps = np.array( - [np.datetime64("2024-01-01") + np.timedelta64(i, "D") for i in range(50)] - ) - returns = data_dict["$returns"] - feature_order = [ - "$open", - "$high", - "$low", - "$close", - "$volume", - "$amt", - "$vwap", - "$returns", - ] - data_tensor = 
np.stack([data_dict[name] for name in feature_order], axis=-1) - - splits = { - "train": DatasetSplit( - name="train", - indices=np.arange(25), - timestamps=timestamps[:25], - returns=returns[:, :25], - ), - "test": DatasetSplit( - name="test", - indices=np.arange(25, 50), - timestamps=timestamps[25:], - returns=returns[:, 25:], - ), - "full": DatasetSplit( - name="full", - indices=np.arange(50), - timestamps=timestamps, - returns=returns, - ), - } - - return EvaluationDataset( - data_dict=data_dict, - data_tensor=data_tensor, - returns=returns, - timestamps=timestamps, - asset_ids=np.array([f"A{i:02d}" for i in range(returns.shape[0])]), - splits=splits, - processed_df=pd.DataFrame(), - ) - - -def test_evaluate_factors_matches_direct_metric_computation(small_data): - """Shared runtime evaluation should match direct metric recomputation.""" - dataset = _build_dataset(small_data) - factor = Factor( - id=1, - name="close_neg", - formula="Neg($close)", - category="test", - ic_mean=99.0, - icir=88.0, - ic_win_rate=0.99, - max_correlation=0.0, - batch_number=1, - ) - - artifact = evaluate_factors([factor], dataset, signal_failure_policy="reject")[0] - tree = try_parse(factor.formula) - signals = tree.evaluate(dataset.data_dict) - expected_train = compute_factor_stats(signals[:, :25], dataset.returns[:, :25]) - expected_test = compute_factor_stats(signals[:, 25:], dataset.returns[:, 25:]) - - assert artifact.succeeded - np.testing.assert_allclose( - artifact.split_stats["train"]["ic_series"], - expected_train["ic_series"], - equal_nan=True, - ) - np.testing.assert_allclose( - artifact.split_stats["test"]["ic_series"], - expected_test["ic_series"], - equal_nan=True, - ) - assert artifact.split_stats["train"]["ic_mean"] == pytest.approx( - expected_train["ic_mean"] - ) - assert artifact.split_stats["test"]["long_short"] == pytest.approx( - expected_test["long_short"] - ) - assert artifact.split_stats["train"]["turnover"] == pytest.approx( - expected_train["turnover"] - ) - 
- -def test_compute_tree_signals_obeys_failure_policy(): - """Signal failures should reject, synthesize, or raise explicitly.""" - tree = try_parse("Neg($close)") - returns_shape = (3, 7) - - with pytest.raises(SignalComputationError): - compute_tree_signals( - tree, - data_dict={}, - returns_shape=returns_shape, - signal_failure_policy="reject", - ) - - synthetic = compute_tree_signals( - tree, - data_dict={}, - returns_shape=returns_shape, - signal_failure_policy="synthetic", - ) - assert synthetic.shape == returns_shape - assert np.isfinite(synthetic).sum() > 0 - - with pytest.raises(Exception): - compute_tree_signals( - tree, - data_dict={}, - returns_shape=returns_shape, - signal_failure_policy="raise", - ) - - -def test_evaluate_factors_records_strict_recomputation_failure(small_data): - """Strict evaluation should record failures instead of hiding them.""" - dataset = _build_dataset(dict(small_data, **{"$close": np.full((10, 50), np.nan)})) - factor = Factor( - id=7, - name="broken_close", - formula="Neg($close)", - category="test", - ic_mean=0.0, - icir=0.0, - ic_win_rate=0.0, - max_correlation=0.0, - batch_number=1, - ) - - artifact = evaluate_factors([factor], dataset, signal_failure_policy="reject")[0] - - assert not artifact.succeeded - assert "Signal computation produced only NaN values" in artifact.error - - -def test_build_core_mining_config_uses_synthetic_policy_for_mock(): - """Mock mining flows should opt into synthetic fallback explicitly.""" - from factorminer.cli import _build_core_mining_config - - cfg = SimpleNamespace( - mining=SimpleNamespace( - target_library_size=10, - batch_size=5, - max_iterations=3, - ic_threshold=0.02, - icir_threshold=0.3, - correlation_threshold=0.7, - replacement_ic_min=0.10, - replacement_ic_ratio=1.3, - ), - evaluation=SimpleNamespace( - fast_screen_assets=10, - num_workers=1, - backend="numpy", - gpu_device="cuda:0", - signal_failure_policy="reject", - ), - ) - - strict_cfg = _build_core_mining_config(cfg, 
output_dir=Path("/tmp"), mock=False) - mock_cfg = _build_core_mining_config(cfg, output_dir=Path("/tmp"), mock=True) - - assert strict_cfg.signal_failure_policy == "reject" - assert mock_cfg.signal_failure_policy == "synthetic" diff --git a/src/factorminer/factorminer/tests/test_significance.py b/src/factorminer/factorminer/tests/test_significance.py deleted file mode 100644 index e4afd1d..0000000 --- a/src/factorminer/factorminer/tests/test_significance.py +++ /dev/null @@ -1,174 +0,0 @@ -"""Tests for statistical significance testing (evaluation/significance.py).""" - -from __future__ import annotations - -import numpy as np -import pytest - -from src.factorminer.factorminer.evaluation.significance import ( - BootstrapCIResult, - BootstrapICTester, - DeflatedSharpeCalculator, - DeflatedSharpeResult, - FDRController, - FDRResult, - SignificanceConfig, -) - - -@pytest.fixture -def config(): - return SignificanceConfig( - bootstrap_n_samples=500, - bootstrap_block_size=10, - bootstrap_confidence=0.95, - fdr_level=0.05, - seed=42, - ) - - -# ----------------------------------------------------------------------- -# BootstrapICTester: strong signal -> CI excludes zero -# ----------------------------------------------------------------------- - -def test_bootstrap_strong_signal_excludes_zero(config): - """A consistently high IC (0.10) should have CI that excludes zero.""" - T = 200 - ic_series = np.full(T, 0.10) + np.random.default_rng(42).normal(0, 0.01, T) - - tester = BootstrapICTester(config) - result = tester.compute_ci("strong_factor", ic_series) - - assert result.ci_excludes_zero is True - assert result.ci_lower > 0 - assert result.ic_mean > 0.08 - - -# ----------------------------------------------------------------------- -# BootstrapICTester: weak signal -> CI includes zero -# ----------------------------------------------------------------------- - -def test_bootstrap_weak_signal_includes_zero(config): - """A near-zero IC should have CI that includes zero.""" 
- T = 200 - rng = np.random.default_rng(123) - ic_series = rng.normal(0.0, 0.05, T) # mean ~0 - - tester = BootstrapICTester(config) - result = tester.compute_ci("weak_factor", ic_series) - - # The CI for |IC| may or may not include zero depending on noise, - # but the result should be a valid BootstrapCIResult - assert isinstance(result, BootstrapCIResult) - assert result.ci_lower <= result.ci_upper - - -def test_bootstrap_p_value_distinguishes_signal_from_noise(config): - """The sign-flip p-value should be small for signal and large for noise.""" - rng = np.random.default_rng(7) - strong_ic = 0.08 + rng.normal(0.0, 0.01, 200) - weak_ic = rng.normal(0.0, 0.05, 200) - - tester = BootstrapICTester(config) - - strong_p = tester.compute_p_value(strong_ic) - weak_p = tester.compute_p_value(weak_ic) - - assert strong_p < 0.05 - assert weak_p > 0.05 - - -# ----------------------------------------------------------------------- -# FDRController: BH procedure -# ----------------------------------------------------------------------- - -def test_fdr_batch_evaluate_separates_signal_from_noise(config): - """Batch FDR should keep the strong series and reject the weak one.""" - strong_ic = np.full(200, 0.08) - weak_ic = np.tile(np.array([0.05, -0.05]), 100) - - tester = BootstrapICTester(config) - controller = FDRController(config) - result = controller.batch_evaluate( - {"strong_factor": strong_ic, "weak_factor": weak_ic}, - tester, - ) - - assert result.significant["strong_factor"] - assert not result.significant["weak_factor"] - assert result.n_discoveries == 1 - -def test_fdr_bh_procedure(config): - """10 factors with p-values [0.001, ..., 0.010] at FDR=0.05.""" - # Use small enough p-values that BH adjustment still yields significance - p_values = {f"f{i}": 0.001 * (i + 1) for i in range(10)} - controller = FDRController(config) - result = controller.apply_fdr(p_values) - - assert isinstance(result, FDRResult) - assert result.fdr_level == 0.05 - # With BH at 0.05 and raw p 
in [0.001..0.010], adjusted p for f0 = 0.001*10/1 = 0.01 < 0.05 - assert result.n_discoveries >= 1 - assert result.significant["f0"] == True # p=0.001, adjusted=0.01 - - -def test_fdr_all_significant(config): - """All p=0.001 should be significant after BH.""" - p_values = {f"f{i}": 0.001 for i in range(10)} - controller = FDRController(config) - result = controller.apply_fdr(p_values) - - assert result.n_discoveries == 10 - for name, sig in result.significant.items(): - assert sig == True - - -def test_fdr_empty_input(config): - """Empty p-value dict should return empty result.""" - controller = FDRController(config) - result = controller.apply_fdr({}) - assert result.n_discoveries == 0 - assert result.significant == {} - - -# ----------------------------------------------------------------------- -# DeflatedSharpeCalculator -# ----------------------------------------------------------------------- - -def test_deflated_sharpe_with_known_returns(config): - """Verify DSR computation with known returns and n_trials.""" - rng = np.random.default_rng(42) - T = 500 - # Strong positive returns - ls_returns = rng.normal(0.001, 0.01, T) - - calc = DeflatedSharpeCalculator(config) - result = calc.compute("good_factor", ls_returns, n_trials=10) - - assert isinstance(result, DeflatedSharpeResult) - assert result.raw_sharpe > 0 - assert result.n_trials == 10 - assert result.haircut >= 0 or result.haircut < 0 # can be negative - - -def test_deflated_sharpe_many_trials_penalizes(config): - """More trials should increase the haircut (higher expected max SR).""" - rng = np.random.default_rng(42) - T = 500 - ls_returns = rng.normal(0.001, 0.01, T) - - calc = DeflatedSharpeCalculator(config) - result_few = calc.compute("factor", ls_returns, n_trials=5) - result_many = calc.compute("factor", ls_returns, n_trials=500) - - # With more trials, the deflated SR should be lower - assert result_many.deflated_sharpe <= result_few.deflated_sharpe - - -def 
test_deflated_sharpe_short_series(config): - """Very short series (<10) should return default failing result.""" - ls_returns = np.array([0.01, 0.02, 0.01]) - calc = DeflatedSharpeCalculator(config) - result = calc.compute("short", ls_returns, n_trials=10) - assert result.passes is False - assert result.raw_sharpe == 0.0 diff --git a/src/factorminer/factorminer/utils/__init__.py b/src/factorminer/factorminer/utils/__init__.py deleted file mode 100644 index f4982d8..0000000 --- a/src/factorminer/factorminer/utils/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Utility modules for FactorMiner.""" - -from src.factorminer.factorminer.utils.config import ( - AutoInventorConfig, - CapacityConfig, - CausalConfig, - Config, - DebateConfig, - HelixConfig, - MiningConfig, - Phase2Config, - RegimeConfig, - SignificanceConfig, - load_config, -) -from src.factorminer.factorminer.utils.reporting import MiningReporter -from src.factorminer.factorminer.utils.tearsheet import FactorTearSheet -from src.factorminer.factorminer.utils.visualization import ( - plot_ablation_comparison, - plot_correlation_heatmap, - plot_cost_pressure, - plot_efficiency_benchmark, - plot_ic_timeseries, - plot_mining_funnel, - plot_quintile_returns, -) diff --git a/src/factorminer/factorminer/utils/config.py b/src/factorminer/factorminer/utils/config.py deleted file mode 100644 index c0c8640..0000000 --- a/src/factorminer/factorminer/utils/config.py +++ /dev/null @@ -1,741 +0,0 @@ -"""Configuration loading, validation, and management for FactorMiner.""" - -from __future__ import annotations - -import copy -from dataclasses import dataclass, field, asdict -from pathlib import Path -from typing import Any - -import yaml - -from src.factorminer.factorminer.configs import DEFAULT_CONFIG_PATH - - -@dataclass -class MiningConfig: - """Parameters controlling the factor mining loop.""" - - target_library_size: int = 110 - batch_size: int = 40 - max_iterations: int = 200 - ic_threshold: float = 0.04 - 
icir_threshold: float = 0.5 - correlation_threshold: float = 0.5 - replacement_ic_min: float = 0.10 - replacement_ic_ratio: float = 1.3 - - def validate(self) -> None: - if self.target_library_size < 1: - raise ValueError("target_library_size must be >= 1") - if self.batch_size < 1: - raise ValueError("batch_size must be >= 1") - if self.max_iterations < 1: - raise ValueError("max_iterations must be >= 1") - if not (0.0 < self.ic_threshold < 1.0): - raise ValueError("ic_threshold must be in (0, 1)") - if not (0.0 < self.icir_threshold < 10.0): - raise ValueError("icir_threshold must be in (0, 10)") - if not (0.0 < self.correlation_threshold <= 1.0): - raise ValueError("correlation_threshold must be in (0, 1]") - if self.replacement_ic_min <= self.ic_threshold: - raise ValueError("replacement_ic_min must be > ic_threshold") - if self.replacement_ic_ratio < 1.0: - raise ValueError("replacement_ic_ratio must be >= 1.0") - - -@dataclass -class EvaluationConfig: - """Parameters for factor evaluation.""" - - num_workers: int = 40 - fast_screen_assets: int = 100 - gpu_device: str = "cuda:0" - backend: str = "gpu" - signal_failure_policy: str = "reject" - - def validate(self) -> None: - if self.num_workers < 1: - raise ValueError("num_workers must be >= 1") - if self.fast_screen_assets < 1: - raise ValueError("fast_screen_assets must be >= 1") - if self.backend not in ("gpu", "numpy", "c"): - raise ValueError(f"backend must be one of: gpu, numpy, c (got '{self.backend}')") - if self.signal_failure_policy not in ("reject", "synthetic", "raise"): - raise ValueError( - "signal_failure_policy must be one of: reject, synthetic, raise" - ) - - -@dataclass -class DataConfig: - """Parameters for data loading and universes.""" - - market: str = "a_shares" - universe: str = "CSI500" - frequency: str = "10min" - features: list[str] = field( - default_factory=lambda: [ - "$open", "$high", "$low", "$close", - "$volume", "$amt", "$vwap", "$returns", - ] - ) - train_period: list[str] = 
field( - default_factory=lambda: ["2024-01-01", "2024-12-31"] - ) - test_period: list[str] = field( - default_factory=lambda: ["2025-01-01", "2025-12-31"] - ) - targets: list[dict[str, Any]] = field( - default_factory=lambda: [ - { - "name": "paper", - "entry_delay_bars": 1, - "holding_bars": 1, - "price_pair": "open_to_close", - "return_transform": "simple", - } - ] - ) - default_target: str = "paper" - - def validate(self) -> None: - if len(self.train_period) != 2: - raise ValueError("train_period must be a list of [start, end]") - if len(self.test_period) != 2: - raise ValueError("test_period must be a list of [start, end]") - if self.train_period[0] >= self.train_period[1]: - raise ValueError("train_period start must be before end") - if self.test_period[0] >= self.test_period[1]: - raise ValueError("test_period start must be before end") - if not self.features: - raise ValueError("features must not be empty") - if not self.targets: - raise ValueError("data.targets must not be empty") - target_names: list[str] = [] - for target in self.targets: - if not isinstance(target, dict): - raise ValueError("each data.targets entry must be a mapping") - name = str(target.get("name", "")).strip() - if not name: - raise ValueError("each data.targets entry must define a non-empty name") - target_names.append(name) - if int(target.get("entry_delay_bars", 0)) < 0: - raise ValueError("entry_delay_bars must be >= 0") - if int(target.get("holding_bars", 0)) < 0: - raise ValueError("holding_bars must be >= 0") - if target.get("price_pair") not in ( - "open_to_close", - "close_to_close", - "open_to_open", - "close_to_open", - ): - raise ValueError( - "price_pair must be one of: open_to_close, close_to_close, " - "open_to_open, close_to_open" - ) - if target.get("return_transform", "simple") not in ("simple", "log"): - raise ValueError("return_transform must be one of: simple, log") - if len(set(target_names)) != len(target_names): - raise ValueError("data.targets names must be 
unique") - if self.default_target not in set(target_names): - raise ValueError("data.default_target must match one of data.targets[*].name") - - -@dataclass -class LLMConfig: - """Parameters for LLM-based factor generation.""" - - provider: str = "google" - model: str = "gemini-2.0-flash" - temperature: float = 0.8 - max_tokens: int = 4096 - batch_candidates: int = 40 - - def validate(self) -> None: - if self.provider not in ("google", "openai", "anthropic", "mock"): - raise ValueError( - f"provider must be one of: google, openai, anthropic, mock " - f"(got '{self.provider}')" - ) - if not (0.0 <= self.temperature <= 2.0): - raise ValueError("temperature must be in [0, 2]") - if self.max_tokens < 1: - raise ValueError("max_tokens must be >= 1") - if self.batch_candidates < 1: - raise ValueError("batch_candidates must be >= 1") - - -@dataclass -class MemoryConfig: - """Parameters for the experience memory system.""" - - max_success_patterns: int = 50 - max_failure_patterns: int = 100 - max_insights: int = 30 - consolidation_interval: int = 10 - - def validate(self) -> None: - if self.max_success_patterns < 1: - raise ValueError("max_success_patterns must be >= 1") - if self.max_failure_patterns < 1: - raise ValueError("max_failure_patterns must be >= 1") - if self.max_insights < 1: - raise ValueError("max_insights must be >= 1") - if self.consolidation_interval < 1: - raise ValueError("consolidation_interval must be >= 1") - - -@dataclass -class CausalConfig: - """Parameters for causal validation (Granger + intervention tests).""" - - enabled: bool = False - granger_max_lag: int = 5 - granger_significance: float = 0.05 - n_interventions: int = 3 - intervention_magnitude: float = 2.0 - intervention_ic_threshold: float = 0.5 - robustness_threshold: float = 0.4 - granger_weight: float = 0.4 - intervention_weight: float = 0.6 - - def validate(self) -> None: - if self.granger_max_lag < 1: - raise ValueError("granger_max_lag must be >= 1") - if not (0.0 < 
self.granger_significance < 1.0): - raise ValueError("granger_significance must be in (0, 1)") - if self.n_interventions < 1: - raise ValueError("n_interventions must be >= 1") - if self.intervention_magnitude <= 0.0: - raise ValueError("intervention_magnitude must be > 0") - if not (0.0 <= self.intervention_ic_threshold <= 1.0): - raise ValueError("intervention_ic_threshold must be in [0, 1]") - if not (0.0 <= self.robustness_threshold <= 1.0): - raise ValueError("robustness_threshold must be in [0, 1]") - if not (0.0 <= self.granger_weight <= 1.0): - raise ValueError("granger_weight must be in [0, 1]") - if not (0.0 <= self.intervention_weight <= 1.0): - raise ValueError("intervention_weight must be in [0, 1]") - if abs(self.granger_weight + self.intervention_weight - 1.0) > 1e-6: - raise ValueError("granger_weight + intervention_weight must equal 1.0") - - -@dataclass -class RegimeConfig: - """Parameters for regime-conditional factor evaluation.""" - - enabled: bool = False - lookback_window: int = 60 - bull_return_threshold: float = 0.0 - bear_return_threshold: float = 0.0 - volatility_percentile: float = 0.7 - min_regime_ic: float = 0.03 - min_regimes_passing: int = 2 - - def validate(self) -> None: - if self.lookback_window < 5: - raise ValueError("lookback_window must be >= 5") - if not (0.0 < self.volatility_percentile < 1.0): - raise ValueError("volatility_percentile must be in (0, 1)") - if self.min_regime_ic < 0.0: - raise ValueError("min_regime_ic must be >= 0") - if not (1 <= self.min_regimes_passing <= 4): - raise ValueError("min_regimes_passing must be in [1, 4]") - - -@dataclass -class CapacityConfig: - """Parameters for strategy capacity estimation.""" - - enabled: bool = False - base_capital_usd: float = 1e8 - ic_degradation_limit: float = 0.20 - net_icir_threshold: float = 0.3 - sigma_annual: float = 0.25 - - def validate(self) -> None: - if self.base_capital_usd <= 0.0: - raise ValueError("base_capital_usd must be > 0") - if not (0.0 < 
self.ic_degradation_limit < 1.0): - raise ValueError("ic_degradation_limit must be in (0, 1)") - if self.net_icir_threshold < 0.0: - raise ValueError("net_icir_threshold must be >= 0") - if self.sigma_annual <= 0.0: - raise ValueError("sigma_annual must be > 0") - - -@dataclass -class SignificanceConfig: - """Parameters for statistical significance testing.""" - - enabled: bool = False - bootstrap_n_samples: int = 1000 - bootstrap_block_size: int = 20 - fdr_level: float = 0.05 - deflated_sharpe_enabled: bool = True - min_deflated_sharpe: float = 0.0 - - def validate(self) -> None: - if self.bootstrap_n_samples < 100: - raise ValueError("bootstrap_n_samples must be >= 100") - if self.bootstrap_block_size < 1: - raise ValueError("bootstrap_block_size must be >= 1") - if not (0.0 < self.fdr_level < 1.0): - raise ValueError("fdr_level must be in (0, 1)") - - -@dataclass -class DebateConfig: - """Parameters for multi-specialist debate-based generation.""" - - enabled: bool = False - num_specialists: int = 3 - candidates_per_specialist: int = 15 - enable_critic: bool = True - top_k_after_critic: int = 40 - critic_temperature: float = 0.3 - - def validate(self) -> None: - if self.num_specialists < 1: - raise ValueError("num_specialists must be >= 1") - if self.candidates_per_specialist < 1: - raise ValueError("candidates_per_specialist must be >= 1") - if self.top_k_after_critic < 1: - raise ValueError("top_k_after_critic must be >= 1") - if not (0.0 <= self.critic_temperature <= 2.0): - raise ValueError("critic_temperature must be in [0, 2]") - - -@dataclass -class AutoInventorConfig: - """Parameters for automatic operator invention.""" - - enabled: bool = False - invention_interval: int = 10 - max_proposals_per_round: int = 5 - min_ic_contribution: float = 0.03 - store_dir: str = "./output/custom_operators" - - def validate(self) -> None: - if self.invention_interval < 1: - raise ValueError("invention_interval must be >= 1") - if self.max_proposals_per_round < 1: - 
raise ValueError("max_proposals_per_round must be >= 1") - if self.min_ic_contribution < 0.0: - raise ValueError("min_ic_contribution must be >= 0") - - -@dataclass -class HelixConfig: - """Parameters for the Helix knowledge and memory system.""" - - enabled: bool = False - enable_knowledge_graph: bool = False - enable_embeddings: bool = False - enable_canonicalization: bool = True - forgetting_lambda: float = 0.95 - forgetting_demotion_threshold: int = 20 - - def validate(self) -> None: - if not (0.0 < self.forgetting_lambda <= 1.0): - raise ValueError("forgetting_lambda must be in (0, 1]") - if self.forgetting_demotion_threshold < 1: - raise ValueError("forgetting_demotion_threshold must be >= 1") - - -@dataclass -class Phase2Config: - """Aggregated configuration for all Phase 2 subsystems.""" - - causal: CausalConfig = field(default_factory=CausalConfig) - regime: RegimeConfig = field(default_factory=RegimeConfig) - capacity: CapacityConfig = field(default_factory=CapacityConfig) - significance: SignificanceConfig = field(default_factory=SignificanceConfig) - debate: DebateConfig = field(default_factory=DebateConfig) - auto_inventor: AutoInventorConfig = field(default_factory=AutoInventorConfig) - helix: HelixConfig = field(default_factory=HelixConfig) - - def validate(self) -> None: - for sub in [ - self.causal, - self.regime, - self.capacity, - self.significance, - self.debate, - self.auto_inventor, - self.helix, - ]: - sub.validate() - - -@dataclass -class BenchmarkConfig: - """Parameters for paper/research benchmark execution.""" - - mode: str = "paper" - seed: int = 42 - freeze_top_k: int = 40 - freeze_universe: str = "CSI500" - report_universes: list[str] = field( - default_factory=lambda: ["CSI500", "CSI1000", "HS300", "Binance"] - ) - baselines: list[str] = field( - default_factory=lambda: [ - "alpha101_classic", - "alpha101_adapted", - "random_exploration", - "gplearn", - "alphaforge_style", - "alphaagent_style", - "factor_miner", - 
"factor_miner_no_memory", - ] - ) - cost_bps: list[float] = field(default_factory=lambda: [1.0, 4.0, 7.0, 10.0, 11.0]) - efficiency_panel_shape: list[int] = field(default_factory=lambda: [12610, 500]) - - def validate(self) -> None: - if self.mode not in ("paper", "research"): - raise ValueError("benchmark.mode must be one of: paper, research") - if self.freeze_top_k < 1: - raise ValueError("benchmark.freeze_top_k must be >= 1") - if not self.freeze_universe: - raise ValueError("benchmark.freeze_universe must not be empty") - if not self.report_universes: - raise ValueError("benchmark.report_universes must not be empty") - if any(not universe for universe in self.report_universes): - raise ValueError("benchmark.report_universes must not contain empty entries") - if not self.baselines: - raise ValueError("benchmark.baselines must not be empty") - if any(cost < 0 for cost in self.cost_bps): - raise ValueError("benchmark.cost_bps must be non-negative") - if len(self.efficiency_panel_shape) != 2: - raise ValueError("benchmark.efficiency_panel_shape must be [periods, assets]") - if any(dim < 1 for dim in self.efficiency_panel_shape): - raise ValueError("benchmark.efficiency_panel_shape values must be >= 1") - - -@dataclass -class ResearchUncertaintyConfig: - """Uncertainty controls for multi-horizon research scoring.""" - - bootstrap_samples: int = 200 - block_size: int = 20 - shrinkage_strength: float = 1.0 - lcb_zscore: float = 1.0 - fdr_alpha: float = 0.05 - - def validate(self) -> None: - if self.bootstrap_samples < 10: - raise ValueError("research.uncertainty.bootstrap_samples must be >= 10") - if self.block_size < 1: - raise ValueError("research.uncertainty.block_size must be >= 1") - if self.shrinkage_strength < 0.0: - raise ValueError("research.uncertainty.shrinkage_strength must be >= 0") - if self.lcb_zscore < 0.0: - raise ValueError("research.uncertainty.lcb_zscore must be >= 0") - if not (0.0 < self.fdr_alpha < 1.0): - raise 
ValueError("research.uncertainty.fdr_alpha must be in (0, 1)") - - -@dataclass -class ResearchAdmissionConfig: - """Research-mode admission controls.""" - - use_residual_ic: bool = True - use_effective_rank_gain: bool = True - turnover_penalty: float = 0.05 - redundancy_penalty: float = 0.20 - min_score: float = 0.04 - min_lcb: float = 0.0 - min_span_gain: float = 0.05 - min_effective_rank_gain: float = 0.0 - - def validate(self) -> None: - if self.turnover_penalty < 0.0: - raise ValueError("research.admission.turnover_penalty must be >= 0") - if self.redundancy_penalty < 0.0: - raise ValueError("research.admission.redundancy_penalty must be >= 0") - if self.min_score < 0.0: - raise ValueError("research.admission.min_score must be >= 0") - if self.min_span_gain < 0.0: - raise ValueError("research.admission.min_span_gain must be >= 0") - - -@dataclass -class ResearchSelectionConfig: - """Research-mode model configuration.""" - - models: list[str] = field( - default_factory=lambda: ["ridge", "elastic_net", "lasso", "xgboost"] - ) - rolling_train_window: int = 80 - rolling_test_window: int = 20 - rolling_step: int = 20 - - def validate(self) -> None: - allowed = { - "ridge", - "elastic_net", - "lasso", - "stepwise", - "xgboost", - } - if not self.models: - raise ValueError("research.selection.models must not be empty") - if any(model not in allowed for model in self.models): - raise ValueError( - "research.selection.models entries must be one of: " - "ridge, elastic_net, lasso, stepwise, xgboost" - ) - if self.rolling_train_window < 5: - raise ValueError("research.selection.rolling_train_window must be >= 5") - if self.rolling_test_window < 1: - raise ValueError("research.selection.rolling_test_window must be >= 1") - if self.rolling_step < 1: - raise ValueError("research.selection.rolling_step must be >= 1") - - -@dataclass -class ResearchRegimesConfig: - """Research-mode regime diagnostics.""" - - enabled: bool = False - definition: str = 
"return_volatility_liquidity" - - def validate(self) -> None: - if self.definition not in ( - "return_volatility", - "return_volatility_liquidity", - ): - raise ValueError( - "research.regimes.definition must be one of: " - "return_volatility, return_volatility_liquidity" - ) - - -@dataclass -class ResearchExecutionConfig: - """Execution-aware research scoring controls.""" - - cost_model: str = "linear_bps" - cost_bps: float = 4.0 - - def validate(self) -> None: - if self.cost_model not in ("linear_bps",): - raise ValueError("research.execution.cost_model must be 'linear_bps'") - if self.cost_bps < 0.0: - raise ValueError("research.execution.cost_bps must be >= 0") - - -@dataclass -class ResearchConfig: - """Research-first multi-horizon scoring configuration.""" - - enabled: bool = False - primary_objective: str = "weighted_multi_horizon" - target_aggregation: str = "weighted" - horizon_weights: dict[str, float] = field(default_factory=dict) - uncertainty: ResearchUncertaintyConfig = field(default_factory=ResearchUncertaintyConfig) - admission: ResearchAdmissionConfig = field(default_factory=ResearchAdmissionConfig) - selection: ResearchSelectionConfig = field(default_factory=ResearchSelectionConfig) - regimes: ResearchRegimesConfig = field(default_factory=ResearchRegimesConfig) - execution: ResearchExecutionConfig = field(default_factory=ResearchExecutionConfig) - - def validate(self) -> None: - if self.primary_objective not in ( - "single_horizon", - "weighted_multi_horizon", - "pareto_multi_horizon", - "net_ir", - ): - raise ValueError( - "research.primary_objective must be one of: " - "single_horizon, weighted_multi_horizon, pareto_multi_horizon, net_ir" - ) - if self.target_aggregation not in ("weighted", "pareto"): - raise ValueError( - "research.target_aggregation must be one of: weighted, pareto" - ) - if any(weight < 0.0 for weight in self.horizon_weights.values()): - raise ValueError("research.horizon_weights values must be >= 0") - 
self.uncertainty.validate() - self.admission.validate() - self.selection.validate() - self.regimes.validate() - self.execution.validate() - - -@dataclass -class Config: - """Top-level configuration aggregating all sub-configs.""" - - mining: MiningConfig = field(default_factory=MiningConfig) - evaluation: EvaluationConfig = field(default_factory=EvaluationConfig) - data: DataConfig = field(default_factory=DataConfig) - llm: LLMConfig = field(default_factory=LLMConfig) - memory: MemoryConfig = field(default_factory=MemoryConfig) - phase2: Phase2Config = field(default_factory=Phase2Config) - benchmark: BenchmarkConfig = field(default_factory=BenchmarkConfig) - research: ResearchConfig = field(default_factory=ResearchConfig) - - def validate(self) -> None: - """Validate all sub-configurations.""" - self.mining.validate() - self.evaluation.validate() - self.data.validate() - self.llm.validate() - self.memory.validate() - self.phase2.validate() - self.benchmark.validate() - self.research.validate() - - def to_dict(self) -> dict[str, Any]: - """Serialize config to a plain dictionary.""" - return asdict(self) - - def save(self, path: str | Path) -> None: - """Write config to a YAML file.""" - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: - yaml.dump(self.to_dict(), f, default_flow_style=False, sort_keys=False) - - -_SECTION_MAP: dict[str, type] = { - "mining": MiningConfig, - "evaluation": EvaluationConfig, - "data": DataConfig, - "llm": LLMConfig, - "memory": MemoryConfig, - "phase2": Phase2Config, - "benchmark": BenchmarkConfig, - "research": ResearchConfig, -} - -_PHASE2_SECTION_MAP: dict[str, type] = { - "causal": CausalConfig, - "regime": RegimeConfig, - "capacity": CapacityConfig, - "significance": SignificanceConfig, - "debate": DebateConfig, - "auto_inventor": AutoInventorConfig, - "helix": HelixConfig, -} - -_RESEARCH_SECTION_MAP: dict[str, type] = { - "uncertainty": ResearchUncertaintyConfig, - "admission": 
ResearchAdmissionConfig, - "selection": ResearchSelectionConfig, - "regimes": ResearchRegimesConfig, - "execution": ResearchExecutionConfig, -} - - -def _deep_merge(base: dict, override: dict) -> dict: - """Recursively merge override into base, returning a new dict.""" - result = copy.deepcopy(base) - for key, value in override.items(): - if key in result and isinstance(result[key], dict) and isinstance(value, dict): - result[key] = _deep_merge(result[key], value) - else: - result[key] = copy.deepcopy(value) - return result - - -def _load_yaml(path: Path) -> dict[str, Any]: - """Load a YAML file and return its contents as a dict.""" - with open(path) as f: - data = yaml.safe_load(f) - if data is None: - return {} - if not isinstance(data, dict): - raise ValueError(f"Config file {path} must contain a YAML mapping at the top level") - return data - - -def _build_section(section_cls: type, raw: dict[str, Any]) -> Any: - """Instantiate a config dataclass, ignoring unknown keys.""" - valid_fields = {f.name for f in section_cls.__dataclass_fields__.values()} - filtered = {k: v for k, v in raw.items() if k in valid_fields} - return section_cls(**filtered) - - -def _build_phase2(raw: dict[str, Any]) -> Phase2Config: - """Build Phase2Config with nested sub-config dataclasses.""" - subs = {} - for sub_name, sub_cls in _PHASE2_SECTION_MAP.items(): - sub_raw = raw.get(sub_name, {}) - if isinstance(sub_raw, dict): - subs[sub_name] = _build_section(sub_cls, sub_raw) - else: - subs[sub_name] = sub_cls() - return Phase2Config(**subs) - - -def _build_research(raw: dict[str, Any]) -> ResearchConfig: - """Build ResearchConfig with nested sub-config dataclasses.""" - subs = {} - valid_fields = {f.name for f in ResearchConfig.__dataclass_fields__.values()} - for key, value in raw.items(): - if key in valid_fields and key not in _RESEARCH_SECTION_MAP: - subs[key] = copy.deepcopy(value) - - for sub_name, sub_cls in _RESEARCH_SECTION_MAP.items(): - sub_raw = raw.get(sub_name, {}) - if 
isinstance(sub_raw, dict): - subs[sub_name] = _build_section(sub_cls, sub_raw) - else: - subs[sub_name] = sub_cls() - - return ResearchConfig(**subs) - - -def load_config( - config_path: str | Path | None = None, - overrides: dict[str, Any] | None = None, -) -> Config: - """Load configuration from YAML with defaults and optional overrides. - - Resolution order: - 1. Built-in defaults (default.yaml shipped with the package) - 2. User-provided config file (if given) - 3. Programmatic overrides dict (if given) - - Args: - config_path: Path to a user YAML config file. If None, only defaults are used. - overrides: Dict of overrides keyed by section, e.g. - {"mining": {"batch_size": 20}, "llm": {"model": "gpt-4"}}. - - Returns: - A fully validated Config instance. - """ - # 1. Load package defaults - defaults = _load_yaml(DEFAULT_CONFIG_PATH) - - # 2. Merge user config - if config_path is not None: - user_cfg = _load_yaml(Path(config_path)) - merged = _deep_merge(defaults, user_cfg) - else: - merged = defaults - - # 3. Merge programmatic overrides - if overrides: - merged = _deep_merge(merged, overrides) - - # 4. 
Build typed config objects - sections = {} - for section_name, section_cls in _SECTION_MAP.items(): - raw = merged.get(section_name, {}) - if section_name == "phase2": - sections[section_name] = _build_phase2(raw) - elif section_name == "research": - sections[section_name] = _build_research(raw) - else: - sections[section_name] = _build_section(section_cls, raw) - - config = Config(**sections) - config.validate() - return config diff --git a/src/factorminer/factorminer/utils/logging.py b/src/factorminer/factorminer/utils/logging.py deleted file mode 100644 index 4c13ea1..0000000 --- a/src/factorminer/factorminer/utils/logging.py +++ /dev/null @@ -1,297 +0,0 @@ -"""Structured logging system for FactorMiner mining sessions.""" - -from __future__ import annotations - -import json -import logging -import sys -import time -from dataclasses import dataclass, field, asdict -from pathlib import Path -from typing import Any, TextIO - -from tqdm import tqdm - - -# --------------------------------------------------------------------------- -# Structured data records -# --------------------------------------------------------------------------- - -@dataclass -class FactorRecord: - """Log record for a single evaluated factor candidate.""" - - expression: str - ic: float | None = None - icir: float | None = None - max_correlation: float | None = None - admitted: bool = False - rejection_reason: str | None = None - replaced_factor: str | None = None - timestamp: float = field(default_factory=time.time) - - def to_dict(self) -> dict[str, Any]: - return {k: v for k, v in asdict(self).items() if v is not None} - - -@dataclass -class IterationRecord: - """Aggregated stats for a single mining iteration (batch).""" - - iteration: int - candidates_generated: int = 0 - ic_passed: int = 0 - correlation_passed: int = 0 - admitted: int = 0 - rejected: int = 0 - replaced: int = 0 - library_size: int = 0 - best_ic: float = 0.0 - mean_ic: float = 0.0 - elapsed_seconds: float = 0.0 - timestamp: 
float = field(default_factory=time.time) - - @property - def yield_rate(self) -> float: - """Fraction of candidates that were admitted to the library.""" - if self.candidates_generated == 0: - return 0.0 - return self.admitted / self.candidates_generated - - def to_dict(self) -> dict[str, Any]: - d = asdict(self) - d["yield_rate"] = self.yield_rate - return d - - -# --------------------------------------------------------------------------- -# JSON log exporter -# --------------------------------------------------------------------------- - -class JSONLogExporter: - """Collects structured records and exports them to a JSON file.""" - - def __init__(self) -> None: - self.iterations: list[dict[str, Any]] = [] - self.factors: list[dict[str, Any]] = [] - - def add_iteration(self, record: IterationRecord) -> None: - self.iterations.append(record.to_dict()) - - def add_factor(self, record: FactorRecord) -> None: - self.factors.append(record.to_dict()) - - def export(self, path: str | Path) -> None: - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - payload = { - "iterations": self.iterations, - "factors": self.factors, - "summary": self._summary(), - } - with open(path, "w") as f: - json.dump(payload, f, indent=2, default=str) - - def _summary(self) -> dict[str, Any]: - if not self.iterations: - return {} - total_candidates = sum(it["candidates_generated"] for it in self.iterations) - total_admitted = sum(it["admitted"] for it in self.iterations) - return { - "total_iterations": len(self.iterations), - "total_candidates": total_candidates, - "total_admitted": total_admitted, - "overall_yield_rate": total_admitted / total_candidates if total_candidates else 0.0, - "final_library_size": self.iterations[-1].get("library_size", 0), - } - - -# --------------------------------------------------------------------------- -# Console formatter -# --------------------------------------------------------------------------- - -class 
_ConsoleFormatter(logging.Formatter): - """Compact colored formatter for terminal output.""" - - GREY = "\033[90m" - GREEN = "\033[92m" - YELLOW = "\033[93m" - RED = "\033[91m" - BOLD = "\033[1m" - RESET = "\033[0m" - - LEVEL_COLORS = { - logging.DEBUG: GREY, - logging.INFO: GREEN, - logging.WARNING: YELLOW, - logging.ERROR: RED, - logging.CRITICAL: RED + BOLD, - } - - def format(self, record: logging.LogRecord) -> str: - color = self.LEVEL_COLORS.get(record.levelno, self.RESET) - level = record.levelname[0] # Single-char level - ts = time.strftime("%H:%M:%S", time.localtime(record.created)) - return f"{self.GREY}{ts}{self.RESET} {color}{level}{self.RESET} {record.getMessage()}" - - -def setup_logger( - name: str = "factorminer", - level: int = logging.INFO, - log_file: str | Path | None = None, - stream: TextIO = sys.stderr, -) -> logging.Logger: - """Create and configure a FactorMiner logger. - - Args: - name: Logger name. - level: Logging level. - log_file: Optional path for a plain-text log file. - stream: Stream for console output (default stderr). - - Returns: - Configured logger instance. - """ - logger = logging.getLogger(name) - logger.setLevel(level) - logger.handlers.clear() - - # Console handler with colors - console = logging.StreamHandler(stream) - console.setFormatter(_ConsoleFormatter()) - logger.addHandler(console) - - # Optional file handler - if log_file is not None: - log_path = Path(log_file) - log_path.parent.mkdir(parents=True, exist_ok=True) - fh = logging.FileHandler(log_path) - fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) - logger.addHandler(fh) - - return logger - - -# --------------------------------------------------------------------------- -# Mining session logger (high-level helper) -# --------------------------------------------------------------------------- - -class MiningSessionLogger: - """High-level logger for an entire mining session. 
- - Combines structured JSON export with pretty console output. - """ - - def __init__( - self, - output_dir: str | Path, - verbose: bool = False, - ) -> None: - self.output_dir = Path(output_dir) - self.output_dir.mkdir(parents=True, exist_ok=True) - - level = logging.DEBUG if verbose else logging.INFO - self.logger = setup_logger( - level=level, - log_file=self.output_dir / "mining.log", - ) - self.exporter = JSONLogExporter() - self._progress: tqdm | None = None - - # -- Progress bar --------------------------------------------------- - - def start_progress(self, total_iterations: int) -> None: - self._progress = tqdm( - total=total_iterations, - desc="Mining", - unit="iter", - bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", - ) - - def advance_progress(self) -> None: - if self._progress is not None: - self._progress.update(1) - - def close_progress(self) -> None: - if self._progress is not None: - self._progress.close() - self._progress = None - - # -- Iteration-level ------------------------------------------------ - - def log_iteration(self, record: IterationRecord) -> None: - """Log a completed iteration to both console and structured store.""" - self.exporter.add_iteration(record) - self.logger.info( - "Iter %3d | gen=%d ic_ok=%d corr_ok=%d +%d -%d | " - "lib=%d yield=%.1f%% best_ic=%.4f mean_ic=%.4f (%.1fs)", - record.iteration, - record.candidates_generated, - record.ic_passed, - record.correlation_passed, - record.admitted, - record.rejected, - record.library_size, - record.yield_rate * 100, - record.best_ic, - record.mean_ic, - record.elapsed_seconds, - ) - self.advance_progress() - - # -- Factor-level --------------------------------------------------- - - def log_factor(self, record: FactorRecord) -> None: - """Log a single factor evaluation result.""" - self.exporter.add_factor(record) - if record.admitted: - self.logger.debug( - " + ADMIT ic=%.4f icir=%.3f corr=%.3f %s", - record.ic or 0, - record.icir or 0, - 
record.max_correlation or 0, - record.expression[:80], - ) - else: - self.logger.debug( - " - REJECT (%s) %s", - record.rejection_reason or "unknown", - record.expression[:80], - ) - - # -- Session lifecycle ---------------------------------------------- - - def log_session_start(self, config_summary: dict[str, Any]) -> None: - self.logger.info("=" * 60) - self.logger.info("FactorMiner session started") - self.logger.info( - "Target library: %d | Batch: %d | Max iters: %d", - config_summary.get("target_library_size", "?"), - config_summary.get("batch_size", "?"), - config_summary.get("max_iterations", "?"), - ) - self.logger.info("=" * 60) - - def log_session_end(self, library_size: int, total_time: float) -> None: - summary = self.exporter._summary() - self.close_progress() - self.logger.info("=" * 60) - self.logger.info("Session complete") - self.logger.info( - "Library: %d factors | %d iterations | %.0fs total", - library_size, - summary.get("total_iterations", 0), - total_time, - ) - self.logger.info( - "Candidates: %d generated, %d admitted (%.1f%% yield)", - summary.get("total_candidates", 0), - summary.get("total_admitted", 0), - summary.get("overall_yield_rate", 0) * 100, - ) - self.logger.info("=" * 60) - - # Export structured log - json_path = self.output_dir / "session_log.json" - self.exporter.export(json_path) - self.logger.info("Session log exported to %s", json_path) diff --git a/src/factorminer/factorminer/utils/reporting.py b/src/factorminer/factorminer/utils/reporting.py deleted file mode 100644 index 37aba35..0000000 --- a/src/factorminer/factorminer/utils/reporting.py +++ /dev/null @@ -1,499 +0,0 @@ -"""Mining session reporting for FactorMiner. - -Provides structured logging, text reports, JSON export, and progress -visualization for factor mining sessions. Designed to mirror the batch -reports shown in Appendix H of the paper. 
-""" - -from __future__ import annotations - -import json -import time -from collections import defaultdict -from dataclasses import dataclass, field, asdict -from datetime import datetime -from pathlib import Path -from typing import Dict, List, Optional - -import matplotlib.pyplot as plt -import numpy as np - - -# --------------------------------------------------------------------------- -# Data classes for structured logging -# --------------------------------------------------------------------------- - -@dataclass -class FactorAdmissionRecord: - """Record of a single factor admission.""" - - factor_id: int - name: str - formula: str - ic: float - icir: float - max_corr: float - batch_number: int - timestamp: str = "" - - def __post_init__(self) -> None: - if not self.timestamp: - self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - -@dataclass -class BatchRecord: - """Record of a single mining batch.""" - - batch_num: int - candidates: int = 0 - ic_passed: int = 0 - corr_passed: int = 0 - admitted: int = 0 - replaced: int = 0 - rejection_reasons: List[str] = field(default_factory=list) - admitted_factors: List[FactorAdmissionRecord] = field(default_factory=list) - library_size: int = 0 - elapsed_seconds: float = 0.0 - timestamp: str = "" - - def __post_init__(self) -> None: - if not self.timestamp: - self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - @property - def rejected(self) -> int: - return self.candidates - self.admitted - self.replaced - - @property - def yield_rate(self) -> float: - if self.candidates == 0: - return 0.0 - return self.admitted / self.candidates - - @property - def rejection_rate(self) -> float: - if self.candidates == 0: - return 0.0 - return self.rejected / self.candidates - - def to_dict(self) -> dict: - d = asdict(self) - d["rejected"] = self.rejected - d["yield_rate"] = self.yield_rate - d["rejection_rate"] = self.rejection_rate - return d - - -# 
--------------------------------------------------------------------------- -# MiningReporter -# --------------------------------------------------------------------------- - -class MiningReporter: - """Track and report mining session progress. - - Collects batch-level and factor-level logs, generates text reports, - JSON exports, and progress visualisations. - - Parameters - ---------- - output_dir : str - Directory for saving reports and plots. - """ - - def __init__(self, output_dir: str) -> None: - self.output_dir = Path(output_dir) - self.output_dir.mkdir(parents=True, exist_ok=True) - self.batches: List[BatchRecord] = [] - self.factor_admissions: List[FactorAdmissionRecord] = [] - self._session_start: float = time.time() - - # ------------------------------------------------------------------ - # Logging - # ------------------------------------------------------------------ - - def log_batch( - self, - batch_num: int, - candidates: int, - ic_passed: int, - corr_passed: int, - admitted: int, - replaced: int, - rejection_reasons: List[str], - library_size: int = 0, - elapsed_seconds: float = 0.0, - ) -> None: - """Log a batch's results. - - Parameters - ---------- - batch_num : int - Sequential batch number. - candidates : int - Number of candidates generated. - ic_passed : int - Number passing IC screening. - corr_passed : int - Number passing correlation screening. - admitted : int - Number admitted to the library. - replaced : int - Number that replaced existing library factors. - rejection_reasons : List[str] - List of rejection reason strings for this batch. - library_size : int - Current library size after this batch. - elapsed_seconds : float - Time taken for this batch. 
- """ - record = BatchRecord( - batch_num=batch_num, - candidates=candidates, - ic_passed=ic_passed, - corr_passed=corr_passed, - admitted=admitted, - replaced=replaced, - rejection_reasons=rejection_reasons, - library_size=library_size, - elapsed_seconds=elapsed_seconds, - ) - self.batches.append(record) - - def log_factor_admission( - self, - factor_id: int, - name: str, - formula: str, - ic: float, - icir: float, - max_corr: float, - ) -> None: - """Log an individual factor admission. - - Parameters - ---------- - factor_id : int - Unique factor identifier. - name : str - Human-readable factor name. - formula : str - DSL expression string. - ic : float - Mean IC of the admitted factor. - icir : float - ICIR of the admitted factor. - max_corr : float - Maximum pairwise correlation at admission time. - """ - batch_num = self.batches[-1].batch_num if self.batches else 0 - record = FactorAdmissionRecord( - factor_id=factor_id, - name=name, - formula=formula, - ic=ic, - icir=icir, - max_corr=max_corr, - batch_number=batch_num, - ) - self.factor_admissions.append(record) - if self.batches: - self.batches[-1].admitted_factors.append(record) - - # ------------------------------------------------------------------ - # Text reports - # ------------------------------------------------------------------ - - def generate_batch_report(self, batch_num: int) -> str: - """Generate text report for a specific batch. - - Parameters - ---------- - batch_num : int - The batch number to report on. - - Returns - ------- - str - Formatted text report. - """ - batch = None - for b in self.batches: - if b.batch_num == batch_num: - batch = b - break - - if batch is None: - return f"Batch {batch_num} not found." 
- - lines = [ - f"{'=' * 60}", - f" BATCH REPORT: Iteration {batch.batch_num}", - f" Timestamp: {batch.timestamp}", - f"{'=' * 60}", - "", - f" Candidates generated: {batch.candidates:>6}", - f" IC screen passed: {batch.ic_passed:>6}", - f" Correlation passed: {batch.corr_passed:>6}", - f" Admitted to library: {batch.admitted:>6}", - f" Replaced in library: {batch.replaced:>6}", - f" Rejected: {batch.rejected:>6}", - "", - f" Yield rate: {batch.yield_rate:>6.1%}", - f" Rejection rate: {batch.rejection_rate:>6.1%}", - f" Library size (after): {batch.library_size:>6}", - f" Elapsed: {batch.elapsed_seconds:>6.1f}s", - ] - - # Admitted factors detail - if batch.admitted_factors: - lines.append("") - lines.append(" Admitted Factors:") - lines.append(f" {'ID':>4} {'IC':>8} {'ICIR':>8} {'MaxCorr':>8} Name") - lines.append(f" {'-'*4} {'-'*8} {'-'*8} {'-'*8} {'-'*20}") - for f in batch.admitted_factors: - lines.append( - f" {f.factor_id:>4} {f.ic:>8.4f} {f.icir:>8.3f} " - f"{f.max_corr:>8.4f} {f.name[:30]}" - ) - - # Rejection breakdown - if batch.rejection_reasons: - reason_counts: Dict[str, int] = defaultdict(int) - for reason in batch.rejection_reasons: - # Normalise to short category - if "IC" in reason.upper() or "ic" in reason.lower(): - reason_counts["IC below threshold"] += 1 - elif "corr" in reason.lower(): - reason_counts["Correlation too high"] += 1 - elif "parse" in reason.lower() or "invalid" in reason.lower(): - reason_counts["Parse / invalid"] += 1 - else: - reason_counts["Other"] += 1 - - lines.append("") - lines.append(" Rejection Breakdown:") - for reason, count in sorted(reason_counts.items(), key=lambda x: -x[1]): - lines.append(f" {reason:<30} {count:>5}") - - lines.append(f"\n{'=' * 60}") - return "\n".join(lines) - - def generate_session_report(self) -> str: - """Generate full session report with cumulative statistics. - - Returns - ------- - str - Formatted text report covering the entire mining session. 
- """ - elapsed = time.time() - self._session_start - - total_candidates = sum(b.candidates for b in self.batches) - total_ic_passed = sum(b.ic_passed for b in self.batches) - total_corr_passed = sum(b.corr_passed for b in self.batches) - total_admitted = sum(b.admitted for b in self.batches) - total_replaced = sum(b.replaced for b in self.batches) - total_rejected = total_candidates - total_admitted - total_replaced - - overall_yield = total_admitted / total_candidates if total_candidates > 0 else 0.0 - final_lib_size = self.batches[-1].library_size if self.batches else 0 - - lines = [ - f"{'#' * 60}", - f" FACTORMINER SESSION REPORT", - f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", - f"{'#' * 60}", - "", - f" Total batches: {len(self.batches):>6}", - f" Total elapsed: {elapsed:>6.0f}s ({elapsed/60:.1f}m)", - "", - " --- Cumulative Pipeline ---", - f" Candidates generated: {total_candidates:>6}", - f" IC screen passed: {total_ic_passed:>6} ({total_ic_passed/total_candidates:.1%})" if total_candidates > 0 else f" IC screen passed: {total_ic_passed:>6}", - f" Correlation passed: {total_corr_passed:>6} ({total_corr_passed/total_candidates:.1%})" if total_candidates > 0 else f" Correlation passed: {total_corr_passed:>6}", - f" Admitted: {total_admitted:>6} ({overall_yield:.1%})", - f" Replaced: {total_replaced:>6}", - f" Rejected: {total_rejected:>6}", - "", - f" Final library size: {final_lib_size:>6}", - f" Overall yield rate: {overall_yield:>6.1%}", - ] - - # Per-batch summary table - if self.batches: - lines.append("") - lines.append(" --- Per-Batch Summary ---") - lines.append( - f" {'Batch':>5} {'Cand':>5} {'IC':>4} {'Corr':>4} " - f"{'Adm':>4} {'Rep':>4} {'Lib':>4} {'Yield':>6} {'Time':>6}" - ) - lines.append(f" {'-'*5} {'-'*5} {'-'*4} {'-'*4} {'-'*4} {'-'*4} {'-'*4} {'-'*6} {'-'*6}") - for b in self.batches: - lines.append( - f" {b.batch_num:>5} {b.candidates:>5} {b.ic_passed:>4} " - f"{b.corr_passed:>4} {b.admitted:>4} {b.replaced:>4} " - 
f"{b.library_size:>4} {b.yield_rate:>5.1%} {b.elapsed_seconds:>5.1f}s" - ) - - # Top admitted factors - if self.factor_admissions: - top_factors = sorted(self.factor_admissions, key=lambda f: f.ic, reverse=True)[:10] - lines.append("") - lines.append(" --- Top 10 Factors by IC ---") - lines.append( - f" {'ID':>4} {'IC':>8} {'ICIR':>8} {'MaxCorr':>8} Name" - ) - lines.append(f" {'-'*4} {'-'*8} {'-'*8} {'-'*8} {'-'*30}") - for f in top_factors: - lines.append( - f" {f.factor_id:>4} {f.ic:>8.4f} {f.icir:>8.3f} " - f"{f.max_corr:>8.4f} {f.name[:30]}" - ) - - lines.append(f"\n{'#' * 60}") - return "\n".join(lines) - - # ------------------------------------------------------------------ - # Export - # ------------------------------------------------------------------ - - def export_to_json(self, path: str) -> None: - """Export all mining logs to JSON. - - Parameters - ---------- - path : str - File path for the JSON output. - """ - payload = { - "session": { - "start_time": datetime.fromtimestamp(self._session_start).strftime( - "%Y-%m-%d %H:%M:%S" - ), - "elapsed_seconds": time.time() - self._session_start, - "total_batches": len(self.batches), - "total_admissions": len(self.factor_admissions), - }, - "batches": [b.to_dict() for b in self.batches], - "factor_admissions": [asdict(f) for f in self.factor_admissions], - "summary": self._compute_summary(), - } - - out_path = Path(path) - out_path.parent.mkdir(parents=True, exist_ok=True) - with open(out_path, "w") as f: - json.dump(payload, f, indent=2, default=str) - - def save_session_report(self, filename: str = "session_report.txt") -> str: - """Save the session report to a text file. - - Returns the path to the saved file. 
- """ - report = self.generate_session_report() - path = self.output_dir / filename - with open(path, "w") as f: - f.write(report) - return str(path) - - # ------------------------------------------------------------------ - # Visualization - # ------------------------------------------------------------------ - - def plot_mining_progress(self, save_path: Optional[str] = None) -> None: - """Plot library growth, yield rate, and rejection rate over batches. - - Produces a 3-panel figure: - 1. Library size growth - 2. Yield rate per batch - 3. Rejection breakdown stacked area - - Parameters - ---------- - save_path : Optional[str] - If provided, saves the figure to this path. - """ - if not self.batches: - return - - plt.rcParams.update({ - "figure.facecolor": "white", - "axes.facecolor": "white", - "axes.grid": True, - "grid.alpha": 0.3, - "grid.linestyle": "--", - "figure.dpi": 150, - }) - - batch_nums = [b.batch_num for b in self.batches] - lib_sizes = [b.library_size for b in self.batches] - yield_rates = [b.yield_rate * 100 for b in self.batches] - admitted_counts = [b.admitted for b in self.batches] - replaced_counts = [b.replaced for b in self.batches] - rejected_counts = [b.rejected for b in self.batches] - - fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10), - sharex=True, - gridspec_kw={"hspace": 0.15}) - - # Panel 1: Library size growth - ax1.plot(batch_nums, lib_sizes, color="#1565C0", linewidth=2.0, - marker="o", markersize=3) - ax1.fill_between(batch_nums, lib_sizes, alpha=0.15, color="#1565C0") - ax1.set_ylabel("Library Size") - ax1.set_title("Mining Progress", fontsize=13, fontweight="bold") - if lib_sizes: - ax1.text(batch_nums[-1], lib_sizes[-1], - f" {lib_sizes[-1]}", va="center", fontsize=9, color="#1565C0") - - # Panel 2: Yield rate - ax2.bar(batch_nums, yield_rates, color="#43A047", alpha=0.7, - edgecolor="white", linewidth=0.5) - if yield_rates: - avg_yield = sum(yield_rates) / len(yield_rates) - ax2.axhline(y=avg_yield, 
color="#FF6F00", linestyle="--", linewidth=1.0, - label=f"Avg = {avg_yield:.1f}%") - ax2.legend(fontsize=8, loc="upper right") - ax2.set_ylabel("Yield Rate (%)") - ax2.set_ylim(bottom=0) - - # Panel 3: Stacked bar of admitted / replaced / rejected - ax3.bar(batch_nums, admitted_counts, label="Admitted", - color="#43A047", edgecolor="white", linewidth=0.5) - ax3.bar(batch_nums, replaced_counts, bottom=admitted_counts, - label="Replaced", color="#FF8F00", edgecolor="white", linewidth=0.5) - bottoms = [a + r for a, r in zip(admitted_counts, replaced_counts)] - ax3.bar(batch_nums, rejected_counts, bottom=bottoms, - label="Rejected", color="#E53935", alpha=0.6, - edgecolor="white", linewidth=0.5) - ax3.set_ylabel("Candidates") - ax3.set_xlabel("Batch Number") - ax3.legend(loc="upper right", fontsize=8) - - fig.tight_layout() - - if save_path is not None: - fig.savefig(save_path, bbox_inches="tight", facecolor="white", dpi=200) - plt.close(fig) - else: - plt.show() - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _compute_summary(self) -> dict: - """Compute cumulative summary statistics.""" - total_candidates = sum(b.candidates for b in self.batches) - total_admitted = sum(b.admitted for b in self.batches) - total_replaced = sum(b.replaced for b in self.batches) - - return { - "total_candidates": total_candidates, - "total_admitted": total_admitted, - "total_replaced": total_replaced, - "total_rejected": total_candidates - total_admitted - total_replaced, - "overall_yield_rate": total_admitted / total_candidates if total_candidates > 0 else 0.0, - "final_library_size": self.batches[-1].library_size if self.batches else 0, - "total_elapsed_seconds": time.time() - self._session_start, - } diff --git a/src/factorminer/factorminer/utils/tearsheet.py b/src/factorminer/factorminer/utils/tearsheet.py deleted file mode 100644 index 8371d85..0000000 --- 
a/src/factorminer/factorminer/utils/tearsheet.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Factor tear sheet generation for FactorMiner. - -Produces comprehensive, multi-panel evaluation reports for individual -factors, following the style of Appendix O / Figure 10 from the paper. -Also provides summary table generation for the full factor library. -""" - -from __future__ import annotations - -from typing import Dict, List, Optional - -import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec -import numpy as np -import pandas as pd -from scipy.stats import rankdata - -from src.factorminer.factorminer.evaluation.metrics import ( - compute_ic, - compute_icir, - compute_ic_mean, - compute_ic_win_rate, - compute_quintile_returns, - compute_turnover, -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _rolling_mean(arr: np.ndarray, window: int) -> np.ndarray: - """Compute rolling mean with edge handling.""" - out = np.full_like(arr, np.nan, dtype=np.float64) - clean = np.where(np.isnan(arr), 0.0, arr) - kernel = np.ones(window) / window - conv = np.convolve(clean, kernel, mode="same") - # Fix edges using expanding window - for i in range(window // 2): - w = i + window // 2 + 1 - conv[i] = np.mean(clean[:w]) - conv[-(i + 1)] = np.mean(clean[-w:]) - return conv - - -def _compute_daily_turnover(signals: np.ndarray) -> np.ndarray: - """Compute total daily turnover as fraction of positions changing. - - Parameters - ---------- - signals : np.ndarray, shape (M, T) - - Returns - ------- - np.ndarray, shape (T-1,) - Turnover for each period transition. 
- """ - M, T = signals.shape - turnovers = np.full(T - 1, np.nan, dtype=np.float64) - - for t in range(1, T): - prev = signals[:, t - 1] - curr = signals[:, t] - valid = ~(np.isnan(prev) | np.isnan(curr)) - n = valid.sum() - if n < 5: - continue - # Rank-based positions - prev_ranks = rankdata(prev[valid]) / n - curr_ranks = rankdata(curr[valid]) / n - turnovers[t - 1] = float(np.mean(np.abs(curr_ranks - prev_ranks))) - - return turnovers - - -# --------------------------------------------------------------------------- -# FactorTearSheet -# --------------------------------------------------------------------------- - -class FactorTearSheet: - """Generate comprehensive evaluation report for a single factor. - - Produces an 8-panel figure plus a metrics summary table. - """ - - # Panel colours - IC_BAR_POS = "#4CAF50" - IC_BAR_NEG = "#F44336" - ROLLING_COLOR = "#1565C0" - CUMULATIVE_COLOR = "#0D47A1" - QUINTILE_CMAP = "RdYlGn" - TURNOVER_COLOR = "#FF8F00" - - def generate( - self, - factor_id: int, - factor_name: str, - formula: str, - signals: np.ndarray, - returns: np.ndarray, - dates: List[str], - save_path: Optional[str] = None, - ) -> Dict[str, float]: - """Generate a multi-panel tear sheet. - - Panels: - (a) IC time-series analysis -- daily mean rank IC with mean line - (b) Rank IC distribution -- histogram with statistics - (c) 21-day rolling IC -- aggregated daily rolling window - (d) Cumulative IC composition - (e) Quintile returns -- bar chart with Q1-Q5 - (f) Cumulative returns -- line chart for quintiles - (g) Factor value distribution -- histogram - (h) Turnover analysis -- daily total turnover - - Parameters - ---------- - factor_id : int - Unique identifier for the factor. - factor_name : str - Human-readable factor name. - formula : str - DSL expression string. - signals : np.ndarray, shape (M, T) - Factor signal values. - returns : np.ndarray, shape (M, T) - Forward returns. - dates : List[str] - Date strings of length T. 
- save_path : Optional[str] - If provided, saves the figure to this path. - - Returns - ------- - Dict[str, float] - Dictionary of computed metrics. - """ - plt.rcParams.update({ - "figure.facecolor": "white", - "axes.facecolor": "white", - "axes.grid": True, - "grid.alpha": 0.3, - "grid.linestyle": "--", - "figure.dpi": 150, - }) - - M, T = signals.shape - ic_series = compute_ic(signals, returns) - ic_clean = np.where(np.isnan(ic_series), 0.0, ic_series) - valid_ic = ic_series[~np.isnan(ic_series)] - - # Compute all metrics - ic_mean = float(np.mean(valid_ic)) if len(valid_ic) > 0 else 0.0 - ic_abs_mean = compute_ic_mean(ic_series) - icir = compute_icir(ic_series) - win_rate = compute_ic_win_rate(ic_series) - quintile = compute_quintile_returns(signals, returns) - turnover = compute_turnover(signals) - daily_turnover = _compute_daily_turnover(signals) - - metrics = { - "ic_mean": ic_mean, - "ic_abs_mean": ic_abs_mean, - "icir": icir, - "ic_win_rate": win_rate, - "Q1_return": quintile.get("Q1", 0.0), - "Q5_return": quintile.get("Q5", 0.0), - "long_short": quintile.get("long_short", 0.0), - "monotonicity": quintile.get("monotonicity", 0.0), - "avg_turnover": turnover, - } - - # Cumulative IC - cumulative_ic = np.nancumsum(ic_clean) - - # Rolling IC (21-day) - rolling_ic = _rolling_mean(ic_clean, 21) - - # Quintile cumulative returns (compute per-period Q returns) - n_quantiles = 5 - quintile_ts = {q: [] for q in range(1, n_quantiles + 1)} - for t in range(T): - s = signals[:, t] - r = returns[:, t] - valid_mask = ~(np.isnan(s) | np.isnan(r)) - n = valid_mask.sum() - if n < n_quantiles: - for q in range(1, n_quantiles + 1): - quintile_ts[q].append(0.0) - continue - ranks = rankdata(s[valid_mask]) - q_labels = np.clip( - np.ceil(ranks / n * n_quantiles).astype(int), 1, n_quantiles - ) - r_valid = r[valid_mask] - for q in range(1, n_quantiles + 1): - mask_q = q_labels == q - quintile_ts[q].append(float(np.mean(r_valid[mask_q])) if mask_q.any() else 0.0) - - 
quintile_cumulative = {} - for q in range(1, n_quantiles + 1): - quintile_cumulative[f"Q{q}"] = np.cumsum(quintile_ts[q]) - - # ---- Build 4x2 panel figure ---- - fig = plt.figure(figsize=(16, 18)) - gs = gridspec.GridSpec(4, 2, hspace=0.35, wspace=0.3) - - # Suptitle - fig.suptitle( - f"Factor #{factor_id}: {factor_name}\n{formula[:100]}{'...' if len(formula) > 100 else ''}", - fontsize=13, fontweight="bold", y=0.98, - ) - - # (a) IC time-series - ax_a = fig.add_subplot(gs[0, 0]) - x = np.arange(T) - colors_ic = np.where(ic_clean >= 0, self.IC_BAR_POS, self.IC_BAR_NEG) - ax_a.bar(x, ic_clean, color=colors_ic, alpha=0.5, width=1.0, edgecolor="none") - ax_a.axhline(y=ic_mean, color="#FF6F00", linestyle="--", linewidth=1.0, - label=f"Mean = {ic_mean:.4f}") - ax_a.axhline(y=0, color="black", linewidth=0.4) - ax_a.set_title("(a) Daily Rank IC", fontsize=10) - ax_a.set_ylabel("IC") - ax_a.legend(fontsize=8, loc="upper left") - self._set_date_ticks(ax_a, dates, T) - - # (b) IC distribution - ax_b = fig.add_subplot(gs[0, 1]) - if len(valid_ic) > 0: - ax_b.hist(valid_ic, bins=50, color=self.ROLLING_COLOR, alpha=0.7, - edgecolor="white", linewidth=0.5, density=True) - ax_b.axvline(x=ic_mean, color="#FF6F00", linestyle="--", linewidth=1.2, - label=f"Mean = {ic_mean:.4f}") - ax_b.axvline(x=0, color="black", linewidth=0.4) - ax_b.set_title("(b) Rank IC Distribution", fontsize=10) - ax_b.set_xlabel("IC") - ax_b.set_ylabel("Density") - stats_text = f"Mean={ic_mean:.4f}\nICIR={icir:.3f}\nWin={win_rate:.1%}" - ax_b.text(0.97, 0.97, stats_text, transform=ax_b.transAxes, - ha="right", va="top", fontsize=8, - bbox=dict(boxstyle="round,pad=0.3", facecolor="wheat", alpha=0.5)) - ax_b.legend(fontsize=8, loc="upper left") - - # (c) 21-day rolling IC - ax_c = fig.add_subplot(gs[1, 0]) - ax_c.plot(x, rolling_ic, color=self.ROLLING_COLOR, linewidth=1.0) - ax_c.fill_between(x, rolling_ic, alpha=0.15, color=self.ROLLING_COLOR) - ax_c.axhline(y=0, color="black", linewidth=0.4) - 
ax_c.axhline(y=ic_mean, color="#FF6F00", linestyle="--", linewidth=0.8, - label=f"Mean = {ic_mean:.4f}") - ax_c.set_title("(c) 21-Day Rolling IC", fontsize=10) - ax_c.set_ylabel("Rolling IC") - ax_c.legend(fontsize=8, loc="upper left") - self._set_date_ticks(ax_c, dates, T) - - # (d) Cumulative IC - ax_d = fig.add_subplot(gs[1, 1]) - ax_d.fill_between(x, cumulative_ic, alpha=0.25, color=self.CUMULATIVE_COLOR) - ax_d.plot(x, cumulative_ic, color=self.CUMULATIVE_COLOR, linewidth=1.0) - ax_d.axhline(y=0, color="black", linewidth=0.4) - ax_d.set_title("(d) Cumulative IC", fontsize=10) - ax_d.set_ylabel("Cumulative IC") - self._set_date_ticks(ax_d, dates, T) - - # (e) Quintile returns bar chart - ax_e = fig.add_subplot(gs[2, 0]) - q_labels_list = [f"Q{q}" for q in range(1, n_quantiles + 1)] - q_vals = [quintile.get(f"Q{q}", 0.0) for q in range(1, n_quantiles + 1)] - cmap = plt.cm.RdYlGn - q_colors = [cmap(i / max(n_quantiles - 1, 1)) for i in range(n_quantiles)] - bars = ax_e.bar(q_labels_list, q_vals, color=q_colors, edgecolor="white", linewidth=0.8) - for bar, val in zip(bars, q_vals): - y_pos = bar.get_height() - ax_e.text(bar.get_x() + bar.get_width() / 2, y_pos, - f"{val:.4f}", ha="center", - va="bottom" if y_pos >= 0 else "top", fontsize=8) - ax_e.axhline(y=0, color="black", linewidth=0.4) - ls = quintile.get("long_short", 0.0) - mono = quintile.get("monotonicity", 0.0) - ax_e.set_title(f"(e) Quintile Returns | L-S={ls:.4f} Mono={mono:.2f}", fontsize=10) - ax_e.set_ylabel("Mean Return") - - # (f) Cumulative quintile returns - ax_f = fig.add_subplot(gs[2, 1]) - q_palette = plt.cm.RdYlGn(np.linspace(0.1, 0.9, n_quantiles)) - for i, q in enumerate(range(1, n_quantiles + 1)): - key = f"Q{q}" - ax_f.plot(quintile_cumulative[key], color=q_palette[i], - linewidth=1.1, label=key) - ax_f.axhline(y=0, color="black", linewidth=0.4) - ax_f.set_title("(f) Cumulative Quintile Returns", fontsize=10) - ax_f.set_ylabel("Cumulative Return") - ax_f.legend(loc="upper left", 
fontsize=8, ncol=n_quantiles, framealpha=0.9) - self._set_date_ticks(ax_f, dates, T) - - # (g) Factor value distribution - ax_g = fig.add_subplot(gs[3, 0]) - # Sample from signals for histogram (last period or flattened sample) - flat_signals = signals[~np.isnan(signals)] - if len(flat_signals) > 50000: - rng = np.random.default_rng(42) - flat_signals = rng.choice(flat_signals, 50000, replace=False) - if len(flat_signals) > 0: - # Clip to 1st/99th percentile for cleaner visualization - lo, hi = np.percentile(flat_signals, [1, 99]) - clipped = flat_signals[(flat_signals >= lo) & (flat_signals <= hi)] - ax_g.hist(clipped, bins=80, color="#7E57C2", alpha=0.7, - edgecolor="white", linewidth=0.3, density=True) - mean_sig = float(np.mean(flat_signals)) - std_sig = float(np.std(flat_signals)) - ax_g.axvline(x=mean_sig, color="#FF6F00", linestyle="--", linewidth=1.0, - label=f"Mean={mean_sig:.4f}") - stats_text_g = f"Std={std_sig:.4f}\nN={len(flat_signals):,}" - ax_g.text(0.97, 0.97, stats_text_g, transform=ax_g.transAxes, - ha="right", va="top", fontsize=8, - bbox=dict(boxstyle="round,pad=0.3", facecolor="wheat", alpha=0.5)) - ax_g.set_title("(g) Factor Value Distribution", fontsize=10) - ax_g.set_xlabel("Factor Value") - ax_g.set_ylabel("Density") - ax_g.legend(fontsize=8, loc="upper left") - - # (h) Turnover analysis - ax_h = fig.add_subplot(gs[3, 1]) - valid_turnover = daily_turnover[~np.isnan(daily_turnover)] - if len(valid_turnover) > 0: - t_x = np.arange(len(daily_turnover)) - ax_h.bar(t_x, np.where(np.isnan(daily_turnover), 0, daily_turnover), - color=self.TURNOVER_COLOR, alpha=0.5, width=1.0, edgecolor="none") - avg_to = float(np.mean(valid_turnover)) - ax_h.axhline(y=avg_to, color="#D32F2F", linestyle="--", linewidth=1.0, - label=f"Avg = {avg_to:.4f}") - ax_h.legend(fontsize=8, loc="upper right") - ax_h.set_title("(h) Daily Turnover", fontsize=10) - ax_h.set_ylabel("Turnover") - ax_h.set_xlabel("Period") - - # Metrics table at the bottom - metrics_ls_cum = 
float(np.sum([quintile_ts[n_quantiles][t] - quintile_ts[1][t] - for t in range(T)])) - metrics["long_short_cumulative"] = metrics_ls_cum - - fig.tight_layout(rect=[0, 0, 1, 0.96]) - - if save_path is not None: - fig.savefig(save_path, bbox_inches="tight", facecolor="white", dpi=200) - plt.close(fig) - else: - plt.show() - - return metrics - - def generate_summary_table(self, factors: List[dict]) -> pd.DataFrame: - """Generate summary table for all factors in the library. - - Parameters - ---------- - factors : List[dict] - Each dict should contain keys: 'id', 'name', 'formula', - 'ic_mean', 'icir', 'ic_win_rate', 'Q1_return', 'Q5_return', - 'long_short', 'monotonicity', 'avg_turnover'. - - Returns - ------- - pd.DataFrame - Summary table sorted by IC mean (descending). - """ - if not factors: - return pd.DataFrame() - - rows = [] - for f in factors: - rows.append({ - "ID": f.get("id", ""), - "Name": f.get("name", ""), - "Formula": str(f.get("formula", ""))[:60], - "IC Mean": f.get("ic_mean", 0.0), - "ICIR": f.get("icir", 0.0), - "IC Win Rate": f.get("ic_win_rate", 0.0), - "Q1 Return": f.get("Q1_return", 0.0), - "Q5 Return": f.get("Q5_return", 0.0), - "L-S Return": f.get("long_short", 0.0), - "Monotonicity": f.get("monotonicity", 0.0), - "Avg Turnover": f.get("avg_turnover", 0.0), - }) - - df = pd.DataFrame(rows) - df = df.sort_values("IC Mean", ascending=False).reset_index(drop=True) - return df - - @staticmethod - def _set_date_ticks(ax: plt.Axes, dates: List[str], T: int, n_ticks: int = 8) -> None: - """Set evenly spaced date tick labels on the x-axis.""" - if T == 0: - return - n_ticks = min(n_ticks, T) - step = max(1, T // n_ticks) - positions = list(range(0, T, step)) - ax.set_xticks(positions) - ax.set_xticklabels([dates[i] for i in positions], rotation=45, ha="right", fontsize=7) diff --git a/src/factorminer/factorminer/utils/visualization.py b/src/factorminer/factorminer/utils/visualization.py deleted file mode 100644 index 434cdde..0000000 --- 
a/src/factorminer/factorminer/utils/visualization.py +++ /dev/null @@ -1,564 +0,0 @@ -"""Core visualization functions for FactorMiner. - -Provides publication-quality plots for factor analysis, mining diagnostics, -and performance reporting. Uses matplotlib and seaborn with a consistent -style inspired by the FactorMiner paper figures. -""" - -from __future__ import annotations - -from typing import Dict, List, Optional, Tuple - -import matplotlib.pyplot as plt -import matplotlib.ticker as mticker -import numpy as np -import seaborn as sns - -# --------------------------------------------------------------------------- -# Global style -# --------------------------------------------------------------------------- - -_STYLE_APPLIED = False - - -def _apply_style() -> None: - """Apply a clean, publication-quality matplotlib style once.""" - global _STYLE_APPLIED - if _STYLE_APPLIED: - return - plt.rcParams.update({ - "figure.facecolor": "white", - "axes.facecolor": "white", - "axes.edgecolor": "#333333", - "axes.labelcolor": "#333333", - "axes.grid": True, - "grid.alpha": 0.3, - "grid.linestyle": "--", - "xtick.color": "#333333", - "ytick.color": "#333333", - "font.size": 10, - "axes.titlesize": 12, - "axes.labelsize": 10, - "legend.fontsize": 9, - "figure.dpi": 150, - "savefig.dpi": 200, - "savefig.bbox": "tight", - }) - _STYLE_APPLIED = True - - -def _save_or_show(fig: plt.Figure, save_path: Optional[str]) -> None: - """Save figure to disk or display interactively.""" - if save_path is not None: - fig.savefig(save_path, bbox_inches="tight", facecolor="white") - plt.close(fig) - else: - plt.show() - - -# --------------------------------------------------------------------------- -# Correlation heatmap (Figure 2) -# --------------------------------------------------------------------------- - -def plot_correlation_heatmap( - correlation_matrix: np.ndarray, - factor_names: List[str], - title: str = "Factor Library Correlation Heatmap", - save_path: Optional[str] = None, 
-) -> None: - """Generate pairwise Spearman correlation heatmap. - - Displays the average off-diagonal |rho| in the title and uses a - diverging colormap centred at zero. - - Parameters - ---------- - correlation_matrix : np.ndarray, shape (N, N) - Symmetric matrix of pairwise |rho| values. - factor_names : List[str] - Labels for each factor (length N). - title : str - Base title for the plot. - save_path : Optional[str] - If provided, saves the figure to this path instead of displaying. - """ - _apply_style() - n = correlation_matrix.shape[0] - - # Compute average off-diagonal correlation - if n > 1: - triu_idx = np.triu_indices(n, k=1) - off_diag = correlation_matrix[triu_idx] - avg_corr = float(np.nanmean(np.abs(off_diag))) - else: - avg_corr = 0.0 - - # Scale figure size based on number of factors - size = max(6, min(n * 0.35 + 2, 20)) - fig, ax = plt.subplots(figsize=(size, size * 0.85)) - - mask = np.zeros_like(correlation_matrix, dtype=bool) - np.fill_diagonal(mask, True) - - sns.heatmap( - correlation_matrix, - mask=mask, - xticklabels=factor_names, - yticklabels=factor_names, - cmap="RdBu_r", - center=0, - vmin=-1, - vmax=1, - square=True, - linewidths=0.5, - linecolor="white", - cbar_kws={"shrink": 0.7, "label": "Spearman |rho|"}, - ax=ax, - ) - - ax.set_title(f"{title}\nAvg off-diagonal |rho| = {avg_corr:.4f}", fontsize=12) - ax.tick_params(axis="x", rotation=45, labelsize=max(5, 10 - n // 20)) - ax.tick_params(axis="y", rotation=0, labelsize=max(5, 10 - n // 20)) - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# IC time series (Figure 5) -# --------------------------------------------------------------------------- - -def plot_ic_timeseries( - ic_series: np.ndarray, - dates: List[str], - rolling_window: int = 21, - title: str = "Daily Mean Rank IC", - save_path: Optional[str] = None, -) -> None: - """Plot IC time series with rolling average and cumulative IC. 
- - Creates a two-panel figure: top panel shows daily IC bars with a - rolling mean line; bottom panel shows cumulative IC. - - Parameters - ---------- - ic_series : np.ndarray, shape (T,) - Daily IC values (may contain NaN). - dates : List[str] - Date labels of length T. - rolling_window : int - Window for rolling mean (default 21 trading days). - title : str - Title for the figure. - save_path : Optional[str] - If provided, saves the figure to this path. - """ - _apply_style() - T = len(ic_series) - x = np.arange(T) - - # Replace NaN with 0 for plotting - ic_clean = np.where(np.isnan(ic_series), 0.0, ic_series) - - # Rolling mean - kernel = np.ones(rolling_window) / rolling_window - rolling_ic = np.convolve(ic_clean, kernel, mode="same") - # Fix edges - for i in range(rolling_window // 2): - w = i + rolling_window // 2 + 1 - rolling_ic[i] = np.mean(ic_clean[:w]) - rolling_ic[-(i + 1)] = np.mean(ic_clean[-w:]) - - # Cumulative IC - cumulative_ic = np.nancumsum(ic_clean) - - fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 7), height_ratios=[2, 1], - sharex=True, gridspec_kw={"hspace": 0.08}) - - # Top: daily IC bars + rolling mean - colors = np.where(ic_clean >= 0, "#4CAF50", "#F44336") - ax1.bar(x, ic_clean, color=colors, alpha=0.5, width=1.0, edgecolor="none") - ax1.plot(x, rolling_ic, color="#1565C0", linewidth=1.5, - label=f"{rolling_window}-day Rolling Mean") - - ic_mean = float(np.nanmean(ic_series)) - ax1.axhline(y=ic_mean, color="#FF6F00", linestyle="--", linewidth=1.0, - label=f"Mean IC = {ic_mean:.4f}") - ax1.axhline(y=0, color="black", linewidth=0.5) - - ax1.set_ylabel("Rank IC") - ax1.set_title(title, fontsize=13, fontweight="bold") - ax1.legend(loc="upper left", framealpha=0.9) - - # Bottom: cumulative IC - ax2.fill_between(x, cumulative_ic, alpha=0.3, color="#1565C0") - ax2.plot(x, cumulative_ic, color="#1565C0", linewidth=1.2) - ax2.set_ylabel("Cumulative IC") - ax2.set_xlabel("Date") - ax2.axhline(y=0, color="black", linewidth=0.5) - - # X-axis 
tick labels - if T > 0: - n_ticks = min(10, T) - step = max(1, T // n_ticks) - tick_positions = list(range(0, T, step)) - ax2.set_xticks(tick_positions) - ax2.set_xticklabels([dates[i] for i in tick_positions], rotation=45, ha="right") - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# Quintile returns (Figure 6) -# --------------------------------------------------------------------------- - -def plot_quintile_returns( - quintile_returns: dict, - title: str = "Quintile Returns", - save_path: Optional[str] = None, -) -> None: - """Plot Q1-Q5 quintile bar chart and cumulative returns. - - Parameters - ---------- - quintile_returns : dict - Dictionary with keys Q1..Q5 (mean returns) and optionally - 'quintile_cumulative' mapping Qx -> array of cumulative returns. - Also may contain 'long_short' and 'monotonicity'. - title : str - Title for the figure. - save_path : Optional[str] - If provided, saves the figure to this path. 
- """ - _apply_style() - - # Extract quintile mean returns - q_labels = [k for k in sorted(quintile_returns.keys()) if k.startswith("Q")] - q_means = [quintile_returns[k] for k in q_labels] - n_q = len(q_labels) - - has_cumulative = "quintile_cumulative" in quintile_returns - n_panels = 2 if has_cumulative else 1 - fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5)) - if n_panels == 1: - axes = [axes] - - # Bar chart - ax = axes[0] - cmap = plt.cm.RdYlGn - colors = [cmap(i / max(n_q - 1, 1)) for i in range(n_q)] - bars = ax.bar(q_labels, q_means, color=colors, edgecolor="white", linewidth=0.8) - - # Value labels on bars - for bar, val in zip(bars, q_means): - y = bar.get_height() - ax.text(bar.get_x() + bar.get_width() / 2, y, - f"{val:.4f}", ha="center", - va="bottom" if y >= 0 else "top", fontsize=9) - - ax.axhline(y=0, color="black", linewidth=0.5) - ax.set_ylabel("Mean Return") - ax.set_xlabel("Quintile") - - # Subtitle with L-S return and monotonicity - subtitle_parts = [] - if "long_short" in quintile_returns: - subtitle_parts.append(f"L-S = {quintile_returns['long_short']:.4f}") - if "monotonicity" in quintile_returns: - subtitle_parts.append(f"Mono = {quintile_returns['monotonicity']:.2f}") - subtitle = " | ".join(subtitle_parts) if subtitle_parts else "" - ax.set_title(f"{title}\n{subtitle}" if subtitle else title, fontsize=12) - - # Cumulative returns panel - if has_cumulative: - ax2 = axes[1] - cum_data = quintile_returns["quintile_cumulative"] - for q_label in q_labels: - if q_label in cum_data: - ax2.plot(cum_data[q_label], label=q_label, linewidth=1.2) - ax2.set_title("Cumulative Quintile Returns", fontsize=12) - ax2.set_ylabel("Cumulative Return") - ax2.set_xlabel("Period") - ax2.legend(loc="upper left", framealpha=0.9) - ax2.axhline(y=0, color="black", linewidth=0.5) - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# Ablation comparison (Figure 3) 
-# --------------------------------------------------------------------------- - -def plot_ablation_comparison( - with_memory: dict, - without_memory: dict, - save_path: Optional[str] = None, -) -> None: - """Bar charts comparing Have Memory vs No Memory ablation. - - Shows side-by-side bars for: high-quality count, rejected count, - admitted count, yield rate, and rejection rate. - - Parameters - ---------- - with_memory : dict - Keys: 'high_quality', 'rejected', 'admitted', 'yield_rate', 'rejection_rate'. - without_memory : dict - Same keys as with_memory. - save_path : Optional[str] - If provided, saves the figure to this path. - """ - _apply_style() - - # Count metrics (left axis) and rate metrics (right axis) - count_keys = ["high_quality", "rejected", "admitted"] - rate_keys = ["yield_rate", "rejection_rate"] - - count_labels = ["High Quality", "Rejected", "Admitted"] - rate_labels = ["Yield Rate", "Rejection Rate"] - - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) - - # Panel 1: Count metrics - x = np.arange(len(count_keys)) - w = 0.35 - vals_with = [with_memory.get(k, 0) for k in count_keys] - vals_without = [without_memory.get(k, 0) for k in count_keys] - - bars1 = ax1.bar(x - w / 2, vals_with, w, label="With Memory", - color="#1565C0", edgecolor="white") - bars2 = ax1.bar(x + w / 2, vals_without, w, label="Without Memory", - color="#E53935", edgecolor="white") - - for bars in [bars1, bars2]: - for bar in bars: - h = bar.get_height() - ax1.text(bar.get_x() + bar.get_width() / 2, h, - f"{int(h)}", ha="center", va="bottom", fontsize=9) - - ax1.set_xticks(x) - ax1.set_xticklabels(count_labels) - ax1.set_ylabel("Count") - ax1.set_title("Factor Counts: Memory Ablation", fontsize=12) - ax1.legend(loc="upper right") - - # Panel 2: Rate metrics - x2 = np.arange(len(rate_keys)) - vals_with_r = [with_memory.get(k, 0) * 100 for k in rate_keys] - vals_without_r = [without_memory.get(k, 0) * 100 for k in rate_keys] - - bars3 = ax2.bar(x2 - w / 2, vals_with_r, 
w, label="With Memory", - color="#1565C0", edgecolor="white") - bars4 = ax2.bar(x2 + w / 2, vals_without_r, w, label="Without Memory", - color="#E53935", edgecolor="white") - - for bars in [bars3, bars4]: - for bar in bars: - h = bar.get_height() - ax2.text(bar.get_x() + bar.get_width() / 2, h, - f"{h:.1f}%", ha="center", va="bottom", fontsize=9) - - ax2.set_xticks(x2) - ax2.set_xticklabels(rate_labels) - ax2.set_ylabel("Rate (%)") - ax2.set_title("Yield / Rejection Rates: Memory Ablation", fontsize=12) - ax2.legend(loc="upper right") - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# Efficiency benchmark (Figure 4) -# --------------------------------------------------------------------------- - -def plot_efficiency_benchmark( - benchmarks: Dict[str, Dict[str, float]], - save_path: Optional[str] = None, -) -> None: - """Grouped bar chart on log scale for computation time. - - Compares Python/C/GPU backends at operator and factor levels. - - Parameters - ---------- - benchmarks : Dict[str, Dict[str, float]] - Outer keys: backend names (e.g. "Python", "C", "GPU"). - Inner keys: operation names (e.g. "operator_eval", "factor_eval"). - Values: time in seconds. - save_path : Optional[str] - If provided, saves the figure to this path. 
- """ - _apply_style() - - backends = list(benchmarks.keys()) - operations = sorted( - {op for bm in benchmarks.values() for op in bm.keys()} - ) - n_backends = len(backends) - n_ops = len(operations) - - fig, ax = plt.subplots(figsize=(max(8, n_ops * 2), 5)) - - x = np.arange(n_ops) - total_width = 0.7 - w = total_width / n_backends - - palette = ["#1565C0", "#FF8F00", "#43A047", "#8E24AA", "#E53935"] - - for i, backend in enumerate(backends): - vals = [benchmarks[backend].get(op, 0) for op in operations] - offset = (i - (n_backends - 1) / 2) * w - bars = ax.bar(x + offset, vals, w, label=backend, - color=palette[i % len(palette)], edgecolor="white") - - for bar, val in zip(bars, vals): - if val > 0: - ax.text(bar.get_x() + bar.get_width() / 2, - bar.get_height(), - f"{val:.3g}s", ha="center", va="bottom", fontsize=7) - - ax.set_yscale("log") - ax.set_ylabel("Time (seconds, log scale)") - ax.set_xticks(x) - ax.set_xticklabels(operations, rotation=30, ha="right") - ax.set_title("Computation Efficiency by Backend", fontsize=12) - ax.legend(loc="upper right") - ax.yaxis.set_major_formatter(mticker.ScalarFormatter()) - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# Cost pressure (Figure 9) -# --------------------------------------------------------------------------- - -def plot_cost_pressure( - results: Dict[float, dict], - save_path: Optional[str] = None, -) -> None: - """Cumulative return plots under different transaction cost settings. - - Shows both linear and log-scale y-axis panels. - - Parameters - ---------- - results : Dict[float, dict] - Keys: transaction cost levels (e.g. 0.0, 0.001, 0.003). - Values: dict with 'cumulative_returns' (np.ndarray) and - optionally 'dates' (List[str]). - save_path : Optional[str] - If provided, saves the figure to this path. 
- """ - _apply_style() - - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5)) - - cost_levels = sorted(results.keys()) - palette = plt.cm.viridis(np.linspace(0.15, 0.85, len(cost_levels))) - - for color, cost in zip(palette, cost_levels): - data = results[cost] - cum_ret = np.asarray(data["cumulative_returns"]) - label = f"TC = {cost*100:.1f}%" if cost > 0 else "No TC" - - ax1.plot(cum_ret, color=color, linewidth=1.3, label=label) - # For log scale, shift to always positive - shifted = cum_ret - cum_ret.min() + 1.0 - ax2.plot(shifted, color=color, linewidth=1.3, label=label) - - ax1.set_title("Cumulative Returns (Linear)", fontsize=12) - ax1.set_ylabel("Cumulative Return") - ax1.set_xlabel("Period") - ax1.legend(loc="upper left", fontsize=8, framealpha=0.9) - ax1.axhline(y=0, color="black", linewidth=0.5) - - ax2.set_yscale("log") - ax2.set_title("Cumulative Returns (Log Scale)", fontsize=12) - ax2.set_ylabel("Shifted Cumulative Return (log)") - ax2.set_xlabel("Period") - ax2.legend(loc="upper left", fontsize=8, framealpha=0.9) - - fig.tight_layout() - _save_or_show(fig, save_path) - - -# --------------------------------------------------------------------------- -# Mining funnel chart -# --------------------------------------------------------------------------- - -def plot_mining_funnel( - batch_stats: dict, - save_path: Optional[str] = None, -) -> None: - """Funnel chart showing Stage 1 -> 2 -> 3 -> 4 filtering. - - Parameters - ---------- - batch_stats : dict - Keys: 'generated', 'ic_passed', 'corr_passed', 'admitted'. - Each is an int count at the corresponding stage. - save_path : Optional[str] - If provided, saves the figure to this path. 
- """ - _apply_style() - - stages = [ - ("Generated", batch_stats.get("generated", 0)), - ("IC Screen Passed", batch_stats.get("ic_passed", 0)), - ("Correlation Passed", batch_stats.get("corr_passed", 0)), - ("Admitted", batch_stats.get("admitted", 0)), - ] - - labels = [s[0] for s in stages] - values = [s[1] for s in stages] - max_val = max(values) if values else 1 - - fig, ax = plt.subplots(figsize=(8, 5)) - - # Draw horizontal funnel bars centred on the y-axis - y_positions = list(range(len(stages) - 1, -1, -1)) - bar_colors = ["#42A5F5", "#66BB6A", "#FFA726", "#EF5350"] - - for i, (y, val, label, color) in enumerate( - zip(y_positions, values, labels, bar_colors) - ): - width = val / max_val if max_val > 0 else 0 - bar = ax.barh(y, width, height=0.6, color=color, edgecolor="white", - linewidth=1.5, left=(1 - width) / 2) - # Label inside the bar - ax.text(0.5, y, f"{label}\n{val:,}", ha="center", va="center", - fontsize=10, fontweight="bold", color="white" if width > 0.3 else "#333") - - # Draw connecting trapezoids - for i in range(len(stages) - 1): - y_top = y_positions[i] - y_bot = y_positions[i + 1] - w_top = values[i] / max_val if max_val > 0 else 0 - w_bot = values[i + 1] / max_val if max_val > 0 else 0 - - left_top = (1 - w_top) / 2 - right_top = (1 + w_top) / 2 - left_bot = (1 - w_bot) / 2 - right_bot = (1 + w_bot) / 2 - - # Drop rate annotation - if values[i] > 0: - drop = (1 - values[i + 1] / values[i]) * 100 - mid_y = (y_top + y_bot) / 2 - ax.text(1.02, mid_y, f"-{drop:.0f}%", ha="left", va="center", - fontsize=9, color="#E53935", fontweight="bold", - transform=ax.get_yaxis_transform()) - - ax.set_xlim(-0.05, 1.15) - ax.set_ylim(-0.5, len(stages) - 0.5) - ax.set_yticks([]) - ax.set_xticks([]) - ax.set_title("Mining Pipeline Funnel", fontsize=13, fontweight="bold", pad=15) - ax.spines[:].set_visible(False) - - fig.tight_layout() - _save_or_show(fig, save_path)