refactor(factorminer): 禁用 npz 信号缓存并将库 I/O 对接本地 DSL

- 为 Factor 数据类新增 metadata 字段,用于标记未实现算子(unsupported)
- save_library 废弃 save_signals 参数,内部强制忽略,仅持久化 JSON 元数据,不再写入 .npz
- load_library 删除 .npz 恢复逻辑;加载时自动将 # TODO 公式的 unsupported 标记设为 True
- import_from_paper() 直接基于已本地化的 PAPER_FACTORS 构建库,并同步标记 TODO 公式
- 新增 tests/test_factorminer_library_io.py,覆盖序列化、加载及 paper factors 导入
This commit is contained in:
2026-04-08 22:10:17 +08:00
parent d71f723602
commit 65500cce27
3 changed files with 617 additions and 467 deletions

View File

@@ -39,6 +39,7 @@ class Factor:
signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T) signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T)
research_metrics: dict = field(default_factory=dict) research_metrics: dict = field(default_factory=dict)
provenance: dict = field(default_factory=dict) provenance: dict = field(default_factory=dict)
metadata: dict = field(default_factory=dict)
def __post_init__(self) -> None: def __post_init__(self) -> None:
if not self.admission_date: if not self.admission_date:
@@ -59,6 +60,7 @@ class Factor:
"admission_date": self.admission_date, "admission_date": self.admission_date,
"research_metrics": self.research_metrics, "research_metrics": self.research_metrics,
"provenance": self.provenance, "provenance": self.provenance,
"metadata": self.metadata,
} }
@classmethod @classmethod
@@ -77,6 +79,7 @@ class Factor:
admission_date=d.get("admission_date", ""), admission_date=d.get("admission_date", ""),
research_metrics=d.get("research_metrics", {}), research_metrics=d.get("research_metrics", {}),
provenance=d.get("provenance", {}), provenance=d.get("provenance", {}),
metadata=d.get("metadata", {}),
) )
@@ -172,7 +175,7 @@ class FactorLibrary:
# Pearson on ranks == Spearman # Pearson on ranks == Spearman
ra_c = ra - ra.mean() ra_c = ra - ra.mean()
rb_c = rb - rb.mean() rb_c = rb - rb.mean()
denom = np.sqrt((ra_c ** 2).sum() * (rb_c ** 2).sum()) denom = np.sqrt((ra_c**2).sum() * (rb_c**2).sum())
if denom < 1e-12: if denom < 1e-12:
continue continue
corr_sum += abs((ra_c * rb_c).sum() / denom) corr_sum += abs((ra_c * rb_c).sum() / denom)
@@ -206,9 +209,7 @@ class FactorLibrary:
(admitted, reason) : Tuple[bool, str] (admitted, reason) : Tuple[bool, str]
""" """
if candidate_ic < self.ic_threshold: if candidate_ic < self.ic_threshold:
return False, ( return False, (f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}")
f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}"
)
if self.size == 0: if self.size == 0:
return True, "First factor in library" return True, "First factor in library"
@@ -221,9 +222,7 @@ class FactorLibrary:
f"{self.correlation_threshold} with existing library factor" f"{self.correlation_threshold} with existing library factor"
) )
return True, ( return True, (f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}")
f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}"
)
def check_replacement( def check_replacement(
self, self,
@@ -258,8 +257,10 @@ class FactorLibrary:
(should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str] (should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str]
""" """
if candidate_ic < ic_min: if candidate_ic < ic_min:
return False, None, ( return (
f"IC {candidate_ic:.4f} below replacement floor {ic_min}" False,
None,
(f"IC {candidate_ic:.4f} below replacement floor {ic_min}"),
) )
if self.size == 0: if self.size == 0:
@@ -277,21 +278,33 @@ class FactorLibrary:
correlated_factors.append((fid, corr, factor.ic_mean)) correlated_factors.append((fid, corr, factor.ic_mean))
if len(correlated_factors) != 1: if len(correlated_factors) != 1:
return False, None, ( return (
False,
None,
(
f"Found {len(correlated_factors)} correlated factors " f"Found {len(correlated_factors)} correlated factors "
f"(need exactly 1 for replacement)" f"(need exactly 1 for replacement)"
),
) )
fid, corr, existing_ic = correlated_factors[0] fid, corr, existing_ic = correlated_factors[0]
if candidate_ic < ic_ratio * existing_ic: if candidate_ic < ic_ratio * existing_ic:
return False, None, ( return (
False,
None,
(
f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = " f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = "
f"{ic_ratio * existing_ic:.4f}" f"{ic_ratio * existing_ic:.4f}"
),
) )
return True, fid, ( return (
True,
fid,
(
f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > " f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > "
f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}" f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}"
),
) )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -321,8 +334,11 @@ class FactorLibrary:
logger.info( logger.info(
"Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)", "Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)",
factor.id, factor.name, factor.ic_mean, factor.id,
factor.max_correlation, factor.category, factor.name,
factor.ic_mean,
factor.max_correlation,
factor.category,
) )
return factor.id return factor.id
@@ -360,7 +376,10 @@ class FactorLibrary:
logger.info( logger.info(
"Replaced factor %d with %d '%s' (IC=%.4f)", "Replaced factor %d with %d '%s' (IC=%.4f)",
old_id, new_factor.id, new_factor.name, new_factor.ic_mean, old_id,
new_factor.id,
new_factor.name,
new_factor.ic_mean,
) )
def remove_factor(self, factor_id: int) -> None: def remove_factor(self, factor_id: int) -> None:
@@ -381,9 +400,7 @@ class FactorLibrary:
# Correlation matrix management # Correlation matrix management
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _max_correlation_with_library( def _max_correlation_with_library(self, candidate_signals: np.ndarray) -> float:
self, candidate_signals: np.ndarray
) -> float:
"""Compute max |rho| between candidate and all library factors.""" """Compute max |rho| between candidate and all library factors."""
max_corr = 0.0 max_corr = 0.0
for factor in self.factors.values(): for factor in self.factors.values():
@@ -453,9 +470,7 @@ class FactorLibrary:
self.correlation_matrix[idx, other_idx] = 0.0 self.correlation_matrix[idx, other_idx] = 0.0
self.correlation_matrix[other_idx, idx] = 0.0 self.correlation_matrix[other_idx, idx] = 0.0
continue continue
corr = self._compute_correlation_vectorized( corr = self._compute_correlation_vectorized(factor.signals, other.signals)
factor.signals, other.signals
)
self.correlation_matrix[idx, other_idx] = corr self.correlation_matrix[idx, other_idx] = corr
self.correlation_matrix[other_idx, idx] = corr self.correlation_matrix[other_idx, idx] = corr
@@ -509,10 +524,7 @@ class FactorLibrary:
def get_factors_by_category(self, category: str) -> List[Factor]: def get_factors_by_category(self, category: str) -> List[Factor]:
"""Return all factors matching a given category.""" """Return all factors matching a given category."""
return [ return [f for f in self.factors.values() if f.category == category]
f for f in self.factors.values()
if f.category == category
]
def get_diagnostics(self) -> dict: def get_diagnostics(self) -> dict:
"""Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation. """Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation.
@@ -539,8 +551,7 @@ class FactorLibrary:
diag["category_counts"] = dict(cat_counts) diag["category_counts"] = dict(cat_counts)
diag["category_avg_ic"] = { diag["category_avg_ic"] = {
cat: cat_ic_sums[cat] / cat_counts[cat] cat: cat_ic_sums[cat] / cat_counts[cat] for cat in cat_counts
for cat in cat_counts
} }
# Correlation statistics # Correlation statistics
@@ -575,9 +586,7 @@ class FactorLibrary:
Returns a lightweight dictionary suitable for inclusion in LLM prompts Returns a lightweight dictionary suitable for inclusion in LLM prompts
or memory store entries. or memory store entries.
""" """
factors_sorted = sorted( factors_sorted = sorted(self.factors.values(), key=lambda f: f.id, reverse=True)
self.factors.values(), key=lambda f: f.id, reverse=True
)
recent = factors_sorted[:5] # Last 5 admissions recent = factors_sorted[:5] # Last 5 admissions
categories = defaultdict(int) categories = defaultdict(int)

View File

@@ -23,26 +23,22 @@ logger = logging.getLogger(__name__)
# Save / Load # Save / Load
# ====================================================================== # ======================================================================
def save_library( def save_library(
library: FactorLibrary, library: FactorLibrary,
path: Union[str, Path], path: Union[str, Path],
save_signals: bool = True, save_signals: bool = True,
) -> None: ) -> None:
"""Save a FactorLibrary to disk. """Save a FactorLibrary to disk (仅保存 JSON 元数据).
Creates two files:
- ``<path>.json`` -- factor metadata and library configuration
- ``<path>_signals.npz`` -- binary signal cache (if save_signals=True
and any factors have signals)
Parameters Parameters
---------- ----------
library : FactorLibrary library : FactorLibrary
path : str or Path path : str or Path
Base path (without extension). E.g. ``"output/my_library"`` produces Base path (without extension). E.g. ``"output/my_library"`` produces
``output/my_library.json`` and ``output/my_library_signals.npz``. ``output/my_library.json``.
save_signals : bool save_signals : bool
Whether to write the binary signal cache. 已废弃,始终忽略,不再写入 .npz 信号缓存。
""" """
path = Path(path) path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
@@ -63,20 +59,10 @@ def save_library(
json.dump(meta, fp, indent=2) json.dump(meta, fp, indent=2)
logger.info("Saved library metadata to %s (%d factors)", json_path, library.size) logger.info("Saved library metadata to %s (%d factors)", json_path, library.size)
# -- Binary signal cache -- # -- Binary signal cache (已禁用) --
print("[library_io] 信号缓存已禁用,仅保存 JSON 元数据")
if save_signals: if save_signals:
signal_arrays: Dict[str, np.ndarray] = {} logger.info("save_signals 参数已废弃,信号缓存不再写入")
for f in library.list_factors():
if f.signals is not None:
signal_arrays[f"factor_{f.id}"] = f.signals
if signal_arrays:
npz_path = Path(str(path) + "_signals.npz")
np.savez_compressed(npz_path, **signal_arrays)
logger.info(
"Saved signal cache to %s (%d arrays)",
npz_path, len(signal_arrays),
)
def load_library(path: Union[str, Path]) -> FactorLibrary: def load_library(path: Union[str, Path]) -> FactorLibrary:
@@ -85,8 +71,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
Parameters Parameters
---------- ----------
path : str or Path path : str or Path
Base path (without extension). Will look for ``<path>.json`` and Base path (without extension). Will look for ``<path>.json``.
optionally ``<path>_signals.npz``.
Returns Returns
------- -------
@@ -107,6 +92,8 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Restore factors # Restore factors
for fd in meta.get("factors", []): for fd in meta.get("factors", []):
factor = Factor.from_dict(fd) factor = Factor.from_dict(fd)
if factor.formula.strip().startswith("# TODO"):
factor.metadata["unsupported"] = True
library.factors[factor.id] = factor library.factors[factor.id] = factor
# Restore correlation matrix # Restore correlation matrix
@@ -117,24 +104,9 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Restore id-to-index mapping # Restore id-to-index mapping
if "id_to_index" in meta: if "id_to_index" in meta:
library._id_to_index = { library._id_to_index = {int(k): v for k, v in meta["id_to_index"].items()}
int(k): v for k, v in meta["id_to_index"].items()
}
# Load signal cache if present logger.info("Loaded library from %s (%d factors)", json_path, library.size)
npz_path = Path(str(path) + "_signals.npz")
if npz_path.exists():
data = np.load(npz_path)
for f in library.factors.values():
key = f"factor_{f.id}"
if key in data:
f.signals = data[key]
data.close()
logger.info("Loaded signal cache from %s", npz_path)
logger.info(
"Loaded library from %s (%d factors)", json_path, library.size
)
return library return library
@@ -142,6 +114,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Export utilities # Export utilities
# ====================================================================== # ======================================================================
def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None: def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
"""Export the factor table to CSV. """Export the factor table to CSV.
@@ -152,15 +125,24 @@ def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
fieldnames = [ fieldnames = [
"ID", "Name", "Formula", "Category", "IC_Mean", "ICIR", "ID",
"IC_Win_Rate", "Max_Correlation", "Batch", "Admission_Date", "Name",
"Formula",
"Category",
"IC_Mean",
"ICIR",
"IC_Win_Rate",
"Max_Correlation",
"Batch",
"Admission_Date",
] ]
with open(path, "w", newline="") as fp: with open(path, "w", newline="") as fp:
writer = csv.DictWriter(fp, fieldnames=fieldnames) writer = csv.DictWriter(fp, fieldnames=fieldnames)
writer.writeheader() writer.writeheader()
for f in library.list_factors(): for f in library.list_factors():
writer.writerow({ writer.writerow(
{
"ID": f.id, "ID": f.id,
"Name": f.name, "Name": f.name,
"Formula": f.formula, "Formula": f.formula,
@@ -171,7 +153,8 @@ def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
"Max_Correlation": f"{f.max_correlation:.4f}", "Max_Correlation": f"{f.max_correlation:.4f}",
"Batch": f.batch_number, "Batch": f.batch_number,
"Admission_Date": f.admission_date, "Admission_Date": f.admission_date,
}) }
)
logger.info("Exported %d factors to %s", library.size, path) logger.info("Exported %d factors to %s", library.size, path)
@@ -204,49 +187,49 @@ def export_formulas(library: FactorLibrary, path: Union[str, Path]) -> None:
# Each entry: (name, formula, category) # Each entry: (name, formula, category)
PAPER_FACTORS: List[Dict[str, str]] = [ PAPER_FACTORS: List[Dict[str, str]] = [
{ {
"name": 'Intraday Range Position', "name": "Intraday Range Position",
"formula": '(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))', "formula": "(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume-Weighted Momentum', "name": "Volume-Weighted Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))', "formula": "(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Residual Volatility', "name": "Residual Volatility",
"formula": '(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))', "formula": "(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Intraday Amplitude Ratio', "name": "Intraday Amplitude Ratio",
"formula": '(-cs_rank(((high - low) / (close + 1e-8))))', "formula": "(-cs_rank(((high - low) / (close + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Volume Surprise', "name": "Volume Surprise",
"formula": '(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))', "formula": "(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'VWAP Deviation', "name": "VWAP Deviation",
"formula": '(-((close - (amount / vol)) / (amount / vol)))', "formula": "(-((close - (amount / vol)) / (amount / vol)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Short-term Reversal', "name": "Short-term Reversal",
"formula": '(-cs_rank(ts_pct_change(close, 3)))', "formula": "(-cs_rank(ts_pct_change(close, 3)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Turnover Momentum', "name": "Turnover Momentum",
"formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))', "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'High-Low Midpoint Reversion', "name": "High-Low Midpoint Reversion",
"formula": '(-cs_rank((close - ((high + low) / 2))))', "formula": "(-cs_rank((close - ((high + low) / 2))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Rolling Beta Residual', # "name": 'Rolling Beta Residual',
@@ -259,69 +242,69 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Accumulation-Distribution', "name": "Accumulation-Distribution",
"formula": '(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))', "formula": "(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Relative Strength Index Deviation', "name": "Relative Strength Index Deviation",
"formula": '(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))', "formula": "(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Price-Volume Correlation', "name": "Price-Volume Correlation",
"formula": '(-ts_corr(close, vol, 10))', "formula": "(-ts_corr(close, vol, 10))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Skewness of Returns', "name": "Skewness of Returns",
"formula": '(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))', "formula": "(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))",
"category": 'Higher-moment', "category": "Higher-moment",
}, },
{ {
"name": 'Kurtosis of Returns', "name": "Kurtosis of Returns",
"formula": '(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))', "formula": "(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))",
"category": 'Higher-moment', "category": "Higher-moment",
}, },
{ {
"name": 'Volume-Weighted Return', "name": "Volume-Weighted Return",
"formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))', "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Close-to-High Ratio', "name": "Close-to-High Ratio",
"formula": '(-cs_rank(((high - close) / (high + 1e-8))))', "formula": "(-cs_rank(((high - close) / (high + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Delayed Correlation Shift', "name": "Delayed Correlation Shift",
"formula": '(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))', "formula": "(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Exponential Momentum', "name": "Exponential Momentum",
"formula": '(-cs_rank((close - ts_ema(close, 20))))', "formula": "(-cs_rank((close - ts_ema(close, 20))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Range-Adjusted Volume', "name": "Range-Adjusted Volume",
"formula": '(-cs_rank((vol / ((high - low) + 1e-8))))', "formula": "(-cs_rank((vol / ((high - low) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Cumulative Return Rank', "name": "Cumulative Return Rank",
"formula": '(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))', "formula": "(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'VWAP Momentum', "name": "VWAP Momentum",
"formula": '(-cs_rank(ts_pct_change((amount / vol), 5)))', "formula": "(-cs_rank(ts_pct_change((amount / vol), 5)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Bollinger Band Position', "name": "Bollinger Band Position",
"formula": '(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))', "formula": "(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Volume Decay Weighted', # "name": 'Volume Decay Weighted',
@@ -329,64 +312,64 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Overnight Return', "name": "Overnight Return",
"formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))', "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))",
"category": 'Overnight', "category": "Overnight",
}, },
{ {
"name": 'Intraday Return', "name": "Intraday Return",
"formula": '(-cs_rank(((close - open) / (open + 1e-8))))', "formula": "(-cs_rank(((close - open) / (open + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Max Drawdown', "name": "Max Drawdown",
"formula": '(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))', "formula": "(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
{ {
"name": 'Hurst Exponent Proxy', "name": "Hurst Exponent Proxy",
"formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))', "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Volume Imbalance', "name": "Volume Imbalance",
"formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))', "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Weighted Close Position', "name": "Weighted Close Position",
"formula": '(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))', "formula": "(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Trend Intensity', "name": "Trend Intensity",
"formula": '(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))', "formula": "(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Return Dispersion', "name": "Return Dispersion",
"formula": '(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))', "formula": "(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'VWAP Relative Strength', "name": "VWAP Relative Strength",
"formula": '(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))', "formula": "(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Rank Reversal', "name": "Rank Reversal",
"formula": '(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))', "formula": "(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Money Flow Index', "name": "Money Flow Index",
"formula": '(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))', "formula": "(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Adaptive Momentum', "name": "Adaptive Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))', "formula": "(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))",
"category": 'Momentum', "category": "Momentum",
}, },
# { # {
# "name": 'Volume Trend', # "name": 'Volume Trend',
@@ -394,29 +377,29 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Price Acceleration', "name": "Price Acceleration",
"formula": '(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))', "formula": "(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Realized Volatility Ratio', "name": "Realized Volatility Ratio",
"formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))', "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Amount Concentration', "name": "Amount Concentration",
"formula": '(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))', "formula": "(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'Cross-Sectional Volume Rank', "name": "Cross-Sectional Volume Rank",
"formula": '(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))', "formula": "(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Gap Momentum', "name": "Gap Momentum",
"formula": '(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))', "formula": "(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))",
"category": 'Overnight', "category": "Overnight",
}, },
# { # {
# "name": 'VWAP Distance Decay', # "name": 'VWAP Distance Decay',
@@ -424,44 +407,44 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Tail Risk Indicator', "name": "Tail Risk Indicator",
"formula": '(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', "formula": "(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
{ {
"name": 'Volatility-Regime Reversal Divergence', "name": "Volatility-Regime Reversal Divergence",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Regime Volume Signal', "name": "Regime Volume Signal",
"formula": 'if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))', "formula": "if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Liquidity-Adjusted Reversal', "name": "Liquidity-Adjusted Reversal",
"formula": '(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))', "formula": "(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Cross-Sectional Volatility Rank', "name": "Cross-Sectional Volatility Rank",
"formula": '(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))', "formula": "(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'VWAP Bollinger', "name": "VWAP Bollinger",
"formula": '(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))', "formula": "(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Smoothed Return Reversal', "name": "Smoothed Return Reversal",
"formula": '(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))', "formula": "(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume-Price Divergence', "name": "Volume-Price Divergence",
"formula": '(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))', "formula": "(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Decay Weighted Momentum', # "name": 'Decay Weighted Momentum',
@@ -469,14 +452,14 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'Range Percentile', "name": "Range Percentile",
"formula": '(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))', "formula": "(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Skewness', "name": "Volume Skewness",
"formula": '(-cs_rank(ts_skew(vol, 20)))', "formula": "(-cs_rank(ts_skew(vol, 20)))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Residual Momentum', # "name": 'Residual Momentum',
@@ -484,54 +467,54 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'VWAP Trend', "name": "VWAP Trend",
"formula": '(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))', "formula": "(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Return Autocorrelation', "name": "Return Autocorrelation",
"formula": '(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))', "formula": "(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Price Efficiency', "name": "Price Efficiency",
"formula": '(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))', "formula": "(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Relative Volume Change', "name": "Relative Volume Change",
"formula": '(-cs_rank(ts_pct_change(vol, 5)))', "formula": "(-cs_rank(ts_pct_change(vol, 5)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Weighted VWAP Position', "name": "Weighted VWAP Position",
"formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))', "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Regime Momentum Flip', "name": "Regime Momentum Flip",
"formula": 'if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))', "formula": "if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'High-Low Volatility', "name": "High-Low Volatility",
"formula": '(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))', "formula": "(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Opening Gap Reversal', "name": "Opening Gap Reversal",
"formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))",
"category": 'Overnight', "category": "Overnight",
}, },
{ {
"name": 'Volume Momentum Spread', "name": "Volume Momentum Spread",
"formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))', "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Regime Volume Reversal', "name": "Regime Volume Reversal",
"formula": 'if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
# { # {
# "name": 'Slope Reversal', # "name": 'Slope Reversal',
@@ -544,9 +527,9 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Turnover Rate Change', "name": "Turnover Rate Change",
"formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))', "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))",
"category": 'Turnover', "category": "Turnover",
}, },
# { # {
# "name": 'Return Quantile Signal', # "name": 'Return Quantile Signal',
@@ -554,14 +537,14 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Higher-moment', # "category": 'Higher-moment',
# }, # },
{ {
"name": 'Double EMA Crossover', "name": "Double EMA Crossover",
"formula": '(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))', "formula": "(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Conditional Volatility Return', "name": "Conditional Volatility Return",
"formula": '(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', "formula": "(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
# { # {
# "name": 'Amplitude Trend', # "name": 'Amplitude Trend',
@@ -569,39 +552,39 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volatility', # "category": 'Volatility',
# }, # },
{ {
"name": 'Volume-Weighted Range', "name": "Volume-Weighted Range",
"formula": '(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))', "formula": "(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Intraday Efficiency Ratio', "name": "Intraday Efficiency Ratio",
"formula": '(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))', "formula": "(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Cumulative Volume Signal', "name": "Cumulative Volume Signal",
"formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))', "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'VWAP Cross-Sectional Momentum', "name": "VWAP Cross-Sectional Momentum",
"formula": '(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))', "formula": "(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Mean-Reversion Indicator', "name": "Mean-Reversion Indicator",
"formula": '(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))', "formula": "(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Regime Indicator', "name": "Volume Regime Indicator",
"formula": '(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))', "formula": "(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Return Persistence', "name": "Return Persistence",
"formula": '(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))', "formula": "(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))",
"category": 'Momentum', "category": "Momentum",
}, },
# { # {
# "name": 'Regime Trend Strength', # "name": 'Regime Trend Strength',
@@ -609,29 +592,29 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Regime-switching', # "category": 'Regime-switching',
# }, # },
{ {
"name": 'VWAP Dispersion', "name": "VWAP Dispersion",
"formula": '(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))', "formula": "(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Smart Money Flow', "name": "Smart Money Flow",
"formula": '(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))', "formula": "(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Return Rank Dispersion', "name": "Return Rank Dispersion",
"formula": '(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))', "formula": "(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Acceleration', "name": "Volume Acceleration",
"formula": '(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))', "formula": "(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Close-Low Ratio Trend', "name": "Close-Low Ratio Trend",
"formula": '(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))', "formula": "(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Hull MA Deviation', # "name": 'Hull MA Deviation',
@@ -644,69 +627,69 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'Volume Profile Skew', "name": "Volume Profile Skew",
"formula": '(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))', "formula": "(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Conditional VWAP Signal', "name": "Conditional VWAP Signal",
"formula": 'if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))', "formula": "if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Extreme Volume Reversal', "name": "Extreme Volume Reversal",
"formula": '(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))', "formula": "(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Range Expansion Signal', "name": "Range Expansion Signal",
"formula": '(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))', "formula": "(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Short-Term IC Momentum', "name": "Short-Term IC Momentum",
"formula": '(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))', "formula": "(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'VWAP Curvature', "name": "VWAP Curvature",
"formula": '(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))', "formula": "(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Relative Strength', "name": "Relative Strength",
"formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))', "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Volume-Correlated Return', "name": "Volume-Correlated Return",
"formula": '(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))', "formula": "(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Regime Volatility Band', "name": "Regime Volatility Band",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Open-Close Spread Momentum', "name": "Open-Close Spread Momentum",
"formula": '(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))', "formula": "(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Volatility-Scaled Reversal', "name": "Volatility-Scaled Reversal",
"formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'VWAP Time-Weighted Signal', "name": "VWAP Time-Weighted Signal",
"formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))', "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Covariance Structure Shift', "name": "Covariance Structure Shift",
"formula": '(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))', "formula": "(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Quadratic Regression Residual', # "name": 'Quadratic Regression Residual',
@@ -714,34 +697,34 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Higher-moment', # "category": 'Higher-moment',
# }, # },
{ {
"name": 'VWAP Mean-Reversion Strength', "name": "VWAP Mean-Reversion Strength",
"formula": '(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))', "formula": "(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Multi-Scale Momentum', "name": "Multi-Scale Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))', "formula": "(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Relative High Position', "name": "Relative High Position",
"formula": '(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))', "formula": "(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Turnover Volatility', "name": "Turnover Volatility",
"formula": '(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))', "formula": "(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'Regime Correlation Signal', "name": "Regime Correlation Signal",
"formula": 'if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Intraday Momentum Reversal', "name": "Intraday Momentum Reversal",
"formula": '(-cs_rank(((close - open) / ((high - low) + 1e-8))))', "formula": "(-cs_rank(((close - open) / ((high - low) + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
# { # {
# "name": 'Volume-Weighted Slope', # "name": 'Volume-Weighted Slope',
@@ -749,14 +732,13 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Adaptive Range Reversal', "name": "Adaptive Range Reversal",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
] ]
def import_from_paper( def import_from_paper(
path: Optional[Union[str, Path]] = None, path: Optional[Union[str, Path]] = None,
) -> FactorLibrary: ) -> FactorLibrary:
@@ -802,6 +784,8 @@ def import_from_paper(
admission_date=entry.get("admission_date", ""), admission_date=entry.get("admission_date", ""),
signals=None, signals=None,
) )
if factor.formula.strip().startswith("# TODO"):
factor.metadata["unsupported"] = True
library.admit_factor(factor) library.admit_factor(factor)
logger.info( logger.info(

View File

@@ -0,0 +1,157 @@
"""Tests for library I/O and paper factor imports."""
import json
from pathlib import Path
import numpy as np
import pytest
from src.factorminer.core.factor_library import Factor, FactorLibrary
from src.factorminer.core.library_io import (
import_from_paper,
load_library,
save_library,
)
class TestSaveLoadLibrary:
    """Serialization round-trips for FactorLibrary persistence."""

    def test_save_library_ignores_save_signals(self, tmp_path: Path) -> None:
        """Even with save_signals=True, no .npz sidecar may be written."""
        factor = Factor(
            id=0,
            name="test_factor",
            formula="close / ts_delay(close, 1) - 1",
            category="Momentum",
            ic_mean=0.05,
            icir=0.5,
            ic_win_rate=0.55,
            max_correlation=0.1,
            batch_number=1,
        )
        # Attach a signal matrix on purpose; save_library must still skip it.
        factor.signals = np.ones((10, 20))

        library = FactorLibrary()
        library.admit_factor(factor)

        base = tmp_path / "test_lib"
        save_library(library, str(base), save_signals=True)

        json_file = base.with_suffix(".json")
        npz_file = Path(f"{base}_signals.npz")
        assert json_file.exists()
        assert not npz_file.exists()

    def test_load_library_restores_metadata_and_unsupported(
        self, tmp_path: Path
    ) -> None:
        """Loading JSON must restore metadata and flag '# TODO' formulas."""
        supported = Factor(
            id=0,
            name="ok_factor",
            formula="cs_rank(close)",
            category="Test",
            ic_mean=0.0,
            icir=0.0,
            ic_win_rate=0.0,
            max_correlation=0.0,
            batch_number=0,
            metadata={"author": "ai"},
        )
        todo = Factor(
            id=0,
            name="todo_factor",
            formula="# TODO: Neg(CsRank(Decay(close, 10)))",
            category="Test",
            ic_mean=0.0,
            icir=0.0,
            ic_win_rate=0.0,
            max_correlation=0.0,
            batch_number=0,
        )

        library = FactorLibrary()
        library.admit_factor(supported)
        library.admit_factor(todo)

        target = tmp_path / "meta_lib"
        save_library(library, str(target))
        restored = load_library(str(target))

        assert restored.size == 2
        first = restored.get_factor(1)
        assert first.metadata.get("author") == "ai"
        assert not first.metadata.get("unsupported", False)
        second = restored.get_factor(2)
        assert second.metadata.get("unsupported") is True

    def test_factor_round_trip_with_metadata(self) -> None:
        """Factor.to_dict / from_dict must carry metadata through unchanged."""
        original = Factor(
            id=1,
            name="round_trip",
            formula="ts_mean(close, 20)",
            category="Momentum",
            ic_mean=0.1,
            icir=1.0,
            ic_win_rate=0.6,
            max_correlation=0.2,
            batch_number=2,
            metadata={"unsupported": True, "tags": ["test"]},
        )
        rebuilt = Factor.from_dict(original.to_dict())
        assert rebuilt.metadata == original.metadata
class TestImportFromPaper:
    """Importing factors from the built-in paper catalog."""

    def test_import_from_paper_includes_all_translated_factors(self) -> None:
        """Every entry in the built-in PAPER_FACTORS catalog must be admitted."""
        library = import_from_paper()
        assert library.size > 0
        # Each admitted factor carries an assigned id plus its core fields.
        for admitted in library.list_factors():
            assert admitted.id > 0
            assert admitted.name
            assert admitted.formula
            assert admitted.category

    def test_import_from_paper_marks_todo_as_unsupported(self, tmp_path: Path) -> None:
        """'# TODO' formulas get metadata['unsupported'] = True on import."""
        catalog = [
            {
                "name": "Normal Factor",
                "formula": "cs_rank(close)",
                "category": "Test",
            },
            {
                "name": "Unsupported Factor",
                "formula": "# TODO: Neg(CsRank(Decay(close, 10)))",
                "category": "Test",
            },
        ]
        catalog_file = tmp_path / "custom_factors.json"
        catalog_file.write_text(json.dumps(catalog), encoding="utf-8")

        library = import_from_paper(str(catalog_file))
        assert library.size == 2
        supported, unsupported = library.list_factors()
        assert supported.metadata.get("unsupported") is None
        assert unsupported.metadata.get("unsupported") is True

    def test_import_from_paper_path_override(self, tmp_path: Path) -> None:
        """An explicit path argument loads an external JSON factor list."""
        entries = [
            {"name": "custom_1", "formula": "open + close", "category": "Custom"},
        ]
        override = tmp_path / "override.json"
        override.write_text(json.dumps(entries), encoding="utf-8")

        library = import_from_paper(str(override))
        assert library.size == 1
        assert library.list_factors()[0].name == "custom_1"