From 65500cce27ded9462c3cc0594314e538a4b35adc Mon Sep 17 00:00:00 2001 From: liaozhaorun <1300336796@qq.com> Date: Wed, 8 Apr 2026 22:10:17 +0800 Subject: [PATCH] =?UTF-8?q?refactor(factorminer):=20=E7=A6=81=E7=94=A8=20n?= =?UTF-8?q?pz=20=E4=BF=A1=E5=8F=B7=E7=BC=93=E5=AD=98=E5=B9=B6=E5=B0=86?= =?UTF-8?q?=E5=BA=93=20I/O=20=E5=AF=B9=E6=8E=A5=E6=9C=AC=E5=9C=B0=20DSL=20?= =?UTF-8?q?-=20=E4=B8=BA=20Factor=20=E6=95=B0=E6=8D=AE=E7=B1=BB=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=20metadata=20=E5=AD=97=E6=AE=B5=EF=BC=8C=E7=94=A8?= =?UTF-8?q?=E4=BA=8E=E6=A0=87=E8=AE=B0=E6=9C=AA=E5=AE=9E=E7=8E=B0=E7=AE=97?= =?UTF-8?q?=E5=AD=90=EF=BC=88unsupported=EF=BC=89=20-=20save=5Flibrary=20?= =?UTF-8?q?=E5=BA=9F=E5=BC=83=20save=5Fsignals=20=E5=8F=82=E6=95=B0?= =?UTF-8?q?=EF=BC=8C=E5=86=85=E9=83=A8=E5=BC=BA=E5=88=B6=E5=BF=BD=E7=95=A5?= =?UTF-8?q?=EF=BC=8C=E4=BB=85=E6=8C=81=E4=B9=85=E5=8C=96=20JSON=20?= =?UTF-8?q?=E5=85=83=E6=95=B0=E6=8D=AE=EF=BC=8C=E4=B8=8D=E5=86=8D=E5=86=99?= =?UTF-8?q?=E5=85=A5=20.npz=20-=20load=5Flibrary=20=E5=88=A0=E9=99=A4=20.n?= =?UTF-8?q?pz=20=E6=81=A2=E5=A4=8D=E9=80=BB=E8=BE=91=EF=BC=9B=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E6=97=B6=E8=87=AA=E5=8A=A8=E5=B0=86=20#=20TODO=20?= =?UTF-8?q?=E5=85=AC=E5=BC=8F=E7=9A=84=20unsupported=20=E6=A0=87=E8=AE=B0?= =?UTF-8?q?=E8=AE=BE=E4=B8=BA=20True=20-=20import=5Ffrom=5Fpaper()=20?= =?UTF-8?q?=E7=9B=B4=E6=8E=A5=E5=9F=BA=E4=BA=8E=E5=B7=B2=E6=9C=AC=E5=9C=B0?= =?UTF-8?q?=E5=8C=96=E7=9A=84=20PAPER=5FFACTORS=20=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E5=BA=93=EF=BC=8C=E5=B9=B6=E5=90=8C=E6=AD=A5=E6=A0=87=E8=AE=B0?= =?UTF-8?q?=20TODO=20=E5=85=AC=E5=BC=8F=20-=20=E6=96=B0=E5=A2=9E=20tests/t?= =?UTF-8?q?est=5Ffactorminer=5Flibrary=5Fio.py=EF=BC=8C=E8=A6=86=E7=9B=96?= =?UTF-8?q?=E5=BA=8F=E5=88=97=E5=8C=96=E3=80=81=E5=8A=A0=E8=BD=BD=E5=8F=8A?= =?UTF-8?q?=20paper=20factors=20=E5=AF=BC=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/factorminer/core/factor_library.py | 81 +-- src/factorminer/core/library_io.py | 846 ++++++++++++------------- tests/test_factorminer_library_io.py | 157 +++++ 3 files changed, 617 insertions(+), 467 deletions(-) create mode 100644 tests/test_factorminer_library_io.py diff --git a/src/factorminer/core/factor_library.py b/src/factorminer/core/factor_library.py index 635b22b..289fef5 100644 --- a/src/factorminer/core/factor_library.py +++ b/src/factorminer/core/factor_library.py @@ -39,6 +39,7 @@ class Factor: signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T) research_metrics: dict = field(default_factory=dict) provenance: dict = field(default_factory=dict) + metadata: dict = field(default_factory=dict) def __post_init__(self) -> None: if not self.admission_date: @@ -59,6 +60,7 @@ class Factor: "admission_date": self.admission_date, "research_metrics": self.research_metrics, "provenance": self.provenance, + "metadata": self.metadata, } @classmethod @@ -77,6 +79,7 @@ class Factor: admission_date=d.get("admission_date", ""), research_metrics=d.get("research_metrics", {}), provenance=d.get("provenance", {}), + metadata=d.get("metadata", {}), ) @@ -172,7 +175,7 @@ class FactorLibrary: # Pearson on ranks == Spearman ra_c = ra - ra.mean() rb_c = rb - rb.mean() - denom = np.sqrt((ra_c ** 2).sum() * (rb_c ** 2).sum()) + denom = np.sqrt((ra_c**2).sum() * (rb_c**2).sum()) if denom < 1e-12: continue corr_sum += abs((ra_c * rb_c).sum() / denom) @@ -206,9 +209,7 @@ class FactorLibrary: (admitted, reason) : Tuple[bool, str] """ if candidate_ic < self.ic_threshold: - return False, ( - f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}" - ) + return False, (f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}") if self.size == 0: return True, "First factor in library" @@ -221,9 +222,7 @@ class FactorLibrary: f"{self.correlation_threshold} with existing library factor" ) - return True, ( - f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}" - ) + return True, (f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}") def check_replacement( self, @@ -258,8 +257,10 @@ class FactorLibrary: (should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str] """ if candidate_ic < ic_min: - return False, None, ( - f"IC {candidate_ic:.4f} below replacement floor {ic_min}" + return ( + False, + None, + (f"IC {candidate_ic:.4f} below replacement floor {ic_min}"), ) if self.size == 0: @@ -277,21 +278,33 @@ class FactorLibrary: correlated_factors.append((fid, corr, factor.ic_mean)) if len(correlated_factors) != 1: - return False, None, ( - f"Found {len(correlated_factors)} correlated factors " - f"(need exactly 1 for replacement)" + return ( + False, + None, + ( + f"Found {len(correlated_factors)} correlated factors " + f"(need exactly 1 for replacement)" + ), ) fid, corr, existing_ic = correlated_factors[0] if candidate_ic < ic_ratio * existing_ic: - return False, None, ( - f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = " - f"{ic_ratio * existing_ic:.4f}" + return ( + False, + None, + ( + f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = " + f"{ic_ratio * existing_ic:.4f}" + ), ) - return True, fid, ( - f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > " - f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}" + return ( + True, + fid, + ( + f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > " + f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}" + ), ) # ------------------------------------------------------------------ @@ -321,8 +334,11 @@ class FactorLibrary: logger.info( "Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)", - factor.id, factor.name, factor.ic_mean, - factor.max_correlation, factor.category, + factor.id, + factor.name, + factor.ic_mean, + factor.max_correlation, + factor.category, ) return factor.id @@ -360,7 +376,10 @@ class FactorLibrary: logger.info( "Replaced factor %d with %d '%s' (IC=%.4f)", - old_id, new_factor.id, new_factor.name, new_factor.ic_mean, + old_id, + new_factor.id, + new_factor.name, + new_factor.ic_mean, ) def remove_factor(self, factor_id: int) -> None: @@ -381,9 +400,7 @@ class FactorLibrary: # Correlation matrix management # ------------------------------------------------------------------ - def _max_correlation_with_library( - self, candidate_signals: np.ndarray - ) -> float: + def _max_correlation_with_library(self, candidate_signals: np.ndarray) -> float: """Compute max |rho| between candidate and all library factors.""" max_corr = 0.0 for factor in self.factors.values(): @@ -453,9 +470,7 @@ class FactorLibrary: self.correlation_matrix[idx, other_idx] = 0.0 self.correlation_matrix[other_idx, idx] = 0.0 continue - corr = self._compute_correlation_vectorized( - factor.signals, other.signals - ) + corr = self._compute_correlation_vectorized(factor.signals, other.signals) self.correlation_matrix[idx, other_idx] = corr self.correlation_matrix[other_idx, idx] = corr @@ -509,10 +524,7 @@ class FactorLibrary: def get_factors_by_category(self, category: str) -> List[Factor]: """Return all factors matching a given category.""" - return [ - f for f in self.factors.values() - if f.category == category - ] + return [f for f in self.factors.values() if f.category == category] def get_diagnostics(self) -> dict: """Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation. @@ -539,8 +551,7 @@ class FactorLibrary: diag["category_counts"] = dict(cat_counts) diag["category_avg_ic"] = { - cat: cat_ic_sums[cat] / cat_counts[cat] - for cat in cat_counts + cat: cat_ic_sums[cat] / cat_counts[cat] for cat in cat_counts } # Correlation statistics @@ -575,9 +586,7 @@ class FactorLibrary: Returns a lightweight dictionary suitable for inclusion in LLM prompts or memory store entries. """ - factors_sorted = sorted( - self.factors.values(), key=lambda f: f.id, reverse=True - ) + factors_sorted = sorted(self.factors.values(), key=lambda f: f.id, reverse=True) recent = factors_sorted[:5] # Last 5 admissions categories = defaultdict(int) diff --git a/src/factorminer/core/library_io.py b/src/factorminer/core/library_io.py index 0902852..ec45e34 100644 --- a/src/factorminer/core/library_io.py +++ b/src/factorminer/core/library_io.py @@ -23,26 +23,22 @@ logger = logging.getLogger(__name__) # Save / Load # ====================================================================== + def save_library( library: FactorLibrary, path: Union[str, Path], save_signals: bool = True, ) -> None: - """Save a FactorLibrary to disk. - - Creates two files: - - ``.json`` -- factor metadata and library configuration - - ``_signals.npz`` -- binary signal cache (if save_signals=True - and any factors have signals) + """Save a FactorLibrary to disk (仅保存 JSON 元数据). Parameters ---------- library : FactorLibrary path : str or Path Base path (without extension). E.g. ``"output/my_library"`` produces - ``output/my_library.json`` and ``output/my_library_signals.npz``. + ``output/my_library.json``. save_signals : bool - Whether to write the binary signal cache. + 已废弃,始终忽略,不再写入 .npz 信号缓存。 """ path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) @@ -63,20 +59,10 @@ def save_library( json.dump(meta, fp, indent=2) logger.info("Saved library metadata to %s (%d factors)", json_path, library.size) - # -- Binary signal cache -- + # -- Binary signal cache (已禁用) -- + print("[library_io] 信号缓存已禁用,仅保存 JSON 元数据") if save_signals: - signal_arrays: Dict[str, np.ndarray] = {} - for f in library.list_factors(): - if f.signals is not None: - signal_arrays[f"factor_{f.id}"] = f.signals - - if signal_arrays: - npz_path = Path(str(path) + "_signals.npz") - np.savez_compressed(npz_path, **signal_arrays) - logger.info( - "Saved signal cache to %s (%d arrays)", - npz_path, len(signal_arrays), - ) + logger.info("save_signals 参数已废弃,信号缓存不再写入") def load_library(path: Union[str, Path]) -> FactorLibrary: @@ -85,8 +71,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary: Parameters ---------- path : str or Path - Base path (without extension). Will look for ``.json`` and - optionally ``_signals.npz``. + Base path (without extension). Will look for ``.json``. Returns ------- @@ -107,6 +92,8 @@ def load_library(path: Union[str, Path]) -> FactorLibrary: # Restore factors for fd in meta.get("factors", []): factor = Factor.from_dict(fd) + if factor.formula.strip().startswith("# TODO"): + factor.metadata["unsupported"] = True library.factors[factor.id] = factor # Restore correlation matrix @@ -117,24 +104,9 @@ def load_library(path: Union[str, Path]) -> FactorLibrary: # Restore id-to-index mapping if "id_to_index" in meta: - library._id_to_index = { - int(k): v for k, v in meta["id_to_index"].items() - } + library._id_to_index = {int(k): v for k, v in meta["id_to_index"].items()} - # Load signal cache if present - npz_path = Path(str(path) + "_signals.npz") - if npz_path.exists(): - data = np.load(npz_path) - for f in library.factors.values(): - key = f"factor_{f.id}" - if key in data: - f.signals = data[key] - data.close() - logger.info("Loaded signal cache from %s", npz_path) - - logger.info( - "Loaded library from %s (%d factors)", json_path, library.size - ) + logger.info("Loaded library from %s (%d factors)", json_path, library.size) return library @@ -142,6 +114,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary: # Export utilities # ====================================================================== + def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None: """Export the factor table to CSV. @@ -152,26 +125,36 @@ def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None: path.parent.mkdir(parents=True, exist_ok=True) fieldnames = [ - "ID", "Name", "Formula", "Category", "IC_Mean", "ICIR", - "IC_Win_Rate", "Max_Correlation", "Batch", "Admission_Date", + "ID", + "Name", + "Formula", + "Category", + "IC_Mean", + "ICIR", + "IC_Win_Rate", + "Max_Correlation", + "Batch", + "Admission_Date", ] with open(path, "w", newline="") as fp: writer = csv.DictWriter(fp, fieldnames=fieldnames) writer.writeheader() for f in library.list_factors(): - writer.writerow({ - "ID": f.id, - "Name": f.name, - "Formula": f.formula, - "Category": f.category, - "IC_Mean": f"{f.ic_mean:.6f}", - "ICIR": f"{f.icir:.6f}", - "IC_Win_Rate": f"{f.ic_win_rate:.4f}", - "Max_Correlation": f"{f.max_correlation:.4f}", - "Batch": f.batch_number, - "Admission_Date": f.admission_date, - }) + writer.writerow( + { + "ID": f.id, + "Name": f.name, + "Formula": f.formula, + "Category": f.category, + "IC_Mean": f"{f.ic_mean:.6f}", + "ICIR": f"{f.icir:.6f}", + "IC_Win_Rate": f"{f.ic_win_rate:.4f}", + "Max_Correlation": f"{f.max_correlation:.4f}", + "Batch": f.batch_number, + "Admission_Date": f.admission_date, + } + ) logger.info("Exported %d factors to %s", library.size, path) @@ -204,50 +187,50 @@ def export_formulas(library: FactorLibrary, path: Union[str, Path]) -> None: # Each entry: (name, formula, category) PAPER_FACTORS: List[Dict[str, str]] = [ { - "name": 'Intraday Range Position', - "formula": '(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Intraday Range Position", + "formula": "(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Volume-Weighted Momentum', - "formula": '(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))', - "category": 'Momentum', - }, + "name": "Volume-Weighted Momentum", + "formula": "(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))", + "category": "Momentum", + }, { - "name": 'Residual Volatility', - "formula": '(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))', - "category": 'Volatility', - }, + "name": "Residual Volatility", + "formula": "(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))", + "category": "Volatility", + }, { - "name": 'Intraday Amplitude Ratio', - "formula": '(-cs_rank(((high - low) / (close + 1e-8))))', - "category": 'Volatility', - }, + "name": "Intraday Amplitude Ratio", + "formula": "(-cs_rank(((high - low) / (close + 1e-8))))", + "category": "Volatility", + }, { - "name": 'Volume Surprise', - "formula": '(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))', - "category": 'Volume', - }, + "name": "Volume Surprise", + "formula": "(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))", + "category": "Volume", + }, { - "name": 'VWAP Deviation', - "formula": '(-((close - (amount / vol)) / (amount / vol)))', - "category": 'VWAP', - }, + "name": "VWAP Deviation", + "formula": "(-((close - (amount / vol)) / (amount / vol)))", + "category": "VWAP", + }, { - "name": 'Short-term Reversal', - "formula": '(-cs_rank(ts_pct_change(close, 3)))', - "category": 'Mean-reversion', - }, + "name": "Short-term Reversal", + "formula": "(-cs_rank(ts_pct_change(close, 3)))", + "category": "Mean-reversion", + }, { - "name": 'Turnover Momentum', - "formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))', - "category": 'Turnover', - }, + "name": "Turnover Momentum", + "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))", + "category": "Turnover", + }, { - "name": 'High-Low Midpoint Reversion', - "formula": '(-cs_rank((close - ((high + low) / 2))))', - "category": 'Mean-reversion', - }, + "name": "High-Low Midpoint Reversion", + "formula": "(-cs_rank((close - ((high + low) / 2))))", + "category": "Mean-reversion", + }, # { # "name": 'Rolling Beta Residual', # "formula": '# TODO: Neg(CsRank(Resid($returns, Mean($returns, 20), 20)))', @@ -259,280 +242,280 @@ PAPER_FACTORS: List[Dict[str, str]] = [ # "category": 'VWAP', # }, { - "name": 'Accumulation-Distribution', - "formula": '(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))', - "category": 'Volume', - }, + "name": "Accumulation-Distribution", + "formula": "(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))", + "category": "Volume", + }, { - "name": 'Relative Strength Index Deviation', - "formula": '(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))', - "category": 'Momentum', - }, + "name": "Relative Strength Index Deviation", + "formula": "(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))", + "category": "Momentum", + }, { - "name": 'Price-Volume Correlation', - "formula": '(-ts_corr(close, vol, 10))', - "category": 'Volume', - }, + "name": "Price-Volume Correlation", + "formula": "(-ts_corr(close, vol, 10))", + "category": "Volume", + }, { - "name": 'Skewness of Returns', - "formula": '(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))', - "category": 'Higher-moment', - }, + "name": "Skewness of Returns", + "formula": "(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))", + "category": "Higher-moment", + }, { - "name": 'Kurtosis of Returns', - "formula": '(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))', - "category": 'Higher-moment', - }, + "name": "Kurtosis of Returns", + "formula": "(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))", + "category": "Higher-moment", + }, { - "name": 'Volume-Weighted Return', - "formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))', - "category": 'Volume', - }, + "name": "Volume-Weighted Return", + "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))", + "category": "Volume", + }, { - "name": 'Close-to-High Ratio', - "formula": '(-cs_rank(((high - close) / (high + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Close-to-High Ratio", + "formula": "(-cs_rank(((high - close) / (high + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Delayed Correlation Shift', - "formula": '(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))', - "category": 'Volume', - }, + "name": "Delayed Correlation Shift", + "formula": "(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))", + "category": "Volume", + }, { - "name": 'Exponential Momentum', - "formula": '(-cs_rank((close - ts_ema(close, 20))))', - "category": 'Momentum', - }, + "name": "Exponential Momentum", + "formula": "(-cs_rank((close - ts_ema(close, 20))))", + "category": "Momentum", + }, { - "name": 'Range-Adjusted Volume', - "formula": '(-cs_rank((vol / ((high - low) + 1e-8))))', - "category": 'Volume', - }, + "name": "Range-Adjusted Volume", + "formula": "(-cs_rank((vol / ((high - low) + 1e-8))))", + "category": "Volume", + }, { - "name": 'Cumulative Return Rank', - "formula": '(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))', - "category": 'Momentum', - }, + "name": "Cumulative Return Rank", + "formula": "(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))", + "category": "Momentum", + }, { - "name": 'VWAP Momentum', - "formula": '(-cs_rank(ts_pct_change((amount / vol), 5)))', - "category": 'VWAP', - }, + "name": "VWAP Momentum", + "formula": "(-cs_rank(ts_pct_change((amount / vol), 5)))", + "category": "VWAP", + }, { - "name": 'Bollinger Band Position', - "formula": '(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Bollinger Band Position", + "formula": "(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))", + "category": "Mean-reversion", + }, # { # "name": 'Volume Decay Weighted', # "formula": '# TODO: Neg(CsRank(Decay($volume, 10)))', # "category": 'Volume', # }, { - "name": 'Overnight Return', - "formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))', - "category": 'Overnight', - }, + "name": "Overnight Return", + "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))", + "category": "Overnight", + }, { - "name": 'Intraday Return', - "formula": '(-cs_rank(((close - open) / (open + 1e-8))))', - "category": 'Intraday', - }, + "name": "Intraday Return", + "formula": "(-cs_rank(((close - open) / (open + 1e-8))))", + "category": "Intraday", + }, { - "name": 'Max Drawdown', - "formula": '(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))', - "category": 'Risk', - }, + "name": "Max Drawdown", + "formula": "(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))", + "category": "Risk", + }, { - "name": 'Hurst Exponent Proxy', - "formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))', - "category": 'Volatility', - }, + "name": "Hurst Exponent Proxy", + "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))", + "category": "Volatility", + }, { - "name": 'Volume Imbalance', - "formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))', - "category": 'Volume', - }, + "name": "Volume Imbalance", + "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))", + "category": "Volume", + }, { - "name": 'Weighted Close Position', - "formula": '(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Weighted Close Position", + "formula": "(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Trend Intensity', - "formula": '(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))', - "category": 'Trend', - }, + "name": "Trend Intensity", + "formula": "(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))", + "category": "Trend", + }, { - "name": 'Return Dispersion', - "formula": '(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))', - "category": 'Volatility', - }, + "name": "Return Dispersion", + "formula": "(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))", + "category": "Volatility", + }, { - "name": 'VWAP Relative Strength', - "formula": '(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))', - "category": 'VWAP', - }, + "name": "VWAP Relative Strength", + "formula": "(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))", + "category": "VWAP", + }, { - "name": 'Rank Reversal', - "formula": '(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))', - "category": 'Mean-reversion', - }, + "name": "Rank Reversal", + "formula": "(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))", + "category": "Mean-reversion", + }, { - "name": 'Money Flow Index', - "formula": '(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))', - "category": 'Volume', - }, + "name": "Money Flow Index", + "formula": "(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))", + "category": "Volume", + }, { - "name": 'Adaptive Momentum', - "formula": '(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))', - "category": 'Momentum', - }, + "name": "Adaptive Momentum", + "formula": "(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))", + "category": "Momentum", + }, # { # "name": 'Volume Trend', # "formula": '# TODO: Neg(CsRank(TsLinRegSlope($volume, 10)))', # "category": 'Volume', # }, { - "name": 'Price Acceleration', - "formula": '(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))', - "category": 'Momentum', - }, + "name": "Price Acceleration", + "formula": "(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))", + "category": "Momentum", + }, { - "name": 'Realized Volatility Ratio', - "formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))', - "category": 'Volatility', - }, + "name": "Realized Volatility Ratio", + "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))", + "category": "Volatility", + }, { - "name": 'Amount Concentration', - "formula": '(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))', - "category": 'Turnover', - }, + "name": "Amount Concentration", + "formula": "(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))", + "category": "Turnover", + }, { - "name": 'Cross-Sectional Volume Rank', - "formula": '(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))', - "category": 'Volume', - }, + "name": "Cross-Sectional Volume Rank", + "formula": "(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))", + "category": "Volume", + }, { - "name": 'Gap Momentum', - "formula": '(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))', - "category": 'Overnight', - }, + "name": "Gap Momentum", + "formula": "(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))", + "category": "Overnight", + }, # { # "name": 'VWAP Distance Decay', # "formula": '# TODO: Neg(CsRank(Decay(Div(Sub($close, $vwap), Add($vwap, 1e-8)), 10)))', # "category": 'VWAP', # }, { - "name": 'Tail Risk Indicator', - "formula": '(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', - "category": 'Risk', - }, + "name": "Tail Risk Indicator", + "formula": "(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))", + "category": "Risk", + }, { - "name": 'Volatility-Regime Reversal Divergence', - "formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))', - "category": 'Regime-switching', - }, + "name": "Volatility-Regime Reversal Divergence", + "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))", + "category": "Regime-switching", + }, { - "name": 'Regime Volume Signal', - "formula": 'if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))', - "category": 'Regime-switching', - }, + "name": "Regime Volume Signal", + "formula": "if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))", + "category": "Regime-switching", + }, { - "name": 'Liquidity-Adjusted Reversal', - "formula": '(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))', - "category": 'Mean-reversion', - }, + "name": "Liquidity-Adjusted Reversal", + "formula": "(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))", + "category": "Mean-reversion", + }, { - "name": 'Cross-Sectional Volatility Rank', - "formula": '(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))', - "category": 'Volatility', - }, + "name": "Cross-Sectional Volatility Rank", + "formula": "(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))", + "category": "Volatility", + }, { - "name": 'VWAP Bollinger', - "formula": '(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))', - "category": 'VWAP', - }, + "name": "VWAP Bollinger", + "formula": "(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))", + "category": "VWAP", + }, { - "name": 'Smoothed Return Reversal', - "formula": '(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))', - "category": 'Mean-reversion', - }, + "name": "Smoothed Return Reversal", + "formula": "(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))", + "category": "Mean-reversion", + }, { - "name": 'Volume-Price Divergence', - "formula": '(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))', - "category": 'Volume', - }, + "name": "Volume-Price Divergence", + "formula": "(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))", + "category": "Volume", + }, # { # "name": 'Decay Weighted Momentum', # "formula": '# TODO: Neg(CsRank(Decay($returns, 20)))', # "category": 'Momentum', # }, { - "name": 'Range Percentile', - "formula": '(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Range Percentile", + "formula": "(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Volume Skewness', - "formula": '(-cs_rank(ts_skew(vol, 20)))', - "category": 'Volume', - }, + "name": "Volume Skewness", + "formula": "(-cs_rank(ts_skew(vol, 20)))", + "category": "Volume", + }, # { # "name": 'Residual Momentum', # "formula": '# TODO: Neg(CsRank(TsLinRegResid($close, 20)))', # "category": 'Momentum', # }, { - "name": 'VWAP Trend', - "formula": '(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))', - "category": 'VWAP', - }, + "name": "VWAP Trend", + "formula": "(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))", + "category": "VWAP", + }, { - "name": 'Return Autocorrelation', - "formula": '(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))', - "category": 'Mean-reversion', - }, + "name": "Return Autocorrelation", + "formula": "(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))", + "category": "Mean-reversion", + }, { - "name": 'Price Efficiency', - "formula": '(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))', - "category": 'Trend', - }, + "name": "Price Efficiency", + "formula": "(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))", + "category": "Trend", + }, { - "name": 'Relative Volume Change', - "formula": '(-cs_rank(ts_pct_change(vol, 5)))', - "category": 'Volume', - }, + "name": "Relative Volume Change", + "formula": "(-cs_rank(ts_pct_change(vol, 5)))", + "category": "Volume", + }, { - "name": 'Weighted VWAP Position', - "formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))', - "category": 'VWAP', - }, + "name": "Weighted VWAP Position", + "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))", + "category": "VWAP", + }, { - "name": 'Regime Momentum Flip', - "formula": 'if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))', - "category": 'Regime-switching', - }, + "name": "Regime Momentum Flip", + "formula": "if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))", + "category": "Regime-switching", + }, { - "name": 'High-Low Volatility', - "formula": '(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))', - "category": 'Volatility', - }, + "name": "High-Low Volatility", + "formula": "(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))", + "category": "Volatility", + }, { - "name": 'Opening Gap Reversal', - "formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', - "category": 'Overnight', - }, + "name": "Opening Gap Reversal", + "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))", + "category": "Overnight", + }, { - "name": 'Volume Momentum Spread', - "formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))', - "category": 'Volume', - }, + "name": "Volume Momentum Spread", + "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))", + "category": "Volume", + }, { - "name": 'Regime Volume Reversal', - "formula": 'if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))', - "category": 'Regime-switching', - }, + "name": "Regime Volume Reversal", + "formula": "if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))", + "category": "Regime-switching", + }, # { # "name": 'Slope Reversal', # "formula": '# TODO: Neg(CsRank(TsLinRegSlope($close, 5)))', @@ -544,95 +527,95 @@ PAPER_FACTORS: List[Dict[str, str]] = [ # "category": 'VWAP', # }, { - "name": 'Turnover Rate Change', - "formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))', - "category": 'Turnover', - }, + "name": "Turnover Rate Change", + "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))", + "category": "Turnover", + }, # { # "name": 'Return Quantile Signal', # "formula": '# TODO: Neg(CsRank(Quantile($returns, 20, 0.75)))', # "category": 'Higher-moment', # }, { - "name": 'Double EMA Crossover', - "formula": '(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))', - "category": 'Trend', - }, + "name": "Double EMA Crossover", + "formula": "(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))", + "category": "Trend", + }, { - "name": 'Conditional Volatility Return', - "formula": '(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', - "category": 'Risk', - }, + "name": "Conditional Volatility Return", + "formula": "(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))", + "category": "Risk", + }, # { # "name": 'Amplitude Trend', # "formula": '# TODO: Neg(CsRank(TsLinRegSlope(Div(Sub($high, $low), Add($close, 1e-8)), 10)))', # "category": 'Volatility', # }, { - "name": 'Volume-Weighted Range', - "formula": '(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))', - "category": 'Volume', - }, + "name": "Volume-Weighted Range", + "formula": "(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))", + "category": "Volume", + }, { - "name": 'Intraday Efficiency Ratio', - "formula": '(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))', - "category": 'Intraday', - }, + "name": "Intraday Efficiency Ratio", + "formula": "(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))", + "category": "Intraday", + }, { - "name": 'Cumulative Volume Signal', - "formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))', - "category": 'Volume', - }, + "name": "Cumulative Volume Signal", + "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))", + "category": "Volume", + }, { - "name": 'VWAP Cross-Sectional Momentum', - "formula": '(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))', - "category": 'VWAP', - }, + "name": "VWAP Cross-Sectional Momentum", + "formula": "(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))", + "category": "VWAP", + }, { - "name": 'Mean-Reversion Indicator', - "formula": '(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Mean-Reversion Indicator", + "formula": "(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Volume Regime Indicator', - "formula": '(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))', - "category": 'Volume', - }, + "name": "Volume Regime Indicator", + "formula": "(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))", + "category": "Volume", + }, { - "name": 'Return Persistence', - "formula": '(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))', - "category": 'Momentum', - }, + "name": "Return Persistence", + "formula": "(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))", + "category": "Momentum", + }, # { # "name": 'Regime Trend Strength', # "formula": '# TODO: IfElse(Greater(Abs(TsLinRegSlope($close, 20)), Std($close, 20)), Neg(CsRank(TsLinRegSlope($close, 5))), Neg(CsRank(Return($close, 3))))', # "category": 'Regime-switching', # }, { - "name": 'VWAP Dispersion', - "formula": '(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))', - "category": 'VWAP', - }, + "name": "VWAP Dispersion", + "formula": "(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))", + "category": "VWAP", + }, { - "name": 'Smart Money Flow', - "formula": '(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))', - "category": 'Volume', - }, + "name": "Smart Money Flow", + "formula": "(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))", + "category": "Volume", + }, { - "name": 'Return Rank Dispersion', - "formula": '(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))', - "category": 'Mean-reversion', - }, + "name": "Return Rank Dispersion", + "formula": "(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))", + "category": "Mean-reversion", + }, { - "name": 'Volume Acceleration', - "formula": '(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))', - "category": 'Volume', - }, + "name": "Volume Acceleration", + "formula": "(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))", + "category": "Volume", + }, { - "name": 'Close-Low Ratio Trend', - "formula": '(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))', - "category": 'Mean-reversion', - }, + "name": "Close-Low Ratio Trend", + "formula": "(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))", + "category": "Mean-reversion", + }, # { # "name": 'Hull MA Deviation', # "formula": '# TODO: Neg(CsRank(Div(Sub($close, HMA($close, 10)), Add(Std($close, 10), 1e-8))))', @@ -644,119 +627,118 @@ PAPER_FACTORS: List[Dict[str, str]] = [ # "category": 'Momentum', # }, { - "name": 'Volume Profile Skew', - "formula": '(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))', - "category": 'Volume', - }, + "name": "Volume Profile Skew", + "formula": "(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))", + "category": "Volume", + }, { - "name": 'Conditional VWAP Signal', - "formula": 'if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))', - "category": 'VWAP', - }, + "name": "Conditional VWAP Signal", + "formula": "if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))", + "category": "VWAP", + }, { - "name": 'Extreme Volume Reversal', - "formula": '(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))', - "category": 'Volume', - }, + "name": "Extreme Volume Reversal", + "formula": "(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))", + "category": "Volume", + }, { - "name": 'Range Expansion Signal', - "formula": '(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))', - "category": 'Volatility', - }, + "name": "Range Expansion Signal", + "formula": "(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))", + "category": "Volatility", + }, { - "name": 'Short-Term IC Momentum', - "formula": '(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))', - "category": 'Momentum', - }, + "name": "Short-Term IC Momentum", + "formula": "(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))", + "category": "Momentum", + }, { - "name": 'VWAP Curvature', - "formula": '(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))', - "category": 'VWAP', - }, + "name": "VWAP Curvature", + "formula": "(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))", + "category": "VWAP", + }, { - "name": 'Relative Strength', - "formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))', - "category": 'Momentum', - }, + "name": "Relative Strength", + "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))", + "category": "Momentum", + }, { - "name": 'Volume-Correlated Return', - "formula": '(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))', - "category": 'Volume', - }, + "name": "Volume-Correlated Return", + "formula": "(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))", + "category": "Volume", + }, { - "name": 'Regime Volatility Band', - "formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))', - "category": 'Regime-switching', - }, + "name": "Regime Volatility Band", + "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))", + "category": "Regime-switching", + }, { - "name": 'Open-Close Spread Momentum', - "formula": '(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))', - "category": 'Intraday', - }, + "name": "Open-Close Spread Momentum", + "formula": "(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))", + "category": "Intraday", + }, { - "name": 'Volatility-Scaled Reversal', - "formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Volatility-Scaled Reversal", + "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'VWAP Time-Weighted Signal', - "formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))', - "category": 'VWAP', - }, + "name": "VWAP Time-Weighted Signal", + "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))", + "category": "VWAP", + }, { - "name": 'Covariance Structure Shift', - "formula": '(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))', - "category": 'Volume', - }, + "name": "Covariance Structure Shift", + "formula": "(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))", + "category": "Volume", + }, # { # "name": 'Quadratic Regression Residual', # "formula": '# TODO: Neg(CsRank(TsLinRegResid(Square($returns), 20)))', # "category": 'Higher-moment', # }, { - "name": 'VWAP Mean-Reversion Strength', - "formula": '(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))', - "category": 'VWAP', - }, + "name": "VWAP Mean-Reversion Strength", + "formula": "(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))", + "category": "VWAP", + }, { - "name": 'Multi-Scale Momentum', - "formula": '(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))', - "category": 'Momentum', - }, + "name": "Multi-Scale Momentum", + "formula": "(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))", + "category": "Momentum", + }, { - "name": 'Relative High Position', - "formula": '(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))', - "category": 'Mean-reversion', - }, + "name": "Relative High Position", + "formula": "(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))", + "category": "Mean-reversion", + }, { - "name": 'Turnover Volatility', - "formula": '(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))', - "category": 'Turnover', - }, + "name": "Turnover Volatility", + "formula": "(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))", + "category": "Turnover", + }, { - "name": 'Regime Correlation Signal', - "formula": 'if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))', - "category": 'Regime-switching', - }, + "name": "Regime Correlation Signal", + "formula": "if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))", + "category": "Regime-switching", + }, { - "name": 'Intraday Momentum Reversal', - "formula": '(-cs_rank(((close - open) / ((high - low) + 1e-8))))', - "category": 'Intraday', - }, + "name": "Intraday Momentum Reversal", + "formula": "(-cs_rank(((close - open) / ((high - low) + 1e-8))))", + "category": "Intraday", + }, # { # "name": 'Volume-Weighted Slope', # "formula": '# TODO: Neg(CsRank(TsLinRegSlope(Mul($returns, $volume), 10)))', # "category": 'Volume', # }, { - "name": 'Adaptive Range Reversal', - "formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))', - "category": 'Regime-switching', - }, + "name": "Adaptive Range Reversal", + "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))", + "category": "Regime-switching", + }, ] - def import_from_paper( path: Optional[Union[str, Path]] = None, ) -> FactorLibrary: @@ -802,6 +784,8 @@ def import_from_paper( admission_date=entry.get("admission_date", ""), signals=None, ) + if factor.formula.strip().startswith("# TODO"): + factor.metadata["unsupported"] = True library.admit_factor(factor) logger.info( diff --git a/tests/test_factorminer_library_io.py b/tests/test_factorminer_library_io.py new file mode 100644 index 0000000..a42d749 --- /dev/null +++ b/tests/test_factorminer_library_io.py @@ -0,0 +1,157 @@ +"""Tests for library I/O and paper factor imports.""" + +import json +from pathlib import Path + +import numpy as np +import pytest + +from src.factorminer.core.factor_library import Factor, FactorLibrary +from src.factorminer.core.library_io import ( + import_from_paper, + load_library, + save_library, +) + + +class TestSaveLoadLibrary: + """测试 FactorLibrary 的序列化与反序列化.""" + + def test_save_library_ignores_save_signals(self, tmp_path: Path) -> None: + """save_signals=True 也不应生成 .npz 文件.""" + library = FactorLibrary() + factor = Factor( + id=0, + name="test_factor", + formula="close / ts_delay(close, 1) - 1", + category="Momentum", + ic_mean=0.05, + icir=0.5, + ic_win_rate=0.55, + max_correlation=0.1, + batch_number=1, + ) + # 即使给一个信号矩阵,也不应保存 + factor.signals = np.ones((10, 20)) + library.admit_factor(factor) + + base_path = tmp_path / "test_lib" + save_library(library, str(base_path), save_signals=True) + + assert (base_path.with_suffix(".json")).exists() + assert not (Path(str(base_path) + "_signals.npz")).exists() + + def test_load_library_restores_metadata_and_unsupported( + self, tmp_path: Path + ) -> None: + """加载 JSON 后应恢复 metadata,并对 # TODO 公式标记 unsupported.""" + library = FactorLibrary() + f1 = Factor( + id=0, + name="ok_factor", + formula="cs_rank(close)", + category="Test", + ic_mean=0.0, + icir=0.0, + ic_win_rate=0.0, + max_correlation=0.0, + batch_number=0, + metadata={"author": "ai"}, + ) + f2 = Factor( + id=0, + name="todo_factor", + formula="# TODO: Neg(CsRank(Decay(close, 10)))", + category="Test", + ic_mean=0.0, + icir=0.0, + ic_win_rate=0.0, + max_correlation=0.0, + batch_number=0, + ) + library.admit_factor(f1) + library.admit_factor(f2) + + base_path = tmp_path / "meta_lib" + save_library(library, str(base_path)) + + loaded = load_library(str(base_path)) + assert loaded.size == 2 + + f1_loaded = loaded.get_factor(1) + assert f1_loaded.metadata.get("author") == "ai" + assert not f1_loaded.metadata.get("unsupported", False) + + f2_loaded = loaded.get_factor(2) + assert f2_loaded.metadata.get("unsupported") is True + + def test_factor_round_trip_with_metadata(self) -> None: + """Factor.to_dict / from_dict 应正确传递 metadata.""" + factor = Factor( + id=1, + name="round_trip", + formula="ts_mean(close, 20)", + category="Momentum", + ic_mean=0.1, + icir=1.0, + ic_win_rate=0.6, + max_correlation=0.2, + batch_number=2, + metadata={"unsupported": True, "tags": ["test"]}, + ) + d = factor.to_dict() + restored = Factor.from_dict(d) + assert restored.metadata == factor.metadata + + +class TestImportFromPaper: + """测试从内置 paper catalog 导入因子.""" + + def test_import_from_paper_includes_all_translated_factors(self) -> None: + """内置 PAPER_FACTORS 应全部成功导入.""" + library = import_from_paper() + assert library.size > 0 + # 当前 catalog 中已有因子应全部被 admit + for factor in library.list_factors(): + assert factor.id > 0 + assert factor.name + assert factor.formula + assert factor.category + + def test_import_from_paper_marks_todo_as_unsupported(self, tmp_path: Path) -> None: + """对 # TODO 公式应在 metadata 中标记 unsupported.""" + custom_path = tmp_path / "custom_factors.json" + custom_data = [ + { + "name": "Normal Factor", + "formula": "cs_rank(close)", + "category": "Test", + }, + { + "name": "Unsupported Factor", + "formula": "# TODO: Neg(CsRank(Decay(close, 10)))", + "category": "Test", + }, + ] + custom_path.write_text(json.dumps(custom_data), encoding="utf-8") + + library = import_from_paper(str(custom_path)) + assert library.size == 2 + + normal = library.list_factors()[0] + todo = library.list_factors()[1] + + assert normal.metadata.get("unsupported") is None + assert todo.metadata.get("unsupported") is True + + def test_import_from_paper_path_override(self, tmp_path: Path) -> None: + """通过 path 参数加载外部 JSON 列表.""" + custom_path = tmp_path / "override.json" + custom_data = [ + {"name": "custom_1", "formula": "open + close", "category": "Custom"}, + ] + custom_path.write_text(json.dumps(custom_data), encoding="utf-8") + + library = import_from_paper(str(custom_path)) + assert library.size == 1 + assert library.list_factors()[0].name == "custom_1"