refactor(factorminer): 禁用 npz 信号缓存并将库 I/O 对接本地 DSL

- 为 Factor 数据类新增 metadata 字段,用于标记未实现算子(unsupported)
- save_library 废弃 save_signals 参数,内部强制忽略,仅持久化 JSON 元数据,不再写入 .npz
- load_library 删除 .npz 恢复逻辑;加载时自动将 # TODO 公式的 unsupported 标记设为 True
- import_from_paper() 直接基于已本地化的 PAPER_FACTORS 构建库,并同步标记 TODO 公式
- 新增 tests/test_factorminer_library_io.py,覆盖序列化、加载及 paper factors 导入
This commit is contained in:
2026-04-08 22:10:17 +08:00
parent d71f723602
commit 65500cce27
3 changed files with 617 additions and 467 deletions

View File

@@ -39,6 +39,7 @@ class Factor:
signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T) signals: Optional[np.ndarray] = field(default=None, repr=False) # (M, T)
research_metrics: dict = field(default_factory=dict) research_metrics: dict = field(default_factory=dict)
provenance: dict = field(default_factory=dict) provenance: dict = field(default_factory=dict)
metadata: dict = field(default_factory=dict)
def __post_init__(self) -> None: def __post_init__(self) -> None:
if not self.admission_date: if not self.admission_date:
@@ -59,6 +60,7 @@ class Factor:
"admission_date": self.admission_date, "admission_date": self.admission_date,
"research_metrics": self.research_metrics, "research_metrics": self.research_metrics,
"provenance": self.provenance, "provenance": self.provenance,
"metadata": self.metadata,
} }
@classmethod @classmethod
@@ -77,6 +79,7 @@ class Factor:
admission_date=d.get("admission_date", ""), admission_date=d.get("admission_date", ""),
research_metrics=d.get("research_metrics", {}), research_metrics=d.get("research_metrics", {}),
provenance=d.get("provenance", {}), provenance=d.get("provenance", {}),
metadata=d.get("metadata", {}),
) )
@@ -172,7 +175,7 @@ class FactorLibrary:
# Pearson on ranks == Spearman # Pearson on ranks == Spearman
ra_c = ra - ra.mean() ra_c = ra - ra.mean()
rb_c = rb - rb.mean() rb_c = rb - rb.mean()
denom = np.sqrt((ra_c ** 2).sum() * (rb_c ** 2).sum()) denom = np.sqrt((ra_c**2).sum() * (rb_c**2).sum())
if denom < 1e-12: if denom < 1e-12:
continue continue
corr_sum += abs((ra_c * rb_c).sum() / denom) corr_sum += abs((ra_c * rb_c).sum() / denom)
@@ -206,9 +209,7 @@ class FactorLibrary:
(admitted, reason) : Tuple[bool, str] (admitted, reason) : Tuple[bool, str]
""" """
if candidate_ic < self.ic_threshold: if candidate_ic < self.ic_threshold:
return False, ( return False, (f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}")
f"IC {candidate_ic:.4f} below threshold {self.ic_threshold}"
)
if self.size == 0: if self.size == 0:
return True, "First factor in library" return True, "First factor in library"
@@ -221,9 +222,7 @@ class FactorLibrary:
f"{self.correlation_threshold} with existing library factor" f"{self.correlation_threshold} with existing library factor"
) )
return True, ( return True, (f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}")
f"Admitted: IC={candidate_ic:.4f}, max_corr={max_corr:.4f}"
)
def check_replacement( def check_replacement(
self, self,
@@ -258,8 +257,10 @@ class FactorLibrary:
(should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str] (should_replace, factor_to_replace_id, reason) : Tuple[bool, Optional[int], str]
""" """
if candidate_ic < ic_min: if candidate_ic < ic_min:
return False, None, ( return (
f"IC {candidate_ic:.4f} below replacement floor {ic_min}" False,
None,
(f"IC {candidate_ic:.4f} below replacement floor {ic_min}"),
) )
if self.size == 0: if self.size == 0:
@@ -277,21 +278,33 @@ class FactorLibrary:
correlated_factors.append((fid, corr, factor.ic_mean)) correlated_factors.append((fid, corr, factor.ic_mean))
if len(correlated_factors) != 1: if len(correlated_factors) != 1:
return False, None, ( return (
False,
None,
(
f"Found {len(correlated_factors)} correlated factors " f"Found {len(correlated_factors)} correlated factors "
f"(need exactly 1 for replacement)" f"(need exactly 1 for replacement)"
),
) )
fid, corr, existing_ic = correlated_factors[0] fid, corr, existing_ic = correlated_factors[0]
if candidate_ic < ic_ratio * existing_ic: if candidate_ic < ic_ratio * existing_ic:
return False, None, ( return (
False,
None,
(
f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = " f"IC {candidate_ic:.4f} < {ic_ratio} * {existing_ic:.4f} = "
f"{ic_ratio * existing_ic:.4f}" f"{ic_ratio * existing_ic:.4f}"
),
) )
return True, fid, ( return (
True,
fid,
(
f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > " f"Replace factor {fid}: candidate IC {candidate_ic:.4f} > "
f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}" f"{ic_ratio} * {existing_ic:.4f}, corr={corr:.4f}"
),
) )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -321,8 +334,11 @@ class FactorLibrary:
logger.info( logger.info(
"Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)", "Admitted factor %d '%s' (IC=%.4f, max_corr=%.4f, category=%s)",
factor.id, factor.name, factor.ic_mean, factor.id,
factor.max_correlation, factor.category, factor.name,
factor.ic_mean,
factor.max_correlation,
factor.category,
) )
return factor.id return factor.id
@@ -360,7 +376,10 @@ class FactorLibrary:
logger.info( logger.info(
"Replaced factor %d with %d '%s' (IC=%.4f)", "Replaced factor %d with %d '%s' (IC=%.4f)",
old_id, new_factor.id, new_factor.name, new_factor.ic_mean, old_id,
new_factor.id,
new_factor.name,
new_factor.ic_mean,
) )
def remove_factor(self, factor_id: int) -> None: def remove_factor(self, factor_id: int) -> None:
@@ -381,9 +400,7 @@ class FactorLibrary:
# Correlation matrix management # Correlation matrix management
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _max_correlation_with_library( def _max_correlation_with_library(self, candidate_signals: np.ndarray) -> float:
self, candidate_signals: np.ndarray
) -> float:
"""Compute max |rho| between candidate and all library factors.""" """Compute max |rho| between candidate and all library factors."""
max_corr = 0.0 max_corr = 0.0
for factor in self.factors.values(): for factor in self.factors.values():
@@ -453,9 +470,7 @@ class FactorLibrary:
self.correlation_matrix[idx, other_idx] = 0.0 self.correlation_matrix[idx, other_idx] = 0.0
self.correlation_matrix[other_idx, idx] = 0.0 self.correlation_matrix[other_idx, idx] = 0.0
continue continue
corr = self._compute_correlation_vectorized( corr = self._compute_correlation_vectorized(factor.signals, other.signals)
factor.signals, other.signals
)
self.correlation_matrix[idx, other_idx] = corr self.correlation_matrix[idx, other_idx] = corr
self.correlation_matrix[other_idx, idx] = corr self.correlation_matrix[other_idx, idx] = corr
@@ -509,10 +524,7 @@ class FactorLibrary:
def get_factors_by_category(self, category: str) -> List[Factor]: def get_factors_by_category(self, category: str) -> List[Factor]:
"""Return all factors matching a given category.""" """Return all factors matching a given category."""
return [ return [f for f in self.factors.values() if f.category == category]
f for f in self.factors.values()
if f.category == category
]
def get_diagnostics(self) -> dict: def get_diagnostics(self) -> dict:
"""Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation. """Library diagnostics: avg |rho|, max tail correlations, per-category counts, saturation.
@@ -539,8 +551,7 @@ class FactorLibrary:
diag["category_counts"] = dict(cat_counts) diag["category_counts"] = dict(cat_counts)
diag["category_avg_ic"] = { diag["category_avg_ic"] = {
cat: cat_ic_sums[cat] / cat_counts[cat] cat: cat_ic_sums[cat] / cat_counts[cat] for cat in cat_counts
for cat in cat_counts
} }
# Correlation statistics # Correlation statistics
@@ -575,9 +586,7 @@ class FactorLibrary:
Returns a lightweight dictionary suitable for inclusion in LLM prompts Returns a lightweight dictionary suitable for inclusion in LLM prompts
or memory store entries. or memory store entries.
""" """
factors_sorted = sorted( factors_sorted = sorted(self.factors.values(), key=lambda f: f.id, reverse=True)
self.factors.values(), key=lambda f: f.id, reverse=True
)
recent = factors_sorted[:5] # Last 5 admissions recent = factors_sorted[:5] # Last 5 admissions
categories = defaultdict(int) categories = defaultdict(int)

View File

@@ -23,26 +23,22 @@ logger = logging.getLogger(__name__)
# Save / Load # Save / Load
# ====================================================================== # ======================================================================
def save_library( def save_library(
library: FactorLibrary, library: FactorLibrary,
path: Union[str, Path], path: Union[str, Path],
save_signals: bool = True, save_signals: bool = True,
) -> None: ) -> None:
"""Save a FactorLibrary to disk. """Save a FactorLibrary to disk (仅保存 JSON 元数据).
Creates two files:
- ``<path>.json`` -- factor metadata and library configuration
- ``<path>_signals.npz`` -- binary signal cache (if save_signals=True
and any factors have signals)
Parameters Parameters
---------- ----------
library : FactorLibrary library : FactorLibrary
path : str or Path path : str or Path
Base path (without extension). E.g. ``"output/my_library"`` produces Base path (without extension). E.g. ``"output/my_library"`` produces
``output/my_library.json`` and ``output/my_library_signals.npz``. ``output/my_library.json``.
save_signals : bool save_signals : bool
Whether to write the binary signal cache. 已废弃,始终忽略,不再写入 .npz 信号缓存。
""" """
path = Path(path) path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
@@ -63,20 +59,10 @@ def save_library(
json.dump(meta, fp, indent=2) json.dump(meta, fp, indent=2)
logger.info("Saved library metadata to %s (%d factors)", json_path, library.size) logger.info("Saved library metadata to %s (%d factors)", json_path, library.size)
# -- Binary signal cache -- # -- Binary signal cache (已禁用) --
print("[library_io] 信号缓存已禁用,仅保存 JSON 元数据")
if save_signals: if save_signals:
signal_arrays: Dict[str, np.ndarray] = {} logger.info("save_signals 参数已废弃,信号缓存不再写入")
for f in library.list_factors():
if f.signals is not None:
signal_arrays[f"factor_{f.id}"] = f.signals
if signal_arrays:
npz_path = Path(str(path) + "_signals.npz")
np.savez_compressed(npz_path, **signal_arrays)
logger.info(
"Saved signal cache to %s (%d arrays)",
npz_path, len(signal_arrays),
)
def load_library(path: Union[str, Path]) -> FactorLibrary: def load_library(path: Union[str, Path]) -> FactorLibrary:
@@ -85,8 +71,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
Parameters Parameters
---------- ----------
path : str or Path path : str or Path
Base path (without extension). Will look for ``<path>.json`` and Base path (without extension). Will look for ``<path>.json``.
optionally ``<path>_signals.npz``.
Returns Returns
------- -------
@@ -107,6 +92,8 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Restore factors # Restore factors
for fd in meta.get("factors", []): for fd in meta.get("factors", []):
factor = Factor.from_dict(fd) factor = Factor.from_dict(fd)
if factor.formula.strip().startswith("# TODO"):
factor.metadata["unsupported"] = True
library.factors[factor.id] = factor library.factors[factor.id] = factor
# Restore correlation matrix # Restore correlation matrix
@@ -117,24 +104,9 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Restore id-to-index mapping # Restore id-to-index mapping
if "id_to_index" in meta: if "id_to_index" in meta:
library._id_to_index = { library._id_to_index = {int(k): v for k, v in meta["id_to_index"].items()}
int(k): v for k, v in meta["id_to_index"].items()
}
# Load signal cache if present logger.info("Loaded library from %s (%d factors)", json_path, library.size)
npz_path = Path(str(path) + "_signals.npz")
if npz_path.exists():
data = np.load(npz_path)
for f in library.factors.values():
key = f"factor_{f.id}"
if key in data:
f.signals = data[key]
data.close()
logger.info("Loaded signal cache from %s", npz_path)
logger.info(
"Loaded library from %s (%d factors)", json_path, library.size
)
return library return library
@@ -142,6 +114,7 @@ def load_library(path: Union[str, Path]) -> FactorLibrary:
# Export utilities # Export utilities
# ====================================================================== # ======================================================================
def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None: def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
"""Export the factor table to CSV. """Export the factor table to CSV.
@@ -152,15 +125,24 @@ def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
fieldnames = [ fieldnames = [
"ID", "Name", "Formula", "Category", "IC_Mean", "ICIR", "ID",
"IC_Win_Rate", "Max_Correlation", "Batch", "Admission_Date", "Name",
"Formula",
"Category",
"IC_Mean",
"ICIR",
"IC_Win_Rate",
"Max_Correlation",
"Batch",
"Admission_Date",
] ]
with open(path, "w", newline="") as fp: with open(path, "w", newline="") as fp:
writer = csv.DictWriter(fp, fieldnames=fieldnames) writer = csv.DictWriter(fp, fieldnames=fieldnames)
writer.writeheader() writer.writeheader()
for f in library.list_factors(): for f in library.list_factors():
writer.writerow({ writer.writerow(
{
"ID": f.id, "ID": f.id,
"Name": f.name, "Name": f.name,
"Formula": f.formula, "Formula": f.formula,
@@ -171,7 +153,8 @@ def export_csv(library: FactorLibrary, path: Union[str, Path]) -> None:
"Max_Correlation": f"{f.max_correlation:.4f}", "Max_Correlation": f"{f.max_correlation:.4f}",
"Batch": f.batch_number, "Batch": f.batch_number,
"Admission_Date": f.admission_date, "Admission_Date": f.admission_date,
}) }
)
logger.info("Exported %d factors to %s", library.size, path) logger.info("Exported %d factors to %s", library.size, path)
@@ -204,49 +187,49 @@ def export_formulas(library: FactorLibrary, path: Union[str, Path]) -> None:
# Each entry: (name, formula, category) # Each entry: (name, formula, category)
PAPER_FACTORS: List[Dict[str, str]] = [ PAPER_FACTORS: List[Dict[str, str]] = [
{ {
"name": 'Intraday Range Position', "name": "Intraday Range Position",
"formula": '(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))', "formula": "(-cs_rank(((close - ts_min(close, 48)) / ((ts_max(close, 48) - ts_min(close, 48)) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume-Weighted Momentum', "name": "Volume-Weighted Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))', "formula": "(-cs_rank((ts_pct_change(close, 5) * (vol / ts_mean(vol, 20)))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Residual Volatility', "name": "Residual Volatility",
"formula": '(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))', "formula": "(-cs_rank(ts_std((close - ts_ema(close, 10)), 20)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Intraday Amplitude Ratio', "name": "Intraday Amplitude Ratio",
"formula": '(-cs_rank(((high - low) / (close + 1e-8))))', "formula": "(-cs_rank(((high - low) / (close + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Volume Surprise', "name": "Volume Surprise",
"formula": '(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))', "formula": "(-cs_rank(((vol - ts_mean(vol, 20)) / (ts_std(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'VWAP Deviation', "name": "VWAP Deviation",
"formula": '(-((close - (amount / vol)) / (amount / vol)))', "formula": "(-((close - (amount / vol)) / (amount / vol)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Short-term Reversal', "name": "Short-term Reversal",
"formula": '(-cs_rank(ts_pct_change(close, 3)))', "formula": "(-cs_rank(ts_pct_change(close, 3)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Turnover Momentum', "name": "Turnover Momentum",
"formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))', "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 5)))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'High-Low Midpoint Reversion', "name": "High-Low Midpoint Reversion",
"formula": '(-cs_rank((close - ((high + low) / 2))))', "formula": "(-cs_rank((close - ((high + low) / 2))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Rolling Beta Residual', # "name": 'Rolling Beta Residual',
@@ -259,69 +242,69 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Accumulation-Distribution', "name": "Accumulation-Distribution",
"formula": '(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))', "formula": "(-cs_rank(ts_sum(((((2 * close) - (high + low)) / ((high - low) + 1e-8)) * vol), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Relative Strength Index Deviation', "name": "Relative Strength Index Deviation",
"formula": '(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))', "formula": "(-cs_rank((ts_mean(max_(ts_delta(close, 1), 0), 14) - ts_mean(abs(min_(ts_delta(close, 1), 0)), 14))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Price-Volume Correlation', "name": "Price-Volume Correlation",
"formula": '(-ts_corr(close, vol, 10))', "formula": "(-ts_corr(close, vol, 10))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Skewness of Returns', "name": "Skewness of Returns",
"formula": '(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))', "formula": "(-cs_rank(ts_skew((close / ts_delay(close, 1) - 1), 20)))",
"category": 'Higher-moment', "category": "Higher-moment",
}, },
{ {
"name": 'Kurtosis of Returns', "name": "Kurtosis of Returns",
"formula": '(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))', "formula": "(-cs_rank(ts_kurt((close / ts_delay(close, 1) - 1), 20)))",
"category": 'Higher-moment', "category": "Higher-moment",
}, },
{ {
"name": 'Volume-Weighted Return', "name": "Volume-Weighted Return",
"formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))', "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 10) / (ts_sum(vol, 10) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Close-to-High Ratio', "name": "Close-to-High Ratio",
"formula": '(-cs_rank(((high - close) / (high + 1e-8))))', "formula": "(-cs_rank(((high - close) / (high + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Delayed Correlation Shift', "name": "Delayed Correlation Shift",
"formula": '(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))', "formula": "(-cs_rank((ts_corr(close, vol, 10) - ts_corr(ts_delay(close, 5), vol, 10))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Exponential Momentum', "name": "Exponential Momentum",
"formula": '(-cs_rank((close - ts_ema(close, 20))))', "formula": "(-cs_rank((close - ts_ema(close, 20))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Range-Adjusted Volume', "name": "Range-Adjusted Volume",
"formula": '(-cs_rank((vol / ((high - low) + 1e-8))))', "formula": "(-cs_rank((vol / ((high - low) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Cumulative Return Rank', "name": "Cumulative Return Rank",
"formula": '(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))', "formula": "(-cs_rank(ts_sum((close / ts_delay(close, 1) - 1), 10)))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'VWAP Momentum', "name": "VWAP Momentum",
"formula": '(-cs_rank(ts_pct_change((amount / vol), 5)))', "formula": "(-cs_rank(ts_pct_change((amount / vol), 5)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Bollinger Band Position', "name": "Bollinger Band Position",
"formula": '(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))', "formula": "(-cs_rank(((close - ts_mean(close, 20)) / (ts_std(close, 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Volume Decay Weighted', # "name": 'Volume Decay Weighted',
@@ -329,64 +312,64 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Overnight Return', "name": "Overnight Return",
"formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))', "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8))))",
"category": 'Overnight', "category": "Overnight",
}, },
{ {
"name": 'Intraday Return', "name": "Intraday Return",
"formula": '(-cs_rank(((close - open) / (open + 1e-8))))', "formula": "(-cs_rank(((close - open) / (open + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Max Drawdown', "name": "Max Drawdown",
"formula": '(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))', "formula": "(-cs_rank(((close - ts_max(close, 20)) / (ts_max(close, 20) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
{ {
"name": 'Hurst Exponent Proxy', "name": "Hurst Exponent Proxy",
"formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))', "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 5) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Volume Imbalance', "name": "Volume Imbalance",
"formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))', "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 20))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Weighted Close Position', "name": "Weighted Close Position",
"formula": '(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))', "formula": "(-cs_rank((((2 * close) - (high + low)) / ((high - low) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Trend Intensity', "name": "Trend Intensity",
"formula": '(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))', "formula": "(-cs_rank((abs(ts_delta(close, 10)) / (ts_sum(abs(ts_delta(close, 1)), 10) + 1e-8))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Return Dispersion', "name": "Return Dispersion",
"formula": '(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))', "formula": "(-cs_rank(ts_std((close / ts_delay(close, 1) - 1), 5)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'VWAP Relative Strength', "name": "VWAP Relative Strength",
"formula": '(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))', "formula": "(-cs_rank(((ts_mean(close, 5) - (amount / vol)) / ((amount / vol) + 1e-8))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Rank Reversal', "name": "Rank Reversal",
"formula": '(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))', "formula": "(-cs_rank((ts_rank(close, 10) - ts_rank(close, 30))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Money Flow Index', "name": "Money Flow Index",
"formula": '(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))', "formula": "(-cs_rank((ts_sum((max_(ts_delta(close, 1), 0) * vol), 14) / (ts_sum((abs(ts_delta(close, 1)) * vol), 14) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Adaptive Momentum', "name": "Adaptive Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))', "formula": "(-cs_rank((ts_pct_change(close, 10) * (ts_std((close / ts_delay(close, 1) - 1), 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8)))))",
"category": 'Momentum', "category": "Momentum",
}, },
# { # {
# "name": 'Volume Trend', # "name": 'Volume Trend',
@@ -394,29 +377,29 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Price Acceleration', "name": "Price Acceleration",
"formula": '(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))', "formula": "(-cs_rank((ts_delta(close, 5) - ts_delta(ts_delay(close, 5), 5))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Realized Volatility Ratio', "name": "Realized Volatility Ratio",
"formula": '(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))', "formula": "(-cs_rank((ts_std((close / ts_delay(close, 1) - 1), 10) / (ts_std((close / ts_delay(close, 1) - 1), 30) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Amount Concentration', "name": "Amount Concentration",
"formula": '(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))', "formula": "(-cs_rank((ts_max(amount, 5) / (ts_mean(amount, 20) + 1e-8))))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'Cross-Sectional Volume Rank', "name": "Cross-Sectional Volume Rank",
"formula": '(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))', "formula": "(-cs_rank((vol / (ts_mean(vol, 60) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Gap Momentum', "name": "Gap Momentum",
"formula": '(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))', "formula": "(-cs_rank(ts_sum(((open - ts_delay(close, 1)) / (ts_delay(close, 1) + 1e-8)), 5)))",
"category": 'Overnight', "category": "Overnight",
}, },
# { # {
# "name": 'VWAP Distance Decay', # "name": 'VWAP Distance Decay',
@@ -424,44 +407,44 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Tail Risk Indicator', "name": "Tail Risk Indicator",
"formula": '(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', "formula": "(-cs_rank((ts_min((close / ts_delay(close, 1) - 1), 20) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
{ {
"name": 'Volatility-Regime Reversal Divergence', "name": "Volatility-Regime Reversal Divergence",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 12) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 12), 48)), (-cs_rank(ts_delta(close, 3))), (-cs_rank(((close - low) / ((high - low) + 0.0001)))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Regime Volume Signal', "name": "Regime Volume Signal",
"formula": 'if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))', "formula": "if_((vol > ts_mean(vol, 20)), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 5))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Liquidity-Adjusted Reversal', "name": "Liquidity-Adjusted Reversal",
"formula": '(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))', "formula": "(-cs_rank((ts_pct_change(close, 3) * (vol / (ts_mean(vol, 20) + 1e-8)))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Cross-Sectional Volatility Rank', "name": "Cross-Sectional Volatility Rank",
"formula": '(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))', "formula": "(-cs_rank(cs_rank(ts_std((close / ts_delay(close, 1) - 1), 10))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'VWAP Bollinger', "name": "VWAP Bollinger",
"formula": '(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))', "formula": "(-cs_rank((((amount / vol) - ts_mean((amount / vol), 20)) / (ts_std((amount / vol), 20) + 1e-8))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Smoothed Return Reversal', "name": "Smoothed Return Reversal",
"formula": '(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))', "formula": "(-cs_rank(ts_ema((close / ts_delay(close, 1) - 1), 5)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume-Price Divergence', "name": "Volume-Price Divergence",
"formula": '(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))', "formula": "(-cs_rank((ts_rank(vol, 10) - ts_rank(close, 10))))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Decay Weighted Momentum', # "name": 'Decay Weighted Momentum',
@@ -469,14 +452,14 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'Range Percentile', "name": "Range Percentile",
"formula": '(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))', "formula": "(-cs_rank(((close - ts_min(close, 20)) / ((ts_max(close, 20) - ts_min(close, 20)) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Skewness', "name": "Volume Skewness",
"formula": '(-cs_rank(ts_skew(vol, 20)))', "formula": "(-cs_rank(ts_skew(vol, 20)))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Residual Momentum', # "name": 'Residual Momentum',
@@ -484,54 +467,54 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'VWAP Trend', "name": "VWAP Trend",
"formula": '(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))', "formula": "(-cs_rank(ts_delta(((close - (amount / vol)) / (amount / vol)), 5)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Return Autocorrelation', "name": "Return Autocorrelation",
"formula": '(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))', "formula": "(-cs_rank(ts_corr((close / ts_delay(close, 1) - 1), ts_delay((close / ts_delay(close, 1) - 1), 1), 10)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Price Efficiency', "name": "Price Efficiency",
"formula": '(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))', "formula": "(-cs_rank((abs(ts_sum((close / ts_delay(close, 1) - 1), 10)) / (ts_sum(abs((close / ts_delay(close, 1) - 1)), 10) + 1e-8))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Relative Volume Change', "name": "Relative Volume Change",
"formula": '(-cs_rank(ts_pct_change(vol, 5)))', "formula": "(-cs_rank(ts_pct_change(vol, 5)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Weighted VWAP Position', "name": "Weighted VWAP Position",
"formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))', "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / (amount / vol)), 10)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Regime Momentum Flip', "name": "Regime Momentum Flip",
"formula": 'if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))', "formula": "if_((ts_mean((close / ts_delay(close, 1) - 1), 5) > 0), (-cs_rank(ts_pct_change(close, 10))), cs_rank(ts_pct_change(close, 3)))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'High-Low Volatility', "name": "High-Low Volatility",
"formula": '(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))', "formula": "(-cs_rank(ts_mean(((high - low) / (close + 1e-8)), 10)))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Opening Gap Reversal', "name": "Opening Gap Reversal",
"formula": '(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', "formula": "(-cs_rank(((open - ts_delay(close, 1)) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))",
"category": 'Overnight', "category": "Overnight",
}, },
{ {
"name": 'Volume Momentum Spread', "name": "Volume Momentum Spread",
"formula": '(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))', "formula": "(-cs_rank((ts_mean(vol, 5) - ts_mean(vol, 40))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Regime Volume Reversal', "name": "Regime Volume Reversal",
"formula": 'if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_(((vol / (ts_mean(vol, 20) + 1e-8)) > 1.5), (-cs_rank((close / ts_delay(close, 1) - 1))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
# { # {
# "name": 'Slope Reversal', # "name": 'Slope Reversal',
@@ -544,9 +527,9 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'VWAP', # "category": 'VWAP',
# }, # },
{ {
"name": 'Turnover Rate Change', "name": "Turnover Rate Change",
"formula": '(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))', "formula": "(-cs_rank(ts_delta((amount / (vol + 1e-8)), 10)))",
"category": 'Turnover', "category": "Turnover",
}, },
# { # {
# "name": 'Return Quantile Signal', # "name": 'Return Quantile Signal',
@@ -554,14 +537,14 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Higher-moment', # "category": 'Higher-moment',
# }, # },
{ {
"name": 'Double EMA Crossover', "name": "Double EMA Crossover",
"formula": '(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))', "formula": "(-cs_rank((ts_ema(close, 5) - ts_ema(close, 20))))",
"category": 'Trend', "category": "Trend",
}, },
{ {
"name": 'Conditional Volatility Return', "name": "Conditional Volatility Return",
"formula": '(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))', "formula": "(-cs_rank(((close / ts_delay(close, 1) - 1) / (ts_std((close / ts_delay(close, 1) - 1), 10) + 1e-8))))",
"category": 'Risk', "category": "Risk",
}, },
# { # {
# "name": 'Amplitude Trend', # "name": 'Amplitude Trend',
@@ -569,39 +552,39 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volatility', # "category": 'Volatility',
# }, # },
{ {
"name": 'Volume-Weighted Range', "name": "Volume-Weighted Range",
"formula": '(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))', "formula": "(-cs_rank(ts_mean((((high - low) / (close + 1e-8)) * vol), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Intraday Efficiency Ratio', "name": "Intraday Efficiency Ratio",
"formula": '(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))', "formula": "(-cs_rank((abs((close - open)) / ((high - low) + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Cumulative Volume Signal', "name": "Cumulative Volume Signal",
"formula": '(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))', "formula": "(-cs_rank((ts_sum(((close / ts_delay(close, 1) - 1) * vol), 20) / (ts_sum(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'VWAP Cross-Sectional Momentum', "name": "VWAP Cross-Sectional Momentum",
"formula": '(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))', "formula": "(-cs_rank(cs_rank(ts_pct_change((amount / vol), 10))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Mean-Reversion Indicator', "name": "Mean-Reversion Indicator",
"formula": '(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))', "formula": "(-cs_rank(((close - ts_mean(close, 10)) / (ts_mean(close, 10) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Regime Indicator', "name": "Volume Regime Indicator",
"formula": '(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))', "formula": "(-cs_rank((ts_std(vol, 5) / (ts_std(vol, 20) + 1e-8))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Return Persistence', "name": "Return Persistence",
"formula": '(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))', "formula": "(-cs_rank((sign(ts_delta(close, 1)) * sign(ts_delta(close, 5)))))",
"category": 'Momentum', "category": "Momentum",
}, },
# { # {
# "name": 'Regime Trend Strength', # "name": 'Regime Trend Strength',
@@ -609,29 +592,29 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Regime-switching', # "category": 'Regime-switching',
# }, # },
{ {
"name": 'VWAP Dispersion', "name": "VWAP Dispersion",
"formula": '(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))', "formula": "(-cs_rank(ts_std(((close - (amount / vol)) / (amount / vol)), 10)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Smart Money Flow', "name": "Smart Money Flow",
"formula": '(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))', "formula": "(-cs_rank(ts_sum((if_((close > ts_delay(close, 1)), vol, (-vol)) * ((high - low) / (close + 1e-8))), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Return Rank Dispersion', "name": "Return Rank Dispersion",
"formula": '(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))', "formula": "(-cs_rank((ts_rank((close / ts_delay(close, 1) - 1), 5) - ts_rank((close / ts_delay(close, 1) - 1), 20))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Volume Acceleration', "name": "Volume Acceleration",
"formula": '(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))', "formula": "(-cs_rank((ts_delta(vol, 5) - ts_delta(ts_delay(vol, 5), 5))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Close-Low Ratio Trend', "name": "Close-Low Ratio Trend",
"formula": '(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))', "formula": "(-cs_rank(ts_mean(((close - low) / ((high - low) + 1e-8)), 5)))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
# { # {
# "name": 'Hull MA Deviation', # "name": 'Hull MA Deviation',
@@ -644,69 +627,69 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Momentum', # "category": 'Momentum',
# }, # },
{ {
"name": 'Volume Profile Skew', "name": "Volume Profile Skew",
"formula": '(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))', "formula": "(-cs_rank(ts_skew((vol / (ts_mean(vol, 20) + 1e-8)), 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Conditional VWAP Signal', "name": "Conditional VWAP Signal",
"formula": 'if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))', "formula": "if_((close > (amount / vol)), (-cs_rank(((close - (amount / vol)) / (amount / vol)))), cs_rank((((amount / vol) - close) / (amount / vol))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Extreme Volume Reversal', "name": "Extreme Volume Reversal",
"formula": '(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))', "formula": "(-cs_rank((if_((vol > (2 * ts_mean(vol, 20))), 1, 0) * (close / ts_delay(close, 1) - 1))))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Range Expansion Signal', "name": "Range Expansion Signal",
"formula": '(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))', "formula": "(-cs_rank(((high - low) / (ts_mean((high - low), 20) + 1e-8))))",
"category": 'Volatility', "category": "Volatility",
}, },
{ {
"name": 'Short-Term IC Momentum', "name": "Short-Term IC Momentum",
"formula": '(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))', "formula": "(-cs_rank(ts_sum((sign((close / ts_delay(close, 1) - 1)) * abs((close / ts_delay(close, 1) - 1))), 5)))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'VWAP Curvature', "name": "VWAP Curvature",
"formula": '(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))', "formula": "(-cs_rank(((((amount / vol) - ts_delay((amount / vol), 5)) / (ts_delay((amount / vol), 5) + 1e-8)) - ((ts_delay((amount / vol), 5) - ts_delay((amount / vol), 10)) / (ts_delay((amount / vol), 10) + 1e-8)))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Relative Strength', "name": "Relative Strength",
"formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))', "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_pct_change(close, 20) + 1e-8))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Volume-Correlated Return', "name": "Volume-Correlated Return",
"formula": '(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))', "formula": "(-cs_rank(ts_cov((close / ts_delay(close, 1) - 1), vol, 10)))",
"category": 'Volume', "category": "Volume",
}, },
{ {
"name": 'Regime Volatility Band', "name": "Regime Volatility Band",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 5) > (1.5 * ts_std((close / ts_delay(close, 1) - 1), 20))), (-cs_rank(ts_pct_change(close, 1))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Open-Close Spread Momentum', "name": "Open-Close Spread Momentum",
"formula": '(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))', "formula": "(-cs_rank(ts_mean(((close - open) / (open + 1e-8)), 5)))",
"category": 'Intraday', "category": "Intraday",
}, },
{ {
"name": 'Volatility-Scaled Reversal', "name": "Volatility-Scaled Reversal",
"formula": '(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))', "formula": "(-cs_rank((ts_pct_change(close, 5) / (ts_std((close / ts_delay(close, 1) - 1), 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'VWAP Time-Weighted Signal', "name": "VWAP Time-Weighted Signal",
"formula": '(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))', "formula": "(-cs_rank(ts_wma(((close - (amount / vol)) / ((amount / vol) + 1e-8)), 20)))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Covariance Structure Shift', "name": "Covariance Structure Shift",
"formula": '(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))', "formula": "(-cs_rank((ts_cov((close / ts_delay(close, 1) - 1), vol, 5) - ts_cov((close / ts_delay(close, 1) - 1), vol, 20))))",
"category": 'Volume', "category": "Volume",
}, },
# { # {
# "name": 'Quadratic Regression Residual', # "name": 'Quadratic Regression Residual',
@@ -714,34 +697,34 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Higher-moment', # "category": 'Higher-moment',
# }, # },
{ {
"name": 'VWAP Mean-Reversion Strength', "name": "VWAP Mean-Reversion Strength",
"formula": '(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))', "formula": "(-cs_rank((((close - (amount / vol)) / (amount / vol)) * (vol / (ts_mean(vol, 20) + 1e-8)))))",
"category": 'VWAP', "category": "VWAP",
}, },
{ {
"name": 'Multi-Scale Momentum', "name": "Multi-Scale Momentum",
"formula": '(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))', "formula": "(-cs_rank((ts_pct_change(close, 5) + ts_pct_change(close, 20))))",
"category": 'Momentum', "category": "Momentum",
}, },
{ {
"name": 'Relative High Position', "name": "Relative High Position",
"formula": '(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))', "formula": "(-cs_rank(((ts_max(high, 20) - close) / (ts_max(high, 20) + 1e-8))))",
"category": 'Mean-reversion', "category": "Mean-reversion",
}, },
{ {
"name": 'Turnover Volatility', "name": "Turnover Volatility",
"formula": '(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))', "formula": "(-cs_rank(ts_std((amount / (vol + 1e-8)), 10)))",
"category": 'Turnover', "category": "Turnover",
}, },
{ {
"name": 'Regime Correlation Signal', "name": "Regime Correlation Signal",
"formula": 'if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))', "formula": "if_((abs(ts_corr(close, vol, 10)) > 0.5), (-cs_rank(ts_pct_change(close, 3))), (-cs_rank(ts_pct_change(close, 10))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
{ {
"name": 'Intraday Momentum Reversal', "name": "Intraday Momentum Reversal",
"formula": '(-cs_rank(((close - open) / ((high - low) + 1e-8))))', "formula": "(-cs_rank(((close - open) / ((high - low) + 1e-8))))",
"category": 'Intraday', "category": "Intraday",
}, },
# { # {
# "name": 'Volume-Weighted Slope', # "name": 'Volume-Weighted Slope',
@@ -749,14 +732,13 @@ PAPER_FACTORS: List[Dict[str, str]] = [
# "category": 'Volume', # "category": 'Volume',
# }, # },
{ {
"name": 'Adaptive Range Reversal', "name": "Adaptive Range Reversal",
"formula": 'if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))', "formula": "if_((ts_std((close / ts_delay(close, 1) - 1), 10) > ts_mean(ts_std((close / ts_delay(close, 1) - 1), 10), 40)), (-cs_rank(((close - ts_min(close, 10)) / ((ts_max(close, 10) - ts_min(close, 10)) + 1e-8)))), (-cs_rank(ts_pct_change(close, 5))))",
"category": 'Regime-switching', "category": "Regime-switching",
}, },
] ]
def import_from_paper( def import_from_paper(
path: Optional[Union[str, Path]] = None, path: Optional[Union[str, Path]] = None,
) -> FactorLibrary: ) -> FactorLibrary:
@@ -802,6 +784,8 @@ def import_from_paper(
admission_date=entry.get("admission_date", ""), admission_date=entry.get("admission_date", ""),
signals=None, signals=None,
) )
if factor.formula.strip().startswith("# TODO"):
factor.metadata["unsupported"] = True
library.admit_factor(factor) library.admit_factor(factor)
logger.info( logger.info(

View File

@@ -0,0 +1,157 @@
"""Tests for library I/O and paper factor imports."""
import json
from pathlib import Path
import numpy as np
import pytest
from src.factorminer.core.factor_library import Factor, FactorLibrary
from src.factorminer.core.library_io import (
import_from_paper,
load_library,
save_library,
)
class TestSaveLoadLibrary:
    """Serialization round-trips for FactorLibrary persistence."""

    def test_save_library_ignores_save_signals(self, tmp_path: Path) -> None:
        """Even with save_signals=True, no .npz sidecar may be written."""
        factor = Factor(
            id=0,
            name="test_factor",
            formula="close / ts_delay(close, 1) - 1",
            category="Momentum",
            ic_mean=0.05,
            icir=0.5,
            ic_win_rate=0.55,
            max_correlation=0.1,
            batch_number=1,
        )
        # Attach a signal matrix on purpose; save_library must still skip it.
        factor.signals = np.ones((10, 20))

        library = FactorLibrary()
        library.admit_factor(factor)

        base = tmp_path / "test_lib"
        save_library(library, str(base), save_signals=True)

        json_file = base.with_suffix(".json")
        npz_file = Path(f"{base}_signals.npz")
        assert json_file.exists()
        assert not npz_file.exists()

    def test_load_library_restores_metadata_and_unsupported(
        self, tmp_path: Path
    ) -> None:
        """Loading JSON must restore metadata and flag '# TODO' formulas."""
        supported = Factor(
            id=0,
            name="ok_factor",
            formula="cs_rank(close)",
            category="Test",
            ic_mean=0.0,
            icir=0.0,
            ic_win_rate=0.0,
            max_correlation=0.0,
            batch_number=0,
            metadata={"author": "ai"},
        )
        todo = Factor(
            id=0,
            name="todo_factor",
            formula="# TODO: Neg(CsRank(Decay(close, 10)))",
            category="Test",
            ic_mean=0.0,
            icir=0.0,
            ic_win_rate=0.0,
            max_correlation=0.0,
            batch_number=0,
        )

        library = FactorLibrary()
        library.admit_factor(supported)
        library.admit_factor(todo)

        target = tmp_path / "meta_lib"
        save_library(library, str(target))
        restored = load_library(str(target))

        assert restored.size == 2
        first = restored.get_factor(1)
        assert first.metadata.get("author") == "ai"
        assert not first.metadata.get("unsupported", False)
        second = restored.get_factor(2)
        assert second.metadata.get("unsupported") is True

    def test_factor_round_trip_with_metadata(self) -> None:
        """Factor.to_dict / from_dict must carry metadata through unchanged."""
        original = Factor(
            id=1,
            name="round_trip",
            formula="ts_mean(close, 20)",
            category="Momentum",
            ic_mean=0.1,
            icir=1.0,
            ic_win_rate=0.6,
            max_correlation=0.2,
            batch_number=2,
            metadata={"unsupported": True, "tags": ["test"]},
        )
        rebuilt = Factor.from_dict(original.to_dict())
        assert rebuilt.metadata == original.metadata
class TestImportFromPaper:
    """Importing factors from the built-in paper catalog."""

    def test_import_from_paper_includes_all_translated_factors(self) -> None:
        """Every entry in the built-in PAPER_FACTORS catalog must be admitted."""
        library = import_from_paper()
        assert library.size > 0
        # Each admitted factor carries an assigned id plus its core fields.
        for admitted in library.list_factors():
            assert admitted.id > 0
            assert admitted.name
            assert admitted.formula
            assert admitted.category

    def test_import_from_paper_marks_todo_as_unsupported(self, tmp_path: Path) -> None:
        """'# TODO' formulas get metadata['unsupported'] = True on import."""
        catalog = [
            {
                "name": "Normal Factor",
                "formula": "cs_rank(close)",
                "category": "Test",
            },
            {
                "name": "Unsupported Factor",
                "formula": "# TODO: Neg(CsRank(Decay(close, 10)))",
                "category": "Test",
            },
        ]
        catalog_file = tmp_path / "custom_factors.json"
        catalog_file.write_text(json.dumps(catalog), encoding="utf-8")

        library = import_from_paper(str(catalog_file))
        assert library.size == 2
        supported, unsupported = library.list_factors()
        assert supported.metadata.get("unsupported") is None
        assert unsupported.metadata.get("unsupported") is True

    def test_import_from_paper_path_override(self, tmp_path: Path) -> None:
        """An explicit path argument loads an external JSON factor list."""
        entries = [
            {"name": "custom_1", "formula": "open + close", "category": "Custom"},
        ]
        override = tmp_path / "override.json"
        override.write_text(json.dumps(entries), encoding="utf-8")

        library = import_from_paper(str(override))
        assert library.size == 1
        assert library.list_factors()[0].name == "custom_1"