test(debug): 添加因子回测一致性问题的调试测试套件

- 分析GTJA_alpha032等因子在不同LOOKBACK_DAYS下的差异来源
- 验证cs_rank嵌套和截面股票数量对结果的影响
- 测试ts_rank NaN处理和除法除零修复
This commit is contained in:
2026-03-22 02:43:23 +08:00
parent ccd42082c2
commit 31b25074c3
10 changed files with 1532 additions and 87 deletions

View File

@@ -184,7 +184,8 @@ class PolarsTranslator:
"+": lambda l, r: l + r,
"-": lambda l, r: l - r,
"*": lambda l, r: l * r,
"/": lambda l, r: l / r,
# 【修复】除法处理除零,避免产生 NaN/inf 导致 EMA 永久感染
"/": lambda l, r: pl.when(r == 0).then(None).otherwise(l / r),
"**": lambda l, r: l.pow(r),
"//": lambda l, r: l.floor_div(r),
"%": lambda l, r: l % r,
@@ -363,6 +364,7 @@ class PolarsTranslator:
# 抛弃极慢的 rolling_map，借用 pandas 的 Cython 引擎
def kurt_calc(s: pl.Series) -> pl.Series:
import pandas as pd
# pandas.rolling.kurt() 是用 Cython 编写的,速度比 pure python 快很多
pd_series = pd.Series(s.to_numpy())
result = pd_series.rolling(window).kurt().to_numpy()
@@ -499,11 +501,21 @@ class PolarsTranslator:
# 当前值即为每个窗口的最后一个元素 (N - window + 1, )
current_vals = windows[:, -1]
# 向量化广播比较,然后沿窗口轴(axis=1)求和,直接得出排名比例
ranks = np.sum(windows <= current_vals[:, None], axis=1) / window
# 【终极修复】使用窗口内实际有效数据个数作为分母
# 1. 统计小于等于当前值的个数
less_equal = np.sum(windows <= current_vals[:, None], axis=1)
# 2. 统计当前窗口内有效的非 NaN 数据个数
valid_counts = np.sum(~np.isnan(windows), axis=1)
# 3. 使用真实有效个数作为分母,避免分母陷阱
with np.errstate(divide="ignore", invalid="ignore"):
ranks = np.where(valid_counts > 0, less_equal / valid_counts, np.nan)
# 【修复】如果当前值是 NaN，则排名也必须是 NaN
ranks[np.isnan(current_vals)] = np.nan
result = np.full(n, np.nan)
result[window - 1:] = ranks
result[window - 1 :] = ranks
return pl.Series(result)
return expr.map_batches(rank_calc, return_dtype=pl.Float64)
@@ -592,7 +604,7 @@ class PolarsTranslator:
distances = window - 1 - argmax_indices
result = np.full(n, np.nan)
result[window - 1:] = distances
result[window - 1 :] = distances
return pl.Series(result)
return expr.map_batches(argmax_calc, return_dtype=pl.Float64)
@@ -616,7 +628,7 @@ class PolarsTranslator:
distances = window - 1 - argmin_indices
result = np.full(n, np.nan)
result[window - 1:] = distances
result[window - 1 :] = distances
return pl.Series(result)
return expr.map_batches(argmin_calc, return_dtype=pl.Float64)
@@ -650,7 +662,7 @@ class PolarsTranslator:
prods = np.prod(windows, axis=1)
result = np.full(n, np.nan)
result[window - 1:] = prods
result[window - 1 :] = prods
return pl.Series(result)
return expr.map_batches(prod_calc, return_dtype=pl.Float64)