test(debug): 添加因子回测一致性问题的调试测试套件
- 分析 GTJA_alpha032 等因子在不同 LOOKBACK_DAYS 下的差异来源
- 验证 cs_rank 嵌套和截面股票数量对结果的影响
- 测试 ts_rank 的 NaN 处理和除法除零修复
This commit is contained in:
@@ -184,7 +184,8 @@ class PolarsTranslator:
|
||||
"+": lambda l, r: l + r,
|
||||
"-": lambda l, r: l - r,
|
||||
"*": lambda l, r: l * r,
|
||||
"/": lambda l, r: l / r,
|
||||
# 【修复】除法处理除零,避免产生 NaN/inf 导致 EMA 永久感染
|
||||
"/": lambda l, r: pl.when(r == 0).then(None).otherwise(l / r),
|
||||
"**": lambda l, r: l.pow(r),
|
||||
"//": lambda l, r: l.floor_div(r),
|
||||
"%": lambda l, r: l % r,
|
||||
@@ -363,6 +364,7 @@ class PolarsTranslator:
|
||||
# 抛弃极慢的 rolling_map,借用 pandas 的 Cython 引擎
|
||||
def kurt_calc(s: pl.Series) -> pl.Series:
|
||||
import pandas as pd
|
||||
|
||||
# pandas.rolling.kurt() 是用 Cython 编写的,速度比 pure python 快很多
|
||||
pd_series = pd.Series(s.to_numpy())
|
||||
result = pd_series.rolling(window).kurt().to_numpy()
|
||||
@@ -499,11 +501,21 @@ class PolarsTranslator:
|
||||
# 当前值即为每个窗口的最后一个元素 (N - window + 1, )
|
||||
current_vals = windows[:, -1]
|
||||
|
||||
# 向量化广播比较,然后沿窗口轴(axis=1)求和,直接得出排名比例
|
||||
ranks = np.sum(windows <= current_vals[:, None], axis=1) / window
|
||||
# 【终极修复】使用窗口内实际有效数据个数作为分母
|
||||
# 1. 统计小于等于当前值的个数
|
||||
less_equal = np.sum(windows <= current_vals[:, None], axis=1)
|
||||
# 2. 统计当前窗口内有效的非 NaN 数据个数
|
||||
valid_counts = np.sum(~np.isnan(windows), axis=1)
|
||||
|
||||
# 3. 使用真实有效个数作为分母,避免分母陷阱
|
||||
with np.errstate(divide="ignore", invalid="ignore"):
|
||||
ranks = np.where(valid_counts > 0, less_equal / valid_counts, np.nan)
|
||||
|
||||
# 【修复】如果当前值是 NaN,则排名也必须是 NaN
|
||||
ranks[np.isnan(current_vals)] = np.nan
|
||||
|
||||
result = np.full(n, np.nan)
|
||||
result[window - 1:] = ranks
|
||||
result[window - 1 :] = ranks
|
||||
return pl.Series(result)
|
||||
|
||||
return expr.map_batches(rank_calc, return_dtype=pl.Float64)
|
||||
@@ -592,7 +604,7 @@ class PolarsTranslator:
|
||||
distances = window - 1 - argmax_indices
|
||||
|
||||
result = np.full(n, np.nan)
|
||||
result[window - 1:] = distances
|
||||
result[window - 1 :] = distances
|
||||
return pl.Series(result)
|
||||
|
||||
return expr.map_batches(argmax_calc, return_dtype=pl.Float64)
|
||||
@@ -616,7 +628,7 @@ class PolarsTranslator:
|
||||
distances = window - 1 - argmin_indices
|
||||
|
||||
result = np.full(n, np.nan)
|
||||
result[window - 1:] = distances
|
||||
result[window - 1 :] = distances
|
||||
return pl.Series(result)
|
||||
|
||||
return expr.map_batches(argmin_calc, return_dtype=pl.Float64)
|
||||
@@ -650,7 +662,7 @@ class PolarsTranslator:
|
||||
prods = np.prod(windows, axis=1)
|
||||
|
||||
result = np.full(n, np.nan)
|
||||
result[window - 1:] = prods
|
||||
result[window - 1 :] = prods
|
||||
return pl.Series(result)
|
||||
|
||||
return expr.map_batches(prod_calc, return_dtype=pl.Float64)
|
||||
|
||||
Reference in New Issue
Block a user