Files
ProStock/tests/test_601117_factors.py
liaozhaorun e8158a8d59 fix(api_pro_bar): 使用 Tushare 原始字段名
删除 turnover_rate/volume_ratio 到 tor/vr 的不必要重命名,
直接使用 Tushare API 返回的原始字段名。
2026-03-02 01:05:15 +08:00

351 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""601117.SH 因子计算测试 - 使用真实数据
测试目标:计算中国化学(601117.SH)在2024-2025年的以下因子
1. return_5: 5日收益率 (close / ts_delay(close, 5) - 1)
2. return_5_rank: 5日收益率在截面上的排名
3. ma5: 5日均线 (ts_mean(close, 5))
4. ma10: 10日均线 (ts_mean(close, 10))
数据源: DuckDB 数据库中的真实日线数据
"""
from src.factors import FactorEngine
from src.factors.api import close, ts_mean, ts_delay, cs_rank
from src.factors.compiler import DependencyExtractor
def _print_banner(title, leading_blank=True):
    """Print ``title`` framed by two 80-character ``=`` rules.

    Args:
        title: Section heading printed between the two rules.
        leading_blank: When true, a blank line is emitted before the top rule.
    """
    rule = "=" * 80
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def _fmt(value, spec):
    """Format ``value`` with ``spec``; return ``"Null"`` when it is ``None``."""
    return "Null" if value is None else format(value, spec)


def _verdict(outcome, ok_label="正确"):
    """Map a check outcome to the label shown in the final summary.

    ``None`` means the check never ran; ``True``/``False`` mean pass/fail.
    """
    if outcome is None:
        return "未验证"
    return ok_label if outcome else "异常"


def test_601117_factors():
    """Compute and report four factors for 601117.SH over 2024-2025.

    The factors are ``return_5`` (5-day return via ``ts_delay``),
    ``return_5_rank`` (``cs_rank`` of that return), ``ma5`` and ``ma10``
    (``ts_mean`` of ``close``).  The raw ``close`` is registered as
    ``close_price`` so the engine output can be cross-checked by hand.

    Verification outcomes are printed as ``[成功]`` / ``[警告]`` lines rather
    than asserted, so a mismatch does not fail the test; the closing summary
    reports the outcome that was actually observed for each check.

    Returns:
        The object returned by ``engine.compute`` (used through the Polars
        DataFrame API below).  NOTE(review): pytest warns when a test function
        returns a non-None value; the return is kept because the ``__main__``
        entry point binds it.

    Raises:
        Exception: any error raised by ``engine.compute`` is printed and then
            re-raised unchanged.
    """
    from pathlib import Path  # local import: only the save step needs it

    _print_banner("601117.SH (中国化学) 因子计算测试 - 2024-2025", leading_blank=False)

    # =========================================================================
    # 1. Define the factor expressions
    # =========================================================================
    _print_banner("1. 定义因子表达式")

    # return_5: 5-day return; ts_delay fetches the close from 5 rows earlier.
    return_5_expr = (close / ts_delay(close, 5)) - 1
    print("\n[1.1] return_5 = (close / ts_delay(close, 5)) - 1")
    print(f" AST: {return_5_expr}")

    # return_5_rank: cross-sectional rank of the 5-day return.
    return_5_rank_expr = cs_rank(return_5_expr)
    print("\n[1.2] return_5_rank = cs_rank(return_5)")
    print(f" AST: {return_5_rank_expr}")

    # ma5 / ma10: 5-day and 10-day moving averages of close.
    ma5_expr = ts_mean(close, 5)
    print("\n[1.3] ma5 = ts_mean(close, 5)")
    print(f" AST: {ma5_expr}")

    ma10_expr = ts_mean(close, 10)
    print("\n[1.4] ma10 = ts_mean(close, 10)")
    print(f" AST: {ma10_expr}")

    # =========================================================================
    # 1.5 Report which base fields every expression depends on
    # =========================================================================
    _print_banner("1.5 数据来源分析")
    extractor = DependencyExtractor()
    expressions = {
        "return_5": return_5_expr,
        "return_5_rank": return_5_rank_expr,
        "ma5": ma5_expr,
        "ma10": ma10_expr,
    }
    for name, expr in expressions.items():
        deps = extractor.extract_dependencies(expr)
        # Label each group so the dependencies can be attributed to a factor.
        print(f"\n因子: {name}")
        print(f" 依赖字段: {deps}")
        print(" 字段说明:")
        for dep in sorted(deps):
            print(f" - {dep}: 基础字段 (将自动路由到对应数据表)")

    # =========================================================================
    # 2. Create the FactorEngine and register the factors
    # =========================================================================
    _print_banner("2. 注册因子到 FactorEngine")
    engine = FactorEngine()
    for idx, (name, expr) in enumerate(expressions.items(), 1):
        engine.register(name, expr)
        print(f"[2.{idx}] 注册 {name}")
    # The raw close price is registered too, for the manual checks in step 5.
    engine.register("close_price", close)
    print("[2.5] 注册 close_price (原始收盘价)")
    print(f"\n已注册因子列表: {engine.list_registered()}")

    # =========================================================================
    # 2.5 Print the data specs of every execution plan
    # =========================================================================
    _print_banner("2.5 执行计划数据规格")
    for name in engine.list_registered():
        plan = engine.preview_plan(name)
        if not plan:
            continue
        print(f"\n因子: {name}")
        print(f" 输出名称: {plan.output_name}")
        print(f" 依赖字段: {plan.dependencies}")
        print(" 数据规格:")
        for i, spec in enumerate(plan.data_specs, 1):
            print(f" [{i}] 表名: {spec.table}")
            print(f" 字段: {spec.columns}")
            print(f" 回看天数: {spec.lookback_days}")

    # =========================================================================
    # 3. Run the computation
    # =========================================================================
    start_date = "20240101"
    end_date = "20251231"
    stock_code = "601117.SH"
    _print_banner(f"3. 执行因子计算 ({start_date} - {end_date})")
    print(f"\n目标股票: {stock_code}")
    print(f"时间范围: {start_date} - {end_date}")
    # Only the compute call is guarded, so the failure message cannot be
    # triggered by an unrelated error in the reporting prints.
    try:
        result = engine.compute(
            factor_names=[*expressions, "close_price"],
            start_date=start_date,
            end_date=end_date,
            stock_codes=[stock_code],
        )
    except Exception as e:
        print(f"\n[错误] 计算失败: {e}")
        raise
    print("\n计算完成!")
    print(f"结果形状: {result.shape}")
    print(f"结果列: {result.columns}")

    # =========================================================================
    # 4. Show the results
    # =========================================================================
    _print_banner("4. 计算结果展示")
    print("\n[4.1] 前20行数据预览:")
    print(result.head(20))

    # trade_date is compared against YYYYMMDD strings, as in the compute call.
    trade_date = result["trade_date"]
    print("\n[4.2] 2024年上半年数据 (前10行):")
    print(result.filter(trade_date < "20240701").head(10))
    print("\n[4.3] 2024年下半年数据 (前10行):")
    print(
        result.filter(
            (trade_date >= "20240701") & (trade_date < "20250101")
        ).head(10)
    )
    print("\n[4.4] 2025年数据 (前10行):")
    print(result.filter(trade_date >= "20250101").head(10))

    # =========================================================================
    # 5. Verify the factors (outcomes are recorded for the final summary)
    # =========================================================================
    _print_banner("5. 因子计算验证")

    # 5.1 Rolling-window warm-up of ma5 / ma10
    print("\n[5.1] 移动平均线滑动窗口验证:")
    print("-" * 60)
    print("验证要点: ")
    print(" - ma5 前4行应为 Null (窗口未满5天)")
    print(" - ma5 第5行开始应有值")
    print(" - ma10 前9行应为 Null (窗口未满10天)")
    print(" - ma10 第10行开始应有值")
    print("-" * 60)
    first_15 = result.head(15)
    ma5_nulls = first_15["ma5"].null_count()
    ma10_nulls = first_15["ma10"].null_count()
    print("\n前15行统计:")
    print(f" ma5 Null 数量: {ma5_nulls}/15 (预期: 4)")
    print(f" ma10 Null 数量: {ma10_nulls}/15 (预期: 9)")
    window_ok = ma5_nulls == 4 and ma10_nulls == 9
    if window_ok:
        print(" [成功] 滑动窗口验证通过!")
    else:
        print(" [警告] 滑动窗口验证异常,请检查数据")

    # 5.2 Delay warm-up of return_5
    print("\n[5.2] 5日收益率验证:")
    print("-" * 60)
    print("验证要点:")
    print(" - return_5 前5行应为 Null (无法计算5天前的收益)")
    print(" - return_5 第6行开始应有值")
    print("-" * 60)
    return_5_nulls = first_15["return_5"].null_count()
    print("\n前15行统计:")
    print(f" return_5 Null 数量: {return_5_nulls}/15 (预期: 5)")
    delay_ok = return_5_nulls == 5
    if delay_ok:
        print(" [成功] return_5 延迟验证通过!")
    else:
        print(" [警告] return_5 延迟验证异常")

    # The manual checks below both inspect row index 9 (the 10th row).
    has_ten_rows = len(result) >= 10
    if has_ten_rows:
        row_10 = result.row(9, named=True)
        close_list = result.head(10)["close_price"].to_list()

    # 5.3 Recompute ma5 by hand for the 10th row
    print("\n[5.3] MA5 手动计算验证:")
    print("-" * 60)
    ma5_value_ok = None
    if has_ten_rows:
        print("第10行数据:")
        print(f" trade_date: {row_10['trade_date']}")
        print(f" close_price: {_fmt(row_10['close_price'], '.4f')}")
        print(f" ma5: {_fmt(row_10['ma5'], '.4f')}")
        print(f" ma10: {_fmt(row_10['ma10'], '.4f')}")
        # Mean of rows 6-10, i.e. the 5-row window that ends on row 10.
        window = close_list[5:10]
        if row_10["ma5"] is None or None in window:
            print(" [警告] 存在空值, 跳过 MA5 手动验证")
        else:
            manual_ma5 = sum(window) / 5
            print("\n手动计算验证 (第6-10天 close 均值):")
            print(f" close[5:10] = {[f'{c:.4f}' for c in window]}")
            print(f" 手动计算 ma5 = {manual_ma5:.4f}")
            print(f" 引擎计算 ma5 = {row_10['ma5']:.4f}")
            ma5_value_ok = abs(manual_ma5 - row_10["ma5"]) < 0.01
            if ma5_value_ok:
                print(" [成功] MA5 计算验证通过!")
            else:
                print(" [警告] MA5 计算结果不一致")

    # 5.4 Recompute return_5 by hand for the 10th row
    print("\n[5.4] Return_5 手动计算验证:")
    print("-" * 60)
    return_5_value_ok = None
    if has_ten_rows:
        close_day_10 = close_list[9]  # close on the 10th row
        close_day_5 = close_list[4]  # close on the 5th row (5 rows earlier)
        if None in (close_day_10, close_day_5, row_10["return_5"]) or close_day_5 == 0:
            print(" [警告] 存在空值或零值, 跳过 Return_5 手动验证")
        else:
            manual_return_5 = (close_day_10 / close_day_5) - 1
            print("第10天 return_5 验证:")
            print(f" close[9] (第10天): {close_day_10:.4f}")
            print(f" close[4] (第5天): {close_day_5:.4f}")
            print(f" 手动计算 return_5 = {manual_return_5:.6f}")
            print(f" 引擎计算 return_5 = {row_10['return_5']:.6f}")
            return_5_value_ok = abs(manual_return_5 - row_10["return_5"]) < 0.0001
            if return_5_value_ok:
                print(" [成功] Return_5 计算验证通过!")
            else:
                print(" [警告] Return_5 计算结果不一致")

    # =========================================================================
    # 6. Summary statistics
    # =========================================================================
    _print_banner("6. 因子统计摘要")
    result_valid = result.drop_nulls()
    total_rows = len(result)
    print(f"\n总记录数: {total_rows}")
    print(f"有效记录数 (去空值后): {len(result_valid)}")
    rank_in_range = None
    for col in expressions:
        if col not in result.columns:
            continue
        series = result[col]
        null_count = series.null_count()
        non_null = series.drop_nulls()
        # Guard the percentage against an empty result.
        null_pct = null_count / total_rows * 100 if total_rows else 0.0
        print(f"\n{col}:")
        print(f" 空值数量: {null_count} ({null_pct:.2f}%)")
        if len(non_null) > 0:
            # _fmt tolerates a None statistic (e.g. std of a single value).
            print(f" 均值: {_fmt(non_null.mean(), '.6f')}")
            print(f" 标准差: {_fmt(non_null.std(), '.6f')}")
            print(f" 最小值: {_fmt(non_null.min(), '.6f')}")
            print(f" 最大值: {_fmt(non_null.max(), '.6f')}")
            if col == "return_5_rank":
                print(" [截面排名应在 [0, 1] 区间内]")
                rank_in_range = bool(
                    0 <= non_null.min() and non_null.max() <= 1
                )

    # =========================================================================
    # 7. Save the results
    # =========================================================================
    _print_banner("7. 结果保存")
    output_file = "tests/output/601117_factors_2024_2025.csv"
    # Saving is best-effort: a failure is reported but does not fail the test.
    try:
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        result.write_csv(output_file)
    except Exception as e:
        print(f"\n[警告] 保存失败: {e}")
        print(" (请检查 tests/output 目录的写入权限)")
    else:
        print(f"\n结果已保存到: {output_file}")

    # =========================================================================
    # 8. Test summary
    # =========================================================================
    _print_banner("8. 测试总结")
    print("\n[测试完成] 601117.SH 因子计算测试报告:")
    print("-" * 60)
    print(f"目标股票: {stock_code}")
    print(f"时间范围: {start_date} - {end_date}")
    print(f"总记录数: {total_rows}")
    print()
    print("计算因子:")
    print(" 1. return_5 - 5日收益率 (ts_delay)")
    print(" 2. return_5_rank - 5日收益率截面排名 (cs_rank)")
    print(" 3. ma5 - 5日均线 (ts_mean)")
    print(" 4. ma10 - 10日均线 (ts_mean)")
    print()
    print("验证结果:")
    # Report what was actually observed instead of a hard-coded success.
    print(f" - 移动平均线滑动窗口: {_verdict(window_ok)} (ma5需5天, ma10需10天)")
    print(f" - 收益率延迟计算: {_verdict(delay_ok)} (需5天前数据)")
    print(f" - MA5 手动计算: {_verdict(ma5_value_ok)}")
    print(f" - Return_5 手动计算: {_verdict(return_5_value_ok)}")
    print(f" - 截面排名: {_verdict(rank_in_range, '正常')} (0-1区间)")
    print(f" - 数据完整性: 总记录数 {total_rows}, 有效记录数 {len(result_valid)}")
    print("-" * 60)
    return result
# Script entry point: run the factor report when this file is executed
# directly; the value returned by the test function is bound to the
# module-level name `result`.
if __name__ == "__main__":
    result = test_601117_factors()