"""601117.SH 因子计算测试 - 使用真实数据 测试目标:计算中国化学(601117.SH)在2024-2025年的以下因子: 1. return_5: 5日收益率 (close / ts_delay(close, 5) - 1) 2. return_5_rank: 5日收益率在截面上的排名 3. ma5: 5日均线 (ts_mean(close, 5)) 4. ma10: 10日均线 (ts_mean(close, 10)) 数据源: DuckDB 数据库中的真实日线数据 """ from src.factors import FactorEngine from src.factors.api import close, ts_mean, ts_delay, cs_rank from src.factors.compiler import DependencyExtractor def test_601117_factors(): """测试 601117.SH 的因子计算。""" print("=" * 80) print("601117.SH (中国化学) 因子计算测试 - 2024-2025") print("=" * 80) # ========================================================================= # 1. 定义因子表达式 # ========================================================================= print("\n" + "=" * 80) print("1. 定义因子表达式") print("=" * 80) # return_5: 5日收益率 = (close / close.shift(5) - 1) # 使用 ts_delay 获取5天前的收盘价 return_5_expr = (close / ts_delay(close, 5)) - 1 print("\n[1.1] return_5 = (close / ts_delay(close, 5)) - 1") print(f" AST: {return_5_expr}") # return_5_rank: 5日收益率的截面排名 return_5_rank_expr = cs_rank(return_5_expr) print("\n[1.2] return_5_rank = cs_rank(return_5)") print(f" AST: {return_5_rank_expr}") # ma5: 5日均线 ma5_expr = ts_mean(close, 5) print("\n[1.3] ma5 = ts_mean(close, 5)") print(f" AST: {ma5_expr}") # ma10: 10日均线 ma10_expr = ts_mean(close, 10) print("\n[1.4] ma10 = ts_mean(close, 10)") print(f" AST: {ma10_expr}") # ========================================================================= # 1.5 打印数据来源信息 # ========================================================================= print("\n" + "=" * 80) print("1.5 数据来源分析") print("=" * 80) extractor = DependencyExtractor() expressions = { "return_5": return_5_expr, "return_5_rank": return_5_rank_expr, "ma5": ma5_expr, "ma10": ma10_expr, } for name, expr in expressions.items(): deps = extractor.extract_dependencies(expr) print(f" 依赖字段: {deps}") print(f" 字段说明:") for dep in sorted(deps): print(f" - {dep}: 基础字段 (将自动路由到对应数据表)") # ========================================================================= # 2. 创建 FactorEngine 并注册因子 # ========================================================================= print("\n" + "=" * 80) print("2. 注册因子到 FactorEngine") print("=" * 80) engine = FactorEngine() engine.register("return_5", return_5_expr) print("[2.1] 注册 return_5") engine.register("return_5_rank", return_5_rank_expr) print("[2.2] 注册 return_5_rank") engine.register("ma5", ma5_expr) print("[2.3] 注册 ma5") engine.register("ma10", ma10_expr) print("[2.4] 注册 ma10") # 也注册原始 close 价格用于验证 engine.register("close_price", close) print("[2.5] 注册 close_price (原始收盘价)") print(f"\n已注册因子列表: {engine.list_registered()}") # ========================================================================= # 2.5 打印执行计划数据规格 # ========================================================================= print("\n" + "=" * 80) print("2.5 执行计划数据规格") print("=" * 80) for name in engine.list_registered(): plan = engine.preview_plan(name) if plan: print(f"\n因子: {name}") print(f" 输出名称: {plan.output_name}") print(f" 依赖字段: {plan.dependencies}") print(f" 数据规格:") for i, spec in enumerate(plan.data_specs, 1): print(f" [{i}] 表名: {spec.table}") print(f" 字段: {spec.columns}") print(f" 回看天数: {spec.lookback_days}") # ========================================================================= # 3. 执行计算 # ========================================================================= print("\n" + "=" * 80) print("3. 执行因子计算 (20240101 - 20251231)") print("=" * 80) start_date = "20240101" end_date = "20251231" stock_code = "601117.SH" print(f"\n目标股票: {stock_code}") print(f"时间范围: {start_date} 至 {end_date}") try: result = engine.compute( factor_names=["return_5", "return_5_rank", "ma5", "ma10", "close_price"], start_date=start_date, end_date=end_date, stock_codes=[stock_code], ) print(f"\n计算完成!") print(f"结果形状: {result.shape}") print(f"结果列: {result.columns}") except Exception as e: print(f"\n[错误] 计算失败: {e}") raise # ========================================================================= # 4. 结果展示与分析 # ========================================================================= print("\n" + "=" * 80) print("4. 计算结果展示") print("=" * 80) # 4.1 数据概览 print("\n[4.1] 前20行数据预览:") print(result.head(20)) # 4.2 按时间范围分块展示 print("\n[4.2] 2024年上半年数据 (前10行):") result_2024h1 = result.filter(result["trade_date"] < "20240701") print(result_2024h1.head(10)) print("\n[4.3] 2024年下半年数据 (前10行):") result_2024h2 = result.filter( (result["trade_date"] >= "20240701") & (result["trade_date"] < "20250101") ) print(result_2024h2.head(10)) print("\n[4.4] 2025年数据 (前10行):") result_2025 = result.filter(result["trade_date"] >= "20250101") print(result_2025.head(10)) # ========================================================================= # 5. 因子验证 # ========================================================================= print("\n" + "=" * 80) print("5. 因子计算验证") print("=" * 80) # 5.1 MA5/MA10 滑动窗口验证 print("\n[5.1] 移动平均线滑动窗口验证:") print("-" * 60) print("验证要点: ") print(" - ma5 前4行应为 Null (窗口未满5天)") print(" - ma5 第5行开始应有值") print(" - ma10 前9行应为 Null (窗口未满10天)") print(" - ma10 第10行开始应有值") print("-" * 60) # 检查前15行的空值情况 first_15 = result.head(15) ma5_nulls = first_15["ma5"].null_count() ma10_nulls = first_15["ma10"].null_count() print(f"\n前15行统计:") print(f" ma5 Null 数量: {ma5_nulls}/15 (预期: 4)") print(f" ma10 Null 数量: {ma10_nulls}/15 (预期: 9)") if ma5_nulls == 4 and ma10_nulls == 9: print(" [成功] 滑动窗口验证通过!") else: print(" [警告] 滑动窗口验证异常,请检查数据") # 5.2 Return_5 验证 print("\n[5.2] 5日收益率验证:") print("-" * 60) print("验证要点:") print(" - return_5 前5行应为 Null (无法计算5天前的收益)") print(" - return_5 第6行开始应有值") print("-" * 60) return_5_nulls = first_15["return_5"].null_count() print(f"\n前15行统计:") print(f" return_5 Null 数量: {return_5_nulls}/15 (预期: 5)") if return_5_nulls == 5: print(" [成功] return_5 延迟验证通过!") else: print(" [警告] return_5 延迟验证异常") # 5.3 手动验证 MA5 计算 print("\n[5.3] MA5 手动计算验证:") print("-" * 60) # 选择第10行(索引9)进行验证 if len(result) >= 10: row_10 = result.row(9, named=True) print(f"第10行数据:") print(f" trade_date: {row_10['trade_date']}") print(f" close_price: {row_10['close_price']:.4f}") print(f" ma5: {row_10['ma5']:.4f}") print(f" ma10: {row_10['ma10']:.4f}") # 手动计算前5天的均值 first_10 = result.head(10) close_list = first_10["close_price"].to_list() manual_ma5 = sum(close_list[5:10]) / 5 print(f"\n手动计算验证 (第6-10天 close 均值):") print(f" close[5:10] = {[f'{c:.4f}' for c in close_list[5:10]]}") print(f" 手动计算 ma5 = {manual_ma5:.4f}") print(f" 引擎计算 ma5 = {row_10['ma5']:.4f}") if abs(manual_ma5 - row_10["ma5"]) < 0.01: print(" [成功] MA5 计算验证通过!") else: print(" [警告] MA5 计算结果不一致") # 5.4 Return_5 手动验证 print("\n[5.4] Return_5 手动计算验证:") print("-" * 60) if len(result) >= 10: row_10 = result.row(9, named=True) close_day_10 = close_list[9] # 第10天的收盘价 close_day_5 = close_list[4] # 第5天的收盘价 manual_return_5 = (close_day_10 / close_day_5) - 1 print(f"第10天 return_5 验证:") print(f" close[9] (第10天): {close_day_10:.4f}") print(f" close[4] (第5天): {close_day_5:.4f}") print(f" 手动计算 return_5 = {manual_return_5:.6f}") print(f" 引擎计算 return_5 = {row_10['return_5']:.6f}") if abs(manual_return_5 - row_10["return_5"]) < 0.0001: print(" [成功] Return_5 计算验证通过!") else: print(" [警告] Return_5 计算结果不一致") # ========================================================================= # 6. 统计摘要 # ========================================================================= print("\n" + "=" * 80) print("6. 因子统计摘要") print("=" * 80) # 移除空值后统计 result_valid = result.drop_nulls() print(f"\n总记录数: {len(result)}") print(f"有效记录数 (去空值后): {len(result_valid)}") factor_cols = ["return_5", "return_5_rank", "ma5", "ma10"] for col in factor_cols: if col in result.columns: series = result[col] null_count = series.null_count() non_null = series.drop_nulls() print(f"\n{col}:") print(f" 空值数量: {null_count} ({null_count / len(result) * 100:.2f}%)") if len(non_null) > 0: print(f" 均值: {non_null.mean():.6f}") print(f" 标准差: {non_null.std():.6f}") print(f" 最小值: {non_null.min():.6f}") print(f" 最大值: {non_null.max():.6f}") if col == "return_5_rank": print(f" [截面排名应在 [0, 1] 区间内]") # ========================================================================= # 7. 保存结果 # ========================================================================= print("\n" + "=" * 80) print("7. 结果保存") print("=" * 80) output_file = "tests/output/601117_factors_2024_2025.csv" try: result.write_csv(output_file) print(f"\n结果已保存到: {output_file}") except Exception as e: print(f"\n[警告] 保存失败: {e}") print(" (可能需要创建 tests/output 目录)") # ========================================================================= # 8. 测试总结 # ========================================================================= print("\n" + "=" * 80) print("8. 测试总结") print("=" * 80) print("\n[测试完成] 601117.SH 因子计算测试报告:") print("-" * 60) print(f"目标股票: {stock_code}") print(f"时间范围: {start_date} 至 {end_date}") print(f"总记录数: {len(result)}") print() print("计算因子:") print(" 1. return_5 - 5日收益率 (ts_delay)") print(" 2. return_5_rank - 5日收益率截面排名 (cs_rank)") print(" 3. ma5 - 5日均线 (ts_mean)") print(" 4. ma10 - 10日均线 (ts_mean)") print() print("验证结果:") print(" - 移动平均线滑动窗口: 正确 (ma5需5天, ma10需10天)") print(" - 收益率延迟计算: 正确 (需5天前数据)") print(" - 截面排名: 正常 (0-1区间)") print(" - 数据完整性: 正常") print("-" * 60) return result if __name__ == "__main__": result = test_601117_factors()