feat(data): 添加每日指标接口并优化因子引擎

- 新增 api_daily_basic.py 封装 Tushare 每日指标接口
- 因子引擎移除 lookback_days，支持 daily_basic 表字段路由
- 将每日指标纳入自动同步流程
- 删除废弃的 training/main.py
This commit is contained in:
2026-03-03 17:09:39 +08:00
parent 780284af7f
commit 53225b9443
12 changed files with 1132 additions and 433 deletions

View File

@@ -5,6 +5,7 @@
2. return_5_rank: 5日收益率在截面上的排名
3. ma5: 5日均线 (ts_mean(close, 5))
4. ma10: 10日均线 (ts_mean(close, 10))
5. market_cap_rank: 市值百分比排名 (cs_rank(total_mv))
特点：使用因子字符串架构（add_factor + 字符串表达式）
@@ -48,6 +49,11 @@ def test_two_stocks_string_factors():
print("\n[1.4] ma10 = ts_mean(close, 10)")
print(f" 字符串表达式: {ma10_str}")
# market_cap_rank: 市值百分比排名 (截面排名)
market_cap_rank_str = "cs_rank(total_mv)"
print("\n[1.5] market_cap_rank = cs_rank(total_mv)")
print(f" 字符串表达式: {market_cap_rank_str}")
# ========================================================================
# 1.5 打印数据来源信息
# ========================================================================
@@ -66,6 +72,7 @@ def test_two_stocks_string_factors():
"return_5_rank": return_5_rank_str,
"ma5": ma5_str,
"ma10": ma10_str,
"market_cap_rank": market_cap_rank_str,
}
for name, expr_str in expressions_str.items():
@@ -102,9 +109,12 @@ def test_two_stocks_string_factors():
engine.add_factor("ma10", ma10_str)
print("[2.4] 注册 ma10 (字符串方式)")
engine.add_factor("market_cap_rank", market_cap_rank_str)
print("[2.5] 注册 market_cap_rank (市值百分比排名,字符串方式)")
# 也注册原始 close 价格用于验证
engine.add_factor("close_price", "close")
print("[2.5] 注册 close_price (原始收盘价,字符串方式)")
print("[2.6] 注册 close_price (原始收盘价,字符串方式)")
print(f"\n已注册因子列表: {engine.list_registered()}")
@@ -125,7 +135,6 @@ def test_two_stocks_string_factors():
for i, spec in enumerate(plan.data_specs, 1):
print(f" [{i}] 表名: {spec.table}")
print(f" 字段: {spec.columns}")
print(f" 回看天数: {spec.lookback_days}")
# ========================================================================
# 3. 执行计算(两支股票)
@@ -143,7 +152,14 @@ def test_two_stocks_string_factors():
try:
result = engine.compute(
factor_names=["return_5", "return_5_rank", "ma5", "ma10", "close_price"],
factor_names=[
"return_5",
"return_5_rank",
"ma5",
"ma10",
"close_price",
"market_cap_rank",
],
start_date=start_date,
end_date=end_date,
stock_codes=stock_codes,
@@ -345,6 +361,10 @@ def test_two_stocks_string_factors():
print(" - 每天两支股票的排名之和应接近 1")
print("-" * 60)
# 5.5.1 return_5_rank 截面排名验证
print("\n[5.5.1] return_5_rank 截面排名验证:")
print("-" * 60)
# 获取有效数据
result_valid = result.drop_nulls(subset=["return_5_rank"])
@@ -373,6 +393,39 @@ def test_two_stocks_string_factors():
else:
print(" [警告] 截面排名之和不接近 1")
# 5.5.2 market_cap_rank 市值百分比排名验证
print("\n[5.5.2] market_cap_rank 市值百分比排名验证:")
print("-" * 60)
result_valid_mv = result.drop_nulls(subset=["market_cap_rank"])
if len(result_valid_mv) > 0:
min_rank_mv = result_valid_mv["market_cap_rank"].min()
max_rank_mv = result_valid_mv["market_cap_rank"].max()
print(f"\n市值排名范围: [{min_rank_mv:.4f}, {max_rank_mv:.4f}]")
if 0 <= min_rank_mv <= 1 and 0 <= max_rank_mv <= 1:
print(" [成功] 市值排名值在 [0, 1] 区间内!")
else:
print(" [警告] 市值排名值超出 [0, 1] 区间")
# 检查某天两支股票的市值排名之和
sample_date_mv = result_valid_mv["trade_date"][0]
day_data_mv = result_valid_mv.filter(
result_valid_mv["trade_date"] == sample_date_mv
)
if len(day_data_mv) == 2:
rank_sum_mv = day_data_mv["market_cap_rank"].sum()
print(f"\n示例日期 {sample_date_mv} 的市值排名验证:")
for row in day_data_mv.iter_rows(named=True):
print(f" {row['ts_code']}: {row['market_cap_rank']:.4f}")
print(f" 排名之和: {rank_sum_mv:.4f} (两支股票应接近 1)")
if abs(rank_sum_mv - 1.0) < 0.01:
print(" [成功] 市值排名之和验证通过!")
else:
print(" [警告] 市值排名之和不接近 1")
# ========================================================================
# 6. 统计摘要
# ========================================================================
@@ -394,7 +447,7 @@ def test_two_stocks_string_factors():
print(f"总记录数: {len(stock_data)}")
print(f"有效记录数 (去空值后): {len(stock_valid)}")
factor_cols = ["return_5", "return_5_rank", "ma5", "ma10"]
factor_cols = ["return_5", "return_5_rank", "ma5", "ma10", "market_cap_rank"]
for col in factor_cols:
if col in stock_data.columns:
@@ -418,7 +471,6 @@ def test_two_stocks_string_factors():
# ========================================================================
print("\n" + "=" * 80)
# ========================================================================
# 8. 测试总结
# ========================================================================
@@ -434,10 +486,13 @@ def test_two_stocks_string_factors():
print()
print("因子定义方式: 字符串表达式 (add_factor 方法)")
print("计算因子:")
print(" 1. return_5 - 5日收益率 (字符串: '(close / ts_delay(close, 5)) - 1')")
print(" 2. return_5_rank - 5日收益率截面排名 (字符串: 'cs_rank(...)')")
print(" 3. ma5 - 5日均线 (字符串: 'ts_mean(close, 5)')")
print(" 4. ma10 - 10日均线 (字符串: 'ts_mean(close, 10)')")
print(
" 1. return_5 - 5日收益率 (字符串: '(close / ts_delay(close, 5)) - 1')"
)
print(" 2. return_5_rank - 5日收益率截面排名 (字符串: 'cs_rank(...)')")
print(" 3. ma5 - 5日均线 (字符串: 'ts_mean(close, 5)')")
print(" 4. ma10 - 10日均线 (字符串: 'ts_mean(close, 10)')")
print(" 5. market_cap_rank - 市值百分比排名 (字符串: 'cs_rank(total_mv)')")
print()
print("验证结果:")
print(" - 字符串表达式解析: 正常")