feat(data): 添加每日指标接口并优化因子引擎
- 新增 api_daily_basic.py 封装 Tushare 每日指标接口
- 因子引擎移除 lookback_days,支持 daily_basic 表字段路由
- 将每日指标纳入自动同步流程
- 删除废弃的 training/main.py
This commit is contained in:
@@ -71,7 +71,7 @@ class TestFactorEngineEndToEnd:
|
||||
@pytest.fixture
|
||||
def engine(self, mock_data):
|
||||
"""提供配置好的 FactorEngine fixture。"""
|
||||
data_source = {"daily": mock_data}
|
||||
data_source = {"pro_bar": mock_data}
|
||||
return FactorEngine(data_source=data_source, max_workers=2)
|
||||
|
||||
def test_simple_symbol_expression(self, engine):
|
||||
@@ -116,7 +116,7 @@ class TestFullWorkflow:
|
||||
|
||||
# 2. 初始化引擎
|
||||
print("\nStep 2: Initialize FactorEngine...")
|
||||
engine = FactorEngine(data_source={"daily": mock_data})
|
||||
engine = FactorEngine(data_source={"pro_bar": mock_data})
|
||||
print(" Engine initialized")
|
||||
|
||||
# 3. 注册因子 - 使用简单因子避免回看窗口问题
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
2. return_5_rank: 5日收益率在截面上的排名
|
||||
3. ma5: 5日均线 (ts_mean(close, 5))
|
||||
4. ma10: 10日均线 (ts_mean(close, 10))
|
||||
5. market_cap_rank: 市值百分比排名 (cs_rank(total_mv))
|
||||
|
||||
特点:使用因子字符串架构(add_factor + 字符串表达式)
|
||||
|
||||
@@ -48,6 +49,11 @@ def test_two_stocks_string_factors():
|
||||
print("\n[1.4] ma10 = ts_mean(close, 10)")
|
||||
print(f" 字符串表达式: {ma10_str}")
|
||||
|
||||
# market_cap_rank: 市值百分比排名 (截面排名)
|
||||
market_cap_rank_str = "cs_rank(total_mv)"
|
||||
print("\n[1.5] market_cap_rank = cs_rank(total_mv)")
|
||||
print(f" 字符串表达式: {market_cap_rank_str}")
|
||||
|
||||
# ========================================================================
|
||||
# 1.5 打印数据来源信息
|
||||
# ========================================================================
|
||||
@@ -66,6 +72,7 @@ def test_two_stocks_string_factors():
|
||||
"return_5_rank": return_5_rank_str,
|
||||
"ma5": ma5_str,
|
||||
"ma10": ma10_str,
|
||||
"market_cap_rank": market_cap_rank_str,
|
||||
}
|
||||
|
||||
for name, expr_str in expressions_str.items():
|
||||
@@ -102,9 +109,12 @@ def test_two_stocks_string_factors():
|
||||
engine.add_factor("ma10", ma10_str)
|
||||
print("[2.4] 注册 ma10 (字符串方式)")
|
||||
|
||||
engine.add_factor("market_cap_rank", market_cap_rank_str)
|
||||
print("[2.5] 注册 market_cap_rank (市值百分比排名,字符串方式)")
|
||||
|
||||
# 也注册原始 close 价格用于验证
|
||||
engine.add_factor("close_price", "close")
|
||||
print("[2.5] 注册 close_price (原始收盘价,字符串方式)")
|
||||
print("[2.6] 注册 close_price (原始收盘价,字符串方式)")
|
||||
|
||||
print(f"\n已注册因子列表: {engine.list_registered()}")
|
||||
|
||||
@@ -125,7 +135,6 @@ def test_two_stocks_string_factors():
|
||||
for i, spec in enumerate(plan.data_specs, 1):
|
||||
print(f" [{i}] 表名: {spec.table}")
|
||||
print(f" 字段: {spec.columns}")
|
||||
print(f" 回看天数: {spec.lookback_days}")
|
||||
|
||||
# ========================================================================
|
||||
# 3. 执行计算(两支股票)
|
||||
@@ -143,7 +152,14 @@ def test_two_stocks_string_factors():
|
||||
|
||||
try:
|
||||
result = engine.compute(
|
||||
factor_names=["return_5", "return_5_rank", "ma5", "ma10", "close_price"],
|
||||
factor_names=[
|
||||
"return_5",
|
||||
"return_5_rank",
|
||||
"ma5",
|
||||
"ma10",
|
||||
"close_price",
|
||||
"market_cap_rank",
|
||||
],
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
stock_codes=stock_codes,
|
||||
@@ -345,6 +361,10 @@ def test_two_stocks_string_factors():
|
||||
print(" - 每天两支股票的排名之和应接近 1")
|
||||
print("-" * 60)
|
||||
|
||||
# 5.5.1 return_5_rank 截面排名验证
|
||||
print("\n[5.5.1] return_5_rank 截面排名验证:")
|
||||
print("-" * 60)
|
||||
|
||||
# 获取有效数据
|
||||
result_valid = result.drop_nulls(subset=["return_5_rank"])
|
||||
|
||||
@@ -373,6 +393,39 @@ def test_two_stocks_string_factors():
|
||||
else:
|
||||
print(" [警告] 截面排名之和不接近 1")
|
||||
|
||||
# 5.5.2 market_cap_rank 市值百分比排名验证
|
||||
print("\n[5.5.2] market_cap_rank 市值百分比排名验证:")
|
||||
print("-" * 60)
|
||||
|
||||
result_valid_mv = result.drop_nulls(subset=["market_cap_rank"])
|
||||
|
||||
if len(result_valid_mv) > 0:
|
||||
min_rank_mv = result_valid_mv["market_cap_rank"].min()
|
||||
max_rank_mv = result_valid_mv["market_cap_rank"].max()
|
||||
print(f"\n市值排名范围: [{min_rank_mv:.4f}, {max_rank_mv:.4f}]")
|
||||
|
||||
if 0 <= min_rank_mv <= 1 and 0 <= max_rank_mv <= 1:
|
||||
print(" [成功] 市值排名值在 [0, 1] 区间内!")
|
||||
else:
|
||||
print(" [警告] 市值排名值超出 [0, 1] 区间")
|
||||
|
||||
# 检查某天两支股票的市值排名之和
|
||||
sample_date_mv = result_valid_mv["trade_date"][0]
|
||||
day_data_mv = result_valid_mv.filter(
|
||||
result_valid_mv["trade_date"] == sample_date_mv
|
||||
)
|
||||
if len(day_data_mv) == 2:
|
||||
rank_sum_mv = day_data_mv["market_cap_rank"].sum()
|
||||
print(f"\n示例日期 {sample_date_mv} 的市值排名验证:")
|
||||
for row in day_data_mv.iter_rows(named=True):
|
||||
print(f" {row['ts_code']}: {row['market_cap_rank']:.4f}")
|
||||
print(f" 排名之和: {rank_sum_mv:.4f} (两支股票应接近 1)")
|
||||
|
||||
if abs(rank_sum_mv - 1.0) < 0.01:
|
||||
print(" [成功] 市值排名之和验证通过!")
|
||||
else:
|
||||
print(" [警告] 市值排名之和不接近 1")
|
||||
|
||||
# ========================================================================
|
||||
# 6. 统计摘要
|
||||
# ========================================================================
|
||||
@@ -394,7 +447,7 @@ def test_two_stocks_string_factors():
|
||||
print(f"总记录数: {len(stock_data)}")
|
||||
print(f"有效记录数 (去空值后): {len(stock_valid)}")
|
||||
|
||||
factor_cols = ["return_5", "return_5_rank", "ma5", "ma10"]
|
||||
factor_cols = ["return_5", "return_5_rank", "ma5", "ma10", "market_cap_rank"]
|
||||
|
||||
for col in factor_cols:
|
||||
if col in stock_data.columns:
|
||||
@@ -418,7 +471,6 @@ def test_two_stocks_string_factors():
|
||||
# ========================================================================
|
||||
print("\n" + "=" * 80)
|
||||
|
||||
|
||||
# ========================================================================
|
||||
# 8. 测试总结
|
||||
# ========================================================================
|
||||
@@ -434,10 +486,13 @@ def test_two_stocks_string_factors():
|
||||
print()
|
||||
print("因子定义方式: 字符串表达式 (add_factor 方法)")
|
||||
print("计算因子:")
|
||||
print(" 1. return_5 - 5日收益率 (字符串: '(close / ts_delay(close, 5)) - 1')")
|
||||
print(" 2. return_5_rank - 5日收益率截面排名 (字符串: 'cs_rank(...)')")
|
||||
print(" 3. ma5 - 5日均线 (字符串: 'ts_mean(close, 5)')")
|
||||
print(" 4. ma10 - 10日均线 (字符串: 'ts_mean(close, 10)')")
|
||||
print(
|
||||
" 1. return_5 - 5日收益率 (字符串: '(close / ts_delay(close, 5)) - 1')"
|
||||
)
|
||||
print(" 2. return_5_rank - 5日收益率截面排名 (字符串: 'cs_rank(...)')")
|
||||
print(" 3. ma5 - 5日均线 (字符串: 'ts_mean(close, 5)')")
|
||||
print(" 4. ma10 - 10日均线 (字符串: 'ts_mean(close, 10)')")
|
||||
print(" 5. market_cap_rank - 市值百分比排名 (字符串: 'cs_rank(total_mv)')")
|
||||
print()
|
||||
print("验证结果:")
|
||||
print(" - 字符串表达式解析: 正常")
|
||||
|
||||
Reference in New Issue
Block a user