refactor(factor): 完全重构因子计算框架 - 引入DSL表达式系统

- 删除旧因子框架:移除 base.py、composite.py、data_loader.py、data_spec.py
  及所有子模块(momentum、financial、quality、sentiment等)
- 新增DSL表达式系统:实现 factor DSL 编译器和翻译器
  - dsl.py: 领域特定语言定义
  - compiler.py: AST编译与优化
  - translator.py: Polars表达式翻译
  - api.py: 统一API接口
- 新增数据路由层:data_router.py 实现字段到表的动态路由
- 新增API封装:api_pro_bar.py 提供pro_bar数据接口
- 更新执行引擎:engine.py 适配新的DSL架构
- 重构测试体系:删除旧测试,新增 test_dsl_promotion.py、
  test_factor_integration.py、test_pro_bar.py
- 清理文档:删除8个过时文档(factor_design、db_sync_guide等)
This commit is contained in:
2026-02-27 22:22:23 +08:00
parent c3c20ed7ea
commit a56433e440
51 changed files with 667 additions and 11287 deletions

View File

@@ -29,6 +29,7 @@ import pandas as pd
from src.data.api_wrappers import sync_all_stocks
from src.data.api_wrappers.api_daily import sync_daily, preview_daily_sync
from src.data.api_wrappers.api_pro_bar import sync_pro_bar
def preview_sync(
@@ -134,7 +135,6 @@ def sync_all_data(
dry_run: bool = False,
) -> Dict[str, pd.DataFrame]:
"""同步所有数据类型(每日同步)。
该函数按顺序同步所有可用的数据类型:
1. 交易日历 (sync_trade_cal_cache)
2. 股票基本信息 (sync_all_stocks)
@@ -146,13 +146,12 @@ def sync_all_data(
Args:
force_full: 若为 True,强制所有数据类型完整重载
max_workers: 日线数据同步的工作线程数(默认: 10)
dry_run: 若为 True,仅显示将要同步的内容,不写入数据
Returns:
映射数据类型到同步结果的字典
Example:
>>> # 同步所有数据(增量)
>>> result = sync_all_data()
>>>
>>> # 强制完整重载
@@ -167,6 +166,92 @@ def sync_all_data(
print("[sync_all_data] Starting full data synchronization...")
print("=" * 60)
# 1. Sync trade calendar (always needed first)
print("\n[1/6] Syncing trade calendar cache...")
try:
from src.data.api_wrappers import sync_trade_cal_cache
sync_trade_cal_cache()
results["trade_cal"] = pd.DataFrame()
print("[1/6] Trade calendar: OK")
except Exception as e:
print(f"[1/6] Trade calendar: FAILED - {e}")
results["trade_cal"] = pd.DataFrame()
# 2. Sync stock basic info
print("\n[2/6] Syncing stock basic info...")
try:
sync_all_stocks()
results["stock_basic"] = pd.DataFrame()
print("[2/6] Stock basic: OK")
except Exception as e:
print(f"[2/6] Stock basic: FAILED - {e}")
results["stock_basic"] = pd.DataFrame()
# # 3. Sync daily market data
# print("\n[3/6] Syncing daily market data...")
# try:
# daily_result = sync_daily(
# force_full=force_full,
# max_workers=max_workers,
# dry_run=dry_run,
# )
# results["daily"] = (
# pd.concat(daily_result.values(), ignore_index=True)
# if daily_result
# else pd.DataFrame()
# )
# print("[3/6] Daily data: OK")
# except Exception as e:
# print(f"[3/6] Daily data: FAILED - {e}")
# results["daily"] = pd.DataFrame()
# 4. Sync Pro Bar data
print("\n[4/6] Syncing Pro Bar data (with adj, tor, vr)...")
try:
pro_bar_result = sync_pro_bar(
force_full=force_full,
max_workers=max_workers,
dry_run=dry_run,
)
results["pro_bar"] = (
pd.concat(pro_bar_result.values(), ignore_index=True)
if pro_bar_result
else pd.DataFrame()
)
print(f"[4/6] Pro Bar data: OK ({len(results['pro_bar'])} records)")
except Exception as e:
print(f"[4/6] Pro Bar data: FAILED - {e}")
results["pro_bar"] = pd.DataFrame()
# 5. Sync stock historical list (bak_basic)
print("\n[5/6] Syncing stock historical list (bak_basic)...")
try:
bak_basic_result = sync_bak_basic(force_full=force_full)
results["bak_basic"] = bak_basic_result
print(f"[5/6] Bak basic: OK ({len(bak_basic_result)} records)")
except Exception as e:
print(f"[5/6] Bak basic: FAILED - {e}")
results["bak_basic"] = pd.DataFrame()
# Summary
print("\n" + "=" * 60)
print("[sync_all_data] Sync Summary")
print("=" * 60)
for data_type, df in results.items():
print(f" {data_type}: {len(df)} records")
print("=" * 60)
print("\nNote: namechange is NOT in auto-sync. To sync manually:")
print(" from src.data.api_wrappers import sync_namechange")
print(" sync_namechange(force=True)")
return results
results: Dict[str, pd.DataFrame] = {}
print("\n" + "=" * 60)
print("[sync_all_data] Starting full data synchronization...")
print("=" * 60)
# 1. Sync trade calendar (always needed first)
print("\n[1/5] Syncing trade calendar cache...")
try: