refactor(factor): 完全重构因子计算框架 - 引入DSL表达式系统
- 删除旧因子框架:移除 base.py、composite.py、data_loader.py、data_spec.py 及所有子模块(momentum、financial、quality、sentiment等)
- 新增DSL表达式系统:实现 factor DSL 编译器和翻译器
  - dsl.py: 领域特定语言定义
  - compiler.py: AST编译与优化
  - translator.py: Polars表达式翻译
  - api.py: 统一API接口
- 新增数据路由层:data_router.py 实现字段到表的动态路由
- 新增API封装:api_pro_bar.py 提供pro_bar数据接口
- 更新执行引擎:engine.py 适配新的DSL架构
- 重构测试体系:删除旧测试,新增 test_dsl_promotion.py、test_factor_integration.py、test_pro_bar.py
- 清理文档:删除8个过时文档(factor_design、db_sync_guide等)
This commit is contained in:
@@ -29,6 +29,7 @@ import pandas as pd
|
||||
|
||||
from src.data.api_wrappers import sync_all_stocks
|
||||
from src.data.api_wrappers.api_daily import sync_daily, preview_daily_sync
|
||||
from src.data.api_wrappers.api_pro_bar import sync_pro_bar
|
||||
|
||||
|
||||
def preview_sync(
|
||||
@@ -134,7 +135,6 @@ def sync_all_data(
|
||||
dry_run: bool = False,
|
||||
) -> Dict[str, pd.DataFrame]:
|
||||
"""同步所有数据类型(每日同步)。
|
||||
|
||||
该函数按顺序同步所有可用的数据类型:
|
||||
1. 交易日历 (sync_trade_cal_cache)
|
||||
2. 股票基本信息 (sync_all_stocks)
|
||||
@@ -146,13 +146,12 @@ def sync_all_data(
|
||||
Args:
|
||||
force_full: 若为 True,强制所有数据类型完整重载
|
||||
max_workers: 日线数据同步的工作线程数(默认: 10)
|
||||
dry_run: 若为 True,仅显示将要同步的内容 Returns:
|
||||
映射数据类型,不写入数据
|
||||
dry_run: 若为 True,仅显示将要同步的内容,不写入数据
|
||||
|
||||
到同步结果的字典
|
||||
Returns:
|
||||
映射数据类型到同步结果的字典
|
||||
|
||||
Example:
|
||||
>>> # 同步所有数据(增量)
|
||||
>>> result = sync_all_data()
|
||||
>>>
|
||||
>>> # 强制完整重载
|
||||
@@ -167,6 +166,92 @@ def sync_all_data(
|
||||
print("[sync_all_data] Starting full data synchronization...")
|
||||
print("=" * 60)
|
||||
|
||||
# 1. Sync trade calendar (always needed first)
|
||||
print("\n[1/6] Syncing trade calendar cache...")
|
||||
try:
|
||||
from src.data.api_wrappers import sync_trade_cal_cache
|
||||
|
||||
sync_trade_cal_cache()
|
||||
results["trade_cal"] = pd.DataFrame()
|
||||
print("[1/6] Trade calendar: OK")
|
||||
except Exception as e:
|
||||
print(f"[1/6] Trade calendar: FAILED - {e}")
|
||||
results["trade_cal"] = pd.DataFrame()
|
||||
|
||||
# 2. Sync stock basic info
|
||||
print("\n[2/6] Syncing stock basic info...")
|
||||
try:
|
||||
sync_all_stocks()
|
||||
results["stock_basic"] = pd.DataFrame()
|
||||
print("[2/6] Stock basic: OK")
|
||||
except Exception as e:
|
||||
print(f"[2/6] Stock basic: FAILED - {e}")
|
||||
results["stock_basic"] = pd.DataFrame()
|
||||
|
||||
# # 3. Sync daily market data
|
||||
# print("\n[3/6] Syncing daily market data...")
|
||||
# try:
|
||||
# daily_result = sync_daily(
|
||||
# force_full=force_full,
|
||||
# max_workers=max_workers,
|
||||
# dry_run=dry_run,
|
||||
# )
|
||||
# results["daily"] = (
|
||||
# pd.concat(daily_result.values(), ignore_index=True)
|
||||
# if daily_result
|
||||
# else pd.DataFrame()
|
||||
# )
|
||||
# print("[3/6] Daily data: OK")
|
||||
# except Exception as e:
|
||||
# print(f"[3/6] Daily data: FAILED - {e}")
|
||||
# results["daily"] = pd.DataFrame()
|
||||
|
||||
# 4. Sync Pro Bar data
|
||||
print("\n[4/6] Syncing Pro Bar data (with adj, tor, vr)...")
|
||||
try:
|
||||
pro_bar_result = sync_pro_bar(
|
||||
force_full=force_full,
|
||||
max_workers=max_workers,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
results["pro_bar"] = (
|
||||
pd.concat(pro_bar_result.values(), ignore_index=True)
|
||||
if pro_bar_result
|
||||
else pd.DataFrame()
|
||||
)
|
||||
print(f"[4/6] Pro Bar data: OK ({len(results['pro_bar'])} records)")
|
||||
except Exception as e:
|
||||
print(f"[4/6] Pro Bar data: FAILED - {e}")
|
||||
results["pro_bar"] = pd.DataFrame()
|
||||
|
||||
# 5. Sync stock historical list (bak_basic)
|
||||
print("\n[5/6] Syncing stock historical list (bak_basic)...")
|
||||
try:
|
||||
bak_basic_result = sync_bak_basic(force_full=force_full)
|
||||
results["bak_basic"] = bak_basic_result
|
||||
print(f"[5/6] Bak basic: OK ({len(bak_basic_result)} records)")
|
||||
except Exception as e:
|
||||
print(f"[5/6] Bak basic: FAILED - {e}")
|
||||
results["bak_basic"] = pd.DataFrame()
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 60)
|
||||
print("[sync_all_data] Sync Summary")
|
||||
print("=" * 60)
|
||||
for data_type, df in results.items():
|
||||
print(f" {data_type}: {len(df)} records")
|
||||
print("=" * 60)
|
||||
print("\nNote: namechange is NOT in auto-sync. To sync manually:")
|
||||
print(" from src.data.api_wrappers import sync_namechange")
|
||||
print(" sync_namechange(force=True)")
|
||||
|
||||
return results
|
||||
results: Dict[str, pd.DataFrame] = {}
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("[sync_all_data] Starting full data synchronization...")
|
||||
print("=" * 60)
|
||||
|
||||
# 1. Sync trade calendar (always needed first)
|
||||
print("\n[1/5] Syncing trade calendar cache...")
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user