feat(data): 为数据同步添加事务支持和同步日志

- Storage/ThreadSafeStorage 添加事务支持(begin/commit/rollback)
- 新增 SyncLogManager 记录所有同步任务的执行状态
- 集成事务到 StockBasedSync、DateBasedSync、QuarterBasedSync
- 在 sync_all 和 sync_financial 调度中心添加日志记录
- 新增测试验证事务和日志功能
This commit is contained in:
2026-03-23 21:10:15 +08:00
parent 31b25074c3
commit bace4cc5f4
10 changed files with 1468 additions and 177 deletions

View File

@@ -55,6 +55,7 @@ import pandas as pd
from src.data import api_wrappers # noqa: F401
from src.data.sync_registry import sync_registry
from src.data.api_wrappers import sync_all_stocks
from src.data.sync_logger import SyncLogManager
def sync_all_data(
@@ -109,7 +110,7 @@ def sync_all_data(
>>> result = sync_all_data(dry_run=True)
>>>
>>> # 只同步特定任务
>>> result = sync_all_data(selected=["trade_cal", "stock_basic"])
>>> result = sync_all_data(selected=['trade_cal', 'pro_bar'])
>>>
>>> # 查看所有可用任务
>>> from src.data.sync_registry import sync_registry
@@ -117,13 +118,80 @@ def sync_all_data(
>>> for t in tasks:
... print(f"{t.name}: {t.display_name}")
"""
return sync_registry.sync_all(
force_full=force_full,
max_workers=max_workers,
dry_run=dry_run,
selected=selected,
# 记录调度中心开始
log_manager = SyncLogManager()
sync_mode = "full" if force_full else "incremental"
selected_str = ",".join(selected) if selected else "all"
log_manager.start_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
try:
result = sync_registry.sync_all(
force_full=force_full,
max_workers=max_workers,
dry_run=dry_run,
selected=selected,
)
# 计算成功/失败数量
success_count = 0
failed_count = 0
total_records = 0
for task_name, task_result in result.items():
if isinstance(task_result, dict):
if task_result.get("status") == "error":
failed_count += 1
else:
success_count += 1
# 累加记录数(如果有)
if "rows" in task_result:
total_records += task_result.get("rows", 0)
elif isinstance(task_result, pd.DataFrame):
success_count += 1
total_records += len(task_result)
else:
success_count += 1
# 记录完成日志
status = "partial" if failed_count > 0 else "success"
error_msg = f"Failed: {failed_count} tasks" if failed_count > 0 else None
log_manager.complete_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
status=status,
records_inserted=total_records,
error_message=error_msg,
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
return result
except Exception as e:
# 记录失败日志
log_manager.complete_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
status="failed",
error_message=str(e),
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
raise
def list_sync_tasks() -> list[dict[str, Any]]:
"""列出所有已注册的同步任务。