feat(data): 为数据同步添加事务支持和同步日志
- Storage/ThreadSafeStorage 添加事务支持(begin/commit/rollback)
- 新增 SyncLogManager 记录所有同步任务的执行状态
- 集成事务到 StockBasedSync、DateBasedSync、QuarterBasedSync
- 在 sync_all 和 sync_financial 调度中心添加日志记录
- 新增测试验证事务和日志功能
This commit is contained in:
@@ -55,6 +55,7 @@ import pandas as pd
|
||||
from src.data import api_wrappers # noqa: F401
|
||||
from src.data.sync_registry import sync_registry
|
||||
from src.data.api_wrappers import sync_all_stocks
|
||||
from src.data.sync_logger import SyncLogManager
|
||||
|
||||
|
||||
def sync_all_data(
|
||||
@@ -109,7 +110,7 @@ def sync_all_data(
|
||||
>>> result = sync_all_data(dry_run=True)
|
||||
>>>
|
||||
>>> # 只同步特定任务
|
||||
>>> result = sync_all_data(selected=["trade_cal", "stock_basic"])
|
||||
>>> result = sync_all_data(selected=['trade_cal', 'pro_bar'])
|
||||
>>>
|
||||
>>> # 查看所有可用任务
|
||||
>>> from src.data.sync_registry import sync_registry
|
||||
@@ -117,13 +118,80 @@ def sync_all_data(
|
||||
>>> for t in tasks:
|
||||
... print(f"{t.name}: {t.display_name}")
|
||||
"""
|
||||
return sync_registry.sync_all(
|
||||
force_full=force_full,
|
||||
max_workers=max_workers,
|
||||
dry_run=dry_run,
|
||||
selected=selected,
|
||||
# 记录调度中心开始
|
||||
log_manager = SyncLogManager()
|
||||
sync_mode = "full" if force_full else "incremental"
|
||||
selected_str = ",".join(selected) if selected else "all"
|
||||
log_manager.start_sync(
|
||||
table_name="daily_data_batch",
|
||||
sync_type=sync_mode,
|
||||
metadata={
|
||||
"selected": selected_str,
|
||||
"dry_run": dry_run,
|
||||
"max_workers": max_workers,
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
result = sync_registry.sync_all(
|
||||
force_full=force_full,
|
||||
max_workers=max_workers,
|
||||
dry_run=dry_run,
|
||||
selected=selected,
|
||||
)
|
||||
|
||||
# 计算成功/失败数量
|
||||
success_count = 0
|
||||
failed_count = 0
|
||||
total_records = 0
|
||||
for task_name, task_result in result.items():
|
||||
if isinstance(task_result, dict):
|
||||
if task_result.get("status") == "error":
|
||||
failed_count += 1
|
||||
else:
|
||||
success_count += 1
|
||||
# 累加记录数(如果有)
|
||||
if "rows" in task_result:
|
||||
total_records += task_result.get("rows", 0)
|
||||
elif isinstance(task_result, pd.DataFrame):
|
||||
success_count += 1
|
||||
total_records += len(task_result)
|
||||
else:
|
||||
success_count += 1
|
||||
|
||||
# 记录完成日志
|
||||
status = "partial" if failed_count > 0 else "success"
|
||||
error_msg = f"Failed: {failed_count} tasks" if failed_count > 0 else None
|
||||
log_manager.complete_sync(
|
||||
table_name="daily_data_batch",
|
||||
sync_type=sync_mode,
|
||||
status=status,
|
||||
records_inserted=total_records,
|
||||
error_message=error_msg,
|
||||
metadata={
|
||||
"selected": selected_str,
|
||||
"dry_run": dry_run,
|
||||
"max_workers": max_workers,
|
||||
},
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
# 记录失败日志
|
||||
log_manager.complete_sync(
|
||||
table_name="daily_data_batch",
|
||||
sync_type=sync_mode,
|
||||
status="failed",
|
||||
error_message=str(e),
|
||||
metadata={
|
||||
"selected": selected_str,
|
||||
"dry_run": dry_run,
|
||||
"max_workers": max_workers,
|
||||
},
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def list_sync_tasks() -> list[dict[str, Any]]:
|
||||
"""列出所有已注册的同步任务。
|
||||
|
||||
Reference in New Issue
Block a user