2026-02-23 16:23:53 +08:00
|
|
|
|
"""数据同步调度中心模块。
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
该模块作为数据同步的调度中心,统一管理各类型数据的同步流程。
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-03-01 01:24:39 +08:00
|
|
|
|
【重要规范 - sync.py 职责范围】
|
|
|
|
|
|
本模块**仅包含每日更新的数据接口**,季度/低频数据不应放入此文件:
|
|
|
|
|
|
|
|
|
|
|
|
✅ 本模块包含的同步逻辑(每日更新):
|
|
|
|
|
|
- api_daily.py: 日线数据同步 (DailySync 类)
|
2026-03-03 17:09:39 +08:00
|
|
|
|
- api_daily_basic.py: 每日指标数据同步 (DailyBasicSync 类)
|
2026-03-01 01:24:39 +08:00
|
|
|
|
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
|
|
|
|
|
|
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
|
|
|
|
|
|
- api_stock_basic.py: 股票基本信息同步
|
|
|
|
|
|
- api_trade_cal.py: 交易日历同步
|
|
|
|
|
|
|
|
|
|
|
|
❌ 不应包含的同步逻辑(季度/低频更新):
|
|
|
|
|
|
- financial_data/: 财务数据(利润表、资产负债表、现金流量表等)
|
|
|
|
|
|
使用方式:
|
|
|
|
|
|
from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
|
|
|
|
|
|
sync_financial()
|
|
|
|
|
|
|
|
|
|
|
|
- api_namechange.py: 股票名称变更(不频繁)
|
|
|
|
|
|
使用方式:
|
|
|
|
|
|
from src.data.api_wrappers import sync_namechange
|
|
|
|
|
|
sync_namechange(force=True)
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-03-05 21:11:18 +08:00
|
|
|
|
【架构说明】
|
|
|
|
|
|
本模块使用 SyncRegistry 注册表模式管理同步任务,避免手动罗列各个接口。
|
|
|
|
|
|
同步任务在 api_wrappers/__init__.py 中自动注册,新增接口无需修改 sync.py。
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
使用方式:
|
|
|
|
|
|
# 预览同步(检查数据量,不写入)
|
|
|
|
|
|
from src.data.sync import preview_sync
|
|
|
|
|
|
preview = preview_sync()
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-03-01 01:24:39 +08:00
|
|
|
|
# 同步所有每日更新数据(不包括财务数据、namechange)
|
2026-02-23 16:23:53 +08:00
|
|
|
|
from src.data.sync import sync_all_data
|
|
|
|
|
|
result = sync_all_data()
|
2026-02-21 03:43:30 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
# 强制全量重载
|
|
|
|
|
|
result = sync_all_data(force_full=True)
|
2026-03-05 21:11:18 +08:00
|
|
|
|
|
|
|
|
|
|
# 查看已注册的所有同步任务
|
|
|
|
|
|
from src.data.sync_registry import sync_registry
|
|
|
|
|
|
tasks = sync_registry.list_tasks()
|
|
|
|
|
|
for task in tasks:
|
|
|
|
|
|
print(f"{task.name}: {task.display_name}")
|
2026-02-01 04:44:01 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
2026-03-01 01:24:39 +08:00
|
|
|
|
from typing import Optional, Dict, Union, Any
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
import pandas as pd
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-03-05 21:11:18 +08:00
|
|
|
|
# 导入以触发自动注册
|
|
|
|
|
|
from src.data import api_wrappers # noqa: F401
|
|
|
|
|
|
from src.data.sync_registry import sync_registry
|
2026-02-23 16:23:53 +08:00
|
|
|
|
from src.data.api_wrappers import sync_all_stocks
|
2026-03-23 21:10:15 +08:00
|
|
|
|
from src.data.sync_logger import SyncLogManager
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
def sync_all_data(
|
|
|
|
|
|
force_full: bool = False,
|
|
|
|
|
|
max_workers: Optional[int] = None,
|
|
|
|
|
|
dry_run: bool = False,
|
2026-03-05 21:11:18 +08:00
|
|
|
|
selected: Optional[list[str]] = None,
|
2026-03-01 01:24:39 +08:00
|
|
|
|
) -> dict[str, Any]:
|
|
|
|
|
|
"""同步所有每日更新的数据类型。
|
2026-02-27 23:34:12 +08:00
|
|
|
|
|
2026-03-01 01:24:39 +08:00
|
|
|
|
【重要】本函数仅同步每日更新的数据,不包含季度/低频数据。
|
|
|
|
|
|
|
2026-03-05 21:11:18 +08:00
|
|
|
|
【自动注册机制】
|
|
|
|
|
|
同步任务在 api_wrappers/__init__.py 中自动注册到 SyncRegistry。
|
|
|
|
|
|
当前注册的同步任务(按执行顺序):
|
|
|
|
|
|
1. trade_cal: 交易日历缓存
|
|
|
|
|
|
2. stock_basic: 股票基本信息
|
|
|
|
|
|
3. pro_bar: Pro Bar 数据(复权、换手率、量比)
|
|
|
|
|
|
4. daily_basic: 每日指标(PE、PB、换手率、市值)
|
|
|
|
|
|
5. bak_basic: 历史股票列表
|
|
|
|
|
|
6. stock_st: ST股票列表
|
|
|
|
|
|
|
|
|
|
|
|
新增接口时,只需在 api_wrappers/__init__.py 中添加注册代码,
|
|
|
|
|
|
无需修改本函数。
|
2026-03-01 01:24:39 +08:00
|
|
|
|
|
|
|
|
|
|
【不包含的同步(需单独调用)】
|
|
|
|
|
|
- 财务数据: 利润表、资产负债表、现金流量表(季度更新)
|
|
|
|
|
|
使用: from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
|
|
|
|
|
|
调用: sync_financial()
|
2026-02-27 23:34:12 +08:00
|
|
|
|
|
2026-03-01 01:24:39 +08:00
|
|
|
|
- 名称变更 (namechange): 股票曾用名(低频更新)
|
|
|
|
|
|
使用: from src.data.api_wrappers import sync_namechange
|
|
|
|
|
|
调用: sync_namechange(force=True)
|
2026-02-27 23:34:12 +08:00
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
force_full: 若为 True,强制所有数据类型完整重载
|
|
|
|
|
|
max_workers: 日线数据同步的工作线程数(默认: 10)
|
|
|
|
|
|
dry_run: 若为 True,仅显示将要同步的内容,不写入数据
|
2026-03-05 21:11:18 +08:00
|
|
|
|
selected: 只同步指定的任务列表,None表示同步所有
|
|
|
|
|
|
例如: selected=["trade_cal", "stock_basic"] 只同步交易日历和股票基本信息
|
2026-02-27 23:34:12 +08:00
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
映射数据类型到同步结果的字典
|
|
|
|
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
|
>>> result = sync_all_data()
|
|
|
|
|
|
>>>
|
|
|
|
|
|
>>> # 强制完整重载
|
|
|
|
|
|
>>> result = sync_all_data(force_full=True)
|
|
|
|
|
|
>>>
|
|
|
|
|
|
>>> # Dry run
|
|
|
|
|
|
>>> result = sync_all_data(dry_run=True)
|
2026-03-05 21:11:18 +08:00
|
|
|
|
>>>
|
|
|
|
|
|
>>> # 只同步特定任务
|
2026-03-23 21:10:15 +08:00
|
|
|
|
>>> result = sync_all_data(selected=['trade_cal', 'pro_bar'])
|
2026-03-05 21:11:18 +08:00
|
|
|
|
>>>
|
|
|
|
|
|
>>> # 查看所有可用任务
|
|
|
|
|
|
>>> from src.data.sync_registry import sync_registry
|
|
|
|
|
|
>>> tasks = sync_registry.list_tasks()
|
|
|
|
|
|
>>> for t in tasks:
|
|
|
|
|
|
... print(f"{t.name}: {t.display_name}")
|
2026-02-23 16:23:53 +08:00
|
|
|
|
"""
|
2026-03-23 21:10:15 +08:00
|
|
|
|
# 记录调度中心开始
|
|
|
|
|
|
log_manager = SyncLogManager()
|
|
|
|
|
|
sync_mode = "full" if force_full else "incremental"
|
|
|
|
|
|
selected_str = ",".join(selected) if selected else "all"
|
|
|
|
|
|
log_manager.start_sync(
|
|
|
|
|
|
table_name="daily_data_batch",
|
|
|
|
|
|
sync_type=sync_mode,
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"selected": selected_str,
|
|
|
|
|
|
"dry_run": dry_run,
|
|
|
|
|
|
"max_workers": max_workers,
|
|
|
|
|
|
},
|
2026-03-05 21:11:18 +08:00
|
|
|
|
)
|
2026-02-23 16:23:53 +08:00
|
|
|
|
|
2026-03-23 21:10:15 +08:00
|
|
|
|
try:
|
|
|
|
|
|
result = sync_registry.sync_all(
|
|
|
|
|
|
force_full=force_full,
|
|
|
|
|
|
max_workers=max_workers,
|
|
|
|
|
|
dry_run=dry_run,
|
|
|
|
|
|
selected=selected,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 计算成功/失败数量
|
|
|
|
|
|
success_count = 0
|
|
|
|
|
|
failed_count = 0
|
|
|
|
|
|
total_records = 0
|
|
|
|
|
|
for task_name, task_result in result.items():
|
|
|
|
|
|
if isinstance(task_result, dict):
|
|
|
|
|
|
if task_result.get("status") == "error":
|
|
|
|
|
|
failed_count += 1
|
|
|
|
|
|
else:
|
|
|
|
|
|
success_count += 1
|
|
|
|
|
|
# 累加记录数(如果有)
|
|
|
|
|
|
if "rows" in task_result:
|
|
|
|
|
|
total_records += task_result.get("rows", 0)
|
|
|
|
|
|
elif isinstance(task_result, pd.DataFrame):
|
|
|
|
|
|
success_count += 1
|
|
|
|
|
|
total_records += len(task_result)
|
|
|
|
|
|
else:
|
|
|
|
|
|
success_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
# 记录完成日志
|
|
|
|
|
|
status = "partial" if failed_count > 0 else "success"
|
|
|
|
|
|
error_msg = f"Failed: {failed_count} tasks" if failed_count > 0 else None
|
|
|
|
|
|
log_manager.complete_sync(
|
|
|
|
|
|
table_name="daily_data_batch",
|
|
|
|
|
|
sync_type=sync_mode,
|
|
|
|
|
|
status=status,
|
|
|
|
|
|
records_inserted=total_records,
|
|
|
|
|
|
error_message=error_msg,
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"selected": selected_str,
|
|
|
|
|
|
"dry_run": dry_run,
|
|
|
|
|
|
"max_workers": max_workers,
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
# 记录失败日志
|
|
|
|
|
|
log_manager.complete_sync(
|
|
|
|
|
|
table_name="daily_data_batch",
|
|
|
|
|
|
sync_type=sync_mode,
|
|
|
|
|
|
status="failed",
|
|
|
|
|
|
error_message=str(e),
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"selected": selected_str,
|
|
|
|
|
|
"dry_run": dry_run,
|
|
|
|
|
|
"max_workers": max_workers,
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
raise
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
|
2026-03-05 21:11:18 +08:00
|
|
|
|
def list_sync_tasks() -> list[dict[str, Any]]:
|
|
|
|
|
|
"""列出所有已注册的同步任务。
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
任务信息列表,每个任务包含 name, display_name, description, order, enabled
|
2026-02-23 16:23:53 +08:00
|
|
|
|
|
2026-03-05 21:11:18 +08:00
|
|
|
|
Example:
|
|
|
|
|
|
>>> tasks = list_sync_tasks()
|
|
|
|
|
|
>>> for task in tasks:
|
|
|
|
|
|
... print(f"{task['order']:2d}. {task['name']}: {task['display_name']}")
|
|
|
|
|
|
"""
|
|
|
|
|
|
tasks = sync_registry.list_tasks()
|
|
|
|
|
|
return [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": t.name,
|
|
|
|
|
|
"display_name": t.display_name,
|
|
|
|
|
|
"description": t.description,
|
|
|
|
|
|
"order": t.order,
|
|
|
|
|
|
"enabled": t.enabled,
|
|
|
|
|
|
}
|
|
|
|
|
|
for t in tasks
|
|
|
|
|
|
]
|
2026-02-23 16:23:53 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-01 04:44:01 +08:00
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
|
print("=" * 60)
|
|
|
|
|
|
print("Data Sync Module")
|
|
|
|
|
|
print("=" * 60)
|
2026-03-05 21:11:18 +08:00
|
|
|
|
print("\nRegistered sync tasks:")
|
|
|
|
|
|
print("-" * 60)
|
|
|
|
|
|
|
|
|
|
|
|
tasks = list_sync_tasks()
|
|
|
|
|
|
for task in tasks:
|
|
|
|
|
|
status = "[启用]" if task["enabled"] else "[禁用]"
|
|
|
|
|
|
print(f" {status} {task['order']:2d}. {task['name']}: {task['display_name']}")
|
|
|
|
|
|
|
|
|
|
|
|
print("-" * 60)
|
|
|
|
|
|
print(f"\nTotal: {len(tasks)} tasks")
|
2026-02-01 04:44:01 +08:00
|
|
|
|
print("\nUsage:")
|
2026-02-23 16:23:53 +08:00
|
|
|
|
print(" # Sync all data types at once (RECOMMENDED)")
|
|
|
|
|
|
print(" from src.data.sync import sync_all_data")
|
|
|
|
|
|
print(" result = sync_all_data() # Incremental sync all")
|
|
|
|
|
|
print(" result = sync_all_data(force_full=True) # Full reload")
|
|
|
|
|
|
print("")
|
2026-03-05 21:11:18 +08:00
|
|
|
|
print(" # Sync selected data types only")
|
|
|
|
|
|
print(" result = sync_all_data(selected=['trade_cal', 'pro_bar'])")
|
|
|
|
|
|
print("")
|
|
|
|
|
|
print(" # List all available sync tasks")
|
|
|
|
|
|
print(" tasks = list_sync_tasks()")
|
2026-02-21 03:43:30 +08:00
|
|
|
|
print("")
|
|
|
|
|
|
print(" # Preview before sync (recommended)")
|
|
|
|
|
|
print(" preview = preview_sync()")
|
|
|
|
|
|
print("")
|
|
|
|
|
|
print(" # Dry run (preview only)")
|
|
|
|
|
|
print(" result = sync_all(dry_run=True)")
|
|
|
|
|
|
print("")
|
|
|
|
|
|
print(" # Actual sync")
|
2026-02-01 04:44:01 +08:00
|
|
|
|
print(" result = sync_all() # Incremental sync")
|
|
|
|
|
|
print(" result = sync_all(force_full=True) # Full reload")
|
|
|
|
|
|
print("\n" + "=" * 60)
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
# Run sync_all_data by default
|
|
|
|
|
|
print("\n[Main] Running sync_all_data()...")
|
|
|
|
|
|
result = sync_all_data()
|
|
|
|
|
|
print("\n[Main] Sync completed!")
|
|
|
|
|
|
print(f"Total data types synced: {len(result)}")
|