Files
ProStock/src/data/sync.py
liaozhaorun 6730acbae1 feat(data): 添加每日筹码及胜率数据接口 (cyq_perf)
- 新增 api_cyq_perf 模块,支持筹码分布数据获取和同步
- 在 sync_registry 中注册 cyq_perf 同步器
2026-03-26 00:15:30 +08:00

267 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""数据同步调度中心模块。
该模块作为数据同步的调度中心,统一管理各类型数据的同步流程。
【重要规范 - sync.py 职责范围】
本模块**仅包含每日更新的数据接口**,季度/低频数据不应放入此文件:
✅ 本模块包含的同步逻辑(每日更新):
- api_daily.py: 日线数据同步 (DailySync 类)
- api_daily_basic.py: 每日指标数据同步 (DailyBasicSync 类)
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
- api_stock_st.py: ST股票列表同步 (StockSTSync 类)
- api_stk_limit.py: 涨跌停价格同步 (StkLimitSync 类)
- api_cyq_perf.py: 筹码分布数据同步 (CyqPerfSync 类)
- api_stock_basic.py: 股票基本信息同步
- api_trade_cal.py: 交易日历同步
❌ 不应包含的同步逻辑(季度/低频更新):
- financial_data/: 财务数据(利润表、资产负债表、现金流量表等)
使用方式:
from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
sync_financial()
- api_namechange.py: 股票名称变更(不频繁)
使用方式:
from src.data.api_wrappers import sync_namechange
sync_namechange(force=True)
【架构说明】
本模块使用 SyncRegistry 注册表模式管理同步任务,避免手动罗列各个接口。
同步任务在 api_wrappers/__init__.py 中自动注册,新增接口无需修改 sync.py。
使用方式:
# 预览同步(检查数据量,不写入)
from src.data.sync import preview_sync
preview = preview_sync()
# 同步所有每日更新数据不包括财务数据、namechange
from src.data.sync import sync_all_data
result = sync_all_data()
# 强制全量重载
result = sync_all_data(force_full=True)
# 查看已注册的所有同步任务
from src.data.sync_registry import sync_registry
tasks = sync_registry.list_tasks()
for task in tasks:
print(f"{task.name}: {task.display_name}")
"""
from typing import Optional, Dict, Union, Any
import pandas as pd
# 导入以触发自动注册
from src.data import api_wrappers # noqa: F401
from src.data.sync_registry import sync_registry
from src.data.api_wrappers import sync_all_stocks
from src.data.sync_logger import SyncLogManager
def sync_all_data(
force_full: bool = False,
max_workers: Optional[int] = None,
dry_run: bool = False,
selected: Optional[list[str]] = None,
) -> dict[str, Any]:
"""同步所有每日更新的数据类型。
【重要】本函数仅同步每日更新的数据,不包含季度/低频数据。
【自动注册机制】
同步任务在 api_wrappers/__init__.py 中自动注册到 SyncRegistry。
当前注册的同步任务(按执行顺序):
1. trade_cal: 交易日历缓存
2. stock_basic: 股票基本信息
3. pro_bar: Pro Bar 数据(复权、换手率、量比)
4. daily_basic: 每日指标PE、PB、换手率、市值
5. bak_basic: 历史股票列表
6. stock_st: ST股票列表
7. stk_limit: 每日涨跌停价格
8. cyq_perf: 每日筹码及胜率
新增接口时,只需在 api_wrappers/__init__.py 中添加注册代码,
无需修改本函数。
【不包含的同步(需单独调用)】
- 财务数据: 利润表、资产负债表、现金流量表(季度更新)
使用: from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
调用: sync_financial()
- 名称变更 (namechange): 股票曾用名(低频更新)
使用: from src.data.api_wrappers import sync_namechange
调用: sync_namechange(force=True)
Args:
force_full: 若为 True强制所有数据类型完整重载
max_workers: 日线数据同步的工作线程数(默认: 10
dry_run: 若为 True仅显示将要同步的内容不写入数据
selected: 只同步指定的任务列表None表示同步所有
例如: selected=["trade_cal", "stock_basic"] 只同步交易日历和股票基本信息
Returns:
映射数据类型到同步结果的字典
Example:
>>> result = sync_all_data()
>>>
>>> # 强制完整重载
>>> result = sync_all_data(force_full=True)
>>>
>>> # Dry run
>>> result = sync_all_data(dry_run=True)
>>>
>>> # 只同步特定任务
>>> result = sync_all_data(selected=['trade_cal', 'pro_bar'])
>>>
>>> # 查看所有可用任务
>>> from src.data.sync_registry import sync_registry
>>> tasks = sync_registry.list_tasks()
>>> for t in tasks:
... print(f"{t.name}: {t.display_name}")
"""
# 记录调度中心开始
log_manager = SyncLogManager()
sync_mode = "full" if force_full else "incremental"
selected_str = ",".join(selected) if selected else "all"
log_manager.start_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
try:
result = sync_registry.sync_all(
force_full=force_full,
max_workers=max_workers,
dry_run=dry_run,
selected=selected,
)
# 计算成功/失败数量
success_count = 0
failed_count = 0
total_records = 0
for task_name, task_result in result.items():
if isinstance(task_result, dict):
if task_result.get("status") == "error":
failed_count += 1
else:
success_count += 1
# 累加记录数(如果有)
if "rows" in task_result:
total_records += task_result.get("rows", 0)
elif isinstance(task_result, pd.DataFrame):
success_count += 1
total_records += len(task_result)
else:
success_count += 1
# 记录完成日志
status = "partial" if failed_count > 0 else "success"
error_msg = f"Failed: {failed_count} tasks" if failed_count > 0 else None
log_manager.complete_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
status=status,
records_inserted=total_records,
error_message=error_msg,
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
return result
except Exception as e:
# 记录失败日志
log_manager.complete_sync(
table_name="daily_data_batch",
sync_type=sync_mode,
status="failed",
error_message=str(e),
metadata={
"selected": selected_str,
"dry_run": dry_run,
"max_workers": max_workers,
},
)
raise
def list_sync_tasks() -> list[dict[str, Any]]:
"""列出所有已注册的同步任务。
Returns:
任务信息列表,每个任务包含 name, display_name, description, order, enabled
Example:
>>> tasks = list_sync_tasks()
>>> for task in tasks:
... print(f"{task['order']:2d}. {task['name']}: {task['display_name']}")
"""
tasks = sync_registry.list_tasks()
return [
{
"name": t.name,
"display_name": t.display_name,
"description": t.description,
"order": t.order,
"enabled": t.enabled,
}
for t in tasks
]
if __name__ == "__main__":
print("=" * 60)
print("Data Sync Module")
print("=" * 60)
print("\nRegistered sync tasks:")
print("-" * 60)
tasks = list_sync_tasks()
for task in tasks:
status = "[启用]" if task["enabled"] else "[禁用]"
print(f" {status} {task['order']:2d}. {task['name']}: {task['display_name']}")
print("-" * 60)
print(f"\nTotal: {len(tasks)} tasks")
print("\nUsage:")
print(" # Sync all data types at once (RECOMMENDED)")
print(" from src.data.sync import sync_all_data")
print(" result = sync_all_data() # Incremental sync all")
print(" result = sync_all_data(force_full=True) # Full reload")
print("")
print(" # Sync selected data types only")
print(" result = sync_all_data(selected=['trade_cal', 'pro_bar'])")
print("")
print(" # List all available sync tasks")
print(" tasks = list_sync_tasks()")
print("")
print(" # Preview before sync (recommended)")
print(" preview = preview_sync()")
print("")
print(" # Dry run (preview only)")
print(" result = sync_all(dry_run=True)")
print("")
print(" # Actual sync")
print(" result = sync_all() # Incremental sync")
print(" result = sync_all(force_full=True) # Full reload")
print("\n" + "=" * 60)
# Run sync_all_data by default
print("\n[Main] Running sync_all_data()...")
result = sync_all_data()
print("\n[Main] Sync completed!")
print(f"Total data types synced: {len(result)}")