2026-02-23 16:23:53 +08:00
|
|
|
|
"""数据同步调度中心模块。
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
该模块作为数据同步的调度中心,统一管理各类型数据的同步流程。
|
|
|
|
|
|
具体的同步逻辑已迁移到对应的 api_xxx.py 文件中:
|
|
|
|
|
|
- api_daily.py: 日线数据同步 (DailySync 类)
|
2026-02-27 23:34:12 +08:00
|
|
|
|
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
|
|
|
|
|
|
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
|
2026-02-23 16:23:53 +08:00
|
|
|
|
- api_stock_basic.py: 股票基本信息同步
|
|
|
|
|
|
- api_trade_cal.py: 交易日历同步
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
注意:名称变更 (namechange) 已从自动同步中移除,
|
|
|
|
|
|
因为股票名称变更不频繁,建议手动定期同步。
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
使用方式:
|
|
|
|
|
|
# 预览同步(检查数据量,不写入)
|
|
|
|
|
|
from src.data.sync import preview_sync
|
|
|
|
|
|
preview = preview_sync()
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
# 同步所有数据(不包括 namechange)
|
|
|
|
|
|
from src.data.sync import sync_all_data
|
|
|
|
|
|
result = sync_all_data()
|
2026-02-21 03:43:30 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
# 强制全量重载
|
|
|
|
|
|
result = sync_all_data(force_full=True)
|
2026-02-01 04:44:01 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
from typing import Optional, Dict
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
import pandas as pd
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
from src.data.api_wrappers import sync_all_stocks
|
|
|
|
|
|
from src.data.api_wrappers.api_daily import sync_daily, preview_daily_sync
|
2026-02-27 22:22:23 +08:00
|
|
|
|
from src.data.api_wrappers.api_pro_bar import sync_pro_bar
|
2026-02-27 23:34:12 +08:00
|
|
|
|
from src.data.api_wrappers.api_bak_basic import sync_bak_basic
|
2026-02-01 04:44:01 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-21 03:43:30 +08:00
|
|
|
|
def preview_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
    max_workers: Optional[int] = None,
) -> dict:
    """Preview the volume and a sample of a daily-data sync without syncing.

    Thin wrapper that delegates to ``preview_daily_sync``; intended to be
    called before an actual sync to inspect what would be fetched.

    Args:
        force_full: If True, preview a full sync rather than an incremental
            one.
        start_date: Manually chosen start date (overrides auto-detection).
        end_date: Manually chosen end date.
        sample_size: Number of sample stocks to include in the preview.
        max_workers: Currently unused; it is NOT forwarded to
            ``preview_daily_sync``.
            NOTE(review): confirm whether the callee accepts it.

    Returns:
        Whatever ``preview_daily_sync`` returns. It is documented as a dict
        with the keys ``sync_needed``, ``stock_count``, ``start_date``,
        ``end_date``, ``estimated_records``, ``sample_data`` and ``mode``
        ('full', 'incremental', 'partial' or 'none') — verify against the
        callee.

    Example:
        >>> preview = preview_sync()
        >>> preview = preview_sync(force_full=True)
        >>> preview = preview_sync(sample_size=5)
    """
    # Gather the forwarded options first so the delegated call is one line.
    preview_options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "sample_size": sample_size,
    }
    return preview_daily_sync(**preview_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-01 04:44:01 +08:00
|
|
|
|
def sync_all(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> Dict[str, pd.DataFrame]:
    """Sync daily data for all stocks.

    Main entry point for daily-data synchronization; every argument is
    forwarded unchanged to ``sync_daily``.

    Args:
        force_full: If True, force a complete reload instead of an
            incremental sync.
        start_date: Manually chosen start date (YYYYMMDD).
        end_date: Manually chosen end date.
        max_workers: Number of worker threads; ``None`` leaves the choice to
            ``sync_daily``.
        dry_run: If True, only preview what would be synced.

    Returns:
        The value returned by ``sync_daily``, documented as a dict mapping
        ts_code to DataFrame.

    Example:
        >>> result = sync_all()
        >>> result = sync_all(force_full=True)
        >>> result = sync_all(start_date='20240101', end_date='20240131')
        >>> result = sync_all(max_workers=20)
        >>> result = sync_all(dry_run=True)
    """
    # Collect the pass-through options, then delegate in a single call.
    daily_options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "max_workers": max_workers,
        "dry_run": dry_run,
    }
    return sync_daily(**daily_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-23 16:23:53 +08:00
|
|
|
|
def _run_sync_step(tag, title, label, action, with_count=False):
    """Run one sync step, print its progress, and never raise.

    Args:
        tag: Progress prefix such as ``"[1/4]"``.
        title: Text printed after the tag when the step starts.
        label: Short step name used in the OK / FAILED line.
        action: Zero-argument callable that performs the step and returns
            the object to store as the step's result.
        with_count: If True, append ``(N records)`` to the OK line, where N
            is ``len()`` of the result.

    Returns:
        A ``(result, ok)`` tuple. On failure ``result`` is an empty
        DataFrame and ``ok`` is False.
    """
    print(f"\n{tag} {title}")
    try:
        frame = action()
        suffix = f" ({len(frame)} records)" if with_count else ""
        print(f"{tag} {label}: OK{suffix}")
        return frame, True
    except Exception as e:
        # Deliberately broad: one failing data type must not abort the
        # remaining steps. The failure is reported here and in the summary.
        print(f"{tag} {label}: FAILED - {e}")
        return pd.DataFrame(), False


def sync_all_data(
    force_full: bool = False,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> Dict[str, pd.DataFrame]:
    """Sync every supported data type in a fixed order (daily routine).

    Steps, in order:
        1. Trade calendar (``sync_trade_cal_cache``)
        2. Stock basic info (``sync_all_stocks``)
        3. Pro Bar data (``sync_pro_bar``)
        4. Historical stock list (``sync_bak_basic``)

    A failing step is reported and recorded as an empty DataFrame; the
    remaining steps still run. Name changes (namechange) are not part of
    this routine and must be synced manually.

    Args:
        force_full: If True, request a full reload. Forwarded to
            ``sync_pro_bar`` and ``sync_bak_basic``.
        max_workers: Worker-thread count, forwarded to ``sync_pro_bar`` only.
        dry_run: Forwarded to ``sync_pro_bar`` only.
            NOTE(review): the trade-calendar, stock-basic and bak_basic steps
            are invoked regardless of this flag — confirm whether they write
            data in dry-run mode.

    Returns:
        Dict with the keys ``trade_cal``, ``stock_basic``, ``pro_bar`` and
        ``bak_basic``. ``trade_cal`` and ``stock_basic`` are always empty
        DataFrames; ``pro_bar`` is the concatenation of the per-key frames
        returned by ``sync_pro_bar``; ``bak_basic`` is the value returned by
        ``sync_bak_basic``.

    Example:
        >>> result = sync_all_data()
        >>> result = sync_all_data(force_full=True)
        >>> result = sync_all_data(dry_run=True)
    """

    def _trade_cal():
        # Kept as a function-local import; presumably avoids an import
        # cycle — TODO confirm.
        from src.data.api_wrappers import sync_trade_cal_cache

        sync_trade_cal_cache()
        return pd.DataFrame()

    def _stock_basic():
        sync_all_stocks()
        return pd.DataFrame()

    def _pro_bar():
        per_key = sync_pro_bar(
            force_full=force_full,
            max_workers=max_workers,
            dry_run=dry_run,
        )
        if not per_key:
            return pd.DataFrame()
        return pd.concat(per_key.values(), ignore_index=True)

    def _bak_basic():
        return sync_bak_basic(force_full=force_full)

    # (result key, start message, label, action, show record count)
    steps = [
        ("trade_cal", "Syncing trade calendar cache...", "Trade calendar",
         _trade_cal, False),
        ("stock_basic", "Syncing stock basic info...", "Stock basic",
         _stock_basic, False),
        ("pro_bar", "Syncing Pro Bar data (with adj, tor, vr)...",
         "Pro Bar data", _pro_bar, True),
        ("bak_basic", "Syncing stock historical list (bak_basic)...",
         "Bak basic", _bak_basic, True),
    ]

    results: Dict[str, pd.DataFrame] = {}
    failed = set()

    print("\n" + "=" * 60)
    print("[sync_all_data] Starting full data synchronization...")
    print("=" * 60)

    total = len(steps)
    for index, (key, title, label, action, with_count) in enumerate(steps, start=1):
        frame, ok = _run_sync_step(
            f"[{index}/{total}]", title, label, action, with_count
        )
        results[key] = frame
        if not ok:
            failed.add(key)

    # Summary
    print("\n" + "=" * 60)
    print("[sync_all_data] Sync Summary")
    print("=" * 60)
    for data_type, df in results.items():
        if data_type in failed:
            # Keep a failed step distinguishable from a successful no-op.
            print(f" {data_type}: FAILED (see error above)")
        else:
            print(f" {data_type}: {len(df)} records")
    print("=" * 60)
    print("\nNote: namechange is NOT in auto-sync. To sync manually:")
    print(" from src.data.api_wrappers import sync_namechange")
    print(" sync_namechange(force=True)")

    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-01 04:44:01 +08:00
|
|
|
|
if __name__ == "__main__":
    # Print the usage banner in one call; the emitted text is the same as
    # printing each line separately.
    print(
        "\n".join(
            (
                "=" * 60,
                "Data Sync Module",
                "=" * 60,
                "\nUsage:",
                " # Sync all data types at once (RECOMMENDED)",
                " from src.data.sync import sync_all_data",
                " result = sync_all_data() # Incremental sync all",
                " result = sync_all_data(force_full=True) # Full reload",
                "",
                " # Or sync individual data types:",
                " from src.data.sync import sync_all, preview_sync",
                " from src.data.sync import sync_bak_basic",
                "",
                " # Preview before sync (recommended)",
                " preview = preview_sync()",
                "",
                " # Dry run (preview only)",
                " result = sync_all(dry_run=True)",
                "",
                " # Actual sync",
                " result = sync_all() # Incremental sync",
                " result = sync_all(force_full=True) # Full reload",
                "",
                " # bak_basic sync",
                " result = sync_bak_basic() # Incremental sync",
                " result = sync_bak_basic(force_full=True) # Full reload",
                "\n" + "=" * 60,
            )
        )
    )

    # Running the module as a script performs a full sync_all_data() pass.
    print("\n[Main] Running sync_all_data()...")
    result = sync_all_data()
    print("\n[Main] Sync completed!")
    print(f"Total data types synced: {len(result)}")
|