"""数据同步调度中心模块。 该模块作为数据同步的调度中心,统一管理各类型数据的同步流程。 具体的同步逻辑已迁移到对应的 api_xxx.py 文件中: - api_daily.py: 日线数据同步 (DailySync 类) - api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类) - api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类) - api_stock_basic.py: 股票基本信息同步 - api_trade_cal.py: 交易日历同步 注意:名称变更 (namechange) 已从自动同步中移除, 因为股票名称变更不频繁,建议手动定期同步。 使用方式: # 预览同步(检查数据量,不写入) from src.data.sync import preview_sync preview = preview_sync() # 同步所有数据(不包括 namechange) from src.data.sync import sync_all_data result = sync_all_data() # 强制全量重载 result = sync_all_data(force_full=True) """ from typing import Optional, Dict import pandas as pd from src.data.api_wrappers import sync_all_stocks from src.data.api_wrappers.api_daily import sync_daily, preview_daily_sync from src.data.api_wrappers.api_pro_bar import sync_pro_bar from src.data.api_wrappers.api_bak_basic import sync_bak_basic def preview_sync( force_full: bool = False, start_date: Optional[str] = None, end_date: Optional[str] = None, sample_size: int = 3, max_workers: Optional[int] = None, ) -> dict: """预览日线同步数据量和样本(不实际同步)。 这是推荐的方式,可在实际同步前检查将要同步的内容。 Args: force_full: 若为 True,预览全量同步(从 20180101) start_date: 手动指定起始日期(覆盖自动检测) end_date: 手动指定结束日期(默认为今天) sample_size: 预览用样本股票数量(默认: 3) max_workers: 工作线程数(默认: 10) Returns: 包含预览信息的字典: { 'sync_needed': bool, 'stock_count': int, 'start_date': str, 'end_date': str, 'estimated_records': int, 'sample_data': pd.DataFrame, 'mode': str, # 'full', 'incremental', 'partial', 或 'none' } Example: >>> # 预览将要同步的内容 >>> preview = preview_sync() >>> >>> # 预览全量同步 >>> preview = preview_sync(force_full=True) >>> >>> # 预览更多样本 >>> preview = preview_sync(sample_size=5) """ return preview_daily_sync( force_full=force_full, start_date=start_date, end_date=end_date, sample_size=sample_size, ) def sync_all( force_full: bool = False, start_date: Optional[str] = None, end_date: Optional[str] = None, max_workers: Optional[int] = None, dry_run: bool = False, ) -> Dict[str, pd.DataFrame]: """同步所有股票的日线数据。 这是日线数据同步的主要入口点。 Args: force_full: 若为 True,强制从 20180101 完整重载 start_date: 手动指定起始日期(YYYYMMDD) end_date: 手动指定结束日期(默认为今天) max_workers: 工作线程数(默认: 10) dry_run: 若为 True,仅预览将要同步的内容,不写入数据 Returns: 映射 ts_code 到 DataFrame 的字典 Example: >>> # 首次同步(从 20180101 全量加载) >>> result = sync_all() >>> >>> # 后续同步(增量 - 仅新数据) >>> result = sync_all() >>> >>> # 强制完整重载 >>> result = sync_all(force_full=True) >>> >>> # 手动指定日期范围 >>> result = sync_all(start_date='20240101', end_date='20240131') >>> >>> # 自定义线程数 >>> result = sync_all(max_workers=20) >>> >>> # Dry run(仅预览) >>> result = sync_all(dry_run=True) """ return sync_daily( force_full=force_full, start_date=start_date, end_date=end_date, max_workers=max_workers, dry_run=dry_run, ) def sync_all_data( force_full: bool = False, max_workers: Optional[int] = None, dry_run: bool = False, ) -> Dict[str, pd.DataFrame]: """同步所有数据类型(每日同步)。 该函数按顺序同步所有可用的数据类型: 1. 交易日历 (sync_trade_cal_cache) 2. 股票基本信息 (sync_all_stocks) 3. Pro Bar 数据 (sync_pro_bar) 4. 历史股票列表 (sync_bak_basic) 注意:名称变更 (namechange) 不在自动同步中,如需同步请手动调用。 Args: force_full: 若为 True,强制所有数据类型完整重载 max_workers: 日线数据同步的工作线程数(默认: 10) dry_run: 若为 True,仅显示将要同步的内容,不写入数据 Returns: 映射数据类型到同步结果的字典 Example: >>> result = sync_all_data() >>> >>> # 强制完整重载 >>> result = sync_all_data(force_full=True) >>> >>> # Dry run >>> result = sync_all_data(dry_run=True) """ results: Dict[str, pd.DataFrame] = {} print("\n" + "=" * 60) print("[sync_all_data] Starting full data synchronization...") print("=" * 60) # 1. Sync trade calendar (always needed first) print("\n[1/4] Syncing trade calendar cache...") try: from src.data.api_wrappers import sync_trade_cal_cache sync_trade_cal_cache() results["trade_cal"] = pd.DataFrame() print("[1/4] Trade calendar: OK") except Exception as e: print(f"[1/4] Trade calendar: FAILED - {e}") results["trade_cal"] = pd.DataFrame() # 2. Sync stock basic info print("\n[2/4] Syncing stock basic info...") try: sync_all_stocks() results["stock_basic"] = pd.DataFrame() print("[2/4] Stock basic: OK") except Exception as e: print(f"[2/4] Stock basic: FAILED - {e}") results["stock_basic"] = pd.DataFrame() # 3. Sync Pro Bar data print("\n[3/4] Syncing Pro Bar data (with adj, tor, vr)...") try: pro_bar_result = sync_pro_bar( force_full=force_full, max_workers=max_workers, dry_run=dry_run, ) results["pro_bar"] = ( pd.concat(pro_bar_result.values(), ignore_index=True) if pro_bar_result else pd.DataFrame() ) print(f"[3/4] Pro Bar data: OK ({len(results['pro_bar'])} records)") except Exception as e: print(f"[3/4] Pro Bar data: FAILED - {e}") results["pro_bar"] = pd.DataFrame() # 4. Sync stock historical list (bak_basic) print("\n[4/4] Syncing stock historical list (bak_basic)...") try: bak_basic_result = sync_bak_basic(force_full=force_full) results["bak_basic"] = bak_basic_result print(f"[4/4] Bak basic: OK ({len(bak_basic_result)} records)") except Exception as e: print(f"[4/4] Bak basic: FAILED - {e}") results["bak_basic"] = pd.DataFrame() # Summary print("\n" + "=" * 60) print("[sync_all_data] Sync Summary") print("=" * 60) for data_type, df in results.items(): print(f" {data_type}: {len(df)} records") print("=" * 60) print("\nNote: namechange is NOT in auto-sync. To sync manually:") print(" from src.data.api_wrappers import sync_namechange") print(" sync_namechange(force=True)") return results if __name__ == "__main__": print("=" * 60) print("Data Sync Module") print("=" * 60) print("\nUsage:") print(" # Sync all data types at once (RECOMMENDED)") print(" from src.data.sync import sync_all_data") print(" result = sync_all_data() # Incremental sync all") print(" result = sync_all_data(force_full=True) # Full reload") print("") print(" # Or sync individual data types:") print(" from src.data.sync import sync_all, preview_sync") print(" from src.data.sync import sync_bak_basic") print("") print(" # Preview before sync (recommended)") print(" preview = preview_sync()") print("") print(" # Dry run (preview only)") print(" result = sync_all(dry_run=True)") print("") print(" # Actual sync") print(" result = sync_all() # Incremental sync") print(" result = sync_all(force_full=True) # Full reload") print("") print(" # bak_basic sync") print(" result = sync_bak_basic() # Incremental sync") print(" result = sync_bak_basic(force_full=True) # Full reload") print("\n" + "=" * 60) # Run sync_all_data by default print("\n[Main] Running sync_all_data()...") result = sync_all_data() print("\n[Main] Sync completed!") print(f"Total data types synced: {len(result)}")