refactor: 将表结构定义从 storage 迁移到各 API 文件
- 移除 storage.py 集中式建表逻辑,改为各 API 文件自管理
- base_sync.py 新增 ensure_table_exists() 和表探测机制
- api_daily/api_pro_bar/api_bak_basic 添加 TABLE_SCHEMA 定义
- api_financial_sync 添加完整利润表字段定义
- sync.py 更新职责文档,明确仅同步每日更新数据
- AGENTS.md 添加 v2.1 架构变更历史和 AI 行为准则
This commit is contained in:
141
src/data/sync.py
141
src/data/sync.py
@@ -1,22 +1,34 @@
|
||||
"""数据同步调度中心模块。
|
||||
|
||||
该模块作为数据同步的调度中心,统一管理各类型数据的同步流程。
|
||||
具体的同步逻辑已迁移到对应的 api_xxx.py 文件中:
|
||||
- api_daily.py: 日线数据同步 (DailySync 类)
|
||||
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
|
||||
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
|
||||
- api_stock_basic.py: 股票基本信息同步
|
||||
- api_trade_cal.py: 交易日历同步
|
||||
|
||||
注意:名称变更 (namechange) 已从自动同步中移除,
|
||||
因为股票名称变更不频繁,建议手动定期同步。
|
||||
【重要规范 - sync.py 职责范围】
|
||||
本模块**仅包含每日更新的数据接口**,季度/低频数据不应放入此文件:
|
||||
|
||||
✅ 本模块包含的同步逻辑(每日更新):
|
||||
- api_daily.py: 日线数据同步 (DailySync 类)
|
||||
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
|
||||
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
|
||||
- api_stock_basic.py: 股票基本信息同步
|
||||
- api_trade_cal.py: 交易日历同步
|
||||
|
||||
❌ 不应包含的同步逻辑(季度/低频更新):
|
||||
- financial_data/: 财务数据(利润表、资产负债表、现金流量表等)
|
||||
使用方式:
|
||||
from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
|
||||
sync_financial()
|
||||
|
||||
- api_namechange.py: 股票名称变更(不频繁)
|
||||
使用方式:
|
||||
from src.data.api_wrappers import sync_namechange
|
||||
sync_namechange(force=True)
|
||||
|
||||
使用方式:
|
||||
# 预览同步(检查数据量,不写入)
|
||||
from src.data.sync import preview_sync
|
||||
preview = preview_sync()
|
||||
|
||||
# 同步所有数据(不包括 namechange)
|
||||
# 同步所有每日更新数据(不包括财务数据、namechange)
|
||||
from src.data.sync import sync_all_data
|
||||
result = sync_all_data()
|
||||
|
||||
@@ -24,7 +36,7 @@
|
||||
result = sync_all_data(force_full=True)
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict
|
||||
from typing import Optional, Dict, Union, Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
@@ -40,7 +52,7 @@ def preview_sync(
|
||||
end_date: Optional[str] = None,
|
||||
sample_size: int = 3,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> dict:
|
||||
) -> dict[str, Any]:
|
||||
"""预览日线同步数据量和样本(不实际同步)。
|
||||
|
||||
这是推荐的方式,可在实际同步前检查将要同步的内容。
|
||||
@@ -88,7 +100,7 @@ def sync_all(
|
||||
end_date: Optional[str] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
dry_run: bool = False,
|
||||
) -> Dict[str, pd.DataFrame]:
|
||||
) -> dict[str, pd.DataFrame]:
|
||||
"""同步所有股票的日线数据。
|
||||
|
||||
这是日线数据同步的主要入口点。
|
||||
@@ -135,16 +147,26 @@ def sync_all_data(
|
||||
force_full: bool = False,
|
||||
max_workers: Optional[int] = None,
|
||||
dry_run: bool = False,
|
||||
) -> Dict[str, pd.DataFrame]:
|
||||
"""同步所有数据类型(每日同步)。
|
||||
) -> dict[str, Any]:
|
||||
"""同步所有每日更新的数据类型。
|
||||
|
||||
该函数按顺序同步所有可用的数据类型:
|
||||
【重要】本函数仅同步每日更新的数据,不包含季度/低频数据。
|
||||
|
||||
该函数按顺序同步以下每日更新的数据类型:
|
||||
1. 交易日历 (sync_trade_cal_cache)
|
||||
2. 股票基本信息 (sync_all_stocks)
|
||||
3. Pro Bar 数据 (sync_pro_bar)
|
||||
4. 历史股票列表 (sync_bak_basic)
|
||||
3. 日线数据 (sync_daily)
|
||||
4. Pro Bar 数据 (sync_pro_bar)
|
||||
5. 历史股票列表 (sync_bak_basic)
|
||||
|
||||
注意:名称变更 (namechange) 不在自动同步中,如需同步请手动调用。
|
||||
【不包含的同步(需单独调用)】
|
||||
- 财务数据: 利润表、资产负债表、现金流量表(季度更新)
|
||||
使用: from src.data.api_wrappers.financial_data.api_financial_sync import sync_financial
|
||||
调用: sync_financial()
|
||||
|
||||
- 名称变更 (namechange): 股票曾用名(低频更新)
|
||||
使用: from src.data.api_wrappers import sync_namechange
|
||||
调用: sync_namechange(force=True)
|
||||
|
||||
Args:
|
||||
force_full: 若为 True,强制所有数据类型完整重载
|
||||
@@ -163,68 +185,109 @@ def sync_all_data(
|
||||
>>> # Dry run
|
||||
>>> result = sync_all_data(dry_run=True)
|
||||
"""
|
||||
results: Dict[str, pd.DataFrame] = {}
|
||||
results: dict[str, Any] = {}
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("[sync_all_data] Starting full data synchronization...")
|
||||
print("=" * 60)
|
||||
|
||||
# 1. Sync trade calendar (always needed first)
|
||||
print("\n[1/4] Syncing trade calendar cache...")
|
||||
print("\n[1/5] Syncing trade calendar cache...")
|
||||
try:
|
||||
from src.data.api_wrappers import sync_trade_cal_cache
|
||||
|
||||
sync_trade_cal_cache()
|
||||
results["trade_cal"] = pd.DataFrame()
|
||||
print("[1/4] Trade calendar: OK")
|
||||
print("[1/5] Trade calendar: OK")
|
||||
except Exception as e:
|
||||
print(f"[1/4] Trade calendar: FAILED - {e}")
|
||||
print(f"[1/5] Trade calendar: FAILED - {e}")
|
||||
results["trade_cal"] = pd.DataFrame()
|
||||
|
||||
# 2. Sync stock basic info
|
||||
print("\n[2/4] Syncing stock basic info...")
|
||||
print("\n[2/5] Syncing stock basic info...")
|
||||
try:
|
||||
sync_all_stocks()
|
||||
results["stock_basic"] = pd.DataFrame()
|
||||
print("[2/4] Stock basic: OK")
|
||||
print("[2/5] Stock basic: OK")
|
||||
except Exception as e:
|
||||
print(f"[2/4] Stock basic: FAILED - {e}")
|
||||
print(f"[2/5] Stock basic: FAILED - {e}")
|
||||
results["stock_basic"] = pd.DataFrame()
|
||||
|
||||
# 3. Sync Pro Bar data
|
||||
print("\n[3/4] Syncing Pro Bar data (with adj, tor, vr)...")
|
||||
# 3. Sync daily market data
|
||||
print("\n[3/5] Syncing daily market data...")
|
||||
try:
|
||||
# 确保表存在
|
||||
from src.data.api_wrappers.api_daily import DailySync
|
||||
|
||||
DailySync().ensure_table_exists()
|
||||
|
||||
daily_result = sync_daily(
|
||||
force_full=force_full,
|
||||
max_workers=max_workers,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
results["daily"] = daily_result
|
||||
total_daily_records = (
|
||||
sum(len(df) for df in daily_result.values()) if daily_result else 0
|
||||
)
|
||||
print(
|
||||
f"[3/5] Daily data: OK ({total_daily_records} records from {len(daily_result)} stocks)"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[3/5] Daily data: FAILED - {e}")
|
||||
results["daily"] = pd.DataFrame()
|
||||
|
||||
# 4. Sync Pro Bar data
|
||||
print("\n[4/5] Syncing Pro Bar data (with adj, tor, vr)...")
|
||||
try:
|
||||
# 确保表存在
|
||||
from src.data.api_wrappers.api_pro_bar import ProBarSync
|
||||
|
||||
ProBarSync().ensure_table_exists()
|
||||
|
||||
pro_bar_result = sync_pro_bar(
|
||||
force_full=force_full,
|
||||
max_workers=max_workers,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
results["pro_bar"] = (
|
||||
pd.concat(pro_bar_result.values(), ignore_index=True)
|
||||
if pro_bar_result
|
||||
else pd.DataFrame()
|
||||
results["pro_bar"] = pro_bar_result
|
||||
total_pro_bar_records = (
|
||||
sum(len(df) for df in pro_bar_result.values()) if pro_bar_result else 0
|
||||
)
|
||||
print(
|
||||
f"[4/5] Pro Bar data: OK ({total_pro_bar_records} records from {len(pro_bar_result)} stocks)"
|
||||
)
|
||||
print(f"[3/4] Pro Bar data: OK ({len(results['pro_bar'])} records)")
|
||||
except Exception as e:
|
||||
print(f"[3/4] Pro Bar data: FAILED - {e}")
|
||||
print(f"[4/5] Pro Bar data: FAILED - {e}")
|
||||
results["pro_bar"] = pd.DataFrame()
|
||||
|
||||
# 4. Sync stock historical list (bak_basic)
|
||||
print("\n[4/4] Syncing stock historical list (bak_basic)...")
|
||||
# 5. Sync stock historical list (bak_basic)
|
||||
print("\n[5/5] Syncing stock historical list (bak_basic)...")
|
||||
try:
|
||||
# 确保表存在
|
||||
from src.data.api_wrappers.api_bak_basic import BakBasicSync
|
||||
|
||||
BakBasicSync().ensure_table_exists()
|
||||
|
||||
bak_basic_result = sync_bak_basic(force_full=force_full)
|
||||
results["bak_basic"] = bak_basic_result
|
||||
print(f"[4/4] Bak basic: OK ({len(bak_basic_result)} records)")
|
||||
print(f"[5/5] Bak basic: OK ({len(bak_basic_result)} records)")
|
||||
except Exception as e:
|
||||
print(f"[4/4] Bak basic: FAILED - {e}")
|
||||
print(f"[5/5] Bak basic: FAILED - {e}")
|
||||
results["bak_basic"] = pd.DataFrame()
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 60)
|
||||
print("[sync_all_data] Sync Summary")
|
||||
print("=" * 60)
|
||||
for data_type, df in results.items():
|
||||
print(f" {data_type}: {len(df)} records")
|
||||
for data_type, data in results.items():
|
||||
if isinstance(data, dict):
|
||||
# 日线和 Pro Bar 返回的是 dict[str, DataFrame]
|
||||
total_records = sum(len(df) for df in data.values())
|
||||
print(f" {data_type}: {len(data)} stocks, {total_records} total records")
|
||||
else:
|
||||
# bak_basic 返回的是 DataFrame
|
||||
print(f" {data_type}: {len(data)} records")
|
||||
print("=" * 60)
|
||||
print("\nNote: namechange is NOT in auto-sync. To sync manually:")
|
||||
print(" from src.data.api_wrappers import sync_namechange")
|
||||
|
||||
Reference in New Issue
Block a user