feat: HDF5迁移至DuckDB存储
- 新增DuckDB Storage与ThreadSafeStorage实现
- 新增db_manager模块支持增量同步策略
- DataLoader与Sync模块适配DuckDB
- 补充迁移相关文档与测试
- 修复README文档链接
This commit is contained in:
@@ -10,6 +10,10 @@ from pathlib import Path
|
||||
from src.data.client import TushareClient
|
||||
from src.data.config import get_config
|
||||
|
||||
# Module-level flag to track if cache has been synced in this session
|
||||
_cache_synced = False
|
||||
|
||||
|
||||
|
||||
# Trading calendar cache file path
|
||||
def _get_cache_path() -> Path:
|
||||
@@ -51,8 +55,9 @@ def _load_from_cache() -> pd.DataFrame:
|
||||
|
||||
try:
|
||||
with pd.HDFStore(cache_path, mode="r") as store:
|
||||
if "trade_cal" in store.keys():
|
||||
data = store["trade_cal"]
|
||||
# HDF5 keys include leading slash (e.g., '/trade_cal')
|
||||
if "/trade_cal" in store.keys():
|
||||
data = store["/trade_cal"]
|
||||
print(f"[trade_cal] Loaded {len(data)} records from cache")
|
||||
return data
|
||||
except Exception as e:
|
||||
@@ -77,6 +82,7 @@ def _get_cached_date_range() -> tuple[Optional[str], Optional[str]]:
|
||||
def sync_trade_cal_cache(
|
||||
start_date: str = "20180101",
|
||||
end_date: Optional[str] = None,
|
||||
force: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""Sync trade calendar data to local cache with incremental updates.
|
||||
|
||||
@@ -86,10 +92,17 @@ def sync_trade_cal_cache(
|
||||
Args:
|
||||
start_date: Initial start date for full sync (default: 20180101)
|
||||
end_date: End date (defaults to today)
|
||||
force: If True, force sync even if already synced in this session
|
||||
|
||||
Returns:
|
||||
Full trade calendar DataFrame (cached + new)
|
||||
"""
|
||||
global _cache_synced
|
||||
|
||||
# Skip if already synced in this session (unless forced)
|
||||
if _cache_synced and not force:
|
||||
return _load_from_cache()
|
||||
|
||||
if end_date is None:
|
||||
from datetime import datetime
|
||||
|
||||
@@ -137,6 +150,8 @@ def sync_trade_cal_cache(
|
||||
combined = new_data
|
||||
|
||||
# Save combined data to cache
|
||||
# Mark as synced to avoid redundant syncs in this session
|
||||
_cache_synced = True
|
||||
_save_to_cache(combined)
|
||||
return combined
|
||||
else:
|
||||
@@ -153,6 +168,8 @@ def sync_trade_cal_cache(
|
||||
print("[trade_cal] No data returned")
|
||||
return data
|
||||
|
||||
# Mark as synced to avoid redundant syncs in this session
|
||||
_cache_synced = True
|
||||
_save_to_cache(data)
|
||||
return data
|
||||
|
||||
|
||||
Reference in New Issue
Block a user