feat: HDF5迁移至DuckDB存储
- 新增DuckDB Storage与ThreadSafeStorage实现
- 新增db_manager模块支持增量同步策略
- DataLoader与Sync模块适配DuckDB
- 补充迁移相关文档与测试
- 修复README文档链接
This commit is contained in:
@@ -36,7 +36,7 @@ import threading
|
||||
import sys
|
||||
|
||||
from src.data.client import TushareClient
|
||||
from src.data.storage import Storage
|
||||
from src.data.storage import ThreadSafeStorage
|
||||
from src.data.api_wrappers import get_daily
|
||||
from src.data.api_wrappers import (
|
||||
get_first_trading_day,
|
||||
@@ -83,7 +83,7 @@ class DataSync:
|
||||
Args:
|
||||
max_workers: Number of worker threads (default: 10)
|
||||
"""
|
||||
self.storage = Storage()
|
||||
self.storage = ThreadSafeStorage()
|
||||
self.client = TushareClient()
|
||||
self.max_workers = max_workers or self.DEFAULT_MAX_WORKERS
|
||||
self._stop_flag = threading.Event()
|
||||
@@ -667,11 +667,15 @@ class DataSync:
|
||||
finally:
|
||||
pbar.close()
|
||||
|
||||
# Write all data at once (only if no error)
|
||||
# Queue all data for batch write (only if no error)
|
||||
if results and not error_occurred:
|
||||
combined_data = pd.concat(results.values(), ignore_index=True)
|
||||
self.storage.save("daily", combined_data, mode="append")
|
||||
print(f"\n[DataSync] Saved {len(combined_data)} rows to storage")
|
||||
for ts_code, data in results.items():
|
||||
if not data.empty:
|
||||
self.storage.queue_save("daily", data)
|
||||
# Flush all queued writes at once
|
||||
self.storage.flush()
|
||||
total_rows = sum(len(df) for df in results.values())
|
||||
print(f"\n[DataSync] Saved {total_rows} rows to storage")
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 60)
|
||||
|
||||
Reference in New Issue
Block a user