feat: HDF5迁移至DuckDB存储

- 新增DuckDB Storage与ThreadSafeStorage实现
- 新增db_manager模块支持增量同步策略
- DataLoader与Sync模块适配DuckDB
- 补充迁移相关文档与测试
- 修复README文档链接
This commit is contained in:
2026-02-23 00:07:21 +08:00
parent 0a16129548
commit e58b39970c
14 changed files with 2265 additions and 329 deletions

View File

@@ -18,10 +18,26 @@ from src.data.sync import (
get_next_date,
DEFAULT_START_DATE,
)
from src.data.storage import Storage
from src.data.storage import ThreadSafeStorage
from src.data.client import TushareClient
@pytest.fixture
def mock_storage():
    """Provide a ``ThreadSafeStorage``-spec'd mock with stubbed I/O methods.

    The returned mock has three methods replaced by plain ``Mock`` objects:
    ``exists`` returns ``False``, ``load`` returns an empty ``DataFrame`` and
    ``save`` returns a success summary reporting zero rows.
    """
    # Method name -> canned return value for the replacement Mock.
    canned_results = {
        "exists": False,
        "load": pd.DataFrame(),
        "save": {"status": "success", "rows": 0},
    }
    storage_double = Mock(spec=ThreadSafeStorage)
    for method_name, canned in canned_results.items():
        setattr(storage_double, method_name, Mock(return_value=canned))
    return storage_double
@pytest.fixture
def mock_client():
    """Provide a mock restricted to the ``TushareClient`` interface."""
    client_double = Mock(spec=TushareClient)
    return client_double
class TestDateUtilities:
"""Test date utility functions."""
@@ -50,23 +66,9 @@ class TestDateUtilities:
class TestDataSync:
"""Test DataSync class functionality."""
@pytest.fixture
def mock_storage(self):
    """Provide a ``Storage``-spec'd mock with stubbed I/O methods.

    ``exists`` returns ``False``, ``load`` returns an empty ``DataFrame`` and
    ``save`` returns a success summary reporting zero rows.
    """
    save_summary = {"status": "success", "rows": 0}
    storage_double = Mock(spec=Storage)
    # The three stubs are independent of one another; order is irrelevant.
    storage_double.save = Mock(return_value=save_summary)
    storage_double.load = Mock(return_value=pd.DataFrame())
    storage_double.exists = Mock(return_value=False)
    return storage_double
@pytest.fixture
def mock_client(self):
    """Provide a mock restricted to the ``TushareClient`` interface."""
    client_double = Mock(spec=TushareClient)
    return client_double
def test_get_all_stock_codes_from_daily(self, mock_storage):
"""Test getting stock codes from daily data."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
@@ -84,7 +86,7 @@ class TestDataSync:
def test_get_all_stock_codes_fallback(self, mock_storage):
"""Test fallback to stock_basic when daily is empty."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
@@ -100,7 +102,7 @@ class TestDataSync:
def test_get_global_last_date(self, mock_storage):
"""Test getting global last date."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
@@ -116,7 +118,7 @@ class TestDataSync:
def test_get_global_last_date_empty(self, mock_storage):
"""Test getting last date from empty storage."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
@@ -127,7 +129,7 @@ class TestDataSync:
def test_sync_single_stock(self, mock_storage):
"""Test syncing a single stock."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
with patch(
"src.data.sync.get_daily",
return_value=pd.DataFrame(
@@ -151,7 +153,7 @@ class TestDataSync:
def test_sync_single_stock_empty(self, mock_storage):
"""Test syncing a stock with no data."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
with patch("src.data.sync.get_daily", return_value=pd.DataFrame()):
sync = DataSync()
sync.storage = mock_storage
@@ -170,7 +172,7 @@ class TestSyncAll:
def test_full_sync_mode(self, mock_storage):
"""Test full sync mode when force_full=True."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
with patch("src.data.sync.get_daily", return_value=pd.DataFrame()):
sync = DataSync()
sync.storage = mock_storage
@@ -191,7 +193,7 @@ class TestSyncAll:
def test_incremental_sync_mode(self, mock_storage):
"""Test incremental sync mode when data exists."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
sync.sync_single_stock = Mock(return_value=pd.DataFrame())
@@ -221,7 +223,7 @@ class TestSyncAll:
def test_manual_start_date(self, mock_storage):
"""Test sync with manual start date."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
sync.sync_single_stock = Mock(return_value=pd.DataFrame())
@@ -240,7 +242,7 @@ class TestSyncAll:
def test_no_stocks_found(self, mock_storage):
"""Test sync when no stocks are found."""
with patch("src.data.sync.Storage", return_value=mock_storage):
with patch("src.data.sync.ThreadSafeStorage", return_value=mock_storage):
sync = DataSync()
sync.storage = mock_storage
@@ -268,6 +270,7 @@ class TestSyncAllConvenienceFunction:
force_full=True,
start_date=None,
end_date=None,
dry_run=False,
)