- 将独立 API 模块 (daily, stock_basic, trade_cal) 整合至 api_wrappers/
- 重写 sync.py 使用新的 wrapper 结构,支持更多同步功能
- 更新测试文件适配新的模块结构
- 添加 pytest.ini 配置文件
257 lines
8.9 KiB
Python
257 lines
8.9 KiB
Python
"""Tests for data sync with REAL data (read-only).

Tests verify:
1. get_global_last_date() correctly reads local data's max date
2. Incremental sync date calculation (the calendar day after local_last_date)
3. Full sync date calculation (20180101)
4. Multi-stock scenario with real data

⚠️ IMPORTANT: These tests ONLY read data, no write operations.
- NO sync_all() calls (writes daily.h5)
- NO check_sync_needed() calls (writes trade_cal.h5)
"""
|
|
|
|
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import pytest

from src.data.storage import Storage
from src.data.sync import (
    DataSync,
    get_next_date,
    DEFAULT_START_DATE,
)
|
|
|
|
|
|
class TestDataSyncReadOnly:
    """Read-only tests for data sync - verify date calculation logic.

    Every test that needs the local ``daily`` dataset skips itself when
    ``Storage.exists("daily")`` is false, so only ``test_daily_h5_exists``
    fails on a machine without data. Dates are handled as ``YYYYMMDD``
    strings and are parsed with ``datetime`` whenever calendar arithmetic
    is required (plain integer arithmetic breaks at month/year ends).
    """

    @pytest.fixture
    def storage(self):
        """Create storage instance."""
        return Storage()

    @pytest.fixture
    def data_sync(self):
        """Create DataSync instance."""
        return DataSync()

    @pytest.fixture
    def daily_exists(self, storage):
        """Return whether the ``daily`` dataset is present in storage."""
        return storage.exists("daily")

    def test_daily_h5_exists(self, storage):
        """Verify daily.h5 data file exists before running tests."""
        assert storage.exists("daily"), (
            "daily.h5 not found. Please run full sync first: "
            "uv run python -c 'from src.data.sync import sync_all; sync_all(force_full=True)'"
        )

    def test_get_global_last_date(self, data_sync, daily_exists):
        """Test get_global_last_date returns correct max date from local data."""
        if not daily_exists:
            pytest.skip("daily.h5 not found")

        last_date = data_sync.get_global_last_date()

        # Verify it's a valid date string
        assert last_date is not None, "get_global_last_date returned None"
        assert isinstance(last_date, str), f"Expected str, got {type(last_date)}"
        assert len(last_date) == 8, f"Expected 8-digit date, got {last_date}"
        assert last_date.isdigit(), f"Expected numeric date, got {last_date}"

        # Verify by reading storage directly
        daily_data = data_sync.storage.load("daily")
        expected_max = str(daily_data["trade_date"].max())

        assert last_date == expected_max, (
            f"get_global_last_date returned {last_date}, "
            f"but actual max date is {expected_max}"
        )

        print(f"[TEST] Local data last date: {last_date}")

    def test_incremental_sync_date_calculation(self, data_sync, daily_exists):
        """Test incremental sync: start_date = day after local_last_date.

        This verifies that when local data exists, incremental sync should
        fetch data from the calendar day following local_last_date, not
        from 20180101.
        """
        if not daily_exists:
            pytest.skip("daily.h5 not found")

        # Get local last date
        local_last_date = data_sync.get_global_last_date()
        assert local_last_date is not None, "No local data found"

        # Start date the sync module derives from the local last date
        expected_start_date = get_next_date(local_last_date)

        # Independent reference value. Calendar arithmetic is required:
        # int("20240131") + 1 is 20240132, not the real next day 20240201.
        next_calendar_day = (
            datetime.strptime(local_last_date, "%Y%m%d") + timedelta(days=1)
        ).strftime("%Y%m%d")

        assert expected_start_date == next_calendar_day, (
            "Incremental start date calculation error: "
            f"expected {next_calendar_day}, got {expected_start_date}"
        )

        print(
            f"[TEST] Incremental sync: local_last={local_last_date}, "
            f"start_date should be {expected_start_date}"
        )

        # Verify this is NOT 20180101 (would be full sync)
        assert expected_start_date != DEFAULT_START_DATE, (
            f"Incremental sync should NOT start from {DEFAULT_START_DATE}"
        )

    def test_full_sync_date_calculation(self):
        """Test full sync: start_date = 20180101 when force_full=True.

        Only the DEFAULT_START_DATE constant is checked here; no sync
        routine is invoked, keeping the test read-only.
        """
        # Full sync should always use DEFAULT_START_DATE
        full_sync_start = DEFAULT_START_DATE

        assert full_sync_start == "20180101", (
            f"Full sync should start from 20180101, got {full_sync_start}"
        )

        print(f"[TEST] Full sync start date: {full_sync_start}")

    def test_date_comparison_logic(self, data_sync, daily_exists):
        """Test date comparison: incremental vs full sync selection logic.

        Verify that:
        - If local_last_date < today: incremental sync needed
        - If local_last_date >= today: no sync needed
        """
        if not daily_exists:
            pytest.skip("daily.h5 not found")

        local_last_date = data_sync.get_global_last_date()
        # Fail with a clear message instead of a TypeError from int(None).
        assert local_last_date is not None, "No local data found"
        today = datetime.now().strftime("%Y%m%d")

        # YYYYMMDD integers order the same way as the dates they encode,
        # so integer comparison is safe here (unlike integer arithmetic).
        local_last_int = int(local_last_date)
        today_int = int(today)

        # Log the comparison
        print(
            f"[TEST] Date comparison: local_last={local_last_date} ({local_last_int}), "
            f"today={today} ({today_int})"
        )

        # This test just verifies the comparison logic works
        if local_last_int < today_int:
            print("[TEST] Local data is older than today - sync needed")
            # Incremental sync should fetch from the day after local_last_date
            sync_start = get_next_date(local_last_date)
            assert int(sync_start) > local_last_int, (
                "Sync start should be after local last"
            )
        else:
            print("[TEST] Local data is up-to-date - no sync needed")

    def test_get_all_stock_codes_real_data(self, data_sync, daily_exists):
        """Test get_all_stock_codes returns multiple real stock codes."""
        if not daily_exists:
            pytest.skip("daily.h5 not found")

        codes = data_sync.get_all_stock_codes()

        # Verify it's a list
        assert isinstance(codes, list), f"Expected list, got {type(codes)}"
        assert len(codes) > 0, "No stock codes found"

        # Verify multiple stocks
        assert len(codes) >= 10, (
            f"Expected at least 10 stocks for multi-stock test, got {len(codes)}"
        )

        # Verify format (should be like 000001.SZ, 600000.SH).
        # NOTE(review): only the first five codes are checked and only the
        # SZ/SH suffixes are accepted - confirm no other exchange suffix
        # can appear at the head of the list.
        sample_codes = codes[:5]
        for code in sample_codes:
            assert "." in code, f"Invalid stock code format: {code}"
            suffix = code.split(".")[-1]
            assert suffix in ["SZ", "SH"], f"Invalid exchange suffix: {suffix}"

        print(f"[TEST] Found {len(codes)} stock codes (sample: {sample_codes})")

    def test_multi_stock_date_range(self, data_sync, daily_exists):
        """Test that local data covers many stocks over a reasonable period.

        Checks that at least 10 distinct ``ts_code`` values are present and
        that the global ``trade_date`` range spans more than 100 calendar
        days.
        """
        if not daily_exists:
            pytest.skip("daily.h5 not found")

        daily_data = data_sync.storage.load("daily")

        # Get date range for each stock
        stock_dates = daily_data.groupby("ts_code")["trade_date"].agg(["min", "max"])

        # Get global min and max
        global_min = str(daily_data["trade_date"].min())
        global_max = str(daily_data["trade_date"].max())

        print(f"[TEST] Global date range: {global_min} to {global_max}")
        print(f"[TEST] Total stocks: {len(stock_dates)}")

        # Verify we have data for multiple stocks
        assert len(stock_dates) >= 10, (
            f"Expected at least 10 stocks, got {len(stock_dates)}"
        )

        # Verify date range is reasonable (more than 100 calendar days).
        # The dates must be parsed first: subtracting YYYYMMDD integers
        # does not yield days (20190101 - 20180101 == 10000).
        first_day = datetime.strptime(global_min, "%Y%m%d")
        last_day = datetime.strptime(global_max, "%Y%m%d")
        days_span = (last_day - first_day).days

        assert days_span > 100, (
            f"Date range too small: {days_span} days. "
            "Expected at least 100 days of data."
        )

        print(f"[TEST] Date span: {days_span} days")
|
|
|
|
|
|
class TestDateUtilities:
    """Checks for the date helpers used by the sync module."""

    def test_get_next_date(self):
        """get_next_date must return the following calendar day."""
        expectations = {
            "20240101": "20240102",  # Ordinary day
            "20240131": "20240201",  # Month boundary
            "20241231": "20250101",  # Year boundary
        }
        for given, following in expectations.items():
            assert get_next_date(given) == following

    def test_incremental_vs_full_sync_logic(self):
        """Contrast the start dates of incremental and full sync.

        Incremental: start_date = local_last_date + 1
        Full: start_date = 20180101
        """
        # Scenario 1: local data exists, so sync resumes on the next day
        last_local = "20240115"
        resume_from = get_next_date(last_local)

        assert resume_from == "20240116"
        assert resume_from != DEFAULT_START_DATE

        # Scenario 2: a forced full sync restarts from the default date
        restart_from = DEFAULT_START_DATE  # "20180101"

        assert restart_from == "20180101"
        assert resume_from != restart_from

        print("[TEST] Incremental vs Full sync logic verified")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests verbosely with output capture disabled.
    # pytest.main returns the exit code instead of exiting, so hand it to
    # SystemExit; otherwise the script would exit 0 even when tests fail.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))
|