134 lines
4.1 KiB
Python
134 lines
4.1 KiB
Python
|
|
"""Simplified HDF5 storage for data persistence."""
|
||
|
|
import os
|
||
|
|
import pandas as pd
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional
|
||
|
|
from src.data.config import get_config
|
||
|
|
|
||
|
|
|
||
|
|
class Storage:
    """HDF5 storage manager for saving and loading data.

    Every dataset is kept in its own file, ``<base_path>/<name>.h5``, and is
    stored inside that file under the key ``name``.
    """

    # Columns that identify a row when appended data is merged with what is
    # already stored; a newer row with the same key replaces the older one.
    _KEY_COLUMNS = ("ts_code", "trade_date")

    def __init__(self, path: Optional[Path] = None):
        """Initialize storage.

        Args:
            path: Base path for data storage (auto-loaded from config if not provided)
        """
        # Only consult the config when no explicit path was given, and coerce
        # to Path so a plain string (from the caller or the config) works too.
        base = path or get_config().data_path
        self.base_path = Path(base)
        self.base_path.mkdir(parents=True, exist_ok=True)

    def _get_file_path(self, name: str) -> Path:
        """Get full path for an HDF5 file."""
        return self.base_path / f"{name}.h5"

    def save(self, name: str, data: pd.DataFrame, mode: str = "append") -> dict:
        """Save data to HDF5 file.

        Args:
            name: Dataset name (also used as filename)
            data: DataFrame to save
            mode: 'append' or 'replace'. Any value other than 'replace' is
                treated as 'append'.

        Returns:
            Dict with save result:
            ``{"status": "skipped", "rows": 0}`` when ``data`` is empty,
            ``{"status": "success", "rows": <len(data)>, "path": <file>}`` on
            success, or ``{"status": "error", "error": <message>}`` when the
            write fails. Failures are reported, not raised.
        """
        if data.empty:
            return {"status": "skipped", "rows": 0}

        file_path = self._get_file_path(name)

        try:
            with pd.HDFStore(file_path, mode="a") as store:
                # Test membership on the store itself: HDFStore.keys() yields
                # "/name"-style paths, so comparing a bare name against that
                # list never matches and appended data would overwrite the
                # existing dataset instead of being merged into it.
                if mode == "replace" or name not in store:
                    store.put(name, data, format="table")
                else:
                    # Merge with existing data; on a key clash the incoming
                    # row wins (keep="last").
                    combined = pd.concat([store[name], data], ignore_index=True)
                    # De-duplicate on whichever key columns are present; with
                    # none present fall back to dropping fully identical rows.
                    keys = [col for col in self._KEY_COLUMNS if col in combined.columns]
                    combined = combined.drop_duplicates(subset=keys or None, keep="last")
                    store.put(name, combined, format="table")
        except Exception as e:
            print(f"[Storage] Error saving {name}: {e}")
            return {"status": "error", "error": str(e)}
        else:
            print(f"[Storage] Saved {len(data)} rows to {file_path}")
            return {"status": "success", "rows": len(data), "path": str(file_path)}

    def load(self, name: str,
             start_date: Optional[str] = None,
             end_date: Optional[str] = None,
             ts_code: Optional[str] = None) -> pd.DataFrame:
        """Load data from HDF5 file.

        Args:
            name: Dataset name
            start_date: Start date filter (YYYYMMDD), inclusive
            end_date: End date filter (YYYYMMDD), inclusive
            ts_code: Stock code filter

        Returns:
            DataFrame with loaded data. An empty DataFrame is returned when
            the file or dataset does not exist, or when reading/filtering
            fails. A filter is skipped if its column is not in the data.
        """
        file_path = self._get_file_path(name)

        if not file_path.exists():
            print(f"[Storage] File not found: {file_path}")
            return pd.DataFrame()

        try:
            with pd.HDFStore(file_path, mode="r") as store:
                # `name in store` matches the key with or without the leading
                # slash; checking against store.keys() would always miss.
                if name not in store:
                    return pd.DataFrame()

                data = store[name]

            # Apply filters. Each date bound works on its own, so passing
            # only start_date or only end_date is honoured.
            if "trade_date" in data.columns:
                if start_date:
                    data = data[data["trade_date"] >= start_date]
                if end_date:
                    data = data[data["trade_date"] <= end_date]

            if ts_code and "ts_code" in data.columns:
                data = data[data["ts_code"] == ts_code]

            return data

        except Exception as e:
            print(f"[Storage] Error loading {name}: {e}")
            return pd.DataFrame()

    def get_last_date(self, name: str) -> Optional[str]:
        """Get the latest date in storage.

        Args:
            name: Dataset name

        Returns:
            Latest date string or None (when the dataset is missing, empty,
            or has no ``trade_date`` column)
        """
        data = self.load(name)
        if data.empty or "trade_date" not in data.columns:
            return None
        return str(data["trade_date"].max())

    def exists(self, name: str) -> bool:
        """Check if dataset exists (i.e. its HDF5 file is present on disk)."""
        return self._get_file_path(name).exists()

    def delete(self, name: str) -> bool:
        """Delete a dataset by removing its HDF5 file.

        Args:
            name: Dataset name

        Returns:
            True if deleted, False if there was no file to delete
        """
        file_path = self._get_file_path(name)
        if file_path.exists():
            file_path.unlink()
            print(f"[Storage] Deleted {file_path}")
            return True
        return False
|