Files
ProStock/src/data/storage.py

134 lines
4.2 KiB
Python
Raw Normal View History

"""Simplified HDF5 storage for data persistence."""
import os
import pandas as pd
from pathlib import Path
from typing import Optional
from src.data.config import get_config
class Storage:
    """HDF5 storage manager for saving and loading data.

    Each dataset is kept in its own file, ``<base_path>/<name>.h5``, and is
    stored inside that file under the key ``name`` in table format.
    """

    # Columns that together identify one row when appended data is merged.
    _KEY_COLUMNS = ("ts_code", "trade_date")

    def __init__(self, path: Optional[Path] = None):
        """Initialize storage and make sure the base directory exists.

        Args:
            path: Base path for data storage (auto-loaded from config if not
                provided).
        """
        # ``or`` short-circuits, so the config is only consulted when no
        # explicit path was supplied.
        base = path or get_config().data_path_resolved
        self.base_path = Path(base)
        self.base_path.mkdir(parents=True, exist_ok=True)

    def _get_file_path(self, name: str) -> Path:
        """Get full path for an HDF5 file."""
        return self.base_path / f"{name}.h5"

    def save(self, name: str, data: pd.DataFrame, mode: str = "append") -> dict:
        """Save data to HDF5 file.

        Args:
            name: Dataset name (also used as filename).
            data: DataFrame to save.
            mode: ``'replace'`` overwrites the stored dataset; any other value
                (default ``'append'``) merges ``data`` into what is already
                stored.

        Returns:
            ``{"status": "skipped", "rows": 0}`` when ``data`` is empty,
            ``{"status": "success", "rows": ..., "path": ...}`` on success, or
            ``{"status": "error", "error": ...}`` if writing failed.
        """
        if data.empty:
            return {"status": "skipped", "rows": 0}

        file_path = self._get_file_path(name)
        try:
            with pd.HDFStore(file_path, mode="a") as store:
                # ``name in store`` matches the key with or without the
                # leading "/" that ``store.keys()`` puts on every entry.
                if mode == "replace" or name not in store:
                    store.put(name, data, format="table")
                else:
                    # Merge with existing data; on a key clash the newly
                    # supplied row wins (keep="last").
                    combined = pd.concat([store[name], data], ignore_index=True)
                    if all(col in combined.columns for col in self._KEY_COLUMNS):
                        combined = combined.drop_duplicates(
                            subset=list(self._KEY_COLUMNS), keep="last"
                        )
                    else:
                        # No usable key columns: only drop fully identical rows.
                        combined = combined.drop_duplicates(keep="last")
                    store.put(name, combined, format="table")
            print(f"[Storage] Saved {len(data)} rows to {file_path}")
            return {"status": "success", "rows": len(data), "path": str(file_path)}
        except Exception as e:
            print(f"[Storage] Error saving {name}: {e}")
            return {"status": "error", "error": str(e)}

    def load(self, name: str,
             start_date: Optional[str] = None,
             end_date: Optional[str] = None,
             ts_code: Optional[str] = None) -> pd.DataFrame:
        """Load data from HDF5 file.

        Args:
            name: Dataset name.
            start_date: Inclusive lower bound on ``trade_date`` (YYYYMMDD).
            end_date: Inclusive upper bound on ``trade_date`` (YYYYMMDD).
            ts_code: Stock code filter.

        Returns:
            DataFrame with loaded data; an empty DataFrame when the file or
            dataset is missing or reading fails. A filter is ignored when the
            column it applies to is not present.
        """
        file_path = self._get_file_path(name)
        if not file_path.exists():
            print(f"[Storage] File not found: {file_path}")
            return pd.DataFrame()

        try:
            with pd.HDFStore(file_path, mode="r") as store:
                # See save(): membership must not go through store.keys().
                if name not in store:
                    return pd.DataFrame()
                data = store[name]

            # Apply filters; each date bound works on its own.
            if "trade_date" in data.columns:
                if start_date:
                    data = data[data["trade_date"] >= start_date]
                if end_date:
                    data = data[data["trade_date"] <= end_date]
            if ts_code and "ts_code" in data.columns:
                data = data[data["ts_code"] == ts_code]
            return data
        except Exception as e:
            print(f"[Storage] Error loading {name}: {e}")
            return pd.DataFrame()

    def get_last_date(self, name: str) -> Optional[str]:
        """Get the latest date in storage.

        Args:
            name: Dataset name.

        Returns:
            Latest ``trade_date`` as a string, or None when the dataset is
            empty, has no ``trade_date`` column, or holds only missing dates.
        """
        data = self.load(name)
        if data.empty or "trade_date" not in data.columns:
            return None
        latest = data["trade_date"].max()
        # max() of an all-missing column is NaN/NaT, not a real date.
        if pd.isna(latest):
            return None
        return str(latest)

    def exists(self, name: str) -> bool:
        """Check if the dataset's file exists."""
        return self._get_file_path(name).exists()

    def delete(self, name: str) -> bool:
        """Delete a dataset by removing its file.

        Args:
            name: Dataset name.

        Returns:
            True if the file existed and was deleted, False otherwise.
        """
        file_path = self._get_file_path(name)
        if file_path.exists():
            file_path.unlink()
            print(f"[Storage] Deleted {file_path}")
            return True
        return False