feat: 初始化 ProStock 项目基础结构和配置
- 添加项目规则文档(开发规范、安全规则、配置管理) - 实现数据模块核心功能(API 客户端、限流器、存储管理、配置加载) - 添加 .gitignore 和 .kilocodeignore 配置 - 配置环境变量模板 - 编写 daily 模块单元测试
This commit is contained in:
133
src/data/storage.py
Normal file
133
src/data/storage.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Simplified HDF5 storage for data persistence."""
|
||||
import os
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from src.data.config import get_config
|
||||
|
||||
|
||||
class Storage:
|
||||
"""HDF5 storage manager for saving and loading data."""
|
||||
|
||||
def __init__(self, path: Optional[Path] = None):
|
||||
"""Initialize storage.
|
||||
|
||||
Args:
|
||||
path: Base path for data storage (auto-loaded from config if not provided)
|
||||
"""
|
||||
cfg = get_config()
|
||||
self.base_path = path or cfg.data_path
|
||||
self.base_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _get_file_path(self, name: str) -> Path:
|
||||
"""Get full path for an HDF5 file."""
|
||||
return self.base_path / f"{name}.h5"
|
||||
|
||||
def save(self, name: str, data: pd.DataFrame, mode: str = "append") -> dict:
|
||||
"""Save data to HDF5 file.
|
||||
|
||||
Args:
|
||||
name: Dataset name (also used as filename)
|
||||
data: DataFrame to save
|
||||
mode: 'append' or 'replace'
|
||||
|
||||
Returns:
|
||||
Dict with save result
|
||||
"""
|
||||
if data.empty:
|
||||
return {"status": "skipped", "rows": 0}
|
||||
|
||||
file_path = self._get_file_path(name)
|
||||
|
||||
try:
|
||||
with pd.HDFStore(file_path, mode="a") as store:
|
||||
if mode == "replace" or name not in store.keys():
|
||||
store.put(name, data, format="table")
|
||||
else:
|
||||
# Merge with existing data
|
||||
existing = store[name]
|
||||
combined = pd.concat([existing, data], ignore_index=True)
|
||||
combined = combined.drop_duplicates(subset=["ts_code", "trade_date"], keep="last")
|
||||
store.put(name, combined, format="table")
|
||||
|
||||
print(f"[Storage] Saved {len(data)} rows to {file_path}")
|
||||
return {"status": "success", "rows": len(data), "path": str(file_path)}
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Storage] Error saving {name}: {e}")
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
def load(self, name: str,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
ts_code: Optional[str] = None) -> pd.DataFrame:
|
||||
"""Load data from HDF5 file.
|
||||
|
||||
Args:
|
||||
name: Dataset name
|
||||
start_date: Start date filter (YYYYMMDD)
|
||||
end_date: End date filter (YYYYMMDD)
|
||||
ts_code: Stock code filter
|
||||
|
||||
Returns:
|
||||
DataFrame with loaded data
|
||||
"""
|
||||
file_path = self._get_file_path(name)
|
||||
|
||||
if not file_path.exists():
|
||||
print(f"[Storage] File not found: {file_path}")
|
||||
return pd.DataFrame()
|
||||
|
||||
try:
|
||||
with pd.HDFStore(file_path, mode="r") as store:
|
||||
if name not in store.keys():
|
||||
return pd.DataFrame()
|
||||
|
||||
data = store[name]
|
||||
|
||||
# Apply filters
|
||||
if start_date and end_date and "trade_date" in data.columns:
|
||||
data = data[(data["trade_date"] >= start_date) & (data["trade_date"] <= end_date)]
|
||||
|
||||
if ts_code and "ts_code" in data.columns:
|
||||
data = data[data["ts_code"] == ts_code]
|
||||
|
||||
return data
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Storage] Error loading {name}: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_last_date(self, name: str) -> Optional[str]:
|
||||
"""Get the latest date in storage.
|
||||
|
||||
Args:
|
||||
name: Dataset name
|
||||
|
||||
Returns:
|
||||
Latest date string or None
|
||||
"""
|
||||
data = self.load(name)
|
||||
if data.empty or "trade_date" not in data.columns:
|
||||
return None
|
||||
return str(data["trade_date"].max())
|
||||
|
||||
def exists(self, name: str) -> bool:
|
||||
"""Check if dataset exists."""
|
||||
return self._get_file_path(name).exists()
|
||||
|
||||
def delete(self, name: str) -> bool:
|
||||
"""Delete a dataset.
|
||||
|
||||
Args:
|
||||
name: Dataset name
|
||||
|
||||
Returns:
|
||||
True if deleted
|
||||
"""
|
||||
file_path = self._get_file_path(name)
|
||||
if file_path.exists():
|
||||
file_path.unlink()
|
||||
print(f"[Storage] Deleted {file_path}")
|
||||
return True
|
||||
return False
|
||||
Reference in New Issue
Block a user