refactor(data): 移除 api_daily 模块并更新文档
- 删除 src/data/api_wrappers/api_daily.py (240行)
- 更新 6 个文档文件,将 daily 表引用替换为 pro_bar
- 同步 README.md 中的因子框架和训练模块示例

BREAKING CHANGE: api_daily 模块已移除,请使用 api_pro_bar 替代
This commit is contained in:
@@ -1,240 +0,0 @@
|
||||
"""Simplified daily market data interface.
|
||||
|
||||
Fetches A-share daily bar data (A股日线行情) from Tushare.
Supports all output fields, including tor (turnover rate, 换手率) and vr (volume ratio, 量比).
|
||||
|
||||
This module provides both single-stock fetching (get_daily) and
|
||||
batch synchronization (DailySync class) for daily market data.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from typing import Optional, List, Literal, Dict
|
||||
|
||||
from src.data.client import TushareClient
|
||||
from src.data.api_wrappers.base_sync import StockBasedSync
|
||||
|
||||
|
||||
def get_daily(
    ts_code: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    trade_date: Optional[str] = None,
    adj: Literal[None, "qfq", "hfq"] = None,
    factors: Optional[List[Literal["tor", "vr"]]] = None,
    adjfactor: bool = False,
) -> pd.DataFrame:
    """Fetch daily bars for one A-share stock through the ``pro_bar`` endpoint.

    A fresh ``TushareClient`` is created on every call and the request is
    forwarded to its ``query`` method. Only arguments that are truthy are
    included in the request, so ``None``, empty strings and empty lists are
    simply omitted.

    Args:
        ts_code: Stock code (e.g. '000001.SZ', '600000.SH'). Always sent.
        start_date: Start date in YYYYMMDD format.
        end_date: End date in YYYYMMDD format.
        trade_date: Specific trade date in YYYYMMDD format.
        adj: Adjustment type - None, 'qfq' (forward) or 'hfq' (backward).
        factors: Factors to include - 'tor' (turnover rate) and/or
            'vr' (volume ratio). A list is joined into a comma-separated
            string; any other truthy value is forwarded unchanged.
        adjfactor: When True, the request carries ``adjfactor="True"``
            (sent as a string, not a bool).

    Returns:
        Whatever ``TushareClient.query`` returns for ``pro_bar``; annotated
        as a pd.DataFrame. Per the original author's notes the frame holds
        ts_code, trade_date, open, high, low, close, pre_close, change,
        pct_chg, vol and amount, plus the requested factor columns and the
        adjustment factor when asked for - NOTE(review): the column set is
        decided by the upstream API and is not verifiable from this function.

    Example:
        >>> data = get_daily('000001.SZ', start_date='20240101', end_date='20240131')
        >>> data = get_daily('600000.SH', factors=['tor', 'vr'])
    """
    # The client is built before the request so that the order of side
    # effects stays: construct client -> assemble parameters -> query.
    client = TushareClient()

    request = {"ts_code": ts_code}

    # Plain pass-through arguments, listed in the order they are added.
    passthrough = (
        ("start_date", start_date),
        ("end_date", end_date),
        ("trade_date", trade_date),
        ("adj", adj),
    )
    for key, value in passthrough:
        if value:
            request[key] = value

    if factors:
        # The query layer takes factors as one comma-separated string.
        request["factors"] = (
            ",".join(factors) if isinstance(factors, list) else factors
        )

    if adjfactor:
        request["adjfactor"] = "True"

    # pro_bar is used because it supports the tor / vr factors.
    return client.query("pro_bar", **request)
|
||||
|
||||
|
||||
class DailySync(StockBasedSync):
    """Batch synchronisation manager for the ``daily`` table.

    Only the table description and the per-stock fetch are defined here;
    everything else (``sync_all``, ``preview_sync``, the ``client``
    attribute) comes from ``StockBasedSync``. According to the original
    documentation the base class supports full and incremental sync and
    fetches stocks concurrently with multiple threads - NOTE(review): that
    behaviour lives in the base class and is not visible in this module.

    Example:
        >>> sync = DailySync()
        >>> results = sync.sync_all()  # incremental sync
        >>> results = sync.sync_all(force_full=True)  # full sync
        >>> preview = sync.preview_sync()  # preview
    """

    table_name = "daily"

    # Table schema: column name -> SQL type declaration. The two key
    # columns are NOT NULL; every remaining column is a nullable DOUBLE.
    TABLE_SCHEMA = {
        "ts_code": "VARCHAR(16) NOT NULL",
        "trade_date": "DATE NOT NULL",
        **dict.fromkeys(
            (
                "open",
                "high",
                "low",
                "close",
                "pre_close",
                "change",
                "pct_chg",
                "vol",
                "amount",
                "turnover_rate",
                "volume_ratio",
            ),
            "DOUBLE",
        ),
    }

    # Index definitions as (index name, indexed columns).
    TABLE_INDEXES = [
        ("idx_daily_date_code", ["trade_date", "ts_code"]),
    ]

    # Primary key columns.
    PRIMARY_KEY = ("ts_code", "trade_date")

    def fetch_single_stock(
        self,
        ts_code: str,
        start_date: str,
        end_date: str,
    ) -> pd.DataFrame:
        """Fetch the daily bars of a single stock.

        Args:
            ts_code: Stock code.
            start_date: Start date (YYYYMMDD).
            end_date: End date (YYYYMMDD).

        Returns:
            The result of the ``pro_bar`` query with the ``tor`` and ``vr``
            factors requested; annotated as a pd.DataFrame.
        """
        # Goes through self.client rather than a new client; the original
        # note says this shared client provides cross-thread rate limiting
        # (presumably set up by the base class - confirm there).
        return self.client.query(
            "pro_bar",
            ts_code=ts_code,
            start_date=start_date,
            end_date=end_date,
            factors="tor,vr",
        )
|
||||
|
||||
|
||||
def sync_daily(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> Dict[str, pd.DataFrame]:
    """Synchronise daily bars for all stocks.

    Convenience entry point: builds a ``DailySync`` with the requested
    worker count and forwards the remaining options to its ``sync_all``.

    Args:
        force_full: If True, force a complete reload. The original
            documentation states the reload starts at 20180101; that date
            is chosen by the sync manager, not by this function.
        start_date: Manually chosen start date (YYYYMMDD).
        end_date: Manually chosen end date. Documented as defaulting to
            today - NOTE(review): the default is resolved inside
            ``sync_all``; confirm there.
        max_workers: Number of worker threads, handed to ``DailySync``
            unchanged (None included). Documented default: 10 -
            NOTE(review): resolved by the sync manager; confirm there.
        dry_run: If True, only preview what would be synchronised without
            writing any data.

    Returns:
        The value returned by ``DailySync.sync_all``; annotated as a dict
        mapping ts_code to DataFrame.

    Example:
        >>> # First sync (documented as a full load from 20180101)
        >>> result = sync_daily()
        >>>
        >>> # Later syncs (incremental - new data only)
        >>> result = sync_daily()
        >>>
        >>> # Force a complete reload
        >>> result = sync_daily(force_full=True)
        >>>
        >>> # Manually chosen date range
        >>> result = sync_daily(start_date='20240101', end_date='20240131')
        >>>
        >>> # Custom number of threads
        >>> result = sync_daily(max_workers=20)
        >>>
        >>> # Dry run (preview only)
        >>> result = sync_daily(dry_run=True)
    """
    manager = DailySync(max_workers=max_workers)
    options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "dry_run": dry_run,
    }
    return manager.sync_all(**options)
|
||||
|
||||
|
||||
def preview_daily_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
) -> dict:
    """Preview the volume and a sample of a daily sync without running it.

    Recommended as a check before starting a real synchronisation. Builds a
    default ``DailySync`` and forwards every argument to its
    ``preview_sync``.

    Args:
        force_full: If True, preview a full sync. The original
            documentation states a full sync starts at 20180101; that date
            is chosen by the sync manager, not by this function.
        start_date: Manually chosen start date (documented as overriding
            the automatic detection).
        end_date: Manually chosen end date. Documented as defaulting to
            today - NOTE(review): resolved inside ``preview_sync``.
        sample_size: Number of sample stocks used for the preview
            (default: 3).

    Returns:
        The dictionary produced by ``DailySync.preview_sync``. As
        documented by the original author it has the shape:
            {
                'sync_needed': bool,
                'stock_count': int,
                'start_date': str,
                'end_date': str,
                'estimated_records': int,
                'sample_data': pd.DataFrame,
                'mode': str,  # 'full', 'incremental', 'partial' or 'none'
            }
        NOTE(review): the keys are produced by the base class and are not
        verifiable from this function.

    Example:
        >>> # Preview what would be synchronised
        >>> preview = preview_daily_sync()
        >>>
        >>> # Preview a full sync
        >>> preview = preview_daily_sync(force_full=True)
        >>>
        >>> # Preview with more samples
        >>> preview = preview_daily_sync(sample_size=5)
    """
    manager = DailySync()
    options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "sample_size": sample_size,
    }
    return manager.preview_sync(**options)
|
||||
Reference in New Issue
Block a user