feat(data): 添加每日筹码及胜率数据接口 (cyq_perf)

- 新增 api_cyq_perf 模块,支持筹码分布数据获取和同步
- 在 sync_registry 中注册 cyq_perf 同步器
This commit is contained in:
2026-03-26 00:15:30 +08:00
parent 3806b8021b
commit 6730acbae1
10 changed files with 612 additions and 15 deletions

View File

@@ -0,0 +1,233 @@
"""CYQ Performance (筹码分布) interface.
Fetch A-share stock chip distribution data (cost distribution and win rate) from Tushare.
This interface retrieves daily chip average cost and win rate information.
Data starts from 2018.
"""
import pandas as pd
from typing import Optional
from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import StockBasedSync
def get_cyq_perf(
ts_code: str,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
client: Optional[TushareClient] = None,
) -> pd.DataFrame:
"""Fetch chip distribution (CYQ) performance data from Tushare.
This interface retrieves daily chip average cost and win rate information
for A-share stocks. Data starts from 2018.
Args:
ts_code: Stock code (e.g., '000001.SZ', '600000.SH')
start_date: Start date in YYYYMMDD format
end_date: End date in YYYYMMDD format
client: Optional TushareClient instance for shared rate limiting.
If None, creates a new client. For concurrent sync operations,
pass a shared client to ensure proper rate limiting.
Returns:
pd.DataFrame with columns:
- ts_code: Stock code
- trade_date: Trade date (YYYYMMDD)
- his_low: Historical lowest price
- his_high: Historical highest price
- cost_5pct: 5th percentile cost
- cost_15pct: 15th percentile cost
- cost_50pct: 50th percentile cost (median)
- cost_85pct: 85th percentile cost
- cost_95pct: 95th percentile cost
- weight_avg: Weighted average cost
- winner_rate: Win rate (percentage)
Example:
>>> # Get chip distribution data for a stock
>>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
>>>
>>> # Get data with shared client for rate limiting
>>> from src.data.client import TushareClient
>>> client = TushareClient()
>>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131', client=client)
"""
client = client or TushareClient()
# Build parameters
params = {"ts_code": ts_code}
if start_date:
params["start_date"] = start_date
if end_date:
params["end_date"] = end_date
# Fetch data using cyq_perf API
data = client.query("cyq_perf", **params)
# Rename date column if needed
if "date" in data.columns:
data = data.rename(columns={"date": "trade_date"})
return data
class CyqPerfSync(StockBasedSync):
"""筹码分布数据批量同步管理器,支持全量/增量同步。
继承自 StockBasedSync使用多线程按股票并发获取数据。
Example:
>>> sync = CyqPerfSync()
>>> results = sync.sync_all() # 增量同步
>>> results = sync.sync_all(force_full=True) # 全量同步
>>> preview = sync.preview_sync() # 预览
"""
table_name = "cyq_perf"
# 表结构定义
TABLE_SCHEMA = {
"ts_code": "VARCHAR(16) NOT NULL",
"trade_date": "DATE NOT NULL",
"his_low": "DOUBLE",
"his_high": "DOUBLE",
"cost_5pct": "DOUBLE",
"cost_15pct": "DOUBLE",
"cost_50pct": "DOUBLE",
"cost_85pct": "DOUBLE",
"cost_95pct": "DOUBLE",
"weight_avg": "DOUBLE",
"winner_rate": "DOUBLE",
}
# 索引定义
TABLE_INDEXES = [
("idx_cyq_perf_date_code", ["trade_date", "ts_code"]),
]
# 主键定义
PRIMARY_KEY = ("ts_code", "trade_date")
def fetch_single_stock(
self,
ts_code: str,
start_date: str,
end_date: str,
) -> pd.DataFrame:
"""获取单只股票的筹码分布数据。
Args:
ts_code: 股票代码
start_date: 起始日期YYYYMMDD
end_date: 结束日期YYYYMMDD
Returns:
包含筹码分布数据的 DataFrame
"""
# 使用 get_cyq_perf 获取数据(传递共享 client
data = get_cyq_perf(
ts_code=ts_code,
start_date=start_date,
end_date=end_date,
client=self.client, # 传递共享客户端以确保限流
)
return data
def sync_cyq_perf(
force_full: bool = False,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
max_workers: Optional[int] = None,
dry_run: bool = False,
) -> dict[str, pd.DataFrame]:
"""同步所有股票的筹码分布数据。
这是筹码分布数据同步的主要入口点。
Args:
force_full: 若为 True强制从 20180101 完整重载
start_date: 手动指定起始日期YYYYMMDD
end_date: 手动指定结束日期(默认为今天)
max_workers: 工作线程数(默认: 10
dry_run: 若为 True仅预览将要同步的内容不写入数据
Returns:
映射 ts_code 到 DataFrame 的字典
Example:
>>> # 首次同步(从 20180101 全量加载)
>>> result = sync_cyq_perf()
>>>
>>> # 后续同步(增量 - 仅新数据)
>>> result = sync_cyq_perf()
>>>
>>> # 强制完整重载
>>> result = sync_cyq_perf(force_full=True)
>>>
>>> # 手动指定日期范围
>>> result = sync_cyq_perf(start_date='20240101', end_date='20240131')
>>>
>>> # 自定义线程数
>>> result = sync_cyq_perf(max_workers=20)
>>>
>>> # Dry run仅预览
>>> result = sync_cyq_perf(dry_run=True)
"""
sync_manager = CyqPerfSync(max_workers=max_workers)
return sync_manager.sync_all(
force_full=force_full,
start_date=start_date,
end_date=end_date,
dry_run=dry_run,
)
def preview_cyq_perf_sync(
force_full: bool = False,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
sample_size: int = 3,
) -> dict:
"""预览筹码分布数据同步数据量和样本(不实际同步)。
这是推荐的方式,可在实际同步前检查将要同步的内容。
Args:
force_full: 若为 True预览全量同步从 20180101
start_date: 手动指定起始日期(覆盖自动检测)
end_date: 手动指定结束日期(默认为今天)
sample_size: 预览用样本股票数量(默认: 3
Returns:
包含预览信息的字典:
{
'sync_needed': bool,
'stock_count': int,
'start_date': str,
'end_date': str,
'estimated_records': int,
'sample_data': pd.DataFrame,
'mode': str, # 'full', 'incremental', 'partial', 或 'none'
}
Example:
>>> # 预览将要同步的内容
>>> preview = preview_cyq_perf_sync()
>>>
>>> # 预览全量同步
>>> preview = preview_cyq_perf_sync(force_full=True)
>>>
>>> # 预览更多样本
>>> preview = preview_cyq_perf_sync(sample_size=5)
"""
sync_manager = CyqPerfSync()
return sync_manager.preview_sync(
force_full=force_full,
start_date=start_date,
end_date=end_date,
sample_size=sample_size,
)