feat(data): 添加每日筹码及胜率数据接口 (cyq_perf)

- 新增 api_cyq_perf 模块,支持筹码分布数据获取和同步
- 在 sync_registry 中注册 cyq_perf 同步器
This commit is contained in:
2026-03-26 00:15:30 +08:00
parent 3806b8021b
commit 6730acbae1
10 changed files with 612 additions and 15 deletions

View File

@@ -13,12 +13,14 @@ Available APIs:
- api_bak_basic: Stock historical list (股票历史列表)
- api_stock_st: ST stock list (ST股票列表)
- api_stk_limit: Stock limit price (每日涨跌停价格)
- api_cyq_perf: CYQ performance (每日筹码及胜率)
Example:
>>> from src.data.api_wrappers import get_daily, get_stock_basic, get_trade_cal, get_bak_basic
>>> from src.data.api_wrappers import get_pro_bar, sync_pro_bar, get_daily_basic, sync_daily_basic
>>> from src.data.api_wrappers import get_stock_st, sync_stock_st
>>> from src.data.api_wrappers import get_stk_limit, sync_stk_limit
>>> from src.data.api_wrappers import get_cyq_perf, sync_cyq_perf
>>> data = get_daily('000001.SZ', start_date='20240101', end_date='20240131')
>>> pro_data = get_pro_bar('000001.SZ', start_date='20240101', end_date='20240131')
>>> daily_basic = get_daily_basic(trade_date='20240101')
@@ -27,6 +29,7 @@ Example:
>>> bak_basic = get_bak_basic(trade_date='20240101')
>>> stock_st = get_stock_st(trade_date='20240101')
>>> stk_limit = get_stk_limit(trade_date='20240101')
>>> cyq_perf = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
"""
from src.data.api_wrappers.api_daily_basic import (
@@ -68,6 +71,12 @@ from src.data.api_wrappers.api_trade_cal import (
get_last_trading_day,
sync_trade_cal_cache,
)
from src.data.api_wrappers.api_cyq_perf import (
get_cyq_perf,
sync_cyq_perf,
preview_cyq_perf_sync,
CyqPerfSync,
)
__all__ = [
# Daily market data
@@ -115,6 +124,11 @@ __all__ = [
"sync_stk_limit",
"preview_stk_limit_sync",
"StkLimitSync",
# CYQ Performance (筹码分布)
"get_cyq_perf",
"sync_cyq_perf",
"preview_cyq_perf_sync",
"CyqPerfSync",
]
# =============================================================================
@@ -198,6 +212,17 @@ try:
order=50,
)
# 8. CYQ Performance - 每日筹码及胜率
from src.data.api_wrappers.api_cyq_perf import CyqPerfSync
sync_registry.register_class(
name="cyq_perf",
sync_class=CyqPerfSync,
display_name="每日筹码及胜率",
description="A股每日筹码平均成本和胜率情况2018年开始",
order=60,
)
except ImportError:
# sync_registry 可能不存在(首次导入),忽略
pass

View File

@@ -0,0 +1,233 @@
"""CYQ Performance (筹码分布) interface.
Fetch A-share stock chip distribution data (cost distribution and win rate) from Tushare.
This interface retrieves daily chip average cost and win rate information.
Data starts from 2018.
"""
import pandas as pd
from typing import Optional
from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import StockBasedSync
def get_cyq_perf(
    ts_code: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    client: Optional[TushareClient] = None,
) -> pd.DataFrame:
    """Query the Tushare ``cyq_perf`` endpoint for a single stock.

    The endpoint reports daily chip-distribution statistics (cost
    percentiles and win rate) for A-share stocks. Upstream data is
    documented as starting in 2018.

    Args:
        ts_code: Stock code, e.g. ``'000001.SZ'`` or ``'600000.SH'``.
        start_date: Lower date bound in YYYYMMDD format. Only sent to the
            API when truthy.
        end_date: Upper date bound in YYYYMMDD format. Only sent to the
            API when truthy.
        client: Client used to issue the query. When falsy, a fresh
            ``TushareClient`` is created. Pass a shared instance during
            concurrent syncs so that all calls go through one client.

    Returns:
        The DataFrame returned by the client. If it carries a ``date``
        column, that column is renamed to ``trade_date``. Expected
        columns (per the upstream API):
            - ts_code: stock code
            - trade_date: trade date (YYYYMMDD)
            - his_low / his_high: historical lowest / highest price
            - cost_5pct, cost_15pct, cost_50pct, cost_85pct, cost_95pct:
              cost at the given percentile
            - weight_avg: weighted average cost
            - winner_rate: win rate

    Example:
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
        >>>
        >>> # Reuse one client across calls
        >>> from src.data.client import TushareClient
        >>> client = TushareClient()
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131', client=client)
    """
    api_client = client or TushareClient()

    # ts_code is always sent; date bounds are added only when provided.
    query_args = {"ts_code": ts_code}
    date_bounds = {"start_date": start_date, "end_date": end_date}
    query_args.update({key: value for key, value in date_bounds.items() if value})

    frame = api_client.query("cyq_perf", **query_args)

    # Normalise the date column name when the API answers with "date".
    if "date" not in frame.columns:
        return frame
    return frame.rename(columns={"date": "trade_date"})
class CyqPerfSync(StockBasedSync):
    """Batch sync manager for chip-distribution (``cyq_perf``) data.

    Extends ``StockBasedSync`` (defined elsewhere), which supplies the
    ``sync_all`` / ``preview_sync`` entry points used in the example below.
    This subclass only declares the target table layout and how to fetch
    the data for one stock. The original notes describe the base class as
    fetching per stock with multiple threads and supporting full and
    incremental syncs.

    Example:
        >>> sync = CyqPerfSync()
        >>> results = sync.sync_all()                 # incremental sync
        >>> results = sync.sync_all(force_full=True)  # full sync
        >>> preview = sync.preview_sync()             # preview only
    """

    # Name of the table this syncer is bound to.
    table_name = "cyq_perf"

    # Table schema: column name -> SQL type declaration. The two key
    # columns come first, followed by the numeric statistics columns.
    TABLE_SCHEMA = {
        "ts_code": "VARCHAR(16) NOT NULL",
        "trade_date": "DATE NOT NULL",
        **dict.fromkeys(
            (
                "his_low",
                "his_high",
                "cost_5pct",
                "cost_15pct",
                "cost_50pct",
                "cost_85pct",
                "cost_95pct",
                "weight_avg",
                "winner_rate",
            ),
            "DOUBLE",
        ),
    }

    # Index definitions as (index name, indexed columns) pairs.
    TABLE_INDEXES = [
        ("idx_cyq_perf_date_code", ["trade_date", "ts_code"]),
    ]

    # Primary key columns.
    PRIMARY_KEY = ("ts_code", "trade_date")

    def fetch_single_stock(
        self,
        ts_code: str,
        start_date: str,
        end_date: str,
    ) -> pd.DataFrame:
        """Fetch chip-distribution data for one stock.

        Args:
            ts_code: Stock code.
            start_date: Start date (YYYYMMDD).
            end_date: End date (YYYYMMDD).

        Returns:
            DataFrame produced by ``get_cyq_perf`` for the given range.
        """
        # Hand over this manager's own client so all fetches share it
        # (the original note says this is what keeps rate limiting intact).
        return get_cyq_perf(
            ts_code=ts_code,
            start_date=start_date,
            end_date=end_date,
            client=self.client,
        )
def sync_cyq_perf(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> dict[str, pd.DataFrame]:
    """Sync chip-distribution data for all stocks.

    Main entry point for the ``cyq_perf`` sync: builds a ``CyqPerfSync``
    and delegates to its ``sync_all`` method.

    Args:
        force_full: Forwarded to ``sync_all``. The original notes describe
            True as forcing a complete reload starting at 20180101.
        start_date: Manually chosen start date (YYYYMMDD).
        end_date: Manually chosen end date (original notes: defaults to
            today — resolved by the sync class, not here).
        max_workers: Worker thread count passed to ``CyqPerfSync``
            (original notes: default 10 — TODO confirm in the base class).
        dry_run: Forwarded to ``sync_all``. The original notes describe
            True as previewing what would be synced without writing data.

    Returns:
        Whatever ``sync_all`` returns; annotated as a mapping from
        ts_code to DataFrame.

    Example:
        >>> # First run (original notes: full load from 20180101)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Later runs (incremental, new data only)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Force a complete reload
        >>> result = sync_cyq_perf(force_full=True)
        >>>
        >>> # Explicit date range
        >>> result = sync_cyq_perf(start_date='20240101', end_date='20240131')
        >>>
        >>> # Custom thread count
        >>> result = sync_cyq_perf(max_workers=20)
        >>>
        >>> # Dry run (preview only)
        >>> result = sync_cyq_perf(dry_run=True)
    """
    manager = CyqPerfSync(max_workers=max_workers)
    sync_options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "dry_run": dry_run,
    }
    return manager.sync_all(**sync_options)
def preview_cyq_perf_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
) -> dict:
    """Preview the volume and a sample of a chip-distribution sync.

    Builds a ``CyqPerfSync`` and delegates to its ``preview_sync`` method.
    Recommended before a real sync to inspect what would be fetched; the
    original notes state that no actual sync is performed.

    Args:
        force_full: Forwarded to ``preview_sync``. The original notes
            describe True as previewing a full sync from 20180101.
        start_date: Manually chosen start date (overrides auto-detection
            per the original notes).
        end_date: Manually chosen end date (original notes: defaults to
            today).
        sample_size: Number of sample stocks used for the preview.

    Returns:
        The dictionary returned by ``preview_sync``. Per the original
        notes it has this shape:
            {
                'sync_needed': bool,
                'stock_count': int,
                'start_date': str,
                'end_date': str,
                'estimated_records': int,
                'sample_data': pd.DataFrame,
                'mode': str,  # 'full', 'incremental', 'partial' or 'none'
            }

    Example:
        >>> # Preview what would be synced
        >>> preview = preview_cyq_perf_sync()
        >>>
        >>> # Preview a full sync
        >>> preview = preview_cyq_perf_sync(force_full=True)
        >>>
        >>> # Preview with more samples
        >>> preview = preview_cyq_perf_sync(sample_size=5)
    """
    manager = CyqPerfSync()
    preview_options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "sample_size": sample_size,
    }
    return manager.preview_sync(**preview_options)

View File

@@ -10,6 +10,9 @@
- api_daily_basic.py: 每日指标数据同步 (DailyBasicSync 类)
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
- api_stock_st.py: ST股票列表同步 (StockSTSync 类)
- api_stk_limit.py: 涨跌停价格同步 (StkLimitSync 类)
- api_cyq_perf.py: 筹码分布数据同步 (CyqPerfSync 类)
- api_stock_basic.py: 股票基本信息同步
- api_trade_cal.py: 交易日历同步
@@ -77,6 +80,8 @@ def sync_all_data(
4. daily_basic: 每日指标PE、PB、换手率、市值
5. bak_basic: 历史股票列表
6. stock_st: ST股票列表
7. stk_limit: 每日涨跌停价格
8. cyq_perf: 每日筹码及胜率
新增接口时,只需在 api_wrappers/__init__.py 中添加注册代码,
无需修改本函数。