feat(data): 添加每日筹码及胜率数据接口 (cyq_perf)

- 新增 api_cyq_perf 模块,支持筹码分布数据获取和同步
- 在 sync_registry 中注册 cyq_perf 同步器
This commit is contained in:
2026-03-26 22:22:43 +08:00
parent 6730acbae1
commit d4e0e2a0b6
9 changed files with 261 additions and 230 deletions

View File

@@ -9,11 +9,12 @@ import pandas as pd
from typing import Optional
from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import StockBasedSync
from src.data.api_wrappers.base_sync import DateBasedSync
def get_cyq_perf(
ts_code: str,
trade_date: Optional[str] = None,
ts_code: Optional[str] = None,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
client: Optional[TushareClient] = None,
@@ -24,9 +25,10 @@ def get_cyq_perf(
for A-share stocks. Data starts from 2018.
Args:
ts_code: Stock code (e.g., '000001.SZ', '600000.SH')
start_date: Start date in YYYYMMDD format
end_date: End date in YYYYMMDD format
trade_date: Specific trade date in YYYYMMDD format
ts_code: Stock code filter (optional, e.g., '000001.SZ')
start_date: Start date for date range query (YYYYMMDD format)
end_date: End date for date range query (YYYYMMDD format)
client: Optional TushareClient instance for shared rate limiting.
If None, creates a new client. For concurrent sync operations,
pass a shared client to ensure proper rate limiting.
@@ -46,19 +48,23 @@ def get_cyq_perf(
- winner_rate: Win rate (percentage)
Example:
>>> # Get chip distribution data for a stock
>>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
>>> # Get all stocks' chip distribution for a single date
>>> data = get_cyq_perf(trade_date='20240115')
>>>
>>> # Get data with shared client for rate limiting
>>> from src.data.client import TushareClient
>>> client = TushareClient()
>>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131', client=client)
>>> # Get date range data for a specific stock
>>> data = get_cyq_perf(ts_code='000001.SZ', start_date='20240101', end_date='20240131')
>>>
>>> # Get specific stock on specific date
>>> data = get_cyq_perf(ts_code='000001.SZ', trade_date='20240115')
"""
client = client or TushareClient()
# Build parameters
params = {"ts_code": ts_code}
params = {}
if trade_date:
params["trade_date"] = trade_date
if ts_code:
params["ts_code"] = ts_code
if start_date:
params["start_date"] = start_date
if end_date:
@@ -74,10 +80,10 @@ def get_cyq_perf(
return data
class CyqPerfSync(StockBasedSync):
class CyqPerfSync(DateBasedSync):
"""筹码分布数据批量同步管理器,支持全量/增量同步。
继承自 StockBasedSync,使用多线程按股票并发获取数据。
继承自 DateBasedSync,使用按日期并发获取数据。
Example:
>>> sync = CyqPerfSync()
@@ -87,6 +93,7 @@ class CyqPerfSync(StockBasedSync):
"""
table_name = "cyq_perf"
default_start_date = "20180101"
# 表结构定义
TABLE_SCHEMA = {
@@ -111,52 +118,36 @@ class CyqPerfSync(StockBasedSync):
# 主键定义
PRIMARY_KEY = ("ts_code", "trade_date")
def fetch_single_stock(
self,
ts_code: str,
start_date: str,
end_date: str,
) -> pd.DataFrame:
"""获取单只股票的筹码分布数据。
def fetch_single_date(self, trade_date: str) -> pd.DataFrame:
"""获取单日所有股票的筹码分布数据。
Args:
ts_code: 股票代码
start_date: 起始日期(YYYYMMDD)
end_date: 结束日期(YYYYMMDD)
trade_date: 交易日期(YYYYMMDD)
Returns:
包含筹码分布数据的 DataFrame
包含当日所有股票筹码分布数据的 DataFrame
"""
# 使用 get_cyq_perf 获取数据(传递共享 client)
data = get_cyq_perf(
ts_code=ts_code,
start_date=start_date,
end_date=end_date,
client=self.client, # 传递共享客户端以确保限流
)
return data
return get_cyq_perf(trade_date=trade_date, client=self.client)
def sync_cyq_perf(
force_full: bool = False,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
max_workers: Optional[int] = None,
dry_run: bool = False,
) -> dict[str, pd.DataFrame]:
"""同步所有股票的筹码分布数据。
force_full: bool = False,
) -> pd.DataFrame:
同步筹码分布数据到 DuckDB,支持智能增量同步。
这是筹码分布数据同步的主要入口点。
逻辑:
- 若表不存在:创建表 + 复合索引 (trade_date, ts_code) + 全量同步
- 若表存在:从 last_date + 1 开始增量同步
Args:
start_date: 起始日期(YYYYMMDD 格式,默认全量从 20180101,增量从 last_date+1)
end_date: 结束日期(YYYYMMDD 格式,默认为今天)
force_full: 若为 True,强制从 20180101 完整重载
start_date: 手动指定起始日期(YYYYMMDD)
end_date: 手动指定结束日期(默认为今天)
max_workers: 工作线程数(默认: 10)
dry_run: 若为 True,仅预览将要同步的内容,不写入数据
Returns:
映射 ts_code 到 DataFrame 的字典
包含同步数据的 pd.DataFrame
Example:
>>> # 首次同步(从 20180101 全量加载)
@@ -170,49 +161,31 @@ def sync_cyq_perf(
>>>
>>> # 手动指定日期范围
>>> result = sync_cyq_perf(start_date='20240101', end_date='20240131')
>>>
>>> # 自定义线程数
>>> result = sync_cyq_perf(max_workers=20)
>>>
# Dry run(仅预览)
>>> result = sync_cyq_perf(dry_run=True)
"""
sync_manager = CyqPerfSync(max_workers=max_workers)
sync_manager = CyqPerfSync()
return sync_manager.sync_all(
force_full=force_full,
start_date=start_date,
end_date=end_date,
dry_run=dry_run,
force_full=force_full,
)
def preview_cyq_perf_sync(
force_full: bool = False,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
force_full: bool = False,
sample_size: int = 3,
) -> dict:
"""预览筹码分布数据同步数据量和样本(不实际同步)。
这是推荐的方式,可在实际同步前检查将要同步的内容。
Args:
force_full: 若为 True,预览全量同步(从 20180101)
start_date: 手动指定起始日期(覆盖自动检测)
end_date: 手动指定结束日期(默认为今天)
sample_size: 预览用样本股票数量(默认: 3)
force_full: 若为 True,预览全量同步(从 20180101)
sample_size: 预览天数(默认: 3)
Returns:
包含预览信息的字典
{
'sync_needed': bool,
'stock_count': int,
'start_date': str,
'end_date': str,
'estimated_records': int,
'sample_data': pd.DataFrame,
'mode': str, # 'full', 'incremental', 'partial', 或 'none'
}
包含预览信息的字典
Example:
>>> # 预览将要同步的内容
@@ -220,14 +193,11 @@ def preview_cyq_perf_sync(
>>>
>>> # 预览全量同步
>>> preview = preview_cyq_perf_sync(force_full=True)
>>>
>>> # 预览更多样本
>>> preview = preview_cyq_perf_sync(sample_size=5)
"""
sync_manager = CyqPerfSync()
return sync_manager.preview_sync(
force_full=force_full,
start_date=start_date,
end_date=end_date,
force_full=force_full,
sample_size=sample_size,
)