Files
ProStock/src/data/api_wrappers/api_cyq_perf.py

234 lines
7.2 KiB
Python
Raw Normal View History

"""CYQ Performance (筹码分布) interface.
Fetch A-share stock chip distribution data (cost distribution and win rate) from Tushare.
This interface retrieves daily chip average cost and win rate information.
Data starts from 2018.
"""
import pandas as pd
from typing import Optional
from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import StockBasedSync
def get_cyq_perf(
    ts_code: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    client: Optional[TushareClient] = None,
) -> pd.DataFrame:
    """Query the Tushare ``cyq_perf`` endpoint for a single stock.

    The endpoint reports daily chip-distribution statistics (cost
    percentiles and win rate) for A-share stocks.

    Args:
        ts_code: Stock code, e.g. ``'000001.SZ'`` or ``'600000.SH'``.
        start_date: Lower date bound in YYYYMMDD format. Omitted from the
            request when empty or None.
        end_date: Upper date bound in YYYYMMDD format. Omitted from the
            request when empty or None.
        client: Client used to issue the query. When not supplied, a new
            ``TushareClient`` is created for this call. Pass a shared
            instance from concurrent callers so they share one rate limiter.

    Returns:
        The DataFrame produced by ``client.query``, with a ``date`` column
        (if one is present) renamed to ``trade_date``. This function does
        not validate the columns; per the Tushare interface they are
        expected to be:
            - ts_code: Stock code
            - trade_date: Trade date (YYYYMMDD)
            - his_low: Historical lowest price
            - his_high: Historical highest price
            - cost_5pct: 5th percentile cost
            - cost_15pct: 15th percentile cost
            - cost_50pct: 50th percentile cost (median)
            - cost_85pct: 85th percentile cost
            - cost_95pct: 95th percentile cost
            - weight_avg: Weighted average cost
            - winner_rate: Win rate (percentage)

    Example:
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
        >>>
        >>> # Reuse one client across calls so rate limiting is shared
        >>> from src.data.client import TushareClient
        >>> client = TushareClient()
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131', client=client)
    """
    api_client = client if client else TushareClient()

    # Only forward the date bounds the caller actually provided.
    date_bounds = {"start_date": start_date, "end_date": end_date}
    query_args = {"ts_code": ts_code}
    query_args.update(
        {name: value for name, value in date_bounds.items() if value}
    )

    frame = api_client.query("cyq_perf", **query_args)

    # Normalise the date column name so downstream code can rely on
    # ``trade_date``.
    if "date" not in frame.columns:
        return frame
    return frame.rename(columns={"date": "trade_date"})
class CyqPerfSync(StockBasedSync):
    """Batch sync manager for chip-distribution (``cyq_perf``) data.

    Builds on ``StockBasedSync``; according to the original notes the base
    class fetches data stock-by-stock with multiple threads and supports
    both full and incremental syncs (base class not visible here).

    Example:
        >>> sync = CyqPerfSync()
        >>> results = sync.sync_all()  # incremental sync
        >>> results = sync.sync_all(force_full=True)  # full sync
        >>> preview = sync.preview_sync()  # preview only
    """

    table_name = "cyq_perf"

    # Column name -> SQL type. The two key columns are spelled out; every
    # price/cost/rate column shares the same DOUBLE type.
    TABLE_SCHEMA = {
        "ts_code": "VARCHAR(16) NOT NULL",
        "trade_date": "DATE NOT NULL",
        **dict.fromkeys(
            (
                "his_low",
                "his_high",
                "cost_5pct",
                "cost_15pct",
                "cost_50pct",
                "cost_85pct",
                "cost_95pct",
                "weight_avg",
                "winner_rate",
            ),
            "DOUBLE",
        ),
    }

    # Secondary indexes as (index name, indexed columns) pairs.
    TABLE_INDEXES = [
        ("idx_cyq_perf_date_code", ["trade_date", "ts_code"]),
    ]

    # Columns forming the primary key.
    PRIMARY_KEY = ("ts_code", "trade_date")

    def fetch_single_stock(
        self,
        ts_code: str,
        start_date: str,
        end_date: str,
    ) -> pd.DataFrame:
        """Fetch chip-distribution data for one stock.

        Args:
            ts_code: Stock code.
            start_date: Start date (YYYYMMDD).
            end_date: End date (YYYYMMDD).

        Returns:
            DataFrame of chip-distribution rows as returned by
            ``get_cyq_perf``.
        """
        # Hand over this manager's client so all fetches share one rate
        # limiter.
        return get_cyq_perf(
            ts_code=ts_code,
            start_date=start_date,
            end_date=end_date,
            client=self.client,
        )
def sync_cyq_perf(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> dict[str, pd.DataFrame]:
    """Synchronise chip-distribution data for all stocks.

    Main entry point for syncing ``cyq_perf`` data: creates a
    ``CyqPerfSync`` manager and delegates to its ``sync_all`` method.

    Args:
        force_full: If True, request a complete reload instead of an
            incremental sync (documented upstream as starting from
            20180101).
        start_date: Manually chosen start date (YYYYMMDD).
        end_date: Manually chosen end date (YYYYMMDD); documented upstream
            as defaulting to today.
        max_workers: Number of worker threads, passed to ``CyqPerfSync``.
            The effective default when None is decided by the manager
            (original notes say 10 — confirm against the base class).
        dry_run: If True, only preview what would be synced without
            writing any data.

    Returns:
        Whatever ``sync_all`` returns; annotated as a dict mapping
        ``ts_code`` to its DataFrame.

    Example:
        >>> # First sync (full load from 20180101)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Later syncs (incremental - new data only)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Force a complete reload
        >>> result = sync_cyq_perf(force_full=True)
        >>>
        >>> # Explicit date range
        >>> result = sync_cyq_perf(start_date='20240101', end_date='20240131')
        >>>
        >>> # Custom thread count
        >>> result = sync_cyq_perf(max_workers=20)
        >>>
        >>> # Dry run (preview only)
        >>> result = sync_cyq_perf(dry_run=True)
    """
    manager = CyqPerfSync(max_workers=max_workers)
    sync_options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "dry_run": dry_run,
    }
    return manager.sync_all(**sync_options)
def preview_cyq_perf_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
) -> dict:
    """Preview the volume and a sample of a chip-distribution sync.

    No sync is performed. This is the recommended way to check what would
    be synced before running the real thing. Creates a ``CyqPerfSync``
    manager with default settings and delegates to its ``preview_sync``
    method.

    Args:
        force_full: If True, preview a full sync (documented upstream as
            starting from 20180101).
        start_date: Manually chosen start date; overrides auto-detection.
        end_date: Manually chosen end date; documented upstream as
            defaulting to today.
        sample_size: Number of sample stocks to fetch for the preview
            (default: 3).

    Returns:
        The dict produced by ``preview_sync``; documented upstream as:
        {
            'sync_needed': bool,
            'stock_count': int,
            'start_date': str,
            'end_date': str,
            'estimated_records': int,
            'sample_data': pd.DataFrame,
            'mode': str,  # 'full', 'incremental', 'partial', or 'none'
        }

    Example:
        >>> # Preview what would be synced
        >>> preview = preview_cyq_perf_sync()
        >>>
        >>> # Preview a full sync
        >>> preview = preview_cyq_perf_sync(force_full=True)
        >>>
        >>> # Preview with more samples
        >>> preview = preview_cyq_perf_sync(sample_size=5)
    """
    preview_options = dict(
        force_full=force_full,
        start_date=start_date,
        end_date=end_date,
        sample_size=sample_size,
    )
    manager = CyqPerfSync()
    preview = manager.preview_sync(**preview_options)
    return preview