"""CYQ performance (chip distribution) interface.

Fetches A-share chip distribution data (cost distribution and win rate) from
Tushare: daily chip average cost and win rate information. Data starts from
2018.
"""

from typing import Optional

import pandas as pd

from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import StockBasedSync


def get_cyq_perf(
    ts_code: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    client: Optional[TushareClient] = None,
) -> pd.DataFrame:
    """Query the Tushare ``cyq_perf`` API for one stock.

    Args:
        ts_code: Stock code (e.g., '000001.SZ', '600000.SH').
        start_date: Start date in YYYYMMDD format. Left out of the query
            when empty or None.
        end_date: End date in YYYYMMDD format. Left out of the query when
            empty or None.
        client: TushareClient to issue the query with. A new TushareClient
            is created when this argument is falsy. For concurrent sync
            operations, pass a shared client so rate limiting is shared.

    Returns:
        The DataFrame returned by ``client.query``; a ``date`` column, if
        present, is renamed to ``trade_date``. According to the interface
        description the columns are:
            - ts_code: Stock code
            - trade_date: Trade date (YYYYMMDD)
            - his_low: Historical lowest price
            - his_high: Historical highest price
            - cost_5pct: 5th percentile cost
            - cost_15pct: 15th percentile cost
            - cost_50pct: 50th percentile cost (median)
            - cost_85pct: 85th percentile cost
            - cost_95pct: 95th percentile cost
            - weight_avg: Weighted average cost
            - winner_rate: Win rate (percentage)

    Example:
        >>> # Get chip distribution data for a stock
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131')
        >>>
        >>> # Get data with a shared client for rate limiting
        >>> from src.data.client import TushareClient
        >>> client = TushareClient()
        >>> data = get_cyq_perf('000001.SZ', start_date='20240101', end_date='20240131', client=client)
    """
    if not client:
        client = TushareClient()

    # Only forward the date bounds that were actually supplied; the key
    # order (ts_code, start_date, end_date) matches the declaration order.
    date_bounds = {"start_date": start_date, "end_date": end_date}
    query_args = {
        "ts_code": ts_code,
        **{key: value for key, value in date_bounds.items() if value},
    }

    frame = client.query("cyq_perf", **query_args)

    # Nothing to normalise when the API did not return a "date" column.
    if "date" not in frame.columns:
        return frame
    return frame.rename(columns={"date": "trade_date"})


class CyqPerfSync(StockBasedSync):
    """Batch sync manager for chip distribution data (full / incremental).

    Extends StockBasedSync and supplies the table definition plus the
    per-stock fetch routine. Per the original notes, the base class fetches
    stocks concurrently with worker threads; that logic is not visible in
    this file.

    Example:
        >>> sync = CyqPerfSync()
        >>> results = sync.sync_all()  # incremental sync
        >>> results = sync.sync_all(force_full=True)  # full sync
        >>> preview = sync.preview_sync()  # preview
    """

    table_name = "cyq_perf"

    # Table structure: column name -> SQL type declaration.
    TABLE_SCHEMA = {
        "ts_code": "VARCHAR(16) NOT NULL",
        "trade_date": "DATE NOT NULL",
        "his_low": "DOUBLE",
        "his_high": "DOUBLE",
        "cost_5pct": "DOUBLE",
        "cost_15pct": "DOUBLE",
        "cost_50pct": "DOUBLE",
        "cost_85pct": "DOUBLE",
        "cost_95pct": "DOUBLE",
        "weight_avg": "DOUBLE",
        "winner_rate": "DOUBLE",
    }

    # Index definitions: (index name, indexed columns).
    TABLE_INDEXES = [
        ("idx_cyq_perf_date_code", ["trade_date", "ts_code"]),
    ]

    # Primary key columns.
    PRIMARY_KEY = ("ts_code", "trade_date")

    def fetch_single_stock(
        self,
        ts_code: str,
        start_date: str,
        end_date: str,
    ) -> pd.DataFrame:
        """Fetch chip distribution data for a single stock.

        Args:
            ts_code: Stock code.
            start_date: Start date (YYYYMMDD).
            end_date: End date (YYYYMMDD).

        Returns:
            DataFrame with the chip distribution data, as returned by
            ``get_cyq_perf``.
        """
        # Hand over this manager's client so all fetches share one
        # rate limiter.
        return get_cyq_perf(
            ts_code,
            start_date,
            end_date,
            client=self.client,
        )


def sync_cyq_perf(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_workers: Optional[int] = None,
    dry_run: bool = False,
) -> dict[str, pd.DataFrame]:
    """Sync chip distribution data for all stocks.

    Main entry point for chip distribution sync: builds a ``CyqPerfSync``
    and delegates to its ``sync_all``. The semantics noted below come from
    the original documentation and are implemented in the base class, which
    is not visible in this file.

    Args:
        force_full: If True, force a complete reload from 20180101.
        start_date: Manually specified start date (YYYYMMDD).
        end_date: Manually specified end date (documented default: today).
        max_workers: Number of worker threads, passed to the ``CyqPerfSync``
            constructor (documented default: 10 — TODO confirm in
            StockBasedSync).
        dry_run: If True, only preview what would be synced without writing
            data.

    Returns:
        Dictionary mapping ts_code to DataFrame.

    Example:
        >>> # First sync (full load from 20180101)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Later syncs (incremental - new data only)
        >>> result = sync_cyq_perf()
        >>>
        >>> # Force a complete reload
        >>> result = sync_cyq_perf(force_full=True)
        >>>
        >>> # Manually specify the date range
        >>> result = sync_cyq_perf(start_date='20240101', end_date='20240131')
        >>>
        >>> # Custom thread count
        >>> result = sync_cyq_perf(max_workers=20)
        >>>
        >>> # Dry run (preview only)
        >>> result = sync_cyq_perf(dry_run=True)
    """
    return CyqPerfSync(max_workers=max_workers).sync_all(
        force_full=force_full,
        start_date=start_date,
        end_date=end_date,
        dry_run=dry_run,
    )


def preview_cyq_perf_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
) -> dict:
    """Preview the volume and a sample of a chip distribution sync.

    Recommended way to inspect what would be synced before running the real
    sync: builds a default ``CyqPerfSync`` and delegates to its
    ``preview_sync``. The semantics noted below come from the original
    documentation and are implemented in the base class, which is not
    visible in this file.

    Args:
        force_full: If True, preview a full sync (from 20180101).
        start_date: Manually specified start date (overrides auto-detection).
        end_date: Manually specified end date (documented default: today).
        sample_size: Number of sample stocks used for the preview.

    Returns:
        Dictionary with preview information; the documented shape is:
        {
            'sync_needed': bool,
            'stock_count': int,
            'start_date': str,
            'end_date': str,
            'estimated_records': int,
            'sample_data': pd.DataFrame,
            'mode': str,  # 'full', 'incremental', 'partial', or 'none'
        }

    Example:
        >>> # Preview what would be synced
        >>> preview = preview_cyq_perf_sync()
        >>>
        >>> # Preview a full sync
        >>> preview = preview_cyq_perf_sync(force_full=True)
        >>>
        >>> # Preview with more samples
        >>> preview = preview_cyq_perf_sync(sample_size=5)
    """
    return CyqPerfSync().preview_sync(
        force_full=force_full,
        start_date=start_date,
        end_date=end_date,
        sample_size=sample_size,
    )