feat(data): 添加每日指标接口并优化因子引擎

- 新增 api_daily_basic.py 封装 Tushare 每日指标接口
- 因子引擎移除 lookback_days,支持 daily_basic 表字段路由
- 将每日指标纳入自动同步流程
- 删除废弃的 training/main.py
This commit is contained in:
2026-03-03 17:09:39 +08:00
parent 780284af7f
commit 53225b9443
12 changed files with 1132 additions and 433 deletions

View File

@@ -5,6 +5,7 @@ All wrapper files follow the naming convention: api_{data_type}.py
Available APIs:
- api_daily: Daily market data (日线行情)
- api_daily_basic: Daily basic indicators (每日指标：换手率、PE、PB、市值等)
- api_pro_bar: Pro Bar universal market data (通用行情,后复权)
- api_stock_basic: Stock basic information (股票基本信息)
- api_trade_cal: Trading calendar (交易日历)
@@ -13,9 +14,10 @@ Available APIs:
Example:
>>> from src.data.api_wrappers import get_daily, get_stock_basic, get_trade_cal, get_bak_basic
>>> from src.data.api_wrappers import get_pro_bar, sync_pro_bar
>>> from src.data.api_wrappers import get_pro_bar, sync_pro_bar, get_daily_basic, sync_daily_basic
>>> data = get_daily('000001.SZ', start_date='20240101', end_date='20240131')
>>> pro_data = get_pro_bar('000001.SZ', start_date='20240101', end_date='20240131')
>>> daily_basic = get_daily_basic(trade_date='20240101')
>>> stocks = get_stock_basic()
>>> calendar = get_trade_cal('20240101', '20240131')
>>> bak_basic = get_bak_basic(trade_date='20240101')
@@ -27,6 +29,12 @@ from src.data.api_wrappers.api_daily import (
preview_daily_sync,
DailySync,
)
from src.data.api_wrappers.api_daily_basic import (
get_daily_basic,
sync_daily_basic,
preview_daily_basic_sync,
DailyBasicSync,
)
from src.data.api_wrappers.api_pro_bar import (
get_pro_bar,
sync_pro_bar,
@@ -55,6 +63,11 @@ __all__ = [
"sync_daily",
"preview_daily_sync",
"DailySync",
# Daily basic indicators
"get_daily_basic",
"sync_daily_basic",
"preview_daily_basic_sync",
"DailyBasicSync",
# Pro Bar (universal market data)
"get_pro_bar",
"sync_pro_bar",

View File

@@ -495,4 +495,74 @@ df = ts.pro_bar(ts_code='000001.SZ', start_date='20180101', end_date='20181011',
例如:
df = ts.pro_bar(ts_code='000001.SH', asset='I', start_date='20180101', end_date='20181011')
df = ts.pro_bar(ts_code='000001.SH', asset='I', start_date='20180101', end_date='20181011')
每日指标
接口：daily_basic，可以通过数据工具调试和查看数据。
更新时间：交易日每日15点～17点之间
描述：获取全部股票每日重要的基本面指标，可用于选股分析、报表展示等。单次请求最大返回6000条数据，可按日线循环提取全部历史。
积分：至少2000积分才可以调取，5000积分无总量限制，具体请参阅积分获取办法
输入参数
名称 类型 必选 描述
ts_code str Y 股票代码(二选一)
trade_date str N 交易日期 (二选一)
start_date str N 开始日期(YYYYMMDD)
end_date str N 结束日期(YYYYMMDD)
日期都填YYYYMMDD格式，比如20181010
输出参数
名称 类型 描述
ts_code str TS股票代码
trade_date str 交易日期
close float 当日收盘价
turnover_rate float 换手率（%）
turnover_rate_f float 换手率(自由流通股)
volume_ratio float 量比
pe float 市盈率（总市值/净利润，亏损的PE为空）
pe_ttm float 市盈率（TTM，亏损的PE为空）
pb float 市净率(总市值/净资产)
ps float 市销率
ps_ttm float 市销率（TTM）
dv_ratio float 股息率（%）
dv_ttm float 股息率（TTM）（%）
total_share float 总股本 (万股)
float_share float 流通股本 (万股)
free_share float 自由流通股本 (万)
total_mv float 总市值 (万元)
circ_mv float 流通市值(万元)
接口用法
pro = ts.pro_api()
df = pro.daily_basic(ts_code='', trade_date='20180726', fields='ts_code,trade_date,turnover_rate,volume_ratio,pe,pb')
或者
df = pro.query('daily_basic', ts_code='', trade_date='20180726',fields='ts_code,trade_date,turnover_rate,volume_ratio,pe,pb')
数据样例
ts_code trade_date turnover_rate volume_ratio pe pb
0 600230.SH 20180726 2.4584 0.72 8.6928 3.7203
1 600237.SH 20180726 1.4737 0.88 166.4001 1.8868
2 002465.SZ 20180726 0.7489 0.72 71.8943 2.6391
3 300732.SZ 20180726 6.7083 0.77 21.8101 3.2513
4 600007.SH 20180726 0.0381 0.61 23.7696 2.3774
5 300068.SZ 20180726 1.4583 0.52 27.8166 1.7549
6 300552.SZ 20180726 2.0728 0.95 56.8004 2.9279
7 601369.SH 20180726 0.2088 0.95 44.1163 1.8001
8 002518.SZ 20180726 0.5814 0.76 15.1004 2.5626
9 002913.SZ 20180726 12.1096 1.03 33.1279 2.9217
10 601818.SH 20180726 0.1893 0.86 6.3064 0.7209
11 600926.SH 20180726 0.6065 0.46 9.1772 0.9808
12 002166.SZ 20180726 0.7582 0.82 16.9868 3.3452
13 600841.SH 20180726 0.3754 1.02 66.2647 2.2302
14 300634.SZ 20180726 23.1127 1.26 120.3053 14.3168
15 300126.SZ 20180726 1.2304 1.11 348.4306 1.5171
16 300718.SZ 20180726 17.6612 0.92 32.0239 3.8661
17 000708.SZ 20180726 0.5575 0.70 10.3674 1.0276
18 002626.SZ 20180726 0.6187 0.83 22.7580 4.2446
19 600816.SH 20180726 0.6745 0.65 11.0778 3.2214

View File

@@ -0,0 +1,252 @@
"""每日指标数据接口。
获取全部股票每日重要的基本面指标,包括换手率、市盈率、市净率、
总市值、流通市值等,可用于选股分析、报表展示等。
"""
from typing import Optional, Dict, Any
import pandas as pd
from src.data.client import TushareClient
from src.data.api_wrappers.base_sync import DateBasedSync
def get_daily_basic(
    trade_date: Optional[str] = None,
    ts_code: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    client: Optional[TushareClient] = None,
) -> pd.DataFrame:
    """Query the Tushare ``daily_basic`` endpoint and return the result.

    The endpoint exposes per-stock daily fundamental indicators (turnover
    rate, PE, PB, market value, ...) intended for stock screening and
    reporting.

    Note:
        The upstream API expects at least one of ``trade_date`` or
        ``ts_code``. This function does not validate that itself; it simply
        forwards whichever filters were supplied. Querying by ``trade_date``
        (all stocks for one day) is the recommended, more efficient pattern
        compared with querying stock by stock.

    Args:
        trade_date: A single trade date in YYYYMMDD format; selects every
            stock's indicators for that day.
        ts_code: Stock code such as '000001.SZ' or '600000.SH'. May be
            omitted when ``trade_date`` is given.
        start_date: Range start in YYYYMMDD format; pair with ``end_date``.
        end_date: Range end in YYYYMMDD format; pair with ``start_date``.
        client: TushareClient to issue the query through. Pass a shared
            instance during concurrent syncs so rate limiting is shared;
            when omitted, a new ``TushareClient()`` is created.

    Returns:
        pd.DataFrame as returned by the client. Per the API documentation
        the columns are:
            - ts_code: TS stock code
            - trade_date: Trade date (YYYYMMDD)
            - close: Closing price
            - turnover_rate: Turnover rate (%)
            - turnover_rate_f: Turnover rate (free float shares)
            - volume_ratio: Volume ratio
            - pe: Price-to-earnings ratio (total market cap / net profit)
            - pe_ttm: PE ratio (TTM)
            - pb: Price-to-book ratio (total market cap / net assets)
            - ps: Price-to-sales ratio
            - ps_ttm: PS ratio (TTM)
            - dv_ratio: Dividend yield (%)
            - dv_ttm: Dividend yield (TTM) (%)
            - total_share: Total shares (10k shares)
            - float_share: Float shares (10k shares)
            - free_share: Free float shares (10k shares)
            - total_mv: Total market value (10k CNY)
            - circ_mv: Circulating market value (10k CNY)
        A column named ``date`` is renamed to ``trade_date`` if present.

    Example:
        >>> # All stocks for one date (recommended)
        >>> data = get_daily_basic(trade_date='20240101')
        >>>
        >>> # One stock on one date
        >>> data = get_daily_basic(ts_code='000001.SZ', trade_date='20240101')
        >>>
        >>> # One stock over a date range
        >>> data = get_daily_basic(
        ...     ts_code='000001.SZ',
        ...     start_date='20240101',
        ...     end_date='20240131'
        ... )
    """
    if not client:
        client = TushareClient()

    candidates = {
        "trade_date": trade_date,
        "ts_code": ts_code,
        "start_date": start_date,
        "end_date": end_date,
    }
    # Forward only the filters the caller actually supplied.
    query_args = {key: value for key, value in candidates.items() if value}

    frame = client.query("daily_basic", **query_args)

    # Normalise the date column name when the client reports it as "date".
    if "date" not in frame.columns:
        return frame
    return frame.rename(columns={"date": "trade_date"})
class DailyBasicSync(DateBasedSync):
    """Batch sync manager for daily basic indicators (full or incremental).

    Built on DateBasedSync, so data is pulled one trade date at a time.
    That access pattern suits this dataset: a single API call by trade date
    returns the indicators for the whole market.

    Example:
        >>> sync = DailyBasicSync()
        >>> results = sync.sync_all()  # incremental sync
        >>> results = sync.sync_all(force_full=True)  # full sync
        >>> preview = sync.preview_sync()  # preview only
    """

    table_name = "daily_basic"
    default_start_date = "20180101"

    # Table schema: two key columns followed by the indicator columns,
    # every one of which is stored as DOUBLE.
    TABLE_SCHEMA = {
        "ts_code": "VARCHAR(16) NOT NULL",
        "trade_date": "DATE NOT NULL",
        **dict.fromkeys(
            (
                "close",
                "turnover_rate",
                "turnover_rate_f",
                "volume_ratio",
                "pe",
                "pe_ttm",
                "pb",
                "ps",
                "ps_ttm",
                "dv_ratio",
                "dv_ttm",
                "total_share",
                "float_share",
                "free_share",
                "total_mv",
                "circ_mv",
            ),
            "DOUBLE",
        ),
    }

    # Index definitions: (index name, indexed columns).
    TABLE_INDEXES = [("idx_daily_basic_date_code", ["trade_date", "ts_code"])]

    # Primary key columns.
    PRIMARY_KEY = ("ts_code", "trade_date")

    def fetch_single_date(self, trade_date: str) -> pd.DataFrame:
        """Fetch the daily basic indicators for one trade date.

        Args:
            trade_date: Trade date in YYYYMMDD format.

        Returns:
            DataFrame with the indicators of all stocks for that day.
        """
        # Route the request through this manager's shared client so that
        # rate limiting is applied consistently.
        return get_daily_basic(trade_date=trade_date, client=self.client)
def sync_daily_basic(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    dry_run: bool = False,
) -> pd.DataFrame:
    """Sync the daily basic indicators for all stocks.

    Main entry point for daily-basic synchronisation; it creates a
    DailyBasicSync manager and delegates to its ``sync_all`` method.

    Args:
        force_full: When True, force a complete reload starting from 20180101.
        start_date: Manually chosen start date (YYYYMMDD).
        end_date: Manually chosen end date (defaults to today).
        dry_run: When True, only preview what would be synced; nothing is
            written.

    Returns:
        DataFrame of the synced data.

    Example:
        >>> # First sync (full load from 20180101)
        >>> result = sync_daily_basic()
        >>>
        >>> # Later syncs (incremental - new data only)
        >>> result = sync_daily_basic()
        >>>
        >>> # Force a complete reload
        >>> result = sync_daily_basic(force_full=True)
        >>>
        >>> # Explicit date range
        >>> result = sync_daily_basic(start_date='20240101', end_date='20240131')
        >>>
        >>> # Dry run (preview only)
        >>> result = sync_daily_basic(dry_run=True)
    """
    options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "dry_run": dry_run,
    }
    return DailyBasicSync().sync_all(**options)
def preview_daily_basic_sync(
    force_full: bool = False,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    sample_size: int = 3,
) -> Dict[str, Any]:
    """Preview the volume and a sample of a daily-basic sync without syncing.

    Recommended before a real sync to inspect what would be fetched. The
    work is delegated to ``DailyBasicSync.preview_sync``.

    Args:
        force_full: When True, preview a full sync (from 20180101).
        start_date: Manually chosen start date (overrides auto-detection).
        end_date: Manually chosen end date (defaults to today).
        sample_size: Number of sample days used for the preview (default: 3).

    Returns:
        Dictionary with the preview information:
        {
            'sync_needed': bool,
            'date_count': int,
            'start_date': str,
            'end_date': str,
            'estimated_records': int,
            'sample_data': pd.DataFrame,
            'mode': str,  # 'full', 'incremental', or 'none'
        }

    Example:
        >>> # Preview what would be synced
        >>> preview = preview_daily_basic_sync()
        >>>
        >>> # Preview a full sync
        >>> preview = preview_daily_basic_sync(force_full=True)
        >>>
        >>> # Preview with more samples
        >>> preview = preview_daily_basic_sync(sample_size=5)
    """
    options = {
        "force_full": force_full,
        "start_date": start_date,
        "end_date": end_date,
        "sample_size": sample_size,
    }
    return DailyBasicSync().preview_sync(**options)

View File

@@ -7,6 +7,7 @@
✅ 本模块包含的同步逻辑(每日更新):
- api_daily.py: 日线数据同步 (DailySync 类)
- api_daily_basic.py: 每日指标数据同步 (DailyBasicSync 类)
- api_bak_basic.py: 历史股票列表同步 (BakBasicSync 类)
- api_pro_bar.py: Pro Bar 数据同步 (ProBarSync 类)
- api_stock_basic.py: 股票基本信息同步
@@ -44,6 +45,7 @@ from src.data.api_wrappers import sync_all_stocks
from src.data.api_wrappers.api_daily import sync_daily, preview_daily_sync
from src.data.api_wrappers.api_pro_bar import sync_pro_bar
from src.data.api_wrappers.api_bak_basic import sync_bak_basic
from src.data.api_wrappers.api_daily_basic import sync_daily_basic
def preview_sync(
@@ -157,7 +159,8 @@ def sync_all_data(
2. 股票基本信息 (sync_all_stocks)
3. 日线数据 (sync_daily)
4. Pro Bar 数据 (sync_pro_bar)
5. 历史股票列表 (sync_bak_basic)
5. 每日指标数据 (sync_daily_basic)
6. 历史股票列表 (sync_bak_basic)
【不包含的同步(需单独调用)】
- 财务数据: 利润表、资产负债表、现金流量表(季度更新)
@@ -238,7 +241,7 @@ def sync_all_data(
results["daily"] = pd.DataFrame()
# 4. Sync Pro Bar data
print("\n[4/5] Syncing Pro Bar data (with adj, tor, vr)...")
print("\n[4/6] Syncing Pro Bar data (with adj, tor, vr)...")
try:
# 确保表存在
from src.data.api_wrappers.api_pro_bar import ProBarSync
@@ -255,14 +258,31 @@ def sync_all_data(
sum(len(df) for df in pro_bar_result.values()) if pro_bar_result else 0
)
print(
f"[4/5] Pro Bar data: OK ({total_pro_bar_records} records from {len(pro_bar_result)} stocks)"
f"[4/6] Pro Bar data: OK ({total_pro_bar_records} records from {len(pro_bar_result)} stocks)"
)
except Exception as e:
print(f"[4/5] Pro Bar data: FAILED - {e}")
print(f"[4/6] Pro Bar data: FAILED - {e}")
results["pro_bar"] = pd.DataFrame()
# 5. Sync stock historical list (bak_basic)
print("\n[5/5] Syncing stock historical list (bak_basic)...")
# 5. Sync daily basic indicators
print(
"\n[5/6] Syncing daily basic indicators (PE, PB, turnover rate, market value)..."
)
try:
# 确保表存在
from src.data.api_wrappers.api_daily_basic import DailyBasicSync
DailyBasicSync().ensure_table_exists()
daily_basic_result = sync_daily_basic(force_full=force_full, dry_run=dry_run)
results["daily_basic"] = daily_basic_result
print(f"[5/6] Daily basic: OK ({len(daily_basic_result)} records)")
except Exception as e:
print(f"[5/6] Daily basic: FAILED - {e}")
results["daily_basic"] = pd.DataFrame()
# 6. Sync stock historical list (bak_basic)
print("\n[6/6] Syncing stock historical list (bak_basic)...")
try:
# 确保表存在
from src.data.api_wrappers.api_bak_basic import BakBasicSync
@@ -271,9 +291,9 @@ def sync_all_data(
bak_basic_result = sync_bak_basic(force_full=force_full)
results["bak_basic"] = bak_basic_result
print(f"[5/5] Bak basic: OK ({len(bak_basic_result)} records)")
print(f"[6/6] Bak basic: OK ({len(bak_basic_result)} records)")
except Exception as e:
print(f"[5/5] Bak basic: FAILED - {e}")
print(f"[6/6] Bak basic: FAILED - {e}")
results["bak_basic"] = pd.DataFrame()
# Summary
@@ -286,7 +306,7 @@ def sync_all_data(
total_records = sum(len(df) for df in data.values())
print(f" {data_type}: {len(data)} stocks, {total_records} total records")
else:
# bak_basic 返回的是 DataFrame
# daily_basic 和 bak_basic 返回的是 DataFrame
print(f" {data_type}: {len(data)} records")
print("=" * 60)
print("\nNote: namechange is NOT in auto-sync. To sync manually:")
@@ -308,7 +328,7 @@ if __name__ == "__main__":
print("")
print(" # Or sync individual data types:")
print(" from src.data.sync import sync_all, preview_sync")
print(" from src.data.sync import sync_bak_basic")
print(" from src.data.api_wrappers import sync_daily_basic, sync_bak_basic")
print("")
print(" # Preview before sync (recommended)")
print(" preview = preview_sync()")