feat(data): 添加每日指标接口并优化因子引擎

- 新增 api_daily_basic.py 封装 Tushare 每日指标接口
- 因子引擎移除 lookback_days,支持 daily_basic 表字段路由
- 将每日指标纳入自动同步流程
- 删除废弃的 training/main.py
This commit is contained in:
2026-03-03 17:09:39 +08:00
parent 780284af7f
commit 53225b9443
12 changed files with 1132 additions and 433 deletions

View File

@@ -69,16 +69,11 @@ class DataRouter:
# 收集所有需要的表和字段
required_tables: Dict[str, Set[str]] = {}
max_lookback = 0
for spec in data_specs:
if spec.table not in required_tables:
required_tables[spec.table] = set()
required_tables[spec.table].update(spec.columns)
max_lookback = max(max_lookback, spec.lookback_days)
# 调整日期范围以包含回看期
adjusted_start = self._adjust_start_date(start_date, max_lookback)
# 从数据源获取各表数据
table_data = {}
@@ -86,7 +81,7 @@ class DataRouter:
df = self._load_table(
table_name=table_name,
columns=list(columns),
start_date=adjusted_start,
start_date=start_date,
end_date=end_date,
stock_codes=stock_codes,
)
@@ -95,11 +90,6 @@ class DataRouter:
# 组装核心宽表
core_table = self._assemble_wide_table(table_data, required_tables)
# 过滤到实际请求日期范围
core_table = core_table.filter(
(pl.col("trade_date") >= start_date) & (pl.col("trade_date") <= end_date)
)
return core_table
def _load_table(
@@ -265,34 +255,6 @@ class DataRouter:
return result
def _adjust_start_date(self, start_date: str, lookback_days: int) -> str:
    """Shift a start date backwards to make room for a look-back window.

    The date is moved back by ``lookback_days + 30`` calendar days: the
    requested look-back plus a fixed 30-day buffer. This is a calendar-day
    approximation; it does not consult a trading calendar, so callers that
    need an exact number of trading days should not rely on it.

    Args:
        start_date: Original start date in ``YYYYMMDD`` format.
        lookback_days: Number of days to look back.

    Returns:
        The adjusted start date in ``YYYYMMDD`` format. The result is
        always a date that exists on the calendar.

    Raises:
        ValueError: If ``start_date`` is not a valid ``YYYYMMDD`` date.
    """
    # Imported locally so the function does not depend on the module's
    # import block, which is not visible from here.
    from datetime import datetime, timedelta

    # Extra 30 days of buffer on top of the requested look-back.
    total_days = lookback_days + 30

    # Real calendar arithmetic: treating every month as 30 days could
    # produce nonexistent dates such as "20240230".
    origin = datetime.strptime(start_date, "%Y%m%d").date()
    shifted = origin - timedelta(days=total_days)

    # Format manually so the year is always zero-padded to four digits.
    return f"{shifted.year:04d}{shifted.month:02d}{shifted.day:02d}"
def clear_cache(self) -> None:
"""清除数据缓存。"""
with self._lock:

View File

@@ -18,12 +18,10 @@ class DataSpec:
Attributes:
table: 数据表名称
columns: 需要的字段列表
lookback_days: 回看天数(用于时序计算)
"""
table: str
columns: List[str]
lookback_days: int = 1
@dataclass

View File

@@ -73,9 +73,9 @@ class ExecutionPlanner:
) -> List[DataSpec]:
"""从依赖推导数据规格。
根据表达式中的函数类型推断回看天数需求。
基础行情字段(open, high, low, close, vol, amount, pre_close, change, pct_chg)
默认从 pro_bar 表获取。
每日指标字段(total_mv, circ_mv, pe, pb 等)从 daily_basic 表获取。
Args:
dependencies: 依赖的字段集合
@@ -84,10 +84,6 @@ class ExecutionPlanner:
Returns:
数据规格列表
"""
# 计算最大回看窗口
max_window = self._extract_max_window(expression)
lookback_days = max(1, max_window)
# 基础行情字段集合(这些字段从 pro_bar 表获取)
pro_bar_fields = {
"open",
@@ -103,9 +99,27 @@ class ExecutionPlanner:
"volume_ratio",
}
# 将依赖分为 pro_bar 字段和其他字段
# 每日指标字段集合(这些字段从 daily_basic 表获取)
daily_basic_fields = {
"turnover_rate_f",
"pe",
"pe_ttm",
"pb",
"ps",
"ps_ttm",
"dv_ratio",
"dv_ttm",
"total_share",
"float_share",
"free_share",
"total_mv",
"circ_mv",
}
# 将依赖分为不同表的字段
pro_bar_deps = dependencies & pro_bar_fields
other_deps = dependencies - pro_bar_fields
daily_basic_deps = dependencies & daily_basic_fields
other_deps = dependencies - pro_bar_fields - daily_basic_fields
data_specs = []
@@ -115,7 +129,15 @@ class ExecutionPlanner:
DataSpec(
table="pro_bar",
columns=sorted(pro_bar_deps),
lookback_days=lookback_days,
)
)
# daily_basic 表的数据规格
if daily_basic_deps:
data_specs.append(
DataSpec(
table="daily_basic",
columns=sorted(daily_basic_deps),
)
)
@@ -125,46 +147,7 @@ class ExecutionPlanner:
DataSpec(
table="daily",
columns=sorted(other_deps),
lookback_days=lookback_days,
)
)
return data_specs
def _extract_max_window(self, node: Node) -> int:
    """Return the largest window size found in an expression tree.

    Integer constants passed directly as function arguments are treated
    as window sizes; nested sub-expressions are searched recursively.

    Args:
        node: AST node to inspect.

    Returns:
        The largest window size found, or 1 when the expression contains
        no function call with an integer constant greater than 1.
    """
    if isinstance(node, FunctionNode):
        # Integer literals passed directly to the function.
        literal_windows = [
            argument.value
            for argument in node.args
            if isinstance(argument, Constant) and isinstance(argument.value, int)
        ]
        # Windows required by nested (non-constant) sub-expressions.
        nested_windows = [
            self._extract_max_window(argument)
            for argument in node.args
            if isinstance(argument, Node) and not isinstance(argument, Constant)
        ]
        # The leading 1 is the floor and wins any tie.
        return max([1, *literal_windows, *nested_windows])

    if isinstance(node, BinaryOpNode):
        left_window = self._extract_max_window(node.left)
        right_window = self._extract_max_window(node.right)
        return max(left_window, right_window)

    if isinstance(node, UnaryOpNode):
        return self._extract_max_window(node.operand)

    # Leaf nodes carry no window information.
    return 1