feat(data): 添加每日指标接口并优化因子引擎
- 新增 api_daily_basic.py 封装 Tushare 每日指标接口
- 因子引擎移除 lookback_days,支持 daily_basic 表字段路由
- 将每日指标纳入自动同步流程
- 删除废弃的 training/main.py
This commit is contained in:
@@ -69,16 +69,11 @@ class DataRouter:
|
||||
|
||||
# 收集所有需要的表和字段
|
||||
required_tables: Dict[str, Set[str]] = {}
|
||||
max_lookback = 0
|
||||
|
||||
for spec in data_specs:
|
||||
if spec.table not in required_tables:
|
||||
required_tables[spec.table] = set()
|
||||
required_tables[spec.table].update(spec.columns)
|
||||
max_lookback = max(max_lookback, spec.lookback_days)
|
||||
|
||||
# 调整日期范围以包含回看期
|
||||
adjusted_start = self._adjust_start_date(start_date, max_lookback)
|
||||
|
||||
# 从数据源获取各表数据
|
||||
table_data = {}
|
||||
@@ -86,7 +81,7 @@ class DataRouter:
|
||||
df = self._load_table(
|
||||
table_name=table_name,
|
||||
columns=list(columns),
|
||||
start_date=adjusted_start,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
stock_codes=stock_codes,
|
||||
)
|
||||
@@ -95,11 +90,6 @@ class DataRouter:
|
||||
# 组装核心宽表
|
||||
core_table = self._assemble_wide_table(table_data, required_tables)
|
||||
|
||||
# 过滤到实际请求日期范围
|
||||
core_table = core_table.filter(
|
||||
(pl.col("trade_date") >= start_date) & (pl.col("trade_date") <= end_date)
|
||||
)
|
||||
|
||||
return core_table
|
||||
|
||||
def _load_table(
|
||||
@@ -265,34 +255,6 @@ class DataRouter:
|
||||
|
||||
return result
|
||||
|
||||
def _adjust_start_date(self, start_date: str, lookback_days: int) -> str:
|
||||
"""根据回看天数调整开始日期。
|
||||
|
||||
Args:
|
||||
start_date: 原始开始日期 (YYYYMMDD)
|
||||
lookback_days: 需要回看的交易日数
|
||||
|
||||
Returns:
|
||||
调整后的开始日期
|
||||
"""
|
||||
# 简化的日期调整:假设每月30天,向前推移
|
||||
# 实际应用中应该使用交易日历
|
||||
year = int(start_date[:4])
|
||||
month = int(start_date[4:6])
|
||||
day = int(start_date[6:8])
|
||||
|
||||
total_days = lookback_days + 30 # 额外缓冲
|
||||
|
||||
day -= total_days
|
||||
while day <= 0:
|
||||
month -= 1
|
||||
if month <= 0:
|
||||
month = 12
|
||||
year -= 1
|
||||
day += 30
|
||||
|
||||
return f"{year:04d}{month:02d}{day:02d}"
|
||||
|
||||
def clear_cache(self) -> None:
|
||||
"""清除数据缓存。"""
|
||||
with self._lock:
|
||||
|
||||
@@ -18,12 +18,10 @@ class DataSpec:
|
||||
Attributes:
|
||||
table: 数据表名称
|
||||
columns: 需要的字段列表
|
||||
lookback_days: 回看天数(用于时序计算)
|
||||
"""
|
||||
|
||||
table: str
|
||||
columns: List[str]
|
||||
lookback_days: int = 1
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -73,9 +73,9 @@ class ExecutionPlanner:
|
||||
) -> List[DataSpec]:
|
||||
"""从依赖推导数据规格。
|
||||
|
||||
根据表达式中的函数类型推断回看天数需求。
|
||||
基础行情字段(open, high, low, close, vol, amount, pre_close, change, pct_chg)
|
||||
默认从 pro_bar 表获取。
|
||||
每日指标字段(total_mv, circ_mv, pe, pb 等)从 daily_basic 表获取。
|
||||
|
||||
Args:
|
||||
dependencies: 依赖的字段集合
|
||||
@@ -84,10 +84,6 @@ class ExecutionPlanner:
|
||||
Returns:
|
||||
数据规格列表
|
||||
"""
|
||||
# 计算最大回看窗口
|
||||
max_window = self._extract_max_window(expression)
|
||||
lookback_days = max(1, max_window)
|
||||
|
||||
# 基础行情字段集合(这些字段从 pro_bar 表获取)
|
||||
pro_bar_fields = {
|
||||
"open",
|
||||
@@ -103,9 +99,27 @@ class ExecutionPlanner:
|
||||
"volume_ratio",
|
||||
}
|
||||
|
||||
# 将依赖分为 pro_bar 字段和其他字段
|
||||
# 每日指标字段集合(这些字段从 daily_basic 表获取)
|
||||
daily_basic_fields = {
|
||||
"turnover_rate_f",
|
||||
"pe",
|
||||
"pe_ttm",
|
||||
"pb",
|
||||
"ps",
|
||||
"ps_ttm",
|
||||
"dv_ratio",
|
||||
"dv_ttm",
|
||||
"total_share",
|
||||
"float_share",
|
||||
"free_share",
|
||||
"total_mv",
|
||||
"circ_mv",
|
||||
}
|
||||
|
||||
# 将依赖分为不同表的字段
|
||||
pro_bar_deps = dependencies & pro_bar_fields
|
||||
other_deps = dependencies - pro_bar_fields
|
||||
daily_basic_deps = dependencies & daily_basic_fields
|
||||
other_deps = dependencies - pro_bar_fields - daily_basic_fields
|
||||
|
||||
data_specs = []
|
||||
|
||||
@@ -115,7 +129,15 @@ class ExecutionPlanner:
|
||||
DataSpec(
|
||||
table="pro_bar",
|
||||
columns=sorted(pro_bar_deps),
|
||||
lookback_days=lookback_days,
|
||||
)
|
||||
)
|
||||
|
||||
# daily_basic 表的数据规格
|
||||
if daily_basic_deps:
|
||||
data_specs.append(
|
||||
DataSpec(
|
||||
table="daily_basic",
|
||||
columns=sorted(daily_basic_deps),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -125,46 +147,7 @@ class ExecutionPlanner:
|
||||
DataSpec(
|
||||
table="daily",
|
||||
columns=sorted(other_deps),
|
||||
lookback_days=lookback_days,
|
||||
)
|
||||
)
|
||||
|
||||
return data_specs
|
||||
|
||||
def _extract_max_window(self, node: Node) -> int:
    """Return the largest window size found anywhere in an expression tree.

    Args:
        node: AST node to inspect.

    Returns:
        The largest integer constant passed as a function argument in the
        tree, or 1 when no such constant exceeds 1.
    """
    if isinstance(node, FunctionNode):
        # Integer constants passed directly to this function call.
        direct_windows = [
            arg.value
            for arg in node.args
            if isinstance(arg, Constant) and isinstance(arg.value, int)
        ]
        # Windows contributed by nested, non-constant sub-expressions.
        nested_windows = [
            self._extract_max_window(arg)
            for arg in node.args
            if isinstance(arg, Node) and not isinstance(arg, Constant)
        ]
        # The leading 1 is the floor used when nothing larger is found.
        return max([1, *direct_windows, *nested_windows])

    if isinstance(node, BinaryOpNode):
        left_window = self._extract_max_window(node.left)
        right_window = self._extract_max_window(node.right)
        return max(left_window, right_window)

    if isinstance(node, UnaryOpNode):
        return self._extract_max_window(node.operand)

    # Any other node type contributes no window of its own.
    return 1
|
||||
|
||||
Reference in New Issue
Block a user