refactor: 存储层迁移DuckDB + 模块重构
- 存储层重构: HDF5 → DuckDB(UPSERT模式、线程安全存储) - Sync类迁移: DataSync从sync.py迁移到api_daily.py(职责分离) - 模型模块重构: src/models → src/pipeline(更清晰的命名) - 新增因子模块: factors/momentum (MA、收益率排名)、factors/financial - 新增API接口: api_namechange、api_bak_basic - 新增训练入口: training模块(main.py、pipeline配置) - 工具函数统一: get_today_date等移至utils.py - 文档更新: AGENTS.md添加架构变更历史
This commit is contained in:
@@ -18,28 +18,29 @@
|
||||
- CompositeFactor: 组合因子
|
||||
- ScalarFactor: 标量运算因子
|
||||
|
||||
动量因子(momentum/):
|
||||
- MovingAverageFactor: 移动平均线(时序因子)
|
||||
- ReturnRankFactor: 收益率排名(截面因子)
|
||||
|
||||
财务因子(financial/):
|
||||
- (待添加)
|
||||
|
||||
数据加载和执行(Phase 3-4):
|
||||
- DataLoader: 数据加载器
|
||||
- FactorEngine: 因子执行引擎
|
||||
|
||||
使用示例:
|
||||
from src.factors import DataSpec, FactorContext, FactorData
|
||||
from src.factors import CrossSectionalFactor, TimeSeriesFactor
|
||||
# 使用通用因子(参数化)
|
||||
from src.factors import MovingAverageFactor, ReturnRankFactor
|
||||
from src.factors import DataLoader, FactorEngine
|
||||
|
||||
# 定义数据需求
|
||||
spec = DataSpec(
|
||||
source="daily",
|
||||
columns=["ts_code", "trade_date", "close"],
|
||||
lookback_days=20
|
||||
)
|
||||
ma5 = MovingAverageFactor(period=5) # 5日MA
|
||||
ma10 = MovingAverageFactor(period=10) # 10日MA
|
||||
ret5 = ReturnRankFactor(period=5) # 5日收益率排名
|
||||
|
||||
# 初始化引擎
|
||||
loader = DataLoader(data_dir="data")
|
||||
engine = FactorEngine(loader)
|
||||
|
||||
# 计算因子
|
||||
result = engine.compute(factor, start_date="20240101", end_date="20240131")
|
||||
result = engine.compute(ma5, stock_codes=["000001.SZ"], start_date="20240101", end_date="20240131")
|
||||
"""
|
||||
|
||||
from src.factors.data_spec import DataSpec, FactorContext, FactorData
|
||||
@@ -48,6 +49,9 @@ from src.factors.composite import CompositeFactor, ScalarFactor
|
||||
from src.factors.data_loader import DataLoader
|
||||
from src.factors.engine import FactorEngine
|
||||
|
||||
# 动量因子
|
||||
from src.factors.momentum import MovingAverageFactor, ReturnRankFactor
|
||||
|
||||
__all__ = [
|
||||
# Phase 1: 数据类型定义
|
||||
"DataSpec",
|
||||
@@ -62,4 +66,7 @@ __all__ = [
|
||||
# Phase 3-4: 数据加载和执行引擎
|
||||
"DataLoader",
|
||||
"FactorEngine",
|
||||
# 动量因子
|
||||
"MovingAverageFactor",
|
||||
"ReturnRankFactor",
|
||||
]
|
||||
|
||||
15
src/factors/financial/__init__.py
Normal file
15
src/factors/financial/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""财务因子模块
|
||||
|
||||
本模块提供财务类型的因子:
|
||||
|
||||
因子分类:
|
||||
- financial: 财务因子
|
||||
- (待添加)
|
||||
|
||||
待添加因子:
|
||||
- PERankFactor: 市盈率排名
|
||||
- PBFactor: 市净率因子
|
||||
- DividendFactor: 股息率因子
|
||||
"""
|
||||
|
||||
__all__ = []
|
||||
19
src/factors/momentum/__init__.py
Normal file
19
src/factors/momentum/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""动量因子模块
|
||||
|
||||
本模块提供动量类型的因子:
|
||||
- MovingAverageFactor: 移动平均线(时序因子)
|
||||
- ReturnRankFactor: 收益率排名(截面因子)
|
||||
|
||||
因子分类:
|
||||
- momentum: 动量因子
|
||||
- ma: 移动平均线
|
||||
- return_rank: 收益率排名
|
||||
"""
|
||||
|
||||
from src.factors.momentum.ma import MovingAverageFactor
|
||||
from src.factors.momentum.return_rank import ReturnRankFactor
|
||||
|
||||
__all__ = [
|
||||
"MovingAverageFactor",
|
||||
"ReturnRankFactor",
|
||||
]
|
||||
78
src/factors/momentum/ma.py
Normal file
78
src/factors/momentum/ma.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""动量因子 - 移动平均线
|
||||
|
||||
本模块提供通用移动平均线因子,支持参数化配置:
|
||||
- MovingAverageFactor: 移动平均线(时序因子)
|
||||
|
||||
使用示例:
|
||||
>>> from src.factors.momentum import MovingAverageFactor
|
||||
>>> ma5 = MovingAverageFactor(period=5) # 5日MA
|
||||
>>> ma10 = MovingAverageFactor(period=10) # 10日MA
|
||||
>>> ma20 = MovingAverageFactor(period=20) # 20日MA
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
import polars as pl
|
||||
|
||||
from src.factors.base import TimeSeriesFactor
|
||||
from src.factors.data_spec import DataSpec, FactorData
|
||||
|
||||
|
||||
class MovingAverageFactor(TimeSeriesFactor):
    """Parameterised moving average of the closing price.

    The factor takes the ``close`` column it is handed and returns its
    rolling mean over ``period`` rows. It is declared as a time-series
    factor; the original documentation states that each stock is computed
    on its own so that no information leaks between stocks.

    Attributes:
        period: Window length in trading days (default 5). It is passed to
            the base-class constructor and read back in ``compute`` via
            ``self.params["period"]``.

    Example:
        >>> ma5 = MovingAverageFactor(period=5)   # 5-day moving average
        >>> ma20 = MovingAverageFactor(period=20)  # 20-day moving average
    """

    name: str = "ma"
    factor_type: str = "time_series"
    category: str = "momentum"
    description: str = "移动平均线因子,计算过去n日收盘价的均值"
    # Class-level default; every instance replaces it in __init__ with a
    # spec whose lookback matches the requested period.
    data_specs: List[DataSpec] = [
        DataSpec("daily", ["ts_code", "trade_date", "close"], lookback_days=5)
    ]

    def __init__(self, period: int = 5):
        """Configure the factor for a given window length.

        Args:
            period: Moving-average window in days; defaults to 5.
        """
        super().__init__(period=period)
        # DataSpec is frozen, so a new instance is built rather than
        # mutating the class-level default in place.
        daily_close_spec = DataSpec(
            "daily",
            ["ts_code", "trade_date", "close"],
            lookback_days=period,
        )
        self.data_specs = [daily_close_spec]
        self.name = f"ma_{period}"

    def compute(self, data: FactorData) -> pl.Series:
        """Return the rolling mean of ``close`` over the configured window.

        Args:
            data: FactorData for the series being evaluated (documented
                upstream as one stock's full time series).

        Returns:
            The rolling-mean series produced by ``rolling_mean`` on the
            ``close`` column.
        """
        return data.get_column("close").rolling_mean(
            window_size=self.params["period"]
        )
|
||||
100
src/factors/momentum/return_rank.py
Normal file
100
src/factors/momentum/return_rank.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""动量因子 - 收益率排名
|
||||
|
||||
本模块提供收益率排名因子:
|
||||
- ReturnRankFactor: 过去n日收益率的rank因子(截面因子)
|
||||
|
||||
使用示例:
|
||||
>>> from src.factors.momentum import ReturnRankFactor
|
||||
>>> ret5 = ReturnRankFactor(period=5) # 5日收益率排名
|
||||
>>> ret10 = ReturnRankFactor(period=10) # 10日收益率排名
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
import polars as pl
|
||||
|
||||
from src.factors.base import CrossSectionalFactor
|
||||
from src.factors.data_spec import DataSpec, FactorData
|
||||
|
||||
|
||||
class ReturnRankFactor(CrossSectionalFactor):
    """Cross-sectional rank of each stock's trailing n-day return.

    For the latest trade date in the supplied window, the factor computes
    every stock's simple return against its close ``period`` trading dates
    earlier and ranks those returns across stocks.

    Attributes:
        period: Return horizon in trading days (default 5). It is passed to
            the base-class constructor and read back in ``compute`` via
            ``self.params["period"]``.

    Example:
        >>> ret5 = ReturnRankFactor(period=5)    # rank of 5-day returns
        >>> ret10 = ReturnRankFactor(period=10)  # rank of 10-day returns
    """

    name: str = "return_rank"
    factor_type: str = "cross_sectional"
    category: str = "momentum"
    description: str = "过去n日收益率的截面排名因子"
    # Class-level default; every instance replaces it in __init__ with a
    # spec whose lookback matches the requested period.
    data_specs: List[DataSpec] = [
        DataSpec("daily", ["ts_code", "trade_date", "close"], lookback_days=5)
    ]

    def __init__(self, period: int = 5):
        """Configure the factor for a given return horizon.

        Args:
            period: Return horizon in days; defaults to 5.
        """
        super().__init__(period=period)
        # DataSpec is frozen, so a new instance is built. An n-day return
        # needs n + 1 closes (today plus the close n days back).
        self.data_specs = [
            DataSpec(
                "daily",
                ["ts_code", "trade_date", "close"],
                lookback_days=period + 1,
            )
        ]
        self.name = f"return_{period}_rank"

    def compute(self, data: FactorData) -> pl.Series:
        """Rank the trailing ``period``-day returns across stocks.

        Args:
            data: FactorData whose frame holds ``ts_code``, ``trade_date``
                and ``close`` for the lookback window.

        Returns:
            A Float64 series named ``self.name`` holding
            ``rank / number_of_ranked_stocks`` (average-rank ties), one row
            per stock present on both the latest date and the date
            ``period`` trading dates earlier. An empty Float64 series is
            returned when the window holds too few distinct trade dates.
        """
        period = self.params["period"]

        # The frame covers the whole lookback window, not just one date.
        window_frame = data.to_polars()
        trade_dates = window_frame["trade_date"].unique().sort()

        # An n-day return needs n + 1 distinct dates. The previous guard
        # only required 2, so a short window fell through to an
        # out-of-range index below instead of taking this early exit.
        required_dates = max(period + 1, 2)
        if len(trade_dates) < required_dates:
            return pl.Series(name=self.name, values=[], dtype=pl.Float64)

        latest_date = trade_dates[-1]
        base_date = trade_dates[-(period + 1)]

        latest_close = window_frame.filter(
            pl.col("trade_date") == latest_date
        ).select(["ts_code", "close"])
        base_close = (
            window_frame.filter(pl.col("trade_date") == base_date)
            .select(["ts_code", "close"])
            .rename({"close": "close_past"})
        )

        # NOTE(review): the inner join keeps only stocks quoted on both
        # dates, so the result can be shorter than the latest cross-section.
        # Confirm the engine does not expect row-for-row alignment.
        merged = latest_close.join(base_close, on="ts_code", how="inner")

        returns = (merged["close"] - merged["close_past"]) / merged["close_past"]

        # Scale ranks into (0, 1] and name the result after the factor so
        # both return paths carry the same series name.
        ranked = returns.rank(method="average") / len(returns)
        return ranked.alias(self.name)
|
||||
Reference in New Issue
Block a user