refactor: 代码审查修复 - 日期过滤、性能优化、数据泄露防护
- 修复 data_loader.py 财务数据日期过滤,支持按范围加载
- 优化 MADClipper 使用窗口函数替代 join,提升性能
- 修复训练日期边界问题,添加1天间隔避免数据泄露
- 新增 .gitignore 规则忽略训练输出目录
This commit is contained in:
66
src/factors/financial/eps_factor.py
Normal file
66
src/factors/financial/eps_factor.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""EPS因子
|
||||
|
||||
每股收益(EPS)排名因子实现
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
import polars as pl
|
||||
|
||||
from src.factors.base import CrossSectionalFactor
|
||||
from src.factors.data_spec import DataSpec, FactorData
|
||||
|
||||
|
||||
class EPSFactor(CrossSectionalFactor):
    """Cross-sectional rank factor based on earnings per share (EPS).

    For each date, the ``basic_eps`` column of the cross-section is ranked
    across all rows (ties share their average rank) and the rank is divided
    by the number of rows.

    Attributes:
        name: Factor name, ``"eps_rank"``. Also used as the name of the
            returned series.
        category: Factor category, ``"financial"``.
        description: Human-readable description of the factor.
        data_specs: Data requirements: ``ts_code``, ``trade_date`` and
            ``basic_eps`` from ``financial_income`` with ``lookback_days=1``.

    Example:
        >>> from src.factors import FactorEngine, DataLoader
        >>> from src.factors.financial.eps_factor import EPSFactor
        >>> loader = DataLoader('data')
        >>> engine = FactorEngine(loader)
        >>> eps_factor = EPSFactor()
        >>> result = engine.compute(eps_factor, start_date='20210101', end_date='20210131')
    """

    name: str = "eps_rank"
    category: str = "financial"
    description: str = "每股收益截面排名因子"
    data_specs: List[DataSpec] = [
        DataSpec(
            "financial_income", ["ts_code", "trade_date", "basic_eps"], lookback_days=1
        )
    ]

    def compute(self, data: FactorData) -> pl.Series:
        """Compute the normalized EPS rank for the current cross-section.

        Args:
            data: FactorData providing the cross-section for the current date
                via ``get_cross_section()``.

        Returns:
            A Float64 series named ``self.name`` with one value per row of
            the cross-section:

            * empty cross-section -> empty series;
            * a single row, or all EPS values equal -> ``0.5`` for every row;
            * otherwise ``average_rank / n``, i.e. values in ``[1/n, 1]``.
        """
        cs = data.get_cross_section()

        if len(cs) == 0:
            # Pin the dtype so the empty result matches the non-empty branches
            # instead of relying on what polars infers for an empty list.
            return pl.Series(name=self.name, values=[], dtype=pl.Float64)

        # Missing EPS values are ranked as if they were 0.
        eps = cs["basic_eps"].fill_null(0)
        n = len(eps)

        if n > 1 and eps.max() != eps.min():
            # rank() keeps the source column name ("basic_eps"); rename so that
            # every branch returns a series carrying the factor's own name.
            ranks = (eps.rank(method="average") / n).alias(self.name)
        else:
            # Too few rows or no dispersion: ranking is meaningless, so return
            # the neutral value 0.5 for every row.
            ranks = pl.Series(name=self.name, values=[0.5] * n, dtype=pl.Float64)

        return ranks
|
||||
Reference in New Issue
Block a user