- 修复 data_loader.py 财务数据日期过滤,支持按范围加载 - 优化 MADClipper 使用窗口函数替代 join,提升性能 - 修复训练日期边界问题,添加1天间隔避免数据泄露 - 新增 .gitignore 规则忽略训练输出目录
67 lines
1.9 KiB
Python
67 lines
1.9 KiB
Python
"""EPS因子
|
||
|
||
每股收益(EPS)排名因子实现
|
||
"""
|
||
|
||
from typing import List
|
||
import polars as pl
|
||
|
||
from src.factors.base import CrossSectionalFactor
|
||
from src.factors.data_spec import DataSpec, FactorData
|
||
|
||
|
||
class EPSFactor(CrossSectionalFactor):
    """Cross-sectional rank factor built on earnings per share (EPS).

    On each date, every stock's ``basic_eps`` is ranked against the other
    stocks in that date's cross-section and the rank is scaled by the number
    of stocks. The original notes state that the most recent reporting
    period's ``basic_eps`` is used; that selection is not performed in this
    class and is presumably handled by the data-loading layer.

    Attributes:
        name: Factor name, "eps_rank".
        category: Factor category, "financial".
        description: Short human-readable description of the factor.
        data_specs: Data requirements: the ``ts_code``, ``trade_date`` and
            ``basic_eps`` columns of "financial_income" with a one-day
            lookback.

    Example:
        >>> from src.factors import FactorEngine, DataLoader
        >>> from src.factors.financial.eps_factor import EPSFactor
        >>> loader = DataLoader('data')
        >>> engine = FactorEngine(loader)
        >>> eps_factor = EPSFactor()
        >>> result = engine.compute(eps_factor, start_date='20210101', end_date='20210131')
    """

    name: str = "eps_rank"
    category: str = "financial"
    description: str = "每股收益截面排名因子"
    data_specs: List[DataSpec] = [
        DataSpec(
            "financial_income", ["ts_code", "trade_date", "basic_eps"], lookback_days=1
        )
    ]

    def compute(self, data: FactorData) -> pl.Series:
        """Rank EPS across the current cross-section.

        Args:
            data: FactorData whose ``get_cross_section()`` yields the rows
                for the date being computed; it must expose a ``basic_eps``
                column.

        Returns:
            A Float64 series named ``self.name`` with one value per row of
            the cross-section. Values are average ranks divided by the row
            count, so they lie in (0, 1]. When there is a single row or all
            EPS values are identical, every value is 0.5. An empty
            cross-section yields an empty series.
        """
        cs = data.get_cross_section()

        if len(cs) == 0:
            # Pin the dtype so the empty result matches the non-empty paths.
            return pl.Series(name=self.name, values=[], dtype=pl.Float64)

        # Missing EPS is treated as 0, i.e. such stocks rank alongside
        # break-even companies rather than being dropped from the ranking.
        eps = cs["basic_eps"].fill_null(0)

        if len(eps) > 1 and eps.max() != eps.min():
            # Ties share their average rank; dividing by the count scales
            # the ranks into (0, 1].
            ranks = eps.rank(method="average") / len(eps)
        else:
            # Too few rows or no dispersion: ranking carries no information,
            # so return the neutral midpoint for every stock.
            ranks = pl.Series(
                name=self.name, values=[0.5] * len(eps), dtype=pl.Float64
            )

        # Series arithmetic keeps the source column name ("basic_eps");
        # rename so every return path is labelled with the factor name.
        return ranks.alias(self.name)
|