Files
ProStock/src/factors/financial/eps_factor.py
liaozhaorun a9e4746239 refactor: 代码审查修复 - 日期过滤、性能优化、数据泄露防护
- 修复 data_loader.py 财务数据日期过滤,支持按范围加载
- 优化 MADClipper 使用窗口函数替代 join,提升性能
- 修复训练日期边界问题,添加1天间隔避免数据泄露
- 新增 .gitignore 规则忽略训练输出目录
2026-02-25 21:11:19 +08:00

67 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""EPS因子
每股收益(EPS)排名因子实现
"""
from typing import List
import polars as pl
from src.factors.base import CrossSectionalFactor
from src.factors.data_spec import DataSpec, FactorData
class EPSFactor(CrossSectionalFactor):
    """Cross-sectional rank factor based on basic earnings per share (EPS).

    For each cross-section, the ``basic_eps`` column is ranked across all
    rows and the rank is divided by the number of rows.

    NOTE(review): the original description says the *latest reporting
    period's* ``basic_eps`` is used; that selection is presumably done by
    the data loader / engine and is not enforced in this class -- confirm.

    Attributes:
        name: Factor name, "eps_rank".
        category: Factor category, "financial".
        description: Human-readable description of the factor.
        data_specs: Data requirements: ``ts_code``, ``trade_date`` and
            ``basic_eps`` from ``financial_income`` with ``lookback_days=1``.

    Example:
        >>> from src.factors import FactorEngine, DataLoader
        >>> from src.factors.financial.eps_factor import EPSFactor
        >>> loader = DataLoader('data')
        >>> engine = FactorEngine(loader)
        >>> eps_factor = EPSFactor()
        >>> result = engine.compute(eps_factor, start_date='20210101', end_date='20210131')
    """

    name: str = "eps_rank"
    category: str = "financial"
    description: str = "每股收益截面排名因子"
    data_specs: List[DataSpec] = [
        DataSpec(
            "financial_income", ["ts_code", "trade_date", "basic_eps"], lookback_days=1
        )
    ]

    def compute(self, data: FactorData) -> pl.Series:
        """Compute the normalized EPS rank for one cross-section.

        Args:
            data: FactorData whose ``get_cross_section()`` result exposes a
                ``basic_eps`` column.

        Returns:
            A Float64 Series named ``self.name`` with one value per row of
            the cross-section:

            * empty cross-section -> empty Series;
            * a single row, or all EPS values equal -> constant 0.5;
            * otherwise the average-method rank divided by the row count,
              i.e. values in (0, 1] (the smallest EPS maps to ``1 / n``).
        """
        cross_section = data.get_cross_section()
        if len(cross_section) == 0:
            # Explicit dtype keeps the empty result type-compatible with
            # the non-empty branches.
            return pl.Series(name=self.name, values=[], dtype=pl.Float64)

        # Missing EPS is treated as 0, so such rows rank together with
        # genuine zero-EPS rows.
        eps = cross_section["basic_eps"].fill_null(0)
        count = len(eps)

        if count > 1 and eps.max() != eps.min():
            # Arithmetic on a Series keeps the input column name
            # ("basic_eps"); rename so every branch returns the factor name.
            return (eps.rank(method="average") / count).alias(self.name)

        # Not enough rows, or no dispersion to rank on: neutral mid value.
        return pl.Series(name=self.name, values=[0.5] * count, dtype=pl.Float64)