factor优化,改为polars
This commit is contained in:
237
main/factor/polars_factors.py
Normal file
237
main/factor/polars_factors.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Polars因子主入口 - 整合所有Polars-based因子计算
|
||||
提供统一的接口来应用所有类别的因子
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
from typing import Dict, List, Optional, Any
|
||||
import logging
|
||||
|
||||
# Logging setup: configure the root logger at INFO level and create the
# module-level logger used by every function in this file.
logging.basicConfig(
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Factor category mapping: category key -> human-readable (Chinese) label.
# The labels are emitted verbatim in log messages, so they are runtime text.
FACTOR_CATEGORIES = {
    'money_flow': '资金流因子',
    'chip': '筹码分布因子',
    'volatility': '波动率因子',
    'volume': '成交量因子',
    'technical': '技术指标因子',
    'sentiment': '情绪因子',
    'momentum': '动量因子',
    'complex': '复杂组合因子',
}
|
||||
|
||||
|
||||
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the money-flow factors to ``df``.

    The implementation is imported lazily from the ``polars_money_flow_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_money_flow_factors import apply_money_flow_factors as money_flow_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = money_flow_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入资金流因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_chip_distribution_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the chip-distribution factors to ``df``.

    The implementation is imported lazily from the ``polars_chip_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_chip_factors import apply_chip_distribution_factors as chip_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = chip_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入筹码分布因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_volatility_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the volatility factors to ``df``.

    The implementation is imported lazily from the ``polars_volatility_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_volatility_factors import apply_volatility_factors as volatility_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = volatility_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入波动率因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the trading-volume factors to ``df``.

    The implementation is imported lazily from the ``polars_volume_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_volume_factors import apply_volume_factors as volume_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = volume_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入成交量因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the technical-indicator factors to ``df``.

    The implementation is imported lazily from the ``polars_technical_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_technical_factors import apply_technical_factors as technical_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = technical_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入技术指标因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the sentiment factors to ``df``.

    The implementation is imported lazily from the ``polars_sentiment_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_sentiment_factors import apply_sentiment_factors as sentiment_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = sentiment_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入情绪因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the momentum factors to ``df``.

    The implementation is imported lazily from the ``polars_momentum_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_momentum_factors import apply_momentum_factors as momentum_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = momentum_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入动量因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Apply the complex (composite) factors to ``df``.

    The implementation is imported lazily from the ``polars_complex_factors``
    module. If an ``ImportError`` is raised, a warning is logged and ``df`` is
    returned unchanged.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged to the implementation.

    Returns:
        The implementation's result, or ``df`` itself on the ``ImportError``
        fallback path.
    """
    try:
        from polars_complex_factors import apply_complex_factors as complex_impl

        # The call stays inside the try, so an ImportError raised while the
        # implementation runs takes the same fallback as a failed import.
        outcome = complex_impl(df, operators)
    except ImportError as import_err:
        logger.warning(f"无法导入复杂组合因子模块: {import_err}")
        outcome = df
    return outcome
|
||||
|
||||
|
||||
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: Optional[List[str]] = None,
                      exclude_categories: Optional[List[str]] = None) -> pl.DataFrame:
    """
    Apply the requested factor categories to ``df`` one after another.

    Categories run in the order given, each receiving the output of the
    previous successful one. A category that raises is logged with its
    traceback and skipped; the frame accumulated so far is kept, so one
    failing category cannot corrupt the result of the others.

    Args:
        df: Input Polars DataFrame; it must contain the columns required by
            the selected categories.
        factor_categories: Category keys to apply. ``None`` selects every key
            of ``FACTOR_CATEGORIES``.
        exclude_categories: Category keys to remove from the selection.

    Returns:
        The DataFrame produced by the last category that succeeded, or ``df``
        itself when no category succeeded.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())

    if exclude_categories:
        factor_categories = [cat for cat in factor_categories if cat not in exclude_categories]

    logger.info(f"开始应用因子类别: {factor_categories}")

    result_df = df
    total_factors = 0

    # Dispatch table: category key -> function that applies that category.
    factor_functions = {
        'money_flow': apply_money_flow_factors,
        'chip': apply_chip_distribution_factors,
        'volatility': apply_volatility_factors,
        'volume': apply_volume_factors,
        'technical': apply_technical_factors,
        'sentiment': apply_sentiment_factors,
        'momentum': apply_momentum_factors,
        'complex': apply_complex_factors
    }

    for category in factor_categories:
        if category not in factor_functions:
            logger.warning(f"未知的因子类别: {category}")
            continue

        label = FACTOR_CATEGORIES[category]
        logger.info(f"应用{label}...")

        try:
            before_cols = len(result_df.columns)
            # Hold the output in a temporary first: if it is not a usable
            # frame, the ``.columns`` access below raises and ``result_df``
            # still refers to the last good frame.
            candidate_df = factor_functions[category](result_df)
            new_factors = len(candidate_df.columns) - before_cols
        except Exception as e:
            # logger.exception keeps the traceback so the failing factor
            # module can be diagnosed; the message text is unchanged.
            logger.exception(f"应用{label}时出错: {e}")
            continue

        result_df = candidate_df
        logger.info(f"{label}应用完成,新增{new_factors}个因子")
        total_factors += new_factors

    logger.info(f"因子应用完成,总共新增{total_factors}个因子")
    return result_df
|
||||
|
||||
|
||||
def get_factor_info() -> Dict[str, Any]:
    """
    Describe the available factor categories.

    Returns:
        A dictionary with three entries:
        ``'categories'`` - the module-level ``FACTOR_CATEGORIES`` mapping
        itself (not a copy);
        ``'total_categories'`` - the number of categories;
        ``'category_descriptions'`` - a list of the category labels.
    """
    info: Dict[str, Any] = {}
    info['categories'] = FACTOR_CATEGORIES
    info['total_categories'] = len(FACTOR_CATEGORIES)
    info['category_descriptions'] = [*FACTOR_CATEGORIES.values()]
    return info
|
||||
|
||||
|
||||
def validate_required_columns(df: pl.DataFrame,
                              factor_categories: Optional[List[str]] = None) -> Dict[str, List[str]]:
    """
    Report which required columns are missing from ``df``.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Category keys to validate. ``None`` validates every
            key of ``FACTOR_CATEGORIES``. Unknown keys are logged with a
            warning and otherwise ignored.

    Returns:
        A dictionary mapping a group name to the list of its missing columns.
        The group ``'base'`` covers the columns every category needs; the
        other groups are category keys. Groups with nothing missing are
        omitted, so an empty dictionary means all required columns exist.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())

    # Read the column names once instead of re-reading ``df.columns`` for
    # every membership test below.
    available = set(df.columns)

    missing_columns = {}

    # Columns required regardless of category.
    base_required = ['ts_code', 'trade_date']
    missing_base = [col for col in base_required if col not in available]
    if missing_base:
        missing_columns['base'] = missing_base

    # Columns required by each factor category.
    category_requirements = {
        'money_flow': ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
        'chip': ['cost_95pct', 'cost_85pct', 'cost_50pct', 'cost_15pct', 'cost_5pct',
                 'winner_rate', 'weight_avg', 'close'],
        'volatility': ['pct_chg'],
        'volume': ['vol', 'turnover_rate', 'volume_ratio', 'amount'],
        'technical': ['open', 'high', 'low', 'close', 'vol'],
        'sentiment': ['pct_chg', 'vol', 'volume_ratio'],
        'momentum': ['close', 'turnover_rate'],
        'complex': ['close', 'vol', 'pct_chg', 'turnover_rate', 'winner_rate']
    }

    for category in factor_categories:
        required_cols = category_requirements.get(category)
        if required_cols is None:
            # Same warning apply_all_factors emits for an unknown category,
            # so a misspelled key is not silently reported as valid.
            logger.warning(f"未知的因子类别: {category}")
            continue

        missing_cols = [col for col in required_cols if col not in available]
        if missing_cols:
            missing_columns[category] = missing_cols

    return missing_columns
|
||||
|
||||
|
||||
# Backward-compatible function name: ``apply_factors`` is an alias of
# ``apply_all_factors``.
apply_factors = apply_all_factors
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test when run as a script: print a banner followed by every
    # factor category key and its label.
    print("Polars因子系统已加载")
    print("可用的因子类别:")
    for category_key, category_label in FACTOR_CATEGORIES.items():
        print(f" {category_key}: {category_label}")
|
||||
Reference in New Issue
Block a user