238 lines
7.9 KiB
Python
238 lines
7.9 KiB
Python
"""
Main entry point for the Polars factors - integrates all Polars-based factor computations.

Provides a unified interface for applying every category of factor.
"""
|
||
|
||
import polars as pl
|
||
from typing import Dict, List, Optional, Any
|
||
import logging
|
||
|
||
# Logging setup: configure the root logger at INFO level and create this
# module's own logger.
# NOTE(review): basicConfig runs at import time and therefore touches the root
# logger of whatever application imports this module - confirm this is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Factor category key -> human-readable (Chinese) category label.
# The insertion order here is the default order in which categories are applied.
FACTOR_CATEGORIES: Dict[str, str] = {
    "money_flow": "资金流因子",
    "chip": "筹码分布因子",
    "volatility": "波动率因子",
    "volume": "成交量因子",
    "technical": "技术指标因子",
    "sentiment": "情绪因子",
    "momentum": "动量因子",
    "complex": "复杂组合因子",
}
|
||
|
||
|
||
def apply_money_flow_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the money-flow factors to ``df``.

    Delegates to ``polars_money_flow_factors.apply_money_flow_factors``. The
    import is done lazily inside the function so this module can still be
    imported when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_money_flow_factors import apply_money_flow_factors as _apply_money_flow
    except ImportError as e:
        logger.warning(f"无法导入资金流因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_money_flow(df, operators)
|
||
|
||
|
||
def apply_chip_distribution_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the chip-distribution factors to ``df``.

    Delegates to ``polars_chip_factors.apply_chip_distribution_factors``. The
    import is done lazily inside the function so this module can still be
    imported when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_chip_factors import apply_chip_distribution_factors as _apply_chip
    except ImportError as e:
        logger.warning(f"无法导入筹码分布因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_chip(df, operators)
|
||
|
||
|
||
def apply_volatility_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the volatility factors to ``df``.

    Delegates to ``polars_volatility_factors.apply_volatility_factors``. The
    import is done lazily inside the function so this module can still be
    imported when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_volatility_factors import apply_volatility_factors as _apply_volatility
    except ImportError as e:
        logger.warning(f"无法导入波动率因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_volatility(df, operators)
|
||
|
||
|
||
def apply_volume_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the trading-volume factors to ``df``.

    Delegates to ``polars_volume_factors.apply_volume_factors``. The import is
    done lazily inside the function so this module can still be imported when
    that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_volume_factors import apply_volume_factors as _apply_volume
    except ImportError as e:
        logger.warning(f"无法导入成交量因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_volume(df, operators)
|
||
|
||
|
||
def apply_technical_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the technical-indicator factors to ``df``.

    Delegates to ``polars_technical_factors.apply_technical_factors``. The
    import is done lazily inside the function so this module can still be
    imported when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_technical_factors import apply_technical_factors as _apply_technical
    except ImportError as e:
        logger.warning(f"无法导入技术指标因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_technical(df, operators)
|
||
|
||
|
||
def apply_sentiment_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the sentiment factors to ``df``.

    Delegates to ``polars_sentiment_factors.apply_sentiment_factors``. The
    import is done lazily inside the function so this module can still be
    imported when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_sentiment_factors import apply_sentiment_factors as _apply_sentiment
    except ImportError as e:
        logger.warning(f"无法导入情绪因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_sentiment(df, operators)
|
||
|
||
|
||
def apply_momentum_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the momentum factors to ``df``.

    Delegates to ``polars_momentum_factors.apply_momentum_factors``. The import
    is done lazily inside the function so this module can still be imported
    when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_momentum_factors import apply_momentum_factors as _apply_momentum
    except ImportError as e:
        logger.warning(f"无法导入动量因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_momentum(df, operators)
|
||
|
||
|
||
def apply_complex_factors(df: pl.DataFrame, operators: Optional[List] = None) -> pl.DataFrame:
    """Apply the complex composite factors to ``df``.

    Delegates to ``polars_complex_factors.apply_complex_factors``. The import
    is done lazily inside the function so this module can still be imported
    when that factor module is not available.

    Args:
        df: Input Polars DataFrame.
        operators: Passed through unchanged as the delegate's second
            positional argument. Defaults to ``None``.

    Returns:
        The delegate's return value, or ``df`` unchanged (after a warning is
        logged) when the factor module cannot be imported.
    """
    try:
        from polars_complex_factors import apply_complex_factors as _apply_complex
    except ImportError as e:
        logger.warning(f"无法导入复杂组合因子模块: {e}")
        return df

    # The call sits outside the ``try`` on purpose: an ImportError raised while
    # the delegate runs must not be reported as "module missing" and swallowed.
    return _apply_complex(df, operators)
|
||
|
||
|
||
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: Optional[List[str]] = None,
                      exclude_categories: Optional[List[str]] = None) -> pl.DataFrame:
    """Apply the requested factor categories to ``df`` one after another.

    Each category is handled by the matching ``apply_*_factors`` wrapper of
    this module, called with the running DataFrame only (so the wrapper's
    ``operators`` argument keeps its default). The number of factors added by
    a category is measured as the change in column count.

    Args:
        df: Input Polars DataFrame; it must already contain the columns the
            selected factor categories need.
        factor_categories: Category keys to apply, in order. ``None`` means
            every key of ``FACTOR_CATEGORIES``.
        exclude_categories: Category keys to drop from the selection.

    Returns:
        The DataFrame produced by the last category that completed
        successfully (``df`` itself if none did). Unknown category keys are
        logged as warnings and skipped. A category that raises, or whose
        result has no usable ``columns``, is logged with its traceback and
        skipped without affecting the running result.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())

    if exclude_categories:
        factor_categories = [cat for cat in factor_categories if cat not in exclude_categories]

    logger.info(f"开始应用因子类别: {factor_categories}")

    # Dispatch table: category key -> wrapper that applies that category.
    factor_functions = {
        'money_flow': apply_money_flow_factors,
        'chip': apply_chip_distribution_factors,
        'volatility': apply_volatility_factors,
        'volume': apply_volume_factors,
        'technical': apply_technical_factors,
        'sentiment': apply_sentiment_factors,
        'momentum': apply_momentum_factors,
        'complex': apply_complex_factors
    }

    result_df = df
    total_factors = 0

    for category in factor_categories:
        if category not in factor_functions:
            logger.warning(f"未知的因子类别: {category}")
            continue

        label = FACTOR_CATEGORIES[category]
        logger.info(f"应用{label}...")

        try:
            before_cols = len(result_df.columns)
            # Keep the new frame in a temporary until it has been inspected, so
            # a bad return value (e.g. None) cannot replace the last good
            # result and break every category that follows.
            candidate_df = factor_functions[category](result_df)
            new_factors = len(candidate_df.columns) - before_cols
        except Exception as e:
            # Best-effort by design: record the failure (with traceback) and
            # carry on with the remaining categories.
            logger.exception(f"应用{label}时出错: {e}")
            continue

        result_df = candidate_df
        logger.info(f"{label}应用完成,新增{new_factors}个因子")
        total_factors += new_factors

    logger.info(f"因子应用完成,总共新增{total_factors}个因子")
    return result_df
|
||
|
||
|
||
def get_factor_info() -> Dict[str, Any]:
    """Summarize the factor categories known to this module.

    Returns:
        A dict with three entries, in this order:
        ``'categories'`` - the module-level ``FACTOR_CATEGORIES`` mapping
        itself (not a copy);
        ``'total_categories'`` - the number of entries in that mapping;
        ``'category_descriptions'`` - a list of the mapping's values.
    """
    info: Dict[str, Any] = {}
    info['categories'] = FACTOR_CATEGORIES
    info['total_categories'] = len(FACTOR_CATEGORIES)
    info['category_descriptions'] = list(FACTOR_CATEGORIES.values())
    return info
|
||
|
||
|
||
def validate_required_columns(df: pl.DataFrame, factor_categories: Optional[List[str]] = None) -> Dict[str, List[str]]:
    """Report which required columns are missing from ``df``.

    Args:
        df: Input Polars DataFrame; only its ``columns`` attribute is read.
        factor_categories: Category keys to check. ``None`` means every key of
            ``FACTOR_CATEGORIES``. Keys without a known requirement list are
            ignored.

    Returns:
        A dict mapping ``'base'`` and/or category keys to the list of their
        missing column names, in the order the requirements are declared.
        Groups with nothing missing are omitted, so an empty dict means every
        checked requirement is satisfied.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())

    # Read the column names once instead of re-reading ``df.columns`` for every
    # required column; a set also makes each membership test constant-time.
    available = set(df.columns)

    def _missing(required: List[str]) -> List[str]:
        """Return the entries of ``required`` that ``df`` lacks, order kept."""
        return [col for col in required if col not in available]

    missing_columns: Dict[str, List[str]] = {}

    # Columns required regardless of the selected categories.
    missing_base = _missing(['ts_code', 'trade_date'])
    if missing_base:
        missing_columns['base'] = missing_base

    # Columns required by each factor category.
    category_requirements = {
        'money_flow': ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
        'chip': ['cost_95pct', 'cost_85pct', 'cost_50pct', 'cost_15pct', 'cost_5pct',
                 'winner_rate', 'weight_avg', 'close'],
        'volatility': ['pct_chg'],
        'volume': ['vol', 'turnover_rate', 'volume_ratio', 'amount'],
        'technical': ['open', 'high', 'low', 'close', 'vol'],
        'sentiment': ['pct_chg', 'vol', 'volume_ratio'],
        'momentum': ['close', 'turnover_rate'],
        'complex': ['close', 'vol', 'pct_chg', 'turnover_rate', 'winner_rate']
    }

    for category in factor_categories:
        if category not in category_requirements:
            # No requirement list for this key: nothing to validate.
            continue
        missing_cols = _missing(category_requirements[category])
        if missing_cols:
            missing_columns[category] = missing_cols

    return missing_columns
|
||
|
||
|
||
# Backward-compatible alias: the older name refers to the same function object.
apply_factors = apply_all_factors


if __name__ == "__main__":
    # Script mode: announce that the module loaded, then list every available
    # factor category with its label.
    print("Polars因子系统已加载")
    print("可用的因子类别:")
    for key, description in FACTOR_CATEGORIES.items():
        print(f" {key}: {description}")
|