factor优化,改为polars

This commit is contained in:
2025-10-13 21:42:35 +08:00
parent f87434b553
commit 44315b2c76
12 changed files with 6928 additions and 0 deletions

View File

@@ -0,0 +1,196 @@
"""
因子算子基础框架 - 简化版本
提供股票截面和日期截面两个基础函数
"""
import polars as pl
from typing import Callable, Any, Optional, Union
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def apply_stockwise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """
    Apply an operator function to each stock's rows independently.

    Args:
        df: Input polars DataFrame; must contain ``ts_code`` and ``trade_date``.
        operator_func: Called as ``operator_func(stock_df, *args, **kwargs)``
            with one stock's rows sorted by ``trade_date``; expected to
            return a DataFrame.
        *args, **kwargs: Extra arguments forwarded to ``operator_func``.

    Returns:
        The per-stock results concatenated and sorted by
        ``['ts_code', 'trade_date']``. When ``operator_func`` raises for a
        stock, the error is logged and that stock's original rows are kept;
        columns that other stocks gained are null for it. If there is no
        stock to process, ``df`` is returned unchanged.

    Raises:
        ValueError: If ``ts_code`` or ``trade_date`` is missing from ``df``.
    """
    # Validate required columns.
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    results = []
    for ts_code in df['ts_code'].unique().to_list():
        # Slice once, before the try, so the fallback reuses the same frame.
        stock_df = df.filter(pl.col('ts_code') == ts_code).sort('trade_date')
        try:
            results.append(operator_func(stock_df, *args, **kwargs))
        except Exception as e:
            logger.error(f"股票 {ts_code} 处理失败: {e}")
            # Fall back to the untouched rows of this stock.
            results.append(stock_df)

    if not results:
        return df

    # A fallback frame lacks the columns added for the other stocks, so a
    # strict vertical concat would raise. The diagonal strategy unions the
    # columns and fills the gaps with nulls.
    combined = pl.concat(results, how='diagonal_relaxed')
    return combined.sort(['ts_code', 'trade_date'])
def apply_datewise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """
    Apply an operator function to each trade date's rows independently.

    Args:
        df: Input polars DataFrame; must contain ``ts_code`` and ``trade_date``.
        operator_func: Called as ``operator_func(date_df, *args, **kwargs)``
            with the rows of a single ``trade_date``; expected to return a
            DataFrame.
        *args, **kwargs: Extra arguments forwarded to ``operator_func``.

    Returns:
        The per-date results concatenated and sorted by
        ``['ts_code', 'trade_date']``. When ``operator_func`` raises for a
        date, the error is logged and that date's original rows are kept;
        columns that other dates gained are null for it. If there is no date
        to process, ``df`` is returned unchanged.

    Raises:
        ValueError: If ``ts_code`` or ``trade_date`` is missing from ``df``.
    """
    # Validate required columns.
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    results = []
    for trade_date in df['trade_date'].unique().to_list():
        # Slice once, before the try, so the fallback reuses the same frame.
        date_df = df.filter(pl.col('trade_date') == trade_date)
        try:
            results.append(operator_func(date_df, *args, **kwargs))
        except Exception as e:
            logger.error(f"日期 {trade_date} 处理失败: {e}")
            # Fall back to the untouched rows of this date.
            results.append(date_df)

    if not results:
        return df

    # A fallback frame lacks the columns added for the other dates, so a
    # strict vertical concat would raise. The diagonal strategy unions the
    # columns and fills the gaps with nulls.
    combined = pl.concat(results, how='diagonal_relaxed')
    return combined.sort(['ts_code', 'trade_date'])
# 常用算子函数示例
def rolling_mean_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """
    Append a rolling mean of ``column`` (stock-wise operator).

    Args:
        df: Rows of a single stock.
        column: Name of the column to average.
        window: Rolling window size passed to ``rolling_mean``.
        output_col: Name of the new column; defaults to
            ``f'{column}_mean_{window}'``.

    Returns:
        ``df`` with the rolling-mean column added.
    """
    target_name = f'{column}_mean_{window}' if output_col is None else output_col
    averaged = pl.col(column).rolling_mean(window_size=window)
    return df.with_columns(averaged.alias(target_name))
def rolling_std_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """
    Append a rolling standard deviation of ``column`` (stock-wise operator).

    Args:
        df: Rows of a single stock.
        column: Name of the column to measure.
        window: Rolling window size passed to ``rolling_std``.
        output_col: Name of the new column; defaults to
            ``f'{column}_std_{window}'``.

    Returns:
        ``df`` with the rolling-std column added.
    """
    target_name = f'{column}_std_{window}' if output_col is None else output_col
    dispersion = pl.col(column).rolling_std(window_size=window)
    return df.with_columns(dispersion.alias(target_name))
def rank_operator(df: pl.DataFrame, column: str, ascending: bool = True, output_col: str = None) -> pl.DataFrame:
    """
    Append a dense rank of ``column`` (date-wise operator).

    Args:
        df: Rows of a single trade date.
        column: Name of the column to rank.
        ascending: Rank in ascending order when True, descending otherwise.
        output_col: Name of the new column; defaults to ``f'{column}_rank'``.

    Returns:
        ``df`` with the rank column added.
    """
    target_name = f'{column}_rank' if output_col is None else output_col
    use_descending = not ascending
    ranked = pl.col(column).rank(method='dense', descending=use_descending)
    return df.with_columns(ranked.alias(target_name))
def pct_change_operator(df: pl.DataFrame, column: str, periods: int = 1, output_col: str = None) -> pl.DataFrame:
    """
    Append the fractional change of ``column`` over ``periods`` rows
    (stock-wise operator).

    Args:
        df: Rows of a single stock.
        column: Name of the column to compare.
        periods: Number of rows to shift back for the reference value.
        output_col: Name of the new column; defaults to
            ``f'{column}_pct_change_{periods}'``.

    Returns:
        ``df`` with ``value / shifted_value - 1`` added as a new column.
    """
    target_name = f'{column}_pct_change_{periods}' if output_col is None else output_col
    current = pl.col(column)
    reference = pl.col(column).shift(periods)
    change = (current / reference) - 1
    return df.with_columns(change.alias(target_name))

View File

@@ -0,0 +1,250 @@
"""
因子算子框架 - 使用Polars实现统一的因子计算
避免数据泄露,支持切面计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Callable, Optional, Union, Any
from abc import ABC, abstractmethod
from dataclasses import dataclass
import logging
# 配置日志
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class OperatorConfig:
    """Static description of an operator: identity, column contract and parameters."""

    # Operator name.
    name: str
    # Free-text description of the operator.
    description: str
    # Columns that must be present in the input frame.
    required_columns: List[str]
    # Columns the operator is declared to add.
    output_columns: List[str]
    # Parameters the operator was configured with.
    parameters: Dict[str, Any]
class DataSlice:
    """Wrapper around a DataFrame offering per-stock and per-date slices."""

    def __init__(self, df: pl.DataFrame):
        """Store ``df`` and validate it immediately."""
        self.df = df
        self.validate_data()

    def validate_data(self):
        """Raise ValueError unless ``ts_code`` and ``trade_date`` are both present."""
        present = self.df.columns
        missing_cols = [name for name in ['ts_code', 'trade_date'] if name not in present]
        if missing_cols:
            raise ValueError(f"缺少必需列: {missing_cols}")

    def get_stock_slice(self, ts_code: str) -> pl.DataFrame:
        """Return the rows of one stock, sorted by ``trade_date``."""
        only_this_stock = self.df.filter(pl.col('ts_code') == ts_code)
        return only_this_stock.sort('trade_date')

    def get_date_slice(self, trade_date: str) -> pl.DataFrame:
        """Return the rows of one trade date."""
        same_day = pl.col('trade_date') == trade_date
        return self.df.filter(same_day)

    def get_stock_list(self) -> List[str]:
        """Return the distinct ``ts_code`` values as a list."""
        codes = self.df['ts_code'].unique()
        return codes.to_list()

    def get_date_list(self) -> List[str]:
        """Return the distinct ``trade_date`` values as a list."""
        dates = self.df['trade_date'].unique()
        return dates.to_list()
class BaseOperator(ABC):
    """Abstract operator: validates its input, then delegates to ``apply``."""

    def __init__(self, config: OperatorConfig):
        """Keep the config and mirror its name and column lists on the instance."""
        self.config = config
        self.name = config.name
        self.required_columns = config.required_columns
        self.output_columns = config.output_columns

    def validate_input(self, df: pl.DataFrame) -> bool:
        """Return True when every required column exists; otherwise warn and return False."""
        available = df.columns
        missing_cols = [name for name in self.required_columns if name not in available]
        if not missing_cols:
            return True
        logger.warning(f"算子 {self.name} 缺少必需列: {missing_cols}")
        return False

    @abstractmethod
    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator's output columns; implemented by subclasses."""
        pass

    def _with_null_outputs(self, df: pl.DataFrame) -> pl.DataFrame:
        """Return ``df`` with every declared output column set to a null literal."""
        for out_name in self.output_columns:
            df = df.with_columns(pl.lit(None).alias(out_name))
        return df

    def __call__(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """
        Run the operator on ``df``.

        If validation fails, or ``apply`` raises (the error is logged), the
        input is returned with the output columns filled with nulls.
        """
        if self.validate_input(df):
            try:
                return self.apply(df, **kwargs)
            except Exception as e:
                logger.error(f"算子 {self.name} 应用失败: {e}")
        return self._with_null_outputs(df)
class StockWiseOperator(BaseOperator):
    """Stock-wise operator: runs ``apply_stock`` once per ``ts_code`` group."""

    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """
        Apply ``apply_stock`` to each stock's rows (sorted by ``trade_date``).

        A stock whose computation raises is logged and kept with its output
        columns set to null. The combined result is sorted by
        ``['ts_code', 'trade_date']``.
        """
        results = []
        for ts_code in df['ts_code'].unique().to_list():
            stock_df = df.filter(pl.col('ts_code') == ts_code).sort('trade_date')
            try:
                results.append(self.apply_stock(stock_df, **kwargs))
            except Exception as e:
                logger.error(f"股票 {ts_code} 算子应用失败: {e}")
                # Keep the failed stock, with null output columns.
                for col in self.output_columns:
                    stock_df = stock_df.with_columns(pl.lit(None).alias(col))
                results.append(stock_df)

        if not results:
            # Nothing to concatenate (pl.concat rejects an empty list);
            # return the input with null output columns instead.
            for col in self.output_columns:
                df = df.with_columns(pl.lit(None).alias(col))
            return df

        # The null-literal fallback columns have Null dtype, which a strict
        # vertical concat refuses to combine with typed columns from the
        # successful stocks; the relaxed variant casts to the common supertype.
        combined = pl.concat(results, how='vertical_relaxed')
        return combined.sort(['ts_code', 'trade_date'])

    @abstractmethod
    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator for the rows of a single stock."""
        pass
class DateWiseOperator(BaseOperator):
    """Date-wise operator: runs ``apply_date`` once per ``trade_date`` group."""

    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """
        Apply ``apply_date`` to the rows of each trade date.

        A date whose computation raises is logged and kept with its output
        columns set to null. The combined result is sorted by
        ``['ts_code', 'trade_date']``.
        """
        results = []
        for trade_date in df['trade_date'].unique().to_list():
            date_df = df.filter(pl.col('trade_date') == trade_date)
            try:
                results.append(self.apply_date(date_df, **kwargs))
            except Exception as e:
                logger.error(f"日期 {trade_date} 算子应用失败: {e}")
                # Keep the failed date, with null output columns.
                for col in self.output_columns:
                    date_df = date_df.with_columns(pl.lit(None).alias(col))
                results.append(date_df)

        if not results:
            # Nothing to concatenate (pl.concat rejects an empty list);
            # return the input with null output columns instead.
            for col in self.output_columns:
                df = df.with_columns(pl.lit(None).alias(col))
            return df

        # The null-literal fallback columns have Null dtype, which a strict
        # vertical concat refuses to combine with typed columns from the
        # successful dates; the relaxed variant casts to the common supertype.
        combined = pl.concat(results, how='vertical_relaxed')
        return combined.sort(['ts_code', 'trade_date'])

    @abstractmethod
    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator for the rows of a single trade date."""
        pass
class RollingOperator(StockWiseOperator):
    """Base class for stock-wise operators that carry a rolling window."""

    def __init__(self, config: OperatorConfig, window: int, min_periods: Optional[int] = None):
        """
        Args:
            config: Operator configuration.
            window: Rolling window size.
            min_periods: Stored on the instance; when falsy it defaults to
                ``max(1, window // 2)``.
        """
        super().__init__(config)
        self.window = window
        if min_periods:
            self.min_periods = min_periods
        else:
            self.min_periods = max(1, window // 2)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Delegate the per-stock computation to ``apply_rolling``."""
        rolled = self.apply_rolling(stock_df, **kwargs)
        return rolled

    @abstractmethod
    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Rolling computation for one stock; implemented by subclasses."""
        pass
# 基础算子实现
class ReturnOperator(RollingOperator):
    """Return over ``periods`` rows: ``close / close.shift(periods) - 1``."""

    def __init__(self, periods: int = 1):
        """Configure the operator; the window passed to the base is ``periods + 1``."""
        label = f"return_{periods}"
        super().__init__(
            OperatorConfig(
                name=label,
                description=f"{periods}期收益率",
                required_columns=['close'],
                output_columns=[label],
                parameters={'periods': periods},
            ),
            window=periods + 1,
        )
        self.periods = periods

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``return_{periods}`` column to one stock's rows."""
        close_now = pl.col('close')
        close_then = pl.col('close').shift(self.periods)
        simple_return = close_now / close_then - 1
        return stock_df.with_columns(simple_return.alias(f'return_{self.periods}'))
class VolatilityOperator(RollingOperator):
    """Volatility: rolling standard deviation of ``pct_chg`` over ``window`` rows."""

    def __init__(self, window: int = 20):
        """
        Args:
            window: Rolling window size used for the standard deviation.
        """
        config = OperatorConfig(
            name=f"volatility_{window}",
            description=f"{window}日波动率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config, window=window)

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``volatility_{window}`` column to one stock's rows."""
        # polars names this parameter `window_size`; the former `window=`
        # keyword raised TypeError, which the per-stock error handler turned
        # into an all-null factor.
        return stock_df.with_columns(
            pl.col('pct_chg').rolling_std(window_size=self.window).alias(f'volatility_{self.window}')
        )
class MeanOperator(RollingOperator):
    """Rolling mean of an arbitrary column over ``window`` rows."""

    def __init__(self, column: str, window: int):
        """
        Args:
            column: Name of the column to average.
            window: Rolling window size.
        """
        config = OperatorConfig(
            name=f"mean_{column}_{window}",
            description=f"{column}{window}日均值",
            required_columns=[column],
            output_columns=[f'mean_{column}_{window}'],
            parameters={'column': column, 'window': window}
        )
        super().__init__(config, window=window)
        self.column = column

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``mean_{column}_{window}`` column to one stock's rows."""
        # polars names this parameter `window_size`; the former `window=`
        # keyword raised TypeError, which the per-stock error handler turned
        # into an all-null factor.
        return stock_df.with_columns(
            pl.col(self.column).rolling_mean(window_size=self.window).alias(f'mean_{self.column}_{self.window}')
        )
class RankOperator(DateWiseOperator):
    """Dense rank of a column within each trade date."""

    def __init__(self, column: str, ascending: bool = True):
        """
        Args:
            column: Name of the column to rank.
            ascending: Rank in ascending order when True, descending otherwise.
        """
        config = OperatorConfig(
            name=f"rank_{column}",
            description=f"{column}的排名",
            required_columns=[column],
            output_columns=[f'rank_{column}'],
            parameters={'column': column, 'ascending': ascending}
        )
        super().__init__(config)
        self.column = column
        self.ascending = ascending

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``rank_{column}`` column to the rows of one trade date."""
        # DateWiseOperator declares apply_date abstract; without this
        # implementation the class could not be instantiated. The ranking
        # mirrors the dense rank used by the function-style rank operator.
        ranked = pl.col(self.column).rank(method='dense', descending=not self.ascending)
        return date_df.with_columns(ranked.alias(f'rank_{self.column}'))

View File

@@ -0,0 +1,312 @@
"""
筹码分布因子 - 使用Polars实现
包含筹码集中度、分布偏度、浮筹比例等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class ChipConcentrationOperator(StockWiseOperator):
    """Chip concentration: ``(cost_95pct - cost_5pct) / (close + 1e-8)``."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="chip_concentration",
                description="筹码集中度",
                required_columns=['cost_95pct', 'cost_5pct', 'close'],
                output_columns=['chip_concentration_range'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_concentration_range`` to one stock's rows."""
        tiny = 1e-8  # keeps the denominator away from zero
        band_width = pl.col('cost_95pct') - pl.col('cost_5pct')
        scaled_width = band_width / (pl.col('close') + tiny)
        return stock_df.with_columns(scaled_width.alias('chip_concentration_range'))
class ChipSkewnessOperator(StockWiseOperator):
    """Chip skewness: ``(weight_avg - cost_50pct) / (cost_50pct + 1e-8)``."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="chip_skewness",
                description="筹码分布偏度",
                required_columns=['weight_avg', 'cost_50pct'],
                output_columns=['chip_skewness'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_skewness`` to one stock's rows."""
        tiny = 1e-8  # keeps the denominator away from zero
        gap = pl.col('weight_avg') - pl.col('cost_50pct')
        relative_gap = gap / (pl.col('cost_50pct') + tiny)
        return stock_df.with_columns(relative_gap.alias('chip_skewness'))
class FloatingChipProxyOperator(StockWiseOperator):
    """Floating-chip proxy: ``winner_rate * max(0, (close - cost_15pct) / close)``."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="floating_chip_proxy",
                description="浮筹比例代理",
                required_columns=['close', 'cost_15pct', 'winner_rate'],
                output_columns=['floating_chip_proxy'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``floating_chip_proxy`` to one stock's rows."""
        # Relative distance of close from the cost_15pct level.
        relative_gap = (pl.col('close') - pl.col('cost_15pct')) / pl.col('close')
        # Floor the distance at zero before weighting by winner_rate.
        floored_gap = pl.max_horizontal(0, relative_gap)
        proxy = pl.col('winner_rate') * floored_gap
        return stock_df.with_columns(proxy.alias('floating_chip_proxy'))
class CostSupportChangeOperator(StockWiseOperator):
    """Percentage change of ``cost_15pct`` over ``n`` rows, scaled by 100."""

    def __init__(self, n: int = 1):
        """
        Args:
            n: Number of rows over which the change is measured.
        """
        super().__init__(
            OperatorConfig(
                name=f"cost_support_change_{n}",
                description=f"{n}日成本支撑强度变化",
                required_columns=['cost_15pct'],
                output_columns=[f'cost_support_15pct_change_{n}'],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cost_support_15pct_change_{n}`` to one stock's rows."""
        fractional = pl.col('cost_15pct').pct_change(self.n)
        in_percent = fractional * 100
        return stock_df.with_columns(in_percent.alias(f'cost_support_15pct_change_{self.n}'))
class WinnerPriceZoneOperator(StockWiseOperator):
    """
    Classify each row into a zone code from close vs. cost levels and winner_rate.

    Codes (first matching rule wins):
        1: close > cost_85pct and winner_rate > 0.8
        2: close < cost_15pct and winner_rate < 0.2
        3: close > cost_50pct and winner_rate > 0.5
        4: close < cost_50pct and winner_rate < 0.5
        0: none of the above
    """

    def __init__(self):
        """Register the required and output columns."""
        config = OperatorConfig(
            name="winner_price_zone",
            description="获利盘压力/支撑区分类",
            required_columns=['close', 'cost_85pct', 'cost_15pct', 'cost_50pct', 'winner_rate'],
            output_columns=['cat_winner_price_zone'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cat_winner_price_zone`` to one stock's rows."""
        close = pl.col('close')
        winner_rate = pl.col('winner_rate')

        # `pl.select` builds a DataFrame and has no numpy-style
        # conditions/choices/default signature, so the previous call always
        # failed. A when/then chain gives the same "first match wins, else
        # default" semantics as an expression.
        zone_classification = (
            pl.when((close > pl.col('cost_85pct')) & (winner_rate > 0.8)).then(1)
            .when((close < pl.col('cost_15pct')) & (winner_rate < 0.2)).then(2)
            .when((close > pl.col('cost_50pct')) & (winner_rate > 0.5)).then(3)
            .when((close < pl.col('cost_50pct')) & (winner_rate < 0.5)).then(4)
            .otherwise(0)
        )
        return stock_df.with_columns(zone_classification.alias('cat_winner_price_zone'))
class FlowChipConsistencyOperator(StockWiseOperator):
    """Large-order net buy volume, kept only when cost_15pct < close < cost_50pct."""

    def __init__(self):
        """Register the required and output columns."""
        needed = ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                  'close', 'cost_15pct', 'cost_50pct']
        super().__init__(
            OperatorConfig(
                name="flow_chip_consistency",
                description="主力行为与筹码结构一致性",
                required_columns=needed,
                output_columns=['flow_chip_consistency'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``flow_chip_consistency`` to one stock's rows."""
        # Net buy volume of large and extra-large orders.
        gross_buy = pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
        net_buy = gross_buy - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        # Indicator: close lies strictly between cost_15pct and cost_50pct.
        above_lower = pl.col('close') > pl.col('cost_15pct')
        below_mid = pl.col('close') < pl.col('cost_50pct')
        in_band = (above_lower & below_mid).cast(int)
        gated_flow = net_buy * in_band
        return stock_df.with_columns(gated_flow.alias('flow_chip_consistency'))
class ProfitTakingVsAbsorptionOperator(StockWiseOperator):
    """Large-order net buy volume, kept only when ``winner_rate > 0.7``."""

    def __init__(self):
        """Register the required and output columns."""
        needed = ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                  'winner_rate']
        super().__init__(
            OperatorConfig(
                name="profit_taking_vs_absorb",
                description="获利了结压力vs承接盘强度",
                required_columns=needed,
                output_columns=['profit_taking_vs_absorb'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_taking_vs_absorb`` to one stock's rows."""
        # Net buy volume of large and extra-large orders.
        gross_buy = pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
        net_buy = gross_buy - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        # 0/1 indicator for a high winner rate.
        many_winners = (pl.col('winner_rate') > 0.7).cast(int)
        # The sign of the result follows the sign of the net buy volume on
        # rows where the indicator is 1; other rows become 0.
        gated_flow = net_buy * many_winners
        return stock_df.with_columns(gated_flow.alias('profit_taking_vs_absorb'))
class ChipConcentrationChangeOperator(StockWiseOperator):
    """Rolling std over ``n`` rows of ``(cost_85pct - cost_15pct) / (weight_avg + 1e-8)``."""

    def __init__(self, n: int = 20):
        """
        Args:
            n: Rolling window size for the standard deviation.
        """
        config = OperatorConfig(
            name=f"chip_conc_std_{n}",
            description=f"{n}日筹码集中度变化",
            required_columns=['cost_85pct', 'cost_15pct', 'weight_avg'],
            output_columns=[f'chip_conc_std_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_conc_std_{n}`` to one stock's rows."""
        epsilon = 1e-8  # keeps the denominator away from zero
        # Cost band width normalised by weight_avg.
        cost_range_norm = (pl.col('cost_85pct') - pl.col('cost_15pct')) / (pl.col('weight_avg') + epsilon)
        # polars names this parameter `window_size`; `window=` raised TypeError.
        conc_std = cost_range_norm.rolling_std(window_size=self.n)
        return stock_df.with_columns(conc_std.alias(f'chip_conc_std_{self.n}'))
class CostBreakoutConfirmationOperator(StockWiseOperator):
    """
    Rolling ``m``-row sum of flow-confirmed breakout signals.

    Per row the signal is +1 when close exceeds the previous row's
    ``cost_85pct`` with positive large-order net flow, -1 when close is below
    the previous row's ``cost_15pct`` with negative net flow, otherwise 0.
    """

    def __init__(self, m: int = 5):
        """
        Args:
            m: Rolling window size for the signal sum.
        """
        config = OperatorConfig(
            name=f"cost_break_confirm_cnt_{m}",
            description=f"{m}日成本突破确认",
            required_columns=['close', 'cost_85pct', 'cost_15pct',
                              'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol'],
            output_columns=[f'cost_break_confirm_cnt_{m}'],
            parameters={'m': m}
        )
        super().__init__(config)
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cost_break_confirm_cnt_{m}`` to one stock's rows."""
        # Cost levels of the previous row.
        prev_cost_85 = pl.col('cost_85pct').shift(1)
        prev_cost_15 = pl.col('cost_15pct').shift(1)
        # Breakout flags.
        break_up = pl.col('close') > prev_cost_85
        break_down = pl.col('close') < prev_cost_15
        # Net volume of large and extra-large orders.
        net_lg_flow_vol = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        # A breakout counts only when the net flow points the same way.
        confirm_up = break_up & (net_lg_flow_vol > 0)
        confirm_down = break_down & (net_lg_flow_vol < 0)
        net_confirm = confirm_up.cast(int) - confirm_down.cast(int)
        # polars names this parameter `window_size`; `window=` raised TypeError.
        confirm_cnt = net_confirm.rolling_sum(window_size=self.m)
        return stock_df.with_columns(confirm_cnt.alias(f'cost_break_confirm_cnt_{self.m}'))
# Default collection of chip-distribution operators. Each entry is an
# instance built with its constructor defaults; the list order is the order
# in which apply_chip_distribution_factors applies them when no explicit
# operator list is given.
CHIP_DISTRIBUTION_OPERATORS = [
    ChipConcentrationOperator(),
    ChipSkewnessOperator(),
    FloatingChipProxyOperator(),
    CostSupportChangeOperator(),
    WinnerPriceZoneOperator(),
    FlowChipConsistencyOperator(),
    ProfitTakingVsAbsorptionOperator(),
    ChipConcentrationChangeOperator(),
    CostBreakoutConfirmationOperator(),
]
def apply_chip_distribution_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """
    Run a sequence of operators over ``df``, feeding each one the previous result.

    Args:
        df: Input polars DataFrame.
        operators: Callables applied in order; when None,
            ``CHIP_DISTRIBUTION_OPERATORS`` is used.

    Returns:
        The DataFrame returned by the last operator (``df`` itself if the
        sequence is empty).
    """
    pipeline = CHIP_DISTRIBUTION_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current

View File

@@ -0,0 +1,648 @@
"""
复杂组合因子 - 使用Polars实现
包含复杂的组合因子和高级因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, DateWiseOperator, OperatorConfig
# 时间序列因子
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
    """
    Rolling correlation between large-order net flow and price momentum.

    The flow series is the ``n``-row rolling sum of large-order net volume
    times close; the momentum series is the ``n``-row percentage change of
    close; the two are correlated over ``m`` rows.
    """

    def __init__(self, n: int = 20, m: int = 60):
        """
        Args:
            n: Window for the rolling flow sum and the momentum lookback.
            m: Window for the rolling correlation.
        """
        config = OperatorConfig(
            name=f"lg_flow_mom_corr_{n}_{m}",
            description=f"{n}日大单资金流与{m}日价格动量相关性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'vol'],
            output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
            parameters={'n': n, 'm': m}
        )
        super().__init__(config)
        self.n = n
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``lg_flow_mom_corr_{n}_{m}`` to one stock's rows."""
        # Large-order net volume valued at close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )
        # polars names this parameter `window_size`; `window=` raised TypeError.
        rolling_net_lg_flow = net_lg_flow_val.rolling_sum(window_size=self.n)
        # Price momentum over n rows.
        price_mom = pl.col('close').pct_change(self.n)
        # Rolling correlation is a module-level function in polars, not an
        # expression method.
        correlation = pl.rolling_corr(rolling_net_lg_flow, price_mom, window_size=self.m)
        return stock_df.with_columns(
            correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
        )
class LargeBuyConsolidationOperator(StockWiseOperator):
    """Rolling ``n``-row mean of large-order net volume divided by ``vol + 1e-8``."""

    def __init__(self, n: int = 20, vol_quantile: float = 0.2):
        """
        Args:
            n: Rolling window size for the mean.
            vol_quantile: Stored on the instance and in the config parameters;
                not used by ``apply_stock``.
        """
        config = OperatorConfig(
            name=f"lg_buy_consolidation_{n}",
            description=f"{n}日大单买入盘整期",
            required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol'],
            output_columns=[f'lg_buy_consolidation_{n}'],
            parameters={'n': n, 'vol_quantile': vol_quantile}
        )
        super().__init__(config)
        self.n = n
        self.vol_quantile = vol_quantile

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``lg_buy_consolidation_{n}`` to one stock's rows."""
        epsilon = 1e-8  # keeps the denominator away from zero
        # The former unused rolling std of close was dropped: it was never
        # part of the result, and its `window=` keyword raised TypeError
        # before the factor could be built.
        net_lg_flow_ratio = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) /
            (pl.col('vol') + epsilon)
        )
        # polars names this parameter `window_size`.
        rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(window_size=self.n)
        return stock_df.with_columns(
            rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
        )
class IntradayLargeFlowCorrelationOperator(StockWiseOperator):
    """Placeholder operator: emits an all-null float column."""

    def __init__(self, n: int = 20):
        """
        Args:
            n: Used only in the operator and output-column names.
        """
        needed = ['high', 'low', 'close', 'buy_lg_vol', 'buy_elg_vol',
                  'sell_lg_vol', 'sell_elg_vol']
        super().__init__(
            OperatorConfig(
                name=f"intraday_lg_flow_corr_{n}",
                description=f"{n}日日内趋势与大单流相关性",
                required_columns=needed,
                output_columns=[f'intraday_lg_flow_corr_{n}'],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``intraday_lg_flow_corr_{n}`` as a null float column."""
        # No real computation yet: the original author notes that a proper
        # implementation needs richer intraday data.
        out_name = f'intraday_lg_flow_corr_{self.n}'
        null_float = pl.lit(None).cast(float)
        return stock_df.with_columns(null_float.alias(out_name))
class ProfitPressureOperator(StockWiseOperator):
    """
    Profit pressure index:
    ``winner_rate * 0.5 * (margin_85 + margin_95)`` where
    ``margin_X = close / (cost_Xpct + 1e-8) - 1``.
    """

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="profit_pressure",
                description="获利压力指数",
                required_columns=['close', 'cost_85pct', 'cost_95pct', 'winner_rate'],
                output_columns=['profit_pressure'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_pressure`` to one stock's rows."""
        tiny = 1e-8  # keeps the denominators away from zero
        close = pl.col('close')
        margin_85 = (close / (pl.col('cost_85pct') + tiny)) - 1
        margin_95 = (close / (pl.col('cost_95pct') + tiny)) - 1
        margin_sum = margin_85 + margin_95
        index = pl.col('winner_rate') * 0.5 * margin_sum
        return stock_df.with_columns(index.alias('profit_pressure'))
class UnderwaterResistanceOperator(StockWiseOperator):
    """
    Underwater resistance:
    ``(1 - winner_rate) * max(0, cost_15pct - close) / (close + 1e-8)``.
    """

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="underwater_resistance",
                description="套牢盘阻力",
                required_columns=['close', 'winner_rate', 'cost_15pct'],
                output_columns=['underwater_resistance'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``underwater_resistance`` to one stock's rows."""
        tiny = 1e-8  # keeps the denominator away from zero
        # Share of holders that are not winners.
        loser_share = 1.0 - pl.col('winner_rate')
        # Shortfall of close below cost_15pct, floored at zero, relative to close.
        shortfall = pl.max_horizontal(0, pl.col('cost_15pct') - pl.col('close'))
        relative_shortfall = shortfall / (pl.col('close') + tiny)
        drag = loser_share * relative_shortfall
        return stock_df.with_columns(drag.alias('underwater_resistance'))
class ProfitDecayOperator(StockWiseOperator):
    """``n``-row return of close divided by the ``n``-row change in ``winner_rate`` (+1e-8)."""

    def __init__(self, n: int = 20):
        """
        Args:
            n: Lookback, in rows, for both the return and the winner-rate change.
        """
        super().__init__(
            OperatorConfig(
                name=f"profit_decay_{n}",
                description=f"{n}日盈利预期衰减",
                required_columns=['close', 'winner_rate'],
                output_columns=[f'profit_decay_{n}'],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_decay_{n}`` to one stock's rows."""
        lookback = self.n
        price_return = pl.col('close').pct_change(lookback)
        winner_shift = pl.col('winner_rate').diff(lookback)
        ratio = price_return / (winner_shift + 1e-8)
        return stock_df.with_columns(ratio.alias(f'profit_decay_{self.n}'))
class PullbackStrongOperator(StockWiseOperator):
    """
    Pullback depth relative to recent gain.

    ``depth = (rolling_max(high, n) - close) / rolling_max(high, n)`` and
    ``gain = close / close.shift(m) - 1``; the factor is ``depth / (gain + 1e-8)``.
    """

    def __init__(self, n: int = 20, m: int = 20, gain_thresh: float = 0.2):
        """
        Args:
            n: Window for the rolling maximum of ``high``.
            m: Lookback, in rows, for the recent gain.
            gain_thresh: Stored on the instance and in the config parameters;
                not used by ``apply_stock``.
        """
        config = OperatorConfig(
            name=f"pullback_strong_{n}_{m}",
            description=f"{n}{m}期强势股回调深度",
            required_columns=['high', 'close'],
            output_columns=[f'pullback_strong_{n}_{m}'],
            parameters={'n': n, 'm': m, 'gain_thresh': gain_thresh}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.gain_thresh = gain_thresh

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``pullback_strong_{n}_{m}`` to one stock's rows."""
        # polars names this parameter `window_size`; `window=` raised TypeError.
        high_n = pl.col('high').rolling_max(window_size=self.n)
        # Depth of the pullback from the rolling high.
        pullback_depth = (high_n - pl.col('close')) / high_n
        # Gain over the last m rows.
        recent_gain = (pl.col('close') / pl.col('close').shift(self.m)) - 1
        pullback_factor = pullback_depth / (recent_gain + 1e-8)
        return stock_df.with_columns(pullback_factor.alias(f'pullback_strong_{self.n}_{self.m}'))
class HurstExponentFlowOperator(StockWiseOperator):
    """Placeholder operator: emits an all-null float column."""

    def __init__(self, n: int = 60, flow_col: str = 'net_mf_vol'):
        """
        Args:
            n: Used only in the operator and output-column names.
            flow_col: Required input column; also part of the output name.
        """
        out_name = f'hurst_{flow_col}_{n}'
        super().__init__(
            OperatorConfig(
                name=f"hurst_{flow_col}_{n}",
                description=f"{n}{flow_col}Hurst指数",
                required_columns=[flow_col],
                output_columns=[out_name],
                parameters={'n': n, 'flow_col': flow_col},
            )
        )
        self.n = n
        self.flow_col = flow_col

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``hurst_{flow_col}_{n}`` as a null float column."""
        # No Hurst computation is performed here; the original author notes
        # that a dedicated implementation is still needed.
        null_float = pl.lit(None).cast(float)
        return stock_df.with_columns(null_float.alias(f'hurst_{self.flow_col}_{self.n}'))
class VolWeightedHistoricalPositionOperator(StockWiseOperator):
    """
    Historical price position weighted by relative volume.

    ``position = clip((close - his_low) / (his_high - his_low), 0, 1)`` and
    ``strength = vol / rolling_mean(vol, n)``; the factor is their product.
    """

    def __init__(self, n: int = 20):
        """
        Args:
            n: Rolling window size for the mean volume.
        """
        config = OperatorConfig(
            name=f"vol_wgt_hist_pos_{n}",
            description=f"{n}日成交量加权历史位置",
            required_columns=['close', 'his_high', 'his_low', 'vol'],
            output_columns=[f'vol_wgt_hist_pos_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``vol_wgt_hist_pos_{n}`` to one stock's rows."""
        # Position of close inside the [his_low, his_high] range, clipped to [0, 1].
        hist_pos = (pl.col('close') - pl.col('his_low')) / (pl.col('his_high') - pl.col('his_low'))
        hist_pos = hist_pos.clip(0, 1)
        # polars names this parameter `window_size`; `window=` raised TypeError.
        rolling_mean_vol = pl.col('vol').rolling_mean(window_size=self.n)
        vol_rel_strength = pl.col('vol') / rolling_mean_vol
        weighted_pos = hist_pos * vol_rel_strength
        return stock_df.with_columns(weighted_pos.alias(f'vol_wgt_hist_pos_{self.n}'))
# 横截面因子
class CrossSectionalRankOperator(DateWiseOperator):
    """Dense rank of a column within one date, divided by the largest rank."""

    def __init__(self, column: str, ascending: bool = True):
        """
        Args:
            column: Name of the column to rank.
            ascending: Rank in ascending order when True, descending otherwise.
        """
        super().__init__(
            OperatorConfig(
                name=f"cs_rank_{column}",
                description=f"{column}横截面排名",
                required_columns=[column],
                output_columns=[f'cs_rank_{column}'],
                parameters={'column': column, 'ascending': ascending},
            )
        )
        self.column = column
        self.ascending = ascending

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_{column}`` to the rows of one trade date."""
        use_descending = not self.ascending
        dense = pl.col(self.column).rank(method='dense', descending=use_descending)
        # Scale by the maximum rank so the top rank maps to 1.
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias(f'cs_rank_{self.column}'))
class CrossSectionalNetLargeFlowRankOperator(DateWiseOperator):
    """Descending dense rank of large-order net volume times close, scaled by the max rank."""

    def __init__(self):
        """Register the required and output columns."""
        needed = ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'close']
        super().__init__(
            OperatorConfig(
                name="cs_rank_net_lg_flow_val",
                description="横截面大单净额排名",
                required_columns=needed,
                output_columns=['cs_rank_net_lg_flow_val'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_net_lg_flow_val`` to the rows of one trade date."""
        gross_buy = pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
        net_volume = gross_buy - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        net_value = net_volume * pl.col('close')
        dense = net_value.rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_net_lg_flow_val'))
class CrossSectionalFlowDivergenceRankOperator(DateWiseOperator):
    """
    Descending dense rank of (large-order net ratio - small-order net ratio),
    scaled by the max rank. Both ratios are net volume over ``vol + 1e-8``.
    """

    def __init__(self):
        """Register the required and output columns."""
        needed = ['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
                  'sell_lg_vol', 'sell_elg_vol', 'vol']
        super().__init__(
            OperatorConfig(
                name="cs_rank_flow_divergence",
                description="横截面流向背离度排名",
                required_columns=needed,
                output_columns=['cs_rank_flow_divergence'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_flow_divergence`` to the rows of one trade date."""
        tiny = 1e-8  # keeps the denominators away from zero
        # Large-order net ratio.
        large_gross_buy = pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
        large_net = large_gross_buy - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        large_ratio = large_net / (pl.col('vol') + tiny)
        # Small-order net ratio.
        small_net = pl.col('buy_sm_vol') - pl.col('sell_sm_vol')
        small_ratio = small_net / (pl.col('vol') + tiny)
        spread = large_ratio - small_ratio
        dense = spread.rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_flow_divergence'))
class CrossSectionalRelativeProfitMarginRankOperator(DateWiseOperator):
    """Descending dense rank of ``(close - weight_avg) / close``, scaled by the max rank."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_rel_profit_margin",
                description="横截面相对盈利幅度排名",
                required_columns=['close', 'weight_avg'],
                output_columns=['cs_rank_rel_profit_margin'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_rel_profit_margin`` to the rows of one trade date."""
        excess = pl.col('close') - pl.col('weight_avg')
        margin = excess / pl.col('close')
        dense = margin.rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_rel_profit_margin'))
class CrossSectionalCostBreadthRankOperator(DateWiseOperator):
    """
    Descending dense rank of ``(cost_85pct - cost_15pct) / (weight_avg + 1e-8)``,
    scaled by the max rank.
    """

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_cost_breadth",
                description="横截面成本分布宽度排名",
                required_columns=['cost_85pct', 'cost_15pct', 'weight_avg'],
                output_columns=['cs_rank_cost_breadth'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_cost_breadth`` to the rows of one trade date."""
        tiny = 1e-8  # keeps the denominator away from zero
        band_width = pl.col('cost_85pct') - pl.col('cost_15pct')
        breadth = band_width / (pl.col('weight_avg') + tiny)
        dense = breadth.rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_cost_breadth'))
class CrossSectionalWinnerRateRankOperator(DateWiseOperator):
    """Descending dense rank of ``winner_rate``, scaled by the max rank."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_winner_rate",
                description="横截面获利盘比例排名",
                required_columns=['winner_rate'],
                output_columns=['cs_rank_winner_rate'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_winner_rate`` to the rows of one trade date."""
        dense = pl.col('winner_rate').rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_winner_rate'))
class CrossSectionalVolumeRatioRankOperator(DateWiseOperator):
    """Descending dense rank of ``volume_ratio``, scaled by the max rank."""

    def __init__(self):
        """Register the required and output columns."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_volume_ratio",
                description="横截面量比排名",
                required_columns=['volume_ratio'],
                output_columns=['cs_rank_volume_ratio'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cs_rank_volume_ratio`` to the rows of one trade date."""
        dense = pl.col('volume_ratio').rank(method='dense', descending=True)
        scaled = dense / dense.max()
        return date_df.with_columns(scaled.alias('cs_rank_volume_ratio'))
# 复杂组合因子
class ComplexFactorDEAPOperator(StockWiseOperator):
    """
    Composite factor ``complex_factor_deap_1`` built from precomputed factor columns.

    With ``sd(a, b)`` meaning ``a / b`` where ``|b| > 1e-8`` and 0 otherwise:
        D = pullback_strong_20_20 * sd(log_close, industry_return_5)
            - sd(vol_adj_roc_20 + vol_drop_profit_cnt_5, nonlinear_mv_volume - alpha_007)
        A = D * lg_buy_consolidation_20 + lg_buy_consolidation_20 + pullback_strong_20_20
        F = (net_mf_vol + std_return_5) * (arbr - industry_act_factor5)
        H = industry_act_factor1 + low_cost_dev + mv_weighted_turnover * act_factor4
        B = sd(F + vol + H, lg_elg_buy_prop)
        C = sd(sd(intraday_lg_flow_corr_20 filled with 0, lg_elg_buy_prop), lg_elg_buy_prop)
        result = sd(A, B) - C
    """

    def __init__(self):
        """Register the required and output columns."""
        needed = ['pullback_strong_20_20', 'log_close', 'industry_return_5',
                  'vol_adj_roc_20', 'vol_drop_profit_cnt_5', 'nonlinear_mv_volume',
                  'alpha_007', 'lg_buy_consolidation_20', 'net_mf_vol', 'std_return_5',
                  'arbr', 'industry_act_factor5', 'industry_act_factor1', 'low_cost_dev',
                  'mv_weighted_turnover', 'act_factor4', 'vol', 'lg_elg_buy_prop',
                  'intraday_lg_flow_corr_20']
        super().__init__(
            OperatorConfig(
                name="complex_factor_deap_1",
                description="DEAP复杂组合因子",
                required_columns=needed,
                output_columns=['complex_factor_deap_1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """
        Add ``complex_factor_deap_1`` to one stock's rows.

        If building or evaluating the expression raises, the error is printed
        and a null float column is added instead.
        """
        try:
            def guarded_ratio(numer, denom, fallback=0):
                # Divide only where the denominator is not (near) zero.
                return pl.when(denom.abs() > 1e-8).then(numer / denom).otherwise(fallback)

            pullback = pl.col('pullback_strong_20_20')
            consolidation = pl.col('lg_buy_consolidation_20')
            buy_prop = pl.col('lg_elg_buy_prop')

            # Component D
            d_ratio = guarded_ratio(pl.col('log_close'), pl.col('industry_return_5'))
            d_left = pullback * d_ratio
            d_denominator = pl.col('nonlinear_mv_volume') - pl.col('alpha_007')
            d_numerator = pl.col('vol_adj_roc_20') + pl.col('vol_drop_profit_cnt_5')
            d_right = guarded_ratio(d_numerator, d_denominator)
            part_d = d_left - d_right

            # Component A
            a_scaled = part_d * consolidation
            a_shifted = a_scaled + consolidation
            part_a = a_shifted + pullback

            # Component F
            f_sum = pl.col('net_mf_vol') + pl.col('std_return_5')
            f_diff = pl.col('arbr') - pl.col('industry_act_factor5')
            part_f = f_sum * f_diff

            # Component H
            h_sum = pl.col('industry_act_factor1') + pl.col('low_cost_dev')
            h_product = pl.col('mv_weighted_turnover') * pl.col('act_factor4')
            part_h = h_sum + h_product

            # Component B
            b_partial = part_f + pl.col('vol')
            b_total = b_partial + part_h
            part_b = guarded_ratio(b_total, buy_prop)

            # Component C
            c_inner = guarded_ratio(
                pl.col('intraday_lg_flow_corr_20').fill_null(0),
                buy_prop
            )
            part_c = guarded_ratio(c_inner, buy_prop)

            # Final factor
            a_over_b = guarded_ratio(part_a, part_b)
            combined = a_over_b - part_c
            return stock_df.with_columns(combined.alias('complex_factor_deap_1'))
        except Exception as e:
            # On failure, report the error and emit a null column.
            print(f"Error calculating complex_factor_deap_1: {e}")
            return stock_df.with_columns(pl.lit(None).cast(float).alias('complex_factor_deap_1'))
# Default operator set of this module. The operators are instantiated once at
# import time and apply_complex_factors runs them in exactly this order.
# NOTE(review): ComplexFactorDEAPOperator is last and requires columns such as
# 'lg_buy_consolidation_20' and 'intraday_lg_flow_corr_20'; presumably those
# come from the operators listed before it - confirm before reordering.
COMPLEX_OPERATORS = [
LargeFlowMomentumCorrelationOperator(),
LargeBuyConsolidationOperator(),
IntradayLargeFlowCorrelationOperator(),
ProfitPressureOperator(),
UnderwaterResistanceOperator(),
ProfitDecayOperator(),
PullbackStrongOperator(),
HurstExponentFlowOperator(),
VolWeightedHistoricalPositionOperator(),
CrossSectionalRankOperator('close'),
CrossSectionalNetLargeFlowRankOperator(),
CrossSectionalFlowDivergenceRankOperator(),
CrossSectionalRelativeProfitMarginRankOperator(),
CrossSectionalCostBreadthRankOperator(),
CrossSectionalWinnerRateRankOperator(),
CrossSectionalVolumeRatioRankOperator(),
ComplexFactorDEAPOperator(),
]
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run the complex composite factor operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. ``None`` selects
            ``COMPLEX_OPERATORS``.

    Returns:
        The DataFrame produced by feeding ``df`` through each operator in turn.
    """
    active_operators = COMPLEX_OPERATORS if operators is None else operators

    current = df
    for op in active_operators:
        # Each operator is callable and returns the frame for the next one.
        current = op(current)
    return current
# Main entry point
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: List[str] = None) -> pl.DataFrame:
    """Apply every requested factor category to ``df``.

    Categories always run in a fixed order (money_flow, chip, volatility,
    volume, technical, sentiment, momentum, complex), regardless of their
    order in ``factor_categories``. Names that match no category are ignored.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Category names to apply; ``None`` applies all.

    Returns:
        The DataFrame after all selected categories have been applied.
    """
    if factor_categories is None:
        factor_categories = ['money_flow', 'chip', 'volatility', 'volume',
                             'technical', 'sentiment', 'momentum', 'complex']

    # All factor modules are imported up front, even for unselected categories.
    from polars_money_flow_factors import apply_money_flow_factors
    from polars_chip_factors import apply_chip_distribution_factors
    from polars_volatility_factors import apply_volatility_factors
    from polars_volume_factors import apply_volume_factors
    from polars_technical_factors import apply_technical_factors
    from polars_sentiment_factors import apply_sentiment_factors
    from polars_momentum_factors import apply_momentum_factors

    # (category name, function) pairs in application order.
    pipeline = (
        ('money_flow', apply_money_flow_factors),
        ('chip', apply_chip_distribution_factors),
        ('volatility', apply_volatility_factors),
        ('volume', apply_volume_factors),
        ('technical', apply_technical_factors),
        ('sentiment', apply_sentiment_factors),
        ('momentum', apply_momentum_factors),
        ('complex', apply_complex_factors),
    )

    result_df = df
    for category, apply_category in pipeline:
        if category in factor_categories:
            result_df = apply_category(result_df)
    return result_df

View File

@@ -0,0 +1,237 @@
"""
Polars因子主入口 - 整合所有Polars-based因子计算
提供统一的接口来应用所有类别的因子
"""
import polars as pl
from typing import Dict, List, Optional, Any
import logging
# 配置日志
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Maps each factor category key to its human-readable (Chinese) label.
# The keys are the names accepted by apply_all_factors; the labels are used in
# log messages and returned by get_factor_info.
FACTOR_CATEGORIES = {
'money_flow': '资金流因子',
'chip': '筹码分布因子',
'volatility': '波动率因子',
'volume': '成交量因子',
'technical': '技术指标因子',
'sentiment': '情绪因子',
'momentum': '动量因子',
'complex': '复杂组合因子'
}
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the money-flow factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_money_flow_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_money_flow_factors import apply_money_flow_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入资金流因子模块: {exc}")
        outcome = df
    return outcome
def apply_chip_distribution_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the chip-distribution factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_chip_factors``; ``df`` itself when
        an ImportError is raised (a warning is logged).
    """
    try:
        from polars_chip_factors import apply_chip_distribution_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入筹码分布因子模块: {exc}")
        outcome = df
    return outcome
def apply_volatility_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the volatility factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_volatility_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_volatility_factors import apply_volatility_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入波动率因子模块: {exc}")
        outcome = df
    return outcome
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the volume factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_volume_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_volume_factors import apply_volume_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入成交量因子模块: {exc}")
        outcome = df
    return outcome
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the technical-indicator factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_technical_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_technical_factors import apply_technical_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入技术指标因子模块: {exc}")
        outcome = df
    return outcome
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the sentiment factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_sentiment_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_sentiment_factors import apply_sentiment_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入情绪因子模块: {exc}")
        outcome = df
    return outcome
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the momentum factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_momentum_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_momentum_factors import apply_momentum_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入动量因子模块: {exc}")
        outcome = df
    return outcome
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Apply the complex composite factors, or return ``df`` unchanged on ImportError.

    Args:
        df: Input Polars DataFrame.
        operators: Optional operator list forwarded to the implementation.

    Returns:
        The DataFrame returned by ``polars_complex_factors``; ``df`` itself
        when an ImportError is raised (a warning is logged).
    """
    try:
        from polars_complex_factors import apply_complex_factors as implementation
        outcome = implementation(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入复杂组合因子模块: {exc}")
        outcome = df
    return outcome
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: List[str] = None,
                      exclude_categories: List[str] = None) -> pl.DataFrame:
    """Apply the selected factor categories to ``df`` one after another.

    Categories run in the order given. An unknown category is logged and
    skipped. If a category raises, the error is logged and processing
    continues with the frame as it was before that category.

    Args:
        df: Input Polars DataFrame containing the columns the factors need.
        factor_categories: Category names to apply; ``None`` means every key
            of ``FACTOR_CATEGORIES``.
        exclude_categories: Category names to drop from the selection.

    Returns:
        The DataFrame after all selected categories have been applied.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())
    if exclude_categories:
        factor_categories = [
            name for name in factor_categories if name not in exclude_categories
        ]

    logger.info(f"开始应用因子类别: {factor_categories}")

    # Category key -> function that applies that category.
    dispatch = {
        'money_flow': apply_money_flow_factors,
        'chip': apply_chip_distribution_factors,
        'volatility': apply_volatility_factors,
        'volume': apply_volume_factors,
        'technical': apply_technical_factors,
        'sentiment': apply_sentiment_factors,
        'momentum': apply_momentum_factors,
        'complex': apply_complex_factors
    }

    current = df
    added_total = 0
    for category in factor_categories:
        handler = dispatch.get(category)
        if handler is None:
            logger.warning(f"未知的因子类别: {category}")
            continue

        logger.info(f"应用{FACTOR_CATEGORIES[category]}...")
        try:
            width_before = len(current.columns)
            current = handler(current)
            # New factors are counted as the growth in column count.
            added = len(current.columns) - width_before
            logger.info(f"{FACTOR_CATEGORIES[category]}应用完成,新增{added}个因子")
            added_total += added
        except Exception as exc:
            logger.error(f"应用{FACTOR_CATEGORIES[category]}时出错: {exc}")

    logger.info(f"因子应用完成,总共新增{added_total}个因子")
    return current
def get_factor_info() -> Dict[str, Any]:
    """Describe the available factor categories.

    Returns:
        A dict with ``categories`` (the ``FACTOR_CATEGORIES`` mapping itself),
        ``total_categories`` (its size) and ``category_descriptions`` (a list
        of its labels).
    """
    info: Dict[str, Any] = {}
    info['categories'] = FACTOR_CATEGORIES
    info['total_categories'] = len(FACTOR_CATEGORIES)
    info['category_descriptions'] = list(FACTOR_CATEGORIES.values())
    return info
def validate_required_columns(df: pl.DataFrame, factor_categories: List[str] = None) -> Dict[str, List[str]]:
    """Report which required columns are missing from ``df``.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Categories to check; ``None`` checks every key of
            ``FACTOR_CATEGORIES``. Categories without a requirement entry are
            ignored.

    Returns:
        A dict mapping ``'base'`` and/or category names to the list of missing
        columns. Groups with nothing missing are omitted, so an empty dict
        means every checked column is present.
    """
    if factor_categories is None:
        factor_categories = list(FACTOR_CATEGORIES.keys())

    def absent(columns: List[str]) -> List[str]:
        # Columns from ``columns`` that ``df`` lacks, in the given order.
        return [name for name in columns if name not in df.columns]

    # Required columns per factor category.
    requirements_by_category = {
        'money_flow': ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
        'chip': ['cost_95pct', 'cost_85pct', 'cost_50pct', 'cost_15pct', 'cost_5pct',
                 'winner_rate', 'weight_avg', 'close'],
        'volatility': ['pct_chg'],
        'volume': ['vol', 'turnover_rate', 'volume_ratio', 'amount'],
        'technical': ['open', 'high', 'low', 'close', 'vol'],
        'sentiment': ['pct_chg', 'vol', 'volume_ratio'],
        'momentum': ['close', 'turnover_rate'],
        'complex': ['close', 'vol', 'pct_chg', 'turnover_rate', 'winner_rate']
    }

    report: Dict[str, List[str]] = {}

    # Key columns every factor needs.
    base_gap = absent(['ts_code', 'trade_date'])
    if base_gap:
        report['base'] = base_gap

    for category in factor_categories:
        if category not in requirements_by_category:
            continue
        category_gap = absent(requirements_by_category[category])
        if category_gap:
            report[category] = category_gap

    return report
# Backward-compatible alias for apply_all_factors
apply_factors = apply_all_factors

if __name__ == "__main__":
    # Manual smoke check: list the available factor categories.
    print("Polars因子系统已加载")
    print("可用的因子类别:")
    for category_key, category_label in FACTOR_CATEGORIES.items():
        print(f" {category_key}: {category_label}")

View File

@@ -0,0 +1,428 @@
"""
动量因子 - 使用Polars实现
包含动量、趋势、均线等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
from scipy.stats import linregress
class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Close price minus the "deduction price" (the close ``n - 1`` rows earlier)."""

    def __init__(self, n: int = 10):
        super().__init__(
            OperatorConfig(
                name=f"price_minus_deduction_price_{n}",
                description=f"{n}日价格减抵扣价",
                required_columns=['close'],
                output_columns=[f'price_minus_deduction_price_{n}'],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_minus_deduction_price_{n}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock; must contain ``close``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the difference column added.
        """
        close = pl.col('close')
        # The deduction price is the close from n-1 periods back.
        difference = close - close.shift(self.n - 1)
        return stock_df.with_columns(
            difference.alias(f'price_minus_deduction_price_{self.n}')
        )
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """Ratio of (close - deduction price) to the ``n``-row simple moving average."""

    def __init__(self, n: int = 10):
        config = OperatorConfig(
            name=f"price_deduction_price_diff_ratio_to_sma_{n}",
            description=f"{n}日价格抵扣价差值相对SMA比率",
            required_columns=['close'],
            output_columns=[f'price_deduction_price_diff_ratio_to_sma_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_deduction_price_diff_ratio_to_sma_{n}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock; must contain ``close``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the ratio column added.
        """
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        sma = pl.col('close').rolling_mean(window_size=self.n)
        # The deduction price is the close from n-1 periods back.
        deduction_price = pl.col('close').shift(self.n - 1)
        diff = pl.col('close') - deduction_price
        # A small epsilon keeps the division defined when the SMA is zero.
        ratio = diff / (sma + 1e-8)
        return stock_df.with_columns(
            ratio.alias(f'price_deduction_price_diff_ratio_to_sma_{self.n}')
        )
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Classifies close and deduction price by their position relative to the SMA."""

    def __init__(self, n: int = 10):
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``cat_price_vs_sma_vs_deduction_price_{n}`` to one stock's rows.

        Categories (the first matching rule wins):
            1: close > SMA and deduction price > SMA
            2: close < SMA and deduction price < SMA
            3: close > SMA and deduction price <= SMA
            4: close <= SMA and deduction price > SMA
            0: anything else, including rows where a comparison is null

        Args:
            stock_df: Rows of a single stock; must contain ``close``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the integer category column added.
        """
        close = pl.col('close')
        # Polars names the rolling window argument ``window_size``.
        sma = close.rolling_mean(window_size=self.n)
        # The deduction price is the close from n-1 periods back.
        deduction_price = close.shift(self.n - 1)

        # Polars has no numpy-style select(conditions, choices); a chained
        # when/then evaluates the rules in order and falls back to otherwise().
        classification = (
            pl.when((close > sma) & (deduction_price > sma)).then(pl.lit(1))
            .when((close < sma) & (deduction_price < sma)).then(pl.lit(2))
            .when((close > sma) & (deduction_price <= sma)).then(pl.lit(3))
            .when((close <= sma) & (deduction_price > sma)).then(pl.lit(4))
            .otherwise(pl.lit(0))
        )
        return stock_df.with_columns(
            classification.alias(f'cat_price_vs_sma_vs_deduction_price_{self.n}')
        )
class VolatilitySlopeOperator(StockWiseOperator):
    """Linear-regression slope of rolling volatility over a short window."""

    def __init__(self, long_window: int = 20, short_window: int = 5):
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window}
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``volatility_slope_{long_window}_{short_window}`` to one stock's rows.

        The rolling standard deviation of ``pct_chg`` over ``long_window`` rows
        is computed first; the factor is the slope of a least-squares line
        fitted to the last ``short_window`` values of that series.

        Args:
            stock_df: Rows of a single stock; must contain ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the slope column added.
        """
        # Polars names the rolling window argument ``window_size``.
        long_vol = pl.col('pct_chg').rolling_std(window_size=self.long_window)

        def calculate_slope(window) -> float:
            # ``window`` is the Series for one rolling window.
            values = np.asarray(window.to_numpy(), dtype=float)
            if values.size < 2:
                return 0.0
            # A window still containing warm-up nulls/NaNs has no defined slope.
            if not np.isfinite(values).all():
                return float('nan')
            return float(linregress(np.arange(values.size), values).slope)

        # Expr.rolling_apply was renamed to Expr.rolling_map.
        volatility_slope = long_vol.rolling_map(
            function=calculate_slope,
            window_size=self.short_window
        )
        return stock_df.with_columns(
            volatility_slope.alias(f'volatility_slope_{self.long_window}_{self.short_window}')
        )
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Linear-regression slope of ``turnover_rate`` over a rolling window."""

    def __init__(self, window: int = 5):
        config = OperatorConfig(
            name=f"turnover_trend_strength_{window}",
            description=f"{window}日换手率趋势强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_trend_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``turnover_trend_strength_{window}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock; must contain ``turnover_rate``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the slope column added.
        """
        def calculate_slope(window) -> float:
            # ``window`` is the Series for one rolling window.
            values = np.asarray(window.to_numpy(), dtype=float)
            if values.size < 2:
                return 0.0
            # Missing data inside the window leaves the slope undefined.
            if not np.isfinite(values).all():
                return float('nan')
            return float(linregress(np.arange(values.size), values).slope)

        # Expr.rolling_apply was renamed to Expr.rolling_map.
        trend_strength = pl.col('turnover_rate').rolling_map(
            function=calculate_slope,
            window_size=self.window
        )
        return stock_df.with_columns(
            trend_strength.alias(f'turnover_trend_strength_{self.window}')
        )
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Turnover rate relative to its own rolling mean."""

    def __init__(self, window: int = 10):
        config = OperatorConfig(
            name=f"ff_turnover_surge_{window}",
            description=f"{window}日自由流通股换手率激增",
            required_columns=['turnover_rate'],
            output_columns=[f'ff_turnover_surge_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``ff_turnover_surge_{window}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock; must contain ``turnover_rate``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the surge-ratio column added.
        """
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        avg_turnover = pl.col('turnover_rate').rolling_mean(window_size=self.window)
        # A small epsilon keeps the division defined when the mean is zero.
        surge_ratio = pl.col('turnover_rate') / (avg_turnover + 1e-8)
        return stock_df.with_columns(surge_ratio.alias(f'ff_turnover_surge_{self.window}'))
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Product of the share of up-days and the share of above-average-volume days."""

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        config = OperatorConfig(
            name=f"price_volume_coherence_{price_window}_{volume_window}",
            description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
            required_columns=['close', 'vol'],
            output_columns=[f'price_volume_coherence_{price_window}_{volume_window}'],
            parameters={'price_window': price_window, 'volume_window': volume_window}
        )
        super().__init__(config)
        self.price_window = price_window
        self.volume_window = volume_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_volume_coherence_{price_window}_{volume_window}``.

        Both shares are rolling means over ``price_window`` rows of a 0/1 flag:
        "close rose versus the previous row" and "volume is above its
        ``volume_window``-row rolling mean".

        Args:
            stock_df: Rows of a single stock; must contain ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the coherence column added.
        """
        # The original used Expr.apply, which calls the helper once per scalar,
        # so its ``series.diff()`` could never work. A native expression is used.
        price_rose = pl.col('close').diff() > 0
        price_up = price_rose.cast(int).rolling_mean(window_size=self.price_window)

        # Share of days whose volume exceeds its rolling mean.
        vol_avg = pl.col('vol').rolling_mean(window_size=self.volume_window)
        vol_above_avg = pl.col('vol') > vol_avg
        vol_above_ratio = vol_above_avg.cast(int).rolling_mean(window_size=self.price_window)

        coherence = price_up * vol_above_ratio
        return stock_df.with_columns(
            coherence.alias(f'price_volume_coherence_{self.price_window}_{self.volume_window}')
        )
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Placeholder free-float-to-total turnover ratio."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="ff_to_total_turnover_ratio",
                description="自由流通股对总换手率比率",
                required_columns=['turnover_rate'],
                output_columns=['ff_to_total_turnover_ratio'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``ff_to_total_turnover_ratio`` to one stock's rows.

        Simplified implementation: ``turnover_rate`` is divided by itself plus
        a small epsilon because only one turnover column is available here.
        NOTE(review): a real ratio would need a separate total-turnover column.

        Args:
            stock_df: Rows of a single stock; must contain ``turnover_rate``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the ratio column added.
        """
        turnover = pl.col('turnover_rate')
        return stock_df.with_columns(
            (turnover / (turnover + 1e-8)).alias('ff_to_total_turnover_ratio')
        )
class VarianceOperator(StockWiseOperator):
    """Rolling variance of ``pct_chg``."""

    def __init__(self, window: int):
        config = OperatorConfig(
            name=f"variance_{window}",
            description=f"{window}日方差",
            required_columns=['pct_chg'],
            output_columns=[f'variance_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``variance_{window}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock; must contain ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the rolling-variance column added.
        """
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        variance = pl.col('pct_chg').rolling_var(window_size=self.window)
        return stock_df.with_columns(variance.alias(f'variance_{self.window}'))
class LimitUpDownOperator(StockWiseOperator):
    """Limit-up / limit-down flags and their 10-row rolling counts."""

    def __init__(self):
        config = OperatorConfig(
            name="limit_up_down",
            description="涨跌停因子",
            required_columns=['close', 'up_limit', 'down_limit'],
            output_columns=['cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the four limit columns to one stock's rows.

        ``cat_up_limit`` / ``cat_down_limit`` are booleans that are true when
        ``close`` equals ``up_limit`` / ``down_limit`` exactly; the two count
        columns are rolling sums of those flags over 10 rows.

        Args:
            stock_df: Rows of a single stock with ``close``, ``up_limit`` and
                ``down_limit``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the four columns added.
        """
        up_limit = pl.col('close') == pl.col('up_limit')
        down_limit = pl.col('close') == pl.col('down_limit')
        # Polars names the rolling window argument ``window_size``; flags are
        # cast to integers so they can be summed.
        up_count_10d = up_limit.cast(int).rolling_sum(window_size=10)
        down_count_10d = down_limit.cast(int).rolling_sum(window_size=10)
        return stock_df.with_columns([
            up_limit.alias('cat_up_limit'),
            down_limit.alias('cat_down_limit'),
            up_count_10d.alias('up_limit_count_10d'),
            down_count_10d.alias('down_limit_count_10d')
        ])
class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Simplified "consecutive limit-up" factor: the integer limit-up flag."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="consecutive_up_limit",
                description="连续涨停天数",
                required_columns=['cat_up_limit'],
                output_columns=['consecutive_up_limit'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``consecutive_up_limit`` to one stock's rows.

        Simplified implementation: the column is ``cat_up_limit`` cast to an
        integer, not a running streak length. Real use would need more
        elaborate logic.

        Args:
            stock_df: Rows of a single stock; must contain ``cat_up_limit``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the ``consecutive_up_limit`` column added.
        """
        flag_as_int = pl.col('cat_up_limit').cast(int)
        return stock_df.with_columns(flag_as_int.alias('consecutive_up_limit'))
class MomentumFactorOperator(StockWiseOperator):
    """Linear blend: ``volume_change_rate + alpha * turnover_deviation``."""

    def __init__(self, alpha: float = 0.5):
        super().__init__(
            OperatorConfig(
                name=f"momentum_factor_{alpha}",
                description=f"动量因子(alpha={alpha})",
                required_columns=['volume_change_rate', 'turnover_deviation'],
                output_columns=[f'momentum_factor_{alpha}'],
                parameters={'alpha': alpha},
            )
        )
        self.alpha = alpha

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``momentum_factor_{alpha}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with ``volume_change_rate`` and
                ``turnover_deviation``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the blended column added.
        """
        weighted_deviation = self.alpha * pl.col('turnover_deviation')
        blended = pl.col('volume_change_rate') + weighted_deviation
        return stock_df.with_columns(blended.alias(f'momentum_factor_{self.alpha}'))
class ResonanceFactorOperator(StockWiseOperator):
    """Product of ``volume_ratio`` and ``pct_chg``."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="resonance_factor",
                description="共振因子",
                required_columns=['volume_ratio', 'pct_chg'],
                output_columns=['resonance_factor'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``resonance_factor`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with ``volume_ratio`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the ``resonance_factor`` column added.
        """
        product = pl.col('volume_ratio') * pl.col('pct_chg')
        return stock_df.with_columns(product.alias('resonance_factor'))
# Default momentum operator set, instantiated once at import time and applied
# in list order by apply_momentum_factors.
# Order matters: ConsecutiveUpLimitOperator requires 'cat_up_limit', which
# LimitUpDownOperator (listed just before it) outputs.
# NOTE(review): MomentumFactorOperator requires 'volume_change_rate' and
# 'turnover_deviation', which no operator in this list outputs - presumably
# they are produced by another factor module; confirm.
MOMENTUM_OPERATORS = [
PriceMinusDeductionPriceOperator(),
PriceDeductionPriceDiffRatioToSMAOperator(),
CatPriceVsSmaVsDeductionPriceOperator(),
VolatilitySlopeOperator(),
TurnoverRateTrendStrengthOperator(5),
FreeFloatTurnoverSurgeOperator(10),
PriceVolumeTrendCoherenceOperator(),
FreeFloatToTotalTurnoverRatioOperator(),
VarianceOperator(20),
LimitUpDownOperator(),
ConsecutiveUpLimitOperator(),
MomentumFactorOperator(),
ResonanceFactorOperator(),
]
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run the momentum factor operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. ``None`` selects
            ``MOMENTUM_OPERATORS``.

    Returns:
        The DataFrame produced by feeding ``df`` through each operator in turn.
    """
    active_operators = MOMENTUM_OPERATORS if operators is None else operators

    current = df
    for op in active_operators:
        # Each operator is callable and returns the frame for the next one.
        current = op(current)
    return current

View File

@@ -0,0 +1,245 @@
"""
资金流因子 - 使用Polars实现
包含主力资金流、散户资金流等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class MoneyFlowIntensityOperator(StockWiseOperator):
    """Net large + extra-large order buying volume as a share of total volume."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="money_flow_intensity",
                description="主力资金流强度",
                required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
                output_columns=['flow_lg_elg_intensity'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``flow_lg_elg_intensity`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with the large / extra-large
                buy and sell volume columns and ``vol``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the intensity column added.
        """
        tiny = 1e-8  # keeps the division defined when vol is zero
        net_big_order_vol = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        intensity = net_big_order_vol / (pl.col('vol') + tiny)
        return stock_df.with_columns(intensity.alias('flow_lg_elg_intensity'))
class FlowDivergenceRatioOperator(StockWiseOperator):
    """Small-order net buying relative to large + extra-large net buying."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="flow_divergence_ratio",
                description="散户与主力背离度比率",
                required_columns=['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
                                  'sell_lg_vol', 'sell_elg_vol'],
                output_columns=['flow_divergence_ratio'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``flow_divergence_ratio`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with the small, large and
                extra-large buy/sell volume columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the divergence-ratio column added.
        """
        tiny = 1e-8
        small_net = pl.col('buy_sm_vol') - pl.col('sell_sm_vol')
        big_net = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        # Epsilon is always added to the denominator, and added a second time
        # when the large-order net volume is exactly zero.
        zero_guard = pl.when(big_net == 0).then(tiny).otherwise(0)
        denominator = big_net + zero_guard + tiny
        return stock_df.with_columns(
            (small_net / denominator).alias('flow_divergence_ratio')
        )
class FlowStructureChangeOperator(StockWiseOperator):
    """One-row change in the large + extra-large share of buy volume."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="flow_structure_change",
                description="资金流结构变动",
                required_columns=['buy_sm_vol', 'buy_lg_vol', 'buy_elg_vol'],
                output_columns=['flow_struct_buy_change'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``flow_struct_buy_change`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with ``buy_sm_vol``,
                ``buy_lg_vol`` and ``buy_elg_vol``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the change column added.
        """
        tiny = 1e-8  # keeps the division defined when total buy volume is zero
        all_buys = pl.col('buy_sm_vol') + pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
        big_share = (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')) / (all_buys + tiny)
        # Difference against the previous row.
        return stock_df.with_columns(big_share.diff().alias('flow_struct_buy_change'))
class FlowAccelerationOperator(StockWiseOperator):
    """Second difference of large + extra-large net buying volume."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="flow_acceleration",
                description="资金流加速度",
                required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol'],
                output_columns=['flow_lg_elg_accel'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``flow_lg_elg_accel`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with the large / extra-large
                buy and sell volume columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the acceleration column added.
        """
        net_big_order_vol = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        # First diff is the change; diffing again gives the acceleration.
        second_difference = net_big_order_vol.diff().diff()
        return stock_df.with_columns(second_difference.alias('flow_lg_elg_accel'))
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
    """Rolling correlation between large-order net flow and price momentum."""

    def __init__(self, n: int = 20, m: int = 60):
        config = OperatorConfig(
            name=f"lg_flow_mom_corr_{n}_{m}",
            description=f"{n}日大单资金流与{m}日价格动量相关性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'vol'],
            output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
            parameters={'n': n, 'm': m}
        )
        super().__init__(config)
        self.n = n
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``lg_flow_mom_corr_{n}_{m}`` to one stock's rows.

        The ``n``-row rolling sum of (net large + extra-large volume * close)
        is correlated with the ``n``-row percentage change of ``close`` over a
        rolling window of ``m`` rows.

        Args:
            stock_df: Rows of a single stock with the required columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the correlation column added.
        """
        # Net large-order flow valued at the close price.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )
        # Polars names the rolling window argument ``window_size``.
        rolling_net_lg_flow = net_lg_flow_val.rolling_sum(window_size=self.n)
        price_mom = pl.col('close').pct_change(self.n)
        # Rolling correlation is the module-level pl.rolling_corr(a, b, ...);
        # expressions have no ``rolling_corr`` method.
        correlation = pl.rolling_corr(rolling_net_lg_flow, price_mom, window_size=self.m)
        return stock_df.with_columns(
            correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
        )
class LargeBuyConsolidationOperator(StockWiseOperator):
    """Rolling mean of the large-order net flow ratio (simplified consolidation factor)."""

    def __init__(self, n: int = 20, vol_quantile: float = 0.2):
        config = OperatorConfig(
            name=f"lg_buy_consolidation_{n}",
            description=f"{n}日大单买入盘整期",
            required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol'],
            output_columns=[f'lg_buy_consolidation_{n}'],
            parameters={'n': n, 'vol_quantile': vol_quantile}
        )
        super().__init__(config)
        self.n = n
        self.vol_quantile = vol_quantile

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``lg_buy_consolidation_{n}`` to one stock's rows.

        Simplified implementation: the low-volatility filter, which would need
        a cross-sectional quantile per date (and ``vol_quantile``), is not
        applied. The output is the ``n``-row rolling mean of net large +
        extra-large volume divided by total volume.

        Args:
            stock_df: Rows of a single stock with the required columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the factor column added.
        """
        epsilon = 1e-8  # keeps the division defined when vol is zero
        net_lg_flow_ratio = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) /
            (pl.col('vol') + epsilon)
        )
        # Polars names the rolling window argument ``window_size``. The unused
        # rolling-std expression of the original has been dropped.
        rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(window_size=self.n)
        return stock_df.with_columns(
            rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
        )
# Default money-flow operator set, instantiated once at import time and
# applied in list order by apply_money_flow_factors.
MONEY_FLOW_OPERATORS = [
MoneyFlowIntensityOperator(),
FlowDivergenceRatioOperator(),
FlowStructureChangeOperator(),
FlowAccelerationOperator(),
LargeFlowMomentumCorrelationOperator(),
LargeBuyConsolidationOperator(),
]
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run the money-flow factor operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. ``None`` selects
            ``MONEY_FLOW_OPERATORS``.

    Returns:
        The DataFrame produced by feeding ``df`` through each operator in turn.
    """
    active_operators = MONEY_FLOW_OPERATORS if operators is None else operators

    current = df
    for op in active_operators:
        # Each operator is callable and returns the frame for the next one.
        current = op(current)
    return current

View File

@@ -0,0 +1,365 @@
"""
情绪因子 - 使用Polars实现
包含市场情绪、恐慌贪婪指数、反转因子等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class SentimentPanicGreedIndexOperator(StockWiseOperator):
    """Smoothed panic/greed index built from true range, ATR and opening gaps."""

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        config = OperatorConfig(
            name=f"senti_panic_greed_{window_atr}_{window_smooth}",
            description=f"{window_atr}日ATR{window_smooth}日平滑恐慌贪婪指数",
            required_columns=['open', 'high', 'low', 'close', 'pct_chg', 'vol'],
            output_columns=[f'senti_panic_greed_{window_atr}_{window_smooth}'],
            parameters={'window_atr': window_atr, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_atr = window_atr
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``senti_panic_greed_{window_atr}_{window_smooth}``.

        Raw sentiment per row is ``TR / ATR * sign(pct_chg) + 2 * gap``, where
        ``gap`` is ``open / previous close - 1`` (0 when undefined). The output
        is its rolling mean over ``window_smooth`` rows.

        Args:
            stock_df: Rows of a single stock with OHLC and ``pct_chg`` columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the sentiment column added.
        """
        prev_close = pl.col('close').shift(1)

        # True range: the largest of the three classic range measures.
        tr = pl.max_horizontal(
            pl.col('high') - pl.col('low'),
            (pl.col('high') - prev_close).abs(),
            (pl.col('low') - prev_close).abs()
        )
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        atr = tr.rolling_mean(window_size=self.window_atr)

        # Opening gap versus the previous close; the first row has none.
        gap = (pl.col('open') / prev_close - 1).fill_null(0)

        # Unused shadow/body/volatility-surprise expressions were removed.
        raw_senti = (tr / (atr + 1e-8)) * pl.col('pct_chg').sign() + gap * 2
        sentiment = raw_senti.rolling_mean(window_size=self.window_smooth)
        return stock_df.with_columns(
            sentiment.alias(f'senti_panic_greed_{self.window_atr}_{self.window_smooth}')
        )
class SentimentMarketBreadthProxyOperator(StockWiseOperator):
    """Smoothed product of return and relative volume."""

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        config = OperatorConfig(
            name=f"senti_breadth_proxy_{window_vol}_{window_smooth}",
            description=f"{window_vol}日成交量{window_smooth}日平滑市场宽度情绪代理",
            required_columns=['pct_chg', 'vol'],
            output_columns=[f'senti_breadth_proxy_{window_vol}_{window_smooth}'],
            parameters={'window_vol': window_vol, 'window_smooth': window_smooth}
        )
        super().__init__(config)
        self.window_vol = window_vol
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``senti_breadth_proxy_{window_vol}_{window_smooth}``.

        Args:
            stock_df: Rows of a single stock with ``pct_chg`` and ``vol``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the breadth-proxy column added.
        """
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        rolling_avg_vol = pl.col('vol').rolling_mean(window_size=self.window_vol)
        # Return scaled by volume relative to its rolling mean.
        raw_breadth = pl.col('pct_chg') * (pl.col('vol') / (rolling_avg_vol + 1e-8))
        breadth_proxy = raw_breadth.rolling_mean(window_size=self.window_smooth)
        return stock_df.with_columns(
            breadth_proxy.alias(f'senti_breadth_proxy_{self.window_vol}_{self.window_smooth}')
        )
class SentimentReversalIndicatorOperator(StockWiseOperator):
    """Short-term reversal: negative cumulative return scaled by volatility."""

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        config = OperatorConfig(
            name=f"senti_reversal_{window_ret}_{window_vol}",
            description=f"{window_ret}日收益{window_vol}日波动短期情绪反转因子",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'senti_reversal_{window_ret}_{window_vol}'],
            parameters={'window_ret': window_ret, 'window_vol': window_vol}
        )
        super().__init__(config)
        self.window_ret = window_ret
        self.window_vol = window_vol

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``senti_reversal_{window_ret}_{window_vol}`` to one stock's rows.

        Args:
            stock_df: Rows of a single stock with ``close`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the reversal column added.
        """
        return_m = pl.col('close').pct_change(self.window_ret)
        # Polars names the rolling window argument ``window_size``; the
        # previous ``window=`` keyword raised TypeError.
        volatility_m = pl.col('pct_chg').rolling_std(window_size=self.window_vol)
        # The minus sign turns recent gains into a negative (reversal) signal.
        reversal_factor = -return_m * volatility_m
        return stock_df.with_columns(
            reversal_factor.alias(f'senti_reversal_{self.window_ret}_{self.window_vol}')
        )
class DailyMomentumBenchmarkOperator(StockWiseOperator):
    """Placeholder for the daily positive / negative momentum benchmarks."""

    def __init__(self):
        super().__init__(
            OperatorConfig(
                name="daily_momentum_benchmark",
                description="日级别动量基准",
                required_columns=['pct_chg'],
                output_columns=['daily_positive_benchmark', 'daily_negative_benchmark'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append both benchmark columns, filled with the constant 0.

        The real benchmarks are market-wide values that need a cross-sectional
        (date-wise) computation; this stock-wise version only emits
        placeholders.

        Args:
            stock_df: Rows of a single stock.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with ``daily_positive_benchmark`` and
            ``daily_negative_benchmark`` added.
        """
        placeholders = [
            pl.lit(0).alias(column_name)
            for column_name in ('daily_positive_benchmark', 'daily_negative_benchmark')
        ]
        return stock_df.with_columns(placeholders)
class DailyDeviationOperator(StockWiseOperator):
    """Deviation of a stock's daily return from the same-sign benchmark."""

    def __init__(self):
        config = OperatorConfig(
            name="daily_deviation",
            description="日级别偏离度",
            required_columns=['pct_chg', 'daily_positive_benchmark', 'daily_negative_benchmark'],
            output_columns=['daily_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``daily_deviation`` to one stock's rows.

        Rules (the first matching rule wins):
            * ``pct_chg > 0`` and positive benchmark > 0:
              ``pct_chg - daily_positive_benchmark``
            * ``pct_chg < 0`` and negative benchmark < 0:
              ``pct_chg - daily_negative_benchmark``
            * otherwise: 0

        Args:
            stock_df: Rows of a single stock with ``pct_chg`` and both
                benchmark columns.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``stock_df`` with the ``daily_deviation`` column added.
        """
        pct_chg = pl.col('pct_chg')
        positive_benchmark = pl.col('daily_positive_benchmark')
        negative_benchmark = pl.col('daily_negative_benchmark')

        # Polars has no numpy-style select(conditions, choices); a chained
        # when/then evaluates the rules in order and falls back to otherwise().
        deviation = (
            pl.when((pct_chg > 0) & (positive_benchmark > 0))
            .then(pct_chg - positive_benchmark)
            .when((pct_chg < 0) & (negative_benchmark < 0))
            .then(pct_chg - negative_benchmark)
            .otherwise(pl.lit(0.0))
        )
        return stock_df.with_columns(deviation.alias('daily_deviation'))
class CatSentimentMomentumVolumeSpikeOperator(StockWiseOperator):
    """Categorical label built from momentum, volume-ratio and price-range flags."""

    def __init__(self, return_period: int = 3, return_threshold: float = 0.05,
                 volume_ratio_threshold: float = 1.5, current_pct_chg_min: float = -0.01,
                 current_pct_chg_max: float = 0.03):
        """Store the thresholds and register the operator metadata.

        Args:
            return_period: Number of rows for the close-to-close return.
            return_threshold: The return must exceed this for the momentum flag.
            volume_ratio_threshold: ``volume_ratio`` must exceed this.
            current_pct_chg_min: Exclusive lower bound for ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound for ``pct_chg``.
        """
        super().__init__(
            OperatorConfig(
                name=f"cat_senti_mom_vol_spike_{return_period}",
                description=f"{return_period}日情绪动量成交量激增分类",
                required_columns=['close', 'pct_chg', 'volume_ratio'],
                output_columns=[f'cat_senti_mom_vol_spike_{return_period}'],
                parameters={
                    'return_period': return_period,
                    'return_threshold': return_threshold,
                    'volume_ratio_threshold': volume_ratio_threshold,
                    'current_pct_chg_min': current_pct_chg_min,
                    'current_pct_chg_max': current_pct_chg_max,
                },
            )
        )
        self.return_period = return_period
        self.return_threshold = return_threshold
        self.volume_ratio_threshold = volume_ratio_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cat_senti_mom_vol_spike_{return_period}``.

        The column is the string concatenation of three boolean flags, each
        cast to string: momentum, volume ratio, and current-day price range.
        """
        pct_chg = pl.col('pct_chg')
        flags = [
            # n-row return above the momentum threshold
            pl.col('close').pct_change(self.return_period) > self.return_threshold,
            # volume ratio above its threshold
            pl.col('volume_ratio') > self.volume_ratio_threshold,
            # today's change strictly inside (min, max)
            (pct_chg > self.current_pct_chg_min) & (pct_chg < self.current_pct_chg_max),
        ]
        momentum, volume, price = (flag.cast(str) for flag in flags)
        label = momentum + volume + price
        return stock_df.with_columns(
            label.alias(f'cat_senti_mom_vol_spike_{self.return_period}')
        )
class CatSentimentPreBreakoutOperator(StockWiseOperator):
    """Categorical label describing a consolidation-before-breakout setup.

    Four boolean conditions (range contraction, volume atrophy, price
    stability, mild up-day with elevated volume ratio) are each cast to string
    and concatenated into ``cat_senti_pre_breakout``.
    """

    def __init__(self, atr_short_n: int = 10, atr_long_m: int = 40,
                 vol_atrophy_n: int = 10, vol_atrophy_m: int = 40,
                 price_stab_n: int = 5, price_stab_threshold: float = 0.05,
                 current_pct_chg_min: float = 0.005, current_pct_chg_max: float = 0.07,
                 volume_ratio_threshold: float = 1.2):
        """Store the window lengths and thresholds and register the metadata.

        Args:
            atr_short_n: Short window for the mean high-low range.
            atr_long_m: Long window for the mean high-low range.
            vol_atrophy_n: Short window for mean volume.
            vol_atrophy_m: Long window for mean volume.
            price_stab_n: Window for the high/low envelope.
            price_stab_threshold: Maximum envelope width relative to close.
            current_pct_chg_min: Exclusive lower bound for ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound for ``pct_chg``.
            volume_ratio_threshold: ``volume_ratio`` must exceed this.
        """
        config = OperatorConfig(
            name="cat_senti_pre_breakout",
            description="情绪突破前盘整分类",
            required_columns=['high', 'low', 'close', 'vol', 'pct_chg', 'volume_ratio'],
            output_columns=['cat_senti_pre_breakout'],
            parameters={'atr_short_n': atr_short_n, 'atr_long_m': atr_long_m,
                        'vol_atrophy_n': vol_atrophy_n, 'vol_atrophy_m': vol_atrophy_m,
                        'price_stab_n': price_stab_n, 'price_stab_threshold': price_stab_threshold,
                        'current_pct_chg_min': current_pct_chg_min,
                        'current_pct_chg_max': current_pct_chg_max,
                        'volume_ratio_threshold': volume_ratio_threshold}
        )
        super().__init__(config)
        self.atr_short_n = atr_short_n
        self.atr_long_m = atr_long_m
        self.vol_atrophy_n = vol_atrophy_n
        self.vol_atrophy_m = vol_atrophy_m
        self.price_stab_n = price_stab_n
        self.price_stab_threshold = price_stab_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max
        self.volume_ratio_threshold = volume_ratio_threshold

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``cat_senti_pre_breakout`` label column.

        Args:
            stock_df: Frame with the columns listed in ``required_columns``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``cat_senti_pre_breakout``.
        """
        # NOTE: all rolling calls use ``window_size``; Polars has no ``window`` keyword.
        # 1. Range contraction (high-low range used as an ATR proxy).
        price_range = pl.col('high') - pl.col('low')
        atr_short = price_range.rolling_mean(window_size=self.atr_short_n)
        atr_long = price_range.rolling_mean(window_size=self.atr_long_m)
        cond_vol_contraction = atr_short < (0.7 * atr_long)

        # 2. Volume atrophy: short-window mean volume below 70% of long-window mean.
        vol_short = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_n)
        vol_long = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_m)
        cond_vol_atrophy = vol_short < (0.7 * vol_long)

        # 3. Recent price stability: envelope width relative to close.
        rolling_max_h = pl.col('high').rolling_max(window_size=self.price_stab_n)
        rolling_min_l = pl.col('low').rolling_min(window_size=self.price_stab_n)
        price_stability = (rolling_max_h - rolling_min_l) / pl.col('close')
        cond_price_stability = price_stability < self.price_stab_threshold

        # 4. Current-day signal: moderate gain together with elevated volume ratio.
        cond_price_signal = (
            (pl.col('pct_chg') > self.current_pct_chg_min)
            & (pl.col('pct_chg') < self.current_pct_chg_max)
        )
        cond_vol_signal = pl.col('volume_ratio') > self.volume_ratio_threshold
        cond_current_day_signal = cond_price_signal & cond_vol_signal

        # Concatenate the four flags (as strings) into one label.
        result = (
            cond_vol_contraction.cast(str) + cond_vol_atrophy.cast(str)
            + cond_price_stability.cast(str) + cond_current_day_signal.cast(str)
        )
        return stock_df.with_columns(result.alias('cat_senti_pre_breakout'))
class StrongInflowSignalOperator(StockWiseOperator):
    """Binary signal for strong large/extra-large order inflow."""

    def __init__(self, intensity_avg_n: int = 3, intensity_threshold: float = 0.01,
                 consecutive_buy_n: int = 2, accel_positive_m: int = 1):
        """Store the parameters and register the operator metadata.

        Args:
            intensity_avg_n: Window for the mean of ``flow_lg_elg_intensity``.
            intensity_threshold: The mean intensity must exceed this.
            consecutive_buy_n: Number of trailing rows that must all have
                positive intensity.
            accel_positive_m: Number of trailing rows that must all have
                positive ``flow_lg_elg_accel``.
        """
        config = OperatorConfig(
            name="senti_strong_inflow",
            description="强主力资金流入信号",
            required_columns=['flow_lg_elg_intensity', 'flow_lg_elg_accel'],
            output_columns=['senti_strong_inflow'],
            parameters={'intensity_avg_n': intensity_avg_n, 'intensity_threshold': intensity_threshold,
                        'consecutive_buy_n': consecutive_buy_n, 'accel_positive_m': accel_positive_m}
        )
        super().__init__(config)
        self.intensity_avg_n = intensity_avg_n
        self.intensity_threshold = intensity_threshold
        self.consecutive_buy_n = consecutive_buy_n
        self.accel_positive_m = accel_positive_m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the integer ``senti_strong_inflow`` flag.

        If either input column is missing the flag is the literal 0.

        Args:
            stock_df: Frame that should contain ``flow_lg_elg_intensity`` and
                ``flow_lg_elg_accel``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``senti_strong_inflow``.
        """
        required_cols = ['flow_lg_elg_intensity', 'flow_lg_elg_accel']
        if not all(col in stock_df.columns for col in required_cols):
            # Best-effort fallback: no flow data means no signal.
            return stock_df.with_columns(pl.lit(0).alias('senti_strong_inflow'))

        # NOTE: rolling calls use ``window_size``; Polars has no ``window`` keyword.
        # 1. Mean intensity over the last N rows above the threshold.
        avg_intensity = pl.col('flow_lg_elg_intensity').rolling_mean(
            window_size=self.intensity_avg_n
        )
        cond_avg_intensity = avg_intensity > self.intensity_threshold

        # 2. Every one of the last N rows is a net-buy row (sum of 0/1 flags == N).
        is_net_buy = (pl.col('flow_lg_elg_intensity') > 0).cast(int)
        cond_consecutive_buy = (
            is_net_buy.rolling_sum(window_size=self.consecutive_buy_n)
            == self.consecutive_buy_n
        )

        # 3. Every one of the last M rows has positive acceleration.
        is_accel_positive = (pl.col('flow_lg_elg_accel') > 0).cast(int)
        cond_accel_positive = (
            is_accel_positive.rolling_sum(window_size=self.accel_positive_m)
            == self.accel_positive_m
        )

        strong_inflow = cond_avg_intensity & cond_consecutive_buy & cond_accel_positive
        return stock_df.with_columns(strong_inflow.cast(int).alias('senti_strong_inflow'))
# Default sentiment operators; apply_sentiment_factors runs them in list order
# when no explicit operator list is passed. DailyMomentumBenchmarkOperator
# comes before DailyDeviationOperator because the latter reads its outputs.
SENTIMENT_OPERATORS = [
SentimentPanicGreedIndexOperator(),
SentimentMarketBreadthProxyOperator(),
SentimentReversalIndicatorOperator(),
DailyMomentumBenchmarkOperator(),
DailyDeviationOperator(),
CatSentimentMomentumVolumeSpikeOperator(),
CatSentimentPreBreakoutOperator(),
StrongInflowSignalOperator(),
]
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of sentiment operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Callables applied one after another, each receiving the
            previous result; ``None`` selects ``SENTIMENT_OPERATORS``.

    Returns:
        The frame returned by the last operator (``df`` itself when the
        sequence is empty).
    """
    pipeline = SENTIMENT_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current

View File

@@ -0,0 +1,488 @@
"""
技术指标因子 - 使用Polars实现
包含ATR、OBV、RSI、EMA等技术指标相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
import talib
class ATROperator(StockWiseOperator):
    """Average True Range computed with TA-Lib."""

    def __init__(self, period: int = 14):
        """Register the metadata and remember the look-back ``period``."""
        super().__init__(
            OperatorConfig(
                name=f"atr_{period}",
                description=f"{period}日ATR",
                required_columns=['high', 'low', 'close'],
                output_columns=[f'atr_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``atr_{period}`` computed by ``talib.ATR`` on high/low/close."""
        highs, lows, closes = (
            stock_df[column].to_numpy() for column in ('high', 'low', 'close')
        )
        atr_series = pl.Series(talib.ATR(highs, lows, closes, timeperiod=self.period))
        return stock_df.with_columns(atr_series.alias(f'atr_{self.period}'))
class OBVOperator(StockWiseOperator):
    """On-Balance Volume computed with TA-Lib."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="obv",
                description="OBV能量潮",
                required_columns=['close', 'vol'],
                output_columns=['obv'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``obv`` column computed by ``talib.OBV`` on close and vol."""
        closes = stock_df['close'].to_numpy()
        volumes = stock_df['vol'].to_numpy()
        obv_series = pl.Series(talib.OBV(closes, volumes))
        return stock_df.with_columns(obv_series.alias('obv'))
class OBVMAOperator(StockWiseOperator):
    """Simple moving average of the ``obv`` column, computed with TA-Lib."""

    def __init__(self, period: int = 6):
        """Register the metadata and remember the averaging ``period``."""
        super().__init__(
            OperatorConfig(
                name=f"obv_ma_{period}",
                description=f"{period}日OBV均线",
                required_columns=['obv'],
                output_columns=[f'maobv_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``maobv_{period}``, the ``talib.SMA`` of the existing ``obv`` column."""
        obv_array = stock_df['obv'].to_numpy()
        sma_series = pl.Series(talib.SMA(obv_array, timeperiod=self.period))
        return stock_df.with_columns(sma_series.alias(f'maobv_{self.period}'))
class RSIOperator(StockWiseOperator):
    """Relative Strength Index of the close price, computed with TA-Lib."""

    def __init__(self, period: int = 3):
        """Register the metadata and remember the RSI ``period``."""
        super().__init__(
            OperatorConfig(
                name=f"rsi_{period}",
                description=f"{period}日RSI",
                required_columns=['close'],
                output_columns=[f'rsi_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``rsi_{period}`` computed by ``talib.RSI`` on ``close``."""
        closes = stock_df['close'].to_numpy()
        rsi_series = pl.Series(talib.RSI(closes, timeperiod=self.period))
        return stock_df.with_columns(rsi_series.alias(f'rsi_{self.period}'))
class EMAOperator(StockWiseOperator):
    """Exponential moving average of the close price, computed with TA-Lib.

    The output column is named ``_ema_{period}`` (leading underscore).
    """

    def __init__(self, period: int):
        """Register the metadata and remember the EMA ``period``."""
        super().__init__(
            OperatorConfig(
                name=f"ema_{period}",
                description=f"{period}日EMA",
                required_columns=['close'],
                output_columns=[f'_ema_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``_ema_{period}`` computed by ``talib.EMA`` on ``close``."""
        closes = stock_df['close'].to_numpy()
        ema_series = pl.Series(talib.EMA(closes, timeperiod=self.period))
        return stock_df.with_columns(ema_series.alias(f'_ema_{self.period}'))
class ReturnOperator(StockWiseOperator):
    """Close-to-close percentage return over ``period`` rows."""

    def __init__(self, period: int):
        """Register the metadata and remember the return ``period``."""
        super().__init__(
            OperatorConfig(
                name=f"return_{period}",
                description=f"{period}日收益率",
                required_columns=['close'],
                output_columns=[f'return_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``return_{period}`` as ``close.pct_change(period)``."""
        period_return = pl.col('close').pct_change(self.period)
        return stock_df.with_columns(period_return.alias(f'return_{self.period}'))
class ActivityFactorOperator(StockWiseOperator):
    """Arctangent-scaled slope of an EMA, with configurable period and scale."""

    def __init__(self, period: int, scale: float):
        """Register the metadata and remember ``period`` and ``scale``.

        Args:
            period: Selects the input column ``_ema_{period}``.
            scale: Divisor applied to the scaled EMA change before the arctangent.
        """
        super().__init__(
            OperatorConfig(
                name=f"act_factor_{period}",
                description=f"{period}日活跃度因子",
                required_columns=[f'_ema_{period}'],
                output_columns=[f'act_factor{period}'],
                parameters={'period': period, 'scale': scale},
            )
        )
        self.period = period
        self.scale = scale

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor{period}`` = arctan(EMA % change * 57.3 / scale)."""
        ema = pl.col(f'_ema_{self.period}')
        # One-row percentage change of the EMA.
        pct_step = (ema / ema.shift(1) - 1) * 100
        factor = (pct_step * 57.3 / self.scale).arctan()
        return stock_df.with_columns(factor.alias(f'act_factor{self.period}'))
class ActivityFactor5Operator(StockWiseOperator):
    """Activity factor from the 5-period EMA, written to ``act_factor1``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="act_factor_5",
                description="5日活跃度因子",
                required_columns=['_ema_5'],
                output_columns=['act_factor1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor1`` = arctan(``_ema_5`` % change * 57.3 / 50)."""
        ema = pl.col('_ema_5')
        # One-row percentage change of the EMA.
        pct_step = (ema / ema.shift(1) - 1) * 100
        factor = (pct_step * 57.3 / 50).arctan()
        return stock_df.with_columns(factor.alias('act_factor1'))
class ActivityFactor13Operator(StockWiseOperator):
    """Activity factor from the 13-period EMA, written to ``act_factor2``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="act_factor_13",
                description="13日活跃度因子",
                required_columns=['_ema_13'],
                output_columns=['act_factor2'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor2`` = arctan(``_ema_13`` % change * 57.3 / 40)."""
        ema = pl.col('_ema_13')
        # One-row percentage change of the EMA.
        pct_step = (ema / ema.shift(1) - 1) * 100
        factor = (pct_step * 57.3 / 40).arctan()
        return stock_df.with_columns(factor.alias('act_factor2'))
class ActivityFactor20Operator(StockWiseOperator):
    """Activity factor from the 20-period EMA, written to ``act_factor3``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="act_factor_20",
                description="20日活跃度因子",
                required_columns=['_ema_20'],
                output_columns=['act_factor3'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor3`` = arctan(``_ema_20`` % change * 57.3 / 21)."""
        ema = pl.col('_ema_20')
        # One-row percentage change of the EMA.
        pct_step = (ema / ema.shift(1) - 1) * 100
        factor = (pct_step * 57.3 / 21).arctan()
        return stock_df.with_columns(factor.alias('act_factor3'))
class ActivityFactor60Operator(StockWiseOperator):
    """Activity factor from the 60-period EMA, written to ``act_factor4``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="act_factor_60",
                description="60日活跃度因子",
                required_columns=['_ema_60'],
                output_columns=['act_factor4'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor4`` = arctan(``_ema_60`` % change * 57.3 / 10)."""
        ema = pl.col('_ema_60')
        # One-row percentage change of the EMA.
        pct_step = (ema / ema.shift(1) - 1) * 100
        factor = (pct_step * 57.3 / 10).arctan()
        return stock_df.with_columns(factor.alias('act_factor4'))
class ActivityFactor5and6Operator(StockWiseOperator):
    """Composite activity factors derived from ``act_factor1`` .. ``act_factor4``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="act_factor_5_6",
            description="活跃度因子5和6",
            # act_factor5 sums all four base factors, so all four are declared;
            # previously act_factor3/act_factor4 were used but not listed.
            required_columns=['act_factor1', 'act_factor2', 'act_factor3', 'act_factor4'],
            output_columns=['act_factor5', 'act_factor6'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``act_factor5`` and ``act_factor6``.

        * ``act_factor5`` = act_factor1 + act_factor2 + act_factor3 + act_factor4
        * ``act_factor6`` = (act_factor1 - act_factor2)
          / (sqrt(act_factor1^2 + act_factor2^2) + 1e-8)

        Args:
            stock_df: Frame containing ``act_factor1`` .. ``act_factor4``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus the two composite columns.
        """
        f1 = pl.col('act_factor1')
        f2 = pl.col('act_factor2')
        factor5 = f1 + f2 + pl.col('act_factor3') + pl.col('act_factor4')

        numerator = f1 - f2
        denominator = (f1.pow(2) + f2.pow(2)).sqrt()
        # The small epsilon avoids division by zero when both factors are 0.
        factor6 = numerator / (denominator + 1e-8)

        return stock_df.with_columns([
            factor5.alias('act_factor5'),
            factor6.alias('act_factor6')
        ])
class Alpha003Operator(StockWiseOperator):
    """Candle body relative to the bar range: (close - open) / (high - low)."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        super().__init__(
            OperatorConfig(
                name="alpha_003",
                description="Alpha003因子",
                required_columns=['open', 'close', 'high', 'low'],
                output_columns=['alpha_003'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``alpha_003``; rows where high equals low get 0 instead of a division."""
        body = pl.col('close') - pl.col('open')
        bar_range = pl.col('high') - pl.col('low')
        has_range = pl.col('high') != pl.col('low')
        ratio = pl.when(has_range).then(body / bar_range).otherwise(0)
        return stock_df.with_columns(ratio.alias('alpha_003'))
class Alpha007Operator(StockWiseOperator):
    """Rolling 5-row correlation between close price and volume."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="alpha_007",
            description="Alpha007因子",
            required_columns=['close', 'vol'],
            output_columns=['alpha_007'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``alpha_007``, the 5-row rolling correlation of ``close`` and ``vol``.

        Args:
            stock_df: Frame containing ``close`` and ``vol``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``alpha_007``.
        """
        # Rolling correlation is a top-level Polars function; expressions have
        # no ``rolling_corr`` method.
        corr_5 = pl.rolling_corr(pl.col('close'), pl.col('vol'), window_size=5)
        return stock_df.with_columns(corr_5.alias('alpha_007'))
class Alpha013Operator(StockWiseOperator):
    """Difference between the 5-row and 20-row rolling sums of close."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="alpha_013",
            description="Alpha013因子",
            required_columns=['close'],
            output_columns=['alpha_013'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``alpha_013`` = rolling_sum(close, 5) - rolling_sum(close, 20).

        Args:
            stock_df: Frame containing ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``alpha_013``.
        """
        # Polars names the rolling length ``window_size`` (not ``window``).
        sum_5 = pl.col('close').rolling_sum(window_size=5)
        sum_20 = pl.col('close').rolling_sum(window_size=20)
        alpha_013 = sum_5 - sum_20
        return stock_df.with_columns(alpha_013.alias('alpha_013'))
class Alpha022Operator(StockWiseOperator):
    """Modified Alpha022: change in high/volume covariance scaled by close volatility."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="alpha_022",
            description="Alpha022改进因子",
            required_columns=['high', 'low', 'close', 'vol'],
            output_columns=['alpha_22_improved'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``alpha_22_improved``.

        Computed as ``-1 * diff5(rolling_cov(high, vol, 5)) * rolling_std(close, 20)``.

        Args:
            stock_df: Frame containing ``high``, ``vol`` and ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``alpha_22_improved``.
        """
        # Rolling covariance is a top-level Polars function; expressions have
        # no ``rolling_cov`` method.
        cov_5 = pl.rolling_cov(pl.col('high'), pl.col('vol'), window_size=5)
        # Five-row change of the covariance.
        delta_cov = cov_5.diff(5)
        # 20-row standard deviation of close (``window_size``, not ``window``).
        std_close = pl.col('close').rolling_std(window_size=20)
        # Simplification: the raw standard deviation stands in for its rank.
        rank_std = std_close
        alpha_22 = -1 * delta_cov * rank_std
        return stock_df.with_columns(alpha_22.alias('alpha_22_improved'))
class BBIRatioOperator(StockWiseOperator):
    """Ratio of the BBI (mean of 3/6/12/24-row SMAs of close) to close."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="bbi_ratio",
            description="BBI比率因子",
            required_columns=['close'],
            output_columns=['bbi_ratio_factor'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``bbi_ratio_factor`` = BBI / close.

        Args:
            stock_df: Frame containing ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``bbi_ratio_factor``.
        """
        close = pl.col('close')
        # Polars names the rolling length ``window_size`` (not ``window``).
        sma3 = close.rolling_mean(window_size=3)
        sma6 = close.rolling_mean(window_size=6)
        sma12 = close.rolling_mean(window_size=12)
        sma24 = close.rolling_mean(window_size=24)
        bbi = (sma3 + sma6 + sma12 + sma24) / 4
        bbi_ratio = bbi / close
        return stock_df.with_columns(bbi_ratio.alias('bbi_ratio_factor'))
# Default technical-indicator operators; apply_technical_factors runs them in
# list order when no explicit list is passed. Order matters: OBVOperator comes
# before OBVMAOperator (which reads 'obv'), the EMA operators come before the
# activity-factor operators (which read '_ema_*'), and the four single
# activity factors come before ActivityFactor5and6Operator.
TECHNICAL_OPERATORS = [
ATROperator(14),
ATROperator(6),
OBVOperator(),
OBVMAOperator(6),
RSIOperator(3),
EMAOperator(5),
EMAOperator(13),
EMAOperator(20),
EMAOperator(60),
ReturnOperator(5),
ReturnOperator(20),
ActivityFactor5Operator(),
ActivityFactor13Operator(),
ActivityFactor20Operator(),
ActivityFactor60Operator(),
ActivityFactor5and6Operator(),
Alpha003Operator(),
Alpha007Operator(),
Alpha013Operator(),
Alpha022Operator(),
BBIRatioOperator(),
]
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of technical-indicator operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Callables applied one after another, each receiving the
            previous result; ``None`` selects ``TECHNICAL_OPERATORS``.

    Returns:
        The frame returned by the last operator (``df`` itself when the
        sequence is empty).
    """
    pipeline = TECHNICAL_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current

View File

@@ -0,0 +1,419 @@
"""
波动率因子 - 使用Polars实现
包含上行波动率、下行波动率、波动率比率等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class UpsideVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of the positive returns only."""

    def __init__(self, window: int = 20):
        """Register the metadata and remember the rolling ``window`` length."""
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``upside_volatility_{window}``.

        Within each rolling window, the population variance of the positive
        ``pct_chg`` values is computed as E[x^2] - E[x]^2 (dividing by the
        number of positive rows), floored at 0, and square-rooted. Windows
        with fewer than two positive rows yield null.

        Args:
            stock_df: Frame containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``upside_volatility_{window}``.
        """
        is_up = pl.col('pct_chg') > 0
        # Non-positive rows contribute 0 to the sums and nothing to the count.
        pos_returns = pl.when(is_up).then(pl.col('pct_chg')).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Cast the boolean mask so the rolling sum operates on integers.
        # Polars names the rolling length ``window_size`` (not ``window``).
        rolling_pos_count = is_up.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_pos_sum = pos_returns.rolling_sum(window_size=self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(window_size=self.window)

        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Too few samples -> null.
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Floor at 0 (``clip`` takes ``lower_bound``, not ``lower``) so that
        # floating-point round-off cannot produce a negative variance.
        pos_var = pos_var.clip(lower_bound=0)
        upside_vol = pos_var.sqrt()
        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
class DownsideVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of the negative returns only."""

    def __init__(self, window: int = 20):
        """Register the metadata and remember the rolling ``window`` length."""
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``downside_volatility_{window}``.

        Within each rolling window, the population variance of the negative
        ``pct_chg`` values is computed as E[x^2] - E[x]^2 (dividing by the
        number of negative rows), floored at 0, and square-rooted. Windows
        with fewer than two negative rows yield null.

        Args:
            stock_df: Frame containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``downside_volatility_{window}``.
        """
        is_down = pl.col('pct_chg') < 0
        # Non-negative rows contribute 0 to the sums and nothing to the count.
        neg_returns = pl.when(is_down).then(pl.col('pct_chg')).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Cast the boolean mask so the rolling sum operates on integers.
        # Polars names the rolling length ``window_size`` (not ``window``).
        rolling_neg_count = is_down.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_neg_sum = neg_returns.rolling_sum(window_size=self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(window_size=self.window)

        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Too few samples -> null.
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Floor at 0 (``clip`` takes ``lower_bound``, not ``lower``) so that
        # floating-point round-off cannot produce a negative variance.
        neg_var = neg_var.clip(lower_bound=0)
        downside_vol = neg_var.sqrt()
        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
class VolatilityRatioOperator(StockWiseOperator):
    """Ratio of upside to downside rolling return volatility."""

    def __init__(self, window: int = 20):
        """Register the metadata and remember the rolling ``window`` length."""
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``volatility_ratio_{window}``.

        Upside volatility is the rolling std of ``pct_chg`` with non-positive
        values replaced by 0; downside volatility is the mirror image. Any
        non-finite or missing ratio (division by zero, 0/0, warm-up rows)
        is reported as 0.

        Args:
            stock_df: Frame containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``volatility_ratio_{window}``.
        """
        pct_chg = pl.col('pct_chg')
        pos_returns = pl.when(pct_chg > 0).then(pct_chg).otherwise(0)
        neg_returns = pl.when(pct_chg < 0).then(pct_chg).otherwise(0)

        # Polars names the rolling length ``window_size`` (not ``window``).
        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)
        vol_ratio = upside_vol / downside_vol

        # ``is_finite`` is false for +/-inf and NaN and null for null inputs,
        # so every such row falls through to 0. The previous replace/fill_null
        # pair left NaN (0/0) untouched.
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(0.0)
        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
class ReturnSkewnessOperator(StockWiseOperator):
    """Rolling skewness of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Register the metadata and remember the rolling ``window`` length."""
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``return_skewness_{window}`` using Polars' ``rolling_skew`` defaults.

        Args:
            stock_df: Frame containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``return_skewness_{window}``.
        """
        # Polars names the rolling length ``window_size`` (not ``window``).
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)
        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
class ReturnKurtosisOperator(StockWiseOperator):
    """Rolling kurtosis of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Register the metadata and remember the rolling ``window`` length."""
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``return_kurtosis_{window}`` using Polars' ``rolling_kurtosis`` defaults.

        Args:
            stock_df: Frame containing ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``return_kurtosis_{window}``.
        """
        # Polars exposes this as ``rolling_kurtosis(window_size=...)``; there
        # is no ``rolling_kurt`` method.
        # NOTE(review): requires a Polars release that ships rolling_kurtosis
        # — confirm against the pinned version.
        kurtosis = pl.col('pct_chg').rolling_kurtosis(window_size=self.window)
        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
class VolatilityAmplificationOperator(StockWiseOperator):
    """Return volatility scaled by how far close sits below ``weight_avg``."""

    def __init__(self, n: int = 20):
        """Register the metadata and remember the volatility window ``n``."""
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``vol_amp_loss_{n}``.

        Computed as ``rolling_std(pct_chg, n) * max(0, weight_avg - close) / close``,
        so the value is 0 whenever close is at or above ``weight_avg``.

        Args:
            stock_df: Frame containing ``pct_chg``, ``weight_avg`` and ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``vol_amp_loss_{n}``.
        """
        # Polars names the rolling length ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)
        # Relative loss depth; zero when there is no loss.
        loss_degree = pl.max_horizontal(0, pl.col('weight_avg') - pl.col('close')) / pl.col('close')
        vol_amp = vol_n * loss_degree
        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Rolling count of high-volume down days that occur while in profit."""

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Store the parameters and register the operator metadata.

        Args:
            n: Window for the volume mean/std baseline.
            m: Window over which events are counted.
            profit_thresh: Close must exceed ``weight_avg * (1 + profit_thresh)``.
            drop_thresh: ``pct_chg`` must be below this value.
            vol_multiple: Number of standard deviations above the mean volume.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``vol_drop_profit_cnt_{m}``.

        An event is a row that is profitable, dropping and high-volume at the
        same time; the output is the number of events in the last ``m`` rows.

        Args:
            stock_df: Frame with ``close``, ``pct_chg``, ``vol`` and ``weight_avg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``vol_drop_profit_cnt_{m}``.
        """
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)
        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Volume baseline; Polars names the rolling length ``window_size``.
        rolling_mean_vol = pl.col('vol').rolling_mean(window_size=self.n)
        rolling_std_vol = pl.col('vol').rolling_std(window_size=self.n).fill_null(0)
        is_high_vol = pl.col('vol') > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol
        event_cnt = event.cast(int).rolling_sum(window_size=self.m)
        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Interaction of return volatility with the size of net large-order flow."""

    def __init__(self, n: int = 20):
        """Register the metadata and remember the rolling window ``n``."""
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``lg_flow_vol_interact_{n}``.

        Computed as ``rolling_std(pct_chg, n) * rolling_mean(|net large flow| /
        total value, n)``, where both flow and total are volumes times close.

        Args:
            stock_df: Frame with the columns listed in ``required_columns``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``lg_flow_vol_interact_{n}``.
        """
        epsilon = 1e-8  # keeps the ratio finite when total value is 0

        # Polars names the rolling length ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net large + extra-large order volume, valued at close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
             - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )
        total_val = pl.col('vol') * pl.col('close')
        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)
        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n
        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """n-row return divided by the n-row return volatility."""

    def __init__(self, n: int = 20):
        """Register the metadata and remember the window ``n``."""
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``vol_adj_roc_{n}`` = pct_change(close, n) / (rolling_std(pct_chg, n) + 1e-10).

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``vol_adj_roc_{n}``.
        """
        roc_n = pl.col('close').pct_change(self.n)
        # Polars names the rolling length ``window_size`` (not ``window``).
        # Missing volatility is treated as 0 before the epsilon is added.
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)
        vol_adj_roc = roc_n / (vol_n + 1e-10)  # epsilon avoids division by zero
        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling standard deviation of one-row close returns."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``std_return_5`` = rolling_std(close.pct_change(), 5).

        Args:
            stock_df: Frame containing ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``std_return_5``.
        """
        returns = pl.col('close').pct_change()
        # Polars names the rolling length ``window_size`` (not ``window``).
        std_5 = returns.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('std_return_5'))
class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling standard deviation of one-row close returns."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``std_return_90`` = rolling_std(close.pct_change(), 90).

        Args:
            stock_df: Frame containing ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``std_return_90``.
        """
        returns = pl.col('close').pct_change()
        # Polars names the rolling length ``window_size`` (not ``window``).
        std_90 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90.alias('std_return_90'))
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling std of close returns, with close lagged by 10 rows."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``std_return_90_2`` = rolling_std(close.shift(10).pct_change(), 90).

        Args:
            stock_df: Frame containing ``close``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``std_return_90_2``.
        """
        # Returns of the close series lagged by 10 rows.
        returns = pl.col('close').shift(10).pct_change()
        # Polars names the rolling length ``window_size`` (not ``window``).
        std_90_2 = returns.rolling_std(window_size=90)
        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
# Default volatility operators, each built with its default parameters;
# apply_volatility_factors runs them in list order when no explicit list is
# passed.
VOLATILITY_OPERATORS = [
UpsideVolatilityOperator(),
DownsideVolatilityOperator(),
VolatilityRatioOperator(),
ReturnSkewnessOperator(),
ReturnKurtosisOperator(),
VolatilityAmplificationOperator(),
HighVolDropWhenProfitableOperator(),
LargeFlowVolatilityInteractionOperator(),
VolatilityAdjustedROCPOperator(),
StandardDeviation5Operator(),
StandardDeviation90Operator(),
StandardDeviation90ShiftedOperator(),
]
def apply_volatility_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of volatility operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Callables applied one after another, each receiving the
            previous result; ``None`` selects ``VOLATILITY_OPERATORS``.

    Returns:
        The frame returned by the last operator (``df`` itself when the
        sequence is empty).
    """
    pipeline = VOLATILITY_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current

View File

@@ -0,0 +1,480 @@
"""
成交量因子 - 使用Polars实现
包含成交量变化率、突破信号、换手率等相关因子计算
"""
import polars as pl
import numpy as np
from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
class VolumeChangeRateOperator(StockWiseOperator):
    """Short-term volume change: 2-row mean volume relative to 10-row mean."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="volume_change_rate",
            description="短期成交量变化率",
            required_columns=['vol'],
            output_columns=['volume_change_rate'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``volume_change_rate`` = rolling_mean(vol, 2) / rolling_mean(vol, 10) - 1.

        Args:
            stock_df: Frame containing ``vol``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``volume_change_rate``.
        """
        # Polars names the rolling length ``window_size`` (not ``window``).
        vol_mean_2 = pl.col('vol').rolling_mean(window_size=2)
        vol_mean_10 = pl.col('vol').rolling_mean(window_size=10)
        change_rate = (vol_mean_2 / vol_mean_10) - 1
        return stock_df.with_columns(change_rate.alias('volume_change_rate'))
class VolumeBreakoutOperator(StockWiseOperator):
    """Flag rows whose volume exceeds the maximum of the previous 5 rows."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="volume_breakout",
            description="成交量突破信号",
            required_columns=['vol'],
            output_columns=['cat_volume_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the boolean ``cat_volume_breakout`` column.

        Args:
            stock_df: Frame containing ``vol``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``cat_volume_breakout``.
        """
        # The reference maximum is taken over the 5 rows BEFORE the current
        # one (rolling max shifted by one). A window that includes the current
        # row can never be exceeded by it, so the flag would always be False.
        # Polars names the rolling length ``window_size`` (not ``window``).
        prior_max_vol_5 = pl.col('vol').rolling_max(window_size=5).shift(1)
        breakout = pl.col('vol') > prior_max_vol_5
        return stock_df.with_columns(breakout.alias('cat_volume_breakout'))
class TurnoverDeviationOperator(StockWiseOperator):
    """Z-score of the turnover rate against its 3-row rolling mean and std."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="turnover_deviation",
            description="换手率均线偏离度",
            required_columns=['turnover_rate'],
            output_columns=['turnover_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``turnover_deviation`` = (rate - rolling_mean_3) / rolling_std_3.

        Args:
            stock_df: Frame containing ``turnover_rate``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``turnover_deviation``.
        """
        rate = pl.col('turnover_rate')
        # Polars names the rolling length ``window_size`` (not ``window``).
        mean_turnover = rate.rolling_mean(window_size=3)
        std_turnover = rate.rolling_std(window_size=3)
        deviation = (rate - mean_turnover) / std_turnover
        return stock_df.with_columns(deviation.alias('turnover_deviation'))
class TurnoverSpikeOperator(StockWiseOperator):
    """Flag rows whose turnover rate spikes above the recent baseline."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="turnover_spike",
            description="换手率激增信号",
            required_columns=['turnover_rate'],
            output_columns=['cat_turnover_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the boolean ``cat_turnover_spike`` column.

        A spike is a turnover rate above mean + 2 * std, where mean and std
        are taken over the 3 rows preceding the current one.

        Args:
            stock_df: Frame containing ``turnover_rate``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``cat_turnover_spike``.
        """
        rate = pl.col('turnover_rate')
        # The baseline is shifted by one row so it excludes the current value.
        # With the current row inside a 3-row window, its sample z-score is
        # bounded by 2/sqrt(3) (about 1.15), so a 2-sigma test could never fire.
        # Polars names the rolling length ``window_size`` (not ``window``).
        mean_turnover = rate.rolling_mean(window_size=3).shift(1)
        std_turnover = rate.rolling_std(window_size=3).shift(1)
        spike = rate > (mean_turnover + 2 * std_turnover)
        return stock_df.with_columns(spike.alias('cat_turnover_spike'))
class VolumeRatioAverageOperator(StockWiseOperator):
    """3-row rolling mean of ``volume_ratio``."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="volume_ratio_average",
            description="量比均值",
            required_columns=['volume_ratio'],
            output_columns=['avg_volume_ratio'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``avg_volume_ratio`` = rolling_mean(volume_ratio, 3).

        Args:
            stock_df: Frame containing ``volume_ratio``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``avg_volume_ratio``.
        """
        # Polars names the rolling length ``window_size`` (not ``window``).
        avg_ratio = pl.col('volume_ratio').rolling_mean(window_size=3)
        return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio'))
class VolumeRatioBreakoutOperator(StockWiseOperator):
    """Flag rows whose volume ratio exceeds the maximum of the previous 5 rows."""

    def __init__(self):
        """Register the operator metadata; there are no tunable parameters."""
        config = OperatorConfig(
            name="volume_ratio_breakout",
            description="量比突破信号",
            required_columns=['volume_ratio'],
            output_columns=['cat_volume_ratio_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the boolean ``cat_volume_ratio_breakout`` column.

        Args:
            stock_df: Frame containing ``volume_ratio``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` plus ``cat_volume_ratio_breakout``.
        """
        # The reference maximum is taken over the 5 rows BEFORE the current
        # one (rolling max shifted by one). A window that includes the current
        # row can never be exceeded by it, so the flag would always be False.
        # Polars names the rolling length ``window_size`` (not ``window``).
        prior_max_ratio_5 = pl.col('volume_ratio').rolling_max(window_size=5).shift(1)
        breakout = pl.col('volume_ratio') > prior_max_ratio_5
        return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout'))
class VolumeSpikeOperator(StockWiseOperator):
    """Baseline volume used for spike detection (``vol_spike``).

    Despite its name, the ``vol_spike`` column holds the 20-row rolling mean of
    ``vol``; it is not itself a spike flag.
    """

    def __init__(self):
        """Register the operator's name, required input and output column."""
        config = OperatorConfig(
            name="volume_spike",
            description="成交量激增",
            required_columns=['vol'],
            output_columns=['vol_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the 20-row rolling mean of ``vol`` as ``vol_spike``.

        Args:
            stock_df: Frame containing a ``vol`` column. The rolling window
                follows row order (presumably one stock sorted by trade
                date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``vol_spike`` column.
        """
        # polars' rolling functions take ``window_size`` (not ``window``).
        vol_mean_20 = pl.col('vol').rolling_mean(window_size=20)
        return stock_df.with_columns(vol_mean_20.alias('vol_spike'))
class VolumeStd5Operator(StockWiseOperator):
    """5-row rolling standard deviation of volume changes (``vol_std_5``).

    The standard deviation is taken over the row-to-row percentage change of
    ``vol``, not over the raw volume.
    """

    def __init__(self):
        """Register the operator's name, required input and output column."""
        config = OperatorConfig(
            name="volume_std_5",
            description="5日成交量标准差",
            required_columns=['vol'],
            output_columns=['vol_std_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the 5-row rolling std of the percentage change of ``vol``.

        Args:
            stock_df: Frame containing a ``vol`` column. Row order defines
                both the percentage change and the rolling window (presumably
                one stock sorted by trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``vol_std_5`` column.
        """
        # Row-to-row relative change of volume.
        vol_pct_change = pl.col('vol').pct_change()
        # polars' rolling functions take ``window_size`` (not ``window``).
        std_5 = vol_pct_change.rolling_std(window_size=5)
        return stock_df.with_columns(std_5.alias('vol_std_5'))
class TurnoverRateMeanOperator(StockWiseOperator):
    """n-row rolling mean of the turnover rate (``turnover_rate_mean_{n}``)."""

    def __init__(self, n: int):
        """Configure the operator.

        Args:
            n: Rolling window length in rows; also embedded in the operator
                name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_rate_mean_{n}",
            description=f"{n}日换手率均值",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_mean_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``self.n``-row rolling mean of ``turnover_rate``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. The rolling
                window follows row order (presumably one stock sorted by
                trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``turnover_rate_mean_{n}`` column.
        """
        # polars' rolling functions take ``window_size`` (not ``window``).
        mean_rate = pl.col('turnover_rate').rolling_mean(window_size=self.n)
        return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}'))
class VolumeSpikeCategoryOperator(StockWiseOperator):
    """Boolean volume-spike flag derived from ``vol`` and ``vol_spike``."""

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="volume_spike_category",
                description="成交量激增分类",
                required_columns=['vol', 'vol_spike'],
                output_columns=['cat_vol_spike'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cat_vol_spike``: True where ``vol`` exceeds twice ``vol_spike``.

        Args:
            stock_df: Frame containing ``vol`` and ``vol_spike`` columns.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added boolean ``cat_vol_spike`` column.
        """
        # Threshold is double the reference value stored in ``vol_spike``.
        threshold = 2 * pl.col('vol_spike')
        is_spike = pl.col('vol') > threshold
        return stock_df.with_columns(is_spike.alias('cat_vol_spike'))
class TurnoverVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of the turnover rate (``turnover_std_{window}``)."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_volatility_{window}",
            description=f"{window}日换手率波动率",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_std_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the ``self.window``-row rolling std of ``turnover_rate``.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. The rolling
                window follows row order (presumably one stock sorted by
                trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``turnover_std_{window}`` column.
        """
        # polars' rolling functions take ``window_size`` (not ``window``).
        turnover_std = pl.col('turnover_rate').rolling_std(window_size=self.window)
        return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}'))
class VolumeCovarianceOperator(StockWiseOperator):
    """Rolling covariance between ``high`` and ``vol`` (``cov_{window}``)."""

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"volume_covariance_{window}",
            description=f"{window}日成交量协方差",
            required_columns=['high', 'vol'],
            output_columns=[f'cov_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the rolling covariance of ``high`` and ``vol``.

        Args:
            stock_df: Frame containing ``high`` and ``vol`` columns. The
                rolling window follows row order (presumably one stock sorted
                by trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``cov_{window}`` column.
        """
        # polars exposes rolling covariance as the top-level function
        # ``pl.rolling_cov`` (expressions have no ``rolling_cov`` method).
        # Aliasing the expression directly also guarantees the output column
        # carries the ``cov_{window}`` name.
        rolling_cov = pl.rolling_cov(
            pl.col('high'), pl.col('vol'), window_size=self.window
        )
        return stock_df.with_columns(rolling_cov.alias(f'cov_{self.window}'))
class VolumeCovarianceDeltaOperator(StockWiseOperator):
    """Lagged difference of the ``cov_5`` column (``delta_cov_{period}``).

    The input column name ``cov_5`` is fixed; ``period`` only controls the
    lag of the difference and the output column name.
    """

    def __init__(self, period: int = 5):
        """Configure the operator.

        Args:
            period: Number of rows between the two values being differenced.
        """
        super().__init__(
            OperatorConfig(
                name=f"volume_covariance_delta_{period}",
                description=f"{period}日成交量协方差变化",
                required_columns=['cov_5'],
                output_columns=[f'delta_cov_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cov_5`` minus its value ``self.period`` rows earlier.

        Args:
            stock_df: Frame containing a ``cov_5`` column.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``delta_cov_{period}`` column.
        """
        lag = self.period
        return stock_df.with_columns(
            pl.col('cov_5').diff(lag).alias(f'delta_cov_{lag}')
        )
class TurnoverRateAccelerationOperator(StockWiseOperator):
    """Short-minus-long rolling mean of the turnover rate."""

    def __init__(self, short_window: int = 5, long_window: int = 20):
        """Configure the operator.

        Args:
            short_window: Length in rows of the short rolling mean.
            long_window: Length in rows of the long rolling mean.
        """
        config = OperatorConfig(
            name=f"turnover_acceleration_{short_window}_{long_window}",
            description=f"{short_window}日对{long_window}日换手率加速度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the difference between the short and long rolling means.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. Rolling
                windows follow row order (presumably one stock sorted by
                trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added
            ``turnover_rate_acceleration_{short_window}_{long_window}`` column.
        """
        turnover = pl.col('turnover_rate')
        # polars' rolling functions take ``window_size`` (not ``window``).
        short_avg = turnover.rolling_mean(window_size=self.short_window)
        long_avg = turnover.rolling_mean(window_size=self.long_window)
        acceleration = short_avg - long_avg
        return stock_df.with_columns(
            acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}')
        )
class VolumeSustainabilityOperator(StockWiseOperator):
    """Share of recent rows whose volume exceeded its long rolling mean."""

    def __init__(self, short_window: int = 10, long_window: int = 30):
        """Configure the operator.

        Args:
            short_window: Number of rows over which the above-average share
                is averaged.
            long_window: Length in rows of the rolling mean used as the
                reference volume level.
        """
        config = OperatorConfig(
            name=f"volume_sustain_{short_window}_{long_window}",
            description=f"{short_window}日成交量大于{long_window}日均值占比",
            required_columns=['vol'],
            output_columns=[f'vol_sustain_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the rolling share of rows with ``vol`` above its long mean.

        Args:
            stock_df: Frame containing a ``vol`` column. Rolling windows
                follow row order (presumably one stock sorted by trade
                date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added
            ``vol_sustain_{short_window}_{long_window}`` column.
        """
        # polars' rolling functions take ``window_size`` (not ``window``).
        long_avg = pl.col('vol').rolling_mean(window_size=self.long_window)
        # Per-row indicator: did volume exceed its own long rolling mean?
        above_avg = pl.col('vol') > long_avg
        # Averaging the 0/1 indicator over the short window gives the share.
        sustain_ratio = above_avg.cast(int).rolling_mean(window_size=self.short_window)
        return stock_df.with_columns(
            sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}')
        )
class TurnoverRelativeStrengthOperator(StockWiseOperator):
    """Turnover rate relative to its own rolling mean."""

    def __init__(self, window: int = 20):
        """Configure the operator.

        Args:
            window: Rolling window length in rows; also embedded in the
                operator name and the output column name.
        """
        config = OperatorConfig(
            name=f"turnover_relative_strength_{window}",
            description=f"{window}日换手率相对强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_relative_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``turnover_rate`` divided by its ``self.window``-row rolling mean.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column. The rolling
                window follows row order (presumably one stock sorted by
                trade date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``turnover_relative_strength_{window}``
            column.
        """
        turnover = pl.col('turnover_rate')
        # polars' rolling functions take ``window_size`` (not ``window``).
        long_avg = turnover.rolling_mean(window_size=self.window)
        # NOTE(review): a zero rolling mean is not guarded against.
        relative_strength = turnover / long_avg
        return stock_df.with_columns(
            relative_strength.alias(f'turnover_relative_strength_{self.window}')
        )
class AmountOutlierOperator(StockWiseOperator):
    """Rolling z-score of the traded amount's deviation from its rolling mean."""

    def __init__(self, window: int = 10):
        """Configure the operator.

        Args:
            window: Rolling window length in rows, used for every rolling
                statistic in this operator; also embedded in the operator
                name and the output column name.
        """
        config = OperatorConfig(
            name=f"amount_outlier_{window}",
            description=f"{window}日成交额异常值",
            required_columns=['amount'],
            output_columns=[f'amount_outlier_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add the outlier score of ``amount``.

        The score is computed in two steps: first the deviation of ``amount``
        from its rolling mean, then the rolling z-score of that deviation.

        Args:
            stock_df: Frame containing an ``amount`` column. Rolling windows
                follow row order (presumably one stock sorted by trade
                date -- confirm against the caller).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with an added ``amount_outlier_{window}`` column.
        """
        amount = pl.col('amount')
        # polars' rolling functions take ``window_size`` (not ``window``).
        avg_amount = amount.rolling_mean(window_size=self.window)
        amount_diff = amount - avg_amount
        # Simplified time-series z-score; real use may call for
        # cross-sectional standardisation instead.
        mean_diff = amount_diff.rolling_mean(window_size=self.window)
        std_diff = amount_diff.rolling_std(window_size=self.window)
        # The small epsilon keeps the denominator non-zero.
        outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8)
        return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}'))
# Default collection of volume / turnover factor operators.
# apply_volume_factors applies them in list order when it is called without an
# explicit operator list, so an operator that consumes another operator's
# output column must come after its producer.
VOLUME_OPERATORS = [
VolumeChangeRateOperator(),
VolumeBreakoutOperator(),
TurnoverDeviationOperator(),
TurnoverSpikeOperator(),
VolumeRatioAverageOperator(),
VolumeRatioBreakoutOperator(),
VolumeSpikeOperator(),  # produces 'vol_spike'
VolumeStd5Operator(),
TurnoverRateMeanOperator(20),
VolumeSpikeCategoryOperator(),  # requires 'vol_spike' from VolumeSpikeOperator
TurnoverVolatilityOperator(),
TurnoverRateAccelerationOperator(),
VolumeSustainabilityOperator(),
TurnoverRelativeStrengthOperator(),
AmountOutlierOperator(),
]
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of volume factor operators over a frame.

    Each operator is called with the frame returned by the previous one, so
    the operators are chained in list order.

    Args:
        df: Input polars DataFrame.
        operators: Operators to apply; when ``None`` the module-level
            ``VOLUME_OPERATORS`` list is used.

    Returns:
        The frame returned by the last operator (``df`` itself when the
        operator list is empty).
    """
    pipeline = VOLUME_OPERATORS if operators is None else operators
    current = df
    for step in pipeline:
        current = step(current)
    return current

File diff suppressed because one or more lines are too long