factor优化,改为polars
This commit is contained in:
196
main/factor/operator_base.py
Normal file
196
main/factor/operator_base.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
因子算子基础框架 - 简化版本
|
||||
提供股票截面和日期截面两个基础函数
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
from typing import Callable, Any, Optional, Union
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def apply_stockwise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """Apply an operator function to each stock's rows separately.

    The frame is split by ``ts_code`` and every per-stock frame is ordered
    by ``trade_date`` before it is handed to ``operator_func``.

    Args:
        df: Input frame; must contain ``ts_code`` and ``trade_date``.
        operator_func: Callable that receives one stock's frame plus the
            extra arguments and returns the processed frame.
        *args, **kwargs: Forwarded unchanged to ``operator_func``.

    Returns:
        The per-stock results concatenated and sorted by ``ts_code`` and
        ``trade_date``. An empty input frame is returned unchanged. Stocks
        whose operator call raised keep their original rows; any columns
        the operator added for other stocks are null for them.

    Raises:
        ValueError: If ``ts_code`` or ``trade_date`` is missing from ``df``.
    """
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    # Nothing to group: mirror the original "no results" branch.
    if df.height == 0:
        return df

    results = []
    # One sort + one partition pass instead of a full-frame filter per stock.
    for stock_df in df.sort('trade_date').partition_by('ts_code', maintain_order=True):
        try:
            results.append(operator_func(stock_df, *args, **kwargs))
        except Exception as e:
            ts_code = stock_df['ts_code'][0]
            logger.error(f"股票 {ts_code} 处理失败: {e}")
            # Best effort: keep the untouched rows of the failing stock.
            results.append(stock_df)

    # Fallback frames lack the operator's new columns, so a plain vertical
    # concat would raise on the schema mismatch; the diagonal strategy
    # aligns by column name and fills the gaps with nulls.
    return pl.concat(results, how='diagonal_relaxed').sort(['ts_code', 'trade_date'])
|
||||
|
||||
|
||||
def apply_datewise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """Apply an operator function to each trading date's rows separately.

    Args:
        df: Input frame; must contain ``ts_code`` and ``trade_date``.
        operator_func: Callable that receives one date's frame plus the
            extra arguments and returns the processed frame.
        *args, **kwargs: Forwarded unchanged to ``operator_func``.

    Returns:
        The per-date results concatenated and sorted by ``ts_code`` and
        ``trade_date``. An empty input frame is returned unchanged. Dates
        whose operator call raised keep their original rows; any columns
        the operator added for other dates are null for them.

    Raises:
        ValueError: If ``ts_code`` or ``trade_date`` is missing from ``df``.
    """
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    # Nothing to group: mirror the original "no results" branch.
    if df.height == 0:
        return df

    results = []
    # A single partition pass replaces one full-frame filter per date.
    for date_df in df.partition_by('trade_date', maintain_order=True):
        try:
            results.append(operator_func(date_df, *args, **kwargs))
        except Exception as e:
            trade_date = date_df['trade_date'][0]
            logger.error(f"日期 {trade_date} 处理失败: {e}")
            # Best effort: keep the untouched rows of the failing date.
            results.append(date_df)

    # Fallback frames lack the operator's new columns; the diagonal strategy
    # aligns by column name and null-fills instead of raising.
    return pl.concat(results, how='diagonal_relaxed').sort(['ts_code', 'trade_date'])
|
||||
|
||||
|
||||
# 常用算子函数示例
|
||||
def rolling_mean_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """Append a rolling mean of ``column`` (meant for a single stock's frame).

    Args:
        df: Frame to extend.
        column: Name of the column to average.
        window: Rolling window size passed to ``rolling_mean``.
        output_col: Name of the new column; when ``None`` it defaults to
            ``f'{column}_mean_{window}'``.

    Returns:
        ``df`` with the rolling-mean column added.
    """
    target = f'{column}_mean_{window}' if output_col is None else output_col
    rolling_expr = pl.col(column).rolling_mean(window_size=window)
    return df.with_columns(rolling_expr.alias(target))
|
||||
|
||||
|
||||
def rolling_std_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """Append a rolling standard deviation of ``column`` (single-stock frame).

    Args:
        df: Frame to extend.
        column: Name of the column whose dispersion is measured.
        window: Rolling window size passed to ``rolling_std``.
        output_col: Name of the new column; when ``None`` it defaults to
            ``f'{column}_std_{window}'``.

    Returns:
        ``df`` with the rolling-std column added.
    """
    target = f'{column}_std_{window}' if output_col is None else output_col
    rolling_expr = pl.col(column).rolling_std(window_size=window)
    return df.with_columns(rolling_expr.alias(target))
|
||||
|
||||
|
||||
def rank_operator(df: pl.DataFrame, column: str, ascending: bool = True, output_col: str = None) -> pl.DataFrame:
    """Append a dense rank of ``column`` (meant for a single date's frame).

    Args:
        df: Frame to extend.
        column: Name of the column to rank.
        ascending: When ``True`` smaller values get smaller ranks; the flag
            is inverted into the ``descending`` argument of ``rank``.
        output_col: Name of the new column; when ``None`` it defaults to
            ``f'{column}_rank'``.

    Returns:
        ``df`` with the rank column added.
    """
    target = f'{column}_rank' if output_col is None else output_col
    descending = not ascending
    rank_expr = pl.col(column).rank(method='dense', descending=descending)
    return df.with_columns(rank_expr.alias(target))
|
||||
|
||||
|
||||
def pct_change_operator(df: pl.DataFrame, column: str, periods: int = 1, output_col: str = None) -> pl.DataFrame:
    """Append the fractional change of ``column`` over ``periods`` rows.

    The value is ``column / column.shift(periods) - 1`` (single-stock frame).

    Args:
        df: Frame to extend.
        column: Name of the column to compare with its lagged self.
        periods: Number of rows to shift by.
        output_col: Name of the new column; when ``None`` it defaults to
            ``f'{column}_pct_change_{periods}'``.

    Returns:
        ``df`` with the change column added.
    """
    target = f'{column}_pct_change_{periods}' if output_col is None else output_col
    current = pl.col(column)
    lagged = pl.col(column).shift(periods)
    change_expr = (current / lagged) - 1
    return df.with_columns(change_expr.alias(target))
|
||||
250
main/factor/operator_framework.py
Normal file
250
main/factor/operator_framework.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
因子算子框架 - 使用Polars实现统一的因子计算
|
||||
避免数据泄露,支持切面计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Callable, Optional, Union, Any
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class OperatorConfig:
    """Static description of an operator."""

    # Identifier of the operator (BaseOperator copies it to ``self.name``).
    name: str
    # Human-readable description.
    description: str
    # Columns the input frame must contain before the operator runs.
    required_columns: List[str]
    # Columns the operator is expected to add.
    output_columns: List[str]
    # Free-form parameter mapping recorded alongside the operator.
    parameters: Dict[str, Any]
|
||||
|
||||
|
||||
class DataSlice:
    """Wrapper around a frame offering per-stock and per-date views."""

    def __init__(self, df: pl.DataFrame):
        """Store ``df`` and validate it immediately."""
        self.df = df
        self.validate_data()

    def validate_data(self):
        """Raise ``ValueError`` unless ``ts_code`` and ``trade_date`` exist."""
        available = self.df.columns
        missing_cols = [name for name in ('ts_code', 'trade_date') if name not in available]
        if missing_cols:
            raise ValueError(f"缺少必需列: {missing_cols}")

    def get_stock_slice(self, ts_code: str) -> pl.DataFrame:
        """Return the rows of one stock, ordered by ``trade_date``."""
        only_stock = self.df.filter(pl.col('ts_code') == ts_code)
        return only_stock.sort('trade_date')

    def get_date_slice(self, trade_date: str) -> pl.DataFrame:
        """Return the rows of one trading date."""
        same_date = pl.col('trade_date') == trade_date
        return self.df.filter(same_date)

    def get_stock_list(self) -> List[str]:
        """Return the distinct ``ts_code`` values as a list."""
        codes = self.df['ts_code'].unique()
        return codes.to_list()

    def get_date_list(self) -> List[str]:
        """Return the distinct ``trade_date`` values as a list."""
        dates = self.df['trade_date'].unique()
        return dates.to_list()
|
||||
|
||||
|
||||
class BaseOperator(ABC):
    """Abstract operator: validates its input, then delegates to ``apply``."""

    def __init__(self, config: OperatorConfig):
        """Keep the config and mirror its most-used fields as attributes."""
        self.config = config
        self.name = config.name
        self.required_columns = config.required_columns
        self.output_columns = config.output_columns

    def validate_input(self, df: pl.DataFrame) -> bool:
        """Return ``True`` when every required column is present.

        A warning naming the missing columns is logged otherwise.
        """
        missing_cols = [col for col in self.required_columns if col not in df.columns]
        if not missing_cols:
            return True
        logger.warning(f"算子 {self.name} 缺少必需列: {missing_cols}")
        return False

    @abstractmethod
    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator on ``df``; implemented by subclasses."""

    def _with_null_outputs(self, df: pl.DataFrame) -> pl.DataFrame:
        """Return ``df`` with every output column added as a null literal."""
        for col in self.output_columns:
            df = df.with_columns(pl.lit(None).alias(col))
        return df

    def __call__(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Run the operator, falling back to null output columns.

        The fallback is used both when validation fails and when ``apply``
        raises; in the latter case the error is logged, not propagated.
        """
        if not self.validate_input(df):
            return self._with_null_outputs(df)

        try:
            return self.apply(df, **kwargs)
        except Exception as e:
            logger.error(f"算子 {self.name} 应用失败: {e}")
            return self._with_null_outputs(df)
|
||||
|
||||
|
||||
class StockWiseOperator(BaseOperator):
    """Operator evaluated independently on each stock's time series."""

    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Run ``apply_stock`` per ``ts_code`` group and stitch the results.

        Each group is ordered by ``trade_date`` before ``apply_stock`` sees
        it. A group whose ``apply_stock`` call raises is logged and kept
        with null output columns instead of aborting the whole run.

        Returns:
            The concatenated per-stock frames sorted by ``ts_code`` and
            ``trade_date``.
        """
        if df.height == 0:
            # No groups exist; run the hook once on the empty frame so the
            # result still carries the output columns (the original raised
            # from ``pl.concat([])`` here).
            return self.apply_stock(df, **kwargs)

        results = []
        # One sort + one partition pass instead of a full-frame filter per stock.
        for stock_df in df.sort('trade_date').partition_by('ts_code', maintain_order=True):
            try:
                results.append(self.apply_stock(stock_df, **kwargs))
            except Exception as e:
                ts_code = stock_df['ts_code'][0]
                logger.error(f"股票 {ts_code} 算子应用失败: {e}")
                # Keep the stock's rows, with null placeholders for outputs.
                for col in self.output_columns:
                    stock_df = stock_df.with_columns(pl.lit(None).alias(col))
                results.append(stock_df)

        # The placeholders are Null-typed while successful groups carry real
        # dtypes; the relaxed diagonal strategy casts to a common supertype
        # and aligns by name, so one failing stock no longer breaks the concat.
        return pl.concat(results, how='diagonal_relaxed').sort(['ts_code', 'trade_date'])

    @abstractmethod
    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator on a single stock's frame."""
|
||||
|
||||
|
||||
class DateWiseOperator(BaseOperator):
    """Operator evaluated independently on each trading date's rows."""

    def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Run ``apply_date`` per ``trade_date`` group and stitch the results.

        A group whose ``apply_date`` call raises is logged and kept with
        null output columns instead of aborting the whole run.

        Returns:
            The concatenated per-date frames sorted by ``ts_code`` and
            ``trade_date``.
        """
        if df.height == 0:
            # No groups exist; run the hook once on the empty frame so the
            # result still carries the output columns (the original raised
            # from ``pl.concat([])`` here).
            return self.apply_date(df, **kwargs)

        results = []
        # A single partition pass replaces one full-frame filter per date.
        for date_df in df.partition_by('trade_date', maintain_order=True):
            try:
                results.append(self.apply_date(date_df, **kwargs))
            except Exception as e:
                trade_date = date_df['trade_date'][0]
                logger.error(f"日期 {trade_date} 算子应用失败: {e}")
                # Keep the date's rows, with null placeholders for outputs.
                for col in self.output_columns:
                    date_df = date_df.with_columns(pl.lit(None).alias(col))
                results.append(date_df)

        # Null-typed placeholders need supertype casting to be concatenated
        # with successfully computed groups.
        return pl.concat(results, how='diagonal_relaxed').sort(['ts_code', 'trade_date'])

    @abstractmethod
    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the operator on a single date's frame."""
|
||||
|
||||
|
||||
class RollingOperator(StockWiseOperator):
    """Base class for per-stock operators that work on a rolling window."""

    def __init__(self, config: OperatorConfig, window: int, min_periods: Optional[int] = None):
        """Store the window settings.

        Args:
            config: Operator description passed to the base class.
            window: Rolling window length.
            min_periods: Stored as given when truthy; otherwise it falls
                back to ``max(1, window // 2)``. Nothing in this class
                reads the attribute.
        """
        super().__init__(config)
        self.window = window
        fallback_periods = max(1, window // 2)
        self.min_periods = min_periods if min_periods else fallback_periods

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Delegate the per-stock computation to ``apply_rolling``."""
        return self.apply_rolling(stock_df, **kwargs)

    @abstractmethod
    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Rolling-window logic for one stock; implemented by subclasses."""
|
||||
|
||||
|
||||
# 基础算子实现
|
||||
class ReturnOperator(RollingOperator):
    """Simple return of ``close`` over a fixed number of rows."""

    def __init__(self, periods: int = 1):
        """Configure the operator for a ``periods``-row return."""
        spec = OperatorConfig(
            name=f"return_{periods}",
            description=f"{periods}期收益率",
            required_columns=['close'],
            output_columns=[f'return_{periods}'],
            parameters={'periods': periods},
        )
        # The window covers the current row plus the lagged one.
        super().__init__(spec, window=periods + 1)
        self.periods = periods

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``return_{periods}`` = ``close / close.shift(periods) - 1``."""
        lagged_close = pl.col('close').shift(self.periods)
        simple_return = pl.col('close') / lagged_close - 1
        return stock_df.with_columns(simple_return.alias(f'return_{self.periods}'))
|
||||
|
||||
|
||||
class VolatilityOperator(RollingOperator):
    """Rolling standard deviation of ``pct_chg``."""

    def __init__(self, window: int = 20):
        """Configure the operator for a ``window``-row rolling std."""
        config = OperatorConfig(
            name=f"volatility_{window}",
            description=f"{window}日波动率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config, window=window)

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``volatility_{window}`` as the rolling std of ``pct_chg``."""
        # Polars names this argument ``window_size``; the former ``window=``
        # keyword raised, which the per-stock handler then logged and
        # replaced with nulls for every stock.
        rolling_vol = pl.col('pct_chg').rolling_std(window_size=self.window)
        return stock_df.with_columns(rolling_vol.alias(f'volatility_{self.window}'))
|
||||
|
||||
|
||||
class MeanOperator(RollingOperator):
    """Rolling mean of an arbitrary column."""

    def __init__(self, column: str, window: int):
        """Configure the operator.

        Args:
            column: Column to average.
            window: Rolling window length.
        """
        config = OperatorConfig(
            name=f"mean_{column}_{window}",
            description=f"{column}的{window}日均值",
            required_columns=[column],
            output_columns=[f'mean_{column}_{window}'],
            parameters={'column': column, 'window': window}
        )
        super().__init__(config, window=window)
        self.column = column

    def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``mean_{column}_{window}`` as the rolling mean of the column."""
        # Polars names this argument ``window_size``; ``window=`` raised.
        rolling_avg = pl.col(self.column).rolling_mean(window_size=self.window)
        return stock_df.with_columns(rolling_avg.alias(f'mean_{self.column}_{self.window}'))
|
||||
|
||||
|
||||
class RankOperator(DateWiseOperator):
    """Cross-sectional rank of a column within each trading date."""

    def __init__(self, column: str, ascending: bool = True):
        """Configure the operator.

        Args:
            column: Column to rank.
            ascending: When ``True`` smaller values receive smaller ranks.
        """
        config = OperatorConfig(
            name=f"rank_{column}",
            description=f"{column}的排名",
            required_columns=[column],
            output_columns=[f'rank_{column}'],
            parameters={'column': column, 'ascending': ascending}
        )
        super().__init__(config)
        self.column = column
        self.ascending = ascending

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``rank_{column}`` as the dense rank within one date.

        NOTE(review): no ``apply_date`` was visible for this class, which
        left it abstract and impossible to instantiate. This body mirrors
        the dense ranking that ``rank_operator`` in operator_base.py uses;
        confirm that 'dense' is the intended tie-breaking method.
        """
        ranked = pl.col(self.column).rank(method='dense', descending=not self.ascending)
        return date_df.with_columns(ranked.alias(f'rank_{self.column}'))
|
||||
|
||||
312
main/factor/polars_chip_factors.py
Normal file
312
main/factor/polars_chip_factors.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
筹码分布因子 - 使用Polars实现
|
||||
包含筹码集中度、分布偏度、浮筹比例等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
class ChipConcentrationOperator(StockWiseOperator):
    """Width of the 5%-95% cost band relative to the close price."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="chip_concentration",
            description="筹码集中度",
            required_columns=['cost_95pct', 'cost_5pct', 'close'],
            output_columns=['chip_concentration_range'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_concentration_range``.

        Computed as ``(cost_95pct - cost_5pct) / (close + 1e-8)``; the small
        constant keeps the denominator away from zero.
        """
        epsilon = 1e-8
        band_width = pl.col('cost_95pct') - pl.col('cost_5pct')
        scale = pl.col('close') + epsilon
        return stock_df.with_columns((band_width / scale).alias('chip_concentration_range'))
|
||||
|
||||
|
||||
class ChipSkewnessOperator(StockWiseOperator):
    """Relative gap between the weighted-average cost and the median cost."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="chip_skewness",
            description="筹码分布偏度",
            required_columns=['weight_avg', 'cost_50pct'],
            output_columns=['chip_skewness'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_skewness``.

        Computed as ``(weight_avg - cost_50pct) / (cost_50pct + 1e-8)``.
        """
        epsilon = 1e-8
        median_cost = pl.col('cost_50pct')
        gap = pl.col('weight_avg') - median_cost
        return stock_df.with_columns((gap / (median_cost + epsilon)).alias('chip_skewness'))
|
||||
|
||||
|
||||
class FloatingChipProxyOperator(StockWiseOperator):
    """Proxy for the share of floating chips."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="floating_chip_proxy",
            description="浮筹比例代理",
            required_columns=['close', 'cost_15pct', 'winner_rate'],
            output_columns=['floating_chip_proxy'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``floating_chip_proxy``.

        The value is ``winner_rate`` times the row-wise maximum of 0 and the
        relative distance of ``close`` above ``cost_15pct``.
        """
        close = pl.col('close')
        # Relative distance between the close and the 15% cost line.
        distance = (close - pl.col('cost_15pct')) / close
        floored_distance = pl.max_horizontal(0, distance)
        proxy = pl.col('winner_rate') * floored_distance
        return stock_df.with_columns(proxy.alias('floating_chip_proxy'))
|
||||
|
||||
|
||||
class CostSupportChangeOperator(StockWiseOperator):
    """Percentage change of the 15% cost line over ``n`` rows."""

    def __init__(self, n: int = 1):
        """Configure the operator for an ``n``-row look-back."""
        spec = OperatorConfig(
            name=f"cost_support_change_{n}",
            description=f"{n}日成本支撑强度变化",
            required_columns=['cost_15pct'],
            output_columns=[f'cost_support_15pct_change_{n}'],
            parameters={'n': n},
        )
        super().__init__(spec)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cost_support_15pct_change_{n}`` = ``pct_change(n) * 100``."""
        target = f'cost_support_15pct_change_{self.n}'
        change_in_percent = pl.col('cost_15pct').pct_change(self.n) * 100
        return stock_df.with_columns(change_in_percent.alias(target))
|
||||
|
||||
|
||||
class WinnerPriceZoneOperator(StockWiseOperator):
    """Classify each row into a price/winner-rate zone (integer code 0-4)."""

    def __init__(self):
        """Register the required inputs and the output column."""
        config = OperatorConfig(
            name="winner_price_zone",
            description="获利盘压力/支撑区分类",
            required_columns=['close', 'cost_85pct', 'cost_15pct', 'cost_50pct', 'winner_rate'],
            output_columns=['cat_winner_price_zone'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cat_winner_price_zone``.

        Conditions are tested in order and the first match wins:

        * 1 - close above ``cost_85pct`` and ``winner_rate`` > 0.8
        * 2 - close below ``cost_15pct`` and ``winner_rate`` < 0.2
        * 3 - close above ``cost_50pct`` and ``winner_rate`` > 0.5
        * 4 - close below ``cost_50pct`` and ``winner_rate`` < 0.5
        * 0 - none of the above
        """
        close = pl.col('close')
        winner_rate = pl.col('winner_rate')

        # ``pl.select`` has no numpy-style conditions/choices/default form
        # and does not yield an expression; a chained when/then/otherwise
        # gives the intended first-match classification.
        zone_classification = (
            pl.when((close > pl.col('cost_85pct')) & (winner_rate > 0.8)).then(pl.lit(1))
            .when((close < pl.col('cost_15pct')) & (winner_rate < 0.2)).then(pl.lit(2))
            .when((close > pl.col('cost_50pct')) & (winner_rate > 0.5)).then(pl.lit(3))
            .when((close < pl.col('cost_50pct')) & (winner_rate < 0.5)).then(pl.lit(4))
            .otherwise(pl.lit(0))
        )

        return stock_df.with_columns(zone_classification.alias('cat_winner_price_zone'))
|
||||
|
||||
|
||||
class FlowChipConsistencyOperator(StockWiseOperator):
    """Large-order net buying gated by the price sitting in the lower band."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="flow_chip_consistency",
            description="主力行为与筹码结构一致性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'cost_15pct', 'cost_50pct'],
            output_columns=['flow_chip_consistency'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``flow_chip_consistency``.

        The value is the large plus extra-large net buy volume multiplied by
        a 0/1 indicator that is 1 when ``cost_15pct < close < cost_50pct``.
        """
        # Net buy volume of large and extra-large orders.
        net_buy = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )

        # Is the close strictly between the 15% and 50% cost lines?
        above_floor = pl.col('close') > pl.col('cost_15pct')
        below_median = pl.col('close') < pl.col('cost_50pct')
        in_lower_band = (above_floor & below_median).cast(int)

        return stock_df.with_columns((net_buy * in_lower_band).alias('flow_chip_consistency'))
|
||||
|
||||
|
||||
class ProfitTakingVsAbsorptionOperator(StockWiseOperator):
    """Large-order net buying gated by a high winner rate."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="profit_taking_vs_absorb",
            description="获利了结压力vs承接盘强度",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'winner_rate'],
            output_columns=['profit_taking_vs_absorb'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_taking_vs_absorb``.

        The value is the large plus extra-large net buy volume multiplied by
        a 0/1 indicator that is 1 when ``winner_rate > 0.7``. Positive
        values therefore mean net buying while most holders are in profit,
        negative values mean net selling.
        """
        net_buy = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        many_winners = (pl.col('winner_rate') > 0.7).cast(int)
        return stock_df.with_columns((net_buy * many_winners).alias('profit_taking_vs_absorb'))
|
||||
|
||||
|
||||
class ChipConcentrationChangeOperator(StockWiseOperator):
    """Rolling standard deviation of the normalised 15%-85% cost band."""

    def __init__(self, n: int = 20):
        """Configure the operator for an ``n``-row rolling window."""
        config = OperatorConfig(
            name=f"chip_conc_std_{n}",
            description=f"{n}日筹码集中度变化",
            required_columns=['cost_85pct', 'cost_15pct', 'weight_avg'],
            output_columns=[f'chip_conc_std_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``chip_conc_std_{n}``.

        The band ``(cost_85pct - cost_15pct) / (weight_avg + 1e-8)`` is
        computed per row and its rolling standard deviation is stored.
        """
        epsilon = 1e-8
        cost_range_norm = (pl.col('cost_85pct') - pl.col('cost_15pct')) / (pl.col('weight_avg') + epsilon)

        # Polars names this argument ``window_size``; ``window=`` raised.
        conc_std = cost_range_norm.rolling_std(window_size=self.n)

        return stock_df.with_columns(conc_std.alias(f'chip_conc_std_{self.n}'))
|
||||
|
||||
|
||||
class CostBreakoutConfirmationOperator(StockWiseOperator):
    """Rolling count of cost-line breakouts confirmed by large-order flow."""

    def __init__(self, m: int = 5):
        """Configure the operator for an ``m``-row rolling sum."""
        config = OperatorConfig(
            name=f"cost_break_confirm_cnt_{m}",
            description=f"{m}日成本突破确认",
            required_columns=['close', 'cost_85pct', 'cost_15pct',
                              'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol'],
            output_columns=[f'cost_break_confirm_cnt_{m}'],
            parameters={'m': m}
        )
        super().__init__(config)
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``cost_break_confirm_cnt_{m}``.

        A row scores +1 when ``close`` is above the previous row's
        ``cost_85pct`` with positive large-order net flow, and -1 when it
        is below the previous row's ``cost_15pct`` with negative net flow.
        The scores are summed over a rolling window of ``m`` rows.
        """
        # Cost lines of the previous row.
        prev_cost_85 = pl.col('cost_85pct').shift(1)
        prev_cost_15 = pl.col('cost_15pct').shift(1)

        break_up = pl.col('close') > prev_cost_85
        break_down = pl.col('close') < prev_cost_15

        # Net volume of large and extra-large orders.
        net_lg_flow_vol = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
            pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )

        confirm_up = break_up & (net_lg_flow_vol > 0)
        confirm_down = break_down & (net_lg_flow_vol < 0)
        net_confirm = confirm_up.cast(int) - confirm_down.cast(int)

        # Polars names this argument ``window_size``; ``window=`` raised.
        confirm_cnt = net_confirm.rolling_sum(window_size=self.m)

        return stock_df.with_columns(confirm_cnt.alias(f'cost_break_confirm_cnt_{self.m}'))
|
||||
|
||||
|
||||
# 筹码分布因子集合
|
||||
# Default operator set, each built with its default arguments; applied in
# this order by apply_chip_distribution_factors when no list is supplied.
CHIP_DISTRIBUTION_OPERATORS = [
    ChipConcentrationOperator(),
    ChipSkewnessOperator(),
    FloatingChipProxyOperator(),
    CostSupportChangeOperator(),
    WinnerPriceZoneOperator(),
    FlowChipConsistencyOperator(),
    ProfitTakingVsAbsorptionOperator(),
    ChipConcentrationChangeOperator(),
    CostBreakoutConfirmationOperator(),
]
|
||||
|
||||
|
||||
def apply_chip_distribution_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of operators over ``df``, feeding each the previous result.

    Args:
        df: Input frame.
        operators: Callables applied in order; when ``None`` the module-level
            ``CHIP_DISTRIBUTION_OPERATORS`` list is used.

    Returns:
        The frame returned by the last operator, or ``df`` itself when the
        sequence is empty.
    """
    pipeline = CHIP_DISTRIBUTION_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
||||
648
main/factor/polars_complex_factors.py
Normal file
648
main/factor/polars_complex_factors.py
Normal file
@@ -0,0 +1,648 @@
|
||||
"""
|
||||
复杂组合因子 - 使用Polars实现
|
||||
包含复杂的组合因子和高级因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, DateWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
# 时间序列因子
|
||||
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
    """Rolling correlation between large-order net flow and price momentum."""

    def __init__(self, n: int = 20, m: int = 60):
        """Configure the operator.

        Args:
            n: Window of the rolling flow sum and look-back of the momentum.
            m: Window of the rolling correlation.
        """
        config = OperatorConfig(
            name=f"lg_flow_mom_corr_{n}_{m}",
            description=f"{n}日大单资金流与{m}日价格动量相关性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'vol'],
            output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
            parameters={'n': n, 'm': m}
        )
        super().__init__(config)
        self.n = n
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``lg_flow_mom_corr_{n}_{m}``.

        The large-order net volume times ``close`` is summed over ``n``
        rows and correlated, over ``m`` rows, with the ``n``-row
        percentage change of ``close``.
        """
        # Large-order net volume valued at the close price.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        # Polars names this argument ``window_size``; ``window=`` raised.
        rolling_net_lg_flow = net_lg_flow_val.rolling_sum(window_size=self.n)

        price_mom = pl.col('close').pct_change(self.n)

        # Expressions have no ``rolling_corr`` method; the rolling
        # correlation of two expressions is the top-level ``pl.rolling_corr``.
        correlation = pl.rolling_corr(rolling_net_lg_flow, price_mom, window_size=self.m)

        return stock_df.with_columns(
            correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
        )
|
||||
|
||||
|
||||
class LargeBuyConsolidationOperator(StockWiseOperator):
    """Rolling mean of the large-order net flow share of total volume."""

    def __init__(self, n: int = 20, vol_quantile: float = 0.2):
        """Configure the operator.

        Args:
            n: Rolling window length.
            vol_quantile: Recorded in the config and on the instance;
                ``apply_stock`` does not read it.
        """
        config = OperatorConfig(
            name=f"lg_buy_consolidation_{n}",
            description=f"{n}日大单买入盘整期",
            required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol'],
            output_columns=[f'lg_buy_consolidation_{n}'],
            parameters={'n': n, 'vol_quantile': vol_quantile}
        )
        super().__init__(config)
        self.n = n
        self.vol_quantile = vol_quantile

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``lg_buy_consolidation_{n}``.

        The large-order net volume divided by ``vol + 1e-8`` is averaged
        over a rolling window of ``n`` rows.
        """
        epsilon = 1e-8

        # Share of total volume taken by large-order net buying.
        net_lg_flow_ratio = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) /
            (pl.col('vol') + epsilon)
        )

        # Polars names this argument ``window_size``; ``window=`` raised.
        # The rolling std of ``close`` the original built was never used in
        # the result and has been dropped.
        rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(window_size=self.n)

        return stock_df.with_columns(
            rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
        )
|
||||
|
||||
|
||||
class IntradayLargeFlowCorrelationOperator(StockWiseOperator):
    """Placeholder for an intraday-trend / large-flow correlation factor."""

    def __init__(self, n: int = 20):
        """Configure the operator for an ``n``-row window."""
        spec = OperatorConfig(
            name=f"intraday_lg_flow_corr_{n}",
            description=f"{n}日日内趋势与大单流相关性",
            required_columns=['high', 'low', 'close', 'buy_lg_vol', 'buy_elg_vol',
                              'sell_lg_vol', 'sell_elg_vol'],
            output_columns=[f'intraday_lg_flow_corr_{n}'],
            parameters={'n': n},
        )
        super().__init__(spec)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``intraday_lg_flow_corr_{n}`` as an all-null float column.

        The factor is not implemented; a real version would need richer
        intraday data, so only the output column is reserved.
        """
        target = f'intraday_lg_flow_corr_{self.n}'
        null_float = pl.lit(None).cast(float)
        return stock_df.with_columns(null_float.alias(target))
|
||||
|
||||
|
||||
class ProfitPressureOperator(StockWiseOperator):
    """Winner rate weighted by the average margin over the upper cost lines."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="profit_pressure",
            description="获利压力指数",
            required_columns=['close', 'cost_85pct', 'cost_95pct', 'winner_rate'],
            output_columns=['profit_pressure'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_pressure``.

        Computed as ``winner_rate * 0.5 * (margin_85 + margin_95)`` where
        each margin is ``close / (cost_line + 1e-8) - 1``.
        """
        epsilon = 1e-8
        close = pl.col('close')

        # Margin of the close over the 85% and 95% cost lines.
        margin_85 = (close / (pl.col('cost_85pct') + epsilon)) - 1
        margin_95 = (close / (pl.col('cost_95pct') + epsilon)) - 1

        index = pl.col('winner_rate') * 0.5 * (margin_85 + margin_95)
        return stock_df.with_columns(index.alias('profit_pressure'))
|
||||
|
||||
|
||||
class UnderwaterResistanceOperator(StockWiseOperator):
    """Share of losing holders weighted by the gap up to the 15% cost line."""

    def __init__(self):
        """Register the required inputs and the output column."""
        spec = OperatorConfig(
            name="underwater_resistance",
            description="套牢盘阻力",
            required_columns=['close', 'winner_rate', 'cost_15pct'],
            output_columns=['underwater_resistance'],
            parameters={},
        )
        super().__init__(spec)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``underwater_resistance``.

        Computed as ``(1 - winner_rate)`` times the row-wise maximum of 0
        and ``cost_15pct - close``, divided by ``close + 1e-8``.
        """
        epsilon = 1e-8
        close = pl.col('close')

        losing_share = 1.0 - pl.col('winner_rate')
        # Gap up to the 15% cost line, floored at zero and scaled by close.
        shortfall = pl.max_horizontal(0, pl.col('cost_15pct') - close)
        relative_gap = shortfall / (close + epsilon)

        return stock_df.with_columns((losing_share * relative_gap).alias('underwater_resistance'))
|
||||
|
||||
|
||||
class ProfitDecayOperator(StockWiseOperator):
    """Ratio of the ``n``-row return to the ``n``-row winner-rate change."""

    def __init__(self, n: int = 20):
        """Configure the operator for an ``n``-row look-back."""
        spec = OperatorConfig(
            name=f"profit_decay_{n}",
            description=f"{n}日盈利预期衰减",
            required_columns=['close', 'winner_rate'],
            output_columns=[f'profit_decay_{n}'],
            parameters={'n': n},
        )
        super().__init__(spec)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``profit_decay_{n}``.

        Computed as ``close.pct_change(n) / (winner_rate.diff(n) + 1e-8)``.
        """
        price_return = pl.col('close').pct_change(self.n)
        winner_shift = pl.col('winner_rate').diff(self.n)
        ratio = price_return / (winner_shift + 1e-8)
        return stock_df.with_columns(ratio.alias(f'profit_decay_{self.n}'))
|
||||
|
||||
|
||||
class PullbackStrongOperator(StockWiseOperator):
    """Pullback from the rolling high relative to the recent gain."""

    def __init__(self, n: int = 20, m: int = 20, gain_thresh: float = 0.2):
        """Configure the operator.

        Args:
            n: Window of the rolling maximum of ``high``.
            m: Look-back of the recent gain.
            gain_thresh: Recorded in the config and on the instance;
                ``apply_stock`` does not read it.
        """
        config = OperatorConfig(
            name=f"pullback_strong_{n}_{m}",
            description=f"{n}日{m}期强势股回调深度",
            required_columns=['high', 'close'],
            output_columns=[f'pullback_strong_{n}_{m}'],
            parameters={'n': n, 'm': m, 'gain_thresh': gain_thresh}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.gain_thresh = gain_thresh

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``pullback_strong_{n}_{m}``.

        The pullback depth ``(rolling_high - close) / rolling_high`` is
        divided by the ``m``-row gain of ``close`` plus ``1e-8``.
        """
        # Polars names this argument ``window_size``; ``window=`` raised.
        high_n = pl.col('high').rolling_max(window_size=self.n)

        pullback_depth = (high_n - pl.col('close')) / high_n
        recent_gain = (pl.col('close') / pl.col('close').shift(self.m)) - 1
        pullback_factor = pullback_depth / (recent_gain + 1e-8)

        return stock_df.with_columns(pullback_factor.alias(f'pullback_strong_{self.n}_{self.m}'))
|
||||
|
||||
|
||||
class HurstExponentFlowOperator(StockWiseOperator):
    """Placeholder for a Hurst exponent computed on a flow column."""

    def __init__(self, n: int = 60, flow_col: str = 'net_mf_vol'):
        """Configure the operator.

        Args:
            n: Window length recorded for the factor.
            flow_col: Name of the flow column the factor refers to.
        """
        spec = OperatorConfig(
            name=f"hurst_{flow_col}_{n}",
            description=f"{n}日{flow_col}Hurst指数",
            required_columns=[flow_col],
            output_columns=[f'hurst_{flow_col}_{n}'],
            parameters={'n': n, 'flow_col': flow_col},
        )
        super().__init__(spec)
        self.n = n
        self.flow_col = flow_col

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Add ``hurst_{flow_col}_{n}`` as an all-null float column.

        The Hurst computation is not implemented; only the output column
        is reserved.
        """
        target = f'hurst_{self.flow_col}_{self.n}'
        null_float = pl.lit(None).cast(float)
        return stock_df.with_columns(null_float.alias(target))
|
||||
|
||||
|
||||
class VolWeightedHistoricalPositionOperator(StockWiseOperator):
    """Historical price position weighted by relative volume.

    Output column: ``vol_wgt_hist_pos_{n}``.
    """

    def __init__(self, n: int = 20):
        """Configure the operator.

        Args:
            n: Window length of the rolling mean of ``vol``.
        """
        config = OperatorConfig(
            name=f"vol_wgt_hist_pos_{n}",
            description=f"{n}日成交量加权历史位置",
            required_columns=['close', 'his_high', 'his_low', 'vol'],
            output_columns=[f'vol_wgt_hist_pos_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``vol_wgt_hist_pos_{n}`` to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # Position of the close inside the [his_low, his_high] range, clipped to [0, 1].
        hist_pos = (pl.col('close') - pl.col('his_low')) / (pl.col('his_high') - pl.col('his_low'))
        hist_pos = hist_pos.clip(0, 1)

        # Volume relative to its n-period mean. The window is positional because
        # the polars parameter is ``window_size``; ``window=`` is not accepted.
        rolling_mean_vol = pl.col('vol').rolling_mean(self.n)
        vol_rel_strength = pl.col('vol') / rolling_mean_vol

        weighted_pos = hist_pos * vol_rel_strength

        return stock_df.with_columns(weighted_pos.alias(f'vol_wgt_hist_pos_{self.n}'))
|
||||
|
||||
|
||||
# 横截面因子
|
||||
class CrossSectionalRankOperator(DateWiseOperator):
    """Normalised cross-sectional dense rank of an arbitrary column.

    Output column: ``cs_rank_{column}``.
    """

    def __init__(self, column: str, ascending: bool = True):
        """Configure the operator.

        Args:
            column: Name of the column to rank.
            ascending: When True the smallest value receives rank 1.
        """
        super().__init__(
            OperatorConfig(
                name=f"cs_rank_{column}",
                description=f"{column}横截面排名",
                required_columns=[column],
                output_columns=[f'cs_rank_{column}'],
                parameters={'column': column, 'ascending': ascending},
            )
        )
        self.column = column
        self.ascending = ascending

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the dense rank of ``self.column`` divided by the largest rank."""
        dense_rank = pl.col(self.column).rank(method='dense', descending=not self.ascending)
        # Dividing by the maximum rank scales the result so the top rank maps to 1.
        scaled_rank = dense_rank / dense_rank.max()
        return date_df.with_columns(scaled_rank.alias(f'cs_rank_{self.column}'))
|
||||
|
||||
|
||||
class CrossSectionalNetLargeFlowRankOperator(DateWiseOperator):
    """Cross-sectional rank of the net large-order flow value.

    Output column: ``cs_rank_net_lg_flow_val``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_net_lg_flow_val",
                description="横截面大单净额排名",
                required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'close'],
                output_columns=['cs_rank_net_lg_flow_val'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of net large flow, scaled by the max rank."""
        # Net large + extra-large volume (buys minus sells), valued at the close.
        net_large_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        net_large_value = net_large_volume * pl.col('close')

        # descending=True: the largest net inflow receives rank 1.
        dense_rank = net_large_value.rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()

        return date_df.with_columns(scaled_rank.alias('cs_rank_net_lg_flow_val'))
|
||||
|
||||
|
||||
class CrossSectionalFlowDivergenceRankOperator(DateWiseOperator):
    """Cross-sectional rank of large-order vs. small-order flow divergence.

    Output column: ``cs_rank_flow_divergence``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_flow_divergence",
                description="横截面流向背离度排名",
                required_columns=['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
                                  'sell_lg_vol', 'sell_elg_vol', 'vol'],
                output_columns=['cs_rank_flow_divergence'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of (large ratio - small ratio), scaled by the max rank."""
        epsilon = 1e-8
        # Shared denominator: total volume, nudged away from zero.
        total_volume = pl.col('vol') + epsilon

        # Net large + extra-large volume as a share of total volume.
        large_net = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        large_share = large_net / total_volume

        # Net small-order volume as a share of total volume.
        small_share = (pl.col('buy_sm_vol') - pl.col('sell_sm_vol')) / total_volume

        gap = large_share - small_share

        dense_rank = gap.rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()

        return date_df.with_columns(scaled_rank.alias('cs_rank_flow_divergence'))
|
||||
|
||||
|
||||
class CrossSectionalRelativeProfitMarginRankOperator(DateWiseOperator):
    """Cross-sectional rank of the close's margin over ``weight_avg``.

    Output column: ``cs_rank_rel_profit_margin``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_rel_profit_margin",
                description="横截面相对盈利幅度排名",
                required_columns=['close', 'weight_avg'],
                output_columns=['cs_rank_rel_profit_margin'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of (close - weight_avg) / close, scaled by the max rank."""
        close = pl.col('close')
        margin = (close - pl.col('weight_avg')) / close

        dense_rank = margin.rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()

        return date_df.with_columns(scaled_rank.alias('cs_rank_rel_profit_margin'))
|
||||
|
||||
|
||||
class CrossSectionalCostBreadthRankOperator(DateWiseOperator):
    """Cross-sectional rank of the cost-distribution breadth.

    Output column: ``cs_rank_cost_breadth``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_cost_breadth",
                description="横截面成本分布宽度排名",
                required_columns=['cost_85pct', 'cost_15pct', 'weight_avg'],
                output_columns=['cs_rank_cost_breadth'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of the 85%-15% cost spread over ``weight_avg``."""
        epsilon = 1e-8

        # Spread between the 85th and 15th cost percentiles, relative to the
        # average cost (nudged away from zero).
        spread = pl.col('cost_85pct') - pl.col('cost_15pct')
        breadth = spread / (pl.col('weight_avg') + epsilon)

        dense_rank = breadth.rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()

        return date_df.with_columns(scaled_rank.alias('cs_rank_cost_breadth'))
|
||||
|
||||
|
||||
class CrossSectionalWinnerRateRankOperator(DateWiseOperator):
    """Cross-sectional rank of ``winner_rate``.

    Output column: ``cs_rank_winner_rate``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_winner_rate",
                description="横截面获利盘比例排名",
                required_columns=['winner_rate'],
                output_columns=['cs_rank_winner_rate'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of ``winner_rate``, scaled by the max rank."""
        dense_rank = pl.col('winner_rate').rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()
        return date_df.with_columns(scaled_rank.alias('cs_rank_winner_rate'))
|
||||
|
||||
|
||||
class CrossSectionalVolumeRatioRankOperator(DateWiseOperator):
    """Cross-sectional rank of ``volume_ratio``.

    Output column: ``cs_rank_volume_ratio``.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="cs_rank_volume_ratio",
                description="横截面量比排名",
                required_columns=['volume_ratio'],
                output_columns=['cs_rank_volume_ratio'],
                parameters={},
            )
        )

    def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the descending dense rank of ``volume_ratio``, scaled by the max rank."""
        dense_rank = pl.col('volume_ratio').rank(method='dense', descending=True)
        scaled_rank = dense_rank / dense_rank.max()
        return date_df.with_columns(scaled_rank.alias('cs_rank_volume_ratio'))
|
||||
|
||||
|
||||
# 复杂组合因子
|
||||
class ComplexFactorDEAPOperator(StockWiseOperator):
    """Composite factor built from a fixed formula over precomputed factor columns.

    Output column: ``complex_factor_deap_1``. Every input listed in
    ``required_columns`` must already be present in the frame.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="complex_factor_deap_1",
                description="DEAP复杂组合因子",
                required_columns=['pullback_strong_20_20', 'log_close', 'industry_return_5',
                                  'vol_adj_roc_20', 'vol_drop_profit_cnt_5', 'nonlinear_mv_volume',
                                  'alpha_007', 'lg_buy_consolidation_20', 'net_mf_vol', 'std_return_5',
                                  'arbr', 'industry_act_factor5', 'industry_act_factor1', 'low_cost_dev',
                                  'mv_weighted_turnover', 'act_factor4', 'vol', 'lg_elg_buy_prop',
                                  'intraday_lg_flow_corr_20'],
                output_columns=['complex_factor_deap_1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``complex_factor_deap_1``; on any error append an all-null float column instead.

        The formula is ``safe_div(A, B) - C`` where

        * ``D = pullback * safe_div(log_close, industry_return_5)
          - safe_div(vol_adj_roc_20 + vol_drop_profit_cnt_5, nonlinear_mv_volume - alpha_007)``
        * ``A = D * consolidation + consolidation + pullback``
        * ``F = (net_mf_vol + std_return_5) * (arbr - industry_act_factor5)``
        * ``H = (industry_act_factor1 + low_cost_dev) + mv_weighted_turnover * act_factor4``
        * ``B = safe_div(F + vol + H, lg_elg_buy_prop)``
        * ``C = safe_div(safe_div(intraday_lg_flow_corr_20 filled with 0, lg_elg_buy_prop), lg_elg_buy_prop)``
        """
        try:
            col = pl.col

            def safe_divide(a, b, default_val=0):
                # Divide only where |b| exceeds 1e-8; otherwise yield default_val.
                return pl.when(b.abs() > 1e-8).then(a / b).otherwise(default_val)

            pullback = col('pullback_strong_20_20')
            consolidation = col('lg_buy_consolidation_20')
            buy_prop = col('lg_elg_buy_prop')

            # Component D
            component_d = (
                pullback * safe_divide(col('log_close'), col('industry_return_5'))
                - safe_divide(
                    col('vol_adj_roc_20') + col('vol_drop_profit_cnt_5'),
                    col('nonlinear_mv_volume') - col('alpha_007'),
                )
            )

            # Component A
            component_a = component_d * consolidation + consolidation + pullback

            # Component F
            component_f = (
                (col('net_mf_vol') + col('std_return_5'))
                * (col('arbr') - col('industry_act_factor5'))
            )

            # Component H
            component_h = (
                (col('industry_act_factor1') + col('low_cost_dev'))
                + col('mv_weighted_turnover') * col('act_factor4')
            )

            # Component B
            component_b = safe_divide(component_f + col('vol') + component_h, buy_prop)

            # Component C: the correlation (nulls as 0) divided twice by buy_prop.
            component_c = safe_divide(
                safe_divide(col('intraday_lg_flow_corr_20').fill_null(0), buy_prop),
                buy_prop,
            )

            factor = safe_divide(component_a, component_b) - component_c

            return stock_df.with_columns(factor.alias('complex_factor_deap_1'))

        except Exception as e:
            # Best-effort fallback: report the failure and emit a null column.
            print(f"Error calculating complex_factor_deap_1: {e}")
            return stock_df.with_columns(pl.lit(None).cast(float).alias('complex_factor_deap_1'))
|
||||
|
||||
|
||||
# 因子集合
|
||||
# Default operator pipeline used by apply_complex_factors. Operators are
# applied in list order, so an operator that reads another operator's output
# column must come after it.
COMPLEX_OPERATORS = [
    # Stock-wise operators
    LargeFlowMomentumCorrelationOperator(),
    LargeBuyConsolidationOperator(),
    IntradayLargeFlowCorrelationOperator(),
    ProfitPressureOperator(),
    UnderwaterResistanceOperator(),
    ProfitDecayOperator(),
    PullbackStrongOperator(),
    HurstExponentFlowOperator(),
    VolWeightedHistoricalPositionOperator(),
    # Date-wise (cross-sectional) rank operators
    CrossSectionalRankOperator('close'),
    CrossSectionalNetLargeFlowRankOperator(),
    CrossSectionalFlowDivergenceRankOperator(),
    CrossSectionalRelativeProfitMarginRankOperator(),
    CrossSectionalCostBreadthRankOperator(),
    CrossSectionalWinnerRateRankOperator(),
    CrossSectionalVolumeRatioRankOperator(),
    # Composite factor built on columns produced elsewhere
    ComplexFactorDEAPOperator(),
]
|
||||
|
||||
|
||||
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of complex-factor operators over a frame.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. When None the module-level
            ``COMPLEX_OPERATORS`` list is used.

    Returns:
        The frame returned by the last operator (``df`` itself when the
        sequence is empty).
    """
    pipeline = COMPLEX_OPERATORS if operators is None else operators

    current = df
    for op in pipeline:
        # Each operator is callable and returns the next frame in the chain.
        current = op(current)
    return current
|
||||
|
||||
|
||||
# 主应用函数
|
||||
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: List[str] = None) -> pl.DataFrame:
    """Apply every requested factor category to a frame.

    Categories always run in the fixed order money_flow, chip, volatility,
    volume, technical, sentiment, momentum, complex, regardless of the order
    in ``factor_categories``. Names outside that set are ignored.

    Each category's module is imported only when that category is requested,
    so a missing or broken module cannot fail a call that does not need it.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Categories to apply; None applies all of them.

    Returns:
        The frame with the selected factor columns added.

    Raises:
        ImportError: If the module of a requested category cannot be imported.
    """
    if factor_categories is None:
        factor_categories = ['money_flow', 'chip', 'volatility', 'volume',
                             'technical', 'sentiment', 'momentum', 'complex']

    result_df = df

    if 'money_flow' in factor_categories:
        from polars_money_flow_factors import apply_money_flow_factors
        result_df = apply_money_flow_factors(result_df)

    if 'chip' in factor_categories:
        from polars_chip_factors import apply_chip_distribution_factors
        result_df = apply_chip_distribution_factors(result_df)

    if 'volatility' in factor_categories:
        from polars_volatility_factors import apply_volatility_factors
        result_df = apply_volatility_factors(result_df)

    if 'volume' in factor_categories:
        from polars_volume_factors import apply_volume_factors
        result_df = apply_volume_factors(result_df)

    if 'technical' in factor_categories:
        from polars_technical_factors import apply_technical_factors
        result_df = apply_technical_factors(result_df)

    if 'sentiment' in factor_categories:
        from polars_sentiment_factors import apply_sentiment_factors
        result_df = apply_sentiment_factors(result_df)

    if 'momentum' in factor_categories:
        from polars_momentum_factors import apply_momentum_factors
        result_df = apply_momentum_factors(result_df)

    if 'complex' in factor_categories:
        # Defined in this module; no import needed.
        result_df = apply_complex_factors(result_df)

    return result_df
|
||||
237
main/factor/polars_factors.py
Normal file
237
main/factor/polars_factors.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Polars因子主入口 - 整合所有Polars-based因子计算
|
||||
提供统一的接口来应用所有类别的因子
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
from typing import Dict, List, Optional, Any
|
||||
import logging
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# 因子类别映射
|
||||
# Factor category key -> human-readable (Chinese) label used in log messages.
FACTOR_CATEGORIES = dict(
    money_flow='资金流因子',
    chip='筹码分布因子',
    volatility='波动率因子',
    volume='成交量因子',
    technical='技术指标因子',
    sentiment='情绪因子',
    momentum='动量因子',
    complex='复杂组合因子',
)
|
||||
|
||||
|
||||
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_money_flow_factors.apply_money_flow_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_money_flow_factors import apply_money_flow_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入资金流因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_chip_distribution_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_chip_factors.apply_chip_distribution_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_chip_factors import apply_chip_distribution_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入筹码分布因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_volatility_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_volatility_factors.apply_volatility_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_volatility_factors import apply_volatility_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入波动率因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_volume_factors.apply_volume_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_volume_factors import apply_volume_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入成交量因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_technical_factors.apply_technical_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_technical_factors import apply_technical_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入技术指标因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_sentiment_factors.apply_sentiment_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_sentiment_factors import apply_sentiment_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入情绪因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_momentum_factors.apply_momentum_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_momentum_factors import apply_momentum_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入动量因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_complex_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Delegate to ``polars_complex_factors.apply_complex_factors``.

    If an ImportError is raised while importing or running the delegate, a
    warning is logged and ``df`` is returned unchanged.
    """
    result = df
    try:
        from polars_complex_factors import apply_complex_factors as delegate
        result = delegate(df, operators)
    except ImportError as exc:
        logger.warning(f"无法导入复杂组合因子模块: {exc}")
    return result
|
||||
|
||||
|
||||
def apply_all_factors(df: pl.DataFrame,
                      factor_categories: List[str] = None,
                      exclude_categories: List[str] = None) -> pl.DataFrame:
    """Apply the selected factor categories one after another.

    Categories run in the order given by ``factor_categories``. A category
    that raises is logged and skipped; the frame produced so far is carried
    forward. Unknown category names are logged as warnings and skipped.

    Args:
        df: Input Polars DataFrame containing the columns the factors need.
        factor_categories: Categories to apply; None means every key of
            ``FACTOR_CATEGORIES``.
        exclude_categories: Categories to drop from the selection.

    Returns:
        The frame with the columns added by the categories that succeeded.
    """
    selected = list(FACTOR_CATEGORIES.keys()) if factor_categories is None else factor_categories
    if exclude_categories:
        selected = [name for name in selected if name not in exclude_categories]

    logger.info(f"开始应用因子类别: {selected}")

    # Category key -> wrapper function defined in this module.
    dispatch = {
        'money_flow': apply_money_flow_factors,
        'chip': apply_chip_distribution_factors,
        'volatility': apply_volatility_factors,
        'volume': apply_volume_factors,
        'technical': apply_technical_factors,
        'sentiment': apply_sentiment_factors,
        'momentum': apply_momentum_factors,
        'complex': apply_complex_factors,
    }

    current = df
    added_total = 0

    for category in selected:
        handler = dispatch.get(category)
        if handler is None:
            logger.warning(f"未知的因子类别: {category}")
            continue

        logger.info(f"应用{FACTOR_CATEGORIES[category]}...")

        try:
            width_before = len(current.columns)
            current = handler(current)
            # New factors are counted as the growth in column count.
            added = len(current.columns) - width_before

            logger.info(f"{FACTOR_CATEGORIES[category]}应用完成,新增{added}个因子")
            added_total += added
        except Exception as e:
            # Best effort: one failing category must not abort the rest.
            logger.error(f"应用{FACTOR_CATEGORIES[category]}时出错: {e}")

    logger.info(f"因子应用完成,总共新增{added_total}个因子")
    return current
|
||||
|
||||
|
||||
def get_factor_info() -> Dict[str, Any]:
    """Summarise the available factor categories.

    Returns:
        A dict with three entries: ``categories`` (the ``FACTOR_CATEGORIES``
        mapping itself), ``total_categories`` (its size) and
        ``category_descriptions`` (a list of its values).
    """
    descriptions = list(FACTOR_CATEGORIES.values())
    info = {
        'categories': FACTOR_CATEGORIES,
        'total_categories': len(FACTOR_CATEGORIES),
        'category_descriptions': descriptions,
    }
    return info
|
||||
|
||||
|
||||
def validate_required_columns(df: pl.DataFrame, factor_categories: List[str] = None) -> Dict[str, List[str]]:
    """Report which required columns are absent from a frame.

    Args:
        df: Input Polars DataFrame.
        factor_categories: Categories to check; None checks every key of
            ``FACTOR_CATEGORIES``.

    Returns:
        A dict mapping ``'base'`` and/or category names to the list of their
        missing columns. Groups with nothing missing are omitted, so an empty
        dict means every checked column is present.
    """
    # Columns each category needs, as hard-coded here (not read from the operators).
    category_requirements = {
        'money_flow': ['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
        'chip': ['cost_95pct', 'cost_85pct', 'cost_50pct', 'cost_15pct', 'cost_5pct',
                 'winner_rate', 'weight_avg', 'close'],
        'volatility': ['pct_chg'],
        'volume': ['vol', 'turnover_rate', 'volume_ratio', 'amount'],
        'technical': ['open', 'high', 'low', 'close', 'vol'],
        'sentiment': ['pct_chg', 'vol', 'volume_ratio'],
        'momentum': ['close', 'turnover_rate'],
        'complex': ['close', 'vol', 'pct_chg', 'turnover_rate', 'winner_rate'],
    }

    categories = list(FACTOR_CATEGORIES.keys()) if factor_categories is None else factor_categories
    available = df.columns
    report = {}

    # Identifier columns every factor relies on.
    absent_base = [name for name in ['ts_code', 'trade_date'] if name not in available]
    if absent_base:
        report['base'] = absent_base

    for category in categories:
        needed = category_requirements.get(category)
        if needed is None:
            # Categories without a requirement entry are not checked.
            continue
        absent = [name for name in needed if name not in available]
        if absent:
            report[category] = absent

    return report
|
||||
|
||||
|
||||
# 向后兼容的函数名
|
||||
apply_factors = apply_all_factors
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke output when the module is run directly: list the known categories.
    print("Polars因子系统已加载")
    print("可用的因子类别:")
    for category_key in FACTOR_CATEGORIES:
        print(f" {category_key}: {FACTOR_CATEGORIES[category_key]}")
|
||||
428
main/factor/polars_momentum_factors.py
Normal file
428
main/factor/polars_momentum_factors.py
Normal file
@@ -0,0 +1,428 @@
|
||||
"""
|
||||
动量因子 - 使用Polars实现
|
||||
包含动量、趋势、均线等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
from scipy.stats import linregress
|
||||
|
||||
|
||||
class PriceMinusDeductionPriceOperator(StockWiseOperator):
    """Close price minus the deduction price (the close ``n - 1`` rows earlier).

    Output column: ``price_minus_deduction_price_{n}``.
    """

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Moving-average period whose deduction price is used.
        """
        factor_name = f"price_minus_deduction_price_{n}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{n}日价格减抵扣价",
                required_columns=['close'],
                output_columns=[f'price_minus_deduction_price_{n}'],
                parameters={'n': n},
            )
        )
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``close - close.shift(n - 1)`` as ``price_minus_deduction_price_{n}``."""
        close = pl.col('close')
        # The deduction price is the close n-1 periods back.
        gap_to_deduction = close - close.shift(self.n - 1)
        return stock_df.with_columns(
            gap_to_deduction.alias(f'price_minus_deduction_price_{self.n}')
        )
|
||||
|
||||
|
||||
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
    """Close-minus-deduction-price gap expressed as a fraction of the n-period SMA.

    Output column: ``price_deduction_price_diff_ratio_to_sma_{n}``.
    """

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Period of the simple moving average and of the deduction price.
        """
        config = OperatorConfig(
            name=f"price_deduction_price_diff_ratio_to_sma_{n}",
            description=f"{n}日价格抵扣价差值相对SMA比率",
            required_columns=['close'],
            output_columns=[f'price_deduction_price_diff_ratio_to_sma_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``price_deduction_price_diff_ratio_to_sma_{n}`` to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # n-period SMA. The window is positional because the polars parameter
        # is ``window_size``; ``window=`` is not accepted.
        sma = pl.col('close').rolling_mean(self.n)

        # Deduction price: the close n-1 periods back.
        deduction_price = pl.col('close').shift(self.n - 1)

        diff = pl.col('close') - deduction_price

        # The small constant avoids an exact zero divisor.
        ratio = diff / (sma + 1e-8)

        return stock_df.with_columns(ratio.alias(f'price_deduction_price_diff_ratio_to_sma_{self.n}'))
|
||||
|
||||
|
||||
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
    """Categorise where the close and the deduction price sit relative to the SMA.

    Output column ``cat_price_vs_sma_vs_deduction_price_{n}`` takes the values:

    * 1 - close > SMA and deduction price > SMA
    * 2 - close < SMA and deduction price < SMA
    * 3 - close > SMA and deduction price <= SMA
    * 4 - close <= SMA and deduction price > SMA
    * 0 - anything else, including rows where a comparison is null
    """

    def __init__(self, n: int = 10):
        """Configure the operator.

        Args:
            n: Period of the simple moving average and of the deduction price.
        """
        config = OperatorConfig(
            name=f"cat_price_vs_sma_vs_deduction_price_{n}",
            description=f"{n}日价格vsSMAvs抵扣价分类",
            required_columns=['close'],
            output_columns=[f'cat_price_vs_sma_vs_deduction_price_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the category column to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``cat_price_vs_sma_vs_deduction_price_{n}`` added.
        """
        close = pl.col('close')

        # n-period SMA (window passed positionally; polars names it ``window_size``).
        sma = close.rolling_mean(self.n)

        # Deduction price: the close n-1 periods back.
        deduction_price = close.shift(self.n - 1)

        # ``pl.select`` has no conditions/choices/default signature (that is
        # numpy's ``np.select``). A when/then chain gives the same
        # first-match-wins semantics, with ``otherwise`` as the default.
        classification = (
            pl.when((close > sma) & (deduction_price > sma)).then(1)
            .when((close < sma) & (deduction_price < sma)).then(2)
            .when((close > sma) & (deduction_price <= sma)).then(3)
            .when((close <= sma) & (deduction_price > sma)).then(4)
            .otherwise(0)
        )

        return stock_df.with_columns(
            classification.alias(f'cat_price_vs_sma_vs_deduction_price_{self.n}')
        )
|
||||
|
||||
|
||||
class VolatilitySlopeOperator(StockWiseOperator):
    """Linear-regression slope of rolling volatility.

    Output column: ``volatility_slope_{long_window}_{short_window}``.
    """

    def __init__(self, long_window: int = 20, short_window: int = 5):
        """Configure the operator.

        Args:
            long_window: Window of the rolling standard deviation of ``pct_chg``.
            short_window: Window over which the slope of that volatility is fitted.
        """
        config = OperatorConfig(
            name=f"volatility_slope_{long_window}_{short_window}",
            description=f"{long_window}日波动率{short_window}日斜率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_slope_{long_window}_{short_window}'],
            parameters={'long_window': long_window, 'short_window': short_window}
        )
        super().__init__(config)
        self.long_window = long_window
        self.short_window = short_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the volatility-slope column to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # Long-horizon volatility. The window is positional because the polars
        # parameter is ``window_size``; ``window=`` is not accepted.
        long_vol = pl.col('pct_chg').rolling_std(self.long_window)

        def calculate_slope(series):
            # OLS slope of the window's values against 0..len-1; a window with
            # fewer than two points has no slope, so 0 is returned.
            if len(series) < 2:
                return 0
            x = np.arange(len(series))
            slope, _, _, _, _ = linregress(x, series)
            return slope

        # NOTE(review): newer polars releases expose this as ``rolling_map``;
        # confirm against the polars version this project pins.
        volatility_slope = long_vol.rolling_apply(
            function=calculate_slope,
            window_size=self.short_window
        )

        return stock_df.with_columns(
            volatility_slope.alias(f'volatility_slope_{self.long_window}_{self.short_window}')
        )
|
||||
|
||||
|
||||
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
    """Linear-regression slope of the turnover rate over a rolling window.

    Output column: ``turnover_trend_strength_{window}``.
    """

    def __init__(self, window: int = 5):
        """Configure the operator.

        Args:
            window: Number of rows in each regression window.
        """
        factor_name = f"turnover_trend_strength_{window}"
        super().__init__(
            OperatorConfig(
                name=factor_name,
                description=f"{window}日换手率趋势强度",
                required_columns=['turnover_rate'],
                output_columns=[f'turnover_trend_strength_{window}'],
                parameters={'window': window},
            )
        )
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the rolling OLS slope of ``turnover_rate`` to one stock's frame."""

        def window_slope(values):
            # Slope of the window's values regressed on 0..len-1; a window with
            # fewer than two points has no slope, so 0 is returned.
            count = len(values)
            if count < 2:
                return 0
            return linregress(np.arange(count), values)[0]

        slope_expr = pl.col('turnover_rate').rolling_apply(
            function=window_slope,
            window_size=self.window,
        )
        output_name = f'turnover_trend_strength_{self.window}'
        return stock_df.with_columns(slope_expr.alias(output_name))
|
||||
|
||||
|
||||
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
    """Turnover rate relative to its rolling mean.

    Output column: ``ff_turnover_surge_{window}``.
    """

    def __init__(self, window: int = 10):
        """Configure the operator.

        Args:
            window: Window length of the rolling mean of ``turnover_rate``.
        """
        config = OperatorConfig(
            name=f"ff_turnover_surge_{window}",
            description=f"{window}日自由流通股换手率激增",
            required_columns=['turnover_rate'],
            output_columns=[f'ff_turnover_surge_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``ff_turnover_surge_{window}`` to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # Rolling mean. The window is positional because the polars parameter
        # is ``window_size``; ``window=`` is not accepted.
        avg_turnover = pl.col('turnover_rate').rolling_mean(self.window)

        # The small constant avoids an exact zero divisor.
        surge_ratio = pl.col('turnover_rate') / (avg_turnover + 1e-8)

        return stock_df.with_columns(surge_ratio.alias(f'ff_turnover_surge_{self.window}'))
|
||||
|
||||
|
||||
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
    """Product of the share of up-closes and the share of above-average-volume days.

    Output column: ``price_volume_coherence_{price_window}_{volume_window}``.
    """

    def __init__(self, price_window: int = 5, volume_window: int = 20):
        """Configure the operator.

        Args:
            price_window: Window over which both shares are averaged.
            volume_window: Window of the rolling mean that ``vol`` is compared to.
        """
        config = OperatorConfig(
            name=f"price_volume_coherence_{price_window}_{volume_window}",
            description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
            required_columns=['close', 'vol'],
            output_columns=[f'price_volume_coherence_{price_window}_{volume_window}'],
            parameters={'price_window': price_window, 'volume_window': volume_window}
        )
        super().__init__(config)
        self.price_window = price_window
        self.volume_window = volume_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the coherence column to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # Share of rows in the price window whose close rose versus the prior
        # row. This is a native expression: the earlier ``Expr.apply`` version
        # invoked its callback once per scalar, where ``.diff()`` does not exist.
        # Rolling windows are positional (polars names the parameter ``window_size``).
        price_up = (
            (pl.col('close').diff() > 0)
            .cast(pl.Int64)
            .rolling_mean(self.price_window)
        )

        # Share of rows in the price window whose volume exceeded its own
        # volume_window rolling mean.
        vol_avg = pl.col('vol').rolling_mean(self.volume_window)
        vol_above_avg = pl.col('vol') > vol_avg
        vol_above_ratio = vol_above_avg.cast(pl.Int64).rolling_mean(self.price_window)

        coherence = price_up * vol_above_ratio

        return stock_df.with_columns(
            coherence.alias(f'price_volume_coherence_{self.price_window}_{self.volume_window}')
        )
|
||||
|
||||
|
||||
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
    """Simplified free-float to total turnover ratio.

    Output column: ``ff_to_total_turnover_ratio``.

    NOTE(review): numerator and denominator both come from ``turnover_rate``,
    so the result is essentially 1 wherever turnover is non-zero. A separate
    total-turnover column is presumably intended - confirm with the data owner.
    """

    def __init__(self):
        """Register name, required inputs and the output column."""
        super().__init__(
            OperatorConfig(
                name="ff_to_total_turnover_ratio",
                description="自由流通股对总换手率比率",
                required_columns=['turnover_rate'],
                output_columns=['ff_to_total_turnover_ratio'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``turnover_rate / (turnover_rate + 1e-8)`` as the ratio column."""
        turnover = pl.col('turnover_rate')
        # The source comment treats turnover_rate as the free-float turnover;
        # the same column is reused as the denominator (simplified handling).
        simplified_ratio = turnover / (turnover + 1e-8)
        return stock_df.with_columns(simplified_ratio.alias('ff_to_total_turnover_ratio'))
|
||||
|
||||
|
||||
class VarianceOperator(StockWiseOperator):
    """Rolling variance of ``pct_chg``.

    Output column: ``variance_{window}``.
    """

    def __init__(self, window: int):
        """Configure the operator.

        Args:
            window: Window length of the rolling variance.
        """
        config = OperatorConfig(
            name=f"variance_{window}",
            description=f"{window}日方差",
            required_columns=['pct_chg'],
            output_columns=[f'variance_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``variance_{window}`` to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the factor column added.
        """
        # The window is positional because the polars parameter is
        # ``window_size``; ``window=`` is not accepted.
        variance = pl.col('pct_chg').rolling_var(self.window)

        return stock_df.with_columns(variance.alias(f'variance_{self.window}'))
|
||||
|
||||
|
||||
class LimitUpDownOperator(StockWiseOperator):
    """Limit-up / limit-down flags and their 10-row rolling counts.

    Output columns: ``cat_up_limit``, ``cat_down_limit``,
    ``up_limit_count_10d`` and ``down_limit_count_10d``.
    """

    def __init__(self):
        """Register name, required inputs and the output columns."""
        config = OperatorConfig(
            name="limit_up_down",
            description="涨跌停因子",
            required_columns=['close', 'up_limit', 'down_limit'],
            output_columns=['cat_up_limit', 'cat_down_limit', 'up_limit_count_10d', 'down_limit_count_10d'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the four limit columns to one stock's frame.

        Args:
            stock_df: Frame holding one stock's rows.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the flag and count columns added.
        """
        # Flag rows whose close equals the limit price exactly.
        up_limit = pl.col('close') == pl.col('up_limit')
        down_limit = pl.col('close') == pl.col('down_limit')

        # Number of flagged rows in the trailing 10. The window is positional
        # because the polars parameter is ``window_size``; ``window=`` is not accepted.
        up_count_10d = up_limit.cast(pl.Int64).rolling_sum(10)
        down_count_10d = down_limit.cast(pl.Int64).rolling_sum(10)

        return stock_df.with_columns([
            up_limit.alias('cat_up_limit'),
            down_limit.alias('cat_down_limit'),
            up_count_10d.alias('up_limit_count_10d'),
            down_count_10d.alias('down_limit_count_10d')
        ])
|
||||
|
||||
|
||||
class ConsecutiveUpLimitOperator(StockWiseOperator):
    """Stock-wise operator that emits the ``consecutive_up_limit`` column.

    NOTE(review): the output is just ``cat_up_limit`` cast to an integer
    (0/1 per row), not a running streak length. The original author marked
    this as a simplification -- confirm whether a true streak is required.
    """

    def __init__(self):
        """Register the operator's name, required input and output column."""
        super().__init__(
            OperatorConfig(
                name="consecutive_up_limit",
                description="连续涨停天数",
                required_columns=['cat_up_limit'],
                output_columns=['consecutive_up_limit'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``consecutive_up_limit`` appended."""
        # Integer form of the limit-up flag; see the class-level review note.
        flag_as_int = pl.col('cat_up_limit').cast(int)
        return stock_df.with_columns(flag_as_int.alias('consecutive_up_limit'))
|
||||
|
||||
|
||||
class MomentumFactorOperator(StockWiseOperator):
    """Stock-wise linear blend of ``volume_change_rate`` and ``turnover_deviation``.

    The output column ``momentum_factor_{alpha}`` equals
    ``volume_change_rate + alpha * turnover_deviation``.
    """

    def __init__(self, alpha: float = 0.5):
        """Configure the operator.

        Args:
            alpha: Weight applied to ``turnover_deviation``. It is also
                embedded in the operator name and the output column name.
        """
        super().__init__(
            OperatorConfig(
                name=f"momentum_factor_{alpha}",
                description=f"动量因子(alpha={alpha})",
                required_columns=['volume_change_rate', 'turnover_deviation'],
                output_columns=[f'momentum_factor_{alpha}'],
                parameters={'alpha': alpha},
            )
        )
        self.alpha = alpha

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``momentum_factor_{alpha}`` appended."""
        out_name = f'momentum_factor_{self.alpha}'
        blended = pl.col('volume_change_rate') + self.alpha * pl.col('turnover_deviation')
        return stock_df.with_columns(blended.alias(out_name))
|
||||
|
||||
|
||||
class ResonanceFactorOperator(StockWiseOperator):
    """Stock-wise product of ``volume_ratio`` and ``pct_chg``."""

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="resonance_factor",
                description="共振因子",
                required_columns=['volume_ratio', 'pct_chg'],
                output_columns=['resonance_factor'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``resonance_factor`` appended."""
        product = pl.col('volume_ratio') * pl.col('pct_chg')
        return stock_df.with_columns(product.alias('resonance_factor'))
|
||||
|
||||
|
||||
# Default momentum operator pipeline, applied in list order.
# ConsecutiveUpLimitOperator reads ``cat_up_limit``, which LimitUpDownOperator
# writes, so their relative order matters.
MOMENTUM_OPERATORS = [
    PriceMinusDeductionPriceOperator(),
    PriceDeductionPriceDiffRatioToSMAOperator(),
    CatPriceVsSmaVsDeductionPriceOperator(),
    VolatilitySlopeOperator(),
    TurnoverRateTrendStrengthOperator(5),
    FreeFloatTurnoverSurgeOperator(10),
    PriceVolumeTrendCoherenceOperator(),
    FreeFloatToTotalTurnoverRatioOperator(),
    VarianceOperator(20),
    LimitUpDownOperator(),
    ConsecutiveUpLimitOperator(),
    MomentumFactorOperator(),
    ResonanceFactorOperator(),
]
|
||||
|
||||
|
||||
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of operators over ``df``, chaining each result.

    Args:
        df: Input polars DataFrame.
        operators: Callables applied in order, each receiving the previous
            result. When ``None``, the module-level ``MOMENTUM_OPERATORS``
            list is used.

    Returns:
        The value returned by the last operator, or ``df`` itself when the
        sequence is empty.
    """
    pipeline = MOMENTUM_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
||||
245
main/factor/polars_money_flow_factors.py
Normal file
245
main/factor/polars_money_flow_factors.py
Normal file
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
资金流因子 - 使用Polars实现
|
||||
包含主力资金流、散户资金流等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
class MoneyFlowIntensityOperator(StockWiseOperator):
    """Stock-wise net large + extra-large order volume as a share of ``vol``.

    Output column ``flow_lg_elg_intensity`` equals
    ``(buy_lg_vol + buy_elg_vol - sell_lg_vol - sell_elg_vol) / (vol + 1e-8)``.
    """

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="money_flow_intensity",
                description="主力资金流强度",
                required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol', 'vol'],
                output_columns=['flow_lg_elg_intensity'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``flow_lg_elg_intensity`` appended."""
        zero_guard = 1e-8

        # Net buy volume of large and extra-large orders.
        net_large_volume = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )
        # The guard keeps the division defined when ``vol`` is zero.
        intensity = net_large_volume / (pl.col('vol') + zero_guard)

        return stock_df.with_columns(intensity.alias('flow_lg_elg_intensity'))
|
||||
|
||||
|
||||
class FlowDivergenceRatioOperator(StockWiseOperator):
    """Stock-wise ratio of small-order net buying to large-order net buying.

    Output column ``flow_divergence_ratio`` is the small-order net buy volume
    divided by the large + extra-large net buy volume, with epsilon guards on
    the denominator.
    """

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="flow_divergence_ratio",
                description="散户与主力背离度比率",
                required_columns=['buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'buy_elg_vol',
                                  'sell_lg_vol', 'sell_elg_vol'],
                output_columns=['flow_divergence_ratio'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``flow_divergence_ratio`` appended."""
        epsilon = 1e-8

        # Net buy volume of small orders.
        net_small = pl.col('buy_sm_vol') - pl.col('sell_sm_vol')

        # Net buy volume of large and extra-large orders.
        net_large = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )

        # Epsilon is always added once, and once more when the net large
        # volume is exactly zero, so that case divides by 2 * epsilon.
        extra_when_zero = pl.when(net_large == 0).then(epsilon).otherwise(0)
        denominator = net_large + extra_when_zero + epsilon

        return stock_df.with_columns(
            (net_small / denominator).alias('flow_divergence_ratio')
        )
|
||||
|
||||
|
||||
class FlowStructureChangeOperator(StockWiseOperator):
    """Stock-wise row-over-row change in the large-order share of buy volume.

    The share is ``(buy_lg_vol + buy_elg_vol)`` over the sum of small, large
    and extra-large buy volume; the output is its first difference.
    """

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="flow_structure_change",
                description="资金流结构变动",
                required_columns=['buy_sm_vol', 'buy_lg_vol', 'buy_elg_vol'],
                output_columns=['flow_struct_buy_change'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``flow_struct_buy_change`` appended."""
        zero_guard = 1e-8

        # Total buy volume across the three order-size buckets used here.
        all_buys = pl.col('buy_sm_vol') + pl.col('buy_lg_vol') + pl.col('buy_elg_vol')

        # Share of buy volume coming from large + extra-large orders.
        large_share = (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')) / (all_buys + zero_guard)

        # One-row difference of that share.
        return stock_df.with_columns(large_share.diff().alias('flow_struct_buy_change'))
|
||||
|
||||
|
||||
class FlowAccelerationOperator(StockWiseOperator):
    """Stock-wise second difference of net large + extra-large buy volume."""

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="flow_acceleration",
                description="资金流加速度",
                required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol'],
                output_columns=['flow_lg_elg_accel'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``flow_lg_elg_accel`` appended."""
        # Net buy volume of large and extra-large orders.
        net_large = (
            pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
            - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')
        )

        # Differencing twice gives the change of the change ("acceleration").
        second_difference = net_large.diff().diff()

        return stock_df.with_columns(second_difference.alias('flow_lg_elg_accel'))
|
||||
|
||||
|
||||
class LargeFlowMomentumCorrelationOperator(StockWiseOperator):
    """Stock-wise rolling correlation between large-order flow and price momentum.

    The flow series is the ``n``-row rolling sum of net large + extra-large
    buy volume multiplied by ``close``. The momentum series is the ``n``-row
    percentage change of ``close``. The two are correlated over ``m`` rows.

    NOTE(review): the description string mentions an "m-day price momentum",
    but the momentum is computed over ``n`` rows and ``m`` is only the
    correlation window -- confirm which is intended. ``vol`` is listed as
    required but not read by ``apply_stock``.
    """

    def __init__(self, n: int = 20, m: int = 60):
        """Configure the operator.

        Args:
            n: Window (rows) for the rolling flow sum and the momentum lookback.
            m: Window (rows) for the rolling correlation.
        """
        config = OperatorConfig(
            name=f"lg_flow_mom_corr_{n}_{m}",
            description=f"{n}日大单资金流与{m}日价格动量相关性",
            required_columns=['buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol', 'sell_elg_vol',
                              'close', 'vol'],
            output_columns=[f'lg_flow_mom_corr_{n}_{m}'],
            parameters={'n': n, 'm': m},
        )
        super().__init__(config)
        self.n = n
        self.m = m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``lg_flow_mom_corr_{n}_{m}`` appended.

        Args:
            stock_df: Frame containing the large-order buy/sell volume
                columns and ``close``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        # Net large-order volume converted to a value by multiplying by close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
             - pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        # polars rolling functions take the window length as ``window_size``.
        rolling_net_lg_flow = net_lg_flow_val.rolling_sum(window_size=self.n)

        price_mom = pl.col('close').pct_change(self.n)

        # Rolling correlation is a module-level polars function that takes
        # both expressions; it is not a method on ``Expr``.
        correlation = pl.rolling_corr(
            rolling_net_lg_flow, price_mom, window_size=self.m
        )

        return stock_df.with_columns(
            correlation.alias(f'lg_flow_mom_corr_{self.n}_{self.m}')
        )
|
||||
|
||||
|
||||
class LargeBuyConsolidationOperator(StockWiseOperator):
    """Stock-wise rolling mean of the net large-order flow ratio.

    The ratio is net large + extra-large buy volume over ``vol`` (with a 1e-8
    guard); the output ``lg_buy_consolidation_{n}`` is its ``n``-row mean.

    NOTE(review): ``vol_quantile`` is stored and recorded in the config, but
    no low-volatility filter is applied here. The original author noted that
    a cross-sectional (date-wise) quantile would be needed for that.
    """

    def __init__(self, n: int = 20, vol_quantile: float = 0.2):
        """Configure the operator.

        Args:
            n: Rolling window length in rows.
            vol_quantile: Recorded in the config and on the instance; not
                used by ``apply_stock``.
        """
        config = OperatorConfig(
            name=f"lg_buy_consolidation_{n}",
            description=f"{n}日大单买入盘整期",
            required_columns=['close', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol'],
            output_columns=[f'lg_buy_consolidation_{n}'],
            parameters={'n': n, 'vol_quantile': vol_quantile},
        )
        super().__init__(config)
        self.n = n
        self.vol_quantile = vol_quantile

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``lg_buy_consolidation_{n}`` appended.

        Args:
            stock_df: Frame containing the large-order buy/sell volume
                columns and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        epsilon = 1e-8

        # Net large-order volume as a fraction of total volume.
        net_lg_flow_ratio = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol')
             - pl.col('sell_lg_vol') - pl.col('sell_elg_vol'))
            / (pl.col('vol') + epsilon)
        )

        # polars rolling functions take the window length as ``window_size``.
        rolling_mean_ratio = net_lg_flow_ratio.rolling_mean(window_size=self.n)

        return stock_df.with_columns(
            rolling_mean_ratio.alias(f'lg_buy_consolidation_{self.n}')
        )
|
||||
|
||||
|
||||
# Default money-flow operator pipeline, applied in list order.
MONEY_FLOW_OPERATORS = [
    MoneyFlowIntensityOperator(),
    FlowDivergenceRatioOperator(),
    FlowStructureChangeOperator(),
    FlowAccelerationOperator(),
    LargeFlowMomentumCorrelationOperator(),
    LargeBuyConsolidationOperator(),
]
|
||||
|
||||
|
||||
def apply_money_flow_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of operators over ``df``, chaining each result.

    Args:
        df: Input polars DataFrame.
        operators: Callables applied in order, each receiving the previous
            result. When ``None``, the module-level ``MONEY_FLOW_OPERATORS``
            list is used.

    Returns:
        The value returned by the last operator, or ``df`` itself when the
        sequence is empty.
    """
    pipeline = MONEY_FLOW_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
||||
365
main/factor/polars_sentiment_factors.py
Normal file
365
main/factor/polars_sentiment_factors.py
Normal file
@@ -0,0 +1,365 @@
|
||||
"""
|
||||
情绪因子 - 使用Polars实现
|
||||
包含市场情绪、恐慌贪婪指数、反转因子等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
import talib
|
||||
|
||||
|
||||
class SentimentPanicGreedIndexOperator(StockWiseOperator):
    """Stock-wise panic/greed sentiment index.

    The raw signal is ``TR / ATR`` signed by ``pct_chg``, plus twice the
    opening gap versus the previous close. The output is its rolling mean
    over ``window_smooth`` rows. ATR here is the simple rolling mean of the
    true range over ``window_atr`` rows.
    """

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        """Configure the operator.

        Args:
            window_atr: Rows used for the rolling mean of the true range.
            window_smooth: Rows used to smooth the raw sentiment signal.
        """
        config = OperatorConfig(
            name=f"senti_panic_greed_{window_atr}_{window_smooth}",
            description=f"{window_atr}日ATR{window_smooth}日平滑恐慌贪婪指数",
            required_columns=['open', 'high', 'low', 'close', 'pct_chg', 'vol'],
            output_columns=[f'senti_panic_greed_{window_atr}_{window_smooth}'],
            parameters={'window_atr': window_atr, 'window_smooth': window_smooth},
        )
        super().__init__(config)
        self.window_atr = window_atr
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the smoothed sentiment column appended.

        Args:
            stock_df: Frame containing ``open``, ``high``, ``low``, ``close``
                and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        prev_close = pl.col('close').shift(1)

        # True range: the largest of the intraday range and the two
        # distances from the previous close.
        tr = pl.max_horizontal(
            pl.col('high') - pl.col('low'),
            (pl.col('high') - prev_close).abs(),
            (pl.col('low') - prev_close).abs(),
        )

        # polars rolling functions take the window length as ``window_size``.
        atr = tr.rolling_mean(window_size=self.window_atr)

        # Opening gap relative to the previous close; the first row has no
        # previous close, so the null is replaced by 0.
        gap = (pl.col('open') / prev_close - 1).fill_null(0)

        # Range relative to ATR, signed by the day's move, plus twice the gap.
        raw_senti = (tr / (atr + 1e-8)) * pl.col('pct_chg').sign() + gap * 2

        sentiment = raw_senti.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(
            sentiment.alias(f'senti_panic_greed_{self.window_atr}_{self.window_smooth}')
        )
|
||||
|
||||
|
||||
class SentimentMarketBreadthProxyOperator(StockWiseOperator):
    """Stock-wise smoothed product of ``pct_chg`` and relative volume.

    Relative volume is ``vol`` divided by its ``window_vol``-row rolling mean
    (plus a 1e-8 guard). The product with ``pct_chg`` is then averaged over
    ``window_smooth`` rows.
    """

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        """Configure the operator.

        Args:
            window_vol: Rows used for the rolling mean of ``vol``.
            window_smooth: Rows used to smooth the raw signal.
        """
        config = OperatorConfig(
            name=f"senti_breadth_proxy_{window_vol}_{window_smooth}",
            description=f"{window_vol}日成交量{window_smooth}日平滑市场宽度情绪代理",
            required_columns=['pct_chg', 'vol'],
            output_columns=[f'senti_breadth_proxy_{window_vol}_{window_smooth}'],
            parameters={'window_vol': window_vol, 'window_smooth': window_smooth},
        )
        super().__init__(config)
        self.window_vol = window_vol
        self.window_smooth = window_smooth

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the smoothed breadth-proxy column appended.

        Args:
            stock_df: Frame containing ``pct_chg`` and ``vol``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        # polars rolling functions take the window length as ``window_size``.
        rolling_avg_vol = pl.col('vol').rolling_mean(window_size=self.window_vol)

        # Return weighted by volume relative to its recent average.
        raw_breadth = pl.col('pct_chg') * (pl.col('vol') / (rolling_avg_vol + 1e-8))

        breadth_proxy = raw_breadth.rolling_mean(window_size=self.window_smooth)

        return stock_df.with_columns(
            breadth_proxy.alias(f'senti_breadth_proxy_{self.window_vol}_{self.window_smooth}')
        )
|
||||
|
||||
|
||||
class SentimentReversalIndicatorOperator(StockWiseOperator):
    """Stock-wise short-term reversal factor.

    The output equals the negated ``window_ret``-row percentage change of
    ``close`` multiplied by the ``window_vol``-row rolling standard deviation
    of ``pct_chg``.
    """

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        """Configure the operator.

        Args:
            window_ret: Lookback (rows) for the percentage change of ``close``.
            window_vol: Rows used for the rolling standard deviation.
        """
        config = OperatorConfig(
            name=f"senti_reversal_{window_ret}_{window_vol}",
            description=f"{window_ret}日收益{window_vol}日波动短期情绪反转因子",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'senti_reversal_{window_ret}_{window_vol}'],
            parameters={'window_ret': window_ret, 'window_vol': window_vol},
        )
        super().__init__(config)
        self.window_ret = window_ret
        self.window_vol = window_vol

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the reversal column appended.

        Args:
            stock_df: Frame containing ``close`` and ``pct_chg``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        return_m = pl.col('close').pct_change(self.window_ret)

        # polars rolling functions take the window length as ``window_size``.
        volatility_m = pl.col('pct_chg').rolling_std(window_size=self.window_vol)

        # The sign is flipped so that a recent gain yields a negative value.
        reversal_factor = -return_m * volatility_m

        return stock_df.with_columns(
            reversal_factor.alias(f'senti_reversal_{self.window_ret}_{self.window_vol}')
        )
|
||||
|
||||
|
||||
class DailyMomentumBenchmarkOperator(StockWiseOperator):
    """Placeholder that writes constant-zero daily benchmark columns.

    The original author noted that the real benchmark needs a cross-sectional
    (date-wise) computation; this stock-wise version only emits zeros.
    """

    def __init__(self):
        """Register the operator's name, required input and output columns."""
        super().__init__(
            OperatorConfig(
                name="daily_momentum_benchmark",
                description="日级别动量基准",
                required_columns=['pct_chg'],
                output_columns=['daily_positive_benchmark', 'daily_negative_benchmark'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with both benchmark columns set to literal 0."""
        placeholder_names = ('daily_positive_benchmark', 'daily_negative_benchmark')
        return stock_df.with_columns(
            [pl.lit(0).alias(col_name) for col_name in placeholder_names]
        )
|
||||
|
||||
|
||||
class DailyDeviationOperator(StockWiseOperator):
    """Stock-wise deviation of ``pct_chg`` from the same-direction benchmark.

    Rows where ``pct_chg`` and ``daily_positive_benchmark`` are both positive
    get ``pct_chg - daily_positive_benchmark``. Rows where ``pct_chg`` and
    ``daily_negative_benchmark`` are both negative get
    ``pct_chg - daily_negative_benchmark``. All other rows get 0.
    """

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        config = OperatorConfig(
            name="daily_deviation",
            description="日级别偏离度",
            required_columns=['pct_chg', 'daily_positive_benchmark', 'daily_negative_benchmark'],
            output_columns=['daily_deviation'],
            parameters={},
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``daily_deviation`` appended.

        Args:
            stock_df: Frame containing ``pct_chg`` and both benchmark columns.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        pct_chg = pl.col('pct_chg')
        positive_benchmark = pl.col('daily_positive_benchmark')
        negative_benchmark = pl.col('daily_negative_benchmark')

        both_positive = (pct_chg > 0) & (positive_benchmark > 0)
        both_negative = (pct_chg < 0) & (negative_benchmark < 0)

        # A when/then chain is the polars expression form of a first-match
        # conditional; ``pl.select`` builds a DataFrame and cannot be used
        # as a column expression.
        deviation = (
            pl.when(both_positive).then(pct_chg - positive_benchmark)
            .when(both_negative).then(pct_chg - negative_benchmark)
            .otherwise(0.0)
        )

        return stock_df.with_columns(deviation.alias('daily_deviation'))
|
||||
|
||||
|
||||
class CatSentimentMomentumVolumeSpikeOperator(StockWiseOperator):
    """Stock-wise category built from three momentum / volume conditions.

    The three boolean conditions are each cast to a string and concatenated
    into one label per row.

    NOTE(review): concatenating the string forms of three booleans yields a
    compound label rather than a single boolean flag -- confirm this is the
    intended encoding and not a leftover from a logical AND.
    """

    def __init__(self, return_period: int = 3, return_threshold: float = 0.05,
                 volume_ratio_threshold: float = 1.5, current_pct_chg_min: float = -0.01,
                 current_pct_chg_max: float = 0.03):
        """Configure the operator.

        Args:
            return_period: Lookback (rows) for the percentage change of ``close``.
            return_threshold: The lookback return must exceed this value.
            volume_ratio_threshold: ``volume_ratio`` must exceed this value.
            current_pct_chg_min: Exclusive lower bound for the row's ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound for the row's ``pct_chg``.
        """
        super().__init__(
            OperatorConfig(
                name=f"cat_senti_mom_vol_spike_{return_period}",
                description=f"{return_period}日情绪动量成交量激增分类",
                required_columns=['close', 'pct_chg', 'volume_ratio'],
                output_columns=[f'cat_senti_mom_vol_spike_{return_period}'],
                parameters={'return_period': return_period, 'return_threshold': return_threshold,
                            'volume_ratio_threshold': volume_ratio_threshold,
                            'current_pct_chg_min': current_pct_chg_min,
                            'current_pct_chg_max': current_pct_chg_max},
            )
        )
        self.return_period = return_period
        self.return_threshold = return_threshold
        self.volume_ratio_threshold = volume_ratio_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with the concatenated category column appended."""
        pct_chg = pl.col('pct_chg')

        # Condition 1: lookback return above the threshold.
        has_momentum = pl.col('close').pct_change(self.return_period) > self.return_threshold
        # Condition 2: volume ratio above the threshold.
        has_volume = pl.col('volume_ratio') > self.volume_ratio_threshold
        # Condition 3: the row's move lies strictly inside the configured band.
        in_band = (pct_chg > self.current_pct_chg_min) & (pct_chg < self.current_pct_chg_max)

        label = has_momentum.cast(str) + has_volume.cast(str) + in_band.cast(str)

        return stock_df.with_columns(
            label.alias(f'cat_senti_mom_vol_spike_{self.return_period}')
        )
|
||||
|
||||
|
||||
class CatSentimentPreBreakoutOperator(StockWiseOperator):
    """Stock-wise category built from four pre-breakout consolidation conditions.

    Conditions (each cast to a string and concatenated into one label):
        1. Short-window mean of ``high - low`` below 0.7x the long-window mean.
        2. Short-window mean of ``vol`` below 0.7x the long-window mean.
        3. Recent high-low span relative to ``close`` below a threshold.
        4. The row's ``pct_chg`` inside a band AND ``volume_ratio`` above a
           threshold.

    NOTE(review): concatenating the string forms of the booleans yields a
    compound label rather than a single boolean flag -- confirm this is the
    intended encoding.
    """

    def __init__(self, atr_short_n: int = 10, atr_long_m: int = 40,
                 vol_atrophy_n: int = 10, vol_atrophy_m: int = 40,
                 price_stab_n: int = 5, price_stab_threshold: float = 0.05,
                 current_pct_chg_min: float = 0.005, current_pct_chg_max: float = 0.07,
                 volume_ratio_threshold: float = 1.2):
        """Configure the operator.

        Args:
            atr_short_n: Short window (rows) for the mean price range.
            atr_long_m: Long window (rows) for the mean price range.
            vol_atrophy_n: Short window (rows) for the mean volume.
            vol_atrophy_m: Long window (rows) for the mean volume.
            price_stab_n: Window (rows) for the rolling max high / min low.
            price_stab_threshold: Upper bound for the relative high-low span.
            current_pct_chg_min: Exclusive lower bound for the row's ``pct_chg``.
            current_pct_chg_max: Exclusive upper bound for the row's ``pct_chg``.
            volume_ratio_threshold: ``volume_ratio`` must exceed this value.
        """
        config = OperatorConfig(
            name="cat_senti_pre_breakout",
            description="情绪突破前盘整分类",
            required_columns=['high', 'low', 'close', 'vol', 'pct_chg', 'volume_ratio'],
            output_columns=['cat_senti_pre_breakout'],
            parameters={'atr_short_n': atr_short_n, 'atr_long_m': atr_long_m,
                        'vol_atrophy_n': vol_atrophy_n, 'vol_atrophy_m': vol_atrophy_m,
                        'price_stab_n': price_stab_n, 'price_stab_threshold': price_stab_threshold,
                        'current_pct_chg_min': current_pct_chg_min, 'current_pct_chg_max': current_pct_chg_max,
                        'volume_ratio_threshold': volume_ratio_threshold},
        )
        super().__init__(config)
        self.atr_short_n = atr_short_n
        self.atr_long_m = atr_long_m
        self.vol_atrophy_n = vol_atrophy_n
        self.vol_atrophy_m = vol_atrophy_m
        self.price_stab_n = price_stab_n
        self.price_stab_threshold = price_stab_threshold
        self.current_pct_chg_min = current_pct_chg_min
        self.current_pct_chg_max = current_pct_chg_max
        self.volume_ratio_threshold = volume_ratio_threshold

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``cat_senti_pre_breakout`` appended.

        Args:
            stock_df: Frame containing ``high``, ``low``, ``close``, ``vol``,
                ``pct_chg`` and ``volume_ratio``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        # All rolling calls below use ``window_size``, the keyword polars
        # rolling functions accept for the window length.

        # 1. Volatility contraction, using high - low as an ATR proxy.
        price_range = pl.col('high') - pl.col('low')
        atr_short = price_range.rolling_mean(window_size=self.atr_short_n)
        atr_long = price_range.rolling_mean(window_size=self.atr_long_m)
        cond_vol_contraction = atr_short < (0.7 * atr_long)

        # 2. Volume atrophy.
        vol_short = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_n)
        vol_long = pl.col('vol').rolling_mean(window_size=self.vol_atrophy_m)
        cond_vol_atrophy = vol_short < (0.7 * vol_long)

        # 3. Recent price stability: high-low span relative to close.
        rolling_max_h = pl.col('high').rolling_max(window_size=self.price_stab_n)
        rolling_min_l = pl.col('low').rolling_min(window_size=self.price_stab_n)
        price_stability = (rolling_max_h - rolling_min_l) / pl.col('close')
        cond_price_stability = price_stability < self.price_stab_threshold

        # 4. Current-row signal: moderate gain together with elevated volume.
        cond_price_signal = (
            (pl.col('pct_chg') > self.current_pct_chg_min)
            & (pl.col('pct_chg') < self.current_pct_chg_max)
        )
        cond_vol_signal = pl.col('volume_ratio') > self.volume_ratio_threshold
        cond_current_day_signal = cond_price_signal & cond_vol_signal

        # Concatenate the string form of each condition into one label.
        result = (
            cond_vol_contraction.cast(str) + cond_vol_atrophy.cast(str)
            + cond_price_stability.cast(str) + cond_current_day_signal.cast(str)
        )

        return stock_df.with_columns(result.alias('cat_senti_pre_breakout'))
|
||||
|
||||
|
||||
class StrongInflowSignalOperator(StockWiseOperator):
    """Stock-wise 0/1 signal for strong large-order inflow.

    The signal is 1 when all three conditions hold:
        1. Rolling mean of ``flow_lg_elg_intensity`` exceeds a threshold.
        2. ``flow_lg_elg_intensity`` was positive on each of the last
           ``consecutive_buy_n`` rows.
        3. ``flow_lg_elg_accel`` was positive on each of the last
           ``accel_positive_m`` rows.
    """

    def __init__(self, intensity_avg_n: int = 3, intensity_threshold: float = 0.01,
                 consecutive_buy_n: int = 2, accel_positive_m: int = 1):
        """Configure the operator.

        Args:
            intensity_avg_n: Window (rows) for the mean intensity.
            intensity_threshold: The mean intensity must exceed this value.
            consecutive_buy_n: Number of trailing rows that must all show
                positive intensity.
            accel_positive_m: Number of trailing rows that must all show
                positive acceleration.
        """
        config = OperatorConfig(
            name="senti_strong_inflow",
            description="强主力资金流入信号",
            required_columns=['flow_lg_elg_intensity', 'flow_lg_elg_accel'],
            output_columns=['senti_strong_inflow'],
            parameters={'intensity_avg_n': intensity_avg_n, 'intensity_threshold': intensity_threshold,
                        'consecutive_buy_n': consecutive_buy_n, 'accel_positive_m': accel_positive_m},
        )
        super().__init__(config)
        self.intensity_avg_n = intensity_avg_n
        self.intensity_threshold = intensity_threshold
        self.consecutive_buy_n = consecutive_buy_n
        self.accel_positive_m = accel_positive_m

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``senti_strong_inflow`` appended.

        When either input column is missing, the output column is filled
        with literal 0 instead of raising.

        Args:
            stock_df: Frame that should contain ``flow_lg_elg_intensity`` and
                ``flow_lg_elg_accel``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        required_cols = ['flow_lg_elg_intensity', 'flow_lg_elg_accel']
        if not all(col in stock_df.columns for col in required_cols):
            # Best-effort fallback: upstream flow factors were not computed.
            return stock_df.with_columns(pl.lit(0).alias('senti_strong_inflow'))

        # All rolling calls below use ``window_size``, the keyword polars
        # rolling functions accept for the window length.

        # 1. Mean intensity over the recent window.
        avg_intensity = pl.col('flow_lg_elg_intensity').rolling_mean(
            window_size=self.intensity_avg_n
        )
        cond_avg_intensity = avg_intensity > self.intensity_threshold

        # 2. Every row in the window is a net buy: the rolling count of
        #    positive rows equals the window length.
        is_net_buy = (pl.col('flow_lg_elg_intensity') > 0).cast(pl.Int64)
        cond_consecutive_buy = (
            is_net_buy.rolling_sum(window_size=self.consecutive_buy_n)
            == self.consecutive_buy_n
        )

        # 3. Every row in the window has positive acceleration.
        is_accel_positive = (pl.col('flow_lg_elg_accel') > 0).cast(pl.Int64)
        cond_accel_positive = (
            is_accel_positive.rolling_sum(window_size=self.accel_positive_m)
            == self.accel_positive_m
        )

        strong_inflow = cond_avg_intensity & cond_consecutive_buy & cond_accel_positive

        return stock_df.with_columns(
            strong_inflow.cast(pl.Int64).alias('senti_strong_inflow')
        )
|
||||
|
||||
|
||||
# Default sentiment operator pipeline, applied in list order.
# DailyDeviationOperator reads the benchmark columns that
# DailyMomentumBenchmarkOperator writes, so their relative order matters.
SENTIMENT_OPERATORS = [
    SentimentPanicGreedIndexOperator(),
    SentimentMarketBreadthProxyOperator(),
    SentimentReversalIndicatorOperator(),
    DailyMomentumBenchmarkOperator(),
    DailyDeviationOperator(),
    CatSentimentMomentumVolumeSpikeOperator(),
    CatSentimentPreBreakoutOperator(),
    StrongInflowSignalOperator(),
]
|
||||
|
||||
|
||||
def apply_sentiment_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of operators over ``df``, chaining each result.

    Args:
        df: Input polars DataFrame.
        operators: Callables applied in order, each receiving the previous
            result. When ``None``, the module-level ``SENTIMENT_OPERATORS``
            list is used.

    Returns:
        The value returned by the last operator, or ``df`` itself when the
        sequence is empty.
    """
    pipeline = SENTIMENT_OPERATORS if operators is None else operators

    current = df
    for step in pipeline:
        current = step(current)
    return current
|
||||
488
main/factor/polars_technical_factors.py
Normal file
488
main/factor/polars_technical_factors.py
Normal file
@@ -0,0 +1,488 @@
|
||||
"""
|
||||
技术指标因子 - 使用Polars实现
|
||||
包含ATR、OBV、RSI、EMA等技术指标相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
import talib
|
||||
|
||||
|
||||
class ATROperator(StockWiseOperator):
    """Stock-wise ATR computed with TA-Lib's ``ATR`` function."""

    def __init__(self, period: int = 14):
        """Configure the operator.

        Args:
            period: ``timeperiod`` passed to ``talib.ATR``; also embedded in
                the operator name and the output column ``atr_{period}``.
        """
        super().__init__(
            OperatorConfig(
                name=f"atr_{period}",
                description=f"{period}日ATR",
                required_columns=['high', 'low', 'close'],
                output_columns=[f'atr_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``atr_{period}`` appended."""
        # TA-Lib works on numpy arrays, so each column is exported first.
        highs = stock_df['high'].to_numpy()
        lows = stock_df['low'].to_numpy()
        closes = stock_df['close'].to_numpy()

        atr_array = talib.ATR(highs, lows, closes, timeperiod=self.period)

        return stock_df.with_columns(pl.Series(f'atr_{self.period}', atr_array))
|
||||
|
||||
|
||||
class OBVOperator(StockWiseOperator):
    """Stock-wise On-Balance Volume computed with TA-Lib's ``OBV`` function."""

    def __init__(self):
        """Register the operator's name, required inputs and output column."""
        super().__init__(
            OperatorConfig(
                name="obv",
                description="OBV能量潮",
                required_columns=['close', 'vol'],
                output_columns=['obv'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``obv`` appended."""
        # TA-Lib works on numpy arrays, so each column is exported first.
        closes = stock_df['close'].to_numpy()
        volumes = stock_df['vol'].to_numpy()

        obv_array = talib.OBV(closes, volumes)

        return stock_df.with_columns(pl.Series('obv', obv_array))
|
||||
|
||||
|
||||
class OBVMAOperator(StockWiseOperator):
    """Stock-wise simple moving average of ``obv`` via TA-Lib's ``SMA``.

    The operator is named ``obv_ma_{period}`` but writes its result to the
    column ``maobv_{period}``.
    """

    def __init__(self, period: int = 6):
        """Configure the operator.

        Args:
            period: ``timeperiod`` passed to ``talib.SMA``.
        """
        super().__init__(
            OperatorConfig(
                name=f"obv_ma_{period}",
                description=f"{period}日OBV均线",
                required_columns=['obv'],
                output_columns=[f'maobv_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``maobv_{period}`` appended."""
        obv_array = stock_df['obv'].to_numpy()
        sma_array = talib.SMA(obv_array, timeperiod=self.period)

        return stock_df.with_columns(pl.Series(f'maobv_{self.period}', sma_array))
|
||||
|
||||
|
||||
class RSIOperator(StockWiseOperator):
    """Stock-wise RSI of ``close`` computed with TA-Lib's ``RSI`` function."""

    def __init__(self, period: int = 3):
        """Configure the operator.

        Args:
            period: ``timeperiod`` passed to ``talib.RSI``; also embedded in
                the operator name and the output column ``rsi_{period}``.
        """
        super().__init__(
            OperatorConfig(
                name=f"rsi_{period}",
                description=f"{period}日RSI",
                required_columns=['close'],
                output_columns=[f'rsi_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``rsi_{period}`` appended."""
        closes = stock_df['close'].to_numpy()
        rsi_array = talib.RSI(closes, timeperiod=self.period)

        return stock_df.with_columns(pl.Series(f'rsi_{self.period}', rsi_array))
|
||||
|
||||
|
||||
class EMAOperator(StockWiseOperator):
    """Stock-wise EMA of ``close`` computed with TA-Lib's ``EMA`` function.

    The result is written to ``_ema_{period}`` (leading underscore), which
    the activity-factor operators in this module read as an input.
    """

    def __init__(self, period: int):
        """Configure the operator.

        Args:
            period: ``timeperiod`` passed to ``talib.EMA``.
        """
        super().__init__(
            OperatorConfig(
                name=f"ema_{period}",
                description=f"{period}日EMA",
                required_columns=['close'],
                output_columns=[f'_ema_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``_ema_{period}`` appended."""
        closes = stock_df['close'].to_numpy()
        ema_array = talib.EMA(closes, timeperiod=self.period)

        return stock_df.with_columns(pl.Series(f'_ema_{self.period}', ema_array))
|
||||
|
||||
|
||||
class ReturnOperator(StockWiseOperator):
    """Stock-wise ``period``-row percentage change of ``close``."""

    def __init__(self, period: int):
        """Configure the operator.

        Args:
            period: Lookback in rows passed to ``pct_change``; also embedded
                in the operator name and the output column ``return_{period}``.
        """
        super().__init__(
            OperatorConfig(
                name=f"return_{period}",
                description=f"{period}日收益率",
                required_columns=['close'],
                output_columns=[f'return_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``return_{period}`` appended."""
        out_name = f'return_{self.period}'
        return stock_df.with_columns(
            pl.col('close').pct_change(self.period).alias(out_name)
        )
|
||||
|
||||
|
||||
class ActivityFactorOperator(StockWiseOperator):
    """Stock-wise arctangent of the scaled one-row EMA change.

    Reads ``_ema_{period}`` and writes ``act_factor{period}`` (no underscore
    before the period in the output name).
    """

    def __init__(self, period: int, scale: float):
        """Configure the operator.

        Args:
            period: Selects the ``_ema_{period}`` input column.
            scale: Divisor applied to the EMA change before the arctangent.
        """
        super().__init__(
            OperatorConfig(
                name=f"act_factor_{period}",
                description=f"{period}日活跃度因子",
                required_columns=[f'_ema_{period}'],
                output_columns=[f'act_factor{period}'],
                parameters={'period': period, 'scale': scale},
            )
        )
        self.period = period
        self.scale = scale

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``act_factor{period}`` appended."""
        ema = pl.col(f'_ema_{self.period}')

        # One-row percentage change of the EMA, expressed in percent.
        ema_pct_change = (ema / ema.shift(1) - 1) * 100

        # 57.3 is presumably degrees-per-radian -- TODO confirm.
        factor = (ema_pct_change * 57.3 / self.scale).arctan()

        return stock_df.with_columns(factor.alias(f'act_factor{self.period}'))
|
||||
|
||||
|
||||
class ActivityFactor5Operator(StockWiseOperator):
    """Stock-wise activity factor from ``_ema_5`` with a fixed scale of 50.

    Writes its result to the column ``act_factor1``.
    """

    def __init__(self):
        """Register the operator's name, required input and output column."""
        super().__init__(
            OperatorConfig(
                name="act_factor_5",
                description="5日活跃度因子",
                required_columns=['_ema_5'],
                output_columns=['act_factor1'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Return ``stock_df`` with ``act_factor1`` appended."""
        ema = pl.col('_ema_5')

        # One-row percentage change of the EMA, expressed in percent.
        ema_pct_change = (ema / ema.shift(1) - 1) * 100

        # 57.3 is presumably degrees-per-radian -- TODO confirm.
        factor = (ema_pct_change * 57.3 / 50).arctan()

        return stock_df.with_columns(factor.alias('act_factor1'))
|
||||
|
||||
|
||||
class ActivityFactor13Operator(StockWiseOperator):
    """Activity factor built on ``_ema_13``; written to ``act_factor2``."""

    def __init__(self):
        """Register the fixed 13-period configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_13",
                description="13日活跃度因子",
                required_columns=['_ema_13'],
                output_columns=['act_factor2'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``act_factor2 = arctan(pct_change(_ema_13) * 57.3 / 40)``.

        Args:
            stock_df: Frame containing ``_ema_13``; assumed to hold one
                stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``act_factor2`` added.
        """
        ema = pl.col('_ema_13')
        # Row-over-row EMA change, expressed in percent.
        pct_move = (ema / ema.shift(1) - 1) * 100
        factor = (pct_move * 57.3 / 40).arctan()
        return stock_df.with_columns(factor.alias('act_factor2'))
|
||||
|
||||
|
||||
class ActivityFactor20Operator(StockWiseOperator):
    """Activity factor built on ``_ema_20``; written to ``act_factor3``."""

    def __init__(self):
        """Register the fixed 20-period configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_20",
                description="20日活跃度因子",
                required_columns=['_ema_20'],
                output_columns=['act_factor3'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``act_factor3 = arctan(pct_change(_ema_20) * 57.3 / 21)``.

        Args:
            stock_df: Frame containing ``_ema_20``; assumed to hold one
                stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``act_factor3`` added.
        """
        ema = pl.col('_ema_20')
        # Row-over-row EMA change, expressed in percent.
        pct_move = (ema / ema.shift(1) - 1) * 100
        factor = (pct_move * 57.3 / 21).arctan()
        return stock_df.with_columns(factor.alias('act_factor3'))
|
||||
|
||||
|
||||
class ActivityFactor60Operator(StockWiseOperator):
    """Activity factor built on ``_ema_60``; written to ``act_factor4``."""

    def __init__(self):
        """Register the fixed 60-period configuration."""
        super().__init__(
            OperatorConfig(
                name="act_factor_60",
                description="60日活跃度因子",
                required_columns=['_ema_60'],
                output_columns=['act_factor4'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``act_factor4 = arctan(pct_change(_ema_60) * 57.3 / 10)``.

        Args:
            stock_df: Frame containing ``_ema_60``; assumed to hold one
                stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``act_factor4`` added.
        """
        ema = pl.col('_ema_60')
        # Row-over-row EMA change, expressed in percent.
        pct_move = (ema / ema.shift(1) - 1) * 100
        factor = (pct_move * 57.3 / 10).arctan()
        return stock_df.with_columns(factor.alias('act_factor4'))
|
||||
|
||||
|
||||
class ActivityFactor5and6Operator(StockWiseOperator):
    """Combine ``act_factor1``..``act_factor4`` into ``act_factor5`` and ``act_factor6``."""

    def __init__(self):
        """Register the operator and every input column it actually reads."""
        config = OperatorConfig(
            name="act_factor_5_6",
            description="活跃度因子5和6",
            # apply_stock sums all four activity factors, so all four are
            # declared as required (previously only the first two were).
            required_columns=['act_factor1', 'act_factor2',
                              'act_factor3', 'act_factor4'],
            output_columns=['act_factor5', 'act_factor6'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``act_factor5`` and ``act_factor6``.

        ``act_factor5`` is the sum of ``act_factor1``..``act_factor4``.
        ``act_factor6`` is ``(f1 - f2) / (sqrt(f1**2 + f2**2) + 1e-8)``.

        Args:
            stock_df: Frame containing ``act_factor1``..``act_factor4``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with the two combined factor columns added.
        """
        factor5 = (
            pl.col('act_factor1') + pl.col('act_factor2')
            + pl.col('act_factor3') + pl.col('act_factor4')
        )

        numerator = pl.col('act_factor1') - pl.col('act_factor2')
        denominator = (pl.col('act_factor1').pow(2) + pl.col('act_factor2').pow(2)).sqrt()
        # The small epsilon keeps the ratio finite when both factors are zero.
        factor6 = numerator / (denominator + 1e-8)

        return stock_df.with_columns([
            factor5.alias('act_factor5'),
            factor6.alias('act_factor6'),
        ])
|
||||
|
||||
|
||||
class Alpha003Operator(StockWiseOperator):
    """Alpha003: candle body relative to the high-low range."""

    def __init__(self):
        """Register the operator's column contract."""
        super().__init__(
            OperatorConfig(
                name="alpha_003",
                description="Alpha003因子",
                required_columns=['open', 'close', 'high', 'low'],
                output_columns=['alpha_003'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``alpha_003 = (close - open) / (high - low)``.

        Rows where ``high`` equals ``low`` get 0 instead of a division.

        Args:
            stock_df: Frame with ``open``, ``close``, ``high``, ``low``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``alpha_003`` added.
        """
        high, low = pl.col('high'), pl.col('low')
        candle_body = pl.col('close') - pl.col('open')
        ratio = (
            pl.when(high != low)
            .then(candle_body / (high - low))
            .otherwise(0)
        )
        return stock_df.with_columns(ratio.alias('alpha_003'))
|
||||
|
||||
|
||||
class Alpha007Operator(StockWiseOperator):
    """Alpha007: 5-row rolling correlation between ``close`` and ``vol``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="alpha_007",
            description="Alpha007因子",
            required_columns=['close', 'vol'],
            output_columns=['alpha_007'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_007`` column.

        Args:
            stock_df: Frame with ``close`` and ``vol``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``alpha_007`` added.
        """
        # Rolling correlation is a top-level Polars function taking both
        # expressions and a ``window_size``; it is not an Expr method.
        corr_5 = pl.rolling_corr(pl.col('close'), pl.col('vol'), window_size=5)

        return stock_df.with_columns(corr_5.alias('alpha_007'))
|
||||
|
||||
|
||||
class Alpha013Operator(StockWiseOperator):
    """Alpha013: 5-row rolling sum of ``close`` minus its 20-row rolling sum."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="alpha_013",
            description="Alpha013因子",
            required_columns=['close'],
            output_columns=['alpha_013'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_013`` column.

        Args:
            stock_df: Frame with ``close``; assumed to hold one stock's
                rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``alpha_013`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        sum_5 = pl.col('close').rolling_sum(window_size=5)
        sum_20 = pl.col('close').rolling_sum(window_size=20)

        alpha_013 = sum_5 - sum_20

        return stock_df.with_columns(alpha_013.alias('alpha_013'))
|
||||
|
||||
|
||||
class Alpha022Operator(StockWiseOperator):
    """Modified Alpha022: negative change in high/volume covariance times close volatility."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="alpha_022",
            description="Alpha022改进因子",
            required_columns=['high', 'low', 'close', 'vol'],
            output_columns=['alpha_22_improved'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``alpha_22_improved`` column.

        Computed as ``-1 * diff5(rolling_cov5(high, vol)) * rolling_std20(close)``.

        Args:
            stock_df: Frame with ``high``, ``vol`` and ``close``; assumed to
                hold one stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``alpha_22_improved`` added.
        """
        # Rolling covariance is a top-level Polars function, not an Expr method.
        cov_5 = pl.rolling_cov(pl.col('high'), pl.col('vol'), window_size=5)

        # Change of the covariance over 5 rows.
        delta_cov = cov_5.diff(5)

        std_close = pl.col('close').rolling_std(window_size=20)

        # Simplified variant: the raw standard deviation stands in for its rank.
        rank_std = std_close

        alpha_22 = -1 * delta_cov * rank_std

        return stock_df.with_columns(alpha_22.alias('alpha_22_improved'))
|
||||
|
||||
|
||||
class BBIRatioOperator(StockWiseOperator):
    """BBI ratio: mean of the 3/6/12/24-row SMAs of ``close`` divided by ``close``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="bbi_ratio",
            description="BBI比率因子",
            required_columns=['close'],
            output_columns=['bbi_ratio_factor'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``bbi_ratio_factor`` column.

        Args:
            stock_df: Frame with ``close``; assumed to hold one stock's
                rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``bbi_ratio_factor`` added.
        """
        close = pl.col('close')
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        sma3 = close.rolling_mean(window_size=3)
        sma6 = close.rolling_mean(window_size=6)
        sma12 = close.rolling_mean(window_size=12)
        sma24 = close.rolling_mean(window_size=24)

        bbi = (sma3 + sma6 + sma12 + sma24) / 4

        bbi_ratio = bbi / close

        return stock_df.with_columns(bbi_ratio.alias('bbi_ratio_factor'))
|
||||
|
||||
|
||||
# Default technical-indicator operators, listed in application order.
# The EMA operators come before the activity-factor operators that read
# their ``_ema_*`` columns, and ``act_factor1``..``act_factor4`` come before
# the operator that combines them.
TECHNICAL_OPERATORS = [
    *(ATROperator(period) for period in (14, 6)),
    OBVOperator(),
    OBVMAOperator(6),
    RSIOperator(3),
    *(EMAOperator(period) for period in (5, 13, 20, 60)),
    *(ReturnOperator(period) for period in (5, 20)),
    ActivityFactor5Operator(),
    ActivityFactor13Operator(),
    ActivityFactor20Operator(),
    ActivityFactor60Operator(),
    ActivityFactor5and6Operator(),
    Alpha003Operator(),
    Alpha007Operator(),
    Alpha013Operator(),
    Alpha022Operator(),
    BBIRatioOperator(),
]
|
||||
|
||||
|
||||
def apply_technical_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of technical-indicator operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. ``None`` selects
            ``TECHNICAL_OPERATORS``.

    Returns:
        The frame produced by feeding ``df`` through each operator in turn.
    """
    pipeline = TECHNICAL_OPERATORS if operators is None else operators

    frame = df
    for step in pipeline:
        # Each operator is callable and returns the augmented frame.
        frame = step(frame)
    return frame
|
||||
419
main/factor/polars_volatility_factors.py
Normal file
419
main/factor/polars_volatility_factors.py
Normal file
@@ -0,0 +1,419 @@
|
||||
"""
|
||||
波动率因子 - 使用Polars实现
|
||||
包含上行波动率、下行波动率、波动率比率等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
class UpsideVolatilityOperator(StockWiseOperator):
    """Rolling volatility of positive ``pct_chg`` values."""

    def __init__(self, window: int = 20):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"upside_volatility_{window}",
            description=f"{window}日上行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'upside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``upside_volatility_<window>`` column.

        Within each window the variance is ``E[x^2] - E[x]^2`` taken over
        the rows whose ``pct_chg`` is positive. Windows with fewer than two
        such rows yield null.

        Args:
            stock_df: Frame with ``pct_chg``; assumed to hold one stock's
                rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``upside_volatility_{self.window}`` added.
        """
        is_up = pl.col('pct_chg') > 0

        # Non-positive returns contribute 0 to the sums below.
        pos_returns = pl.when(is_up).then(pl.col('pct_chg')).otherwise(0)
        pos_returns_sq = pos_returns.pow(2)

        # Cast the mask to integers before summing, and use Polars'
        # ``window_size`` keyword for every rolling call.
        rolling_pos_count = is_up.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_pos_sum = pos_returns.rolling_sum(window_size=self.window)
        rolling_pos_sum_sq = pos_returns_sq.rolling_sum(window_size=self.window)

        pos_mean_sq = rolling_pos_sum_sq / rolling_pos_count
        pos_mean = rolling_pos_sum / rolling_pos_count
        pos_var = pos_mean_sq - pos_mean.pow(2)

        # Too few positive samples: no meaningful variance.
        pos_var = pl.when(rolling_pos_count >= 2).then(pos_var).otherwise(None)
        # Guard against tiny negative values from floating-point cancellation.
        pos_var = pos_var.clip(lower_bound=0)

        upside_vol = pos_var.sqrt()

        return stock_df.with_columns(upside_vol.alias(f'upside_volatility_{self.window}'))
|
||||
|
||||
|
||||
class DownsideVolatilityOperator(StockWiseOperator):
    """Rolling volatility of negative ``pct_chg`` values."""

    def __init__(self, window: int = 20):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"downside_volatility_{window}",
            description=f"{window}日下行波动率",
            required_columns=['pct_chg'],
            output_columns=[f'downside_volatility_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``downside_volatility_<window>`` column.

        Within each window the variance is ``E[x^2] - E[x]^2`` taken over
        the rows whose ``pct_chg`` is negative. Windows with fewer than two
        such rows yield null.

        Args:
            stock_df: Frame with ``pct_chg``; assumed to hold one stock's
                rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``downside_volatility_{self.window}`` added.
        """
        is_down = pl.col('pct_chg') < 0

        # Non-negative returns contribute 0 to the sums below.
        neg_returns = pl.when(is_down).then(pl.col('pct_chg')).otherwise(0)
        neg_returns_sq = neg_returns.pow(2)

        # Cast the mask to integers before summing, and use Polars'
        # ``window_size`` keyword for every rolling call.
        rolling_neg_count = is_down.cast(pl.Int64).rolling_sum(window_size=self.window)
        rolling_neg_sum = neg_returns.rolling_sum(window_size=self.window)
        rolling_neg_sum_sq = neg_returns_sq.rolling_sum(window_size=self.window)

        neg_mean_sq = rolling_neg_sum_sq / rolling_neg_count
        neg_mean = rolling_neg_sum / rolling_neg_count
        neg_var = neg_mean_sq - neg_mean.pow(2)

        # Too few negative samples: no meaningful variance.
        neg_var = pl.when(rolling_neg_count >= 2).then(neg_var).otherwise(None)
        # Guard against tiny negative values from floating-point cancellation.
        neg_var = neg_var.clip(lower_bound=0)

        downside_vol = neg_var.sqrt()

        return stock_df.with_columns(downside_vol.alias(f'downside_volatility_{self.window}'))
|
||||
|
||||
|
||||
class VolatilityRatioOperator(StockWiseOperator):
    """Ratio of upside to downside rolling standard deviation of ``pct_chg``."""

    def __init__(self, window: int = 20):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"volatility_ratio_{window}",
            description=f"{window}日波动率比率",
            required_columns=['pct_chg'],
            output_columns=[f'volatility_ratio_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volatility_ratio_<window>`` column.

        Positive and negative returns are separated (the other side set to
        0), each side's rolling standard deviation is taken, and the ratio
        upside / downside is stored. Any non-finite or missing ratio
        (infinity, NaN, null) becomes 0.

        Args:
            stock_df: Frame with ``pct_chg``; assumed to hold one stock's
                rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``volatility_ratio_{self.window}`` added.
        """
        pct = pl.col('pct_chg')
        pos_returns = pl.when(pct > 0).then(pct).otherwise(0)
        neg_returns = pl.when(pct < 0).then(pct).otherwise(0)

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        upside_vol = pos_returns.rolling_std(window_size=self.window)
        downside_vol = neg_returns.rolling_std(window_size=self.window)

        vol_ratio = upside_vol / downside_vol

        # ``is_finite`` is false for +/-inf and NaN and null for null inputs;
        # in every such case the ``otherwise`` branch supplies 0, so 0/0
        # (NaN) is cleaned as well as x/0 (inf).
        vol_ratio = pl.when(vol_ratio.is_finite()).then(vol_ratio).otherwise(0.0)

        return stock_df.with_columns(vol_ratio.alias(f'volatility_ratio_{self.window}'))
|
||||
|
||||
|
||||
class ReturnSkewnessOperator(StockWiseOperator):
    """Rolling skewness of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"return_skewness_{window}",
            description=f"{window}日收益率偏度",
            required_columns=['pct_chg'],
            output_columns=[f'return_skewness_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_skewness_<window>`` column.

        Args:
            stock_df: Frame with ``pct_chg``; assumed to hold one stock's
                rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_skewness_{self.window}`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        skewness = pl.col('pct_chg').rolling_skew(window_size=self.window)

        return stock_df.with_columns(skewness.alias(f'return_skewness_{self.window}'))
|
||||
|
||||
|
||||
class ReturnKurtosisOperator(StockWiseOperator):
    """Rolling kurtosis of ``pct_chg``."""

    def __init__(self, window: int = 5):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"return_kurtosis_{window}",
            description=f"{window}日收益率峰度",
            required_columns=['pct_chg'],
            output_columns=[f'return_kurtosis_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``return_kurtosis_<window>`` column.

        Args:
            stock_df: Frame with ``pct_chg``; assumed to hold one stock's
                rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``return_kurtosis_{self.window}`` added.
        """
        returns = pl.col('pct_chg')
        # Polars has no ``rolling_kurt``. Recent releases expose
        # ``rolling_kurtosis``; otherwise fall back to applying
        # ``Series.kurtosis`` per window via ``rolling_map``.
        if hasattr(returns, 'rolling_kurtosis'):
            kurtosis = returns.rolling_kurtosis(window_size=self.window)
        else:
            kurtosis = returns.rolling_map(
                lambda values: values.kurtosis(), window_size=self.window
            )

        return stock_df.with_columns(kurtosis.alias(f'return_kurtosis_{self.window}'))
|
||||
|
||||
|
||||
class VolatilityAmplificationOperator(StockWiseOperator):
    """Rolling volatility scaled by how far ``close`` sits below ``weight_avg``."""

    def __init__(self, n: int = 20):
        """Register the operator for an ``n``-row volatility window."""
        config = OperatorConfig(
            name=f"vol_amp_loss_{n}",
            description=f"{n}日亏损状态波动率放大",
            required_columns=['pct_chg', 'weight_avg', 'close'],
            output_columns=[f'vol_amp_loss_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_amp_loss_<n>`` column.

        Computed as ``rolling_std_n(pct_chg) * max(0, weight_avg - close) / close``.

        Args:
            stock_df: Frame with ``pct_chg``, ``weight_avg`` and ``close``;
                assumed to hold one stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_amp_loss_{self.n}`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Zero when close is at or above weight_avg, positive otherwise.
        shortfall = pl.max_horizontal(pl.lit(0), pl.col('weight_avg') - pl.col('close'))
        loss_degree = shortfall / pl.col('close')

        vol_amp = vol_n * loss_degree

        return stock_df.with_columns(vol_amp.alias(f'vol_amp_loss_{self.n}'))
|
||||
|
||||
|
||||
class HighVolDropWhenProfitableOperator(StockWiseOperator):
    """Count recent rows that are profitable, dropping, and on unusually high volume."""

    def __init__(self, n: int = 20, m: int = 5, profit_thresh: float = 0.1,
                 drop_thresh: float = -0.03, vol_multiple: float = 2.0):
        """Register the operator.

        Args:
            n: Window (rows) for the volume mean/std baseline.
            m: Window (rows) over which events are counted.
            profit_thresh: ``close`` must exceed ``weight_avg * (1 + profit_thresh)``.
            drop_thresh: ``pct_chg`` must be below this value.
            vol_multiple: Number of volume standard deviations above the
                rolling mean that qualifies as high volume.
        """
        config = OperatorConfig(
            name=f"vol_drop_profit_cnt_{m}",
            description=f"{m}日高成交量下跌当获利状态计数",
            required_columns=['close', 'pct_chg', 'vol', 'weight_avg'],
            output_columns=[f'vol_drop_profit_cnt_{m}'],
            parameters={'n': n, 'm': m, 'profit_thresh': profit_thresh,
                        'drop_thresh': drop_thresh, 'vol_multiple': vol_multiple}
        )
        super().__init__(config)
        self.n = n
        self.m = m
        self.profit_thresh = profit_thresh
        self.drop_thresh = drop_thresh
        self.vol_multiple = vol_multiple

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_drop_profit_cnt_<m>`` column.

        Args:
            stock_df: Frame with ``close``, ``pct_chg``, ``vol`` and
                ``weight_avg``; assumed to hold one stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_drop_profit_cnt_{self.m}`` added.
        """
        is_profitable = pl.col('close') > pl.col('weight_avg') * (1 + self.profit_thresh)

        is_dropping = pl.col('pct_chg') < self.drop_thresh

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        rolling_mean_vol = pl.col('vol').rolling_mean(window_size=self.n)
        rolling_std_vol = pl.col('vol').rolling_std(window_size=self.n).fill_null(0)

        is_high_vol = pl.col('vol') > (rolling_mean_vol + self.vol_multiple * rolling_std_vol)

        event = is_profitable & is_dropping & is_high_vol

        # Count events over the last m rows.
        event_cnt = event.cast(int).rolling_sum(window_size=self.m)

        return stock_df.with_columns(event_cnt.alias(f'vol_drop_profit_cnt_{self.m}'))
|
||||
|
||||
|
||||
class LargeFlowVolatilityInteractionOperator(StockWiseOperator):
    """Rolling volatility multiplied by the average absolute large-order net-flow ratio."""

    def __init__(self, n: int = 20):
        """Register the operator for an ``n``-row window."""
        config = OperatorConfig(
            name=f"lg_flow_vol_interact_{n}",
            description=f"{n}日大单资金流驱动波动率交互",
            required_columns=['pct_chg', 'buy_lg_vol', 'buy_elg_vol', 'sell_lg_vol',
                              'sell_elg_vol', 'vol', 'close'],
            output_columns=[f'lg_flow_vol_interact_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``lg_flow_vol_interact_<n>`` column.

        Args:
            stock_df: Frame with the columns listed in ``required_columns``;
                assumed to hold one stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``lg_flow_vol_interact_{self.n}`` added.
        """
        epsilon = 1e-8  # keeps the ratio finite when total value is zero

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n)

        # Net large + extra-large order volume, valued at the close.
        net_lg_flow_val = (
            (pl.col('buy_lg_vol') + pl.col('buy_elg_vol') -
             pl.col('sell_lg_vol') - pl.col('sell_elg_vol')) * pl.col('close')
        )

        total_val = pl.col('vol') * pl.col('close')

        abs_net_lg_flow_ratio = net_lg_flow_val.abs() / (total_val + epsilon)

        abs_ratio_n = abs_net_lg_flow_ratio.rolling_mean(window_size=self.n)

        interaction = vol_n * abs_ratio_n

        return stock_df.with_columns(interaction.alias(f'lg_flow_vol_interact_{self.n}'))
|
||||
|
||||
|
||||
class VolatilityAdjustedROCPOperator(StockWiseOperator):
    """N-row return of ``close`` divided by the N-row volatility of ``pct_chg``."""

    def __init__(self, n: int = 20):
        """Register the operator for an ``n``-row window."""
        config = OperatorConfig(
            name=f"vol_adj_roc_{n}",
            description=f"{n}日波动率调整收益率",
            required_columns=['close', 'pct_chg'],
            output_columns=[f'vol_adj_roc_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_adj_roc_<n>`` column.

        Args:
            stock_df: Frame with ``close`` and ``pct_chg``; assumed to hold
                one stock's rows in date order.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_adj_roc_{self.n}`` added.
        """
        roc_n = pl.col('close').pct_change(self.n)

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        vol_n = pl.col('pct_chg').rolling_std(window_size=self.n).fill_null(0)

        # The small epsilon avoids division by zero.
        vol_adj_roc = roc_n / (vol_n + 1e-10)

        return stock_df.with_columns(vol_adj_roc.alias(f'vol_adj_roc_{self.n}'))
|
||||
|
||||
|
||||
class StandardDeviation5Operator(StockWiseOperator):
    """5-row rolling standard deviation of one-row ``close`` returns."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="std_return_5",
            description="5日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_5`` column.

        Args:
            stock_df: Frame with ``close``; assumed to hold one stock's
                rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_5`` added.
        """
        returns = pl.col('close').pct_change()

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        std_5 = returns.rolling_std(window_size=5)

        return stock_df.with_columns(std_5.alias('std_return_5'))
|
||||
|
||||
|
||||
class StandardDeviation90Operator(StockWiseOperator):
    """90-row rolling standard deviation of one-row ``close`` returns."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="std_return_90",
            description="90日收益率标准差",
            required_columns=['close'],
            output_columns=['std_return_90'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90`` column.

        Args:
            stock_df: Frame with ``close``; assumed to hold one stock's
                rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90`` added.
        """
        returns = pl.col('close').pct_change()

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        std_90 = returns.rolling_std(window_size=90)

        return stock_df.with_columns(std_90.alias('std_return_90'))
|
||||
|
||||
|
||||
class StandardDeviation90ShiftedOperator(StockWiseOperator):
    """90-row rolling std of ``close`` returns, computed on prices lagged 10 rows."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="std_return_90_2",
            description="90日收益率标准差(移位10日)",
            required_columns=['close'],
            output_columns=['std_return_90_2'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``std_return_90_2`` column.

        Args:
            stock_df: Frame with ``close``; assumed to hold one stock's
                rows in date order (row order drives the shift and window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``std_return_90_2`` added.
        """
        # One-row returns of the close series lagged by 10 rows.
        returns = pl.col('close').shift(10).pct_change()

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        std_90_2 = returns.rolling_std(window_size=90)

        return stock_df.with_columns(std_90_2.alias('std_return_90_2'))
|
||||
|
||||
|
||||
# Default volatility operators, each built with its default arguments and
# listed in application order.
VOLATILITY_OPERATORS = [
    operator_cls()
    for operator_cls in (
        UpsideVolatilityOperator,
        DownsideVolatilityOperator,
        VolatilityRatioOperator,
        ReturnSkewnessOperator,
        ReturnKurtosisOperator,
        VolatilityAmplificationOperator,
        HighVolDropWhenProfitableOperator,
        LargeFlowVolatilityInteractionOperator,
        VolatilityAdjustedROCPOperator,
        StandardDeviation5Operator,
        StandardDeviation90Operator,
        StandardDeviation90ShiftedOperator,
    )
]
|
||||
|
||||
|
||||
def apply_volatility_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of volatility operators over ``df``.

    Args:
        df: Input Polars DataFrame.
        operators: Operators to apply, in order. ``None`` selects
            ``VOLATILITY_OPERATORS``.

    Returns:
        The frame produced by feeding ``df`` through each operator in turn.
    """
    pipeline = VOLATILITY_OPERATORS if operators is None else operators

    frame = df
    for step in pipeline:
        # Each operator is callable and returns the augmented frame.
        frame = step(frame)
    return frame
|
||||
480
main/factor/polars_volume_factors.py
Normal file
480
main/factor/polars_volume_factors.py
Normal file
@@ -0,0 +1,480 @@
|
||||
"""
|
||||
成交量因子 - 使用Polars实现
|
||||
包含成交量变化率、突破信号、换手率等相关因子计算
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Any
|
||||
from operator_framework import StockWiseOperator, OperatorConfig
|
||||
|
||||
|
||||
class VolumeChangeRateOperator(StockWiseOperator):
    """Short-term volume change: 2-row mean volume over 10-row mean volume, minus 1."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_change_rate",
            description="短期成交量变化率",
            required_columns=['vol'],
            output_columns=['volume_change_rate'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``volume_change_rate`` column.

        Args:
            stock_df: Frame with ``vol``; assumed to hold one stock's rows
                in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``volume_change_rate`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        vol_mean_2 = pl.col('vol').rolling_mean(window_size=2)
        vol_mean_10 = pl.col('vol').rolling_mean(window_size=10)

        change_rate = (vol_mean_2 / vol_mean_10) - 1

        return stock_df.with_columns(change_rate.alias('volume_change_rate'))
|
||||
|
||||
|
||||
class VolumeBreakoutOperator(StockWiseOperator):
    """Flag rows whose volume exceeds the maximum of the preceding 5 rows."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_breakout",
            description="成交量突破信号",
            required_columns=['vol'],
            output_columns=['cat_volume_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_breakout`` column.

        Args:
            stock_df: Frame with ``vol``; assumed to hold one stock's rows
                in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_volume_breakout`` added.
        """
        # The reference maximum is shifted by one row so it covers the five
        # rows *before* the current one. A window that includes the current
        # row can never be strictly exceeded by that row, which made the
        # signal constantly False.
        prior_max_vol_5 = pl.col('vol').rolling_max(window_size=5).shift(1)

        breakout = pl.col('vol') > prior_max_vol_5

        return stock_df.with_columns(breakout.alias('cat_volume_breakout'))
|
||||
|
||||
|
||||
class TurnoverDeviationOperator(StockWiseOperator):
    """3-row rolling z-score of ``turnover_rate``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="turnover_deviation",
            description="换手率均线偏离度",
            required_columns=['turnover_rate'],
            output_columns=['turnover_deviation'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append ``turnover_deviation = (x - mean3(x)) / std3(x)``.

        Args:
            stock_df: Frame with ``turnover_rate``; assumed to hold one
                stock's rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``turnover_deviation`` added.
        """
        turnover = pl.col('turnover_rate')
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        mean_turnover = turnover.rolling_mean(window_size=3)
        std_turnover = turnover.rolling_std(window_size=3)

        deviation = (turnover - mean_turnover) / std_turnover

        return stock_df.with_columns(deviation.alias('turnover_deviation'))
|
||||
|
||||
|
||||
class TurnoverSpikeOperator(StockWiseOperator):
    """Flag rows whose turnover exceeds its 3-row mean by two standard deviations."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="turnover_spike",
            description="换手率激增信号",
            required_columns=['turnover_rate'],
            output_columns=['cat_turnover_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_turnover_spike`` column.

        Args:
            stock_df: Frame with ``turnover_rate``; assumed to hold one
                stock's rows in date order (row order drives the windows).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_turnover_spike`` added.
        """
        turnover = pl.col('turnover_rate')
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        mean_turnover = turnover.rolling_mean(window_size=3)
        std_turnover = turnover.rolling_std(window_size=3)

        # NOTE(review): the 3-row window includes the current row, and with
        # a sample standard deviation a point can sit at most
        # 2 / sqrt(3) ~= 1.15 deviations above the mean of its own 3-row
        # window. The 2-sigma threshold therefore looks unreachable --
        # confirm whether the baseline should exclude the current row.
        spike = turnover > (mean_turnover + 2 * std_turnover)

        return stock_df.with_columns(spike.alias('cat_turnover_spike'))
|
||||
|
||||
|
||||
class VolumeRatioAverageOperator(StockWiseOperator):
    """3-row rolling mean of ``volume_ratio``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_ratio_average",
            description="量比均值",
            required_columns=['volume_ratio'],
            output_columns=['avg_volume_ratio'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``avg_volume_ratio`` column.

        Args:
            stock_df: Frame with ``volume_ratio``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``avg_volume_ratio`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        avg_ratio = pl.col('volume_ratio').rolling_mean(window_size=3)

        return stock_df.with_columns(avg_ratio.alias('avg_volume_ratio'))
|
||||
|
||||
|
||||
class VolumeRatioBreakoutOperator(StockWiseOperator):
    """Flag rows whose volume ratio exceeds the maximum of the preceding 5 rows."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_ratio_breakout",
            description="量比突破信号",
            required_columns=['volume_ratio'],
            output_columns=['cat_volume_ratio_breakout'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_volume_ratio_breakout`` column.

        Args:
            stock_df: Frame with ``volume_ratio``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_volume_ratio_breakout`` added.
        """
        # The reference maximum is shifted by one row so it covers the five
        # rows *before* the current one. A window that includes the current
        # row can never be strictly exceeded by that row, which made the
        # signal constantly False.
        prior_max_ratio_5 = pl.col('volume_ratio').rolling_max(window_size=5).shift(1)

        breakout = pl.col('volume_ratio') > prior_max_ratio_5

        return stock_df.with_columns(breakout.alias('cat_volume_ratio_breakout'))
|
||||
|
||||
|
||||
class VolumeSpikeOperator(StockWiseOperator):
    """Store the 20-row rolling mean of ``vol`` under the name ``vol_spike``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_spike",
            description="成交量激增",
            required_columns=['vol'],
            output_columns=['vol_spike'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_spike`` column (a 20-row mean volume baseline).

        Args:
            stock_df: Frame with ``vol``; assumed to hold one stock's rows
                in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_spike`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        vol_mean_20 = pl.col('vol').rolling_mean(window_size=20)

        return stock_df.with_columns(vol_mean_20.alias('vol_spike'))
|
||||
|
||||
|
||||
class VolumeStd5Operator(StockWiseOperator):
    """5-row rolling standard deviation of the one-row percentage change of ``vol``."""

    def __init__(self):
        """Register the operator's column contract."""
        config = OperatorConfig(
            name="volume_std_5",
            description="5日成交量标准差",
            required_columns=['vol'],
            output_columns=['vol_std_5'],
            parameters={}
        )
        super().__init__(config)

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``vol_std_5`` column.

        Args:
            stock_df: Frame with ``vol``; assumed to hold one stock's rows
                in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``vol_std_5`` added.
        """
        vol_pct_change = pl.col('vol').pct_change()

        # Polars names the rolling parameter ``window_size`` (not ``window``).
        std_5 = vol_pct_change.rolling_std(window_size=5)

        return stock_df.with_columns(std_5.alias('vol_std_5'))
|
||||
|
||||
|
||||
class TurnoverRateMeanOperator(StockWiseOperator):
    """N-row rolling mean of ``turnover_rate``."""

    def __init__(self, n: int):
        """Register the operator for an ``n``-row window."""
        config = OperatorConfig(
            name=f"turnover_rate_mean_{n}",
            description=f"{n}日换手率均值",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_mean_{n}'],
            parameters={'n': n}
        )
        super().__init__(config)
        self.n = n

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_rate_mean_<n>`` column.

        Args:
            stock_df: Frame with ``turnover_rate``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``turnover_rate_mean_{self.n}`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        mean_rate = pl.col('turnover_rate').rolling_mean(window_size=self.n)

        return stock_df.with_columns(mean_rate.alias(f'turnover_rate_mean_{self.n}'))
|
||||
|
||||
|
||||
class VolumeSpikeCategoryOperator(StockWiseOperator):
    """Flag rows whose ``vol`` is more than twice the ``vol_spike`` baseline."""

    def __init__(self):
        """Register the operator's column contract."""
        super().__init__(
            OperatorConfig(
                name="volume_spike_category",
                description="成交量激增分类",
                required_columns=['vol', 'vol_spike'],
                output_columns=['cat_vol_spike'],
                parameters={},
            )
        )

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the boolean ``cat_vol_spike`` column.

        Args:
            stock_df: Frame with ``vol`` and ``vol_spike``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cat_vol_spike`` added.
        """
        threshold = 2 * pl.col('vol_spike')
        is_spike = pl.col('vol') > threshold
        return stock_df.with_columns(is_spike.alias('cat_vol_spike'))
|
||||
|
||||
|
||||
class TurnoverVolatilityOperator(StockWiseOperator):
    """Rolling standard deviation of ``turnover_rate``."""

    def __init__(self, window: int = 20):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"turnover_volatility_{window}",
            description=f"{window}日换手率波动率",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_std_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``turnover_std_<window>`` column.

        Args:
            stock_df: Frame with ``turnover_rate``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``turnover_std_{self.window}`` added.
        """
        # Polars names the rolling parameter ``window_size`` (not ``window``).
        turnover_std = pl.col('turnover_rate').rolling_std(window_size=self.window)

        return stock_df.with_columns(turnover_std.alias(f'turnover_std_{self.window}'))
|
||||
|
||||
|
||||
class VolumeCovarianceOperator(StockWiseOperator):
    """Rolling covariance between ``high`` and ``vol``."""

    def __init__(self, window: int = 5):
        """Register the operator for a rolling ``window`` (in rows)."""
        config = OperatorConfig(
            name=f"volume_covariance_{window}",
            description=f"{window}日成交量协方差",
            required_columns=['high', 'vol'],
            output_columns=[f'cov_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Append the ``cov_<window>`` column.

        Args:
            stock_df: Frame with ``high`` and ``vol``; assumed to hold one
                stock's rows in date order (row order drives the window).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``stock_df`` with ``cov_{self.window}`` added.
        """
        # Rolling covariance is a top-level Polars function. Building it as
        # an expression and aliasing it directly also avoids the former
        # lookup of a ``cov_<window>`` column that was never produced.
        cov = pl.rolling_cov(pl.col('high'), pl.col('vol'), window_size=self.window)

        return stock_df.with_columns(cov.alias(f'cov_{self.window}'))
|
||||
|
||||
|
||||
class VolumeCovarianceDeltaOperator(StockWiseOperator):
    """Change of the ``cov_5`` column over ``period`` rows.

    Adds the column ``delta_cov_{period}``. The input column name is fixed
    to ``cov_5`` regardless of ``period``.
    """

    def __init__(self, period: int = 5):
        """Build the operator configuration.

        Args:
            period: Row offset used for the difference.
        """
        super().__init__(
            OperatorConfig(
                name=f"volume_covariance_delta_{period}",
                description=f"{period}日成交量协方差变化",
                required_columns=['cov_5'],
                output_columns=[f'delta_cov_{period}'],
                parameters={'period': period},
            )
        )
        self.period = period

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the ``period``-row difference of ``cov_5``.

        Args:
            stock_df: Frame containing a ``cov_5`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``delta_cov_{period}`` added.
        """
        out_name = f'delta_cov_{self.period}'
        return stock_df.with_columns(
            pl.col('cov_5').diff(self.period).alias(out_name)
        )
|
||||
|
||||
|
||||
class TurnoverRateAccelerationOperator(StockWiseOperator):
    """Short-window minus long-window rolling mean of ``turnover_rate``.

    Adds the column
    ``turnover_rate_acceleration_{short_window}_{long_window}``.
    """

    def __init__(self, short_window: int = 5, long_window: int = 20):
        """Build the operator configuration.

        Args:
            short_window: Length of the short rolling window.
            long_window: Length of the long rolling window.
        """
        config = OperatorConfig(
            name=f"turnover_acceleration_{short_window}_{long_window}",
            description=f"{short_window}日对{long_window}日换手率加速度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_rate_acceleration_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the difference between short and long rolling means.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the acceleration column added.
        """
        # polars names the window length ``window_size``; the previous
        # ``window=`` keyword is not accepted and raised TypeError.
        short_avg = pl.col('turnover_rate').rolling_mean(window_size=self.short_window)
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.long_window)

        # "Acceleration" here is simply short mean minus long mean.
        acceleration = short_avg - long_avg

        return stock_df.with_columns(
            acceleration.alias(f'turnover_rate_acceleration_{self.short_window}_{self.long_window}')
        )
|
||||
|
||||
|
||||
class VolumeSustainabilityOperator(StockWiseOperator):
    """Share of recent rows whose volume exceeds its long rolling mean.

    Adds the column ``vol_sustain_{short_window}_{long_window}``.
    """

    def __init__(self, short_window: int = 10, long_window: int = 30):
        """Build the operator configuration.

        Args:
            short_window: Window over which the above-average share is taken.
            long_window: Window of the rolling mean that ``vol`` is compared to.
        """
        config = OperatorConfig(
            name=f"volume_sustain_{short_window}_{long_window}",
            description=f"{short_window}日成交量大于{long_window}日均值占比",
            required_columns=['vol'],
            output_columns=[f'vol_sustain_{short_window}_{long_window}'],
            parameters={'short_window': short_window, 'long_window': long_window}
        )
        super().__init__(config)
        self.short_window = short_window
        self.long_window = long_window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the rolling share of rows with above-average volume.

        Args:
            stock_df: Frame containing a ``vol`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the sustainability column added.
        """
        # polars names the window length ``window_size``; the previous
        # ``window=`` keyword is not accepted and raised TypeError.
        long_avg = pl.col('vol').rolling_mean(window_size=self.long_window)

        # Boolean flag: volume strictly above its long-window mean.
        above_avg = pl.col('vol') > long_avg

        # Mean of the 0/1 flag over the short window = fraction of flagged rows.
        sustain_ratio = above_avg.cast(int).rolling_mean(window_size=self.short_window)

        return stock_df.with_columns(
            sustain_ratio.alias(f'vol_sustain_{self.short_window}_{self.long_window}')
        )
|
||||
|
||||
|
||||
class TurnoverRelativeStrengthOperator(StockWiseOperator):
    """Ratio of ``turnover_rate`` to its own rolling mean.

    Adds the column ``turnover_relative_strength_{window}``.
    """

    def __init__(self, window: int = 20):
        """Build the operator configuration.

        Args:
            window: Length of the rolling-mean window.
        """
        config = OperatorConfig(
            name=f"turnover_relative_strength_{window}",
            description=f"{window}日换手率相对强度",
            required_columns=['turnover_rate'],
            output_columns=[f'turnover_relative_strength_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute turnover rate divided by its rolling mean.

        Args:
            stock_df: Frame containing a ``turnover_rate`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with the relative-strength column added.
        """
        # polars names the window length ``window_size``; the previous
        # ``window=`` keyword is not accepted and raised TypeError.
        long_avg = pl.col('turnover_rate').rolling_mean(window_size=self.window)

        # No zero guard is applied to the denominator.
        relative_strength = pl.col('turnover_rate') / long_avg

        return stock_df.with_columns(
            relative_strength.alias(f'turnover_relative_strength_{self.window}')
        )
|
||||
|
||||
|
||||
class AmountOutlierOperator(StockWiseOperator):
    """Rolling z-score of the deviation of ``amount`` from its rolling mean.

    Adds the column ``amount_outlier_{window}``.
    """

    def __init__(self, window: int = 10):
        """Build the operator configuration.

        Args:
            window: Length used for every rolling statistic in this operator.
        """
        config = OperatorConfig(
            name=f"amount_outlier_{window}",
            description=f"{window}日成交额异常值",
            required_columns=['amount'],
            output_columns=[f'amount_outlier_{window}'],
            parameters={'window': window}
        )
        super().__init__(config)
        self.window = window

    def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
        """Compute the outlier score of the traded amount.

        Args:
            stock_df: Frame containing an ``amount`` column.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``stock_df`` with ``amount_outlier_{window}`` added.
        """
        # polars names the window length ``window_size``; the previous
        # ``window=`` keyword is not accepted and raised TypeError.
        avg_amount = pl.col('amount').rolling_mean(window_size=self.window)

        # Deviation of the amount from its rolling mean.
        amount_diff = pl.col('amount') - avg_amount

        # Simplified z-score of that deviation over the same window; real
        # usage may need cross-sectional standardisation instead.
        mean_diff = amount_diff.rolling_mean(window_size=self.window)
        std_diff = amount_diff.rolling_std(window_size=self.window)

        # The small epsilon keeps the denominator away from zero.
        outlier_score = (amount_diff - mean_diff) / (std_diff + 1e-8)

        return stock_df.with_columns(outlier_score.alias(f'amount_outlier_{self.window}'))
|
||||
|
||||
|
||||
# Default collection of volume / turnover factor operators.
# apply_volume_factors applies them in list order.
# NOTE(review): VolumeSpikeCategoryOperator reads a ``vol_spike`` column,
# presumably produced by VolumeSpikeOperator earlier in this list -- confirm.
# VolumeCovarianceOperator and VolumeCovarianceDeltaOperator are not included.
VOLUME_OPERATORS = [
    # Volume change / breakout
    VolumeChangeRateOperator(),
    VolumeBreakoutOperator(),
    # Turnover deviation / spike
    TurnoverDeviationOperator(),
    TurnoverSpikeOperator(),
    # Volume ratio
    VolumeRatioAverageOperator(),
    VolumeRatioBreakoutOperator(),
    # Volume spike and dispersion
    VolumeSpikeOperator(),
    VolumeStd5Operator(),
    TurnoverRateMeanOperator(20),
    VolumeSpikeCategoryOperator(),
    # Turnover dynamics
    TurnoverVolatilityOperator(),
    TurnoverRateAccelerationOperator(),
    VolumeSustainabilityOperator(),
    TurnoverRelativeStrengthOperator(),
    # Traded amount
    AmountOutlierOperator(),
]
|
||||
|
||||
|
||||
def apply_volume_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
    """Run a sequence of volume factor operators over a frame.

    Each operator is called with the output of the previous one, so the
    operators are applied in list order.

    Args:
        df: Input polars DataFrame.
        operators: Operators to apply; when ``None`` the module-level
            ``VOLUME_OPERATORS`` list is used.

    Returns:
        The DataFrame returned by the last operator (``df`` itself when the
        operator list is empty).
    """
    pipeline = VOLUME_OPERATORS if operators is None else operators

    current = df
    for op in pipeline:
        # Operators are callables taking and returning a frame.
        current = op(current)
    return current
|
||||
2860
main/train/Classify/Classify2.ipynb
Normal file
2860
main/train/Classify/Classify2.ipynb
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user