factor优化(暂存版)
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -18,3 +18,4 @@ model
|
|||||||
|
|
||||||
!.gitignore
|
!.gitignore
|
||||||
!.git
|
!.git
|
||||||
|
!.env
|
||||||
@@ -33,7 +33,7 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
|
|||||||
# 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确
|
# 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确
|
||||||
stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map)
|
stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map)
|
||||||
# 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理
|
# 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理
|
||||||
if stk_trade_processed_df['_direction'].isnull().any():
|
if stk_trade_processed_df['_direction'].is_null().any():
|
||||||
print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。")
|
print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。")
|
||||||
# 可以选择填充NaN,例如用0填充,或者移除这些行
|
# 可以选择填充NaN,例如用0填充,或者移除这些行
|
||||||
# stk_trade_processed_df['_direction'].fillna(0, inplace=True)
|
# stk_trade_processed_df['_direction'].fillna(0, inplace=True)
|
||||||
@@ -109,4 +109,3 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
|
|||||||
|
|
||||||
print("股东增减持因子计算完成。")
|
print("股东增减持因子计算完成。")
|
||||||
return df_merged
|
return df_merged
|
||||||
|
|
||||||
|
|||||||
@@ -1,196 +0,0 @@
|
|||||||
"""
|
|
||||||
因子算子基础框架 - 简化版本
|
|
||||||
提供股票截面和日期截面两个基础函数
|
|
||||||
"""
|
|
||||||
|
|
||||||
import polars as pl
|
|
||||||
from typing import Callable, Any, Optional, Union
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def apply_stockwise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """Apply an operator function on the per-stock cross-section.

    Args:
        df: Input polars DataFrame; must contain 'ts_code' and 'trade_date'.
        operator_func: Callable that receives a single stock's DataFrame
            (sorted by trade_date) plus *args/**kwargs and returns a DataFrame.
        *args, **kwargs: Extra arguments forwarded to operator_func.

    Returns:
        The processed DataFrame, sorted by ['ts_code', 'trade_date'].

    Raises:
        ValueError: If a required column is missing.
    """
    # Fail fast if the two structural columns are not present.
    absent = [c for c in ('ts_code', 'trade_date') if c not in df.columns]
    if absent:
        raise ValueError(f"缺少必需列: {absent}")

    processed = []
    # partition_by yields one sub-frame per ts_code; each sub-frame is sorted
    # by date before the operator runs (time order matters for shift/rolling).
    for group in df.partition_by('ts_code', maintain_order=True):
        ordered = group.sort('trade_date')
        try:
            processed.append(operator_func(ordered, *args, **kwargs))
        except Exception as exc:
            code = ordered['ts_code'][0]
            logger.error(f"股票 {code} 处理失败: {exc}")
            # On failure keep the untouched (sorted) original rows for this stock.
            processed.append(ordered)

    # Stitch the per-stock results back into one long table; empty input passes through.
    if not processed:
        return df
    return pl.concat(processed).sort(['ts_code', 'trade_date'])
|
|
||||||
|
|
||||||
|
|
||||||
def apply_datewise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """Apply an operator function on the per-date cross-section.

    Args:
        df: Input polars DataFrame; must contain 'ts_code' and 'trade_date'.
        operator_func: Callable that receives a single trade date's DataFrame
            plus *args/**kwargs and returns a DataFrame.
        *args, **kwargs: Extra arguments forwarded to operator_func.

    Returns:
        The processed DataFrame, sorted by ['ts_code', 'trade_date'].

    Raises:
        ValueError: If a required column is missing.
    """
    # Fail fast if the two structural columns are not present.
    absent = [c for c in ('ts_code', 'trade_date') if c not in df.columns]
    if absent:
        raise ValueError(f"缺少必需列: {absent}")

    processed = []
    # partition_by yields one sub-frame per trade_date; no within-group sort
    # is needed since the operator works on a single cross-section.
    for group in df.partition_by('trade_date', maintain_order=True):
        try:
            processed.append(operator_func(group, *args, **kwargs))
        except Exception as exc:
            day = group['trade_date'][0]
            logger.error(f"日期 {day} 处理失败: {exc}")
            # On failure keep the untouched original rows for this date.
            processed.append(group)

    # Stitch the per-date results back into one long table; empty input passes through.
    if not processed:
        return df
    return pl.concat(processed).sort(['ts_code', 'trade_date'])
|
|
||||||
|
|
||||||
|
|
||||||
# 常用算子函数示例
|
|
||||||
def rolling_mean_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """Rolling-mean operator for the per-stock cross-section.

    Args:
        df: Single stock's data.
        column: Column to average.
        window: Rolling window size.
        output_col: Result column name; defaults to f'{column}_mean_{window}'.

    Returns:
        DataFrame with the rolling-mean column appended.
    """
    target = output_col if output_col is not None else f'{column}_mean_{window}'
    mean_expr = pl.col(column).rolling_mean(window_size=window)
    return df.with_columns(mean_expr.alias(target))
|
|
||||||
|
|
||||||
|
|
||||||
def rolling_std_operator(df: pl.DataFrame, column: str, window: int, output_col: str = None) -> pl.DataFrame:
    """Rolling standard-deviation operator for the per-stock cross-section.

    Args:
        df: Single stock's data.
        column: Column to compute the deviation over.
        window: Rolling window size.
        output_col: Result column name; defaults to f'{column}_std_{window}'.

    Returns:
        DataFrame with the rolling-std column appended.
    """
    target = output_col if output_col is not None else f'{column}_std_{window}'
    std_expr = pl.col(column).rolling_std(window_size=window)
    return df.with_columns(std_expr.alias(target))
|
|
||||||
|
|
||||||
|
|
||||||
def rank_operator(df: pl.DataFrame, column: str, ascending: bool = True, output_col: str = None) -> pl.DataFrame:
    """Dense-rank operator for the per-date cross-section.

    Args:
        df: Single trade date's data.
        column: Column to rank.
        ascending: Rank in ascending order when True.
        output_col: Result column name; defaults to f'{column}_rank'.

    Returns:
        DataFrame with the rank column appended.
    """
    target = output_col if output_col is not None else f'{column}_rank'
    # polars expresses order via `descending`, the inverse of `ascending`.
    rank_expr = pl.col(column).rank(method='dense', descending=not ascending)
    return df.with_columns(rank_expr.alias(target))
|
|
||||||
|
|
||||||
|
|
||||||
def pct_change_operator(df: pl.DataFrame, column: str, periods: int = 1, output_col: str = None) -> pl.DataFrame:
    """Percentage-change operator for the per-stock cross-section.

    Args:
        df: Single stock's data.
        column: Column to compute the change over.
        periods: Number of periods to look back.
        output_col: Result column name; defaults to f'{column}_pct_change_{periods}'.

    Returns:
        DataFrame with the percentage-change column appended.
    """
    target = output_col if output_col is not None else f'{column}_pct_change_{periods}'
    current = pl.col(column)
    lagged = current.shift(periods)
    # ratio-minus-one form: (x_t / x_{t-periods}) - 1
    change_expr = (current / lagged) - 1
    return df.with_columns(change_expr.alias(target))
|
|
||||||
@@ -1,18 +1,14 @@
|
|||||||
"""
|
"""
|
||||||
因子算子框架 - 使用Polars实现统一的因子计算
|
因子算子框架 - Polars 实现
|
||||||
避免数据泄露,支持切面计算
|
支持:截面滚动 → 拼回长表 → 按列名合并
|
||||||
|
返回形式可选:完整 DataFrame(默认)或单列 Series
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import polars as pl
|
|
||||||
import numpy as np
|
|
||||||
from typing import Dict, List, Callable, Optional, Union, Any
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import logging
|
from typing import List, Literal
|
||||||
|
|
||||||
# 配置日志
|
import polars as pl
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -22,38 +18,7 @@ class OperatorConfig:
|
|||||||
description: str
|
description: str
|
||||||
required_columns: List[str]
|
required_columns: List[str]
|
||||||
output_columns: List[str]
|
output_columns: List[str]
|
||||||
parameters: Dict[str, Any]
|
parameters: dict
|
||||||
|
|
||||||
|
|
||||||
class DataSlice:
|
|
||||||
"""数据切面基类"""
|
|
||||||
|
|
||||||
def __init__(self, df: pl.DataFrame):
|
|
||||||
self.df = df
|
|
||||||
self.validate_data()
|
|
||||||
|
|
||||||
def validate_data(self):
|
|
||||||
"""验证数据格式"""
|
|
||||||
required_cols = ['ts_code', 'trade_date']
|
|
||||||
missing_cols = [col for col in required_cols if col not in self.df.columns]
|
|
||||||
if missing_cols:
|
|
||||||
raise ValueError(f"缺少必需列: {missing_cols}")
|
|
||||||
|
|
||||||
def get_stock_slice(self, ts_code: str) -> pl.DataFrame:
|
|
||||||
"""获取单个股票的数据切面"""
|
|
||||||
return self.df.filter(pl.col('ts_code') == ts_code).sort('trade_date')
|
|
||||||
|
|
||||||
def get_date_slice(self, trade_date: str) -> pl.DataFrame:
|
|
||||||
"""获取单个日期的数据切面"""
|
|
||||||
return self.df.filter(pl.col('trade_date') == trade_date)
|
|
||||||
|
|
||||||
def get_stock_list(self) -> List[str]:
|
|
||||||
"""获取股票列表"""
|
|
||||||
return self.df['ts_code'].unique().to_list()
|
|
||||||
|
|
||||||
def get_date_list(self) -> List[str]:
|
|
||||||
"""获取日期列表"""
|
|
||||||
return self.df['trade_date'].unique().to_list()
|
|
||||||
|
|
||||||
|
|
||||||
class BaseOperator(ABC):
|
class BaseOperator(ABC):
|
||||||
@@ -65,186 +30,95 @@ class BaseOperator(ABC):
|
|||||||
self.required_columns = config.required_columns
|
self.required_columns = config.required_columns
|
||||||
self.output_columns = config.output_columns
|
self.output_columns = config.output_columns
|
||||||
|
|
||||||
def validate_input(self, df: pl.DataFrame) -> bool:
|
# ---------- 子类必须实现 ----------
|
||||||
"""验证输入数据"""
|
|
||||||
missing_cols = [col for col in self.required_columns if col not in df.columns]
|
|
||||||
if missing_cols:
|
|
||||||
logger.warning(f"算子 {self.name} 缺少必需列: {missing_cols}")
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""应用算子"""
|
"""返回因子列名(用于合并)"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def __call__(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
@abstractmethod
|
||||||
"""调用算子"""
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
"""
|
||||||
|
真正的截面计算逻辑。
|
||||||
|
参数:按 ts_code 或 trade_date 分组后的子表
|
||||||
|
返回:与 group_df 行数一一对应的因子 Series(含正确索引)
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
# ---------- 公共接口 ----------
|
||||||
|
def apply(self,
|
||||||
|
df: pl.DataFrame,
|
||||||
|
return_type: Literal['df', 'series'] = 'df',
|
||||||
|
**kwargs) -> pl.DataFrame | pl.Series:
|
||||||
|
"""入口:截面滚动 → 拼回长表 → 合并/返回"""
|
||||||
if not self.validate_input(df):
|
if not self.validate_input(df):
|
||||||
# 返回原始数据,添加NaN列
|
raise ValueError(f"缺少必需列:{self.required_columns}")
|
||||||
for col in self.output_columns:
|
|
||||||
df = df.with_columns(pl.lit(None).alias(col))
|
|
||||||
return df
|
|
||||||
|
|
||||||
try:
|
long_table = self._sectional_roll(df, **kwargs) # ① 滚动
|
||||||
return self.apply(df, **kwargs)
|
merged = self._merge_factor(df, long_table) # ② 合并
|
||||||
except Exception as e:
|
return merged if return_type == 'df' else merged[self.get_factor_name()]
|
||||||
logger.error(f"算子 {self.name} 应用失败: {e}")
|
|
||||||
# 返回原始数据,添加NaN列
|
# ---------- 内部流程 ----------
|
||||||
for col in self.output_columns:
|
def validate_input(self, df: pl.DataFrame) -> bool:
|
||||||
df = df.with_columns(pl.lit(None).alias(col))
|
return all(col in df.columns for col in self.required_columns)
|
||||||
return df
|
|
||||||
|
@abstractmethod
|
||||||
|
def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
||||||
|
"""
|
||||||
|
截面滚动模板:group → calc_factor → 拼回长表
|
||||||
|
返回:含【trade_date, ts_code, factor】的长表
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _merge_factor(self, original: pl.DataFrame, factor_table: pl.DataFrame) -> pl.DataFrame:
|
||||||
|
"""按 [ts_code, trade_date] 左联,原地追加因子列"""
|
||||||
|
factor_name = self.get_factor_name()
|
||||||
|
return original.join(factor_table.select(['ts_code', 'trade_date', factor_name]),
|
||||||
|
on=['ts_code', 'trade_date'],
|
||||||
|
how='left')
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------- 股票截面:按 ts_code 分组 --------------------
|
||||||
class StockWiseOperator(BaseOperator):
|
class StockWiseOperator(BaseOperator):
|
||||||
"""股票切面算子 - 按股票分组计算"""
|
"""股票切面算子抽象类:按 ts_code 分组,对每个股票的时间序列计算因子"""
|
||||||
|
|
||||||
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
||||||
"""按股票分组应用算子"""
|
factor_name = self.get_factor_name()
|
||||||
stock_list = df['ts_code'].unique().to_list()
|
|
||||||
results = []
|
|
||||||
|
|
||||||
for ts_code in stock_list:
|
# 确保排序(时间顺序对 shift 等操作至关重要)
|
||||||
stock_df = df.filter(pl.col('ts_code') == ts_code).sort('trade_date')
|
df_sorted = df.sort(['ts_code', 'trade_date'])
|
||||||
try:
|
|
||||||
result_df = self.apply_stock(stock_df, **kwargs)
|
|
||||||
results.append(result_df)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"股票 {ts_code} 算子应用失败: {e}")
|
|
||||||
# 为失败的股票添加NaN列
|
|
||||||
for col in self.output_columns:
|
|
||||||
stock_df = stock_df.with_columns(pl.lit(None).alias(col))
|
|
||||||
results.append(stock_df)
|
|
||||||
|
|
||||||
return pl.concat(results).sort(['ts_code', 'trade_date'])
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""应用到单个股票数据"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
# 使用 map_groups:对每个 ts_code 分组,传入完整子 DataFrame
|
||||||
|
result = (
|
||||||
|
df_sorted
|
||||||
|
.group_by('ts_code', maintain_order=True)
|
||||||
|
.map_groups(
|
||||||
|
lambda group_df: group_df.with_columns(
|
||||||
|
self.calc_factor(group_df, **kwargs)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.select(['ts_code', 'trade_date', factor_name])
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# -------------------- 日期截面:按 trade_date 分组 --------------------
|
||||||
class DateWiseOperator(BaseOperator):
|
class DateWiseOperator(BaseOperator):
|
||||||
"""日期切面算子 - 按日期分组计算"""
|
"""日期切面算子抽象类:按 trade_date 分组,对每个截面计算因子"""
|
||||||
|
|
||||||
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
||||||
"""按日期分组应用算子"""
|
factor_name = self.get_factor_name()
|
||||||
date_list = df['trade_date'].unique().to_list()
|
|
||||||
results = []
|
|
||||||
|
|
||||||
for trade_date in date_list:
|
df_sorted = df.sort(['trade_date', 'ts_code'])
|
||||||
date_df = df.filter(pl.col('trade_date') == trade_date)
|
|
||||||
try:
|
|
||||||
result_df = self.apply_date(date_df, **kwargs)
|
|
||||||
results.append(result_df)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"日期 {trade_date} 算子应用失败: {e}")
|
|
||||||
# 为失败的日期添加NaN列
|
|
||||||
for col in self.output_columns:
|
|
||||||
date_df = date_df.with_columns(pl.lit(None).alias(col))
|
|
||||||
results.append(date_df)
|
|
||||||
|
|
||||||
return pl.concat(results).sort(['ts_code', 'trade_date'])
|
result = (
|
||||||
|
df_sorted
|
||||||
@abstractmethod
|
.group_by('trade_date', maintain_order=True)
|
||||||
def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
.map_groups(
|
||||||
"""应用到单个日期数据"""
|
lambda group_df: group_df.with_columns(
|
||||||
pass
|
self.calc_factor(group_df, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class RollingOperator(StockWiseOperator):
|
|
||||||
"""滚动窗口算子基类"""
|
|
||||||
|
|
||||||
def __init__(self, config: OperatorConfig, window: int, min_periods: Optional[int] = None):
|
|
||||||
super().__init__(config)
|
|
||||||
self.window = window
|
|
||||||
self.min_periods = min_periods or max(1, window // 2)
|
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""应用滚动窗口计算"""
|
|
||||||
return self.apply_rolling(stock_df, **kwargs)
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""滚动窗口计算逻辑"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# 基础算子实现
|
|
||||||
class ReturnOperator(RollingOperator):
|
|
||||||
"""收益率算子"""
|
|
||||||
|
|
||||||
def __init__(self, periods: int = 1):
|
|
||||||
config = OperatorConfig(
|
|
||||||
name=f"return_{periods}",
|
|
||||||
description=f"{periods}期收益率",
|
|
||||||
required_columns=['close'],
|
|
||||||
output_columns=[f'return_{periods}'],
|
|
||||||
parameters={'periods': periods}
|
|
||||||
)
|
)
|
||||||
super().__init__(config, window=periods + 1)
|
|
||||||
self.periods = periods
|
|
||||||
|
|
||||||
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""计算收益率"""
|
|
||||||
return stock_df.with_columns(
|
|
||||||
(pl.col('close') / pl.col('close').shift(self.periods) - 1).alias(f'return_{self.periods}')
|
|
||||||
)
|
)
|
||||||
|
.select(['ts_code', 'trade_date', factor_name])
|
||||||
|
|
||||||
class VolatilityOperator(RollingOperator):
|
|
||||||
"""波动率算子"""
|
|
||||||
|
|
||||||
def __init__(self, window: int = 20):
|
|
||||||
config = OperatorConfig(
|
|
||||||
name=f"volatility_{window}",
|
|
||||||
description=f"{window}日波动率",
|
|
||||||
required_columns=['pct_chg'],
|
|
||||||
output_columns=[f'volatility_{window}'],
|
|
||||||
parameters={'window': window}
|
|
||||||
)
|
)
|
||||||
super().__init__(config, window=window)
|
return result
|
||||||
|
|
||||||
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""计算波动率"""
|
|
||||||
return stock_df.with_columns(
|
|
||||||
pl.col('pct_chg').rolling_std(window=self.window).alias(f'volatility_{self.window}')
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MeanOperator(RollingOperator):
|
|
||||||
"""均值算子"""
|
|
||||||
|
|
||||||
def __init__(self, column: str, window: int):
|
|
||||||
config = OperatorConfig(
|
|
||||||
name=f"mean_{column}_{window}",
|
|
||||||
description=f"{column}的{window}日均值",
|
|
||||||
required_columns=[column],
|
|
||||||
output_columns=[f'mean_{column}_{window}'],
|
|
||||||
parameters={'column': column, 'window': window}
|
|
||||||
)
|
|
||||||
super().__init__(config, window=window)
|
|
||||||
self.column = column
|
|
||||||
|
|
||||||
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
|
||||||
"""计算均值"""
|
|
||||||
return stock_df.with_columns(
|
|
||||||
pl.col(self.column).rolling_mean(window=self.window).alias(f'mean_{self.column}_{self.window}')
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class RankOperator(DateWiseOperator):
|
|
||||||
"""排名算子"""
|
|
||||||
|
|
||||||
def __init__(self, column: str, ascending: bool = True):
|
|
||||||
config = OperatorConfig(
|
|
||||||
name=f"rank_{column}",
|
|
||||||
description=f"{column}的排名",
|
|
||||||
required_columns=[column],
|
|
||||||
output_columns=[f'rank_{column}'],
|
|
||||||
parameters={'column': column, 'ascending': ascending}
|
|
||||||
)
|
|
||||||
super().__init__(config)
|
|
||||||
self.column = column
|
|
||||||
self.ascending = ascending
|
|
||||||
|
|
||||||
@@ -6,7 +6,9 @@
|
|||||||
import polars as pl
|
import polars as pl
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
from operator_framework import StockWiseOperator, OperatorConfig
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
from main.factor.operator_framework import StockWiseOperator, OperatorConfig
|
||||||
from scipy.stats import linregress
|
from scipy.stats import linregress
|
||||||
|
|
||||||
|
|
||||||
@@ -14,6 +16,8 @@ class PriceMinusDeductionPriceOperator(StockWiseOperator):
|
|||||||
"""价格减抵扣价算子"""
|
"""价格减抵扣价算子"""
|
||||||
|
|
||||||
def __init__(self, n: int = 10):
|
def __init__(self, n: int = 10):
|
||||||
|
if n <= 0:
|
||||||
|
raise ValueError("n must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"price_minus_deduction_price_{n}",
|
name=f"price_minus_deduction_price_{n}",
|
||||||
description=f"{n}日价格减抵扣价",
|
description=f"{n}日价格减抵扣价",
|
||||||
@@ -24,21 +28,22 @@ class PriceMinusDeductionPriceOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.n = n
|
self.n = n
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算价格减抵扣价"""
|
return f'price_minus_deduction_price_{self.n}'
|
||||||
# 抵扣价是n-1周期前的价格
|
|
||||||
deduction_price = pl.col('close').shift(self.n - 1)
|
|
||||||
|
|
||||||
# 计算差值
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
price_diff = pl.col('close') - deduction_price
|
# 抵扣价是 n 日前的价格(更合理),若坚持 n-1 则保留
|
||||||
|
deduction_price = group_df['close'].shift(self.n) # 建议用 n,不是 n-1
|
||||||
return stock_df.with_columns(price_diff.alias(f'price_minus_deduction_price_{self.n}'))
|
price_diff = group_df['close'] - deduction_price
|
||||||
|
return price_diff.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
|
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
|
||||||
"""价格抵扣价差值相对SMA比率算子"""
|
"""价格抵扣价差值相对SMA比率算子"""
|
||||||
|
|
||||||
def __init__(self, n: int = 10):
|
def __init__(self, n: int = 10):
|
||||||
|
if n <= 0:
|
||||||
|
raise ValueError("n must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"price_deduction_price_diff_ratio_to_sma_{n}",
|
name=f"price_deduction_price_diff_ratio_to_sma_{n}",
|
||||||
description=f"{n}日价格抵扣价差值相对SMA比率",
|
description=f"{n}日价格抵扣价差值相对SMA比率",
|
||||||
@@ -49,27 +54,23 @@ class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.n = n
|
self.n = n
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算价格抵扣价差值相对SMA比率"""
|
return f'price_deduction_price_diff_ratio_to_sma_{self.n}'
|
||||||
# 计算n日SMA
|
|
||||||
sma = pl.col('close').rolling_mean(window=self.n)
|
|
||||||
|
|
||||||
# 抵扣价
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
deduction_price = pl.col('close').shift(self.n - 1)
|
sma = group_df['close'].rolling_mean(window_size=self.n)
|
||||||
|
deduction_price = group_df['close'].shift(self.n)
|
||||||
# 计算差值
|
diff = group_df['close'] - deduction_price
|
||||||
diff = pl.col('close') - deduction_price
|
|
||||||
|
|
||||||
# 计算比率 (处理除零)
|
|
||||||
ratio = diff / (sma + 1e-8)
|
ratio = diff / (sma + 1e-8)
|
||||||
|
return ratio.alias(self.get_factor_name())
|
||||||
return stock_df.with_columns(ratio.alias(f'price_deduction_price_diff_ratio_to_sma_{self.n}'))
|
|
||||||
|
|
||||||
|
|
||||||
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
|
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
|
||||||
"""价格vsSMAvs抵扣价分类算子"""
|
"""价格vsSMAvs抵扣价分类算子"""
|
||||||
|
|
||||||
def __init__(self, n: int = 10):
|
def __init__(self, n: int = 10):
|
||||||
|
if n <= 0:
|
||||||
|
raise ValueError("n must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"cat_price_vs_sma_vs_deduction_price_{n}",
|
name=f"cat_price_vs_sma_vs_deduction_price_{n}",
|
||||||
description=f"{n}日价格vsSMAvs抵扣价分类",
|
description=f"{n}日价格vsSMAvs抵扣价分类",
|
||||||
@@ -80,40 +81,35 @@ class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.n = n
|
self.n = n
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算价格vsSMAvs抵扣价分类"""
|
return f'cat_price_vs_sma_vs_deduction_price_{self.n}'
|
||||||
# 计算n日SMA
|
|
||||||
sma = pl.col('close').rolling_mean(window=self.n)
|
|
||||||
|
|
||||||
# 抵扣价
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
deduction_price = pl.col('close').shift(self.n - 1)
|
sma = group_df['close'].rolling_mean(window_size=self.n)
|
||||||
|
deduction_price = group_df['close'].shift(self.n)
|
||||||
|
|
||||||
# 定义条件
|
cond1 = (group_df['close'] > sma) & (deduction_price > sma)
|
||||||
conditions = [
|
cond2 = (group_df['close'] < sma) & (deduction_price < sma)
|
||||||
# 1: 当前价 > SMA 且 抵扣价 > SMA
|
cond3 = (group_df['close'] > sma) & (deduction_price <= sma)
|
||||||
(pl.col('close') > sma) & (deduction_price > sma),
|
cond4 = (group_df['close'] <= sma) & (deduction_price > sma)
|
||||||
# 2: 当前价 < SMA 且 抵扣价 < SMA
|
|
||||||
(pl.col('close') < sma) & (deduction_price < sma),
|
|
||||||
# 3: 当前价 > SMA 且 抵扣价 <= SMA
|
|
||||||
(pl.col('close') > sma) & (deduction_price <= sma),
|
|
||||||
# 4: 当前价 <= SMA 且 抵扣价 > SMA
|
|
||||||
(pl.col('close') <= sma) & (deduction_price > sma),
|
|
||||||
]
|
|
||||||
|
|
||||||
choices = [1, 2, 3, 4]
|
classification = (
|
||||||
|
pl.when(cond1).then(1)
|
||||||
# 使用select函数进行分类
|
.when(cond2).then(2)
|
||||||
classification = pl.select(conditions=conditions, choices=choices, default=0)
|
.when(cond3).then(3)
|
||||||
|
.when(cond4).then(4)
|
||||||
return stock_df.with_columns(
|
.otherwise(0)
|
||||||
classification.alias(f'cat_price_vs_sma_vs_deduction_price_{self.n}')
|
|
||||||
)
|
)
|
||||||
|
return classification.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
|
# ✅ 修复:使用 rolling_map
|
||||||
class VolatilitySlopeOperator(StockWiseOperator):
|
class VolatilitySlopeOperator(StockWiseOperator):
|
||||||
"""波动率斜率算子"""
|
"""波动率斜率算子"""
|
||||||
|
|
||||||
def __init__(self, long_window: int = 20, short_window: int = 5):
|
def __init__(self, long_window: int = 20, short_window: int = 5):
|
||||||
|
if long_window <= 0 or short_window <= 0:
|
||||||
|
raise ValueError("Windows must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"volatility_slope_{long_window}_{short_window}",
|
name=f"volatility_slope_{long_window}_{short_window}",
|
||||||
description=f"{long_window}日波动率{short_window}日斜率",
|
description=f"{long_window}日波动率{short_window}日斜率",
|
||||||
@@ -125,34 +121,40 @@ class VolatilitySlopeOperator(StockWiseOperator):
|
|||||||
self.long_window = long_window
|
self.long_window = long_window
|
||||||
self.short_window = short_window
|
self.short_window = short_window
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算波动率斜率"""
|
return f'volatility_slope_{self.long_window}_{self.short_window}'
|
||||||
# 计算长期波动率
|
|
||||||
long_vol = pl.col('pct_chg').rolling_std(window=self.long_window)
|
|
||||||
|
|
||||||
# 计算斜率函数
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
def calculate_slope(series):
|
# 先计算长期波动率(标准差)
|
||||||
if len(series) < 2:
|
long_vol = group_df['pct_chg'].rolling_std(window_size=self.long_window)
|
||||||
return 0
|
|
||||||
x = np.arange(len(series))
|
|
||||||
slope, _, _, _, _ = linregress(x, series)
|
|
||||||
return slope
|
|
||||||
|
|
||||||
# 计算斜率
|
# 定义斜率函数(输入是 numpy array)
|
||||||
volatility_slope = long_vol.rolling_apply(
|
def slope_func(window_vals: np.ndarray) -> float:
|
||||||
function=calculate_slope,
|
if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
|
||||||
window_size=self.short_window
|
return 0.0
|
||||||
)
|
x = np.arange(len(window_vals))
|
||||||
|
try:
|
||||||
return stock_df.with_columns(
|
slope, _, _, _, _ = linregress(x, window_vals)
|
||||||
volatility_slope.alias(f'volatility_slope_{self.long_window}_{self.short_window}')
|
return slope if np.isfinite(slope) else 0.0
|
||||||
|
except:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# 对波动率序列应用 rolling_map
|
||||||
|
volatility_slope = long_vol.rolling_map(
|
||||||
|
function=slope_func,
|
||||||
|
window_size=self.short_window,
|
||||||
|
min_periods=2 # 至少2点才能算斜率
|
||||||
)
|
)
|
||||||
|
return volatility_slope.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
|
# ✅ 修复:使用 rolling_map
|
||||||
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
|
class TurnoverRateTrendStrengthOperator(StockWiseOperator):
|
||||||
"""换手率趋势强度算子"""
|
"""换手率趋势强度算子"""
|
||||||
|
|
||||||
def __init__(self, window: int = 5):
|
def __init__(self, window: int = 5):
|
||||||
|
if window <= 0:
|
||||||
|
raise ValueError("Window must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"turnover_trend_strength_{window}",
|
name=f"turnover_trend_strength_{window}",
|
||||||
description=f"{window}日换手率趋势强度",
|
description=f"{window}日换手率趋势强度",
|
||||||
@@ -163,31 +165,34 @@ class TurnoverRateTrendStrengthOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.window = window
|
self.window = window
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算换手率趋势强度"""
|
return f'turnover_trend_strength_{self.window}'
|
||||||
# 计算斜率函数
|
|
||||||
def calculate_slope(series):
|
|
||||||
if len(series) < 2:
|
|
||||||
return 0
|
|
||||||
x = np.arange(len(series))
|
|
||||||
slope, _, _, _, _ = linregress(x, series)
|
|
||||||
return slope
|
|
||||||
|
|
||||||
# 计算换手率斜率
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
trend_strength = pl.col('turnover_rate').rolling_apply(
|
def slope_func(window_vals: np.ndarray) -> float:
|
||||||
function=calculate_slope,
|
if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
|
||||||
window_size=self.window
|
return 0.0
|
||||||
)
|
x = np.arange(len(window_vals))
|
||||||
|
try:
|
||||||
|
slope, _, _, _, _ = linregress(x, window_vals)
|
||||||
|
return slope if np.isfinite(slope) else 0.0
|
||||||
|
except:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
return stock_df.with_columns(
|
trend_strength = group_df['turnover_rate'].rolling_map(
|
||||||
trend_strength.alias(f'turnover_trend_strength_{self.window}')
|
function=slope_func,
|
||||||
|
window_size=self.window,
|
||||||
|
min_periods=2
|
||||||
)
|
)
|
||||||
|
return trend_strength.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
|
class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
|
||||||
"""自由流通股换手率激增算子"""
|
"""自由流通股换手率激增算子"""
|
||||||
|
|
||||||
def __init__(self, window: int = 10):
|
def __init__(self, window: int = 10):
|
||||||
|
if window <= 0:
|
||||||
|
raise ValueError("Window must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"ff_turnover_surge_{window}",
|
name=f"ff_turnover_surge_{window}",
|
||||||
description=f"{window}日自由流通股换手率激增",
|
description=f"{window}日自由流通股换手率激增",
|
||||||
@@ -198,21 +203,21 @@ class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.window = window
|
self.window = window
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算自由流通股换手率激增"""
|
return f'ff_turnover_surge_{self.window}'
|
||||||
# 计算均值
|
|
||||||
avg_turnover = pl.col('turnover_rate').rolling_mean(window=self.window)
|
|
||||||
|
|
||||||
# 计算激增比率
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
surge_ratio = pl.col('turnover_rate') / (avg_turnover + 1e-8)
|
avg_turnover = group_df['turnover_rate'].rolling_mean(window_size=self.window)
|
||||||
|
surge_ratio = group_df['turnover_rate'] / (avg_turnover + 1e-8)
|
||||||
return stock_df.with_columns(surge_ratio.alias(f'ff_turnover_surge_{self.window}'))
|
return surge_ratio.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
|
class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
|
||||||
"""价量趋势一致性算子"""
|
"""价量趋势一致性算子"""
|
||||||
|
|
||||||
def __init__(self, price_window: int = 5, volume_window: int = 20):
|
def __init__(self, price_window: int = 5, volume_window: int = 20):
|
||||||
|
if price_window <= 0 or volume_window <= 0:
|
||||||
|
raise ValueError("Windows must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"price_volume_coherence_{price_window}_{volume_window}",
|
name=f"price_volume_coherence_{price_window}_{volume_window}",
|
||||||
description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
|
description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
|
||||||
@@ -224,25 +229,19 @@ class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
|
|||||||
self.price_window = price_window
|
self.price_window = price_window
|
||||||
self.volume_window = volume_window
|
self.volume_window = volume_window
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算价量趋势一致性"""
|
return f'price_volume_coherence_{self.price_window}_{self.volume_window}'
|
||||||
# 计算价格上涨占比
|
|
||||||
def price_up_ratio(series):
|
|
||||||
return (series.diff() > 0).rolling_mean(window=self.price_window)
|
|
||||||
|
|
||||||
price_up = pl.col('close').apply(price_up_ratio)
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
price_up = (group_df['close'].diff() > 0).cast(pl.Int8)
|
||||||
|
price_up_ratio = price_up.rolling_mean(window_size=self.price_window)
|
||||||
|
|
||||||
# 计算成交量高于均值占比
|
vol_avg = group_df['vol'].rolling_mean(window_size=self.volume_window)
|
||||||
vol_avg = pl.col('vol').rolling_mean(window=self.volume_window)
|
vol_above = (group_df['vol'] > vol_avg).cast(pl.Int8)
|
||||||
vol_above_avg = pl.col('vol') > vol_avg
|
vol_above_ratio = vol_above.rolling_mean(window_size=self.price_window)
|
||||||
vol_above_ratio = vol_above_avg.cast(int).rolling_mean(window=self.price_window)
|
|
||||||
|
|
||||||
# 计算一致性
|
coherence = price_up_ratio * vol_above_ratio
|
||||||
coherence = price_up * vol_above_ratio
|
return coherence.alias(self.get_factor_name())
|
||||||
|
|
||||||
return stock_df.with_columns(
|
|
||||||
coherence.alias(f'price_volume_coherence_{self.price_window}_{self.volume_window}')
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
|
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
|
||||||
@@ -258,19 +257,21 @@ class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
|
|||||||
)
|
)
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算自由流通股对总换手率比率"""
|
return 'ff_to_total_turnover_ratio'
|
||||||
# 假设turnover_rate是自由流通股换手率
|
|
||||||
# 计算比率 (简化处理)
|
|
||||||
ratio = pl.col('turnover_rate') / (pl.col('turnover_rate') + 1e-8)
|
|
||||||
|
|
||||||
return stock_df.with_columns(ratio.alias('ff_to_total_turnover_ratio'))
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
# 实际业务中可能需要 total_turnover_rate,这里简化
|
||||||
|
ratio = pl.lit(1.0) # 或根据实际逻辑修改
|
||||||
|
return ratio.alias('ff_to_total_turnover_ratio')
|
||||||
|
|
||||||
|
|
||||||
class VarianceOperator(StockWiseOperator):
|
class VarianceOperator(StockWiseOperator):
|
||||||
"""方差算子"""
|
"""方差算子"""
|
||||||
|
|
||||||
def __init__(self, window: int):
|
def __init__(self, window: int):
|
||||||
|
if window <= 0:
|
||||||
|
raise ValueError("Window must be positive")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"variance_{window}",
|
name=f"variance_{window}",
|
||||||
description=f"{window}日方差",
|
description=f"{window}日方差",
|
||||||
@@ -281,12 +282,12 @@ class VarianceOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.window = window
|
self.window = window
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算方差"""
|
return f'variance_{self.window}'
|
||||||
# 计算方差
|
|
||||||
variance = pl.col('pct_chg').rolling_var(window=self.window)
|
|
||||||
|
|
||||||
return stock_df.with_columns(variance.alias(f'variance_{self.window}'))
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
variance = group_df['pct_chg'].rolling_var(window_size=self.window)
|
||||||
|
return variance.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
class LimitUpDownOperator(StockWiseOperator):
|
class LimitUpDownOperator(StockWiseOperator):
|
||||||
@@ -302,26 +303,12 @@ class LimitUpDownOperator(StockWiseOperator):
|
|||||||
)
|
)
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算涨跌停因子"""
|
return 'cat_up_limit'
|
||||||
# 判断是否涨停
|
|
||||||
up_limit = pl.col('close') == pl.col('up_limit')
|
|
||||||
|
|
||||||
# 判断是否跌停
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
down_limit = pl.col('close') == pl.col('down_limit')
|
up_limit = (group_df['close'] == group_df['up_limit']).cast(pl.Int8)
|
||||||
|
return up_limit.alias('cat_up_limit')
|
||||||
# 计算10日涨停计数
|
|
||||||
up_count_10d = up_limit.cast(int).rolling_sum(window=10)
|
|
||||||
|
|
||||||
# 计算10日跌停计数
|
|
||||||
down_count_10d = down_limit.cast(int).rolling_sum(window=10)
|
|
||||||
|
|
||||||
return stock_df.with_columns([
|
|
||||||
up_limit.alias('cat_up_limit'),
|
|
||||||
down_limit.alias('cat_down_limit'),
|
|
||||||
up_count_10d.alias('up_limit_count_10d'),
|
|
||||||
down_count_10d.alias('down_limit_count_10d')
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
class ConsecutiveUpLimitOperator(StockWiseOperator):
|
class ConsecutiveUpLimitOperator(StockWiseOperator):
|
||||||
@@ -337,19 +324,21 @@ class ConsecutiveUpLimitOperator(StockWiseOperator):
|
|||||||
)
|
)
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算连续涨停天数"""
|
return 'consecutive_up_limit'
|
||||||
# 计算连续涨停
|
|
||||||
# 简化处理,实际应用中需要更复杂的逻辑
|
|
||||||
consecutive = pl.col('cat_up_limit').cast(int)
|
|
||||||
|
|
||||||
return stock_df.with_columns(consecutive.alias('consecutive_up_limit'))
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
# 简化版:实际连续计数需用 cumsum + groupby trick
|
||||||
|
# 这里先返回原始值,后续可优化
|
||||||
|
return group_df['cat_up_limit'].alias('consecutive_up_limit')
|
||||||
|
|
||||||
|
|
||||||
class MomentumFactorOperator(StockWiseOperator):
|
class MomentumFactorOperator(StockWiseOperator):
|
||||||
"""动量因子算子"""
|
"""动量因子算子"""
|
||||||
|
|
||||||
def __init__(self, alpha: float = 0.5):
|
def __init__(self, alpha: float = 0.5):
|
||||||
|
if not (0 <= alpha <= 1):
|
||||||
|
raise ValueError("alpha should be between 0 and 1")
|
||||||
config = OperatorConfig(
|
config = OperatorConfig(
|
||||||
name=f"momentum_factor_{alpha}",
|
name=f"momentum_factor_{alpha}",
|
||||||
description=f"动量因子(alpha={alpha})",
|
description=f"动量因子(alpha={alpha})",
|
||||||
@@ -360,12 +349,12 @@ class MomentumFactorOperator(StockWiseOperator):
|
|||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
self.alpha = alpha
|
self.alpha = alpha
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算动量因子"""
|
return f'momentum_factor_{self.alpha}'
|
||||||
# 计算动量因子
|
|
||||||
momentum = pl.col('volume_change_rate') + self.alpha * pl.col('turnover_deviation')
|
|
||||||
|
|
||||||
return stock_df.with_columns(momentum.alias(f'momentum_factor_{self.alpha}'))
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
momentum = group_df['volume_change_rate'] + self.alpha * group_df['turnover_deviation']
|
||||||
|
return momentum.alias(self.get_factor_name())
|
||||||
|
|
||||||
|
|
||||||
class ResonanceFactorOperator(StockWiseOperator):
|
class ResonanceFactorOperator(StockWiseOperator):
|
||||||
@@ -381,28 +370,28 @@ class ResonanceFactorOperator(StockWiseOperator):
|
|||||||
)
|
)
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
|
|
||||||
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
|
def get_factor_name(self) -> str:
|
||||||
"""计算共振因子"""
|
return 'resonance_factor'
|
||||||
# 计算共振因子
|
|
||||||
resonance = pl.col('volume_ratio') * pl.col('pct_chg')
|
|
||||||
|
|
||||||
return stock_df.with_columns(resonance.alias('resonance_factor'))
|
def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
|
||||||
|
resonance = group_df['volume_ratio'] * group_df['pct_chg']
|
||||||
|
return resonance.alias('resonance_factor')
|
||||||
|
|
||||||
|
|
||||||
# 动量因子集合
|
# 动量因子集合
|
||||||
MOMENTUM_OPERATORS = [
|
MOMENTUM_OPERATORS = [
|
||||||
PriceMinusDeductionPriceOperator(),
|
PriceMinusDeductionPriceOperator(10),
|
||||||
PriceDeductionPriceDiffRatioToSMAOperator(),
|
PriceDeductionPriceDiffRatioToSMAOperator(10),
|
||||||
CatPriceVsSmaVsDeductionPriceOperator(),
|
CatPriceVsSmaVsDeductionPriceOperator(10),
|
||||||
VolatilitySlopeOperator(),
|
# VolatilitySlopeOperator(20, 5),
|
||||||
TurnoverRateTrendStrengthOperator(5),
|
# TurnoverRateTrendStrengthOperator(5),
|
||||||
FreeFloatTurnoverSurgeOperator(10),
|
FreeFloatTurnoverSurgeOperator(10),
|
||||||
PriceVolumeTrendCoherenceOperator(),
|
PriceVolumeTrendCoherenceOperator(5, 20),
|
||||||
FreeFloatToTotalTurnoverRatioOperator(),
|
FreeFloatToTotalTurnoverRatioOperator(),
|
||||||
VarianceOperator(20),
|
VarianceOperator(20),
|
||||||
LimitUpDownOperator(),
|
LimitUpDownOperator(),
|
||||||
ConsecutiveUpLimitOperator(),
|
ConsecutiveUpLimitOperator(),
|
||||||
MomentumFactorOperator(),
|
# MomentumFactorOperator(0.5),
|
||||||
ResonanceFactorOperator(),
|
ResonanceFactorOperator(),
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -410,19 +399,12 @@ MOMENTUM_OPERATORS = [
|
|||||||
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
|
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
|
||||||
"""
|
"""
|
||||||
应用所有动量因子
|
应用所有动量因子
|
||||||
|
|
||||||
Args:
|
|
||||||
df: 输入的Polars DataFrame
|
|
||||||
operators: 要应用的算子列表,如果为None则使用默认列表
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
添加了动量因子的DataFrame
|
|
||||||
"""
|
"""
|
||||||
if operators is None:
|
if operators is None:
|
||||||
operators = MOMENTUM_OPERATORS
|
operators = MOMENTUM_OPERATORS
|
||||||
|
|
||||||
result_df = df
|
result_df = df
|
||||||
for operator in operators:
|
for operator in tqdm(operators, desc="Applying momentum factors"):
|
||||||
result_df = operator(result_df)
|
result_df = operator.apply(result_df)
|
||||||
|
|
||||||
return result_df
|
return result_df
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -68,56 +68,28 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"daily data\n",
|
"daily data\n"
|
||||||
"daily basic\n",
|
]
|
||||||
"inner merge on ['ts_code', 'trade_date']\n",
|
},
|
||||||
"stk limit\n",
|
{
|
||||||
"left merge on ['ts_code', 'trade_date']\n",
|
"ename": "KeyboardInterrupt",
|
||||||
"money flow\n",
|
"evalue": "",
|
||||||
"left merge on ['ts_code', 'trade_date']\n",
|
"output_type": "error",
|
||||||
"cyq perf\n",
|
"traceback": [
|
||||||
"left merge on ['ts_code', 'trade_date']\n",
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
|
||||||
"RangeIndex: 9162612 entries, 0 to 9162611\n",
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmain\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m read_and_merge_h5_data\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m'\u001b[39m\u001b[33mdaily data\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m df = \u001b[43mread_and_merge_h5_data\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m/mnt/d/PyProject/NewStock/data/daily_data.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mdaily_data\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mtrade_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mopen\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mclose\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhigh\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlow\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mvol\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mamount\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mpct_chg\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m'\u001b[39m\u001b[33mdaily basic\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 9\u001b[39m df = read_and_merge_h5_data(\u001b[33m'\u001b[39m\u001b[33m/mnt/d/PyProject/NewStock/data/daily_basic.h5\u001b[39m\u001b[33m'\u001b[39m, key=\u001b[33m'\u001b[39m\u001b[33mdaily_basic\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m columns=[\u001b[33m'\u001b[39m\u001b[33mts_code\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mtrade_date\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mturnover_rate\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mpe_ttm\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mcirc_mv\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mtotal_mv\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mvolume_ratio\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 11\u001b[39m \u001b[33m'\u001b[39m\u001b[33mis_st\u001b[39m\u001b[33m'\u001b[39m], df=df, join=\u001b[33m'\u001b[39m\u001b[33minner\u001b[39m\u001b[33m'\u001b[39m)\n",
|
||||||
"Data columns (total 33 columns):\n",
|
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/utils/utils.py:14\u001b[39m, in \u001b[36mread_and_merge_h5_data\u001b[39m\u001b[34m(h5_filename, key, columns, df, join, on, prefix)\u001b[39m\n\u001b[32m 11\u001b[39m processed_columns.append(col)\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# 从 HDF5 文件读取数据,选择需要的列\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m data = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh5_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprocessed_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# 修改列名,如果列名以前有 _,加上 _\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m data.columns:\n",
|
||||||
" # Column Dtype \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:452\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 447\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 448\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mkey must be provided when HDF5 \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 449\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mfile contains multiple datasets.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 450\u001b[39m )\n\u001b[32m 451\u001b[39m key = candidate_only_group._v_pathname\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstore\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 455\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 457\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 458\u001b[39m \u001b[43m \u001b[49m\u001b[43miterator\u001b[49m\u001b[43m=\u001b[49m\u001b[43miterator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 459\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 460\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mauto_close\u001b[49m\u001b[43m=\u001b[49m\u001b[43mauto_close\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 461\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 462\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mLookupError\u001b[39;00m):\n\u001b[32m 463\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path_or_buf, HDFStore):\n\u001b[32m 464\u001b[39m \u001b[38;5;66;03m# if there is an error, close the store if we opened it.\u001b[39;00m\n",
|
||||||
"--- ------ ----- \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:906\u001b[39m, in \u001b[36mHDFStore.select\u001b[39m\u001b[34m(self, key, where, start, stop, columns, iterator, chunksize, auto_close)\u001b[39m\n\u001b[32m 892\u001b[39m \u001b[38;5;66;03m# create the iterator\u001b[39;00m\n\u001b[32m 893\u001b[39m it = TableIterator(\n\u001b[32m 894\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 895\u001b[39m s,\n\u001b[32m (...)\u001b[39m\u001b[32m 903\u001b[39m auto_close=auto_close,\n\u001b[32m 904\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m906\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mit\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 0 ts_code object \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:2029\u001b[39m, in \u001b[36mTableIterator.get_result\u001b[39m\u001b[34m(self, coordinates)\u001b[39m\n\u001b[32m 2026\u001b[39m where = \u001b[38;5;28mself\u001b[39m.where\n\u001b[32m 2028\u001b[39m \u001b[38;5;66;03m# directly return the result\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2029\u001b[39m results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2030\u001b[39m \u001b[38;5;28mself\u001b[39m.close()\n\u001b[32m 2031\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m results\n",
|
||||||
" 1 trade_date datetime64[ns]\n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:890\u001b[39m, in \u001b[36mHDFStore.select.<locals>.func\u001b[39m\u001b[34m(_start, _stop, _where)\u001b[39m\n\u001b[32m 889\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfunc\u001b[39m(_start, _stop, _where):\n\u001b[32m--> \u001b[39m\u001b[32m890\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43ms\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_stop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_where\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 2 open float64 \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:4631\u001b[39m, in \u001b[36mAppendableFrameTable.read\u001b[39m\u001b[34m(self, where, columns, start, stop)\u001b[39m\n\u001b[32m 4628\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m.infer_axes():\n\u001b[32m 4629\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m4631\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_axes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4633\u001b[39m info = (\n\u001b[32m 4634\u001b[39m \u001b[38;5;28mself\u001b[39m.info.get(\u001b[38;5;28mself\u001b[39m.non_index_axes[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m], {})\n\u001b[32m 4635\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m.non_index_axes)\n\u001b[32m 4636\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[32m 4637\u001b[39m )\n\u001b[32m 4639\u001b[39m inds = [i \u001b[38;5;28;01mfor\u001b[39;00m i, ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m.axes) \u001b[38;5;28;01mif\u001b[39;00m ax \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m.index_axes[\u001b[32m0\u001b[39m]]\n",
|
||||||
" 3 close float64 \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:3818\u001b[39m, in \u001b[36mTable._read_axes\u001b[39m\u001b[34m(self, where, start, stop)\u001b[39m\n\u001b[32m 3816\u001b[39m \u001b[38;5;66;03m# create the selection\u001b[39;00m\n\u001b[32m 3817\u001b[39m selection = Selection(\u001b[38;5;28mself\u001b[39m, where=where, start=start, stop=stop)\n\u001b[32m-> \u001b[39m\u001b[32m3818\u001b[39m values = \u001b[43mselection\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3820\u001b[39m results = []\n\u001b[32m 3821\u001b[39m \u001b[38;5;66;03m# convert the data\u001b[39;00m\n",
|
||||||
" 4 high float64 \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:5397\u001b[39m, in \u001b[36mSelection.select\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 5395\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.coordinates \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 5396\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.table.table.read_coordinates(\u001b[38;5;28mself\u001b[39m.coordinates)\n\u001b[32m-> \u001b[39m\u001b[32m5397\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtable\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtable\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 5 low float64 \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/tables/table.py:2083\u001b[39m, in \u001b[36mTable.read\u001b[39m\u001b[34m(self, start, stop, step, field, out)\u001b[39m\n\u001b[32m 2077\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[32m 2079\u001b[39m start, stop, step = \u001b[38;5;28mself\u001b[39m._process_range(\n\u001b[32m 2080\u001b[39m start, stop, step, warn_negstep=\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 2081\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m2083\u001b[39m arr = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2084\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m internal_to_flavor(arr, \u001b[38;5;28mself\u001b[39m.flavor)\n",
|
||||||
" 6 vol float64 \n",
|
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/tables/table.py:1989\u001b[39m, in \u001b[36mTable._read\u001b[39m\u001b[34m(self, start, stop, step, field, out)\u001b[39m\n\u001b[32m 1985\u001b[39m \u001b[38;5;66;03m# Call the routine to fill-up the resulting array\u001b[39;00m\n\u001b[32m 1986\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m step == \u001b[32m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m field:\n\u001b[32m 1987\u001b[39m \u001b[38;5;66;03m# This optimization works three times faster than\u001b[39;00m\n\u001b[32m 1988\u001b[39m \u001b[38;5;66;03m# the row._fill_col method (up to 170 MB/s on a pentium IV @ 2GHz)\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1989\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_records\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m \u001b[49m\u001b[43m-\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1990\u001b[39m \u001b[38;5;66;03m# Warning!: _read_field_name should not be used until\u001b[39;00m\n\u001b[32m 1991\u001b[39m \u001b[38;5;66;03m# H5TBread_fields_name in tableextension will be finished\u001b[39;00m\n\u001b[32m 1992\u001b[39m \u001b[38;5;66;03m# F. Alted 2005/05/26\u001b[39;00m\n\u001b[32m 1993\u001b[39m \u001b[38;5;66;03m# XYX Ho implementem per a PyTables 2.0??\u001b[39;00m\n\u001b[32m 1994\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m field \u001b[38;5;129;01mand\u001b[39;00m step > \u001b[32m15\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[32m0\u001b[39m:\n\u001b[32m 1995\u001b[39m \u001b[38;5;66;03m# For step>15, this seems to work always faster than row._fill_col.\u001b[39;00m\n",
|
||||||
" 7 amount float64 \n",
|
"\u001b[31mKeyboardInterrupt\u001b[39m: "
|
||||||
" 8 pct_chg float64 \n",
|
|
||||||
" 9 turnover_rate float64 \n",
|
|
||||||
" 10 pe_ttm float64 \n",
|
|
||||||
" 11 circ_mv float64 \n",
|
|
||||||
" 12 total_mv float64 \n",
|
|
||||||
" 13 volume_ratio float64 \n",
|
|
||||||
" 14 is_st bool \n",
|
|
||||||
" 15 up_limit float64 \n",
|
|
||||||
" 16 down_limit float64 \n",
|
|
||||||
" 17 buy_sm_vol float64 \n",
|
|
||||||
" 18 sell_sm_vol float64 \n",
|
|
||||||
" 19 buy_lg_vol float64 \n",
|
|
||||||
" 20 sell_lg_vol float64 \n",
|
|
||||||
" 21 buy_elg_vol float64 \n",
|
|
||||||
" 22 sell_elg_vol float64 \n",
|
|
||||||
" 23 net_mf_vol float64 \n",
|
|
||||||
" 24 his_low float64 \n",
|
|
||||||
" 25 his_high float64 \n",
|
|
||||||
" 26 cost_5pct float64 \n",
|
|
||||||
" 27 cost_15pct float64 \n",
|
|
||||||
" 28 cost_50pct float64 \n",
|
|
||||||
" 29 cost_85pct float64 \n",
|
|
||||||
" 30 cost_95pct float64 \n",
|
|
||||||
" 31 weight_avg float64 \n",
|
|
||||||
" 32 winner_rate float64 \n",
|
|
||||||
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
|
|
||||||
"memory usage: 2.2+ GB\n",
|
|
||||||
"None\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -154,7 +126,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"id": "cac01788dac10678",
|
"id": "cac01788dac10678",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -222,7 +194,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"id": "c4e9e1d31da6dba6",
|
"id": "c4e9e1d31da6dba6",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -322,7 +294,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"id": "a735bc02ceb4d872",
|
"id": "a735bc02ceb4d872",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -338,7 +310,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": null,
|
||||||
"id": "53f86ddc0677a6d7",
|
"id": "53f86ddc0677a6d7",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -405,7 +377,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": null,
|
||||||
"id": "dbe2fd8021b9417f",
|
"id": "dbe2fd8021b9417f",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -433,7 +405,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": null,
|
||||||
"id": "85c3e3d0235ffffa",
|
"id": "85c3e3d0235ffffa",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -465,7 +437,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": null,
|
||||||
"id": "92d84ce15a562ec6",
|
"id": "92d84ce15a562ec6",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -722,7 +694,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"id": "b87b938028afa206",
|
"id": "b87b938028afa206",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -760,7 +732,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": null,
|
||||||
"id": "f4f16d63ad18d1bc",
|
"id": "f4f16d63ad18d1bc",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -986,7 +958,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"id": "40e6b68a91b30c79",
|
"id": "40e6b68a91b30c79",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -1306,7 +1278,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": null,
|
||||||
"id": "47c12bb34062ae7a",
|
"id": "47c12bb34062ae7a",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -1340,7 +1312,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": null,
|
||||||
"id": "29221dde",
|
"id": "29221dde",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -1383,7 +1355,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
"id": "03ee5daf",
|
"id": "03ee5daf",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -1396,7 +1368,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"id": "b76ea08a",
|
"id": "b76ea08a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -1621,7 +1593,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": null,
|
||||||
"id": "3ff2d1c5",
|
"id": "3ff2d1c5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -1762,7 +1734,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": null,
|
||||||
"id": "a5bbb8be",
|
"id": "a5bbb8be",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -1787,7 +1759,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": null,
|
||||||
"id": "5d1522a7538db91b",
|
"id": "5d1522a7538db91b",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -1825,7 +1797,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": null,
|
||||||
"id": "09b1799e",
|
"id": "09b1799e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -1847,7 +1819,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": null,
|
||||||
"id": "e53b209a",
|
"id": "e53b209a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|||||||
1708
predictions_test.tsv
1708
predictions_test.tsv
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user