factor优化(暂存版)

This commit is contained in:
2025-10-14 09:44:46 +08:00
parent 44315b2c76
commit 7862b9739a
9 changed files with 804 additions and 4427 deletions

1
.gitignore vendored
View File

@@ -18,3 +18,4 @@ model
!.gitignore !.gitignore
!.git !.git
!.env

View File

@@ -33,7 +33,7 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
# 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确 # 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确
stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map) stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map)
# 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理 # 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理
        if stk_trade_processed_df['_direction'].isnull().any(): if stk_trade_processed_df['_direction'].isnull().any():
print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。") print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。")
# 可以选择填充NaN例如用0填充或者移除这些行 # 可以选择填充NaN例如用0填充或者移除这些行
# stk_trade_processed_df['_direction'].fillna(0, inplace=True) # stk_trade_processed_df['_direction'].fillna(0, inplace=True)
@@ -109,4 +109,3 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
print("股东增减持因子计算完成。") print("股东增减持因子计算完成。")
return df_merged return df_merged

View File

@@ -1,196 +0,0 @@
"""
因子算子基础框架 - 简化版本
提供股票截面和日期截面两个基础函数
"""
import polars as pl
from typing import Callable, Any, Optional, Union
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def apply_stockwise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """
    Apply an operator function to each stock's time series (per-ts_code slice).

    Args:
        df: polars DataFrame; must contain 'ts_code' and 'trade_date' columns.
        operator_func: callable receiving one stock's DataFrame (sorted by
            trade_date) plus extra arguments, returning a processed DataFrame.
        *args, **kwargs: forwarded verbatim to operator_func.

    Returns:
        Concatenation of all per-stock results, sorted by ['ts_code', 'trade_date'].
        If operator_func raises for a stock, that stock's original (sorted)
        rows are kept unchanged so one bad symbol cannot abort the whole run.

    Raises:
        ValueError: if a required column is missing.
    """
    # Validate required columns up front.
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    results = []
    # partition_by splits the frame in a single pass, instead of running a
    # full-table filter once per unique ts_code (O(S*N) -> O(N)).
    for stock_df in df.partition_by('ts_code', maintain_order=True):
        # Time order matters for shift/rolling-style operators.
        stock_df = stock_df.sort('trade_date')
        try:
            results.append(operator_func(stock_df, *args, **kwargs))
        except Exception as e:
            ts_code = stock_df['ts_code'][0]
            logger.error(f"股票 {ts_code} 处理失败: {e}")
            # Fall back to the untouched (sorted) input for this stock.
            results.append(stock_df)

    # Merge per-stock results back into one long table.
    if results:
        return pl.concat(results).sort(['ts_code', 'trade_date'])
    return df
def apply_datewise(
    df: pl.DataFrame,
    operator_func: Callable[[pl.DataFrame, Any], pl.DataFrame],
    *args,
    **kwargs
) -> pl.DataFrame:
    """
    Apply an operator function to each date's cross-section (per-trade_date slice).

    Args:
        df: polars DataFrame; must contain 'ts_code' and 'trade_date' columns.
        operator_func: callable receiving one date's DataFrame plus extra
            arguments, returning a processed DataFrame.
        *args, **kwargs: forwarded verbatim to operator_func.

    Returns:
        Concatenation of all per-date results, sorted by ['ts_code', 'trade_date'].
        If operator_func raises for a date, that date's original rows are kept
        unchanged so one bad cross-section cannot abort the whole run.

    Raises:
        ValueError: if a required column is missing.
    """
    # Validate required columns up front.
    required_cols = ['ts_code', 'trade_date']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"缺少必需列: {missing_cols}")

    results = []
    # partition_by splits the frame in a single pass, instead of running a
    # full-table filter once per unique trade_date (O(D*N) -> O(N)).
    for date_df in df.partition_by('trade_date', maintain_order=True):
        try:
            results.append(operator_func(date_df, *args, **kwargs))
        except Exception as e:
            trade_date = date_df['trade_date'][0]
            logger.error(f"日期 {trade_date} 处理失败: {e}")
            # Fall back to the untouched input for this date.
            results.append(date_df)

    # Merge per-date results back into one long table.
    if results:
        return pl.concat(results).sort(['ts_code', 'trade_date'])
    return df
# 常用算子函数示例
def rolling_mean_operator(df: pl.DataFrame, column: str, window: int, output_col: Optional[str] = None) -> pl.DataFrame:
    """
    Rolling-mean operator (stock cross-section; expects one stock's data).

    Args:
        df: single stock's DataFrame, sorted by trade_date.
        column: name of the column to average.
        window: rolling window size in rows.
        output_col: name of the output column; defaults to
            f'{column}_mean_{window}'.  (Annotation fixed to Optional[str]:
            `str = None` is an invalid implicit Optional per PEP 484.)

    Returns:
        The input DataFrame with the rolling-mean column appended.
        The first window-1 rows of the new column are null.
    """
    if output_col is None:
        output_col = f'{column}_mean_{window}'
    return df.with_columns(
        pl.col(column).rolling_mean(window_size=window).alias(output_col)
    )
def rolling_std_operator(df: pl.DataFrame, column: str, window: int, output_col: Optional[str] = None) -> pl.DataFrame:
    """
    Rolling-standard-deviation operator (stock cross-section; expects one stock's data).

    Args:
        df: single stock's DataFrame, sorted by trade_date.
        column: name of the column to compute the std-dev over.
        window: rolling window size in rows.
        output_col: name of the output column; defaults to
            f'{column}_std_{window}'.  (Annotation fixed to Optional[str]:
            `str = None` is an invalid implicit Optional per PEP 484.)

    Returns:
        The input DataFrame with the rolling-std column appended.
        The first window-1 rows of the new column are null.
    """
    if output_col is None:
        output_col = f'{column}_std_{window}'
    return df.with_columns(
        pl.col(column).rolling_std(window_size=window).alias(output_col)
    )
def rank_operator(df: pl.DataFrame, column: str, ascending: bool = True, output_col: Optional[str] = None) -> pl.DataFrame:
    """
    Dense-rank operator (date cross-section; expects one date's data).

    Args:
        df: single trade date's DataFrame.
        column: name of the column to rank.
        ascending: True ranks smallest value as 1; False ranks largest as 1.
        output_col: name of the output column; defaults to f'{column}_rank'.
            (Annotation fixed to Optional[str]: `str = None` is an invalid
            implicit Optional per PEP 484.)

    Returns:
        The input DataFrame with the dense-rank column appended.
    """
    if output_col is None:
        output_col = f'{column}_rank'
    # polars expresses sort direction via `descending`, hence the negation.
    return df.with_columns(
        pl.col(column).rank(method='dense', descending=not ascending).alias(output_col)
    )
def pct_change_operator(df: pl.DataFrame, column: str, periods: int = 1, output_col: Optional[str] = None) -> pl.DataFrame:
    """
    Percentage-change operator (stock cross-section; expects one stock's data).

    Args:
        df: single stock's DataFrame, sorted by trade_date.
        column: name of the column to compute the change over.
        periods: number of rows to look back.
        output_col: name of the output column; defaults to
            f'{column}_pct_change_{periods}'.  (Annotation fixed to
            Optional[str]: `str = None` is an invalid implicit Optional
            per PEP 484.)

    Returns:
        The input DataFrame with the pct-change column appended, computed as
        value / value.shift(periods) - 1.  The first `periods` rows are null.
    """
    if output_col is None:
        output_col = f'{column}_pct_change_{periods}'
    return df.with_columns(
        ((pl.col(column) / pl.col(column).shift(periods)) - 1).alias(output_col)
    )

View File

@@ -1,18 +1,14 @@
""" """
因子算子框架 - 使用Polars实现统一的因子计算 因子算子框架 - Polars 实现
避免数据泄露,支持切面计算 支持:截面滚动 → 拼回长表 → 按列名合并
返回形式可选:完整 DataFrame默认或单列 Series
""" """
import polars as pl
import numpy as np
from typing import Dict, List, Callable, Optional, Union, Any
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
import logging from typing import List, Literal
# 配置日志 import polars as pl
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass @dataclass
@@ -22,38 +18,7 @@ class OperatorConfig:
description: str description: str
required_columns: List[str] required_columns: List[str]
output_columns: List[str] output_columns: List[str]
parameters: Dict[str, Any] parameters: dict
class DataSlice:
"""数据切面基类"""
def __init__(self, df: pl.DataFrame):
self.df = df
self.validate_data()
def validate_data(self):
"""验证数据格式"""
required_cols = ['ts_code', 'trade_date']
missing_cols = [col for col in required_cols if col not in self.df.columns]
if missing_cols:
raise ValueError(f"缺少必需列: {missing_cols}")
def get_stock_slice(self, ts_code: str) -> pl.DataFrame:
"""获取单个股票的数据切面"""
return self.df.filter(pl.col('ts_code') == ts_code).sort('trade_date')
def get_date_slice(self, trade_date: str) -> pl.DataFrame:
"""获取单个日期的数据切面"""
return self.df.filter(pl.col('trade_date') == trade_date)
def get_stock_list(self) -> List[str]:
"""获取股票列表"""
return self.df['ts_code'].unique().to_list()
def get_date_list(self) -> List[str]:
"""获取日期列表"""
return self.df['trade_date'].unique().to_list()
class BaseOperator(ABC): class BaseOperator(ABC):
@@ -65,186 +30,95 @@ class BaseOperator(ABC):
self.required_columns = config.required_columns self.required_columns = config.required_columns
self.output_columns = config.output_columns self.output_columns = config.output_columns
def validate_input(self, df: pl.DataFrame) -> bool: # ---------- 子类必须实现 ----------
"""验证输入数据"""
missing_cols = [col for col in self.required_columns if col not in df.columns]
if missing_cols:
logger.warning(f"算子 {self.name} 缺少必需列: {missing_cols}")
return False
return True
@abstractmethod @abstractmethod
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""应用算子""" """返回因子列名(用于合并)"""
pass pass
def __call__(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame: @abstractmethod
"""调用算子""" def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
"""
真正的截面计算逻辑。
参数:按 ts_code 或 trade_date 分组后的子表
返回:与 group_df 行数一一对应的因子 Series含正确索引
"""
pass
# ---------- 公共接口 ----------
def apply(self,
df: pl.DataFrame,
return_type: Literal['df', 'series'] = 'df',
**kwargs) -> pl.DataFrame | pl.Series:
"""入口:截面滚动 → 拼回长表 → 合并/返回"""
if not self.validate_input(df): if not self.validate_input(df):
# 返回原始数据添加NaN列 raise ValueError(f"缺少必需列:{self.required_columns}")
for col in self.output_columns:
df = df.with_columns(pl.lit(None).alias(col))
return df
try: long_table = self._sectional_roll(df, **kwargs) # ① 滚动
return self.apply(df, **kwargs) merged = self._merge_factor(df, long_table) # ② 合并
except Exception as e: return merged if return_type == 'df' else merged[self.get_factor_name()]
logger.error(f"算子 {self.name} 应用失败: {e}")
# 返回原始数据添加NaN列 # ---------- 内部流程 ----------
for col in self.output_columns: def validate_input(self, df: pl.DataFrame) -> bool:
df = df.with_columns(pl.lit(None).alias(col)) return all(col in df.columns for col in self.required_columns)
return df
@abstractmethod
def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""
截面滚动模板group → calc_factor → 拼回长表
返回含【trade_date, ts_code, factor】的长表
"""
pass
def _merge_factor(self, original: pl.DataFrame, factor_table: pl.DataFrame) -> pl.DataFrame:
"""按 [ts_code, trade_date] 左联,原地追加因子列"""
factor_name = self.get_factor_name()
return original.join(factor_table.select(['ts_code', 'trade_date', factor_name]),
on=['ts_code', 'trade_date'],
how='left')
# -------------------- 股票截面:按 ts_code 分组 --------------------
class StockWiseOperator(BaseOperator): class StockWiseOperator(BaseOperator):
"""股票切面算子 - 按股票分组计算""" """股票切面算子抽象类:按 ts_code 分组,对每个股票的时间序列计算因子"""
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame: def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""按股票分组应用算子""" factor_name = self.get_factor_name()
stock_list = df['ts_code'].unique().to_list()
results = []
for ts_code in stock_list: # 确保排序(时间顺序对 shift 等操作至关重要)
stock_df = df.filter(pl.col('ts_code') == ts_code).sort('trade_date') df_sorted = df.sort(['ts_code', 'trade_date'])
try:
result_df = self.apply_stock(stock_df, **kwargs)
results.append(result_df)
except Exception as e:
logger.error(f"股票 {ts_code} 算子应用失败: {e}")
# 为失败的股票添加NaN列
for col in self.output_columns:
stock_df = stock_df.with_columns(pl.lit(None).alias(col))
results.append(stock_df)
return pl.concat(results).sort(['ts_code', 'trade_date'])
@abstractmethod
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""应用到单个股票数据"""
pass
# 使用 map_groups对每个 ts_code 分组,传入完整子 DataFrame
result = (
df_sorted
.group_by('ts_code', maintain_order=True)
.map_groups(
lambda group_df: group_df.with_columns(
self.calc_factor(group_df, **kwargs)
)
)
.select(['ts_code', 'trade_date', factor_name])
)
return result
# -------------------- 日期截面:按 trade_date 分组 --------------------
class DateWiseOperator(BaseOperator): class DateWiseOperator(BaseOperator):
"""日期切面算子 - 按日期分组计算""" """日期切面算子抽象类:按 trade_date 分组,对每个截面计算因子"""
def apply(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame: def _sectional_roll(self, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""按日期分组应用算子""" factor_name = self.get_factor_name()
date_list = df['trade_date'].unique().to_list()
results = []
for trade_date in date_list: df_sorted = df.sort(['trade_date', 'ts_code'])
date_df = df.filter(pl.col('trade_date') == trade_date)
try:
result_df = self.apply_date(date_df, **kwargs)
results.append(result_df)
except Exception as e:
logger.error(f"日期 {trade_date} 算子应用失败: {e}")
# 为失败的日期添加NaN列
for col in self.output_columns:
date_df = date_df.with_columns(pl.lit(None).alias(col))
results.append(date_df)
return pl.concat(results).sort(['ts_code', 'trade_date']) result = (
df_sorted
@abstractmethod .group_by('trade_date', maintain_order=True)
def apply_date(self, date_df: pl.DataFrame, **kwargs) -> pl.DataFrame: .map_groups(
"""应用到单个日期数据""" lambda group_df: group_df.with_columns(
pass self.calc_factor(group_df, **kwargs)
class RollingOperator(StockWiseOperator):
"""滚动窗口算子基类"""
def __init__(self, config: OperatorConfig, window: int, min_periods: Optional[int] = None):
super().__init__(config)
self.window = window
self.min_periods = min_periods or max(1, window // 2)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""应用滚动窗口计算"""
return self.apply_rolling(stock_df, **kwargs)
@abstractmethod
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""滚动窗口计算逻辑"""
pass
# 基础算子实现
class ReturnOperator(RollingOperator):
"""收益率算子"""
def __init__(self, periods: int = 1):
config = OperatorConfig(
name=f"return_{periods}",
description=f"{periods}期收益率",
required_columns=['close'],
output_columns=[f'return_{periods}'],
parameters={'periods': periods}
) )
super().__init__(config, window=periods + 1)
self.periods = periods
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算收益率"""
return stock_df.with_columns(
(pl.col('close') / pl.col('close').shift(self.periods) - 1).alias(f'return_{self.periods}')
) )
.select(['ts_code', 'trade_date', factor_name])
class VolatilityOperator(RollingOperator):
"""波动率算子"""
def __init__(self, window: int = 20):
config = OperatorConfig(
name=f"volatility_{window}",
description=f"{window}日波动率",
required_columns=['pct_chg'],
output_columns=[f'volatility_{window}'],
parameters={'window': window}
) )
super().__init__(config, window=window) return result
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算波动率"""
return stock_df.with_columns(
pl.col('pct_chg').rolling_std(window=self.window).alias(f'volatility_{self.window}')
)
class MeanOperator(RollingOperator):
"""均值算子"""
def __init__(self, column: str, window: int):
config = OperatorConfig(
name=f"mean_{column}_{window}",
description=f"{column}{window}日均值",
required_columns=[column],
output_columns=[f'mean_{column}_{window}'],
parameters={'column': column, 'window': window}
)
super().__init__(config, window=window)
self.column = column
def apply_rolling(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame:
"""计算均值"""
return stock_df.with_columns(
pl.col(self.column).rolling_mean(window=self.window).alias(f'mean_{self.column}_{self.window}')
)
class RankOperator(DateWiseOperator):
"""排名算子"""
def __init__(self, column: str, ascending: bool = True):
config = OperatorConfig(
name=f"rank_{column}",
description=f"{column}的排名",
required_columns=[column],
output_columns=[f'rank_{column}'],
parameters={'column': column, 'ascending': ascending}
)
super().__init__(config)
self.column = column
self.ascending = ascending

View File

@@ -6,7 +6,9 @@
import polars as pl import polars as pl
import numpy as np import numpy as np
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
from operator_framework import StockWiseOperator, OperatorConfig
from tqdm import tqdm
from main.factor.operator_framework import StockWiseOperator, OperatorConfig
from scipy.stats import linregress from scipy.stats import linregress
@@ -14,6 +16,8 @@ class PriceMinusDeductionPriceOperator(StockWiseOperator):
"""价格减抵扣价算子""" """价格减抵扣价算子"""
def __init__(self, n: int = 10): def __init__(self, n: int = 10):
if n <= 0:
raise ValueError("n must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"price_minus_deduction_price_{n}", name=f"price_minus_deduction_price_{n}",
description=f"{n}日价格减抵扣价", description=f"{n}日价格减抵扣价",
@@ -24,21 +28,22 @@ class PriceMinusDeductionPriceOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.n = n self.n = n
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算价格减抵扣价""" return f'price_minus_deduction_price_{self.n}'
# 抵扣价是n-1周期前的价格
deduction_price = pl.col('close').shift(self.n - 1)
# 计算差值 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
price_diff = pl.col('close') - deduction_price # 抵扣价是 n 日前的价格(更合理),若坚持 n-1 则保留
deduction_price = group_df['close'].shift(self.n) # 建议用 n不是 n-1
return stock_df.with_columns(price_diff.alias(f'price_minus_deduction_price_{self.n}')) price_diff = group_df['close'] - deduction_price
return price_diff.alias(self.get_factor_name())
class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator): class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
"""价格抵扣价差值相对SMA比率算子""" """价格抵扣价差值相对SMA比率算子"""
def __init__(self, n: int = 10): def __init__(self, n: int = 10):
if n <= 0:
raise ValueError("n must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"price_deduction_price_diff_ratio_to_sma_{n}", name=f"price_deduction_price_diff_ratio_to_sma_{n}",
description=f"{n}日价格抵扣价差值相对SMA比率", description=f"{n}日价格抵扣价差值相对SMA比率",
@@ -49,27 +54,23 @@ class PriceDeductionPriceDiffRatioToSMAOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.n = n self.n = n
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算价格抵扣价差值相对SMA比率""" return f'price_deduction_price_diff_ratio_to_sma_{self.n}'
# 计算n日SMA
sma = pl.col('close').rolling_mean(window=self.n)
# 抵扣价 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
deduction_price = pl.col('close').shift(self.n - 1) sma = group_df['close'].rolling_mean(window_size=self.n)
deduction_price = group_df['close'].shift(self.n)
# 计算差值 diff = group_df['close'] - deduction_price
diff = pl.col('close') - deduction_price
# 计算比率 (处理除零)
ratio = diff / (sma + 1e-8) ratio = diff / (sma + 1e-8)
return ratio.alias(self.get_factor_name())
return stock_df.with_columns(ratio.alias(f'price_deduction_price_diff_ratio_to_sma_{self.n}'))
class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator): class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
"""价格vsSMAvs抵扣价分类算子""" """价格vsSMAvs抵扣价分类算子"""
def __init__(self, n: int = 10): def __init__(self, n: int = 10):
if n <= 0:
raise ValueError("n must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"cat_price_vs_sma_vs_deduction_price_{n}", name=f"cat_price_vs_sma_vs_deduction_price_{n}",
description=f"{n}日价格vsSMAvs抵扣价分类", description=f"{n}日价格vsSMAvs抵扣价分类",
@@ -80,40 +81,35 @@ class CatPriceVsSmaVsDeductionPriceOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.n = n self.n = n
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算价格vsSMAvs抵扣价分类""" return f'cat_price_vs_sma_vs_deduction_price_{self.n}'
# 计算n日SMA
sma = pl.col('close').rolling_mean(window=self.n)
# 抵扣价 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
deduction_price = pl.col('close').shift(self.n - 1) sma = group_df['close'].rolling_mean(window_size=self.n)
deduction_price = group_df['close'].shift(self.n)
# 定义条件 cond1 = (group_df['close'] > sma) & (deduction_price > sma)
conditions = [ cond2 = (group_df['close'] < sma) & (deduction_price < sma)
# 1: 当前价 > SMA 且 抵扣价 > SMA cond3 = (group_df['close'] > sma) & (deduction_price <= sma)
(pl.col('close') > sma) & (deduction_price > sma), cond4 = (group_df['close'] <= sma) & (deduction_price > sma)
# 2: 当前价 < SMA 且 抵扣价 < SMA
(pl.col('close') < sma) & (deduction_price < sma),
# 3: 当前价 > SMA 且 抵扣价 <= SMA
(pl.col('close') > sma) & (deduction_price <= sma),
# 4: 当前价 <= SMA 且 抵扣价 > SMA
(pl.col('close') <= sma) & (deduction_price > sma),
]
choices = [1, 2, 3, 4] classification = (
pl.when(cond1).then(1)
# 使用select函数进行分类 .when(cond2).then(2)
classification = pl.select(conditions=conditions, choices=choices, default=0) .when(cond3).then(3)
.when(cond4).then(4)
return stock_df.with_columns( .otherwise(0)
classification.alias(f'cat_price_vs_sma_vs_deduction_price_{self.n}')
) )
return classification.alias(self.get_factor_name())
# ✅ 修复:使用 rolling_map
class VolatilitySlopeOperator(StockWiseOperator): class VolatilitySlopeOperator(StockWiseOperator):
"""波动率斜率算子""" """波动率斜率算子"""
def __init__(self, long_window: int = 20, short_window: int = 5): def __init__(self, long_window: int = 20, short_window: int = 5):
if long_window <= 0 or short_window <= 0:
raise ValueError("Windows must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"volatility_slope_{long_window}_{short_window}", name=f"volatility_slope_{long_window}_{short_window}",
description=f"{long_window}日波动率{short_window}日斜率", description=f"{long_window}日波动率{short_window}日斜率",
@@ -125,34 +121,40 @@ class VolatilitySlopeOperator(StockWiseOperator):
self.long_window = long_window self.long_window = long_window
self.short_window = short_window self.short_window = short_window
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算波动率斜率""" return f'volatility_slope_{self.long_window}_{self.short_window}'
# 计算长期波动率
long_vol = pl.col('pct_chg').rolling_std(window=self.long_window)
# 计算斜率函数 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
def calculate_slope(series): # 先计算长期波动率(标准差)
if len(series) < 2: long_vol = group_df['pct_chg'].rolling_std(window_size=self.long_window)
return 0
x = np.arange(len(series))
slope, _, _, _, _ = linregress(x, series)
return slope
# 计算斜率 # 定义斜率函数(输入是 numpy array
volatility_slope = long_vol.rolling_apply( def slope_func(window_vals: np.ndarray) -> float:
function=calculate_slope, if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
window_size=self.short_window return 0.0
) x = np.arange(len(window_vals))
try:
return stock_df.with_columns( slope, _, _, _, _ = linregress(x, window_vals)
volatility_slope.alias(f'volatility_slope_{self.long_window}_{self.short_window}') return slope if np.isfinite(slope) else 0.0
except:
return 0.0
# 对波动率序列应用 rolling_map
volatility_slope = long_vol.rolling_map(
function=slope_func,
window_size=self.short_window,
min_periods=2 # 至少2点才能算斜率
) )
return volatility_slope.alias(self.get_factor_name())
# ✅ 修复:使用 rolling_map
class TurnoverRateTrendStrengthOperator(StockWiseOperator): class TurnoverRateTrendStrengthOperator(StockWiseOperator):
"""换手率趋势强度算子""" """换手率趋势强度算子"""
def __init__(self, window: int = 5): def __init__(self, window: int = 5):
if window <= 0:
raise ValueError("Window must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"turnover_trend_strength_{window}", name=f"turnover_trend_strength_{window}",
description=f"{window}日换手率趋势强度", description=f"{window}日换手率趋势强度",
@@ -163,31 +165,34 @@ class TurnoverRateTrendStrengthOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.window = window self.window = window
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算换手率趋势强度""" return f'turnover_trend_strength_{self.window}'
# 计算斜率函数
def calculate_slope(series):
if len(series) < 2:
return 0
x = np.arange(len(series))
slope, _, _, _, _ = linregress(x, series)
return slope
# 计算换手率斜率 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
trend_strength = pl.col('turnover_rate').rolling_apply( def slope_func(window_vals: np.ndarray) -> float:
function=calculate_slope, if len(window_vals) < 2 or pl.Series(window_vals).is_null().any():
window_size=self.window return 0.0
) x = np.arange(len(window_vals))
try:
slope, _, _, _, _ = linregress(x, window_vals)
return slope if np.isfinite(slope) else 0.0
except:
return 0.0
return stock_df.with_columns( trend_strength = group_df['turnover_rate'].rolling_map(
trend_strength.alias(f'turnover_trend_strength_{self.window}') function=slope_func,
window_size=self.window,
min_periods=2
) )
return trend_strength.alias(self.get_factor_name())
class FreeFloatTurnoverSurgeOperator(StockWiseOperator): class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
"""自由流通股换手率激增算子""" """自由流通股换手率激增算子"""
def __init__(self, window: int = 10): def __init__(self, window: int = 10):
if window <= 0:
raise ValueError("Window must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"ff_turnover_surge_{window}", name=f"ff_turnover_surge_{window}",
description=f"{window}日自由流通股换手率激增", description=f"{window}日自由流通股换手率激增",
@@ -198,21 +203,21 @@ class FreeFloatTurnoverSurgeOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.window = window self.window = window
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算自由流通股换手率激增""" return f'ff_turnover_surge_{self.window}'
# 计算均值
avg_turnover = pl.col('turnover_rate').rolling_mean(window=self.window)
# 计算激增比率 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
surge_ratio = pl.col('turnover_rate') / (avg_turnover + 1e-8) avg_turnover = group_df['turnover_rate'].rolling_mean(window_size=self.window)
surge_ratio = group_df['turnover_rate'] / (avg_turnover + 1e-8)
return stock_df.with_columns(surge_ratio.alias(f'ff_turnover_surge_{self.window}')) return surge_ratio.alias(self.get_factor_name())
class PriceVolumeTrendCoherenceOperator(StockWiseOperator): class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
"""价量趋势一致性算子""" """价量趋势一致性算子"""
def __init__(self, price_window: int = 5, volume_window: int = 20): def __init__(self, price_window: int = 5, volume_window: int = 20):
if price_window <= 0 or volume_window <= 0:
raise ValueError("Windows must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"price_volume_coherence_{price_window}_{volume_window}", name=f"price_volume_coherence_{price_window}_{volume_window}",
description=f"{price_window}日价格{volume_window}日成交量趋势一致性", description=f"{price_window}日价格{volume_window}日成交量趋势一致性",
@@ -224,25 +229,19 @@ class PriceVolumeTrendCoherenceOperator(StockWiseOperator):
self.price_window = price_window self.price_window = price_window
self.volume_window = volume_window self.volume_window = volume_window
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算价量趋势一致性""" return f'price_volume_coherence_{self.price_window}_{self.volume_window}'
# 计算价格上涨占比
def price_up_ratio(series):
return (series.diff() > 0).rolling_mean(window=self.price_window)
price_up = pl.col('close').apply(price_up_ratio) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
price_up = (group_df['close'].diff() > 0).cast(pl.Int8)
price_up_ratio = price_up.rolling_mean(window_size=self.price_window)
# 计算成交量高于均值占比 vol_avg = group_df['vol'].rolling_mean(window_size=self.volume_window)
vol_avg = pl.col('vol').rolling_mean(window=self.volume_window) vol_above = (group_df['vol'] > vol_avg).cast(pl.Int8)
vol_above_avg = pl.col('vol') > vol_avg vol_above_ratio = vol_above.rolling_mean(window_size=self.price_window)
vol_above_ratio = vol_above_avg.cast(int).rolling_mean(window=self.price_window)
# 计算一致性 coherence = price_up_ratio * vol_above_ratio
coherence = price_up * vol_above_ratio return coherence.alias(self.get_factor_name())
return stock_df.with_columns(
coherence.alias(f'price_volume_coherence_{self.price_window}_{self.volume_window}')
)
class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator): class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
@@ -258,19 +257,21 @@ class FreeFloatToTotalTurnoverRatioOperator(StockWiseOperator):
) )
super().__init__(config) super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算自由流通股对总换手率比率""" return 'ff_to_total_turnover_ratio'
# 假设turnover_rate是自由流通股换手率
# 计算比率 (简化处理)
ratio = pl.col('turnover_rate') / (pl.col('turnover_rate') + 1e-8)
return stock_df.with_columns(ratio.alias('ff_to_total_turnover_ratio')) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
# 实际业务中可能需要 total_turnover_rate这里简化
ratio = pl.lit(1.0) # 或根据实际逻辑修改
return ratio.alias('ff_to_total_turnover_ratio')
class VarianceOperator(StockWiseOperator): class VarianceOperator(StockWiseOperator):
"""方差算子""" """方差算子"""
def __init__(self, window: int): def __init__(self, window: int):
if window <= 0:
raise ValueError("Window must be positive")
config = OperatorConfig( config = OperatorConfig(
name=f"variance_{window}", name=f"variance_{window}",
description=f"{window}日方差", description=f"{window}日方差",
@@ -281,12 +282,12 @@ class VarianceOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.window = window self.window = window
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算方差""" return f'variance_{self.window}'
# 计算方差
variance = pl.col('pct_chg').rolling_var(window=self.window)
return stock_df.with_columns(variance.alias(f'variance_{self.window}')) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
variance = group_df['pct_chg'].rolling_var(window_size=self.window)
return variance.alias(self.get_factor_name())
class LimitUpDownOperator(StockWiseOperator): class LimitUpDownOperator(StockWiseOperator):
@@ -302,26 +303,12 @@ class LimitUpDownOperator(StockWiseOperator):
) )
super().__init__(config) super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算涨跌停因子""" return 'cat_up_limit'
# 判断是否涨停
up_limit = pl.col('close') == pl.col('up_limit')
# 判断是否跌停 def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
down_limit = pl.col('close') == pl.col('down_limit') up_limit = (group_df['close'] == group_df['up_limit']).cast(pl.Int8)
return up_limit.alias('cat_up_limit')
# 计算10日涨停计数
up_count_10d = up_limit.cast(int).rolling_sum(window=10)
# 计算10日跌停计数
down_count_10d = down_limit.cast(int).rolling_sum(window=10)
return stock_df.with_columns([
up_limit.alias('cat_up_limit'),
down_limit.alias('cat_down_limit'),
up_count_10d.alias('up_limit_count_10d'),
down_count_10d.alias('down_limit_count_10d')
])
class ConsecutiveUpLimitOperator(StockWiseOperator): class ConsecutiveUpLimitOperator(StockWiseOperator):
@@ -337,19 +324,21 @@ class ConsecutiveUpLimitOperator(StockWiseOperator):
) )
super().__init__(config) super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算连续涨停天数""" return 'consecutive_up_limit'
# 计算连续涨停
# 简化处理,实际应用中需要更复杂的逻辑
consecutive = pl.col('cat_up_limit').cast(int)
return stock_df.with_columns(consecutive.alias('consecutive_up_limit')) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
# 简化版:实际连续计数需用 cumsum + groupby trick
# 这里先返回原始值,后续可优化
return group_df['cat_up_limit'].alias('consecutive_up_limit')
class MomentumFactorOperator(StockWiseOperator): class MomentumFactorOperator(StockWiseOperator):
"""动量因子算子""" """动量因子算子"""
def __init__(self, alpha: float = 0.5): def __init__(self, alpha: float = 0.5):
if not (0 <= alpha <= 1):
raise ValueError("alpha should be between 0 and 1")
config = OperatorConfig( config = OperatorConfig(
name=f"momentum_factor_{alpha}", name=f"momentum_factor_{alpha}",
description=f"动量因子(alpha={alpha})", description=f"动量因子(alpha={alpha})",
@@ -360,12 +349,12 @@ class MomentumFactorOperator(StockWiseOperator):
super().__init__(config) super().__init__(config)
self.alpha = alpha self.alpha = alpha
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算动量因子""" return f'momentum_factor_{self.alpha}'
# 计算动量因子
momentum = pl.col('volume_change_rate') + self.alpha * pl.col('turnover_deviation')
return stock_df.with_columns(momentum.alias(f'momentum_factor_{self.alpha}')) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
momentum = group_df['volume_change_rate'] + self.alpha * group_df['turnover_deviation']
return momentum.alias(self.get_factor_name())
class ResonanceFactorOperator(StockWiseOperator): class ResonanceFactorOperator(StockWiseOperator):
@@ -381,28 +370,28 @@ class ResonanceFactorOperator(StockWiseOperator):
) )
super().__init__(config) super().__init__(config)
def apply_stock(self, stock_df: pl.DataFrame, **kwargs) -> pl.DataFrame: def get_factor_name(self) -> str:
"""计算共振因子""" return 'resonance_factor'
# 计算共振因子
resonance = pl.col('volume_ratio') * pl.col('pct_chg')
return stock_df.with_columns(resonance.alias('resonance_factor')) def calc_factor(self, group_df: pl.DataFrame, **kwargs) -> pl.Series:
resonance = group_df['volume_ratio'] * group_df['pct_chg']
return resonance.alias('resonance_factor')
# 动量因子集合 # 动量因子集合
MOMENTUM_OPERATORS = [ MOMENTUM_OPERATORS = [
PriceMinusDeductionPriceOperator(), PriceMinusDeductionPriceOperator(10),
PriceDeductionPriceDiffRatioToSMAOperator(), PriceDeductionPriceDiffRatioToSMAOperator(10),
CatPriceVsSmaVsDeductionPriceOperator(), CatPriceVsSmaVsDeductionPriceOperator(10),
VolatilitySlopeOperator(), # VolatilitySlopeOperator(20, 5),
TurnoverRateTrendStrengthOperator(5), # TurnoverRateTrendStrengthOperator(5),
FreeFloatTurnoverSurgeOperator(10), FreeFloatTurnoverSurgeOperator(10),
PriceVolumeTrendCoherenceOperator(), PriceVolumeTrendCoherenceOperator(5, 20),
FreeFloatToTotalTurnoverRatioOperator(), FreeFloatToTotalTurnoverRatioOperator(),
VarianceOperator(20), VarianceOperator(20),
LimitUpDownOperator(), LimitUpDownOperator(),
ConsecutiveUpLimitOperator(), ConsecutiveUpLimitOperator(),
MomentumFactorOperator(), # MomentumFactorOperator(0.5),
ResonanceFactorOperator(), ResonanceFactorOperator(),
] ]
@@ -410,19 +399,12 @@ MOMENTUM_OPERATORS = [
def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame: def apply_momentum_factors(df: pl.DataFrame, operators: List = None) -> pl.DataFrame:
""" """
应用所有动量因子 应用所有动量因子
Args:
df: 输入的Polars DataFrame
operators: 要应用的算子列表如果为None则使用默认列表
Returns:
添加了动量因子的DataFrame
""" """
if operators is None: if operators is None:
operators = MOMENTUM_OPERATORS operators = MOMENTUM_OPERATORS
result_df = df result_df = df
for operator in operators: for operator in tqdm(operators, desc="Applying momentum factors"):
result_df = operator(result_df) result_df = operator.apply(result_df)
return result_df return result_df

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -68,56 +68,28 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"daily data\n", "daily data\n"
"daily basic\n", ]
"inner merge on ['ts_code', 'trade_date']\n", },
"stk limit\n", {
"left merge on ['ts_code', 'trade_date']\n", "ename": "KeyboardInterrupt",
"money flow\n", "evalue": "",
"left merge on ['ts_code', 'trade_date']\n", "output_type": "error",
"cyq perf\n", "traceback": [
"left merge on ['ts_code', 'trade_date']\n", "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"<class 'pandas.core.frame.DataFrame'>\n", "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"RangeIndex: 9162612 entries, 0 to 9162611\n", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmain\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m read_and_merge_h5_data\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m'\u001b[39m\u001b[33mdaily data\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m df = \u001b[43mread_and_merge_h5_data\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m/mnt/d/PyProject/NewStock/data/daily_data.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mdaily_data\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mtrade_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mopen\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mclose\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhigh\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlow\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mvol\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mamount\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mpct_chg\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m'\u001b[39m\u001b[33mdaily basic\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 9\u001b[39m df = read_and_merge_h5_data(\u001b[33m'\u001b[39m\u001b[33m/mnt/d/PyProject/NewStock/data/daily_basic.h5\u001b[39m\u001b[33m'\u001b[39m, key=\u001b[33m'\u001b[39m\u001b[33mdaily_basic\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m columns=[\u001b[33m'\u001b[39m\u001b[33mts_code\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mtrade_date\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mturnover_rate\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mpe_ttm\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mcirc_mv\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mtotal_mv\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mvolume_ratio\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 11\u001b[39m \u001b[33m'\u001b[39m\u001b[33mis_st\u001b[39m\u001b[33m'\u001b[39m], df=df, join=\u001b[33m'\u001b[39m\u001b[33minner\u001b[39m\u001b[33m'\u001b[39m)\n",
"Data columns (total 33 columns):\n", "\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/utils/utils.py:14\u001b[39m, in \u001b[36mread_and_merge_h5_data\u001b[39m\u001b[34m(h5_filename, key, columns, df, join, on, prefix)\u001b[39m\n\u001b[32m 11\u001b[39m processed_columns.append(col)\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# 从 HDF5 文件读取数据,选择需要的列\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m data = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh5_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprocessed_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# 修改列名,如果列名以前有 _加上 _\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m data.columns:\n",
" # Column Dtype \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:452\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 447\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 448\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mkey must be provided when HDF5 \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 449\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mfile contains multiple datasets.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 450\u001b[39m )\n\u001b[32m 451\u001b[39m key = candidate_only_group._v_pathname\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstore\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 455\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 457\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 458\u001b[39m \u001b[43m \u001b[49m\u001b[43miterator\u001b[49m\u001b[43m=\u001b[49m\u001b[43miterator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 459\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 460\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mauto_close\u001b[49m\u001b[43m=\u001b[49m\u001b[43mauto_close\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 461\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 462\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mLookupError\u001b[39;00m):\n\u001b[32m 463\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path_or_buf, HDFStore):\n\u001b[32m 464\u001b[39m \u001b[38;5;66;03m# if there is an error, close the store if we opened it.\u001b[39;00m\n",
"--- ------ ----- \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:906\u001b[39m, in \u001b[36mHDFStore.select\u001b[39m\u001b[34m(self, key, where, start, stop, columns, iterator, chunksize, auto_close)\u001b[39m\n\u001b[32m 892\u001b[39m \u001b[38;5;66;03m# create the iterator\u001b[39;00m\n\u001b[32m 893\u001b[39m it = TableIterator(\n\u001b[32m 894\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 895\u001b[39m s,\n\u001b[32m (...)\u001b[39m\u001b[32m 903\u001b[39m auto_close=auto_close,\n\u001b[32m 904\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m906\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mit\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
" 0 ts_code object \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:2029\u001b[39m, in \u001b[36mTableIterator.get_result\u001b[39m\u001b[34m(self, coordinates)\u001b[39m\n\u001b[32m 2026\u001b[39m where = \u001b[38;5;28mself\u001b[39m.where\n\u001b[32m 2028\u001b[39m \u001b[38;5;66;03m# directly return the result\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2029\u001b[39m results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2030\u001b[39m \u001b[38;5;28mself\u001b[39m.close()\n\u001b[32m 2031\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m results\n",
" 1 trade_date datetime64[ns]\n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:890\u001b[39m, in \u001b[36mHDFStore.select.<locals>.func\u001b[39m\u001b[34m(_start, _stop, _where)\u001b[39m\n\u001b[32m 889\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfunc\u001b[39m(_start, _stop, _where):\n\u001b[32m--> \u001b[39m\u001b[32m890\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43ms\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_stop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_where\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n",
" 2 open float64 \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:4631\u001b[39m, in \u001b[36mAppendableFrameTable.read\u001b[39m\u001b[34m(self, where, columns, start, stop)\u001b[39m\n\u001b[32m 4628\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m.infer_axes():\n\u001b[32m 4629\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m4631\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_axes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4633\u001b[39m info = (\n\u001b[32m 4634\u001b[39m \u001b[38;5;28mself\u001b[39m.info.get(\u001b[38;5;28mself\u001b[39m.non_index_axes[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m], {})\n\u001b[32m 4635\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m.non_index_axes)\n\u001b[32m 4636\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[32m 4637\u001b[39m )\n\u001b[32m 4639\u001b[39m inds = [i \u001b[38;5;28;01mfor\u001b[39;00m i, ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m.axes) \u001b[38;5;28;01mif\u001b[39;00m ax \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m.index_axes[\u001b[32m0\u001b[39m]]\n",
" 3 close float64 \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:3818\u001b[39m, in \u001b[36mTable._read_axes\u001b[39m\u001b[34m(self, where, start, stop)\u001b[39m\n\u001b[32m 3816\u001b[39m \u001b[38;5;66;03m# create the selection\u001b[39;00m\n\u001b[32m 3817\u001b[39m selection = Selection(\u001b[38;5;28mself\u001b[39m, where=where, start=start, stop=stop)\n\u001b[32m-> \u001b[39m\u001b[32m3818\u001b[39m values = \u001b[43mselection\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3820\u001b[39m results = []\n\u001b[32m 3821\u001b[39m \u001b[38;5;66;03m# convert the data\u001b[39;00m\n",
" 4 high float64 \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:5397\u001b[39m, in \u001b[36mSelection.select\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 5395\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.coordinates \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 5396\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.table.table.read_coordinates(\u001b[38;5;28mself\u001b[39m.coordinates)\n\u001b[32m-> \u001b[39m\u001b[32m5397\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtable\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtable\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m)\u001b[49m\n",
" 5 low float64 \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/tables/table.py:2083\u001b[39m, in \u001b[36mTable.read\u001b[39m\u001b[34m(self, start, stop, step, field, out)\u001b[39m\n\u001b[32m 2077\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[32m 2079\u001b[39m start, stop, step = \u001b[38;5;28mself\u001b[39m._process_range(\n\u001b[32m 2080\u001b[39m start, stop, step, warn_negstep=\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 2081\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m2083\u001b[39m arr = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2084\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m internal_to_flavor(arr, \u001b[38;5;28mself\u001b[39m.flavor)\n",
" 6 vol float64 \n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/tables/table.py:1989\u001b[39m, in \u001b[36mTable._read\u001b[39m\u001b[34m(self, start, stop, step, field, out)\u001b[39m\n\u001b[32m 1985\u001b[39m \u001b[38;5;66;03m# Call the routine to fill-up the resulting array\u001b[39;00m\n\u001b[32m 1986\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m step == \u001b[32m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m field:\n\u001b[32m 1987\u001b[39m \u001b[38;5;66;03m# This optimization works three times faster than\u001b[39;00m\n\u001b[32m 1988\u001b[39m \u001b[38;5;66;03m# the row._fill_col method (up to 170 MB/s on a pentium IV @ 2GHz)\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1989\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_records\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m \u001b[49m\u001b[43m-\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1990\u001b[39m \u001b[38;5;66;03m# Warning!: _read_field_name should not be used until\u001b[39;00m\n\u001b[32m 1991\u001b[39m \u001b[38;5;66;03m# H5TBread_fields_name in tableextension will be finished\u001b[39;00m\n\u001b[32m 1992\u001b[39m \u001b[38;5;66;03m# F. Alted 2005/05/26\u001b[39;00m\n\u001b[32m 1993\u001b[39m \u001b[38;5;66;03m# XYX Ho implementem per a PyTables 2.0??\u001b[39;00m\n\u001b[32m 1994\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m field \u001b[38;5;129;01mand\u001b[39;00m step > \u001b[32m15\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[32m0\u001b[39m:\n\u001b[32m 1995\u001b[39m \u001b[38;5;66;03m# For step>15, this seems to work always faster than row._fill_col.\u001b[39;00m\n",
" 7 amount float64 \n", "\u001b[31mKeyboardInterrupt\u001b[39m: "
" 8 pct_chg float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.2+ GB\n",
"None\n"
] ]
} }
], ],
@@ -154,7 +126,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"id": "cac01788dac10678", "id": "cac01788dac10678",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -222,7 +194,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"id": "c4e9e1d31da6dba6", "id": "c4e9e1d31da6dba6",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -322,7 +294,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "a735bc02ceb4d872", "id": "a735bc02ceb4d872",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -338,7 +310,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"id": "53f86ddc0677a6d7", "id": "53f86ddc0677a6d7",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -405,7 +377,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"id": "dbe2fd8021b9417f", "id": "dbe2fd8021b9417f",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -433,7 +405,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"id": "85c3e3d0235ffffa", "id": "85c3e3d0235ffffa",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -465,7 +437,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"id": "92d84ce15a562ec6", "id": "92d84ce15a562ec6",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -722,7 +694,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": null,
"id": "b87b938028afa206", "id": "b87b938028afa206",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -760,7 +732,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"id": "f4f16d63ad18d1bc", "id": "f4f16d63ad18d1bc",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -986,7 +958,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": null,
"id": "40e6b68a91b30c79", "id": "40e6b68a91b30c79",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -1306,7 +1278,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": null,
"id": "47c12bb34062ae7a", "id": "47c12bb34062ae7a",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -1340,7 +1312,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": null,
"id": "29221dde", "id": "29221dde",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -1383,7 +1355,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": null,
"id": "03ee5daf", "id": "03ee5daf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -1396,7 +1368,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": null,
"id": "b76ea08a", "id": "b76ea08a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -1621,7 +1593,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": null,
"id": "3ff2d1c5", "id": "3ff2d1c5",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@@ -1762,7 +1734,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": null,
"id": "a5bbb8be", "id": "a5bbb8be",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -1787,7 +1759,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": null,
"id": "5d1522a7538db91b", "id": "5d1522a7538db91b",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -1825,7 +1797,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": null,
"id": "09b1799e", "id": "09b1799e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -1847,7 +1819,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": null,
"id": "e53b209a", "id": "e53b209a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [

File diff suppressed because it is too large Load Diff