Files
NewStock/main/train/Classify/Classify2.ipynb

582 lines
33 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "79a7758178bafdd3",
"metadata": {
"jupyter": {
"is_executing": true,
"source_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/mnt/d/PyProject/NewStock\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"# %load_ext cudf.pandas\n",
"\n",
"import gc\n",
"import os\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock/')\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock/main/')\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock/train')\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock')\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock/main')\n",
"import sys\n",
"sys.path.append('/mnt/d/PyProject/NewStock/train/')\n",
"print(os.getcwd())\n",
"import pandas as pd\n",
"from main.factor.factor import get_rolling_factor, get_simple_factor\n",
"from main.utils.factor import read_industry_data\n",
"from main.utils.factor_processor import calculate_score\n",
"from main.utils.utils import read_and_merge_h5_data, merge_with_industry_data\n",
"\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4a481c60",
"metadata": {},
"outputs": [],
"source": [
"# 设置使用核心\n",
"import os\n",
"os.environ[\"MODIN_CPUS\"] = \"4\"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a79cafb06a7e0e43",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.016626900Z",
"start_time": "2025-04-03T12:46:06.998047Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"daily data\n",
"daily basic\n",
"inner merge on ['ts_code', 'trade_date']\n",
"stk limit\n",
"left merge on ['ts_code', 'trade_date']\n",
"money flow\n",
"left merge on ['ts_code', 'trade_date']\n",
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 9162612 entries, 0 to 9162611\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
" 1 trade_date datetime64[ns]\n",
" 2 open float64 \n",
" 3 close float64 \n",
" 4 high float64 \n",
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 amount float64 \n",
" 8 pct_chg float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.2+ GB\n",
"None\n"
]
}
],
"source": [
"from main.utils.utils import read_and_merge_h5_data\n",
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "cac01788dac10678",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.020945600Z",
"start_time": "2025-04-03T12:47:00.488715Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"industry\n"
]
}
],
"source": [
"print('industry')\n",
"industry_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/industry_data.h5', key='industry_data',\n",
" columns=['ts_code', 'l2_code', 'in_date'],\n",
" df=None, on=['ts_code'], join='left')\n",
"\n",
"\n",
"def merge_with_industry_data(df, industry_df):\n",
" # 确保日期字段是 datetime 类型\n",
" df['trade_date'] = pd.to_datetime(df['trade_date'])\n",
" industry_df['in_date'] = pd.to_datetime(industry_df['in_date'])\n",
"\n",
" # 对 industry_df 按 ts_code 和 in_date 排序\n",
" industry_df_sorted = industry_df.sort_values(['in_date', 'ts_code'])\n",
"\n",
" # 对原始 df 按 ts_code 和 trade_date 排序\n",
" df_sorted = df.sort_values(['trade_date', 'ts_code'])\n",
"\n",
" # 使用 merge_asof 进行向后合并\n",
" merged = pd.merge_asof(\n",
" df_sorted,\n",
" industry_df_sorted,\n",
" by='ts_code', # 按 ts_code 分组\n",
" left_on='trade_date',\n",
" right_on='in_date',\n",
" direction='backward'\n",
" )\n",
"\n",
" # 获取每个 ts_code 的最早 in_date 记录\n",
" min_in_date_per_ts = (industry_df_sorted\n",
" .groupby('ts_code')\n",
" .first()\n",
" .reset_index()[['ts_code', 'l2_code']])\n",
"\n",
" # 填充未匹配到的记录trade_date 早于所有 in_date 的情况)\n",
" merged['l2_code'] = merged['l2_code'].fillna(\n",
" merged['ts_code'].map(min_in_date_per_ts.set_index('ts_code')['l2_code'])\n",
" )\n",
"\n",
" # 保留需要的列并重置索引\n",
" result = merged.reset_index(drop=True)\n",
" return result\n",
"\n",
"\n",
"# 使用示例\n",
"df = merge_with_industry_data(df, industry_df)\n",
"# print(mdf[mdf['ts_code'] == '600751.SH'][['ts_code', 'trade_date', 'l2_code']])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c4e9e1d31da6dba6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.021492400Z",
"start_time": "2025-04-03T12:47:10.541247Z"
},
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"from main.factor.factor import *\n",
"\n",
"def calculate_indicators(df):\n",
" \"\"\"\n",
" 计算四个指标当日涨跌幅、5日移动平均、RSI、MACD。\n",
" \"\"\"\n",
" df = df.sort_values('trade_date')\n",
" df['daily_return'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100\n",
" # df['5_day_ma'] = df['close'].rolling(window=5).mean()\n",
" delta = df['close'].diff()\n",
" gain = delta.where(delta > 0, 0)\n",
" loss = -delta.where(delta < 0, 0)\n",
" avg_gain = gain.rolling(window=14).mean()\n",
" avg_loss = loss.rolling(window=14).mean()\n",
" rs = avg_gain / avg_loss\n",
" df['RSI'] = 100 - (100 / (1 + rs))\n",
"\n",
" # 计算MACD\n",
" ema12 = df['close'].ewm(span=12, adjust=False).mean()\n",
" ema26 = df['close'].ewm(span=26, adjust=False).mean()\n",
" df['MACD'] = ema12 - ema26\n",
" df['Signal_line'] = df['MACD'].ewm(span=9, adjust=False).mean()\n",
" df['MACD_hist'] = df['MACD'] - df['Signal_line']\n",
"\n",
" # 4. 情绪因子1市场上涨比例Up Ratio\n",
" df['up_ratio'] = df['daily_return'].apply(lambda x: 1 if x > 0 else 0)\n",
" df['up_ratio_20d'] = df['up_ratio'].rolling(window=20).mean() # 过去20天上涨比例\n",
"\n",
" # 5. 情绪因子2成交量变化率Volume Change Rate\n",
" df['volume_mean'] = df['vol'].rolling(window=20).mean() # 过去20天的平均成交量\n",
" df['volume_change_rate'] = (df['vol'] - df['volume_mean']) / df['volume_mean'] * 100 # 成交量变化率\n",
"\n",
" # 6. 情绪因子3波动率Volatility\n",
" df['volatility'] = df['daily_return'].rolling(window=20).std() # 过去20天的日收益率标准差\n",
"\n",
" # 7. 情绪因子4成交额变化率Amount Change Rate\n",
" df['amount_mean'] = df['amount'].rolling(window=20).mean() # 过去20天的平均成交额\n",
" df['amount_change_rate'] = (df['amount'] - df['amount_mean']) / df['amount_mean'] * 100 # 成交额变化率\n",
"\n",
" # df = sentiment_panic_greed_index(df)\n",
" # df = sentiment_market_breadth_proxy(df)\n",
" # df = sentiment_reversal_indicator(df)\n",
"\n",
" return df\n",
"\n",
"\n",
"def generate_index_indicators(h5_filename):\n",
" df = pd.read_hdf(h5_filename, key='index_data')\n",
" df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')\n",
" df = df.sort_values('trade_date')\n",
"\n",
" # 计算每个ts_code的相关指标\n",
" df_indicators = []\n",
" for ts_code in df['ts_code'].unique():\n",
" df_index = df[df['ts_code'] == ts_code].copy()\n",
" df_index = calculate_indicators(df_index)\n",
" df_indicators.append(df_index)\n",
"\n",
" # 合并所有指数的结果\n",
" df_all_indicators = pd.concat(df_indicators, ignore_index=True)\n",
"\n",
" # 保留trade_date列并将同一天的数据按ts_code合并成一行\n",
" df_final = df_all_indicators.pivot_table(\n",
" index='trade_date',\n",
" columns='ts_code',\n",
" values=['daily_return', \n",
" 'RSI', 'MACD', 'Signal_line', 'MACD_hist', \n",
" # 'sentiment_panic_greed_index',\n",
" 'up_ratio_20d', 'volume_change_rate', 'volatility',\n",
" 'amount_change_rate', 'amount_mean'],\n",
" aggfunc='last'\n",
" )\n",
"\n",
" df_final.columns = [f\"{col[1]}_{col[0]}\" for col in df_final.columns]\n",
" df_final = df_final.reset_index()\n",
"\n",
" return df_final\n",
"\n",
"\n",
"# 使用函数\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/index_data.h5'\n",
"index_data = generate_index_indicators(h5_filename)\n",
"index_data = index_data.dropna()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a735bc02ceb4d872",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.021998300Z",
"start_time": "2025-04-03T12:47:10.751831Z"
}
},
"outputs": [],
"source": [
"import talib\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "53f86ddc0677a6d7",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.021998300Z",
"start_time": "2025-04-03T12:47:10.826179Z"
},
"jupyter": {
"source_hidden": true
},
"scrolled": true
},
"outputs": [],
"source": [
"from main.utils.factor import get_act_factor\n",
"\n",
"\n",
"def read_industry_data(h5_filename):\n",
" # 读取 H5 文件中所有的行业数据\n",
" industry_data = pd.read_hdf(h5_filename, key='sw_daily', columns=[\n",
" 'ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'pe', 'pb', 'vol'\n",
" ]) # 假设 H5 文件的键是 'industry_data'\n",
" industry_data = industry_data.sort_values(by=['ts_code', 'trade_date'])\n",
" industry_data = industry_data.reindex()\n",
" industry_data['trade_date'] = pd.to_datetime(industry_data['trade_date'], format='%Y%m%d')\n",
"\n",
" grouped = industry_data.groupby('ts_code', group_keys=False)\n",
" industry_data['obv'] = grouped.apply(\n",
" lambda x: pd.Series(talib.OBV(x['close'].values, x['vol'].values), index=x.index)\n",
" )\n",
" industry_data['return_5'] = grouped['close'].apply(lambda x: x / x.shift(5) - 1)\n",
" industry_data['return_20'] = grouped['close'].apply(lambda x: x / x.shift(20) - 1)\n",
"\n",
" industry_data = get_act_factor(industry_data, cat=False)\n",
" industry_data = industry_data.sort_values(by=['trade_date', 'ts_code'])\n",
"\n",
" # # 计算每天每个 ts_code 的因子和当天所有 ts_code 的中位数的偏差\n",
" # factor_columns = ['obv', 'return_5', 'return_20', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4'] # 因子列\n",
" # \n",
" # for factor in factor_columns:\n",
" # if factor in industry_data.columns:\n",
" # # 计算每天每个 ts_code 的因子值与当天所有 ts_code 的中位数的偏差\n",
" # industry_data[f'{factor}_deviation'] = industry_data.groupby('trade_date')[factor].transform(\n",
" # lambda x: x - x.mean())\n",
"\n",
" industry_data['return_5_percentile'] = industry_data.groupby('trade_date')['return_5'].transform(\n",
" lambda x: x.rank(pct=True))\n",
" industry_data['return_20_percentile'] = industry_data.groupby('trade_date')['return_20'].transform(\n",
" lambda x: x.rank(pct=True))\n",
"\n",
" # cs_rank_intraday_range(industry_data)\n",
" # cs_rank_close_pos_in_range(industry_data)\n",
"\n",
" industry_data = industry_data.drop(columns=['open', 'close', 'high', 'low', 'pe', 'pb', 'vol'])\n",
"\n",
" industry_data = industry_data.rename(\n",
" columns={col: f'industry_{col}' for col in industry_data.columns if col not in ['ts_code', 'trade_date']})\n",
"\n",
" industry_data = industry_data.rename(columns={'ts_code': 'cat_l2_code'})\n",
" return industry_data\n",
"\n",
"\n",
"industry_df = read_industry_data('/mnt/d/PyProject/NewStock/data/sw_daily.h5')\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "dbe2fd8021b9417f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.055745300Z",
"start_time": "2025-04-03T12:47:15.963327Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'open', 'close', 'high', 'low', 'amount', 'circ_mv', 'total_mv', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'in_date']\n"
]
}
],
"source": [
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if\n",
" col not in ['turnover_rate', 'pe_ttm', 'volume_ratio', 'vol', 'pct_chg', 'l2_code', 'winner_rate']]\n",
"origin_columns = [col for col in origin_columns if col not in index_data.columns]\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "85c3e3d0235ffffa",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.056256100Z",
"start_time": "2025-04-03T12:47:15.990101Z"
}
},
"outputs": [],
"source": [
"fina_indicator_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/fina_indicator.h5', key='fina_indicator',\n",
" columns=['ts_code', 'ann_date', 'undist_profit_ps', 'ocfps', 'bps', 'roa', 'roe'],\n",
" df=None)\n",
"cashflow_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cashflow.h5', key='cashflow',\n",
" columns=['ts_code', 'ann_date', 'n_cashflow_act'],\n",
" df=None)\n",
"balancesheet_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/balancesheet.h5', key='balancesheet',\n",
" columns=['ts_code', 'ann_date', 'money_cap', 'total_liab'],\n",
" df=None)\n",
"top_list_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/top_list.h5', key='top_list',\n",
" columns=['ts_code', 'trade_date', 'reason'],\n",
" df=None)\n",
"\n",
"top_list_df = top_list_df.sort_values(by='trade_date', ascending=False).drop_duplicates(subset=['ts_code', 'trade_date'], keep='first').sort_values(by='trade_date')\n",
"\n",
"stk_holdertrade_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_holdertrade.h5', key='stk_holdertrade',\n",
" columns=['ts_code', 'ann_date', 'in_de', 'change_ratio', 'after_ratio'],\n",
" df=None)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "92d84ce15a562ec6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-13T13:43:40.057271300Z",
"start_time": "2025-04-03T12:47:16.121802Z"
}
},
"outputs": [],
"source": [
"\n",
"import numpy as np\n",
"import polars as pl\n",
"from main.factor.factor import *\n",
"from main.factor.money_factor import *\n",
"\n",
"\n",
"def filter_data(df):\n",
" # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n",
" df = df[df['trade_date'] <= '2025-06-01']\n",
" df = df[~df['is_st']]\n",
" df = df[~df['ts_code'].str.endswith('BJ')]\n",
" df = df[~df['ts_code'].str.startswith('30')]\n",
" df = df[~df['ts_code'].str.startswith('68')]\n",
" df = df[~df['ts_code'].str.startswith('8')]\n",
" df = df[df['trade_date'] >= '2019-01-01']\n",
" if 'in_date' in df.columns:\n",
" df = df.drop(columns=['in_date'])\n",
" df = df.reset_index(drop=True)\n",
" return df\n",
"\n",
"gc.collect()\n",
"\n",
"df = filter_data(df)\n",
"df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"\n",
"df = pl.from_dataframe(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2b3d9e6",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Applying momentum factors: 0%| | 0/10 [00:00<?, ?it/s]\n"
]
},
{
"ename": "DuplicateError",
"evalue": "column with name 'price_minus_deduction_price_10_right' already exists\n\nYou may want to try:\n- renaming the column prior to joining\n- using the `suffix` parameter to specify a suffix different to the default one ('_right')",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mDuplicateError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmain\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mfactor\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpolars_momentum_factors\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m apply_momentum_factors\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m df = \u001b[43mapply_momentum_factors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[38;5;28mprint\u001b[39m(df.columns)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/factor/polars_momentum_factors.py:408\u001b[39m, in \u001b[36mapply_momentum_factors\u001b[39m\u001b[34m(df, operators)\u001b[39m\n\u001b[32m 406\u001b[39m result_df = df\n\u001b[32m 407\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m operator \u001b[38;5;129;01min\u001b[39;00m tqdm(operators, desc=\u001b[33m\"\u001b[39m\u001b[33mApplying momentum factors\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m408\u001b[39m result_df = \u001b[43moperator\u001b[49m\u001b[43m.\u001b[49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult_df\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 410\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result_df\n",
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/factor/operator_framework.py:58\u001b[39m, in \u001b[36mBaseOperator.apply\u001b[39m\u001b[34m(self, df, return_type, **kwargs)\u001b[39m\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m缺少必需列\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.required_columns\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 57\u001b[39m long_table = \u001b[38;5;28mself\u001b[39m._sectional_roll(df, **kwargs) \u001b[38;5;66;03m# ① 滚动\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m58\u001b[39m merged = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_merge_factor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlong_table\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# ② 合并\u001b[39;00m\n\u001b[32m 59\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m merged \u001b[38;5;28;01mif\u001b[39;00m return_type == \u001b[33m'\u001b[39m\u001b[33mdf\u001b[39m\u001b[33m'\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m merged[\u001b[38;5;28mself\u001b[39m.get_factor_name()]\n",
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/factor/operator_framework.py:76\u001b[39m, in \u001b[36mBaseOperator._merge_factor\u001b[39m\u001b[34m(self, original, factor_table)\u001b[39m\n\u001b[32m 74\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"按 [ts_code, trade_date] 左联,原地追加因子列\"\"\"\u001b[39;00m\n\u001b[32m 75\u001b[39m factor_name = \u001b[38;5;28mself\u001b[39m.get_factor_name()\n\u001b[32m---> \u001b[39m\u001b[32m76\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43moriginal\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfactor_table\u001b[49m\u001b[43m.\u001b[49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mtrade_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfactor_name\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 77\u001b[39m \u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mtrade_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 78\u001b[39m \u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mleft\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/polars/_utils/deprecation.py:128\u001b[39m, in \u001b[36mdeprecate_renamed_parameter.<locals>.decorate.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 123\u001b[39m \u001b[38;5;129m@wraps\u001b[39m(function)\n\u001b[32m 124\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mwrapper\u001b[39m(*args: P.args, **kwargs: P.kwargs) -> T:\n\u001b[32m 125\u001b[39m _rename_keyword_argument(\n\u001b[32m 126\u001b[39m old_name, new_name, kwargs, function.\u001b[34m__qualname__\u001b[39m, version\n\u001b[32m 127\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m128\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/polars/dataframe/frame.py:7842\u001b[39m, in \u001b[36mDataFrame.join\u001b[39m\u001b[34m(self, other, on, how, left_on, right_on, suffix, validate, nulls_equal, coalesce, maintain_order)\u001b[39m\n\u001b[32m 7824\u001b[39m require_same_type(\u001b[38;5;28mself\u001b[39m, other)\n\u001b[32m 7826\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpolars\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mlazyframe\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mopt_flags\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m QueryOptFlags\n\u001b[32m 7828\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[32m 7829\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7830\u001b[39m \u001b[43m \u001b[49m\u001b[43m.\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 7831\u001b[39m \u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m=\u001b[49m\u001b[43mother\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7832\u001b[39m \u001b[43m \u001b[49m\u001b[43mleft_on\u001b[49m\u001b[43m=\u001b[49m\u001b[43mleft_on\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7833\u001b[39m \u001b[43m \u001b[49m\u001b[43mright_on\u001b[49m\u001b[43m=\u001b[49m\u001b[43mright_on\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7834\u001b[39m \u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7835\u001b[39m \u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7836\u001b[39m \u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m=\u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7837\u001b[39m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m=\u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7838\u001b[39m \u001b[43m \u001b[49m\u001b[43mnulls_equal\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnulls_equal\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7839\u001b[39m \u001b[43m \u001b[49m\u001b[43mcoalesce\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcoalesce\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7840\u001b[39m \u001b[43m \u001b[49m\u001b[43mmaintain_order\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmaintain_order\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7841\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m-> \u001b[39m\u001b[32m7842\u001b[39m \u001b[43m \u001b[49m\u001b[43m.\u001b[49m\u001b[43mcollect\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimizations\u001b[49m\u001b[43m=\u001b[49m\u001b[43mQueryOptFlags\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_eager\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7843\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/polars/_utils/deprecation.py:97\u001b[39m, in \u001b[36mdeprecate_streaming_parameter.<locals>.decorate.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 93\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33mengine\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[33m\"\u001b[39m\u001b[33min-memory\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 95\u001b[39m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[33m\"\u001b[39m\u001b[33mstreaming\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m97\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/polars/lazyframe/opt_flags.py:331\u001b[39m, in \u001b[36mforward_old_opt_flags.<locals>.decorate.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 328\u001b[39m optflags = cb(optflags, kwargs.pop(key)) \u001b[38;5;66;03m# type: ignore[no-untyped-call,unused-ignore]\u001b[39;00m\n\u001b[32m 330\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33moptimizations\u001b[39m\u001b[33m\"\u001b[39m] = optflags\n\u001b[32m--> \u001b[39m\u001b[32m331\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/polars/lazyframe/frame.py:2300\u001b[39m, in \u001b[36mLazyFrame.collect\u001b[39m\u001b[34m(self, type_coercion, predicate_pushdown, projection_pushdown, simplify_expression, slice_pushdown, comm_subplan_elim, comm_subexpr_elim, cluster_with_columns, collapse_joins, no_optimization, engine, background, optimizations, **_kwargs)\u001b[39m\n\u001b[32m 2298\u001b[39m \u001b[38;5;66;03m# Only for testing purposes\u001b[39;00m\n\u001b[32m 2299\u001b[39m callback = _kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mpost_opt_callback\u001b[39m\u001b[33m\"\u001b[39m, callback)\n\u001b[32m-> \u001b[39m\u001b[32m2300\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m wrap_df(\u001b[43mldf\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcollect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[43m)\u001b[49m)\n",
"\u001b[31mDuplicateError\u001b[39m: column with name 'price_minus_deduction_price_10_right' already exists\n\nYou may want to try:\n- renaming the column prior to joining\n- using the `suffix` parameter to specify a suffix different to the default one ('_right')"
]
}
],
"source": [
"from main.factor.polars_momentum_factors import apply_momentum_factors\n",
"\n",
"\n",
"df = apply_momentum_factors(df)\n",
"\n",
"print(df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}