2025-09-16 09:59:38 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "initial_id",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"collapsed": true,
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:24:45.986127Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:24:45.967595Z"
|
2025-09-16 09:59:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
2025-11-07 16:26:00 +08:00
|
|
|
|
"\n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"import pandas as pd\n",
|
2025-09-16 09:59:38 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Silence every warning for the rest of the session (this also hides deprecation notices)\n",
|
|
|
|
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 0. Configure your file path ---\n",
|
|
|
|
|
|
"# Replace the path below with the actual location of your CSV file\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"file_path = 'D:/PyProject/NewQuant/data/data/KQ_m@SHFE_rb/KQ_m@SHFE_rb_min15.csv'\n",
|
2025-09-16 09:59:38 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"sns.set(style='whitegrid')\n",
|
|
|
|
|
|
"plt.rcParams['font.sans-serif'] = ['SimHei'] # font that can render Chinese labels\n",
|
|
|
|
|
|
"plt.rcParams['axes.unicode_minus'] = False # render the minus sign correctly\n"
|
|
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 5
|
2025-09-16 09:59:38 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:24:46.032712Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:24:45.995213Z"
|
2025-09-16 09:59:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 1. Data Loading and Preprocessing ---\n",
|
|
|
|
|
|
"def load_and_preprocess_data(file_path):\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" Loads historical futures data and performs basic preprocessing.\n",
|
|
|
|
|
|
" Assumes data contains 'datetime', 'open', 'high', 'low', 'close', 'volume' columns.\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" try:\n",
|
|
|
|
|
|
" df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
|
|
|
|
|
|
" # Ensure data is sorted by time\n",
|
|
|
|
|
|
" df = df.sort_index()\n",
|
|
|
|
|
|
" # Check and handle missing values\n",
|
|
|
|
|
|
" initial_rows = len(df)\n",
|
|
|
|
|
|
" df.dropna(inplace=True)\n",
|
|
|
|
|
|
" if len(df) < initial_rows:\n",
|
|
|
|
|
|
" print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Check if necessary columns exist\n",
|
|
|
|
|
|
" required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
|
|
|
|
|
|
" if not all(col in df.columns for col in required_columns):\n",
|
|
|
|
|
|
" raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" print(f\"Successfully loaded {len(df)} rows of data.\")\n",
|
|
|
|
|
|
" print(\"First 5 rows of data:\")\n",
|
|
|
|
|
|
" print(df.head())\n",
|
|
|
|
|
|
" return df\n",
|
|
|
|
|
|
" except FileNotFoundError:\n",
|
|
|
|
|
|
" print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
|
|
|
|
|
|
" return None\n",
|
|
|
|
|
|
" except Exception as e:\n",
|
|
|
|
|
|
" print(f\"Error during data loading or preprocessing: {e}\")\n",
|
|
|
|
|
|
" return None\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Load the bars once; df_raw is None when loading failed\n",
"df_raw = load_and_preprocess_data(file_path)\n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"print(df_raw)\n",
|
|
|
|
|
|
"# Optional: keep only recent history, e.g. df_raw = df_raw[df_raw.index >= '2024-01-01']"
|
2025-09-16 09:59:38 +08:00
|
|
|
|
],
|
|
|
|
|
|
"id": "1638e05ca7ef1ac8",
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"Successfully loaded 27060 rows of data.\n",
|
2025-09-16 09:59:38 +08:00
|
|
|
|
"First 5 rows of data:\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
|
|
|
|
|
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
|
2025-09-24 23:14:14 +08:00
|
|
|
|
"\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
|
|
|
|
|
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"... ... ... ... ... ... ... \n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"2025-11-27 13:30:00 3090.0 3095.0 3088.0 3092.0 56214.0 1119421.0 \n",
|
|
|
|
|
|
"2025-11-27 13:45:00 3092.0 3093.0 3088.0 3090.0 22872.0 1104638.0 \n",
|
|
|
|
|
|
"2025-11-27 14:00:00 3090.0 3093.0 3087.0 3092.0 24689.0 1100864.0 \n",
|
|
|
|
|
|
"2025-11-27 14:15:00 3092.0 3098.0 3091.0 3098.0 47317.0 1096482.0 \n",
|
|
|
|
|
|
"2025-11-27 14:30:00 3098.0 3099.0 3091.0 3091.0 52143.0 1083650.0 \n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"... ... ... \n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"2025-11-27 13:30:00 1104638.0 SHFE.rb2601 \n",
|
|
|
|
|
|
"2025-11-27 13:45:00 1100864.0 SHFE.rb2601 \n",
|
|
|
|
|
|
"2025-11-27 14:00:00 1096482.0 SHFE.rb2601 \n",
|
|
|
|
|
|
"2025-11-27 14:15:00 1083650.0 SHFE.rb2601 \n",
|
|
|
|
|
|
"2025-11-27 14:30:00 1075642.0 SHFE.rb2601 \n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"[27060 rows x 8 columns]\n"
|
2025-09-24 23:14:14 +08:00
|
|
|
|
]
|
2025-09-20 00:04:51 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 6
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
"source": "",
|
|
|
|
|
|
"id": "3c6eed6176b3362d"
|
2025-09-20 00:04:51 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:24:49.028370Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:24:46.038734Z"
|
2025-09-20 00:04:51 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
2025-09-24 23:14:14 +08:00
|
|
|
|
"source": [
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"import pandas as pd\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"import numpy as np\n",
|
2025-12-16 00:36:36 +08:00
|
|
|
|
"import talib\n",
|
2025-09-24 23:14:14 +08:00
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"from typing import Tuple\n",
|
2025-12-16 00:36:36 +08:00
|
|
|
|
"\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"# Plot configuration: font that can render Chinese labels, and a correct minus sign\n",
|
|
|
|
|
|
"plt.rcParams['font.sans-serif'] = ['SimHei']\n",
|
|
|
|
|
|
"plt.rcParams['axes.unicode_minus'] = False\n",
|
2025-11-20 16:10:16 +08:00
|
|
|
|
"\n",
|
2026-01-25 23:26:03 +08:00
|
|
|
"def run_threshold_discovery_analysis(df_raw: pd.DataFrame,\n",
"                                     lookback: int = 20,\n",
"                                     forward_window: int = 10,\n",
"                                     stop_loss_atr: float = 1.5,\n",
"                                     q: float = 0.01,\n",
"                                     r: float = 0.5,\n",
"                                     vol_window: int = 100):\n",
"    \"\"\"\n",
"    Full threshold-discovery workflow: Kalman smoothing, feature and label\n",
"    construction, then the diagnostic plots.\n",
"\n",
"    Args:\n",
"        df_raw: Bars with 'high', 'low' and 'close' columns; it is copied, not modified.\n",
"        lookback: Window (in bars) for the ATR and for the price-channel position.\n",
"        forward_window: Number of future bars inspected when building the labels.\n",
"        stop_loss_atr: Stop distance expressed as a multiple of the ATR.\n",
"        q: Process-noise term of the scalar Kalman filter.\n",
"        r: Measurement-noise term of the scalar Kalman filter.\n",
"        vol_window: Window (in bars) of the average ATR used as the volatility baseline.\n",
"\n",
"    Returns:\n",
"        The copy of df_raw extended with 'atr', 'kalman', 'dev_atr', 'vol_ratio',\n",
"        'range_pos', 'fwd_max_profit', 'risk', 'efficiency_score' and 'fwd_min_low'.\n",
"    \"\"\"\n",
"    df = df_raw.copy()\n",
"\n",
"    # 1. Kalman filter on the close (reproduces the strategy logic)\n",
"    df['atr'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=lookback)\n",
"\n",
"    x_hat = df['close'].iloc[0]  # the first close seeds the state estimate\n",
"    p = 1.0\n",
"    kalman_prices = []\n",
"    for close in df['close']:\n",
"        # Predict: the state is carried over, its uncertainty grows by q\n",
"        x_hat_minus = x_hat\n",
"        p_minus = p + q\n",
"        # Update: blend the prediction with the new close using gain k\n",
"        k = p_minus / (p_minus + r)\n",
"        x_hat = x_hat_minus + k * (close - x_hat_minus)\n",
"        p = (1 - k) * p_minus\n",
"        kalman_prices.append(x_hat)\n",
"    df['kalman'] = kalman_prices\n",
"\n",
"    # 2. Feature engineering: the three variables studied together\n",
"    # A. Core explanatory variable: distance from the Kalman line in ATR units\n",
"    df['dev_atr'] = (df['close'] - df['kalman']) / df['atr']\n",
"\n",
"    # B. Environment 1: current ATR relative to its vol_window-bar average\n",
"    #    (above 1 means volatility is expanding, below 1 contracting)\n",
"    df['vol_ratio'] = df['atr'] / df['atr'].rolling(vol_window).mean()\n",
"\n",
"    # C. Environment 2: position of the close inside the lookback high/low channel\n",
"    #    (0 = at the bottom, 1 = at the top); the epsilon avoids division by zero\n",
"    rolling_min = df['low'].rolling(lookback).min()\n",
"    rolling_max = df['high'].rolling(lookback).max()\n",
"    df['range_pos'] = (df['close'] - rolling_min) / (rolling_max - rolling_min + 1e-9)\n",
"\n",
"    # 3. Labels: best long-side excursion over the NEXT forward_window bars\n",
"    #    (shift(-n) followed by rolling(n) covers bars t+1 .. t+n), measured\n",
"    #    against a fixed ATR-based stop distance\n",
"    df['fwd_max_profit'] = df['high'].shift(-forward_window).rolling(forward_window).max() - df['close']\n",
"    df['risk'] = df['atr'] * stop_loss_atr\n",
"    # Reward/risk score: the higher, the better the entry\n",
"    df['efficiency_score'] = df['fwd_max_profit'] / df['risk']\n",
"\n",
"    # Simplified stop-loss handling: a bar scores -1 when ANY low in the forward\n",
"    # window breaches the stop, whether or not the high was reached first\n",
"    df['fwd_min_low'] = df['low'].shift(-forward_window).rolling(forward_window).min()\n",
"    df.loc[df['fwd_min_low'] < (df['close'] - df['risk']), 'efficiency_score'] = -1\n",
"\n",
"    # 4. Visualisation\n",
"    perform_visual_analysis(df)\n",
"\n",
"    return df\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"def perform_visual_analysis(df):\n",
"    \"\"\"\n",
"    Draw four diagnostic panels relating the Kalman deviation to the forward\n",
"    efficiency score.\n",
"\n",
"    Expects the columns 'dev_atr', 'efficiency_score', 'vol_ratio' and\n",
"    'range_pos'. Rows containing NaN and rows with a non-positive deviation are\n",
"    excluded (long-side analysis only). The caller's frame is not modified.\n",
"    \"\"\"\n",
"    # Clean the data, keep deviation > 0 only, and take an explicit copy so the\n",
"    # bin columns added below are written to an independent frame rather than\n",
"    # to a filtered slice.\n",
"    plot_df = df.dropna()\n",
"    plot_df = plot_df[plot_df['dev_atr'] > 0].copy()\n",
"\n",
"    fig = plt.figure(figsize=(20, 12))\n",
"\n",
"    # --- Panel 1: deviation vs efficiency score (baseline threshold) ---\n",
"    ax1 = fig.add_subplot(2, 2, 1)\n",
"    sns.regplot(x='dev_atr', y='efficiency_score', data=plot_df,\n",
"                scatter_kws={'alpha':0.1}, line_kws={'color':'red'}, ax=ax1)\n",
"    ax1.set_title('偏离度(ATR) 与 未来盈亏比效率 的直接关系')\n",
"    ax1.set_ylim(-1.5, 5)\n",
"\n",
"    # --- Panel 2: heatmap, volatility regime vs deviation bucket ---\n",
"    ax2 = fig.add_subplot(2, 2, 2)\n",
"    # Bin the continuous variables: volatility into quintiles, deviation into\n",
"    # equal-width buckets on [0, 4] (larger deviations fall outside and are ignored)\n",
"    plot_df['vol_bin'] = pd.qcut(plot_df['vol_ratio'], 5, labels=['极低波', '低波', '中波', '高波', '极高波'])\n",
"    plot_df['dev_bin'] = pd.cut(plot_df['dev_atr'], bins=np.linspace(0, 4, 20))\n",
"\n",
"    pivot_vol = plot_df.pivot_table(index='vol_bin', columns='dev_bin',\n",
"                                    values='efficiency_score', aggfunc='mean')\n",
"    sns.heatmap(pivot_vol, cmap='RdYlGn', ax=ax2)\n",
"    ax2.set_title('热力图:在不同波动率环境下,多大的偏离度最有效?')\n",
"\n",
"    # --- Panel 3: heatmap, channel position vs deviation bucket ---\n",
"    ax3 = fig.add_subplot(2, 2, 3)\n",
"    plot_df['range_bin'] = pd.cut(plot_df['range_pos'], bins=np.linspace(0, 1, 10))\n",
"\n",
"    pivot_range = plot_df.pivot_table(index='range_bin', columns='dev_bin',\n",
"                                      values='efficiency_score', aggfunc='mean')\n",
"    sns.heatmap(pivot_range, cmap='RdYlGn', ax=ax3)\n",
"    ax3.set_title('热力图:在不同区间位置(0=底, 1=顶)下,多大的偏离度最有效?')\n",
"\n",
"    # --- Panel 4: joint density of the best samples (the 'sweet spot') ---\n",
"    ax4 = fig.add_subplot(2, 2, 4)\n",
"    # Only samples whose efficiency score exceeds 2\n",
"    high_eff = plot_df[plot_df['efficiency_score'] > 2]\n",
"    sns.kdeplot(x=high_eff['dev_atr'], y=high_eff['range_pos'],\n",
"                cmap=\"Blues\", fill=True, ax=ax4)\n",
"    ax4.set_title('高胜率样本密度图:偏离度 与 区间位置 的共生关系')\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Example usage (assumes df_raw already holds the OHLC bars)\n",
|
|
|
|
|
|
"df_analyzed = run_threshold_discovery_analysis(df_raw)"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"id": "24e2402d9c065d34",
|
2025-12-16 00:36:36 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 2000x1200 with 6 Axes>"
|
|
|
|
|
|
],
|
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB8AAAASgCAYAAACHXFLNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsvQeYLNtVnr26uzr35BOlqwQCCwwyIht+RDYyYIIxSSIIgQGTMwgwUSRjQBjZZGSiAQMiGgxIJoPI0RIYoXjvPWnydK6u/p931ew5NX26Z3rCOadn5nuleWZOh6pde+/uW3t/61srNxwOhyaEEEIIIYQQQgghhBBCCCGEEEKccfIPuwFCCCGEEEIIIYQQQgghhBBCCCHEaSABXAghhBBCCCGEEEIIIYQQQgghxLlAArgQQgghhBBCCCGEEEIIIYQQQohzgQRwIYQQQgghhBBCCCGEEEIIIYQQ5wIJ4EIIIYQQQgghhBBCCCGEEEIIIc4FEsCFEEIIIYQQQgghhBBCCCGEEEKcCySACyGEEEIIIYQQQgghhBBCCCGEOBdIABdCCCGEEEIIIYQQQgghhBBCCHEukAAuhBBCCCGEEEIIIYQQQgghhBDiXCABXAghhBDiHLO2tmaDweCex3d2duw1r3nNQ2mTOBnb29vW6/XUjUIIIYQQ4kwwbj0ixFlhOBzanTt3xj7393//99btdh94m4QQQhyOBHAhhBAzT6vVuuexxx57zD7zMz/zyAsNRL//+l//q/31X//1idr0JV/yJfbSl770WO+l7bTjQfEP//APp3aszc1N+7Zv+zZ79atfbQ+K//k//6d96Id+6ImO8ad/+qf2l3/5l1O9lrlBn73pTW869OcgEbLT6Ry60ZMkyZHn8A//8A/bq171qqleu76+bu/1Xu9lv/Ebv3HPc6985SvtAz/wA+33f//3j3R+jvWjP/qjdprwebh58+bYPjyOSE+/00bGfVp4LWM6jh/8wR+0P/iDP5g64OAP//AP73n8537u5yZumoRNlX6/b+1227a2tuzxxx/3z9kf//Ef2y/8wi/499aXfumX+pi90zu9k33d133d1NcmhBBCiNniN3/zN+2TP/mT73n8v/yX/2K/+Iu/eKRj/dEf/ZH9m3/zb+xv/uZvDn0t957cZxz1J45ju9+8+MUvts/+7M/2+6Fp7h03NjZ8nci99DQ/vIcgwgDnmfa94eeoAYisY37lV37FTsJf/dVf2Qd90AfZ6173uiO9j/vX3/7t3z70daNrFv7mOrknnTQWvIb3ZfmzP/uziWvEl7zkJX4Pexb5zu/8Tnv00UdPdAzWOV//9V/v669p4TP3+te/fuxzf/d3fzf1McbNWb5jvud7vseOCuvkb/3Wb/V1y3Fhnf0RH/ERvtZ5WPze7/3ekdeYXPcnfdInjX3uy77sy+z5z3/+KbVOCCHEaRKd6tGEEEKIU4bF1Qd/8AfbJ3zCJ+xbVFy7ds3+4i/+wv7P//k/9pznPGfs+1jslcvlfY/X63X7pV/6JRe6nvnMZ+57Liz2K5WK5XK5veOMEzHz+bz9xE/8hH38x3/8vgUmGwGLi4sTr4dNl3//7/+9ffqnf7pvVN1vON8LXvAC+8Zv/EZ79rOffeLj3b59277v+77vVI41LYzJ6urqiY5BmxlXNhUPg/FhHIvFojWbTSuVSv736OYhz/3O7/yOXb16dexxmLNsWB3GP//n/9zF0dH2EmAxTkj9T//pP9lXfuVX2lu91VsdeuylpSX7//6//88330Y/Jwi+nPvd3/3dJ/Y718lnKHwewqbFD/zAD9hHfuRHep+Ozv9Go2FHhQ2zT/u0T/PN4EceeWTf5s5zn/tc74/3fM/3HPteNj8ZI36ybX/Ri15kX/RFX2Rv93Zvd8/3Ap/farW67zhf+7Vfa8973vPsYz7mY8aK/v/3//5fe7d3e7dDr4XvhR/6oR+yn//5n7cnP/nJ/hgbrl/91V
/tY8DmyTgILPn+7//+vX+HOcf3ydu8zdv4PHva055m7/zO72yXLl3y7zKuJXvdQgghhDgbcN807v4WARHB+UM+5EOmPhb3adyfZe/XJnHjxg177/d+7yO393u/93s9qPJ+8ru/+7t2/fr1e+67x8E903EEPNZf//k//+c90eqXf/mXj/R+7sV+7dd+be/f//2//3ebn5+3f/tv/+3E4ITl5WUXsCfNA9aa3M8xfiEgksfCvWqhULB//Md/vOfeNYBQzWvot+wcYJ1Cn2bvocPxOW+tVpu4Zvl3/+7f+TrkIOGXe1POkb0HfspTnmLPeMYz9tYRXBf33dy7vuENb/C1AusHns9eI2tMfrgGXj8NXAc/b/u2b2v3i+/+7u+2n/7pnx6733AUuNYf+7Ef8/Xbu7zLu0z1HgIemDe8j+DXrJjOfGNtwTrpIAjg5XWs3d78zd9873HmLGslgnDGfd7CeimKonuug2Mxp1iTZGEswtox+/pRsfwJT3iC/dM//ZP97M/+rO+JZNdo/B5dS/Laf/2v/7U96UlPmvgdR5D+m73Zm9lP/uRP2jTQJ6wRWX8d5buDoBKC6rPrS76v+f79ru/6rkOPQR8hvC8sLPh3nRBCiPuPBHAhhBAzDYt2XKwsOr/qq77KF6DZRfEXfMEX+A+LpX/xL/7F3qIHl/W/+lf/yhfdbAiME7SyrlgWa2EzgIX85cuX9zZXDloYISCOi4weFd4DX/EVX2Hv+q7veo/4jdj5zd/8zS7osygMsDHzspe9bOyxXv7yl/umBJsWAcSyt3/7t3fhj0Xu3Nyci7645XGRXrlyxU4CGxPAYviwqGkWsbThKAIdQiEL2+xim7HhsaxTOkSzs3jMzgc2gNjYG13I8xoW4KMOX45Jf2c3lLKR+f/yX/5L+5qv+Rr7gA/4gH3vQ5glIGOS+A1BbB83/7LXNi6CnvkzaQ7RNwf1KX3DhiDv57q5PvqJBTtz/FnPepZvvDB/Rp1HPM/x6W82BF/4whf6JlVW6A6bdKObaThweD8p4I4Km3pslmXF7xCd/8QnPtFF/Em83/u93z43TxY2OMMmZ5bP+qzPcodRFuZM6FeEeNweYW4xpxDAQyYCrvN93/d97Qu/8AvvOfanfuqnepANn/XglP/VX/1VPxafy0kQAPDRH/3RviEVxG3Ox5wmgIXN09D/B80pIYQQQswu3Hvy33HuC8L9KT/cv3E/w/P8jRDOPQD3RmFdMolwXzBJIB33Wu5V3vIt39L/5nxkmuE+hCDjUd76rd96KlH6KIR1RLjnR8Di/hqha5zrNfRRaPOnfMqnuPhHu7LrBu7REG+5T/ucz/mcfcfg/dl1A/d9rB+//du/fao2sy7kfjDLa1/7Wvupn/opD3z8hm/4Br+Py96XBid1dg3CfTlrNPjf//t/+1p2FNYeo2uJIFiPQhAn6zzu18O9LGuMcK1ZwZV+5J79Hd7hHfbuU1kj8D7WD9yTE6zJ61iDcwz6mOtgbU0GIgKhg9iZJTsWrKdZf45Cf2f/Zm0f5iPjwDGy97mMGf0X+mtc8Dri/ahQe1I4NhkJCMKFcQEprAPpdwKODyOs68Z9vibBWHBs1vaj+yPMhWmC6dlHQHRnTU5A7eiYhfHgehlbvgOA6+aHfh0NSGAsPuMzPmPfY7yfOcHaORvQi4DPZ2QcCMajojFZAr7jO75j32Phe42+Zo006VjjMn+xt0DgRQgwgVu3bvnnkaDncfsZXAfzkD4jgIC9nfD9h3OdwO0/+ZM/8e9q1pTs3xDgkQ0Qop/pk+z3JvP08z7v83yfiraw9vv8z//8sdcjhBDi9JAALoQQYqZhAUWqKRaLLMD4mzTAo+CgZeMhgGjGJgoLExYYpB5mUY/49x7v8R6+cCNKnYUUi71RN3iABfg/+2f/zP7H//gf/m8WVpwLQSrriGChRATzONd5VsxjM2ucIBfSUPMbZ22ARRWuVGAzh+sPDgLE7BCV/y3f8i0e9cxijg
Ug72FDhQ0WFs0f9VEf5ZsZk9yn00LEOYvgEK09CTYr6Pv/9b/+154Ldhq+/Mu/3Ns9jnFjRJ9mNwUZ89Bf40DYHOX
|
|
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data",
|
|
|
|
|
|
"jetTransient": {
|
|
|
|
|
|
"display_id": null
|
|
|
|
|
|
}
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 7
|
2025-12-16 00:36:36 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:24:49.050291Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:24:49.048095Z"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"source": "",
|
|
|
|
|
|
"id": "42ce521c2fe47329",
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"execution_count": null
|
2025-12-16 00:36:36 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:25:32.692633Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:25:19.919215Z"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"source": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"import talib\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"def analyze_dynamic_threshold_study(df_raw, q=0.01, r=0.5, look_forward=20):\n",
"    \"\"\"\n",
"    Scan a grid of deviation thresholds on Kalman-smoothed closes and plot how\n",
"    each threshold performed over time, next to two environment factors.\n",
"\n",
"    No relation between threshold and volatility is assumed; the whole\n",
"    threshold plane is swept so it can be inspected visually.\n",
"\n",
"    Args:\n",
"        df_raw: Bars with 'high', 'low', 'close' and 'volume' columns; copied, not modified.\n",
"        q: Process-noise term of the scalar Kalman filter.\n",
"        r: Measurement-noise term of the scalar Kalman filter.\n",
"        look_forward: Horizon, in bars, of the forward return used as the label.\n",
"\n",
"    Returns:\n",
"        (df, perf_map). df is the copy extended with 'kalman', 'dev_pct',\n",
"        'fwd_ret', 'range_ext', 'vol_z' and 'dist_max'. perf_map has one row per\n",
"        bar and one column per threshold; a cell holds the forward return when\n",
"        dev_pct exceeded the threshold and 0 when it did not.\n",
"    \"\"\"\n",
"    df = df_raw.copy()\n",
"\n",
"    # --- 1. Base indicator (Kalman filter) ---\n",
"    # Force float: with integer closes a zeros_like buffer would be integer too\n",
"    # and every Kalman estimate would be silently truncated.\n",
"    closes = df['close'].to_numpy(dtype=float)\n",
"    x_hat = closes[0]\n",
"    p = 1.0\n",
"    kalman_values = np.zeros(len(closes), dtype=float)\n",
"\n",
"    for i in range(len(closes)):\n",
"        x_hat_minus = x_hat\n",
"        p_minus = p + q\n",
"        k = p_minus / (p_minus + r)\n",
"        x_hat = x_hat_minus + k * (closes[i] - x_hat_minus)\n",
"        p = (1 - k) * p_minus\n",
"        kalman_values[i] = x_hat\n",
"\n",
"    df['kalman'] = kalman_values\n",
"\n",
"    # Percentage deviation removes the effect of the absolute price level\n",
"    # Deviation = (Price - Kalman) / Kalman * 100\n",
"    df['dev_pct'] = (df['close'] - df['kalman']) / df['kalman'] * 100\n",
"\n",
"    # --- 2. Simulation engine: sweep the candidate thresholds ---\n",
"    # 25 thresholds from 0.2% to 5.0%\n",
"    threshold_range = np.linspace(0.2, 5.0, 25)\n",
"\n",
"    # For every threshold: per-bar position (1 when dev_pct > T, else 0)\n",
"    # multiplied by the forward return of that bar\n",
"    results = []\n",
"\n",
"    # Forward return over the next look_forward bars\n",
"    df['fwd_ret'] = df['close'].shift(-look_forward) / df['close'] - 1\n",
"\n",
"    for t in threshold_range:\n",
"        # Position signal: hold while the deviation stays above the threshold\n",
"        pos = (df['dev_pct'] > t).astype(int)\n",
"\n",
"        # Screening power of this threshold: forward return of the bars it selects\n",
"        perf = pos * df['fwd_ret']\n",
"        results.append(perf.values)\n",
"\n",
"    # Rows are timestamps, columns are thresholds\n",
"    perf_map = pd.DataFrame(np.array(results).T, index=df.index, columns=[f\"{t:.1f}%\" for t in threshold_range])\n",
"\n",
"    # --- 3. Environment factors (candidates for a dynamic rule) ---\n",
"    # Factor A: width of the recent trading range, (High - Low) / Close\n",
"    df['range_ext'] = (df['high'].rolling(20).max() - df['low'].rolling(20).min()) / df['close']\n",
"    # Factor B: relative volume strength (volume z-score)\n",
"    df['vol_z'] = (df['volume'] - df['volume'].rolling(20).mean()) / df['volume'].rolling(20).std()\n",
"    # Factor C: close relative to the highest close of the last 250 bars\n",
"    df['dist_max'] = df['close'] / df['close'].rolling(250).max()\n",
"\n",
"    # --- 4. Visualisation ---\n",
"    fig = plt.figure(figsize=(18, 12))\n",
"    gs = fig.add_gridspec(4, 1, height_ratios=[1, 1.5, 0.8, 0.8])\n",
"\n",
"    # Panel 1: price and Kalman line\n",
"    ax1 = fig.add_subplot(gs[0])\n",
"    ax1.plot(df.index, df['close'], label='Price', alpha=0.6)\n",
"    ax1.plot(df.index, df['kalman'], label='Kalman', color='orange', lw=2)\n",
"    ax1.set_title(\"Price & Kalman Foundation\")\n",
"    ax1.legend()\n",
"\n",
"    # Panel 2: threshold-efficiency heatmap (the core chart)\n",
"    # X axis is time, Y axis the thresholds, colour the forward return\n",
"    ax2 = fig.add_subplot(gs[1])\n",
"    sns.heatmap(perf_map.T, cmap='RdYlGn', center=0, ax=ax2, cbar_kws={'label': f'Fwd {look_forward}D Return'})\n",
"    ax2.set_title(\"Strategy Performance Heatmap: Which Threshold worked when?\")\n",
"    ax2.set_ylabel(\"Threshold (%)\")\n",
"\n",
"    # Panel 3: environment factor A (range expansion)\n",
"    ax3 = fig.add_subplot(gs[2])\n",
"    ax3.fill_between(df.index, 0, df['range_ext'], color='purple', alpha=0.3, label='Historical Range (20D)')\n",
"    ax3.set_title(\"Environment Factor: Volatility Range (Is the market 'tight' or 'loose'?)\")\n",
"    ax3.legend()\n",
"\n",
"    # Panel 4: environment factor B (volume z-score)\n",
"    ax4 = fig.add_subplot(gs[3])\n",
"    ax4.bar(df.index, df['vol_z'], color='gray', alpha=0.5, label='Volume Z-Score')\n",
"    ax4.axhline(0, color='black', lw=0.5)\n",
"    ax4.set_title(\"Environment Factor: Relative Volume (Is there conviction?)\")\n",
"    ax4.legend()\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"    return df, perf_map\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Example usage (assumes df_raw is already loaded)\n",
|
|
|
|
|
|
"processed_df, perf_results = analyze_dynamic_threshold_study(df_raw)"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"id": "6af30d4c1690ba07",
|
2025-12-16 00:36:36 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1800x1200 with 5 Axes>"
|
|
|
|
|
|
],
|
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABvgAAASfCAYAAADI2vq5AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Qd4G/X9x/GPpuW9d/beOyGMQBJG2avMttBBKZT5pxTKKpSyaUuBQktb6IKyyt4jIRAgrJCE7L2nE8fb1tb/+Z3jFTuJHTJ89vv1PIqtu9PpdJIvkj73/f4csVgsJgAAAAAAAAAAAAC24DzYGwAAAAAAAAAAAACg9Qj4AAAAAAAAAAAAABsh4AMAAAAAAAAAAABshIAPAAAAAAAAAAAAsBECPgAAAAAAAAAAAMBGCPgAAAAAAAAAAAAAGyHgAwAAAAAAAAAAAGyEgA8AAAAAAAAAAACwEQI+AAAAAAAAAAAAwEYI+AAAAADY1vr169W/f//6y8iRI/X9739fc+fO3av1TZ48WS+99JIOloceekiHHHKITjjhBC1cuLBNt73hhht02mmn1V8PBoM65ZRT9J3vfEdVVVVtWtcXX3xh7c/2qG7bWrr86U9/OqjbZrbBbN++Zh7XBRdcsM/XCwAAAMC+3Ad7AwAAAADg27rmmmt0+OGHq6SkRM8//7wuvPBCvfXWWyooKGjTev7yl78oJyfnoDwhM2bM0BNPPKHf//73Vsh4//3361//+tder8/cfvXq1XruueeUmJiojsbspx49ejSZdrCeu2/DBIIbNmzQmWeeuctlzjnnHJ100kkHdLsAAAAAtG8EfAAAAABsr2vXrho6dKj1+/jx43XUUUfpf//7n66++uo2redgVq2Zir2BAwfquOOOU1FRkZ599tm9XtfHH3+sp556SjfddJMGDRqkjqhPnz7W/rK7L7/80rrsLuDLzc09oNsEAAAAoP2jRScAAACADsXr9VqB39q1a2UnWVlZ2rZtm/X7p59+qiFDhuzVerZv364bb7xRkyZNsioZAQAAAAAdDwEfAAAAgA7HBGWN2zXWja03c+ZMayyzk08+uU1j8Jnx7O677z4deuihGjNmjC655JJmAeK7775rjXk3bNgwnX766frss8/atM1m3Rs3btS1116rpUuX6qqrrtLeMFV7NTU1uueee1rcL6adqRnnb9y4cVaFowkE2zLe4T//+U9rW804gR999JEVJB5xxBH14x5WV1fr1ltv1WGHHabRo0froosu0rp16+rXY/a/GVPuhRdesPb3qFGjrMccCAS0L7388stWNaSp7DzvvPOajMvY0ph25nk329N4nL+tW7fq0ksv1YgRI3Tsscdq+vTp9cubx2mCVDPuo6kYfeONN5ptw2uvvaYTTzxRw4cP1/HHH99kGXP/5j4eeeQRq4KvbhzBll5/uxuDzzwH5nVnAuFTTz21yTbWPaaVK1daY1Oa7TDLzps3r837EwAAAED7QsAHAAAAoMMwYZUJTExQZgKVxmbPnq3LL7/cCnx++tOftmm9N9xwgxUYXXfddXr44YdVUVFhrSMUCtUHQiYsO+aYY6xx9Mx9XHzxxVqxYkWr7yM9Pd0aM3DOnDnWOIJtHT/QWLZsmaZNm6bKykqtWbOm2Xyz/YsWLbLG5/vDH/5gLW/GsmuLqVOnWmGnCThvvvlm3Xbbbda2m/1j3H333dYyt99+ux599FErbLzllluarGPKlCn6+9//bgVk5vLOO+9YLVX3FRNsmaDThJB//etflZ2drR/+8Idtej4M8xz27NnTGpvRPB+/+tWvFI1GrXn33nuv3n//fSvM/PWvf60HHnigyW1NmHz99ddbIaN5TZhQ2dy+Luw0+8eEnGZ8vcGDB1u/m4sJTFvLhMgmgDQho9mfJog0183rsU5VVZX1Wp0wYYL+/Oc/W9PMcwYAAADA3hiDDwAAAIDt/eIXv7AuRlJSku644w6rWqkxE0
A9/fTTVoVdW6xevVpvvvmmFYqddtpp1rSMjAwrLCkuLlZeXp4VKppgpm7MP1O5ZsIfc7vWVOKZCkETPiYkJGjVqlUqKSlRZmamFZQNGDBAhYWFrdrWSCSin/zkJ9b9miDSBEuNmZDJhElmnYYJAz///PM27Q+zn03VnamQPPfcczVx4kS9/fbbVpBnmMo9M56cWcaYP3++VYG28z41FY9m3xlmXy1evLhN22GqJBsz4VjdOIzm+TCVaqZa0Rg7dqxVSff444+3WNm4K6Za04RyRmJios4++2yrqs+8xl588UVr/WeccYY13+Fw6LLLLqu/rVne3Ffd/G7dulmvGVM9Z1rI9urVy5r+4YcfWvujbtvbwgSoZj//9re/ta6bykpTrWcev6nSNEpLS63Q78c//rF13WzjL3/5yzbfFwAAAID2hYAPAAAAgO2ZFo+mQsmEKl26dJHT2bxZyVlnndXmcM9YuHBhfWhXxwRkJkCrY1pqmiDFtFhsrKUqupa88sorVtXVJ598YgWCpjLuqaeessIlU2nX2oAvPz/fCuDM8ibk/Prrr5tst6kmM8HUgw8+qG+++cbaZnObtqhrfWoCrca/1zFVjCZM/de//mVVI5pArK7qrfEydeFeXWAaDofbtB1//OMfreq6OnW/myrODRs26Iorrqif5/F4rJCvcZvOne28jYZpa9l4Gw1TtWmq8Mz2Ng6RzfobGzhwoFVJWfc8mGpJcx9+v1/7iglPTQvUxkzIZ4LMOuZv4fzzz/9W+xoAAABA+0PABwAAAMD2TKBlApXd2Ztwb1disZgV2nTv3t1q/2h873vfs9otNpacnNzqENEEkykpKfrNb35jVcD9/Oc/t1qB1lV6tUZqaqoVZpnt+Nvf/maFkP/+97+teSZsMlVvpvrM/DQVXaZyzLSw3FdMBeEPfvADK2Qz1W7mYkKtn/3sZ02WMxVs35YJ9Fp6zs1zsyu7m7d58+Zm00zV3e7W0zhI3jlUNgGtaeNpgmXT6tO0z9zVOHp7a1ePp/F0E8L6fL59er8AAAAADj7G4AMAAACA3ahrZ2nGVKuzfv16q7rLtFs0+vbta1WqmcCp7mLaa3700Uet2rdZWVnW7U2rzh49euh3v/udVc1nquv2Ziw+r9drhWqm/eaXX35pTTO/m+027RtNuGdaONaNB7evmCo1Uxl455136pJLLrGqKs14iDtzuVzaX0xrU7PPGo9DZyrWzPNX1wbT3H9dS1HDhJCmZWhrt9MElCbQMxV0dWbNmtWsZehJJ51kBbbmZ3x8vFUxubO4uLi9rqgzj6fx46x7nhu3+9yf+xoAAADAwUPABwAAAAC7YSrojj/+eKsa66WXXtKMGTN00003WdVd48ePt5Yx4+dNmTLFahv51VdfWdVzZny0uuq+PTHrN4HTDTfcYAU0prWlaXtpQj/TfnRvmCq+3Nzc+vHv0tPTrZ+mfaYJ/UwIZ8bo25ftGk0Fodnut956ywrUTAXh3Xffbc07kG0hr7zySr322mt66KGH9Nlnn1ltS82+NJV0daGtGfPPtOysrq629oUZT7G1TBXkqaeeao2p9+qrr1phbt04eHXM/jbPo7l/sy2mwrOqqsqqcty5stRUcL733nvWa6eu4rI1zOvOVJKaENHcj/lprjduTwoAAACgYyLgAwAAAIA9uP/++3XaaadZP6+++mqr9eY//vEPJSQk1I979sADD1hBz09+8hMrRLvrrrusMe9ao0+fPvrLX/5ijdlnKt9MWPj3v//dCgoPO+ywvXp+6qr4TJhnwh8zFp/Z9ueee84KulatWmWN8WeCr31VyWcqDs2Ycx9//LFVJVgXOhkmeDpQTItTs//feOMN67EWFRVZwVnv3r2t+ZMmTdK5555rjV937LHHWlVuZuzDtrj11lut25oA0/z+05/+tMn8W265xaomNM+nCX5Nu9IhQ4Y02w8mJL700kt1++23W/ts+vTprd4G87p77LHHrHWax2l+mu
umOhMAAABAx+aI7W4QAgAAAAAAAAAAAADtChV8AAAAAAAAAAAAgI0Q8AEAAAAAAAAAAAA2QsAHAAAAAAAAAAAA2Ag
|
|
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data",
|
|
|
|
|
|
"jetTransient": {
|
|
|
|
|
|
"display_id": null
|
|
|
|
|
|
}
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 9
|
2025-12-16 00:36:36 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:30:30.319511Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:30:29.417034Z"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"source": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"import talib\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"def analyze_threshold_regime_matrix(df_raw, q=0.01, r=0.5, look_forward=10):\n",
|
|
|
|
|
|
" df = df_raw.copy()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 1. 基础指标:卡尔曼百分比偏离 (dev_pct)\n",
|
|
|
|
|
|
" closes = df['close'].values\n",
|
|
|
|
|
|
" x_hat, p = closes[0], 1.0\n",
|
|
|
|
|
|
" kalman_values = np.zeros_like(closes)\n",
|
|
|
|
|
|
" for i in range(len(closes)):\n",
|
|
|
|
|
|
" x_hat_minus = x_hat\n",
|
|
|
|
|
|
" p_minus = p + q\n",
|
|
|
|
|
|
" k = p_minus / (p_minus + r)\n",
|
|
|
|
|
|
" x_hat = x_hat_minus + k * (closes[i] - x_hat_minus)\n",
|
|
|
|
|
|
" p = (1 - k) * p_minus\n",
|
|
|
|
|
|
" kalman_values[i] = x_hat\n",
|
|
|
|
|
|
" df['kalman'] = kalman_values\n",
|
|
|
|
|
|
" df['dev_pct'] = (df['close'] - df['kalman']) / df['kalman'] * 100\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 2. 定义环境因子 (自变量)\n",
|
|
|
|
|
|
" # 因子A:相对波动率 (当前20日振幅 / 历史100日平均振幅) -> 反映市场是否异常活跃\n",
|
|
|
|
|
|
" df['range_raw'] = (df['high'] - df['low']) / df['close']\n",
|
|
|
|
|
|
" df['rel_vol'] = df['range_raw'].rolling(20).mean() / df['range_raw'].rolling(100).mean()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 因子B:成交量强度 (Z-Score)\n",
|
|
|
|
|
|
" df['vol_z'] = (df['volume'] - df['volume'].rolling(50).mean()) / df['volume'].rolling(50).std()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 3. 计算多空未来收益 (Label)\n",
|
|
|
|
|
|
" df['fwd_ret_long'] = df['close'].shift(-look_forward) / df['close'] - 1\n",
|
|
|
|
|
|
" df['fwd_ret_short'] = -(df['close'].shift(-look_forward) / df['close'] - 1)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 4. 矩阵分析函数\n",
|
|
|
|
|
"    def get_efficiency_matrix(factor_name, ret_col, is_long=True):\n",
"        \"\"\"\n",
"        Mean forward return for every (entry threshold, factor bin) pair.\n",
"\n",
"        Rows are 20 deviation thresholds from 0.2% to 4.0%; columns are 15\n",
"        equal-count bins of df[factor_name], labelled by the bin's mean factor\n",
"        value. A cell is the mean of df[ret_col] over the bars of that bin whose\n",
"        dev_pct is above +threshold (long) or below -threshold (short), and NaN\n",
"        when no bar qualifies. Writes the helper column 'factor_bin' into the\n",
"        enclosing df.\n",
"        \"\"\"\n",
"        n_bins = 15\n",
"        thresholds = np.linspace(0.2, 4.0, 20)\n",
"        # Bin on the rank rather than the raw value: ranking with\n",
"        # method='first' breaks ties, so the quantile edges are always unique.\n",
"        df['factor_bin'] = pd.qcut(df[factor_name].rank(method='first'), n_bins, labels=False)\n",
"        factor_bins = df.groupby('factor_bin')[factor_name].mean().values\n",
"\n",
"        # Start from NaN so cells without a single qualifying bar stay empty\n",
"        # (seaborn masks missing cells) instead of reading as a 0.0 return.\n",
"        matrix = np.full((len(thresholds), n_bins), np.nan)\n",
"\n",
"        for i, t in enumerate(thresholds):\n",
"            for b in range(n_bins):\n",
"                # Entry rule: deviation beyond the threshold in the trade direction\n",
"                if is_long:\n",
"                    mask = (df['factor_bin'] == b) & (df['dev_pct'] > t)\n",
"                else:\n",
"                    mask = (df['factor_bin'] == b) & (df['dev_pct'] < -t)\n",
"\n",
"                if mask.any():\n",
"                    # Plain average forward return of the selected bars\n",
"                    matrix[i, b] = df.loc[mask, ret_col].mean()\n",
"\n",
"        return pd.DataFrame(matrix, index=[f\"{t:.2f}%\" for t in thresholds], columns=[f\"{v:.2f}\" for v in factor_bins])\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 5. 可视化:多空双向 + 双因子矩阵\n",
|
|
|
|
|
|
" fig, axes = plt.subplots(2, 2, figsize=(20, 16))\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Long x Relative Volatility\n",
|
|
|
|
|
|
" m1 = get_efficiency_matrix('rel_vol', 'fwd_ret_long', True)\n",
|
|
|
|
|
|
" sns.heatmap(m1, cmap='RdYlGn', center=0, ax=axes[0,0])\n",
|
|
|
|
|
|
" axes[0,0].set_title(\"Long Strategy: Threshold vs Relative Volatility\")\n",
|
|
|
|
|
|
" axes[0,0].set_xlabel(\"Relative Volatility (Current/Avg)\")\n",
|
|
|
|
|
|
" axes[0,0].set_ylabel(\"Entry Threshold (%)\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Short x Relative Volatility\n",
|
|
|
|
|
|
" m2 = get_efficiency_matrix('rel_vol', 'fwd_ret_short', False)\n",
|
|
|
|
|
|
" sns.heatmap(m2, cmap='RdYlGn', center=0, ax=axes[0,1])\n",
|
|
|
|
|
|
" axes[0,1].set_title(\"Short Strategy: Threshold vs Relative Volatility\")\n",
|
|
|
|
|
|
" axes[0,1].set_xlabel(\"Relative Volatility (Current/Avg)\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Long x Volume Z-Score\n",
|
|
|
|
|
|
" m3 = get_efficiency_matrix('vol_z', 'fwd_ret_long', True)\n",
|
|
|
|
|
|
" sns.heatmap(m3, cmap='RdYlGn', center=0, ax=axes[1,0])\n",
|
|
|
|
|
|
" axes[1,0].set_title(\"Long Strategy: Threshold vs Volume Conviction\")\n",
|
|
|
|
|
|
" axes[1,0].set_xlabel(\"Volume Z-Score\")\n",
|
|
|
|
|
|
" axes[1,0].set_ylabel(\"Entry Threshold (%)\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Short x Volume Z-Score\n",
|
|
|
|
|
|
" m4 = get_efficiency_matrix('vol_z', 'fwd_ret_short', False)\n",
|
|
|
|
|
|
" sns.heatmap(m4, cmap='RdYlGn', center=0, ax=axes[1,1])\n",
|
|
|
|
|
|
" axes[1,1].set_title(\"Short Strategy: Threshold vs Volume Conviction\")\n",
|
|
|
|
|
|
" axes[1,1].set_xlabel(\"Volume Z-Score\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" plt.tight_layout()\n",
|
|
|
|
|
|
" plt.show()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"analyze_threshold_regime_matrix(df_raw)"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"id": "619287ced265c735",
|
2025-12-16 00:36:36 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"<Figure size 2000x1600 with 8 Axes>"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB5MAAAYvCAYAAACtBtiiAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Ql8VOXV+PEzk5UtEASUJUG2ssmmZU3BF4KiqCDg+lbQSgWEQOuCYKVWfVlcKKKgVZS/VC3VChSCgiBI+9ZKkQICBVlksaERME0ghISEZOb/OU+deSeZkD2Ze29+38/nfsjcmXvn3jt3hnvueZ7zuLxer1cAAAAAAAAAAAAAAAjgDnwAAAAAAAAAAAAAAADJZAAAAAAAAAAAAABAseiZDAAAAAAAAAAAAAAIQjIZAAAAAAAAAAAAABCEZDIAAAAAAAAAAAAAIAjJZAAAAAAAAAAAAABAEJLJAAAAAAAAAAAAAIAgJJMBAAAAAAAAAAAAAEFIJgMAAAAAAAAAAAAAgpBMBmqxbdu2SceOHeWrr74Sq/vHP/4hP/7xj6Vnz54yePBg+d3vfhfqTbKlVatWmc+8uEmfU0OGDPH/HSpjx46VRYsWVcm6dD26vpp+36r8jvqmvn37yoMPPihHjhyp0Ppq4rMN1flz7733yq233ho0/+6775bbb7+93OuriXOhtPfQ46jH81JOnDhhzgv9FwAAAOVz8eJFWbhwoVx77bXSo0cP+elPfyrffvttma/VrMLr9cpvf/tbGTp0qHTr1s1c/x48eDDUm2VL+plfKl4OjM9CqapjgLLGb1aMPWbOnOn/fDp37my+Ay+99JLk5uaWe1018dmG6vzR3zV9X/2dCPTNN9+Y+cuXLy/X+mriXCjLe5T2G12ee0EAgNKRTAZgeadPn5b7779f2rVrJ6+//rrccMMN8swzz8gXX3xR6HWaFF+2bFm1boteyNrhhsKlaCJ+xYoVZuratav06dPH/1ifgzU999xz8sEHH8ivfvUrOX78uGlYcfbs2RrfDv1+ldb45De/+U1IzqWEhARz0yzwuFy4cEH27t1rnqtpekNGbxaU5Omnn5Y77rijwu/RrFkz893Vf4uj7x/qhiEAAABWpUknvVZ6/PHHTVJZr7Mfe+yxkGxLWa6zL+W9996Tl19+WSZPniyvvfaaSaRNmzZN8vPzy319WlmbNm0yk13p9bleX2tMo2bMmOGPl2FN8fHx5vN55513ZMyYMfLmm2+az7GmleVekd6DCcW51Lx5c2nTpk3QPTTf4x/96Ec1uj2ZmZnmWOm/FY11y0Jj7ZLOhcr87gJAbUQyGYDlffLJJ+LxeOSpp54yPTM1oPvBD34QlCTRi8C33367WrflX//6lyxevFjsKjY21rRW16levXrSoEED/2N9DtakDSm6d+8uw4cPNzeKMjIyZMuWLTW+Hfr9Ki3Y0tbDoTiXBgwYYH4ntm/f7p+3a9cu0+MkFMnkP/7xj0HBelFt27aVyy+/vMLvERkZab67+m9x9P11OwAAABBs5cqV8pOf/MQ0VtbGkNOnTzfXT6HoeVmW6+xL0YTLqFGjZPTo0ea6Vxtea2J8x44d5b4+re3JZL0+1+trvd/gS1T64mVYU1RUlPl8fvjDH5oqXvfcc4+sXbtW8vLyanQ7ynKvqH79+iE7l/S34e9//7upZOCjvwetWrUy53lN0iSyHquSksmlxbplobG2fqer43cXAGojkskALE8TZxoInDt3zj/vxRdflPHjx4d0u4BQ6NChg9SpU0dOnjzJB1CklbcmsQNvkOnfdevWNeXxAQAAAJ+CggITX/773//2z9PeeTqcUqNGjWx1oM6cOVNoP7Rxp+6Hxg1AbaMJSL1/pPeRUDiZrL8VgSXwtSF2KBpeAwDsiWQygFJpC+brr7/eXJTfddddsmfPnqAxSLQHoLaE1rGm9DUpKSn+12jZ2Z///OdyzTXXyE033WRKVQ8bNs
yU7i0LHcNKgwFtNa4tKVX79u39wbFvnBwtT6atQX1j5hQtMaTztKzX5s2bzRiqOiZWIN0vLR/cq1cvGThwoMyfP9/falPXpcuPGzfOvy6d9L0DbdiwQW655RbTi1THb926dWuh5/XCXY+Pvsd9991nkuJ68a7bpMdFy04HtqDV1uT6PocOHSq0Ht0ebXlbnXR7dNt69+4t//M//1OoBatvbJrDhw/LpEmTgrZFgxTtQa7L9u/fX5588knJzs72P68l1/Tz1+OsiT497gcOHCi2lb2O4XT11VfLI488Umjso6NHj5oGBXqs9Rz5f//v/5V7H5csWWL2Udf//PPPF9rHso53dOrUKf88vSF11VVXFeo1rD3o9XzX7bzxxhvl448/lsrQ98jJyZHGjRuX+bwrjX42+hlp7179nupx9X2HfWMV6aTfL/2eBY4bVpYxt8p6rCqzHy6Xy5xrgT2T9W+tZhAREeGf9+c//9m8h773iBEj5H//93+lvDceFyxYIIMGDTLfY/0+79u3z/+879hoIltbW/seF9fDpbLj8F1qHCnfWG/6/rodRcdF199rfawlwH30u6Xfg3fffbfC2wMAAGAXYWFhJnn81ltvmco/WVlZpnKTxjXae7CqYhLfdbBW0HnjjTdMXP3qq69W6Dr7UrRX9bp160yP5LS0NHPtq/vhixfKcn2q14m6jxqLahz8X//1X7J69eqgkrCJiYkm5h85cqR8/vnn/ud0WV2n3jvQyfcegWW19bjNmTPHxBwaJ+o9gvT09ELvoUlwPY79+vUz8eKECRPMdbfSmEr3MdCsWbPMfYiidHv0varLd999Z+JgjWWvu+66QjFF4DW6njsae/zyl78stLx+FnpPwhcjfvjhh4WeP3LkiBnqS2MzjXG0SlvRnrbnz583Zdn1vNRjVLQiUUn3ccpCGyho6XTdRl3P3/72t3Itr/dKinYA0M9X42+NqZR+7/S813NC90M/79TUVKkM/Q5obOirllWW8640VXGvqKQxk8tyrCq7H3qvKTw83N/4WuN9HUs5MJmsVb1eeOEF8x66rz/72c/M8SwPrYrwwAMPmHNX16Pnvg4/5fud0f3X3xGl/+rj4sY1ropxmYsbM7ksv7t6b+Tmm28utFxycrK5h6D3ugCgtiKZDKBEerH3i1/8wgQ4mlxs2rSp3HvvvSa48dGL/alTp5rxaXTcKb0g1QtQH/37n//8p7zyyismKNVgXcct0eCpLDT40XV888035gJek7CBydWkpCQTpOm/un2+MY2KG4t0/fr1ZtxZDbbuvPNO/3wNYvSCNyYmxiQYNSjTi/c1a9aY53Vduk7feCu+99D3DAwM9GJ76NChsnTpUrPdus7AY6XHqXXr1uZY6g0MTZ7p3xqQa5CpZX4+++yzQtvbpUsXf5ktH90eHROouvz2t781CW4NVjTprgmmP/3pT4Veo5+pBkw6/o4GMoF0PzXBpp+bBr6ffvqpOe4+uj7d/ocfftjcTNFy20XXoeXR9IaLXtzrpElYHTdYaUCj5as0KNHl9ZzUxLzvxkxZfPTRRyYpqIG1BhmaWNMkW1lpAKfnmyYnff7yl7+YnrC+MYf0GOj3R28w6DmhwdKjjz5qviMVocGinoN6g8h3U6Us511p5s6daz5vXbd+TzVZrTdmAscq0kn31/d9K89YT2U5VlWxHxqsaqME/R5psL17924zz0eT03rTRwNjPbf05o8+Ls/Ycfr7oOWw9NzVv3W/dLt9Asck9/1uVHasp4qO9abv7xuXK3BcdN1//R3S74CPfjZ6g0ob/AAAANQG8+bNM0lLvf7V6yS9PtSkb3XEJNo4VxOGGnv4rn+r4jpbaXyhyV2NXzWJqokuX/KmvNenGsdpTKQNuQPL8WoiRZO7GovrtbpeT+q4zBpHKx1j2He9qVPge/poXKjxryaXdBs1pg+Mp7XhuH4mDz30kMyePdssr43I9f6B0n3U4+9LrmniS4fE0kaoRen2aD
K2umicomPQ6vu0aNHCNKQueu7ocdIxrDUppcMV+Rw7dsxsW+fOnc1rNEmujRQCG9JqrOErBazHS4+bJvMD6T0LTZr
|
2025-12-16 00:36:36 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data",
|
|
|
|
|
|
"jetTransient": {
|
|
|
|
|
|
"display_id": null
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 10
|
2025-12-16 00:36:36 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"end_time": "2026-01-12T15:49:42.971235Z",
|
|
|
|
|
|
"start_time": "2026-01-12T15:49:36.349250Z"
|
2025-12-16 00:36:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"source": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"import talib\n",
|
|
|
|
|
|
"from tqdm import tqdm\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"class KalmanDynamicAnalyzer:\n",
|
|
|
|
|
|
" def __init__(self, df_raw, q=0.01, r=0.5):\n",
|
|
|
|
|
|
" self.df = df_raw.copy()\n",
|
|
|
|
|
|
" self.q = q\n",
|
|
|
|
|
|
" self.r = r\n",
|
|
|
|
|
|
" self._prepare_base_indicators()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" def _prepare_base_indicators(self):\n",
|
|
|
|
|
|
" \"\"\"计算基础指标:卡尔曼、偏离度、波动率、成交量因子\"\"\"\n",
|
|
|
|
|
|
" # 1. 卡尔曼滤波\n",
|
|
|
|
|
|
" closes = self.df['close'].values\n",
|
|
|
|
|
|
" x_hat, p = closes[0], 1.0\n",
|
|
|
|
|
|
" kalman_values = np.zeros_like(closes)\n",
|
|
|
|
|
|
" for i in range(len(closes)):\n",
|
|
|
|
|
|
" x_hat_minus = x_hat\n",
|
|
|
|
|
|
" p_minus = p + self.q\n",
|
|
|
|
|
|
" k = p_minus / (p_minus + self.r)\n",
|
|
|
|
|
|
" x_hat = x_hat_minus + k * (closes[i] - x_hat_minus)\n",
|
|
|
|
|
|
" p = (1 - k) * p_minus\n",
|
|
|
|
|
|
" kalman_values[i] = x_hat\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" self.df['kalman'] = kalman_values\n",
|
|
|
|
|
|
" # 偏离度百分比\n",
|
|
|
|
|
|
" self.df['dev_pct'] = (self.df['close'] - self.df['kalman']) / self.df['kalman'] * 100\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 2. 收益率 (用于评估)\n",
|
|
|
|
|
|
" self.df['fwd_ret'] = self.df['close'].pct_change().shift(-1)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 3. 环境因子\n",
|
|
|
|
|
|
" # 价格振幅 (Range)\n",
|
|
|
|
|
|
" self.df['range_pct'] = (self.df['high'] - self.df['low']) / self.df['close']\n",
|
|
|
|
|
|
" # 波动率因子:最近一天的平均振幅\n",
|
|
|
|
|
|
" self.df['vol_factor'] = self.df['range_pct'].rolling(23).mean()\n",
|
|
|
|
|
|
" # 成交量因子:Z-Score\n",
|
|
|
|
|
|
" self.df['vol_z'] = (self.df['volume'] - self.df['volume'].rolling(23*5).mean()) / \\\n",
|
|
|
|
|
|
" self.df['volume'].rolling(23*5).std()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" def run_rolling_optimization(self, days_window=20):\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" 滚动窗口寻优\n",
|
|
|
|
|
|
" window_size: 23 * days_window (以天为单位滚动)\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" window_size = 23 * days_window\n",
|
|
|
|
|
|
" step = 23 # 每天滚动一次\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" threshold_range = np.linspace(0.3, 4.0, 30) # 扫描 0.3% 到 4% 的阈值\n",
|
|
|
|
|
|
" results = []\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 进度条\n",
|
|
|
|
|
|
" for start in tqdm(range(0, len(self.df) - window_size, step), desc=\"Rolling Optimization\"):\n",
|
|
|
|
|
|
" end = start + window_size\n",
|
|
|
|
|
|
" sub_df = self.df.iloc[start:end]\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 记录当前窗口的环境因子均值\n",
|
|
|
|
|
|
" current_vol = sub_df['vol_factor'].mean()\n",
|
|
|
|
|
|
" current_vol_z = sub_df['vol_z'].mean()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" best_t_long = 0\n",
|
|
|
|
|
|
" max_pnl_long = -np.inf\n",
|
|
|
|
|
|
" best_t_short = 0\n",
|
|
|
|
|
|
" max_pnl_short = -np.inf\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" for t in threshold_range:\n",
|
|
|
|
|
|
" # 策略逻辑:持仓直到跌破阈值\n",
|
|
|
|
|
|
" # 这里为了性能使用简化的持有收益计算\n",
|
|
|
|
|
|
" long_mask = (sub_df['dev_pct'] > t).astype(int)\n",
|
|
|
|
|
|
" long_pnl = (long_mask * sub_df['fwd_ret']).sum()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" if long_pnl > max_pnl_long:\n",
|
|
|
|
|
|
" max_pnl_long = long_pnl\n",
|
|
|
|
|
|
" best_t_long = t\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" short_mask = (sub_df['dev_pct'] < -t).astype(int)\n",
|
|
|
|
|
|
" short_pnl = (short_mask * -sub_df['fwd_ret']).sum()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" if short_pnl > max_pnl_short:\n",
|
|
|
|
|
|
" max_pnl_short = short_pnl\n",
|
|
|
|
|
|
" best_t_short = t\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" results.append({\n",
|
|
|
|
|
|
" 'date': self.df.index[end],\n",
|
|
|
|
|
|
" 'best_t_long': best_t_long,\n",
|
|
|
|
|
|
" 'best_t_short': best_t_short,\n",
|
|
|
|
|
|
" 'volatility': current_vol,\n",
|
|
|
|
|
|
" 'volume_z': current_vol_z,\n",
|
|
|
|
|
|
" 'max_pnl_long': max_pnl_long,\n",
|
|
|
|
|
|
" 'max_pnl_short': max_pnl_short\n",
|
|
|
|
|
|
" })\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" self.opt_df = pd.DataFrame(results).set_index('date')\n",
|
|
|
|
|
|
" return self.opt_df\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" def visualize_relationships(self):\n",
|
|
|
|
|
|
" \"\"\"可视化最佳阈值与环境因子的内在关系\"\"\"\n",
|
|
|
|
|
|
" if not hasattr(self, 'opt_df'):\n",
|
|
|
|
|
|
" print(\"Please run run_rolling_optimization first.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 过滤掉 PnL 为负的窗口(即在该环境下趋势模式本身不适用)\n",
|
|
|
|
|
|
" valid_long = self.opt_df[self.opt_df['max_pnl_long'] > 0]\n",
|
|
|
|
|
|
" valid_short = self.opt_df[self.opt_df['max_pnl_short'] > 0]\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" fig = plt.figure(figsize=(20, 15))\n",
|
|
|
|
|
|
" gs = fig.add_gridspec(3, 2)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 1. Long: 最佳阈值 vs 波动率\n",
|
|
|
|
|
|
" ax1 = fig.add_subplot(gs[0, 0])\n",
|
|
|
|
|
|
" sns.regplot(data=valid_long, x='volatility', y='best_t_long', ax=ax1,\n",
|
|
|
|
|
|
" lowess=True, scatter_kws={'alpha':0.4}, line_kws={'color':'green'})\n",
|
|
|
|
|
|
" ax1.set_title(\"Long: Best Threshold vs Market Volatility\", fontsize=14)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 2. Short: 最佳阈值 vs 波动率\n",
|
|
|
|
|
|
" ax2 = fig.add_subplot(gs[0, 1])\n",
|
|
|
|
|
|
" sns.regplot(data=valid_short, x='volatility', y='best_t_short', ax=ax2,\n",
|
|
|
|
|
|
" lowess=True, scatter_kws={'alpha':0.4}, line_kws={'color':'red'})\n",
|
|
|
|
|
|
" ax2.set_title(\"Short: Best Threshold vs Market Volatility\", fontsize=14)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 3. Long: 最佳阈值 vs 成交量 Z-Score\n",
|
|
|
|
|
|
" ax3 = fig.add_subplot(gs[1, 0])\n",
|
|
|
|
|
|
" sns.regplot(data=valid_long, x='volume_z', y='best_t_long', ax=ax3,\n",
|
|
|
|
|
|
" lowess=True, scatter_kws={'alpha':0.4}, line_kws={'color':'green'})\n",
|
|
|
|
|
|
" ax3.set_title(\"Long: Best Threshold vs Volume Conviction\", fontsize=14)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 4. Short: 最佳阈值 vs 成交量 Z-Score\n",
|
|
|
|
|
|
" ax4 = fig.add_subplot(gs[1, 1])\n",
|
|
|
|
|
|
" sns.regplot(data=valid_short, x='volume_z', y='best_t_short', ax=ax4,\n",
|
|
|
|
|
|
" lowess=True, scatter_kws={'alpha':0.4}, line_kws={'color':'red'})\n",
|
|
|
|
|
|
" ax4.set_title(\"Short: Best Threshold vs Volume Conviction\", fontsize=14)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 5. 时间轴上的演变\n",
|
|
|
|
|
|
" ax5 = fig.add_subplot(gs[2, :])\n",
|
|
|
|
|
|
" ax5.plot(self.opt_df.index, self.opt_df['best_t_long'], label='Best Long T', color='green', alpha=0.6)\n",
|
|
|
|
|
|
" ax5.plot(self.opt_df.index, self.opt_df['best_t_short'], label='Best Short T', color='red', alpha=0.6)\n",
|
|
|
|
|
|
" ax5.fill_between(self.opt_df.index, 0, self.opt_df['volatility']*50, color='gray', alpha=0.2, label='Scaled Volatility')\n",
|
|
|
|
|
|
" ax5.set_title(\"Time-Series Evolution of Best Thresholds\", fontsize=14)\n",
|
|
|
|
|
|
" ax5.legend()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" plt.tight_layout()\n",
|
|
|
|
|
|
" plt.show()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# =============================================================================\n",
|
|
|
|
|
|
"# 使用示例\n",
|
|
|
|
|
|
"# =============================================================================\n",
|
|
|
|
|
|
"analyzer = KalmanDynamicAnalyzer(df_raw)\n",
|
|
|
|
|
|
"opt_data = analyzer.run_rolling_optimization(days_window=50) # 以20个交易日为窗口寻优\n",
|
|
|
|
|
|
"analyzer.visualize_relationships()"
|
2025-09-24 23:14:14 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"id": "fb6613c50a442644",
|
2025-09-20 00:04:51 +08:00
|
|
|
|
"outputs": [
|
2025-12-16 00:36:36 +08:00
|
|
|
|
{
|
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"Rolling Optimization: 100%|██████████| 1127/1127 [00:06<00:00, 186.98it/s]\n"
|
2025-09-24 23:14:14 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
2025-09-16 09:59:38 +08:00
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"<Figure size 2000x1500 with 5 Axes>"
|
2025-09-16 09:59:38 +08:00
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB8AAAAXMCAYAAABeHZZYAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Ql8XGd59/1r9k2LtVjencSxSYLjNGQHAmRjKbRQCAWaQjeg5CkhUErZSthCAhRKKVDaEngCYSnwBpKwlIeEhKWUbCUhcVZsbCd2bMeWrHWk2ef9/O/xUUbjkTSjxRqNfl9Q5Jk5c849Z845Otd93YuvWCwWDQAAAAAAAAAAAACARc6/0AUAAAAAAAAAAAAAAGAukAAHAAAAAAAAAAAAADQFEuAAAAAAAAAAAAAAgKZAAhwAAAAAAAAAAAAA0BRIgAMAAAAAAAAAAAAAmgIJcAAAAAAAAAAAAABAUyABDgAAAAAAAAAAAABoCiTAAQAAAAAAAAAAAABNgQQ4AAAAAAAAAAAAAKApkAAHgEns2bPHTjjhBPv4xz/e9Pvo3e9+t/us3s/mzZvthS98oV177bVWLBZtMbjgggsmfIbKH8/rXvc699Oo34M+x3wdy9/97ncXvCzz9dn085Of/GT8ef3be17LzDXty/la92w8+OCDrlzvec97jnht+/bt7rW/+7u/m/H69f7PfvazdjTVes7WUrY777zTLaffAAAAWDi33XabvepVr7JnPOMZds4557j4Y2BgYEnet1XGsqeccorbNz//+c9tMSiPyar9lN/LL0Q8Uc/3oOOwEWLH+SrLfH02/QwNDY0/f9VVV7nn5qtOoVHrK1SHps994403HvHa17/+dffat771rRmte6GuiXMZZ2s95fVzADDfgvO+BQDAovGRj3zEwuGwZTIZ+8UvfmEf+9jHzOfz2V/8xV/M2zYVJH3lK1+xiy66yE466aQZr+e9732vJZNJ9+/PfOYz7t/VkoBoXg8//LA7juShhx6yxejLX/6yrV27dvxz1OvpT3+6LV++3O64444jXvuf//kf9/u8886zRviu1Ejhz//8z62trW3KZS+99NI52+7xxx9v//iP/+h+z0XZAAAAUL9vf/vbdsUVV7h73le84hX229/+1v7zP//TDh48aF/60pea4r68XtrW5Zdf7v49MjJi/9//9//Z//k//8cly7Zs2TJv252Le9/Ozk53jy2qS3jf+95nZ5xxhkviS3d395yWGY1Jx9LZZ589/u/FZi7qpp73vOe5erRf/epX9kd/9EcTXtNzjRKP65xXg4xa6vp0bs9V0vr5z3++rV+/fs7KBgDTIQEOABj34he/2BKJhPv3H//xH9trXvMa+7d/+7d5T4B/7nOfszVr1swqAV5eMaGgRT3XX/ayl81RKbEYlAfZizHgluuuu87OOuusGVe0qcGKgu7rr7/edu7caccdd9z4a7fffrsFAgF7znOeYwtN34/O+5e//OXTVrQ9+9nPnrPtqvJtuutCPWUDAABA/XSvpaTuv/7rv44/F4lE7P/+3//r7sVmExc2yn15vdrb2yfcp77kJS9xibIvfOEL89pjei7ufePx+HjZ1RBdCfB169YRjy/RBLjqYh555BFbbOaibmrDhg0uwavYu1w+n7e77rrLrXfFihW20JRkVnlqqeuby3q1E0880f3MVdkAYDoMgQ4AmJRuTDUM3aFDh9hLaHjNkACfC16Lcq+FueRyORdE/t7v/Z4tW7ZsAUsHAACApUz3pQcOHHCjFlUmWf7qr/7KNeiEuXv21atX244dO9gdWBS8GFy9d4eHh22pUoN0XeM0BZln69atLsGu1wAARw8JcACYAxrmS8Nun3/++XbyySfbH/7hH9r3vve9qnPdaFnNK37uuefaaaedZm9605vsySefnLCsWstqCDLNh6YW51/72tfsHe94h0telc9zXD7Xzny0Cn/88cctFApNaAmuYO
btb3+7a9l7+umn22tf+1r73//93yPee/PNN9vFF1/sPqPmdPubv/kb27Vr1/jrmgdM5b7wwgvdYw1X7s0ddTTmBNM+f8tb3uI+w7Oe9Sw3/Hs2m52wjFcWtWL/5Cc/6YZr0lB9lTQsnb5z9WLQ9/WJT3zCxsbGJiyjRgRqCa+AR3O6vehFL3K9CqrRkHca0l37WL0O9N3ruXL333+//eVf/qU7JrTcu971LhdkzcS+ffvssssus1NPPdXtC7V6nunxon2mnhvltP9Uzg9+8IMTWkD/+7//u5trXvtD++XKK68cH8a+XtoHe/futcHBQddoQ59Jx10lnT+vfOUr3bmlc1DzYVfut/L5q/SZ/v7v/94tW20er3L63tWiu3K56c4Z79qgnyeeeMJuuOGGqvPl1Urfoc7b8lbnv/nNb9y+rRxuTb3E9d3rPNXPm9/8ZvfcTBQKBddDxftOdW7/8z//s7vmebzP5U1PoGW85yabL6zWOcBrMdXcZLWULZ1Ou3Oy2rDsOq50jVCPBwAAAFQXDAbd/dV///d/249//OMJja8V01TrHajRjXSPqZhCPZXvueeeCa8rVlI8p3t2xWQaelvrn+y+Uvdr3/zmN93w6+ppPZv7cu89cz0vr+479+/f74YXL6dGrSqLYjd93re97W0uDipXS6w1k/vyuaTvUCPO6XNo2xryfSYxmeJuDdGsuhgdH4rLta7Ke3LVsbzxjW90MaJiwVe/+tX2y1/+smrZfve737nGGFruuc99btX4+Pvf/7699KUvdfU/2va//Mu/TIh76qHYUCPw6djVd3bLLbfMaD06T7TPHnjggQnPKy7U8//1X/81o/qJWijW9RLg3u/KeLyWeLFynmiVXd+FYjCdk5NJpVLuvFA5KnufT3fOzHXdVLUG6ZMNf65jUMeivnuNfKbrWGXdT630nb7//e93x6w+a2W9pHdO6UfXNu1P7/FUQ5zPZR3dZHOA11o21YNVq3NSPZD2oerRAKAcQ6ADwCzpJl4JJM2ZrWBcAdD/+3//zwVoClj/+q//esLySmYqwaT5vPRbyW0lBjXUuBfAveENb7COjg5XAfDYY4+5YFXBlX4/7WlPqzqn7VzMyaPEoQJtBQ8/+MEP3JzBCgZUSSEKEhSYdXV1ufIryfbd737X/uzP/sy+/vWvuwDRu3nV/GUKat75zne69WlYcn0uJcb9fr9LIClh1N/fbx/96EfdTb+SgzJX8wtNRi1v9bk0PJXKd+utt9pXv/pVN+9a5TBLo6Oj9id/8ifW19fngiUFEuWUGNfNt8qv/aDvVJ9V89hdc80148u99a1vtXvvvdde//rXu+38+te/tquuuspaW1vdceNREl77Sfvob//2b91yClrUA0ABqhfAaT0aOkvBm743Bat33323C/b1/dRKwZUSsppvTwl1bUfz3anFdr09hTXUl/bPj370Ixekem677TZ3DOgY9uh4V/CjoEyfV8fWF7/4Rbe/dTzUS8GOjrvyub91LpbPha1zVOeq5qN797vf7Y4DfXc6J6sF+9u2bXMJ4ZUrV7ogcqq5qjR8o+Yr1DlaPtdXLedM+TxY+uw6p2czX56mMTjzzDPd/tD1SceSlwwvb3Guih1VOun8VkMc0XevbatCcKp5sqv59Kc/bf/xH//hGr7oc6m3is4BfWbtd/HmBtRxrYYjqlzQtU7q3d5cq6VsGppTx6z2T29v7/j3o8+qVv1q6ECvJQAAgKnpnlkxgGJGNSDVvbFiBS/uLKd7Zt3Xer3Ddb+phsyK4aLRqIuf9f5HH33ULrnkEjf0thIoisPVQPUP/uAPjlin4irFu7o3Lr/Hn8l9ufee2d7LKmntjbym+M5riK1YzfPTn/7UxSdKlOleVUkfxTEqo2IwxZa1xloLeV+uBK3iMMUNKqPqRNTQXMeCYrhaYzLFzoqLFddoP2kZJRT1/WofKuHt7U91MAiHw+64ULykRLris5tuusnVC3iUePvTP/1Td2
yoPkbLeUk7fdeiJO4//dM/uaSs9ps+z+c//3l78MEHXcMDxV/17AvVQfT09Lg6A8XhqqNQHFcvJZY/9KEPuWOhfD8
|
2025-09-16 09:59:38 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
2025-11-07 16:26:00 +08:00
|
|
|
|
"output_type": "display_data",
|
|
|
|
|
|
"jetTransient": {
|
|
|
|
|
|
"display_id": null
|
|
|
|
|
|
}
|
2025-09-16 09:59:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
2026-01-25 23:26:03 +08:00
|
|
|
|
"execution_count": 16
|
2025-09-16 09:59:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 2
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython2",
|
|
|
|
|
|
"version": "2.7.6"
|
|
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|