Files
NewQuant/data/ analysis/Volume.ipynb

1013 lines
1.2 MiB
Plaintext
Raw Normal View History

{
"cells": [
{
2025-07-10 15:07:31 +08:00
"cell_type": "code",
"execution_count": 8,
"id": "b93c7ca1",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:34.210364Z",
"start_time": "2025-06-21T18:08:33.967513Z"
}
},
2025-07-10 15:07:31 +08:00
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import talib as ta # Make sure TA-Lib is installed: pip install TA-Lib\n",
"import statsmodels.api as sm\n",
"\n",
"import warnings\n",
"\n",
"# 忽略所有警告\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"# --- 0. Configure your file path ---\n",
"# Please replace 'your_futures_data.csv' with the actual path to your CSV file\n",
2025-07-10 15:07:31 +08:00
"file_path = '/mnt/d/PyProject/NewQuant/data/data/KQ_m@CZCE_MA/KQ_m@CZCE_MA_min60.csv'\n"
]
},
{
"cell_type": "code",
2025-07-10 15:07:31 +08:00
"execution_count": 9,
"id": "60a48bac",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:34.354678Z",
"start_time": "2025-06-21T18:08:34.306963Z"
}
},
2025-07-10 15:07:31 +08:00
"outputs": [],
"source": [
"\n",
"# --- 1. Data Loading and Preprocessing ---\n",
"def load_and_preprocess_data(file_path):\n",
" \"\"\"\n",
" Loads historical futures data and performs basic preprocessing.\n",
" Assumes data contains 'datetime', 'open', 'high', 'low', 'close', 'volume' columns.\n",
" \"\"\"\n",
" try:\n",
" df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
" # Ensure data is sorted by time\n",
" df = df.sort_index()\n",
" # Check and handle missing values\n",
" initial_rows = len(df)\n",
" df.dropna(inplace=True)\n",
" if len(df) < initial_rows:\n",
" print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
"\n",
" # Check if necessary columns exist\n",
" required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
" if not all(col in df.columns for col in required_columns):\n",
" raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
"\n",
" print(f\"Successfully loaded {len(df)} rows of data.\")\n",
" print(\"First 5 rows of data:\")\n",
" print(df.head())\n",
" return df\n",
" except FileNotFoundError:\n",
" print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
" return None\n",
" except Exception as e:\n",
" print(f\"Error during data loading or preprocessing: {e}\")\n",
" return None\n",
"\n",
"# --- 2. Stationary Indicator Calculation Function ---\n",
"def calculate_stationary_indicators(df, volume_window=10, price_lag=5):\n",
" \"\"\"\n",
" Calculates stationary indicators based on volume and price.\n",
"\n",
" Parameters:\n",
" df (pd.DataFrame): K-line data containing 'close' and 'volume' columns.\n",
" volume_window (int): Window size for calculating volume indicators (e.g., 10 for the past 10 periods' average volume).\n",
" price_lag (int): Lag period for calculating future returns (e.g., 5 for future 5 periods' returns).\n",
" \"\"\"\n",
" df_processed = df.copy()\n",
"\n",
" # --- Stationary Volume Indicators ---\n",
" # 1. Volume Rate of Change (VROC)\n",
" df_processed['volume_roc'] = df_processed['volume'].pct_change(volume_window) * 100\n",
"\n",
" # 2. Volume to Moving Average Ratio\n",
" df_processed['volume_ma_ratio'] = df_processed['volume'] / df_processed['volume'].rolling(window=volume_window).mean()\n",
"\n",
" # 3. Normalized Volume (Z-score standardization)\n",
" # Using rolling mean and rolling standard deviation to avoid look-ahead bias and ensure local stationarity\n",
" rolling_mean_vol = df_processed['volume'].rolling(window=volume_window).mean()\n",
" rolling_std_vol = df_processed['volume'].rolling(window=volume_window).std()\n",
" # Avoid division by zero\n",
" df_processed['volume_normalized_zscore'] = (df_processed['volume'] - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)\n",
"\n",
" # --- Stationary Price Indicators ---\n",
" # 1. Current Period Log Return\n",
" df_processed['log_return'] = np.log(df_processed['close'] / df_processed['close'].shift(1))\n",
"\n",
" # 2. Future N-period Log Return (Our target variable for research)\n",
" # shift(-price_lag) moves future data up to align with the current row for future return calculation\n",
" df_processed['future_log_return'] = np.log(df_processed['close'].shift(-price_lag) / df_processed['close'])\n",
"\n",
" # 3. MACD Histogram Difference (Measures momentum change rate, potentially capturing trend initiation)\n",
" try:\n",
" macd, macdsignal, macdhist = ta.MACD(df_processed['close'], fastperiod=12, slowperiod=26, signalperiod=9)\n",
" df_processed['macd_hist_diff'] = macdhist.diff(1)\n",
" except Exception as e:\n",
" print(f\"TA-Lib MACD calculation failed, possibly due to installation or data issues: {e}. 'macd_hist_diff' will contain NaN.\")\n",
" df_processed['macd_hist_diff'] = np.nan\n",
"\n",
" # Drop rows with NaN values resulting from rolling windows and shift operations\n",
" df_processed.dropna(inplace=True)\n",
" if df_processed.empty:\n",
" print(\"Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.\")\n",
" else:\n",
" print(f\"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.\")\n",
" return df_processed\n",
"\n",
"# --- 3. Analysis and Visualization Function ---\n",
"def analyze_and_visualize(processed_df):\n",
" if processed_df.empty:\n",
" print(\"No data available for analysis. Please check data loading and indicator calculation steps.\")\n",
" return\n",
"\n",
" print(\"\\n--- Statistical Description of Indicators ---\")\n",
" print(processed_df[['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore',\n",
" 'log_return', 'future_log_return']].describe())\n",
"\n",
" # --- Correlation Analysis ---\n",
" print(\"\\n--- Correlation between Volume Indicators and Future Returns ---\")\n",
" volume_indicators = ['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore']\n",
" for indicator in volume_indicators:\n",
" if indicator in processed_df.columns and 'future_log_return' in processed_df.columns:\n",
" correlation = processed_df[indicator].corr(processed_df['future_log_return'])\n",
" print(f\"Correlation between '{indicator}' and 'future_log_return': {correlation:.4f}\")\n",
" else:\n",
" print(f\"Column '{indicator}' or 'future_log_return' does not exist. Skipping correlation calculation.\")\n",
"\n",
" # Plot correlation heatmap\n",
" plt.figure(figsize=(9, 7))\n",
" sns.heatmap(processed_df[volume_indicators + ['future_log_return']].corr(), annot=True, cmap='coolwarm', fmt=\".2f\")\n",
" plt.title('Correlation Matrix: Volume Indicators vs. Future Log Returns', fontsize=16)\n",
" plt.show()\n",
"\n",
" # --- Conditional Analysis: Future Returns based on Volume Anomaly ---\n",
" # Define thresholds for abnormal volume (using quantiles of Z-score to adapt dynamically)\n",
" if 'volume_normalized_zscore' in processed_df.columns:\n",
" low_vol_threshold = processed_df['volume_normalized_zscore'].quantile(0.2)\n",
" high_vol_threshold = processed_df['volume_normalized_zscore'].quantile(0.8)\n",
"\n",
" def categorize_volume(zscore):\n",
" if zscore <= low_vol_threshold:\n",
" return 'Low Volume'\n",
" elif zscore >= high_vol_threshold:\n",
" return 'High Volume'\n",
" else:\n",
" return 'Normal Volume'\n",
"\n",
" processed_df['volume_category'] = processed_df['volume_normalized_zscore'].apply(categorize_volume)\n",
"\n",
" print(\"\\n--- Statistics of Future Log Returns by Volume Category ---\")\n",
" print(processed_df.groupby('volume_category')['future_log_return'].describe())\n",
"\n",
" # Plot box plot of future returns by volume category\n",
" plt.figure(figsize=(10, 6))\n",
" sns.boxplot(x='volume_category', y='future_log_return', data=processed_df, order=['Low Volume', 'Normal Volume', 'High Volume'], palette='viridis')\n",
" plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)\n",
" plt.xlabel('Volume Category', fontsize=12)\n",
" plt.ylabel('Future Log Return', fontsize=12)\n",
" plt.grid(True, linestyle='--', alpha=0.7)\n",
" plt.show()\n",
"\n",
" # Plot histogram of future returns, categorized by volume\n",
" plt.figure(figsize=(12, 7))\n",
" sns.histplot(data=processed_df, x='future_log_return', hue='volume_category', kde=True, bins=70,\n",
" palette={'Low Volume': 'red', 'Normal Volume': 'blue', 'High Volume': 'green'},\n",
" alpha=0.6, line_kws={'linewidth':2})\n",
" plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)\n",
" plt.xlabel('Future Log Return', fontsize=12)\n",
" plt.ylabel('Frequency', fontsize=12)\n",
" plt.grid(True, linestyle='--', alpha=0.7)\n",
" plt.show()\n",
" else:\n",
" print(\"Column 'volume_normalized_zscore' not found. Skipping volume category analysis.\")\n",
"\n",
"\n",
" # --- Price Chart with Indicator Overlay (Simplified to line plot; consider mplfinance for OHLC charts) ---\n",
" print(\"\\n--- Price Chart with Volume Indicator Overlay ---\")\n",
" # Select a segment of data for visualization, ensuring sufficient data points\n",
" if len(processed_df) > 100: # Need at least 100 data points to select a segment\n",
" sample_size = min(200, len(processed_df) // 2) # Show max 200 data points or half of data\n",
" plot_df = processed_df.sample(n=sample_size, random_state=42).sort_index() # Randomly sample and sort to maintain time continuity\n",
" else:\n",
" plot_df = processed_df.copy() # If data volume is small, plot all\n",
"\n",
" if not plot_df.empty:\n",
" fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1, 1]})\n",
"\n",
" # Subplot 1: Price Trend\n",
" ax1.plot(plot_df.index, plot_df['close'], label='Close Price', color='blue', linewidth=1.5)\n",
" ax1.set_title(f'Futures Price Trend, Normalized Volume, and Future Returns (Sample Period: {plot_df.index.min().strftime(\"%Y-%m-%d %H:%M\")} to {plot_df.index.max().strftime(\"%Y-%m-%d %H:%M\")})', fontsize=16)\n",
" ax1.set_ylabel('Price', fontsize=12)\n",
" ax1.grid(True, linestyle='--', alpha=0.6)\n",
" ax1.legend()\n",
"\n",
" # Subplot 2: Normalized Volume Indicator\n",
" ax2.bar(plot_df.index, plot_df['volume_normalized_zscore'], color='grey', alpha=0.7, label='Normalized Volume (Z-score)')\n",
" if 'volume_normalized_zscore' in processed_df.columns:\n",
" ax2.axhline(high_vol_threshold, color='green', linestyle='--', linewidth=0.8, label=f'High Vol Threshold ({high_vol_threshold:.2f})')\n",
" ax2.axhline(low_vol_threshold, color='red', linestyle='--', linewidth=0.8, label=f'Low Vol Threshold ({low_vol_threshold:.2f})')\n",
" ax2.set_ylabel('Normalized Volume', fontsize=12)\n",
" ax2.grid(True, linestyle='--', alpha=0.6)\n",
" ax2.legend()\n",
"\n",
" # Subplot 3: Future Log Return\n",
" ax3.plot(plot_df.index, plot_df['future_log_return'], label='Future Log Return', color='purple', linewidth=1.5)\n",
" ax3.axhline(0, color='black', linestyle='--', linewidth=0.8) # Zero return line\n",
" ax3.set_ylabel('Future Log Return', fontsize=12)\n",
" ax3.set_xlabel('Time', fontsize=12)\n",
" ax3.grid(True, linestyle='--', alpha=0.6)\n",
" ax3.legend()\n",
"\n",
" plt.tight_layout()\n",
" plt.show()\n",
" else:\n",
" print(\"Selected plot time range has no data. Adjust time range or check data volume.\")\n",
"\n"
2025-07-10 15:07:31 +08:00
]
},
{
"cell_type": "code",
2025-07-10 15:07:31 +08:00
"execution_count": 10,
"id": "9ab3d054",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:35.089167Z",
"start_time": "2025-06-21T18:08:34.428402Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-07-10 15:07:31 +08:00
"Successfully loaded 5811 rows of data.\n",
"First 5 rows of data:\n",
2025-07-10 15:07:31 +08:00
" open high low close volume open_oi \\\n",
"datetime \n",
"2021-12-31 14:00:00 2499.0 2509.0 2476.0 2478.0 198556.0 974005.0 \n",
"2022-01-04 09:00:00 2516.0 2584.0 2513.0 2567.0 669046.0 988671.0 \n",
"2022-01-04 10:00:00 2567.0 2597.0 2567.0 2595.0 155426.0 828049.0 \n",
"2022-01-04 11:00:00 2595.0 2600.0 2585.0 2586.0 119967.0 843256.0 \n",
"2022-01-04 13:00:00 2587.0 2607.0 2586.0 2592.0 127710.0 841771.0 \n",
"\n",
2025-07-10 15:07:31 +08:00
" close_oi underlying_symbol \n",
"datetime \n",
"2021-12-31 14:00:00 988671.0 CZCE.MA205 \n",
"2022-01-04 09:00:00 828049.0 CZCE.MA205 \n",
"2022-01-04 10:00:00 843256.0 CZCE.MA205 \n",
"2022-01-04 11:00:00 841771.0 CZCE.MA205 \n",
"2022-01-04 13:00:00 862642.0 CZCE.MA205 \n",
"Indicators calculated. 5772 rows of data remaining for analysis.\n",
"\n",
"--- Statistical Description of Indicators ---\n",
" volume_roc volume_ma_ratio volume_normalized_zscore log_return \\\n",
2025-07-10 15:07:31 +08:00
"count 5772.000000 5772.000000 5772.000000 5772.000000 \n",
"mean inf 0.994263 -0.015035 -0.000006 \n",
"std NaN 0.502843 0.918518 0.005649 \n",
"min -100.000000 0.000000 -2.236018 -0.056908 \n",
"25% -48.237935 0.614125 -0.735016 -0.002677 \n",
"50% -1.175509 0.909233 -0.176304 0.000000 \n",
"75% 83.348551 1.272413 0.574619 0.002653 \n",
"max inf 4.738222 2.744064 0.049518 \n",
"\n",
" future_log_return \n",
2025-07-10 15:07:31 +08:00
"count 5772.000000 \n",
"mean -0.000012 \n",
"std 0.012453 \n",
"min -0.079841 \n",
"25% -0.007286 \n",
"50% 0.000000 \n",
"75% 0.007021 \n",
"max 0.071571 \n",
"\n",
"--- Correlation between Volume Indicators and Future Returns ---\n",
"Correlation between 'volume_roc' and 'future_log_return': nan\n",
2025-07-10 15:07:31 +08:00
"Correlation between 'volume_ma_ratio' and 'future_log_return': -0.0142\n",
"Correlation between 'volume_normalized_zscore' and 'future_log_return': -0.0159\n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAMHCAYAAAAgsia3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAszRJREFUeJzs3Xd8jef/x/F3diTIJPZuYiQ2MatCUS01qlVqFC1K1bdaq6pqU2qVtmoUrVmilFbNkpbUnkGLVOxMK4ms8/vDL4cjCQnnJMbr+XicB7nPdd33dd3nPuO6P9ewMhgMBgEAAAAAzMY6pwsAAAAAAE8bGloAAAAAYGY0tAAAAADAzGhoAQAAAICZ0dACAAAAADOjoQUAAAAAZkZDCwAAAADMjIYWAAAAAJgZDS0AAAAAMDMaWrCYP//8U0OGDFHTpk1VtWpV+fr6ql69enr77bf1/fffKyoqKqeL+MhmzJghHx8fzZgxI9uOGRAQIB8fH507dy7bjplVnTp1ko+Pj3x8fNS7d+/7pv3111+NaX18fHTp0qVsKmXmpJYrp3Ts2FE+Pj6aNGlSptKPHj1aPj4+eueddx76mE/CNWYJ586dM77e2VX3jD5DVq1aJR8fHw0ePDhbyoGccfdnX0aPTZs25XQxHzup5yY4ODini5Ild3/G3P2oVKmSmjRpoqFDh+rEiRM5XUyYEQ0tmF1UVJTefvttdevWTatWrVJiYqL8/f3VtGlTlSpVSvv379e4cePUqFEjHTx4MKeL+1gZPHiwfHx8tGrVqpwuitls375dERERGT7/008/WeS4Od1AMpfXXntNkrR69WolJyffN21CQoLWrl1rkg94GMHBwfLx8VGnTp1yuijPhHr16ql169bpPgoWLPhI+6bR/nhq2rSp8TWuXr26oqOjtXLlSrVp00a///67WY6R2rALCAgwy/6QdbY5XQA8Xa5fv64OHTrozJkzKlWqlEaNGqXq1aubpElISFBgYKBmzJih8PDwHCrpk+v7779XYmKivLy8crooD+Tr66sjR45o9erV6tGjR5rnL168qL/++kt+fn46fPhwDpTwwdavX5+jx2/WrJlGjx6t8PBwbd++XQ0bNsww7ebNmxUTEyN3d3e+WJ9wL774oipVqqQ8efLkdFGQDd599135+/vndDGQjQYOHKgiRYoY/46Ojlbv3r21f/9+DR8+XM8//7wcHR1zsIQwByJaMKtRo0bpzJkzKly4sJYsWZKmkSVJ9vb2euONN7R69WqVKlUqB0r5ZCtWrJhKly4tOzu7nC7KA7Vs2VJ2dnYZRuhWrVqllJQUtW3bNptLlnmlS5dW6dKlc+z4uXLl0ssvvyxJD4x0pj6fet7x5MqTJ49Kly6t/Pnz53RRAGQDNzc3DRw4UNLtRtf+/ftzuEQwBxpaMJuwsDD98ssvkqQhQ4bI1dX1vuk9PT3TbWitW7dOXbp0Uc2aNeXr66uGDRtqyJAhOnPmTLr7uXs8yaZNm9S5c2fVrFnTpP/23d3IVq5cqTfeeEPVqlVLMxbj8uXLGjdunF566SVVqlRJVapUUdu2bfXDDz8oKSkp0+ciMTFRP//8swYMGKBmzZqpatWqqlixopo2barRo0fr8uXLJulTw/uBgYHG83d3/+27x2/cb/xMXFycZs+erdatW6tKlSqqVKmSXn75ZU2ZMkVXr15Nk/7ubgUGg0HLli1TmzZtVLlyZVWrVk3dunV7pA97V1dXBQQE6NSpU2n2YzAYFBgYKEdHR73yyisZ7uP8+fOaPXu2OnfurBdeeEG+vr6qXr263nzzTS1dulQpKSkm6VPHvKS6ty986nm7uztNTEyMxowZo8aNG8vX19eku1R6XRDnzZsnHx8fNW3aVDdu3EhT5uXLl8vHx0cNGjQwy1jE1G6AW7duzXB/ly9f1p9//mmSXpKSkpK0ZMkStW/fXtWqVZOfn5+aNGmS7nX4IA8au5VR19e7t58+fV
r9+/dX7dq1VblyZbVt29ZkDMrBgwfVq1cv1apVSxUrVtQbb7yhnTt3Zlim+Ph4zZs3T6+//rqqV68uPz8/NW3aVBMnTlR0dHSW6nc/qeMOg4ODFRISor59+8rf31++vr5q3ry55s2bJ4PBkGEZZ8yYoSZNmhjHqg4aNEgXLlzI8HgP6u51+fJlTZgwQS1atFCVKlVUuXJlNW3aVIMHD9a+fftM0h46dEgTJ07Ua6+9prp168rX11d16tRRr1699Ndff6Vb186dO0uS/v77b5P3z72R0oe5vjLzeXz9+nVNmTJFLVq0UOXKlY3nrX379po2bZoSExMzPHepTp06JR8fH9WoUUO3bt3KMF2bNm3SjIW6cuWKRo8eraZNm8rPz0+VKlVSgwYN1KVLF82dO/eBx7aEu6/B9KQ33i8gIEBDhgyRJAUGBpq8lnd/zpnjvX3y5En1799f9erVU7ly5UzKkZSUpBUrVqhTp07G7/aAgAB99tlnunjx4kOfk6w4deqUhgwZooYNG8rX11c1a9ZUly5d7ttrISkpSfPmzdMrr7wiPz8/1a5dW/369dO///5rkS6Zd3/XREZGpluezJ7HwYMHq1GjRpJuf4/e+114d7r7DVnIqJ6Z+Q69+5qMiorS559/rgYNGsjX11cNGjTQqFGjdO3atXSP++uvv6pr167y9/dXhQoV5O/vr+bNm2vYsGE6fvx4Js7m44GugzCbrVu3Kjk5WXnz5n2obksGg0GDBw/W6tWrZWtrq+rVq8vDw0NHjx7VqlWr9Ouvv2r69Ol6/vnn080/f/58/fDDD/L19VX9+vV15coV2djYmKQZNWqUFi9erCpVquiFF15QWFiYrKysJEm7d+9Wnz59dPXqVRUuXFh16tRRQkKCDh8+rFGjRmnr1q365ptvMhUpiIyM1MCBA413pX18fBQXF6eQkBAtWrRI69at09KlS1W8eHFJkpOTk1q3bq29e/fq7Nmzqlq1qvE5SSpXrtwDjxkTE6OuXbsqJCREuXPnVq1atWRnZ6e///5b33zzjX755RctWLDApKvC3YYMGaJffvlF1apV0wsvvKCQkBD9+eef2r17t3744QdVqlTpgWVIT9u2bbVhwwatXLlSVapUMW7ftWuXwsLC1KJFi/t2j/r55581bdo0FSlSRCVKlFDVqlUVHh6u/fv3a9++ffrzzz81ffp04+tYrlw5tW7d2thobd26tcn+nJycTP6Ojo5W27Ztdf36dVWrVk0VKlR44GvcrVs37d69W1u2bNHw4cP15ZdfGp87fvy4Ro8eLVtbW02ZMkXu7u7G586dO2f84tu8eXOGr8W9KlasKG9vb508eVJr1qxR165d06QJDAxUcnKyKlWqpOeee07S7W66PXv21F9//SUHBwf5+/srd+7c2r9/vxYtWqRffvlFc+fOVYUKFTJVjkd17NgxjRo1Sl5eXqpdu7YuXLig/fv3q2/fvpo6dapsbW3Vv39/Pffcc6pdu7ZOnz6tAwcOqEePHlqwYEGaCPnly5fVo0cPnTx5Uq6urvLz85Ozs7OOHTumuXPn6rffftOiRYtUuHBhs9UhKChI8+fPV7FixVS3bl2Fh4dr7969mjBhgi5evKhPPvnEJH1cXJy6du2qAwcOyMnJSfXq1ZODg4OCgoK0bds2vfDCC1kuw86dO9WvXz9du3ZNHh4eql27tuzs7HT+/Hnjza6qVasa03/55ZcKDg5WmTJlVKFCBeXKlUthYWHaunWrtm7dqqFDh6pLly7G9PXr15e9vb2CgoLk6emp+vXrG59zc3Mz/v9Rr6+MPo/j4uLUoUMHnTx5Uu7u7qpVq5acnJwUHh6uM2fOaNasWXr77bcf+D4tXbq0qlSpov3792vTpk3GyPDdTpw4oaNHj8rT09P4WoSHh6tt27a6cuWKChUqpPr168vBwUFXrlzR8ePHdfToUXXv3v3BL9RjoGnTpjpw4ID27dunYsWKqVq1asbnzNmjZP/+/f
rss8+UL18+Va9eXfHx8XJ2dpYk3bhxQ71799bff/8tJycn+fr6ys3NTSdPntTSpUv122+/af78+SpfvrzZynOvbdu
"text/plain": [
"<Figure size 900x700 with 2 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"--- Statistics of Future Log Returns by Volume Category ---\n",
" count mean std min 25% 50% \\\n",
"volume_category \n",
2025-07-10 15:07:31 +08:00
"High Volume 1155.0 -0.000148 0.011627 -0.047952 -0.007024 -0.000368 \n",
"Low Volume 1155.0 0.000082 0.012478 -0.079841 -0.007213 0.000395 \n",
"Normal Volume 3462.0 0.000003 0.012711 -0.062969 -0.007457 0.000378 \n",
"\n",
" 75% max \n",
"volume_category \n",
2025-07-10 15:07:31 +08:00
"High Volume 0.006456 0.055263 \n",
"Low Volume 0.007588 0.051147 \n",
"Normal Volume 0.007031 0.071571 \n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2EAAAIkCAYAAACX7iNUAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAuZRJREFUeJzs3XlYVNX/B/D3zLAIKQIiyIBkQCAKiuCSShpqLmWLmqYmaOVWplJJot9yyUoM64eatrmhuYtm9dUsk0xEDQUUFDVxZxGQ1UCBmfv7w2ful2FAh2EZZni/nqcnOffM8Dnc4XA/955FIgiCACIiIiIiImoUUn0HQERERERE1JwwCSMiIiIiImpETMKIiIiIiIgaEZMwIiIiIiKiRsQkjIiIiIiIqBExCSMiIiIiImpETMKIiIiIiIgaEZMwIiIiIiKiRsQkjIiIiIiIqBExCaMmacCAAfD09BT/69ixI7p164Z+/fohKCgIy5Ytw9mzZx/6HkFBQfD09MTJkycbKeqHU7Xp1q1bauVNLU4ACAsLg6enJ/bs2aPvUBrE4cOHMX78ePj5+YmfMW1+/lU/l9X9t3HjxoZvgIFR/dwM8fNU3Tnu0qULBgwYgHfffRenTp3Sd4hGZdWqVfD09MSqVav0FsNrr70GT09PLF++XKv6n3zyCTw9PTFlyhSdv2dNfx+ak/LyckRHR+Ptt9/GM888gy5duqBr164YOHAgZs2ahZ9++gllZWX6DpOo3pjoOwCih/Hz88Pjjz8OALh37x7y8/ORmpqKv//+G+vXr0fPnj3x2WefoX379g0Ww4ABA5Ceno4//vgDzs7ODfZ9GsuePXswb948jBgxAuHh4foOp9GlpqZi1qxZUCqVeOqpp9C2bVtIJBLY2dlp/R6VP5dVubu71ym+kydPIjg4GD179sTmzZvr9F5UfwICAtC2bVsAQH5+PlJSUrB//34cOHAA8+bNw8SJE+vl+3h6egIALl68WC/vR7X3yiuv4NSpU/jxxx/x7rvvQiaT1Vi3rKwMP//8s/g60s25c+cwa9Ys3Lp1CxKJBB07dkSXLl0gkUiQnp6OQ4cO4eDBg4iMjMR///tfWFhY1On78feMmgImYdSkjR49GiNHjlQrEwQBf/31Fz777DP8/fffGDt2LLZv366RiC1btgylpaWQy+WNGXKNNm7ciPLycjg4OOg7lEd67733MGXKFNjb2+s7lHp36NAhlJeXY/r06Xj33Xd1eo/qPpdk3KZOnYpevXqJX5eWluKDDz7Ab7/9hoiICAwdOtQgfrfp0YYOHYpPPvkEOTk5+OuvvxAYGFhj3T/++AMFBQWwtbXFgAEDGjFK43Hu3Dm89tprKC0tRWBgIP7zn/9o/D3Py8vDxo0bsX79epSXl9c5CSNqCjgckQyORCJB//79sWvXLnTo0AG5ubn48MMPNerJ5XK4ubk1mc7axcUFbm5uMDU11Xcoj2Rvbw83Nze0atVK36HUu4yMDACo8UkWkTYsLCzEfqe8vBxHjx7Vc0RUXywsLPD8888DwCOH0KqOv/jiiwbRtzc15eXlmD17NkpLSzFo0CCsWbOm2pEttra2eO+997B161aYmZnpIVKi+sckjAyWlZUV5s+fDwA4ceIEUlJS1I7XNNeqrKwMa9euxciRI9GtWzd4e3ujb9++GDVqFD7//HMUFBQAePDH1dPTE+np6QCAgQMHqs0LUb3vyZMn4enpiaCgIJSWlmLFihUYNmwYunbtqnZnVJsx/3///TfeeOMN9OzZE127dsUrr7yCH3/8sdq6j5pLVt3cigEDBmDevHkAgL1796q1JygoSKz3qDlh//3vfzFx4kT07NkT3t7eCAwMxLx583D16tVq61du+4kTJ/DGG2+gR48e6NKlC0aMGFFjGx+loqIC27Ztw9ixY+Hv7w8fHx8MHjwYn3zyCW7fvl3tz0PVpnnz5lXb9vryqJ+h6vMVFhYmlgUFBSE4OB
jAg89C5fNT+bOky7mvWp6RkYH58+ejf//+6Ny5s1ocAPDrr7/izTffxFNPPQVvb288/fTTmDNnDi5fvqzTz6O2srKysGTJEgwePBg+Pj7w9/cXn3orFIpqXyMIAnbv3o2RI0eia9eu6NWrFyZPnoyEhAS139P64uDgAGtrawDAnTt3qq2j7c9RdW5Uqs5DU/Ubj5ozVVM7temnKn8mS0pK8MUXX+DZZ58V+8i5c+dq/F6pxMXFYfr06ejTpw86d+6MHj16YPDgwZgzZw7i4+Mf/cOsRnp6Oj744AMEBATAx8cHQ4YMwapVq3Dv3j21eitXroSnpycWLFhQ43udPXsWnp6eePrpp1FRUfHI760aWhgTE4O8vLxq69y+fRvHjh1Tqw/Url96lEf93aipn6lcfuXKFYSEhKB3797w9fXFqFGjcOjQIbHumTNnMH36dDz11FPo0qULXn31VRw/frzGmO7du4f169djzJgx6N69u3huPv/8c+Tn59eqfb/88gtu3rwJU1NTLFq0CFLpwy9Lu3TpghYtWohfp6en47vvvkNwcDCeeeYZeHt7o3v37hg3bhy2b98OpVKp9nptf89Url69igULFmDQoEFiP/Taa69h3759NcaYn5+PTz75RIwnMDAQn376KYqKih76d0GXz40qbgCIjo7Gq6++Cn9/f3h6euLmzZvidUtiYmKN8S5atAienp74/PPPa6xDDYPDEcmg9evXD9bW1igoKEBcXBy8vb0fWl+pVGLq1Kk4fvw4WrZsie7du8PKygp5eXm4fv061q1bhxdeeAHW1tZwcXHBiBEjcPDgQZSUlGDIkCGwtLQU36vqHKL79+8jKCgIaWlp6N69Ozp27CgmdNr4/fffsWXLFri6uiIgIADZ2dk4ffo05s6diwsXLmhcJOtiyJAhSEpKQkJCAlxcXODv7y8ec3V1feTrBUFAWFgYfvzxR5iYmKB79+5o06YNzp07hz179uDAgQNYuXIl+vXrV+3ro6Oj8fXXX6NTp054+umnkZ6ejqSkJMydOxcFBQWYNGmS1m0pKyvDtGnTEBcXB3Nzc/Tq1QstW7ZEYmIiNm/ejF9++QXr1q1D586dAQBeXl4YMWIETp8+jRs3bqjN69Km7Y3h6aefhpmZGWJjY2FnZ4enn35aPGZjY1Nv3+fatWsYMWIETE1N4efnB0EQxPevqKjAnDlzcODAAZiZmaFz585wcHDAtWvX8PPPP+P333/HqlWrajzH9eHs2bOYMmUKCgoKIJfLMWjQIBQXF+Pvv/9GYmIifv/9d3z99dcad8QXL16Mbdu2QSqVonv37mjbti0uXbqECRMm1NucrcqUSiVKSkoAAG3atFE7Vtufo+rzuXfvXgDAiBEj1N6vct9TF9r0U8XFxRg7diwyMzPh7++PJ598EklJSfjxxx8RHx+Pffv2qT0l37t3r3hzp0uXLujVqxfu3buH27dvY//+/bCxsUGPHj1qFeetW7cwcuRIsZ+5f/8+Tp48ia+++gpxcXHYuHEjzM3NAQDjxo3Dd999h59//hlz5syBlZWVxvtt2bIFAPDqq6/CxOTRlz5dunSBh4cHLl26hJ9++qnavmnv3r1QKBTo2rUrnnzySQC175ca2vnz57FkyRI4ODigd+/eyMjIQGJiIt555x1ERkbCxMQEISEhePLJJ9G7d29cuXIFSUlJmDx5MqKiotC9e3e197t9+zYmT56MS5cuwdraGj4+Pnjsscdw/vx5rFu3Dr/++is2b94MJycnreL7448/AKjPuayNffv2YcWKFXB2dkaHDh3g5+eHnJwcJCYmIiEhAceOHcPKlSshkUgA1O737MCBA5g7dy7u378PV1dX9O/fH8XFxTh79iw++OADnDhxAkuXLlV7fXZ2Nl577TXcuHED1tbWCAwMhFKpxL59+3D06FG4ublV2466fm6WLFmCrVu3olu3bnjmmWdw8+ZNSKVSTJgwAeHh4fjhhx/QrVs3jdfdvX
sX+/btg1Qqxfjx47X/wVP9EIiaoMDAQMHDw0OIjo5+ZN1JkyYJHh4ewpw5c9TKJ0yYIHh4eAgnTpwQy/7++2/Bw8N
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+wAAAJxCAYAAADYVjihAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XlcVFXjBvDnDvsiAgIqICoYuICKG6KoueRSmmury1tZWmm2aS6V2Wa2vqYtvplamZYl4FJZaZlLue8LaoqKgAqIyDLsc39/zG8GRmbYZmDmXJ7v59On6507954zzxxmztxzz5VkWZZBRERERERERDZFZe0CEBEREREREVFF7LATERERERER2SB22ImIiIiIiIhsEDvsRERERERERDaIHXYiIiIiIiIiG8QOOxEREREREZENYoediIiIiIiIyAaxw05ERERERERkg9hhJyIiIiIiIrJB7LAT2YgBAwYgLCxM/1/btm0RGRmJvn37YuLEiXj33Xdx/PjxSvcxceJEhIWFYd++ffVU6srp6pScnGyw3tbKCQBz5sxBWFgY4uLirF2UOvHnn3/i4YcfRpcuXfTvseq8/re/L43999VXX9V9BQSje91EfD8Zy7hjx44YMGAAnn/+eRw8eNDaRVSUpUuXIiwsDEuXLrVaGcaPH4+wsDB88MEH1dr+rbfeQlhYGJ544olaH9PU50NDUlxcjNjYWDz99NO488470bFjR3Tq1AkDBw7EjBkzsGnTJhQVFVm7mERkZfbWLgARGerSpQtatmwJACgoKMDNmzeRkJCA/fv3Y+XKlejRowcWLlyIFi1a1FkZBgwYgJSUFPzxxx8IDAyss+PUl7i4OMydOxejR4/GokWLrF2cepeQkIAZM2ZAo9GgZ8+e8PX1hSRJ8PHxqfY+yr8vb9emTRuzyrdv3z5MmjQJPXr0wOrVq83aF1lOTEwMfH19AQA3b97EyZMn8csvv2DLli2YO3cu/vOf/1jkOGFhYQCAs2fPWmR/VHPjxo3DwYMHsWHDBjz//POws7MzuW1RURE2b96sfx7VzqlTpzBjxgwkJydDkiS0bdsWHTt2hCRJSElJwbZt2/Dbb79h8eLF+Pnnn+Hi4mLW8djOiMTFDjuRjbnvvvswZswYg3WyLGPnzp1YuHAh9u/fjwcffBDff/99hU77u+++i/z8fPj7+9dnkU366quvUFxcjKZNm1q7KFV64YUX8MQTT8DPz8/aRbG4bdu2obi4GE8++SSef/75Wu3D2PuSlG3KlCmIiorS/zs/Px8vvfQSfv/9d7z//vsYOnSoEG2bqjZ06FC89dZbSE9Px86dO9G/f3+T2/7xxx/IysqCt7c3BgwYUI+lVI5Tp05h/PjxyM/PR//+/fHyyy9X+DzPzMzEV199hZUrV6K4uNjsDjsRiYtD4okEIEkS+vXrhx9//BGtWrVCRkYGXnnllQrb+fv7IyQkxGY+2IOCghASEgIHBwdrF6VKfn5+CAkJQaNGjaxdFItLTU0FAJNnyImqw8XFRf93p7i4GLt27bJyichSXFxccM899wBAlZdx6B6/9957hfjbbmuKi4vx7LPPIj8/H4MGDcJnn31mdMSct7c3XnjhBaxduxaOjo5WKCkR2Qp22IkE4uHhgXnz5gEA9u7di5MnTxo8bura8KKiInz55ZcYM2YMIiMjER4ejt69e2Ps2LF47733kJWVBUD7RSwsLAwpKSkAgIEDBxpcx6rb7759+xAWFoaJEyciPz8fH3/8MYYNG4ZOnToZnHGpzjWK+/fvx2OPPYYePXqgU6dOGDduHDZs2GB026qufTd2LeiAAQMwd+5cAEB8fLxBfSZOnKjfrqpr2H/++Wf85z//QY8ePRAeHo7+/ftj7ty5uHjxotHty9d97969eOyxx9C9e3d07NgRo0ePNlnHqpSUlOC7777Dgw8+iK5duyIiIgKDBw/GW2+9hevXrxt9PXR1mjt3rtG6W0
pVr6Hu/TVnzhz9uokTJ2LSpEkAtO+F8vmUfy/VJvvb16empmLevHno168fOnToYFAOAPj1118xefJk9OzZE+Hh4ejTpw9mzpyJ8+fP1+r1qKlr167hzTffxODBgxEREYGuXbvqR9OUlpYafY4sy1i/fj3GjBmDTp06ISoqCo8//jgOHz5s0E4tpWnTpvD09AQA3Lhxw+g21X0dddno3H7dvO7vRlXXeJuqZ3X+TpV/T6rVanz44Ye466679H8jZ8+eXaFd6fzzzz948skn0atXL3To0AHdu3fH4MGDMXPmTBw4cKDqF9OIlJQUvPTSS4iJiUFERASGDBmCpUuXoqCgwGC7JUuWICwsDPPnzze5r+PHjyMsLAx9+vRBSUlJlcfWDW/fvn07MjMzjW5z/fp1/P333wbbAzX7u1SVqj43TP2dKb8+MTERzz33HKKjo9G5c2eMHTsW27Zt02977NgxPPnkk+jZsyc6duyIBx54AHv27DFZpoKCAqxcuRL3338/unXrps/mvffew82bN2tUv59++glXrlyBg4MDFixYAJWq8q/iHTt2hLOzs/7fKSkp+OKLLzBp0iTceeedCA8PR7du3fDQQw/h+++/h0ajMXh+dduZzsWLFzF//nwMGjRI/3do/Pjx2Lhxo8ky3rx5E2+99Za+PP3798fbb7+N7OzsSj8XavO+0ZUbAGJjY/HAAw+ga9euCAsLw5UrV/TfW44cOWKyvAsWLEBYWBjee+89k9sQ2RIOiScSTN++feHp6YmsrCz8888/CA8Pr3R7jUaDKVOmYM+ePXB3d0e3bt3g4eGBzMxMXL58GStWrMCIESPg6emJoKAgjB49Gr/99hvUajWGDBkCV1dX/b5uv+a5sLAQEydOxIULF9CtWze0bdtW3/mvjq1bt2LNmjUIDg5GTEwM0tLScOjQIcyePRtnzpyp0KGqjSFDhuDo0aM4fPgwgoKC0LVrV/1jwcHBVT5flmXMmTMHGzZsgL29Pbp164YmTZrg1KlTiIuLw5YtW7BkyRL07dvX6PNjY2Px+eefo3379ujTpw9SUlJw9OhRzJ49G1lZWXjkkUeqXZeioiJMnToV//zzD5ycnBAVFQV3d3ccOXIEq1evxk8//YQVK1agQ4cOAIB27dph9OjROHToEJKSkgyuQ69O3etDnz594OjoiN27d8PHxwd9+vTRP+bl5WWx41y6dAmjR4+Gg4MDunTpAlmW9fsvKSnBzJkzsWXLFjg6OqJDhw5o2rQpLl26hM2bN2Pr1q1YunSpyYwt4fjx43jiiSeQlZUFf39/DBo0CDk5Odi/fz+OHDmCrVu34vPPP69wpu3111/Hd999B5VKhW7dusHX1xfnzp3DhAkTLHaNeXkajQZqtRoA0KRJE4PHavo66t6f8fHxAIDRo0cb7K/83x5zVOfvVE5ODh588EFcvXoVXbt2xR133IGjR49iw4YNOHDgADZu3Ggw+iY+Pl7/Q2DHjh0RFRWFgoICXL9+Hb/88gu8vLzQvXv3GpUzOTkZY8aM0f+dKSwsxL59+/DJJ5/gn3/+wVdffQUnJycAwEMPPYQvvvgCmzdvxsyZM+Hh4VFhf2vWrAEAPPDAA7C3r/rrXseOHREaGopz585h06ZNRv82xcfHo7S0FJ06dcIdd9wBoOZ/l+ra6dOn8eabb6Jp06aIjo5Gamoqjhw5gunTp2Px4sWwt7fHc889hzvuuAPR0dFITEzE0aNH8fjjj+Prr79Gt27dDPZ3/fp1PP744zh37hw8PT0REREBNzc3nD59GitWrMCvv/6K1atXIyAgoFrl++OPPwAYzhFRExs3bsTHH3+MwMBAtGrVCl26dEF6ejqOHDmCw4cP4++//8aSJUsgSRKAmrWzLVu2YPbs2SgsLERwcDD69euHnJwcHD9+HC+99BL27t2Ld955x+D5aWlpGD9+PJKSkuDp6Yn+/ftDo9Fg48aN2LVrF0JCQozWw9z3zZtvvom1a9ciMj
ISd955J65cuQKVSoUJEyZg0aJF+PbbbxEZGVnhebm5udi4cSNUKhUefvjh6r/wRNYkE5FN6N+/vxwaGirHxsZWue0
"text/plain": [
"<Figure size 1200x700 with 1 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"--- Price Chart with Volume Indicator Overlay ---\n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABckAAASlCAYAAABz6FF1AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XmczPUfB/DXzF72wLJY15JzHetYZxulUHRQpChEEZJQJFQiQo6So8gPOSu3EinKlftY57rJsuyuZZe92N2Z3x/T92tnd2Z37tn9fF7Px8Njxneuz2e+rzn2PZ/v56PR6/V6EBERERERERERERFJSOvuBhARERERERERERERuQuL5EREREREREREREQkLRbJiYiIiIiIiIiIiEhaLJITERERERERERERkbRYJCciIiIiIiIiIiIiabFITkRERERERERERETSYpGciIiIiIiIiIiIiKTFIjkRERERERERERERSYtFciIiIiIiIiIiIiKSlqe7G0Cu17p1a1y/fj3P64waNQq9e/d2TYMEtH//frzxxhu5tvv5+SEkJARPPPEE3nzzTQQFBVl936GhoQCAs2fP2t1OR7EkUzlVqFABf/31l5Na5Hg9e/bEgQMHsGTJEjRv3tzu+5s1axZmz54NAHjyyScxb948k9fbsGEDRowYgWbNmmHp0qV2P25ho7yWTPW/IL4WFGvXrsWoUaPQqVMnTJ48Oc/r6nQ6tG3bFtevX8fYsWPx2muv5Xv/77zzDv766y+89tprGDt2rE1tLMjPHxko7xODBg3Ce++9Z9FtzH3+5HTw4EEUK1bM3iYKI6/P7fLlyyMiIgK9e/dGxYoV3dA6saWmpuLZZ59F8eLFsX79emi1xmN4Tp06hWXLluHQoUOIjY2FRqNByZIlERwcjPDwcLRs2RItWrRwU+udx5bXv72PlZ1Wq0XRokVRvXp1PPvss+jWrRu8vLyc2g6FKz+frl27hjZt2jj0e2lMTAx27tyJ3bt349SpU4iPj4eXlxdCQkLw5JNPonfv3ihZsqTZ2ycnJ+P777/Hli1bcOPGDfj6+qJBgwZ48803ERERkev6aWlp2LdvH3bt2oVDhw4hOjoaGRkZCAoKQqNGjdCjRw80btw41+0yMjJw6NAh7Ny5EwcOHMC///6LtLQ0BAYGol69eujWrRuefPJJu56LkydP4vvvv8ehQ4dw7949lC5dGk899RQGDhxo8u+grKws/Pnnnzh16pT6LzExER4eHjh9+rRdbTl48CAOHz6s3q/yt8vy5cvRpEkTk7ex9bnNi06nQ2RkJHbt2oV9+/bh0qVLSE5ORkBAAOrUqYNOnTqhQ4cO0Gg0uW57584d/P3332ofoqKikJ6ejoiICPzwww9WPyc527Vy5UqsWbMGFy5cAABUr14dXbp0wauvvmqyPYo9e/Zg0aJFOH78ONLS0lC+fHm0a9cO/fr1g7+/v13tcgVHvBasfQ4uXbqEnTt34p9//sGZM2dw584deHt7o0qVKnjmmWfQo0cPk7dT3rPy8vbbb2P48OFWPQcKe3Kg0+mwYcMGbNiwAWfOnEFycjICAwNRtWpVtGvXDt27d7eqLampqdi2bZvR+0FKSgoqVaqEP//80+ztbt68iSVLliAqKgr//vsv7ty5g4yMDJQqVQrh4eHo3r272dd8Xmx5DzFnyJAh+P333wEAU6ZMwYsvvmh1ewDg1q1b+Pbbb7F9+3bExcWhWLFiaNKkCfr374+6deuavd2DBw/www8/YOPGjbh69Sq8vLxQq1YtdO/eHe3bt891/Xv37qFt27aoVKkSVq5cmWcOCjsWySXWqFEjVK5c2eRl1atXt+u+8ypsyaZTp04AAL1ej5iYGERGRuLs2bNYt24dlixZgmrVqrm5hfZr164d7ty5Y7QtNTUVW7ZsUS/38/MzurxEiRIua19Bt337dhw8eBBNmzZ1d1PIDbRaLTp37oxZs2ZhzZo1+RbJb9
26hZ07dwIAunTp4oomUiGlfP6YYm+xa+TIkVi3bh0mTZqEzp0723VfBU32z+2bN2/i2LFjWLp0KdasWYOFCxciPDzc7sfg96SH5s2bh5s3b2LcuHG5CuRLly7FxIkTodPpEBwcjObNm6NYsWK4c+cOTp06haNHj2L//v1CFsndoVSpUnj88ccBGIpGly9fxuHDh3H48GH89ttvWLhwYa7vc5TbsGHDcOTIEXh6eqJ27dpo2LAhkpKScOzYMcybNw+rVq3CwoULUbt27Vy3TUhIwOuvv44rV66oBeWEhATs3LkTO3fuxMcff4yePXsa3Wbjxo345JNPABgGoURERMDT0xNnzpzBpk2bsHnzZgwZMgTvvPOO0e0OHjyIN998EwBQunRpNG7cGL6+vrh48SL+/vtv/P333+jatSvGjRtnU0Hk999/x7Bhw5CZmYl69eqhYsWKOHnyJJYtW4bff/8dK1asyPW3aEpKCoYMGWL1Y1liwoQJOHPmjFW3sfW5zUt0dLT6XS8wMBBhYWEoVqwYoqOjsWfPHuzZswebNm3CzJkz4e3tbXTbw4cPY9SoUVb1wRJZWVkYOnQo/vjjD/j6+uLRRx8FAOzduxdjxozBnj178PXXX+d6jwaAH374AZMmTYJGo0GTJk0QFBSEw4cPY+7cudiyZQtWrFiR549COVkz0MRR7H0t2PIc9O7dG7GxsfDx8UFYWBiaNm2KW7duITIyEidPnsTq1auxePFilC9f3mSb/fz80K5dO5OX5VUczYs9Obh37x7eeecdHDx4EAEBAQgPD0exYsUQGxuLqKgoJCcnW10k//fff20q9l+6dAkLFixA8eLFUa1aNYSFhSErKwsXL17Epk2bsGnTJgwfPhxvv/22Vfdry3uIKZs2bcLvv/8OjUYDvV5v8/1cvnwZ3bt3R0JCAkJCQtC2bVtcu3YNW7ZswbZt2zBjxgw8/fTTuW6XlpaGN998E0ePHkWxYsXw+OOPIzU1Ffv27cOBAwfw1ltv4aOPPjK6TdGiRdGvXz9MmTIF69evz/NvjMKORXKJvfLKK8L9YVsQ5fxwv3z5Mnr37o2bN29izJgxWL58uVX3t2nTJkc2zyFyvokCUN+gAWDEiBEcfWeGr68v0tLSMG3aNPz888/ubk6hUhBfC7bq3Lkz5syZgxMnTuD8+fOoUaOG2euuX78emZmZqFWrFsLCwlzYSipsXPXHpWhyPm83btxA7969ceXKFXzyySf47bff3NQy8cTGxmLhwoWoV69erlF6Z86cUQvko0aNQs+ePeHh4aFertPp1AIuOUbVqlVz5f+vv/7CoEGDcPToUcyfP99pBczsCvvne3BwMEaNGoUXX3zRaFDI7du3MWTIEBw4cABDhw7Fpk2bjDINAJ9++imuXLmCiIgIfPfdd/D19QUA7NixA++88w4mTpyIpk2bolatWuptPD098fLLL6NHjx6oU6eOul2v1+OHH37A5MmTMWPGDDRu3BjNmjVTL9doNGjXrh3eeOONXKMglSLSzz//jEaNGuGll16y6jmIjY3FyJEjkZmZic8//xxdu3YFYCjCjRw5Er/88guGDRuGVatWGRUdPT090aFDB9SpUwd16tRBYGCgzSMsc3rsscfw9NNPo06dOqhbty5ee+21fI+EtfW5zYtGo8Gjjz6KPn36oEWLFkYZOHDgAPr374+///4b33//PQYNGmR026CgIHTt2hV169ZFnTp1cOrUKXz22WdWPAumLV26FH/88QeCg4OxfPlyhISEADAU9F9//XX8/vvvaNq0KXr06GF0u9OnT2Py5Mnw8PDAd999h1atWgEwFOHeeecd7N27F2PHjsXMmTPtbqMz2fNasPU5qFKlCgYPHoxnn33WaMT4tWvXMGDAAJw/fx4jR47EkiVLTLa5RIkSDv+eZ2sO9Ho9Bg4ciIMHD6Jr16746KOPjPr04MEDm44M8vf3R+fOnVG3bl3Url0b9+7dQ//+/fO9Xc2aNb
F+/XqEhobmKuhv3LgRI0aMwFdffYXWrVtbNWDRlveQnG7duoVx48ahTp06KFKkCI4cOWLV7RV6vR4ffPABEhIS8OK
"text/plain": [
"<Figure size 1500x1200 with 3 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-07-10 15:07:31 +08:00
"source": [
"\n",
"df_raw = load_and_preprocess_data(file_path)\n",
"\n",
"if df_raw is not None and not df_raw.empty:\n",
" # 您可以在这里调整 volume_window 和 price_lag 参数\n",
" # volume_window: 用于计算成交量移动平均的周期例如5分钟K线设置为5表示过去5分钟的平均成交量\n",
" # price_lag: 用于计算未来收益率的周期例如5分钟K线设置为5表示未来5分钟的收益率\n",
" processed_data = calculate_stationary_indicators(\n",
" df_raw, volume_window=10, price_lag=5\n",
" )\n",
" analyze_and_visualize(processed_data)\n",
"else:\n",
" print(\"无法进行分析,请检查数据加载是否成功。\")"
]
},
{
"cell_type": "code",
2025-07-10 15:07:31 +08:00
"execution_count": 11,
"id": "f13d0294",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:35.137491Z",
"start_time": "2025-06-21T18:08:35.131334Z"
}
},
2025-07-10 15:07:31 +08:00
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# 无需数据加载和指标计算函数,假设 processed_df 已经传入并包含所需列\n",
"\n",
"def analyze_trend_continuation_probability(processed_df, return_threshold=0.0001, num_bins=20):\n",
" \"\"\"\n",
" Analyzes the probability of trend continuation (direction consistency) as volume change rate (Z-score) varies.\n",
" This version ignores the specific direction (Up/Down) of the trend, focusing only on whether it continues.\n",
" It plots the individual bin probabilities and a bar chart showing the number of data points in each bin.\n",
"\n",
" Parameters:\n",
" processed_df (pd.DataFrame): DataFrame with calculated indicators.\n",
" return_threshold (float): Minimum absolute log_return to classify current/future as 'significant' move.\n",
" num_bins (int): Number of bins to divide the volume_normalized_zscore range.\n",
" \"\"\"\n",
" if processed_df.empty:\n",
" print(\"Processed data is empty. Cannot perform volume-trend analysis.\")\n",
" return\n",
" \n",
" required_cols = ['log_return', 'future_log_return', 'volume_normalized_zscore']\n",
" if not all(col in processed_df.columns for col in required_cols):\n",
" print(f\"Error: Missing one or more required columns: {required_cols}. Please ensure they are calculated.\")\n",
" return\n",
"\n",
" print(\"\\n--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---\")\n",
"\n",
" # 1. Define Current K-line Direction (significant move)\n",
" # 1 if significant UP, -1 if significant DOWN, 0 if Flat\n",
" def get_direction_sign(log_ret):\n",
" if log_ret > return_threshold:\n",
" return 1 # Up\n",
" elif log_ret < -return_threshold:\n",
" return -1 # Down\n",
" else:\n",
" return 0 # Flat\n",
" \n",
" processed_df['current_direction_sign'] = processed_df['log_return'].apply(get_direction_sign)\n",
" processed_df['future_direction_sign'] = processed_df['future_log_return'].apply(get_direction_sign)\n",
"\n",
" # 2. Define 'Is Continuation' (Target Variable)\n",
" # A continuation occurs if current_direction_sign is not 0 AND future_direction_sign is the same\n",
" processed_df['is_continuation'] = np.nan # Initialize with NaN\n",
" \n",
" # Cases where current move is Up and future is also Up\n",
" processed_df.loc[(processed_df['current_direction_sign'] == 1) & (processed_df['future_direction_sign'] == 1), 'is_continuation'] = 1\n",
" # Cases where current move is Down and future is also Down\n",
" processed_df.loc[(processed_df['current_direction_sign'] == -1) & (processed_df['future_direction_sign'] == -1), 'is_continuation'] = 1\n",
" # Cases where current move is significant but future is not in the same direction (e.g., flat, reverse)\n",
" processed_df.loc[((processed_df['current_direction_sign'] != 0) & (processed_df['is_continuation'].isna())), 'is_continuation'] = 0\n",
"\n",
" # Filter out rows where current K-line was flat, as there's no trend to \"continue\"\n",
" df_for_analysis = processed_df[processed_df['current_direction_sign'] != 0].copy()\n",
" \n",
" if df_for_analysis.empty:\n",
" print(\"No significant current moves (Up/Down) to analyze for continuation.\")\n",
" return\n",
"\n",
" # --- REMOVED: Filtering out 1% and 99% Z-score outliers ---\n",
" # Now using the full range of df_for_analysis for binning\n",
"\n",
" # 3. Binning Volume Normalized Z-score (using unfiltered data)\n",
" min_z = df_for_analysis['volume_normalized_zscore'].min()\n",
" max_z = df_for_analysis['volume_normalized_zscore'].max()\n",
" \n",
" if pd.isna(min_z) or pd.isna(max_z) or (max_z - min_z < 0.001):\n",
" print(\"Warning: Volume Z-score range is too small or contains NaNs for binning.\")\n",
" # Fallback for very small ranges to prevent errors\n",
" if pd.isna(min_z) or pd.isna(max_z):\n",
" min_z = -5\n",
" max_z = 5\n",
" elif (max_z - min_z < 0.001):\n",
" max_z = min_z + 0.001\n",
"\n",
" bins = np.linspace(min_z, max_z, num_bins + 1)\n",
" labels = [f'{bins[i]:.2f} to {bins[i+1]:.2f}' for i in range(num_bins)]\n",
" # Use pd.cut for binning\n",
" df_for_analysis['volume_zscore_bin'] = pd.cut(df_for_analysis['volume_normalized_zscore'], bins=bins, labels=labels, include_lowest=True)\n",
"\n",
" # 4. Calculate Continuation Probability for each bin\n",
" continuation_prob = df_for_analysis.groupby('volume_zscore_bin')['is_continuation'].mean()\n",
" \n",
" continuation_df = pd.DataFrame({\n",
" 'Volume Z-score Bin': continuation_prob.index,\n",
" 'Trend Continuation Probability': continuation_prob.values\n",
" }).dropna() # Drop NA if a bin has no data\n",
"\n",
" if continuation_df.empty:\n",
" print(\"No data points for trend continuation within the bins. Adjust thresholds or data range.\")\n",
" return\n",
" \n",
" print(\"\\nTrend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):\")\n",
" print(continuation_df)\n",
"\n",
" # 5. Visualization - Individual Bin Probabilities (Original Plot, without filtering)\n",
" plt.figure(figsize=(14, 8))\n",
" \n",
" plt.plot(continuation_df['Volume Z-score Bin'], continuation_df['Trend Continuation Probability'],\n",
" marker='o', linestyle='-', color='purple', label='Trend Continuation Probability')\n",
" \n",
" plt.title('Trend Continuation Probability vs. Volume Z-score (Direction Agnostic, All Data)', fontsize=18)\n",
" plt.xlabel('Volume Z-score Bins', fontsize=14)\n",
" plt.ylabel('Continuation Probability', fontsize=14)\n",
" plt.xticks(rotation=45, ha='right') # Rotate labels for readability\n",
" plt.ylim(0, 1) # Probability range\n",
" plt.axhline(0.5, color='gray', linestyle=':', linewidth=1, label='Random (0.5)') # Reference line for 0.5 probability\n",
" plt.grid(True, linestyle='--', alpha=0.7)\n",
" plt.legend(fontsize=12)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
" # print(\"\\nThis plot shows the probability of ANY trend (up or down) continuing, across different levels of volume change rate.\")\n",
" # print(\"Peaks above 0.5 indicate where current direction is more likely to be followed by the same direction in the future.\")\n",
" # print(\"No Z-score outliers have been removed in this plot.\")\n",
"\n",
"\n",
" # --- NEW PLOT: Number of Data Points per Bin ---\n",
" print(\"\\n--- Plotting Number of Data Points per Volume Z-score Bin ---\")\n",
"\n",
" # Count the number of data points in each bin\n",
" bin_counts = df_for_analysis['volume_zscore_bin'].value_counts().sort_index()\n",
"\n",
" # Ensure the order of bins in bin_counts matches that in continuation_df\n",
" # (pd.cut with labels usually handles this, but explicitly reindexing can ensure consistency)\n",
" bin_counts = bin_counts.reindex(continuation_df['Volume Z-score Bin'])\n",
" \n",
" # Convert to DataFrame for plotting\n",
" bin_counts_df = pd.DataFrame({\n",
" 'Volume Z-score Bin': bin_counts.index,\n",
" 'Number of Data Points': bin_counts.values\n",
" }).dropna()\n",
"\n",
" if bin_counts_df.empty:\n",
" print(\"No data points found for plotting bin counts.\")\n",
" return\n",
"\n",
" plt.figure(figsize=(14, 8))\n",
" sns.barplot(x='Volume Z-score Bin', y='Number of Data Points', data=bin_counts_df, palette='viridis')\n",
" plt.title('Number of Data Points per Volume Z-score Bin', fontsize=18)\n",
" plt.xlabel('Volume Z-score Bins', fontsize=14)\n",
" plt.ylabel('Number of Data Points', fontsize=14)\n",
" plt.xticks(rotation=45, ha='right')\n",
" plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
" # print(\"\\nThis plot shows the raw count of data points falling into each Volume Z-score bin.\")\n",
" # print(\"Bins with very low counts might produce less reliable trend continuation probability estimates.\")\n"
2025-07-10 15:07:31 +08:00
]
},
{
"cell_type": "code",
2025-07-10 15:07:31 +08:00
"execution_count": 12,
"id": "74770a30",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:35.362961Z",
"start_time": "2025-06-21T18:08:35.152984Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-07-10 15:07:31 +08:00
"Successfully loaded 5811 rows of data.\n",
"First 5 rows of data:\n",
2025-07-10 15:07:31 +08:00
" open high low close volume open_oi \\\n",
"datetime \n",
"2021-12-31 14:00:00 2499.0 2509.0 2476.0 2478.0 198556.0 974005.0 \n",
"2022-01-04 09:00:00 2516.0 2584.0 2513.0 2567.0 669046.0 988671.0 \n",
"2022-01-04 10:00:00 2567.0 2597.0 2567.0 2595.0 155426.0 828049.0 \n",
"2022-01-04 11:00:00 2595.0 2600.0 2585.0 2586.0 119967.0 843256.0 \n",
"2022-01-04 13:00:00 2587.0 2607.0 2586.0 2592.0 127710.0 841771.0 \n",
"\n",
2025-07-10 15:07:31 +08:00
" close_oi underlying_symbol \n",
"datetime \n",
"2021-12-31 14:00:00 988671.0 CZCE.MA205 \n",
"2022-01-04 09:00:00 828049.0 CZCE.MA205 \n",
"2022-01-04 10:00:00 843256.0 CZCE.MA205 \n",
"2022-01-04 11:00:00 841771.0 CZCE.MA205 \n",
"2022-01-04 13:00:00 862642.0 CZCE.MA205 \n",
"Indicators calculated. 5772 rows of data remaining for analysis.\n",
"\n",
"--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---\n",
"\n",
"Trend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):\n",
" Volume Z-score Bin Trend Continuation Probability\n",
2025-07-10 15:07:31 +08:00
"0 -1.77 to -1.10 0.528139\n",
"1 -1.10 to -0.43 0.515532\n",
"2 -0.43 to 0.24 0.485424\n",
"3 0.24 to 0.91 0.503597\n",
"4 0.91 to 1.58 0.461089\n",
"5 1.58 to 2.25 0.421053\n",
"6 2.25 to 2.92 0.439252\n",
"7 2.92 to 3.59 0.444444\n",
"8 3.59 to 4.26 0.500000\n",
"9 4.26 to 4.93 0.000000\n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW4AAAMWCAYAAABhlR+IAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4FFX//vF704AAoXdEiiYgHSkiHRGQIk2KINgRRIrlQVAfK4Jd6cKD0mwUgwhSBBSQIkURFSNK7wQIEEKAkOz8/shv55s92Q1pJEvyfl0Xl2Z2duecnTuzm8+ePcdhWZYlAAAAAAAAAIDP8MvuBgAAAAAAAAAA3FG4BQAAAAAAAAAfQ+EWAAAAAAAAAHwMhVsAAAAAAAAA8DEUbgEAAAAAAADAx1C4BQAAAAAAAAAfQ+EWAAAAAAAAAHwMhVsAAAAAAAAA8DEUbgEAAAAAAADAx1C4BZBt+vfvr7CwME2cODG7m+KzRo0apbCwMI0aNSq7m5ItWrdurbCwMIWHh2d3U3KMI0eOKCwsTGFhYTpy5EiWH9917C1btmT6/dN7G24MW7Zssc8jcCNZsGCBwsLC9Morr1zX44SHhyssLEytW7e+rsfJLrn9PRGSmzhxosLCwtS/f//sbkqmSinr/B6kzrFjx1SjRg3dfffdiouLy+7mABkSkN0NAHKzjPzxOW7cOHXv3j0TW3PjiomJ0aJFi7Rx40bt3r1bZ8+eldPpVOHChRUWFqY777xTnTp1UokSJbK7qbYtW7Zo69atKleuXK48j+Hh4Tp69KgaNmyoRo0aZXdzMt2oUaO0aNGiZNvz5Mmj4sWLq1atWurRo4eaNWuWDa1DUtHR0Zo9e7Yk6cEHH1RISEg2t8h3vfTSS1qwYIEKFy6sn376SUFBQam6X9u2bXXw4EG1atVKH3/88XVuZe7j7XpzLQ0bNtTcuXOvQ4uQ1MWLFzV+/HgFBQVp8ODByW73dP78/PwUHBysggUL6qabblK1atXUtGlTNW3aVH5+OW/cTW57T5SQkKCWLVsqMjJSkvTpp5+qSZMm2dwq3xIREaHVq1erYMGCeuihh7K7ORmyd+9edejQQZKUN29ebdy4UQUKFMiWtkycOFGTJk1y2+ZwOBQcHKwCBQqobNmyqlatmho1aqTWrVun+nU+rbLivVfZsmXVvXt3zZs3T1988cUNnyPkbhRugWxUvHhxj9tjY2MVGxub4j558+a9bu26kSxYsEDvvvuuzp8/b2/LmzevgoKCdPLkSZ08eVLr16/Xhx9+qMGDB3v8oyk7bN26VZMmTVLDhg1T/COlRIkSqlSpkk8VnTPDokWLtHXrVj311FMpFm5vuukmBQUFqWDBglnYuszj5+enokWL2j+fP39eR48e1dGjR7V8+XLdd999GjNmjBwORza28sZSqVIlSVK+fPky5X7R0dH2HzHdunWjcJuC++67TwsWLNC5c+e0evVq+w/RlGzdulUHDx6074/MV6BAAa/vFUyXL19WTEyMJF23P8jh7pNPPtGpU6fUr18/lS5d2ut+5utFbGysjh8/ruPHj2vr1q2aPXu2ypQpo9GjR6tdu3YeH6NgwYKqVKmSSpUqlen9uJ5y23ui9evX20VbSfr6668p3BoiIiI0adIklStXLsWCW5EiRVSpUiWVKVMm6xqXRgsXLrT///Lly1q6dKn69OmTjS1KlPR14/Lly4qMjNTJkye1Y8cOffHFFypcuLBGjBih+++/P9OPnVXvvQYNGqTw8HBNnTpV3bt35z0eblgUboFstHHjRo/bk34a6m0fSB9++KE9eqtKlSoaOHCgmjVrpmLFiklKfBOyfft2LV68WN99951WrFjhM4Xb1Hr22Wf17LPPZnczso3r0/gbVZkyZfTDDz/YP8fHx2vXrl16/fXX9eeff2rhwoW67bbb1K9fv2xs5Y1lxYoVWXo//J86derolltu0Z49exQeHp6qwq1rmpPixY
urZcuW17mFudNLL72kl1566Zr7xcXFqW/fvvrjjz8UHByskSNHZkHrcrfLly/bo5qvVfwwXy+kxHO2e/durVu3Tl9++aWOHz+uYcOG6YknntAzzzyT7DHuvvtu3X333ZnXAR+TU94TuQp5/fr10xdffKFVq1bp3LlzKly4cPY27Ab0wAMP6IEHHsjuZnh19epVLV68WFLiFHFz587VwoULfaJwa/6NmZCQoD179mjTpk367LPPdOTIEb366qvavn273nvvvRtykEHZsmXVvHlzrVmzRgsWLNCjjz6a3U0C0iXnfdcGQK6wbNkyu2h7zz336JtvvlHXrl3toq2UOPK2adOmevfdd/XNN9/olltuya7mApKkgIAA1a5dW9OnT7f/QPvss8+yt1FAGrhGzW7cuFEnT55Mcd+YmBitXLlSktSlSxcFBDBeIDu98sor+uOPPyQlTrfEXMHX37JlyxQdHa1q1arp1ltvTfP9g4KCVLNmTT311FNaunSp/Q2VadOmacmSJZndXGSB06dPa+3atfL399cTTzyhBg0aKC4ujvOZQ/344486c+aMqlSpomeeeUbBwcH6448/9M8//2R305Lx9/dXWFiYHn74YS1dulQdO3aUJC1dulTTp0/P5tal37333itJmj9/vizLyubWAOlD4Ra4ASVdZOfMmTMaN26c2rVrp9q1a3v8Q2zt2rUaOnSomjVrpho1aqhBgwb2p/zeJmtPunCYZVmaP3++evbsqXr16qlu3brq3bu3/QmyNwkJCZo7d666deumOnXqqGHDhurfv3+GR77FxcXpnXfekSTdcsstevvtt6/5lc/Q0FC9++67Hm/766+/NHLkSLVq1Uo1a9ZUgwYN1KdPH82aNcvr82MuAPLnn39q+PDhatq0qWrUqKG77rpL48aNc5vCQfq/haFcI6q3bt1qn0/Xv6QLcaW0AEFGz1FqFmtKaQG5w4cPa/r06Xr00UfVrl071alTR3Xr1lWHDh305ptv6tixY16ft61bt0qSJk2alKz/SRfMutbiZAkJCVq4cKEGDBigRo0aqUaNGmrWrJmGDRuW6n6lN98ZUaxYMTVt2lSStG/fPl28eFFS8sWX/vrrLz377LNq3ry5qlevnmzxjVOnTuntt99Wx44dVadOHdWpU0cdO3bUO++8o9OnT6eqLQcOHNCoUaPUvHlz1ahRQy1bttTLL7+cYlHut99+07vvvqu+ffvavzf169dXr169NH36dLs/13Lq1Cm9/vrrat26tWrWrKkmTZro2Wef1d69e73eJ72LjHm6X//+/XXXXXfZP991111uWXQ9308//bTCwsL0+OOPp3iMgwcPqmrVqqlu39WrV9WoUSOFhYVpzpw5Ke67cOFChYWFqV69erp06ZK9PT4+XvPmzVP//v3VqFEjVa9eXY0aNVK7du00YsQILViw4JrtSIsuXbooMDBQTqfzmosGLl++3J72p0ePHm63pee6m5LULFCT0uJm5v3XrFmjBx98UI0aNVK9evXUp08frV692u0+33zzjfr06aMGDRqobt266tevnzZv3nzNtqbnNTmj5syZY5+vQYMGqX379ul6nMuXL+uTTz5R79691aBBA1WvXl133HGHOnTooOeff94u1Huyd+9evfbaa+rQoYPq1q2runXrql27dnr66ae1cuVKOZ3OZPe5cuWKZs2aZT/PNWvWVKtWrTRy5EhFRER4PVbS1w7XHLOdO3dW3bp1PS7M+Msvv+i5556z83j77bfrvvvuS9P1zJP58+dLkjp16pTux3ApUqSIJk2aZE+D8NFHH+nq1atu+6S0OJmZ8ZUrV+qRRx5R48aNVbVq1WSv81FRUfrwww/VtWtX3X777apZs6buuusuvfDCC/r3339TbKvT6dSyZcv05JNP2jm/44471L17d7377rt20Soz3xO5bNmyRcOGDbOP26hRIz344IP6+uuvlZCQ4PE+5nOzefNmDRw4UHfccYdq1qype+
65R5MmTdKVK1dS7HdqfPPNN4qPj1fjxo1VqlQpdevWTVLidAmpsW3bNg0aNEiNGjVSrVq11K5dO3344Ye6ePFiiuf
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"--- Plotting Number of Data Points per Volume Z-score Bin ---\n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW4AAAMWCAYAAABhlR+IAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAA4KFJREFUeJzs3Xl8TGf///H3TBZEFoKEUFVLImoXtEpTrerd6ob2Vm6luClFVS2trdRW7U1Layu1lG/pYumvitqVVmurpSrUvsUWW0RCtvP7w51zZ0gimcxwyOv5ePTxyFznmjPXZ87Jib7nmuvYDMMwBAAAAAAAAACwDPudHgAAAAAAAAAAwBHBLQAAAAAAAABYDMEtAAAAAAAAAFgMwS0AAAAAAAAAWAzBLQAAAAAAAABYDMEtAAAAAAAAAFgMwS0AAAAAAAAAWAzBLQAAAAAAAABYDMEtAAAAAAAAAFgMwS0AAHepV199VWFhYfrss8/u9FDuqISEBI0dO1ZPP/20qlatqrCwMIWFhSkqKupODw3ZxLmMjKT9Lm/cuPFODwW4LTjnAQA38rzTAwAAwJU+++wzjR8/XpKUP39+LV++XMHBwRn2PX78uJ544glJ0qxZs1S3bt3bNk64Ts+ePbVmzRpJ14950aJFJUmentn7Z86rr76qTZs2ObR5eHioYMGC8vPzU5kyZRQeHq6GDRsqIiLCtYNPJyoqSitXrpSfn59ee+01t71OVtL/TqRnt9vl5+enBx54QA0aNFCrVq0UGBh4B0aYc2lhcNOmTVWqVKk7PJq7xw8//KA+ffpIkubNm6cqVapk63n9+/fX/PnzVahQIa1fv17e3t7uHGaes2DBAvXr18+p5+7du9fFo0GazK6dkuTl5aWAgACFhobqqaeeUvPmzeXl5XWbRwgAuFsR3AIA7llXr17VhAkTNHTo0Ds9FLjJgQMHzND2k08+0TPPPOP0vtL+5zrNlStXdOLECZ04cUK//vqrvvjiC5UrV05DhgxRnTp1cj32G0VFRWn8+PEqWbLkHQtu0/P19VX+/PklSUlJSbp06ZK2b9+u7du3a86cOfr888+zHebdSokSJfTAAw+ocOHCLtlfemkf5NSpU4fgNgeeeuopDRs2TLGxsZo/f362jnV8fLyWLl0qSXrhhRcIbd0g/YdTt5KSkqILFy5IEsfiNkp/7ZSkuLg4xcTEKCYmRhs2bNC3336r6dOnq1ChQjc994EHHpAkFShQ4HYNFwBgcQS3AIB72vz589WuXTvzf4Zwb/n7778lSYUKFcpVaCtJNWrU0OzZsx3arl69qt27d2v58uX67rvvdODAAbVp00aDBw9Wy5Ytc/V6VjdgwAA1a9bMfHzp0iXNmTNHEyZM0Llz59S9e3ctW7ZM+fLly/VrffTRR7neB1wrX758evbZZzVnzhwtXrxY/fr1u+Wx/umnnxQfHy9Jat68+e0YZp7zzDPPZPtaN3z4cPOaNnjwYHcOC+nceO2UpNOnT2vatGn68ssv9ddff+njjz/O8EPln3766XYNEwBwl2CNWwDAPalEiRIKCwtTcnKyPvnkkzs9HLjJ1atXJUkFCxZ0y/7z58+vmjVr6t1339UPP/ygsLAwGYahYcOGacuWLW55TasKCAhQly5d9O9//1uSdPLkSa1ateoOjwru9NJLL0mSYmNjtWLFilv2nz9/viSpSpUqCgsLc+vYkLUFCxaYoW3r1q3NY4k7Izg4WP3799fDDz8sSVq5cuUdHhEA4G7BjFsAwD3JbrerV69e6tSpk5YtW6adO3eqatWq2X5++vXqVq1alelXrB9//HGdOHFCH3zwgcMMmxufb7PZNGnSJP3yyy86d+6cgoOD1aRJE73++uvy8fGRdH326JQpU7R582adP39eJUqU0IsvvqiOHTvecj28xMREzZw5U4sWLdKxY8fk5eWlypUr67XXXlNkZGSWz/377781e/Zsbdy4UadPn5bdbl
epUqX0+OOPq23bthmuZZq2lnCdOnU0e/ZsLVu2TN98842ioqJ04cIFde3aVd27d8/yddO7du2a5s6dq59++kkHDhzQ1atXVbRoUdWuXVvt2rVTeHh4hq+f5sSJEw5BUdOmTTVq1Khsv352lCxZUhMnTtRzzz2n+Ph4ffzxx5ozZ45Dn4SEBK1atUrr1q3T3r17dfr0acXFxalQoUKqWrWqWrRokeHxSD/2G2uRpG7dupnvp7Ov4SovvPCCJk2aJEn6888/HWb/5fQ4pklbZzh9nWnS/449++yzmjVrln744QcdPXpUHh4eevDBB/Xvf/9bjz76qMPz3n33XS1cuNB83KZNG4ftJUuW1OrVq83Hp06d0vTp0/Xrr7/qxIkTSk5OVqFChRQUFKSIiAg9++yzubqGJCcna/LkydqwYYPOnz+vokWL6tFHH1XXrl0zXYdbklJTU/Xjjz9q0aJF+uuvvxQbGytfX19VqlRJzZo1U5MmTWSz2W56Xvr37amnntIXX3yhlStX6vjx44qPj8/yupbmwQcfVHh4uKKiojR//nw9++yzmfY9cuSI+WHGjSHh2bNnNX36dK1bt04nTpyQdP39j4yMVPv27bP91f80GzduNI9nVuu2pv0e3biG+Y3P37Nnj6ZMmaJNmzYpNjZWJUuW1EsvvaS2bduaa2Vv3bpV06ZN086dO3Xp0iXdf//9atmypVq1apXh+5/Gmetrbu3cudOcYVunTh2n18SVpCVLlmjBggXavXu3Ll26pAIFCigwMFBly5ZVgwYN9NJLL2U4E/vChQv66quv9PPPP+vIkSNKSEhQsWLFVKZMGTVq1EjPPfec/Pz8bnre8uXLNX/+fP3555+KjY2Vv7+/qlSpopdeeklPPvlkhmNM+11v2rSpPvjgA82bN08LFizQwYMHdfHixQz/Pn/55ZfasGGDoqOjlZqaqhIlSqh+/fpq3769QkJCnH6/biU8PFy//fabOTP9RpmdszdeT/Lnz6/Jkydr9erVOnv2rPz8/FS3bl1169ZN5cqVc9v4AQC3H8EtAOCeFRkZqTp16mjTpk0aPXq0Zs2adUfGsXv3bg0YMMAMXFJSUnTs2DFNnjxZW7Zs0cyZM/Xrr7/qrbfeUkJCgvz8/JSUlKQjR45o3Lhx2rdvX5azhpOSktSuXTtt2bJFnp6e8vHxUWxsrDZs2KANGzZkGIalmTp1qj7++GOlpqZKur6uXlJSkv7++2/9/fffmj9/vqZMmaJKlSpl+vqjRo3SjBkzZLPZ5O/vL7s9Z1/oOX36tP7973+byx54eXkpf/78io6O1v/7f/9PixYtUv/+/fXqq6+az/Hx8VHRokV19epVxcXFyW63OwQgvr6+ORpDdpUqVUpNmzbVV199pa1bt+rYsWO67777zO1Lly41QxKbzSZfX195enrq7NmzWrVqlVatWqX27dvrnXfecdhvVrWk1Zvb13CV4sWLmz/HxcWZPztzHHMiPj5erVu31o4dO+Tl5SUvLy/FxcVp48aN2rRpk4YPH+4QGPr6+qpo0aKKiYmRdH3GcPoPQNKvp7tnzx61adNGly5dknT95nS+vr6KiYnR2bNnzcA0J8Ftejt37tTAgQN15coV+fj4yMPDQydPntQ333yjZcuWafr06XrwwQdvet7FixfVrVs3bd682Wzz8/PThQsX9Ouvv+rXX3/V4sWLNW7cuEzXML148aKaNWumw4cPy8vLK8drZ7700ksaNmyYfv/9d0VHR2caai1YsEDS9Vnq6QPeTZs2qWvXroqNjZX0v3N5//792r9/v+bNm6eJEye69cZ/Wfn555/VvXt3Xbt2TX5+fkpMTNTBgwf10UcfmV9p/+677zR48GClpqbK19dXiYmJ2rdvn4YOHaqTJ0+qd+/eGe7bFdfXnIqJiVH37t2VmJiokJAQjRs3Lts3arxRv379zOMqXT92ycnJOnLkiI4cOaI1a9YoMjLypg8AfvnlF7399tvm75Onp6
d8fX115swZc83woKAgNWrUyHxOYmKi3nnnHS1ZskTS/26IeOHCBa1du1Zr167Vs88+q1GjRmX6QaZhGOrRo4eWLVt
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
2025-07-10 15:07:31 +08:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-07-10 15:07:31 +08:00
"source": [
"\n",
"df_raw = load_and_preprocess_data(file_path)\n",
"if df_raw is not None and not df_raw.empty:\n",
" # volume_window and price_lag parameters for indicator calculation\n",
" # price_lag defines \"N\" in \"next N K-lines\"\n",
" processed_data = calculate_stationary_indicators(df_raw, volume_window=30, price_lag=5)\n",
" \n",
" # Analyze the impact of volume change rate and current K-line direction on future trend\n",
" # return_threshold: set a small threshold to define 'significant' up/down move, otherwise it's 'flat'\n",
" analyze_trend_continuation_probability(processed_data, return_threshold=0.0001, num_bins=10)\n",
"else:\n",
" print(\"Analysis cannot proceed. Please check if data loading was successful.\") "
]
},
{
"cell_type": "code",
2025-07-10 15:07:31 +08:00
"execution_count": 13,
"id": "8fa62ad6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:35.708913Z",
"start_time": "2025-06-21T18:08:35.375716Z"
}
},
2025-07-10 15:07:31 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully loaded 5811 rows of data.\n",
"Indicators calculated. 5772 rows of data remaining for analysis.\n",
"\n",
"--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1EAAAIkCAYAAAD/HaFsAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAvqdJREFUeJzs3Xd8VFX6P/DPnZKZSScJiYTEBiaAoaMIxkWxy1oA1waoa0FcFb8q0lYFLIiKu6Cuoq6yi6siClgWdVfUxYIKKkiRLiUFUkmfyZR7vn8M9zIlgSQzydw7+bx/P1/f5dybuefMPHNmnjn3PlcSQggQERERERFRixgi3QEiIiIiIiI9YRJFRERERETUCkyiiIiIiIiIWoFJFBERERERUSswiSIiIiIiImoFJlFEREREREStwCSKiIiIiIioFZhEERERERERtQKTKCIiIiIiolYwRboDRJEycuRIFBUVqf+WJAk2mw0JCQk46aSTkJeXh0svvRT9+vVr9jEmTJiAdevWYcmSJRg6dGhHdPuYlDF9/vnnyMrKUtu11k8AmD59OlauXIknn3wSY8aMiXR3wu6LL77A3//+d2zfvh319fUA0KLnX3kN9fi85ObmBrVZLBakpaWhf//+GDduHIYMGRKBnkW/u+66C19//TX++9//4oQTTlDblfe+L7PZjOTkZOTl5eGaa67ByJEjW328559/Hi+88ALuvvtu3HPPPSH3v700NDTgvffew5o1a7Bjxw5UVVXBbDYjPT0dffv2xSWXXIKRI0fCYDj6m3Jz8yg1L1re+z/++CPGjRuHW2+9FVOnTo10d0jjmERRpzdo0CCcdNJJAACHw4HDhw9j27ZtWLduHV5//XWceeaZmDt3LrKzs9utD9H2ob1ixQrMmDEDo0ePxrx58yLdnQ63bds2TJ48GbIs46yzzkLXrl0hSRLS0tIi3bUOkZ+fj65duwIADh8+jC1btuDjjz/GJ598ghkzZuCmm24Ky3GUL247duwIy+Pp1dq1a7F69WrccsstfgmUr169eqF3794AvInFr7/+ii+//BJffvklJkyYgIceeqgju9whvvnmGzz44IOorKyEyWTC6aefjiFDhsDj8eDAgQP46KOP8NFHH6Fv37547733It3dqNBR7/32SuKHDBmCc889F0uWLME111yDk08+OWyPTdGHSRR1en/4wx+CfvEXQuCrr77C3LlzsW7dOlx33XVYunRpUCL11FNPwW63IzMzsyO73Kx//OMfcLlcyMjIiHRXjuv+++/H7bffjvT09Eh3JexWr14Nl8uFSZMm4b777ot0dzrcxIkT/Vbc7HY7pk6div/+97945plncMkll+giRvXiySefhMViwcSJE5vd54ILLvD7sinLMhYsWICXX34Zb7zxBs4//3wMGzasxcccN24cLrvsMnTp0iWkvreX//3vf/jTn/4Ej8eDsWPH4oEHHkBqaqrfPsXFxVi0aBE+/fTTCPUy+kTDe/+ee+7B//73P8yfPx8vvPBCpLtDGsZrooiaIEkSRowYgXfffRcnn3wyysvLm/ylNjMzEz169IDNZotAL4OdeOKJ6NGjB8xmc6S7clzp6eno0aMHEhISIt2VsCsuLgYAdYWzs7PZbOr7x+Vy4euvv45wj6LHt99+i507d+KCCy5oVUJjMBhw7733qj8MffLJJ606bkpKCnr06IGUlJRW/V1HOHz4MB588EF4PB5MmDABc+fODUqgAO/8/eijj+Jvf/tbBHrZOejxvZ+Xl4devXrh888/R2FhYaS7QxrGJIroGBITEzFz5kwAwPfff48tW7b4bZ8wYQJyc3Pxww8/+LU7nU78/e9/x5gxYzBw4EDk5eXh7LPPxtixY/H000+jqqoKgPe0t9zcXPXarPPPPx+5ubnqf8rj/vDDD8jNzcWECRNgt9uxcOFCXHrppejfv7/f9QwjR45Ebm7uMSf+devW4ZZbbsGZZ56J/v
374+qrr8b777/f5L7NjU/x/PPPIzc3F88//7xfH2bMmAEAWLlypd94JkyYoO43ffp05ObmYsWKFU0+9qpVq3DTTTfhzDPPRF5eHs477zzMmDEDe/fubXJ/37F///33uOWWW3DGGWegX79+GD16dLNjPB632423334b1113HQYPHoy+ffvioosuwuOPP46SkpImnw9lTDNmzGhy7OF26NAhPPbYY7jooovQt29fDB48WF099Xg8Tf6NEALvvfcexowZg/79+2Po0KG47bbb8PPPP/vFW7hkZGQgOTkZAFBRUdHkPp9++iluvfVWnHXWWcjLy8M555yDKVOmYPfu3X77Kc+zwjfGfOO/qfj01dw4W/J+U96706dPR0NDA5599llceOGF6nt92rRpQfGhWLt2LSZNmoThw4fj9NNPxxlnnIGLLroIU6ZMwfr164//ZPr417/+BQAYPXp0q/4OAIxGo3qKn+/1ob7vpdWrV+PGG2/EmWee6TcXHO+53bt3L2bPno2LL74Y/fv3x6BBg3DZZZdh9uzZ2LlzZ9D+1dXVeO6553DllVdi4MCB6N+/Py6//HK8+OKLsNvtrRrXm2++iZqaGqSmprboupYzzjij2W2tmUuKiorwyiuv4MYbb8S5556LvLw8DBkyBNdffz2WLl0KWZaD/qawsBC5ubkYOXIkhBB45513MGbMGAwYMACDBw/GLbfcgg0bNjTbv507d+Kee+7B0KFD1efsH//4B2RZPubngdvtxrvvvosJEyaoc+zIkSMxa9YsHDx48LjPWWsc773/3//+F3/+85/x+9//HmeccQb69u2rfo789ttvQfvn5uaqK0QvvPCC33t/+vTpYRnn6NGjIcsy3n777RBGTtGOp/MRHcfvfvc7JCcno6qqCmvXrkVeXt4x95dlGRMnTsR3332H+Ph4DBkyBImJiaisrMT+/fvx2muv4fLLL0dycjJOPPFEjB49Gv/5z3/Q0NCAiy++GLGxsepjBV5D09jYiAkTJmDPnj0YMmQIevXqpSZkLfHZZ5/hzTffxKmnnor8/HyUlpbip59+wrRp07B9+/agD6C2uPjii7Fx40b8/PPPOPHEEzF48GB126mnnnrcvxdCYPr06Xj//fdhMpkwZMgQpKamYuvWrVixYgU++eQTPPfcc/jd737X5N8vX74cL730Evr06YNzzjkHRUVF2LhxI6ZNm4aqqircfPPNLR6L0+nEHXfcgbVr18JisWDo0KGIj4/Hhg0b8MYbb+Df//43XnvtNZx++ukAgN69e2P06NH46aefcODAAb/r7Voy9rbYtGkTbr/9dlRVVSEzMxMXXHABamtrsW7dOmzYsAGfffYZXnrpJcTExPj93Zw5c/D222/DYDBgyJAh6Nq1K3bu3Inx48eH7boFX7Iso6GhAQCCVgXcbjemTJmCTz75BDExMTj99NORkZGBffv24aOPPsJnn32G559/Xn3Nled55cqVAIITCN/3UCha8n6rra3Fddddh4MHD2Lw4ME47bTTsHHjRrz//vtYv349PvjgA7/V1pUrV6o/MvTr1w9Dhw6Fw+FASUkJPv74Y3Tp0uWYX+oD+/fNN9/AbDa3+G8C1dXVAUBQfADA4sWL8a9//UtNaEtLS2E0Go/7mB999BFmzpwJp9OJzMxMjBgxArIso6CgAEuXLkVqaipycnLU/Xfv3o3bbrsNBw8eRNeuXTF48GCYTCZs3rwZCxcuxH//+1+88cYbLV61/vzzzwEAl112WZPjaqnWziUffPABFi5ciKysLJx88skYNGgQysrKsGHDBvz888/49ttv8dxzz0GSpCaPN2PGDPz73//G4MGDce6552Lbtm349ttvsX79evzrX/9C//79/fZft24dbr/9djgcDpx44ok4++yzUVVVhfnz5+OXX35pdlx1dXW48847sW7dOsTGxiIvLw9dunTBzp07sXTpUnz66adYvHgx+vTp0+bnztex3vsA8H//93+IiY
lBjx49cNZZZ8HtdmPXrl1YsWIFPv30U7z22msYNGiQuv/o0aOxbds2bN++3e9aPwB+nzehjPPss88G4D01+8EHHwz
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Statistical summary of Log Returns:\n",
"count 5772.000000\n",
"mean -0.000006\n",
"std 0.005649\n",
"min -0.056908\n",
"25% -0.002677\n",
"50% 0.000000\n",
"75% 0.002653\n",
"max 0.049518\n",
"Name: log_return, dtype: float64\n",
"Kurtosis of Log Returns: 9.1817\n",
"Skewness of Log Returns: -0.1073\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABc8AAAPdCAYAAABcIJAQAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXeYFEX6x78zu8uSc1AQzzs8AQElmEX9qZxnOETATBYPVORARcKpGBEFVBRQz4AEEUUEBAEVREEEyZKTILrAssACCws7m6Z/f7Q909PTebp7wn4/z7PP7naofqur3qrqt956yycIggBCCCGEEEIIIYQQQgghhITwx1sAQgghhBBCCCGEEEIIISTRoPGcEEIIIYQQQgghhBBCCFFA4zkhhBBCCCGEEEIIIYQQooDGc0IIIYQQQgghhBBCCCFEAY3nhBBCCCGEEEIIIYQQQogCGs8JIYQQQgghhBBCCCGEEAU0nhNCCCGEEEIIIYQQQgghCmg8J4QQQgghhBBCCCGEEEIU0HhOCCGEEEIIIYQQQgghhChIj7cAhBBCCInmhhtuwIEDBzBy5Eh06tQp3uJYonHjxlHHMjMzUbt2bVx88cXo0qULLrnkkjhIlvpkZ2fj448/xk8//YQDBw6goKAANWvWRIsWLXDrrbfi1ltvhc/ni7eYtpH0wgoNGjTAkiVLXJJIm1mzZmHYsGHo2LEjXnnlldDxVatWoXv37rjsssswdepUz+Uyy9ChQzF79uyIY2lpaahatSqaNm2KDh06oEOHDp7Up/379+PGG2/0rCy9LqN+/frhxx9/xLfffouzzjpL9ZqNGzdi1qxZWLt2LXJychAIBFClShU0atQIV111Fe644w7Ur1/fdVljoVu3bli9ejWmTJmCyy+/PN7iJCXSO3z00UfRv39/15/nte6p0bNnT2zatAnffPMN6tSpExcZCCGElG1oPCeEEEKIK7Rt2zb0oXv8+HFs2bIFCxYswMKFCzFs2DD06NHDkedIxvqdO3c6kl6yMm3aNLzyyisoKipC9erV0aZNG1SsWBF//PEHvvvuOyxevBgfffQRJkyYgHr16sVbXFv885//xPHjxyOOnTlzBt98803ofMWKFSPO16hRwzP5UpFzzz0Xbdq0AQAUFhZi9+7dWLFiBVasWIHvvvsOY8eORVpaWpylTF5WrFiBxYsX44EHHlA1nBcUFODpp5/GV199BQCoU6cO2rRpg8qVK+PEiRPYtGkT1qxZg7fffhtjx45Fu3btvM4CAGDcuHEYP368Z0ZdUnZ44okncOedd+L111/HyJEj4y0OIYSQMgiN54QQQghxhT59+kR4FxYUFGDw4MH49ttvMXr0aNx8881Ja8RNNCZPnoyXX34Zfr8fTzzxBHr16oWMjIzQ+T179uDJJ5/E5s2b0aVLF8yaNQtVq1aNo8T2GDJkSNSx/fv3h4zngwcPxjnnnOO1WJa46KKLsGDBAlSoUCHeopiiTZs2EZ7zAPDJJ5/g+eefx7fffovZs2fjzjvvdFWGevXqYcGCBRF1OlUYOXIkMjMz0adPn6hzxcXF6N27N9atW4c6derg+eefx4033hhxTUlJCRYtWoQ33ngD+/fv90psW7z66qsoKChIeA95kli0aNEC119/PWbPno0ePXqgSZMm8RaJEEJIGYMxzwkhhBDiCRUqVMDTTz8NQDQK/fjjj3GWKDX49ddfMXr0aADAsGHD0KdPnygjY6NGjTB58mSce+65yMrKwosvvhgPUQlEPWjUqFFSGxDvv/9+XHbZZQCAhQsXuv68jIwMNGrUCOeee67rz/KSn376Cbt27UK7du1UV0i8/fbbWLduHapWrYrp06dHGc4BID09Hbfccgtmz54dKpNEpX79+mjUqFHSTByRxOHOO++EIAiYPHlyvEUhhBBSBqHxnBBCCEkRDh06hBdffBE33XQTWrRogTZt2uDee+/Fp59+itLSUtV7BEHAzJ
kz0alTJ1x88cW4/PLL8eCDD2L9+vVYtWoVGjdujG7dujkmY7169VC9enUAQG5uruo1X3/9NXr37o0rrrgCzZs3xzXXXINBgwbh119/jbhu3LhxEfHVGzduHPEjeWFK140bN071eVr5lB8vKCjAm2++iVtuuQUXX3wxbrjhBgBiXOvGjRtj6NChOHPmDF577TX84x//QPPmzXH11VdjyJAhyMnJUX3uihUr8NBDD+Gqq65Cs2bNcOmll+Kmm27CoEGDsGbNGuOX+ScffvghiouLDcuqSpUqGDx4MABg/vz5yMrKAiB6pTdu3BiXXnopCgsLNe/v1KkTGjdujMWLF0ccLykpweeff45u3brhsssuQ/PmzXHDDTfg2WefRXZ2dlQ6Zt6rU8jL58SJExgxYgTatWuH5s2bR7yrFStW4MUXX0SHDh1w+eWXo3nz5rj22msxcOBAbNq0STP9kpISTJo0Ce3bt0eLFi1wxRVXoH///rohhLTq2/79+9G4cWPccMMNEAQBn332GTp16oSWLVuiTZs2eOCBB7BhwwbNdHft2oX+/fvj8ssvx8UXX4z27dtj0qRJCAaDuOGGGyJ0wgmaNWsGAFEx6PPy8vDWW2+hQ4cOaNWqVUiWt99+GwUFBVHpyPXz4MGD+O9//4vrrrsOzZo1w9ChQ6PejRp22j4AmDNnDjp37oyLL74Yl112GXr37o21a9fq5tspvQWAjz/+GADQsWPHqHP5+fmYMmUKADEmesOGDXXTqlSpEi688MKo4z/++CP69u2LK6+8Es2bN0fbtm0xcOBAbN68WTWdbt26oXHjxli1ahW2b9+ORx99NKQTt956KyZOnAhBECLuady4McaPHw8AGD9+fEQ7LJWhMm05Q4cORePGjTFr1ixkZWXhySefxNVXX43mzZujXbt2eOONN1BUVBQlq/w+NeT6r4bVd2OkR1ryFBUV4YMPPkCnTp3QqlWrUP/QuXNnjBo1CidOnFBNzwqx9EUA8P3336Nr165o1aoV2rRpg/vvvz+qrVfDir5PnDgRjRs3xj//+U/k5+dHpTVjxgw0btwY1113HY4dOxZx7rrrrkONGjUwf/58R94XIYQQYgWGbSGEEEJSgE2bNuHf//43Tpw4gfr166Ndu3Y4deoUVq9ejQ0bNmDRokV45513UK5cuYj7nn/+eUyfPh1+vx+XXHIJ6tSpg127dqFr166OxSSXEwwGcebMGQBArVq1Is6VlJRg0KBBWLhwIcqVK4dmzZqhXr162LdvH+bNm4dFixZh3LhxuPbaawEATZs2RceOHUObGioNUMrY13YpLCxEt27dsGfPHlxyySVo0qRJ1Mf7qVOncO+99yI7Oxtt2rTB3//+d/zyyy+YM2cO1qxZgy+//BJVqlQJXT979mwMGzYMgBjG4/LLL0cgEEBOTg4WLFiAGjVq4NJLLzWUTRCE0CZuZjZvvP7661G1alWcPHkSP/zwA7p164ZGjRqhVatW2LBhAxYvXozbbrst6r6dO3di69atqF27Nv7v//4vdDw/Px8PP/wwVq9ejYoVK6J58+aoUaMGdu3ahU8//RRff/01PvroI1Wjnpn36hTHjx9H586dcerUKbRp0wbNmjWL8M6XDP1///vf0bp1a6Snp2Pv3r1YuHAhFi1ahNdffx3//Oc/I9IMBoMYMGAAFi9ejIyMDFx++eWoWrUqNm7ciLvuugudO3e2Le+wYcPw1VdfoU2bNvi///s/bN++HT/99BPWrFmDjz/+GBdffHHE9atXr8a///1vBAIBnHvuubj66qtx4sQJjBkzBhs3brQthx6S8Uvepvz666948MEHkZ2dHYrNnZ6ejs2bN+PNN9/Et99+i6lTp0bogsS+ffvQsWNHZGRkoHXr1hAEwVS8ertt30svvYSpU6fC7/ejTZs2qFu3Lnbu3Ilu3bqha9euqs9ySm8Bsf4vX74cGRkZqvesWrUK+fn58Pl86NChg6k0lYwdOxbvvPMOfD
4fWrVqhfr162PPnj1YuHAhvv32W7zwwguaIXeWL1+Ojz76KFSfjhw5gnXr1uHVV19FdnY2nnrqqdC1HTt2xPbt27F
"text/plain": [
"<Figure size 1500x1000 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Analysis focused on price change rate and its dynamics over continuous trading periods.\n",
"Higher volatility periods often provide more opportunities for trend-following strategies.\n"
]
}
],
"source": [
"\n",
"\n",
"# --- 1. Data Loading and Preprocessing (与之前代码相同) ---\n",
"def load_and_preprocess_data(file_path):\n",
" \"\"\"\n",
" Loads historical futures data and performs basic preprocessing.\n",
" Assumes data contains 'datetime', 'open', 'high', 'low', 'close', 'volume' columns.\n",
" \"\"\"\n",
" try:\n",
" df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
" # Ensure data is sorted by time\n",
" df = df.sort_index()\n",
" \n",
" # --- NEW: Optional filtering for typical trading hours ---\n",
" # If your data includes non-trading hours (e.g., overnight, weekends in daily data)\n",
" # and you only want to analyze main trading sessions, uncomment and adjust.\n",
" # Example for typical daytime futures trading:\n",
" # df = df[(df.index.hour >= 9) & (df.index.hour < 15)] # Filter 9:00 to 14:59 for example\n",
" # df = df[df.index.dayofweek < 5] # Exclude Saturday (5) and Sunday (6) if using daily data with weekends\n",
"\n",
" initial_rows = len(df)\n",
" df.dropna(inplace=True)\n",
" if len(df) < initial_rows:\n",
" print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
" \n",
" # Check if necessary columns exist\n",
" required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
" if not all(col in df.columns for col in required_columns):\n",
" raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
" \n",
" print(f\"Successfully loaded {len(df)} rows of data.\")\n",
" return df\n",
" except FileNotFoundError:\n",
" print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
" return None\n",
" except Exception as e:\n",
" print(f\"Error during data loading or preprocessing: {e}\")\n",
" return None\n",
"\n",
"# --- 2. Stationary Indicator Calculation Function (与之前代码相同) ---\n",
"def calculate_stationary_indicators(df, volume_window=10, price_lag=5):\n",
" \"\"\"\n",
" Calculates stationary indicators based on volume and price.\n",
" \"\"\"\n",
" df_processed = df.copy() \n",
"\n",
" df_processed['volume_roc'] = df_processed['volume'].pct_change(volume_window) * 100\n",
" df_processed['volume_ma_ratio'] = df_processed['volume'] / df_processed['volume'].rolling(window=volume_window).mean()\n",
" rolling_mean_vol = df_processed['volume'].rolling(window=volume_window).mean()\n",
" rolling_std_vol = df_processed['volume'].rolling(window=volume_window).std()\n",
" df_processed['volume_normalized_zscore'] = (df_processed['volume'] - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)\n",
"\n",
" df_processed['log_return'] = np.log(df_processed['close'] / df_processed['close'].shift(1))\n",
" df_processed['future_log_return'] = np.log(df_processed['close'].shift(-price_lag) / df_processed['close'])\n",
"\n",
" try:\n",
" macd, macdsignal, macdhist = ta.MACD(df_processed['close'], fastperiod=12, slowperiod=26, signalperiod=9)\n",
" df_processed['macd_hist_diff'] = macdhist.diff(1) \n",
" except Exception as e:\n",
" # print(f\"TA-Lib MACD calculation failed, possibly due to installation or data issues: {e}. 'macd_hist_diff' will contain NaN.\")\n",
" df_processed['macd_hist_diff'] = np.nan \n",
"\n",
" df_processed.dropna(inplace=True)\n",
" if df_processed.empty:\n",
" print(\"Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.\")\n",
" else:\n",
" print(f\"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.\")\n",
" return df_processed\n",
"\n",
"# --- 3. 价格变化率分析与可视化函数 (横轴调整为连续索引) ---\n",
"def analyze_price_change_rate_for_trend(processed_df, rolling_vol_window=30):\n",
"    \"\"\"\n",
"    Plot and summarise the log returns stored in ``processed_df``.\n",
"\n",
"    Draws a histogram of 'log_return', prints its descriptive statistics\n",
"    (including kurtosis and skewness), then plots the log returns and their\n",
"    rolling standard deviation against a gap-free integer index.\n",
"\n",
"    Args:\n",
"        processed_df: DataFrame that contains a 'log_return' column. It is only\n",
"            read; no helper columns are written back to it.\n",
"        rolling_vol_window: Number of rows in the rolling standard deviation.\n",
"\n",
"    Returns:\n",
"        None. Results are shown as figures and printed text.\n",
"    \"\"\"\n",
"    if 'log_return' not in processed_df.columns or processed_df['log_return'].isnull().all():\n",
"        print(\"Error: 'log_return' column not found or contains only NaN values. Cannot analyze price change rate for trend.\")\n",
"        return\n",
"\n",
"    print(\"\\n--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\")\n",
"\n",
"    log_returns = processed_df['log_return']\n",
"    # Consecutive integers on the x-axis hide the date gaps between trading sessions.\n",
"    period_index = np.arange(len(processed_df))\n",
"\n",
"    # 1. Distribution of Log Returns (Price Change Rate)\n",
"    plt.figure(figsize=(10, 6))\n",
"    sns.histplot(log_returns, bins=100, kde=True, color='purple', alpha=0.7)\n",
"    plt.title('Distribution of Log Returns (Price Change Rate)', fontsize=16)\n",
"    plt.xlabel('Log Return', fontsize=12)\n",
"    plt.ylabel('Frequency', fontsize=12)\n",
"    plt.grid(True, linestyle='--', alpha=0.6)\n",
"    plt.show()\n",
"\n",
"    print(\"\\nStatistical summary of Log Returns:\")\n",
"    print(log_returns.describe())\n",
"    print(f\"Kurtosis of Log Returns: {log_returns.kurtosis():.4f}\")\n",
"    print(f\"Skewness of Log Returns: {log_returns.skew():.4f}\")\n",
"\n",
"    # 2. Time Series of Log Returns and Rolling Volatility\n",
"    rolling_volatility = log_returns.rolling(window=rolling_vol_window).std()\n",
"\n",
"    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)\n",
"\n",
"    # Subplot 1: log returns over continuous trading periods\n",
"    ax1.plot(period_index, log_returns, label='Log Returns', color='blue', alpha=0.7, linewidth=0.8)\n",
"    ax1.set_title('Log Returns Over Trading Periods (Continuous Index)', fontsize=16)\n",
"    ax1.set_ylabel('Log Return', fontsize=12)\n",
"    ax1.grid(True, linestyle='--', alpha=0.6)\n",
"    ax1.legend()\n",
"\n",
"    # Subplot 2: rolling volatility over continuous trading periods\n",
"    ax2.plot(period_index, rolling_volatility, label=f'Rolling Volatility ({rolling_vol_window} periods)', color='red', linewidth=1.5)\n",
"    ax2.set_title(f'Rolling Volatility ({rolling_vol_window}-period Std Dev of Log Returns) Over Trading Periods', fontsize=16)\n",
"    ax2.set_ylabel('Volatility', fontsize=12)\n",
"    ax2.set_xlabel('Trading Period Index', fontsize=12)\n",
"    ax2.grid(True, linestyle='--', alpha=0.6)\n",
"    ax2.legend()\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"    print(\"\\nAnalysis focused on price change rate and its dynamics over continuous trading periods.\")\n",
"    print(\"Higher volatility periods often provide more opportunities for trend-following strategies.\")\n",
"\n",
"\n",
"# --- Main Execution Flow ---\n",
"if __name__ == \"__main__\":\n",
"    df_raw = load_and_preprocess_data(file_path)\n",
"\n",
"    # Guard first: there is nothing to analyse when loading failed or returned no rows.\n",
"    if df_raw is None or df_raw.empty:\n",
"        print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"    else:\n",
"        processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"        analyze_price_change_rate_for_trend(processed_data, rolling_vol_window=30)"
2025-07-10 15:07:31 +08:00
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20c278fde79da68a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-21T18:08:35.997351Z",
"start_time": "2025-06-21T18:08:35.773406Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-07-10 15:07:31 +08:00
"Indicators calculated. 5772 rows of data remaining for analysis.\n",
"已计算 'pct_change',共 5772 条有效数据。\n",
"\n",
2025-07-10 15:07:31 +08:00
"自相关性计算结果 (前5期):\n",
" Lag Autocorrelation\n",
"0 1 0.359482\n",
"1 2 0.288453\n",
"2 3 0.256691\n",
"3 4 0.289832\n",
"4 5 0.323939\n"
]
},
{
"data": {
2025-07-10 15:07:31 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAScCAYAAADwLq27AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd8FNX+//H3pkBAIJBAIKIgLYGQ0ItSJQhEijQBkYCADa6oqCigXiFfUOwioGChKagoPRcNClwRMBCaF0IRKQpiKBIIoYSy2d8f+e2aJRvSNsns7uv5ePAgOzN7zpmayWfOfI7JYrFYBAAAAAAAAAAwBK/ibgAAAAAAAAAA4B8EbQEAAAAAAADAQAjaAgAAAAAAAICBELQFAAAAAAAAAAMhaAsAAAAAAAAABkLQFgAAAAAAAAAMhKAtAAAAAAAAABgIQVsAAAAAAAAAMBCCtgAAAAAAAABgIARtAQAwiMjISIWGhtr9Cw8P1913363Ro0dr27Zt+Sp33LhxCg0N1dKlS53cYuc6e/asPvroIw0ePFitW7dWeHi4mjRpou7du+vll19WfHx8lu9YtxPy5++//1Z4eLhCQ0PVp0+f4m4OHNiyZYtCQ0M1ePDgIqvTVa4Zmc2dO9d2Pfj888+Luzl2pk+frtDQUE2fPt2j6gYAAAVD0BYAAINp0qSJevfurd69e6tdu3ZKT0/Xd999p+joaM2dO7e4m1coli9frsjISL377rv65ZdfdMcdd6hz58668847df36dX3zzTcaOnSonn766eJuqltZvny5rl27Jknas2eP9u/fXyj1DB48WKGhodqyZUuhlI/cW7p0qUJDQzVu3LjibopTLV682PbzkiVLirElAAAAzuFT3A0AAAD2+vXrZ9fr8cqVK3rllVe0fPlyvfXWW7r77rtVo0aNXJf37LPP6tFHH1VQUFBhNLfAvvzyS02cOFEmk0mPPvqoRowYoTJlytgtc/DgQU2fPl1//PFHMbXSPVmDW5UrV9bJkye1ePFivfzyy8XcKhQ3o18zbvTLL7/o4MGDKleunK5fv659+/Zpz549ql+/fnE3rdgNGjRIXbt2VYUKFYq7KQAAII/oaQsAgMGVLFlSr7zyikqXLi2z2awffvghT98PCgpSrVq1VLZs2UJqYf4dOnRIr776qqSMV7LHjBmTJWArSbVr19b777+vl156qaib6La2b9+uw4cPy9/fX6+99pokKTY2VlevXi3mlqG4Gfma4Yi1l223bt0UFRVlN83TBQQEqFatWgoICCjupgAAgDwiaAsAgAu45ZZbbL1r//zzT9v0zDldlyxZogEDBqhp06YKDQ21LZdTfsrExESNHTtWkZGRioiIUIsWLXTffffpjTfe0PHjx7Msf/LkSU2ZMkX33nuvGjZsqMaNG6tv375asGCBrl+/nqf1+vTTT3Xt2jXVrVtXDz30UI7LN2/ePNt5q1ev1sCBA9WkSRM1atRIDzzwgNavX+9w2YMHD2ratGl64IEH1LZtW4WHh6tly5YaOnSovv32W4ffyZxb9Nq1a/r444/VrVs3NWjQQC1bttSoUaN06NChbNu3bds2Pfzww2rWrJltmy1fvlzSzXPzpqWlac6cOerfv7+aNWumiIgIdenSRW+++abOnj2bbX05sQa1evToodatW6t69eo6d+5ctg8F/vzzT4WGhioyMjLbMq15ma3HnnWbJSQkSJKGDBlil7P5xmPy0KFDGj9+vDp06KDw8HC1aNFCDz30ULb7xCqvx/CuXbv09NNPq02bNgoPD9ddd92lESNGaNOmTQ7Lz3wOHThwQKNHj1abNm1Ur149W67QzCkgtm3bphEjRujOO+9U3bp17dbTmfvz559/1qRJk9SzZ0+1bNlS4eHhateunUaPHq1du3ZlWT4yMlLjx4+XJC1btsxuX2TOmZvTNWPVqlV66KGH1KJFC4WHh6tDhw4aP368jhw54nD5zMfF5s
2bNXz4cDVv3lwNGjRQ7969bedBfly6dMl2fNx///3q27evJOk///mPrly54vA7mVNEXLp0Se+88446deqk8PBwtW7dWmPHjtXJkycdfvf777/XSy+9pO7du6t58+aKiIiwbdfDhw/nut3Tpk1TaGioXnnllWyX2bVrl0JDQ9W2bVu7a+vPP/+sESNGqFWrVqpfv76aN2+uzp07a8yYMdq6datdGTfLafvdd99p6NChatmyperXr6+WLVuqa9euevnllwstVQoAAMg9grYAALiICxcuSJJKlCiRZd6kSZP08ssvy9vbW3fffbcaNmwok8mUY5mffvqp+vXrp+XLl8vX11cdO3ZUkyZNdP36dc2ZMydLDtKtW7eqR48emjdvnq5cuaJWrVqpSZMmOnbsmCZNmqTHH3/cliM1JxaLRevWrZMk9erVK1ftzc60adNs+W7bt2+v6tWra+fOnXr88ccdBiHnzp2rDz74QCkpKQoJCVGnTp1Uo0YNbdmyRc8884ymTJmSbV3Xrl3TY489pg8//FDBwcG6++67VapUKf3www964IEH7ILqVqtWrdLgwYO1ceNGBQcHKzIyUqVKldL48eP19ttvZ1vXyZMn1a9fP73xxhv6448/FBERofbt2+vatWuaPXu2+vbt6zAomZMLFy4oLi5OktS3b1+ZTCZbSg5n5gOtWLGievfurYoVK0qS2rRpY8vX3Lt3b1WrVs227I8//qjevXtr6dKlKlmypDp37qx69epp69ateuaZZ/Tiiy86rCOvx/DXX3+tAQMGKC4uTpUqVVKXLl1UvXp1/fe//9Xw4cM1Y8aMbNdn586d6tu3r3bt2qVmzZqpffv2uuWWW+yWiYuL0+DBg3Xs2DG1atVKrVu3tp2zzt6fEyZM0KJFi+Tl5aUmTZqoQ4cOKlu2rL777jsNHDhQq1evtlu+S5cuatKkiSSpWrVqdvuibdu2OdZnsVg0duxYPfvss9q2bZvq1aunzp07q0SJElq6dKl69+6tn376KdvvL1myREOHDtW5c+fUtm1b1atXT3v37tXYsWM1b968XK93Zt9++60uXrxoG7ixWbNmuuOOO3T+/Hl9//33N/1uamqqHnjgAX311VeqVauW2rVrJ4vFouXLl2vgwIFKTU3N8p3Ro0dr1apVKlmypO688061adNGXl5eWrp0qfr27asdO3bkqt0DBw6Ur6+vYmNjdf78eYfLLFy4UJI0YMAA+fhkZLVbtmyZhg8frh9//FG33XabOnfurGbNmqlMmTL69ttvc1xnqxkzZmj06NHaunWr6tSpo6ioKDVs2FDe3t5avHixNm/enKtyAABAIbIAAABD6NChgyUkJMSyZMmSLPP27dtnqVu3riUkJMSyePFi2/SQkBBLSEiIpUmTJpadO3c6LHfs2LEOy12zZo0lJCTEEhERYVm1alWW7/3222+WgwcP2j6fOnXK0qJFC0toaKhl4cKFFrPZbJuXnJxsGTJkiCUkJMQyffr0XK3v0aNHbe3funVrrr5zI+v3mzVrZvnll1/s5k2bNs0SEhJi6dy5c5bvbdmyxXL06NEs0w8dOmRp166dJSQkxPK///3Pbt7mzZtt9fXq1cty6tQp27y0tDTL8OHDLSEhIZZ///vfdt87ceKEpVGjRpaQkBDL/Pnz7eYlJCTY5oWEhNjNS09PtzzwwAOWkJAQy4svvmhJTU21zbt27Zrl9ddft4SEhFgGDx6cw1bKatGiRZaQkBBLz5497dpZr149S926dS1//vlnlu8cO3bMEhISYunQoUO25VqP4WPHjtlNj46OtoSEhFg2b97s8HunT5+2NG3a1BISEmL58MMPLenp6bZ5u3btsjRv3twSEhJiWbRokd338noM79+/3xIWFmYJDQ21LFu2zG7ZH3/80VK/fn1LSEiIZePGjXbzrOdQSEiI5e2337Y79m9cx5CQEMuCBQuyzM/v/rQed9HR0VnK/OGHHyznzp1zOD0sLMzSok
ULy+XLl+3mLVmyxBISEmIZO3Zslu/duL43XjO++OILS0hIiKVly5aWvXv32q2b9Xxr1qyZ5cyZM3bfsx4X9evXt6x
"text/plain": [
2025-07-10 15:07:31 +08:00
"<Figure size 1400x1200 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"def analyze_price_change_autocorrelation(df: pd.DataFrame,\n",
"                                         price_col: str = 'close',\n",
"                                         max_lags: int = 50,\n",
"                                         plot_specific_lag: int = 1):\n",
"    \"\"\"\n",
"    Analyse the autocorrelation of per-row price movement and plot it.\n",
"\n",
"    The analysed series is the absolute high-low range of each row. It is\n",
"    stored in a column named 'pct_change' on an internal copy of ``df``, so the\n",
"    caller's DataFrame is not modified.\n",
"\n",
"    Args:\n",
"        df (pd.DataFrame): Market data, sorted by time (or carrying a date\n",
"            index). Must contain ``price_col``, 'high' and 'low'.\n",
"        price_col (str): Column that has to be present in ``df``; defaults to\n",
"            'close'. The active range measure does not read it; only the\n",
"            commented-out alternative below does.\n",
"        max_lags (int): Largest lag to compute. Defaults to 50.\n",
"        plot_specific_lag (int): Lag shown in the scatter plot. Defaults to 1.\n",
"            Values outside 1..max_lags (or None) replace the scatter panel\n",
"            with a short note.\n",
"\n",
"    Returns:\n",
"        pd.DataFrame with columns 'Lag' and 'Autocorrelation', or None when a\n",
"        required column is missing, no data is left, or the autocorrelation\n",
"        cannot be computed.\n",
"    \"\"\"\n",
"    # --- 1. Data preparation ---\n",
"    # Validate every column that is read below, not only price_col.\n",
"    for column in (price_col, 'high', 'low'):\n",
"        if column not in df.columns:\n",
"            print(f\"错误: DataFrame中找不到列 '{column}'\")\n",
"            return None\n",
"\n",
"    # Work on a copy so the original DataFrame is left untouched.\n",
"    df_analysis = df.copy()\n",
"\n",
"    # Movement measure: absolute high-low range of each row.\n",
"    df_analysis['pct_change'] = (df_analysis['high'] - df_analysis['low']).abs()\n",
"    # Alternative measure: df_analysis['pct_change'] = df[price_col].pct_change().abs()\n",
"\n",
"    # Discard rows for which the measure is undefined.\n",
"    df_analysis = df_analysis.dropna(subset=['pct_change'])\n",
"\n",
"    if df_analysis.empty:\n",
"        print(\"错误: 计算'pct_change'后DataFrame为空无法进行分析。\")\n",
"        return None\n",
"\n",
"    print(f\"已计算 'pct_change',共 {len(df_analysis)} 条有效数据。\")\n",
"\n",
"    # --- 2. Autocorrelation for lags 1..max_lags ---\n",
"    lags = range(1, max_lags + 1)\n",
"    try:\n",
"        autocorrs = [df_analysis['pct_change'].autocorr(lag=n) for n in lags]\n",
"    except Exception as e:\n",
"        print(f\"计算自相关性时出错: {e}\")\n",
"        return None\n",
"\n",
"    autocorr_df = pd.DataFrame({'Lag': lags, 'Autocorrelation': autocorrs})\n",
"    print(\"\\n自相关性计算结果 (前5期):\")\n",
"    print(autocorr_df.head())\n",
"\n",
"    # --- 3. Visualisation ---\n",
"\n",
"    # a) Autocorrelation (ACF) stem plot\n",
"    plt.style.use('seaborn-v0_8-whitegrid')\n",
"    fig, axes = plt.subplots(2, 1, figsize=(14, 12))  # two stacked panels\n",
"    fig.suptitle('Price Change Autocorrelation Analysis', fontsize=16)\n",
"\n",
"    ax1 = axes[0]\n",
"    ax1.stem(autocorr_df['Lag'], autocorr_df['Autocorrelation'])\n",
"    ax1.set_title(f'Autocorrelation of Daily Price Changes (Lags 1-{max_lags})')\n",
"    ax1.set_xlabel('Lag (Number of Previous K-lines)')\n",
"    ax1.set_ylabel('Correlation Coefficient')\n",
"    ax1.grid(True)\n",
"\n",
"    # b) Scatter plot of the series against one specific lag\n",
"    ax2 = axes[1]\n",
"    if plot_specific_lag is not None and 1 <= plot_specific_lag <= max_lags:\n",
"        lag_col_name = f'pct_change_lag{plot_specific_lag}'\n",
"        df_analysis[lag_col_name] = df_analysis['pct_change'].shift(plot_specific_lag)\n",
"        # Only the two plotted columns decide which rows are usable.\n",
"        df_scatter = df_analysis.dropna(subset=['pct_change', lag_col_name])\n",
"\n",
"        sns.regplot(x=lag_col_name, y='pct_change', data=df_scatter, ax=ax2,\n",
"                    scatter_kws={'alpha': 0.5, 's': 20},\n",
"                    line_kws={'color': 'red', 'linestyle': '--'})\n",
"        ax2.set_title(f'Current vs. Lag-{plot_specific_lag} Price Change')\n",
"        ax2.set_xlabel(f'Previous K-line\\'s pct_change (t-{plot_specific_lag})')\n",
"        ax2.set_ylabel('Current K-line\\'s pct_change (t)')\n",
"        ax2.grid(True)\n",
"        ax2.axhline(0, color='grey', lw=0.5)\n",
"        ax2.axvline(0, color='grey', lw=0.5)\n",
"    else:\n",
"        ax2.text(0.5, 0.5, 'No specific lag plot requested or lag is out of range.',\n",
"                 ha='center', va='center', transform=ax2.transAxes)\n",
"        ax2.set_axis_off()\n",
"\n",
"    plt.tight_layout()  # re-flow the panels beneath the figure title\n",
"    plt.show()\n",
"\n",
"    return autocorr_df\n",
"\n",
"\n",
"# df_raw is the frame loaded by the main-execution cell above.\n",
"if df_raw is None or df_raw.empty:\n",
"    print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"else:\n",
"    processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"    analyzed_df = analyze_price_change_autocorrelation(processed_data, plot_specific_lag=50)\n"
2025-07-10 15:07:31 +08:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "quant",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}