2025-06-22 23:03:50 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
|
"id": "b93c7ca1",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:51.444292Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:51.441812Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"outputs": [],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"import talib as ta # Make sure TA-Lib is installed: pip install TA-Lib\n",
|
|
|
|
|
|
"import statsmodels.api as sm\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
# Ignore all warnings so library deprecation notices do not clutter the notebook output
warnings.filterwarnings("ignore")

# --- 0. Configure your file path ---
# Point the FUTURES_DATA_PATH environment variable at your CSV file to run this
# notebook on another machine; when it is unset, the original local path is used.
import os

file_path = os.environ.get(
    'FUTURES_DATA_PATH',
    '/mnt/d/PyProject/NewQuant/data/data/KQ_m@DCE_jm/KQ_m@DCE_jm_min60.csv',
)
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"execution_count": 9,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"id": "60a48bac",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:51.476119Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:51.467752Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"outputs": [],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
# --- 1. Data Loading and Preprocessing ---
def load_and_preprocess_data(file_path):
    """
    Loads historical futures data from a CSV file and performs basic preprocessing.

    The CSV must contain a 'datetime' column (parsed and used as the index) plus
    'open', 'high', 'low', 'close' and 'volume' columns.

    Parameters:
    file_path: Path or file-like object passed straight to ``pd.read_csv``.

    Returns:
    pd.DataFrame sorted by its datetime index, with every row that has a missing
    value removed, or None when the file is not found, a required column is
    absent, or any other error occurs (the reason is printed, not raised).
    """
    required_columns = ['open', 'high', 'low', 'close', 'volume']
    try:
        df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')

        # Check if necessary columns exist before spending any work on cleaning
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file is missing required columns. Please ensure it contains: {required_columns}")

        # Ensure data is sorted by time
        df = df.sort_index()

        # Placeholders such as "--" make pandas read a price/volume column as text,
        # which breaks the arithmetic done later; turn them into NaN so the row is
        # dropped together with the other incomplete rows.
        df[required_columns] = df[required_columns].apply(pd.to_numeric, errors='coerce')

        # Check and handle missing values
        initial_rows = len(df)
        df = df.dropna()
        if len(df) < initial_rows:
            print(f"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.")

        print(f"Successfully loaded {len(df)} rows of data.")
        print("First 5 rows of data:")
        print(df.head())
        return df
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please check the path.")
        return None
    except Exception as e:
        # Deliberate best-effort: callers test the result for None instead of catching.
        print(f"Error during data loading or preprocessing: {e}")
        return None
|
|
|
|
|
|
"\n",
# --- 2. Stationary Indicator Calculation Function ---
def calculate_stationary_indicators(df, volume_window=10, price_lag=5):
    """
    Calculates stationary indicators based on volume and price.

    Works on a copy of ``df`` and adds the columns 'volume_roc',
    'volume_ma_ratio', 'volume_normalized_zscore', 'log_return',
    'future_log_return' and 'macd_hist_diff'.

    Parameters:
    df (pd.DataFrame): K-line data containing 'close' and 'volume' columns.
    volume_window (int): Window size for calculating volume indicators (e.g., 10 for the past 10 periods' average volume).
    price_lag (int): Lag period for calculating future returns (e.g., 5 for future 5 periods' returns).

    Returns:
    pd.DataFrame: The copy with the indicator columns added. Rows with a missing
    value, or with a non-finite indicator value, are removed. If the MACD
    calculation fails, 'macd_hist_diff' is left as NaN and is not used to
    decide which rows to drop.
    """
    df_processed = df.copy()
    volume = df_processed['volume']
    close = df_processed['close']

    # --- Stationary Volume Indicators ---
    # Using rolling mean and rolling standard deviation to avoid look-ahead bias and ensure local stationarity
    rolling_volume = volume.rolling(window=volume_window)
    rolling_mean_vol = rolling_volume.mean()
    rolling_std_vol = rolling_volume.std()

    # 1. Volume Rate of Change (VROC)
    df_processed['volume_roc'] = volume.pct_change(volume_window) * 100

    # 2. Volume to Moving Average Ratio
    df_processed['volume_ma_ratio'] = volume / rolling_mean_vol

    # 3. Normalized Volume (Z-score standardization); a zero std becomes NaN to avoid division by zero
    df_processed['volume_normalized_zscore'] = (volume - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)

    # --- Stationary Price Indicators ---
    # 1. Current Period Log Return
    df_processed['log_return'] = np.log(close / close.shift(1))

    # 2. Future N-period Log Return (Our target variable for research)
    # shift(-price_lag) moves future data up to align with the current row for future return calculation
    df_processed['future_log_return'] = np.log(close.shift(-price_lag) / close)

    # 3. MACD Histogram Difference (Measures momentum change rate, potentially capturing trend initiation)
    macd_available = True
    try:
        macd, macdsignal, macdhist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        df_processed['macd_hist_diff'] = macdhist.diff(1)
    except Exception as e:
        print(f"TA-Lib MACD calculation failed, possibly due to installation or data issues: {e}. 'macd_hist_diff' will contain NaN.")
        df_processed['macd_hist_diff'] = np.nan
        macd_available = False

    # A zero in a denominator (e.g. a bar with zero volume) yields +/-inf, which
    # survives dropna() and then turns mean/std/correlation into inf/NaN.
    # Treat such values as missing so the affected rows are dropped below.
    indicator_columns = ['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore',
                         'log_return', 'future_log_return', 'macd_hist_diff']
    df_processed[indicator_columns] = df_processed[indicator_columns].replace([np.inf, -np.inf], np.nan)

    # Drop rows with NaN values resulting from rolling windows and shift operations.
    # When MACD failed its column is entirely NaN; leave it out of the check,
    # otherwise every row would be discarded.
    columns_to_check = [col for col in df_processed.columns
                        if macd_available or col != 'macd_hist_diff']
    df_processed = df_processed.dropna(subset=columns_to_check)

    if df_processed.empty:
        print("Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.")
    else:
        print(f"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.")
    return df_processed
|
|
|
|
|
|
"\n",
# --- 3. Analysis and Visualization Function ---
def analyze_and_visualize(processed_df):
    """
    Prints summary statistics and draws diagnostic charts relating the volume
    indicators to future log returns.

    Steps: descriptive statistics, correlation of each volume indicator with
    'future_log_return' (plus a heatmap), future returns grouped by a
    low/normal/high volume category, and a three-panel chart of close price,
    volume z-score and future log return.

    Parameters:
    processed_df (pd.DataFrame): Frame holding 'close', 'volume_roc',
        'volume_ma_ratio', 'volume_normalized_zscore', 'log_return' and
        'future_log_return'. The index must be datetime-like because the chart
        title formats it with strftime.

    Side effects:
    Adds a 'volume_category' column to ``processed_df`` itself when
    'volume_normalized_zscore' is present. Returns None.
    """
    if processed_df.empty:
        print("No data available for analysis. Please check data loading and indicator calculation steps.")
        return

    volume_indicators = ['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore']
    target_column = 'future_log_return'
    available_columns = [col for col in volume_indicators + ['log_return', target_column]
                         if col in processed_df.columns]
    # +/-inf values (e.g. a rate of change measured from a zero-volume bar) make
    # mean/std/correlation come out as inf/NaN, so treat them as missing here.
    finite_df = processed_df[available_columns].replace([np.inf, -np.inf], np.nan)

    print("\n--- Statistical Description of Indicators ---")
    if available_columns:
        print(finite_df.describe())

    # --- Correlation Analysis ---
    print("\n--- Correlation between Volume Indicators and Future Returns ---")
    for indicator in volume_indicators:
        if indicator in finite_df.columns and target_column in finite_df.columns:
            correlation = finite_df[indicator].corr(finite_df[target_column])
            print(f"Correlation between '{indicator}' and 'future_log_return': {correlation:.4f}")
        else:
            print(f"Column '{indicator}' or 'future_log_return' does not exist. Skipping correlation calculation.")

    # Plot correlation heatmap (needs at least two columns to be meaningful)
    heatmap_columns = [col for col in volume_indicators + [target_column] if col in finite_df.columns]
    if len(heatmap_columns) > 1:
        plt.figure(figsize=(9, 7))
        sns.heatmap(finite_df[heatmap_columns].corr(), annot=True, cmap='coolwarm', fmt=".2f")
        plt.title('Correlation Matrix: Volume Indicators vs. Future Log Returns', fontsize=16)
        plt.show()

    # --- Conditional Analysis: Future Returns based on Volume Anomaly ---
    # Define thresholds for abnormal volume (using quantiles of Z-score to adapt dynamically)
    has_zscore = 'volume_normalized_zscore' in processed_df.columns
    if has_zscore:
        zscore = processed_df['volume_normalized_zscore']
        low_vol_threshold = zscore.quantile(0.2)
        high_vol_threshold = zscore.quantile(0.8)

        # First matching condition wins, so a value equal to both thresholds is 'Low Volume';
        # anything matching neither (including NaN) is 'Normal Volume'.
        processed_df['volume_category'] = np.select(
            [zscore <= low_vol_threshold, zscore >= high_vol_threshold],
            ['Low Volume', 'High Volume'],
            default='Normal Volume',
        )

        print("\n--- Statistics of Future Log Returns by Volume Category ---")
        print(processed_df.groupby('volume_category')['future_log_return'].describe())

        # Plot box plot of future returns by volume category
        plt.figure(figsize=(10, 6))
        sns.boxplot(x='volume_category', y='future_log_return', data=processed_df, order=['Low Volume', 'Normal Volume', 'High Volume'], palette='viridis')
        plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)
        plt.xlabel('Volume Category', fontsize=12)
        plt.ylabel('Future Log Return', fontsize=12)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.show()

        # Plot histogram of future returns, categorized by volume
        plt.figure(figsize=(12, 7))
        sns.histplot(data=processed_df, x='future_log_return', hue='volume_category', kde=True, bins=70,
                     palette={'Low Volume': 'red', 'Normal Volume': 'blue', 'High Volume': 'green'},
                     alpha=0.6, line_kws={'linewidth':2})
        plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)
        plt.xlabel('Future Log Return', fontsize=12)
        plt.ylabel('Frequency', fontsize=12)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.show()
    else:
        print("Column 'volume_normalized_zscore' not found. Skipping volume category analysis.")

    # --- Price Chart with Indicator Overlay (Simplified to line plot; consider mplfinance for OHLC charts) ---
    print("\n--- Price Chart with Volume Indicator Overlay ---")
    # Select a segment of data for visualization, ensuring sufficient data points
    if len(processed_df) > 100:  # Need at least 100 data points to select a segment
        sample_size = min(200, len(processed_df) // 2)  # Show max 200 data points or half of data
        # Use the most recent contiguous bars. A random sample would join bars that
        # are far apart in time with straight lines and misrepresent the price path.
        plot_df = processed_df.sort_index().iloc[-sample_size:]
    else:
        plot_df = processed_df.copy()  # If data volume is small, plot all

    if not plot_df.empty:
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1, 1]})

        # Subplot 1: Price Trend
        ax1.plot(plot_df.index, plot_df['close'], label='Close Price', color='blue', linewidth=1.5)
        ax1.set_title(f'Futures Price Trend, Normalized Volume, and Future Returns (Sample Period: {plot_df.index.min().strftime("%Y-%m-%d %H:%M")} to {plot_df.index.max().strftime("%Y-%m-%d %H:%M")})', fontsize=16)
        ax1.set_ylabel('Price', fontsize=12)
        ax1.grid(True, linestyle='--', alpha=0.6)
        ax1.legend()

        # Subplot 2: Normalized Volume Indicator
        if has_zscore:
            # On a date axis bar widths are measured in days; scale the width to the
            # typical spacing between bars so intraday bars do not overlap.
            bar_width = 0.8
            if len(plot_df) > 1:
                median_gap_days = plot_df.index.to_series().diff().median() / pd.Timedelta(days=1)
                if median_gap_days > 0:
                    bar_width = 0.8 * median_gap_days
            ax2.bar(plot_df.index, plot_df['volume_normalized_zscore'], width=bar_width, color='grey', alpha=0.7, label='Normalized Volume (Z-score)')
            ax2.axhline(high_vol_threshold, color='green', linestyle='--', linewidth=0.8, label=f'High Vol Threshold ({high_vol_threshold:.2f})')
            ax2.axhline(low_vol_threshold, color='red', linestyle='--', linewidth=0.8, label=f'Low Vol Threshold ({low_vol_threshold:.2f})')
            ax2.legend()
        ax2.set_ylabel('Normalized Volume', fontsize=12)
        ax2.grid(True, linestyle='--', alpha=0.6)

        # Subplot 3: Future Log Return
        ax3.plot(plot_df.index, plot_df['future_log_return'], label='Future Log Return', color='purple', linewidth=1.5)
        ax3.axhline(0, color='black', linestyle='--', linewidth=0.8)  # Zero return line
        ax3.set_ylabel('Future Log Return', fontsize=12)
        ax3.set_xlabel('Time', fontsize=12)
        ax3.grid(True, linestyle='--', alpha=0.6)
        ax3.legend()

        plt.tight_layout()
        plt.show()
    else:
        print("Selected plot time range has no data. Adjust time range or check data volume.")
|
|
|
|
|
|
"\n"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"execution_count": 10,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"id": "9ab3d054",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:52.109768Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:51.487174Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Successfully loaded 7601 rows of data.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"First 5 rows of data:\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
|
|
|
|
|
"2020-12-31 14:00:00 1637.5 1641.0 1617.0 1629.0 38945.0 105378.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1622.5 1655.0 1622.0 1655.0 50393.0 103492.0 \n",
|
|
|
|
|
|
"2021-01-04 10:00:00 1655.0 1671.5 1651.5 1667.5 22096.0 102361.0 \n",
|
|
|
|
|
|
"2021-01-04 11:00:00 1667.5 1673.0 1665.5 1672.0 7241.0 104808.0 \n",
|
|
|
|
|
|
"2021-01-04 13:00:00 1671.5 1684.5 1670.5 1683.5 12756.0 105361.0 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"2020-12-31 14:00:00 103492.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 102361.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 10:00:00 104808.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 11:00:00 105361.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 13:00:00 107994.0 DCE.jm2105 \n",
|
|
|
|
|
|
"Indicators calculated. 7562 rows of data remaining for analysis.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Statistical Description of Indicators ---\n",
|
|
|
|
|
|
" volume_roc volume_ma_ratio volume_normalized_zscore log_return \\\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"count 7562.000000 7562.000000 7562.000000 7562.000000 \n",
|
|
|
|
|
|
"mean inf 0.990829 -0.018057 -0.000090 \n",
|
|
|
|
|
|
"std NaN 0.548813 0.911218 0.010415 \n",
|
|
|
|
|
|
"min -100.000000 0.000000 -2.159083 -0.209683 \n",
|
|
|
|
|
|
"25% -53.025281 0.577703 -0.724892 -0.004721 \n",
|
|
|
|
|
|
"50% -1.585616 0.876829 -0.219656 0.000000 \n",
|
|
|
|
|
|
"75% 92.828648 1.290911 0.554619 0.004706 \n",
|
|
|
|
|
|
"max inf 8.934612 2.837810 0.083932 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
" future_log_return \n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"count 7562.000000 \n",
|
|
|
|
|
|
"mean -0.000439 \n",
|
|
|
|
|
|
"std 0.023079 \n",
|
|
|
|
|
|
"min -0.216902 \n",
|
|
|
|
|
|
"25% -0.012910 \n",
|
|
|
|
|
|
"50% -0.000594 \n",
|
|
|
|
|
|
"75% 0.012445 \n",
|
|
|
|
|
|
"max 0.119191 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Correlation between Volume Indicators and Future Returns ---\n",
|
|
|
|
|
|
"Correlation between 'volume_roc' and 'future_log_return': nan\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Correlation between 'volume_ma_ratio' and 'future_log_return': -0.0011\n",
|
|
|
|
|
|
"Correlation between 'volume_normalized_zscore' and 'future_log_return': 0.0032\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAMHCAYAAAAgsia3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAt85JREFUeJzs3XmcTfUfx/H3nZ3BrIx9b8YyY2esyRASCinZshUi+aWEVGRNZItKllBEWSIkQiEmy9gHhWGGwZjFNvvM/f0xzeWaGQb3ju31fDzugznne875fs8999z7PZ/vYjAajUYBAAAAACzG5kFnAAAAAAAeN1S0AAAAAMDCqGgBAAAAgIVR0QIAAAAAC6OiBQAAAAAWRkULAAAAACyMihYAAAAAWBgVLQAAAACwMCpaAAAAAGBhVLRgNdu3b9fQoUPVrFkzVatWTb6+vqpfv766d++ub7/9VlFRUQ86i/dt+vTp8vHx0fTp03PsmAEBAfLx8VFYWFiOHfNudenSRT4+PvLx8VHfvn1vm3bdunWmtD4+Pjp//nwO5TJ70vP1oHTq1Ek+Pj6aOHFittKPHj1aPj4+ev311+/5mI/CNWYNYWFhpvc7p8qe1T1k+fLl8vHx0ZAhQ3IkH3gwbr73ZfXauHHjg87mQyf93AQGBj7orNyVm+8xN78qV66spk2batiwYTp27NiDziYsiIoWLC4qKkrdu3dXjx49tHz5ciUlJcnf31/NmjVT6dKlFRQUpHHjxqlx48bav3//g87uQ2XIkCHy8fHR8uXLH3RWLObPP//UpUuXslz/008/WeW4D7qCZCkvvfSSJGnlypVKSUm5bdrExEStXr3abDvgXgQGBsrHx0ddunR50Fl5ItSvX19t2rTJ9FWoUKH72jeV9odTs2bNTO9xjRo1FB0drWXLlqlt27b67bffLHKM9IpdQECARfaHu2f3oDOAx8vVq1fVsWNHnTp1SqVLl9aoUaNUo0YNszSJiYlasWKFpk+froiIiAeU00fXt99+q6SkJHl5eT3orNyRr6+vDh06pJUrV6pXr14Z1oeHh+uvv/6Sn5+fDh48+AByeGdr1659oMdv3ry5Ro8erYiICP35559q1KhRlml///13xcTEyN3dnS/WR9yzzz6rypUrK2/evA86K8gBb7zxhvz9/R90NpCDBg8erKJFi5r+jo6OVt++fRUUFKSPPvpITz/9tJycnB5gDmEJRLRgUaNGjdKpU6dUpEgRLV68OEMlS5IcHBz0yiuvaOXKlSpduvQDyOWjrXjx4ipTpozs7e0fdFbuqHXr1rK3t88yQrd8+XKlpqaqXbt2OZyz7CtTpozKlCnzwI6fK1cuPf/885J0x0hn+vr0845HV968eVWmTBkVKFDgQWcFQA5wc3PT4MGDJaVVuoKCgh5wjmAJVLRgMaGhofrll18kSUOHDpWrq+tt03t6emZa0VqzZo1ee+011apVS76+vmrUqJGGDh2qU6dOZbqfm/uTbNy4UV27dlWtWrXM2m/f3Ixs2bJleuWVV1S9evUMfTEuXLigcePG6bnnnlPlypVVtWpVtWvXTt99952Sk5OzfS6SkpL0888/a9CgQWrevLmqVaumSpUqqVmzZho9erQuXLhglj49vL9ixQrT+bu5/fbN/Tdu138mLi5Os2bNUps2bVS1alVVrlxZzz//vCZPnqzLly9nSH9zswKj0aglS5aobdu2qlKliqpXr64ePXrc183e1dVVAQEBOnHiRIb9GI1GrVixQk5OTmrZsmWW+zh79qxmzZqlrl276plnnpGvr69q1KihV199VT/88INSU1PN0qf3eUl3a1v49PN2c3OamJgYjRkzRk2aNJGvr69Zc6nMmiDOnTtXPj4+atasma5du5Yhz0uXLpWPj48aNmxokb6I6c0AN2/enOX+Lly4oO3bt5ull6Tk5GQtXrxYHTp0UPXq1eXn56emTZtmeh3eyZ36bmXV9PXm5S
dPntTAgQNVp04dValSRe3atTPrg7J//3716dNHtWvXVqVKlfTKK69ox44dWeYpPj5ec+fO1csvv6waNWrIz89PzZo104QJExQdHX1X5bud9H6HgYGBCg4OVv/+/eXv7y9fX1+1aNFCc+fOldFozDKP06dPV9OmTU19Vd9//32dO3cuy+PdqbnXhQsX9Omnn6pVq1aqWrWqqlSpombNmmnIkCHau3evWdoDBw5owoQJeumll1SvXj35+vqqbt266tOnj/76669My9q1a1dJ0t9//232+bk1Unov11d27sdXr17V5MmT1apVK1WpUsV03jp06KCpU6cqKSkpy3OX7sSJE/Lx8VHNmjWVkJCQZbq2bdtm6At18eJFjR49Ws2aNZOfn58qV66shg0b6rXXXtOcOXPueGxruPkazExm/f0CAgI0dOhQSdKKFSvM3sub73OW+GwfP35cAwcOVP369VW+fHmzfCQnJ+vHH39Uly5dTN/tAQEB+vjjjxUeHn7P5+RunDhxQkOHDlWjRo3k6+urWrVq6bXXXrttq4Xk5GTNnTtXLVu2lJ+fn+rUqaMBAwbo33//tUqTzJu/ayIjIzPNT3bP45AhQ9S4cWNJad+jt34X3pzudl0Wsipndr5Db74mo6KiNHLkSDVs2FC+vr5q2LChRo0apStXrmR63HXr1qlbt27y9/dXxYoV5e/vrxYtWmj48OE6evRoNs7mw4Gmg7CYzZs3KyUlRfny5bunZktGo1FDhgzRypUrZWdnpxo1asjDw0OHDx/W8uXLtW7dOk2bNk1PP/10ptvPmzdP3333nXx9fdWgQQNdvHhRtra2ZmlGjRqlRYsWqWrVqnrmmWcUGhoqg8EgSdq1a5f69euny5cvq0iRIqpbt64SExN18OBBjRo1Sps3b9ZXX32VrUhBZGSkBg8ebHoq7ePjo7i4OAUHB2vhwoVas2aNfvjhB5UoUUKSlDt3brVp00Z79uzRmTNnVK1aNdM6SSpfvvwdjxkTE6Nu3bopODhYefLkUe3atWVvb6+///5bX331lX755RfNnz/frKnCzYYOHapffvlF1atX1zPPPKPg4GBt375du3bt0nfffafKlSvfMQ+ZadeundavX69ly5apatWqpuU7d+5UaGioWrVqddvmUT///LOmTp2qokWLqmTJkqpWrZoiIiIUFBSkvXv3avv27Zo2bZrpfSxfvrzatGljqrS2adPGbH+5c+c2+zs6Olrt2rXT1atXVb16dVWsWPGO73GPHj20a9cubdq0SR999JE+//xz07qjR49q9OjRsrOz0+TJk+Xu7m5aFxYWZvri+/3337N8L25VqVIleXt76/jx41q1apW6deuWIc2KFSuUkpKiypUr66mnnpKU1ky3d+/e+uuvv+To6Ch/f3/lyZNHQUFBWrhwoX755RfNmTNHFStWzFY+7teRI0c0atQoeXl5qU6dOjp37pyCgoLUv39/TZkyRXZ2dho4cKCeeuop1alTRydPntS+ffvUq1cvzZ8/P0OE/MKFC+rVq5eOHz8uV1dX+fn5ydnZWUeOHNGcOXP066+/auHChSpSpIjFyrBt2zbNmzdPxYsXV7169RQREaE9e/bo008/VXh4uD744AOz9HFxcerWrZv27dun3Llzq379+nJ0dNS2bdu0ZcsWPfPMM3edhx07dmjAgAG6cuWKPDw8VKdOHdnb2+vs2bOmh13VqlUzpf/8888VGBiosmXLqmLFisqVK5dCQ0O1efNmbd68WcOGDdNrr71mSt+gQQM5ODho27Zt8vT0VIMGDUzr3NzcTP+/3+srq/txXFycOnbsqOPHj8vd3V21a9dW7ty5FRERoVOnTmnmzJnq3r37HT+nZcqUUdWqVRUUFKSNGzeaIsM3O3bsmA4fPixPT0/TexEREaF27drp4sWLKly4sBo0aCBHR0ddvHhRR48e1eHDh9WzZ887v1EPgWbNmmnfvn3au3evihcvrurVq5vWWbJFSV
BQkD7++GPlz59fNWrUUHx8vJydnSVJ165dU9++ffX3338rd+7c8vX1lZubm44fP64ffvhBv/76q+bNm6cKFSpYLD+
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 900x700 with 2 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Statistics of Future Log Returns by Volume Category ---\n",
|
|
|
|
|
|
" count mean std min 25% 50% \\\n",
|
|
|
|
|
|
"volume_category \n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"High Volume 1513.0 0.000063 0.021089 -0.215961 -0.011448 0.000000 \n",
|
|
|
|
|
|
"Low Volume 1513.0 -0.000316 0.024593 -0.216902 -0.013264 -0.000571 \n",
|
|
|
|
|
|
"Normal Volume 4536.0 -0.000648 0.023194 -0.210238 -0.013161 -0.000731 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
" 75% max \n",
|
|
|
|
|
|
"volume_category \n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"High Volume 0.011198 0.119191 \n",
|
|
|
|
|
|
"Low Volume 0.013302 0.101466 \n",
|
|
|
|
|
|
"Normal Volume 0.012605 0.103522 \n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2EAAAIkCAYAAACX7iNUAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAsORJREFUeJzs3XlcVFX/B/DPzLAIKgIiCiIWGIuyibuiiZq5VE9qWu5mLpWllhtamaZPUlo/KDPLfcklt9QntUeTzAVMWVxREzKVRUBWA1lm7u8PXvc+DAw4DAPDDJ/369Ur59wzw/dwZw7zvfcsMkEQBBAREREREVGdkBs6ACIiIiIiooaESRgREREREVEdYhJGRERERERUh5iEERERERER1SEmYURERERERHWISRgREREREVEdYhJGRERERERUh5iEERERERER1SEmYURERERERHWISRjVS/369YOnp6f0n5eXFzp27Ig+ffpg/Pjx+Oyzz3D58uUqX2P8+PHw9PTE+fPn6yjqqoltun//vlp5fYsTAEJCQuDp6Yn9+/cbOpRacfLkSYwZMwaBgYHSe0yb33/596Wm/zZv3lz7DTAy4u/NGN9Pms6xn58f+vXrh/feew8XL140dIgm5euvv4anpye+/vprg8UwduxYeHp6YtWqVVrVX758OTw9PTF16lSdf2Zlfx8akuLiYuzbtw9vv/02+vbtCz8/P/j7+6N///6YOXMmDh06hKKiIkOHSaQ3ZoYOgKgqgYGBaNu2LQDg8ePHyMrKQnx8PP744w9s3LgRXbt2xaeffoo2bdrUWgz9+vVDUlISfv31V7i4uNTaz6kr+/fvx8KFCzFs2DCEhoYaOpw6Fx8fj5kzZ0KlUqF79+5o0aIFZDIZHBwctH6Nsu/L8tq1a1ej+M6fP48JEyaga9eu2LZtW41ei/QnKCgILVq0AABkZWXh6tWrOHLkCI4ePYqFCxdi4sSJevk5np6eAICbN2/q5fWo+l555RVcvHgRP/30E9577z0oFIpK6xYVFeHw4cPS80g3165dw8yZM3H//n3IZDJ4eXnBz88PMpkMSUlJOHHiBH755ReEhYXh559/hpWVVY1+Hj9nVB8wCaN6beTIkRg+fLhamSAI+P333/Hpp5/ijz/+wGuvvYZdu3ZVSMQ+++wzFBQUwNnZuS5DrtTmzZtRXFyMli1bGjqUJ3r//fcxdepUODo6GjoUvTtx4gSKi4vx5ptv4r333tPpNTS9L8m0TZs2Dd26dZMeFxQUYP78+fjvf/+LlStXYtCgQUbx2aYnGzRoEJYvX4709HT8/vvvCA4OrrTur7/+iuzsbNjb26Nfv351GKXpuHbtGsaOHYuCggIEBwfjgw8+qPD3PDMzE5s3b8bGjRtRXFxc4ySMqD7gcEQyOjKZDM8++yz27NmDp556ChkZGfjwww8r1HN2doa7u3u96axdXV3h7u4Oc3NzQ4fyRI6OjnB3d0fTpk0NHYreJScnA0Cld7KItGFlZSX1O8XFxTh9+rSBIyJ9sbKywtChQwHgiUNoxeMvvfSSUfTt9U1xcTFmzZqFgoICDBgwAGvWrNE4ssXe3h7vv/8+duzYAQsLCwNESqR/TMLIaNnY2GDRokUAgKioKFy9elXteGVzrYqKirB+/XoMHz4cHTt2hI+PD3r16oURI0bg888/R3Z2NoDSP66enp5ISkoCAPTv319tXoj4uufPn4enpyfGjx+PgoIChIeHY/DgwfD391e7MqrNmP8//vgDkydPRteuXeHv749XXnkFP/30k8a6T5pLpmluRb9+/bBw4UIAwIEDB9TaM378eKnek+aE/fzzz5g4cSK6du0KHx8fBAcHY+HChfjrr7801i/b9qioKEyePBldunSBn58fhg0bVmkbn6SkpAQ7d+7Ea6+9hk6dOsHX1xcDBw7E8uXL8eDBA42/D7FNCxcu1Nh2fXnS71B8f4WEhEhl48ePx4QJEwCUvh
fKnp+y7yVdzn358uTkZCxatAjPPvssOnTooBYHABw7dgxvvPEGunfvDh8fH/Tu3Rtz587F7du3dfp9VFdqaiqWLVuGgQMHwtfXF506dZLueiuVSo3PEQQBe/fuxfDhw+Hv749u3bphypQpiImJUfuc6kvLli1ha2sLAHj48KHGOtr+HsVzIyo/D03sN540Z6qydmrTT5V9T+bn5+OLL77Ac889J/WRCxYsqPC5Ep07dw5vvvkmevbsiQ4dOqBLly4YOHAg5s6diwsXLjz5l6lBUlIS5s+fj6CgIPj6+uL555/H119/jcePH6vV++qrr+Dp6YnFixdX+lqXL1+Gp6cnevfujZKSkif+bHFoYUREBDIzMzXWefDgAc6ePatWH6hev/QkT/q7UVk/U7Y8MTERs2fPRo8ePRAQEIARI0bgxIkTUt1Lly7hzTffRPfu3eHn54dXX30VkZGRlcb0+PFjbNy4EaNGjULnzp2lc/P5558jKyurWu37z3/+g3v37sHc3BxLliyBXF7111I/Pz80atRIepyUlITvv/8eEyZMQN++feHj44POnTtj9OjR2LVrF1Qqldrztf2cif766y8sXrwYAwYMkPqhsWPH4uDBg5XGmJWVheXLl0vxBAcH49///jdyc3Or/Lugy/tGjBsA9u3bh1dffRWdOnWCp6cn7t27J31viY2NrTTeJUuWwNPTE59//nmldah2cDgiGbU+ffrA1tYW2dnZOHfuHHx8fKqsr1KpMG3aNERGRqJJkybo3LkzbGxskJmZib///hsbNmzAiy++CFtbW7i6umLYsGH45ZdfkJ+fj+effx7W1tbSa5WfQ1RYWIjx48cjISEBnTt3hpeXl5TQaeP48eP44Ycf4ObmhqCgIKSlpSE6OhoLFizAjRs3KnxJ1sXzzz+PuLg4xMTEwNXVFZ06dZKOubm5PfH5giAgJCQEP/30E8zMzNC5c2c0b94c165dw/79+3H06FF89dVX6NOnj8bn79u3D99++y3at2+P3r17IykpCXFxcViwYAGys7MxadIkrdtSVFSE6dOn49y5c7C0tES3bt3QpEkTxMbGYtu2bfjPf/6DDRs2oEOHDgAAb29vDBs2DNHR0bh7967avC5t2l4XevfuDQsLC5w5cwYODg7o3bu3dMzOzk5vP+fOnTsYNmwYzM3NERgYCEEQpNcvKSnB3LlzcfToUVhYWKBDhw5o2bIl7ty5g8OHD+P48eP4+uuvKz3H+nD58mVMnToV2dnZcHZ2xoABA5CXl4c//vgDsbGxOH78OL799tsKV8SXLl2KnTt3Qi6Xo3PnzmjRogVu3bqFcePG6W3OVlkqlQr5+fkAgObNm6sdq+7vUXx/HjhwAAAwbNgwtdcr2/fUhDb9VF5eHl577TWkpKSgU6dOeOaZZxAXF4effvoJFy5cwMGDB9Xukh84cEC6uOPn54du3brh8ePHePDgAY4cOQI7Ozt06dKlWnHev38fw4cPl/qZwsJCnD9/HqtXr8a5c+ewefNmWFpaAgBGjx6N77//HocPH8bcuXNhY2NT4fV++OEHAMCrr74KM7Mnf/Xx8/ODh4cHbt26hUOHDmnsmw4cOAClUgl/f38888wzAKrfL9W269evY9myZWjZsiV69OiB5ORkxMbG4p133kFYWBjMzMwwe/ZsPPPMM+jRowcSExMRFxeHKVOmYMuWLejcubPa6z148ABTpkzBrVu3YGtrC19fXzRu3BjXr1/Hhg0bcOzYMWzbtg2tW7fWKr5ff/0VgPqcy+o4ePAgwsPD4eLigqeeegqBgYFIT09HbGwsYmJicPbsWXz11VeQyWQAqvc5O3r0KBYsWIDCwkK4ubnh2WefRV5eHi5fvoz58+cjKioKK1asUHt+Wloaxo4di7t378LW1hbBwcFQqVQ4ePAgTp8+DXd3d43tqOn7ZtmyZdixYwc6duyIvn374t69e5DL5Rg3bhxCQ0Oxfft2dOzYscLzHj16hIMHD0
Iul2PMmDHa/+JJPwSieig4OFjw8PAQ9u3b98S6kyZNEjw8PIS5c+eqlY8bN07w8PAQoqKipLI//vhD8PDwEF5++WU
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+wAAAJxCAYAAADYVjihAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4FNX+BvB3Nr2ShBAgQIAENoQkkNA7UqQoSFVRhGsFFcUGUrwiXgWxXgTbDy6gYBdCu4ooXgSR3glVCCGQACG9bPrO7491N1nSs212z/t5Hh4ms7MzZ/bds8l3Z+aMJMuyDCIiIiIiIiJSFJWtG0BERERERERElbFgJyIiIiIiIlIgFuxERERERERECsSCnYiIiIiIiEiBWLATERERERERKRALdiIiIiIiIiIFYsFOREREREREpEAs2ImIiIiIiIgUiAU7ERERERERkQKxYCdSiMGDByM8PNzwr0OHDoiNjcWAAQMwZcoUvP322zh58mSN65gyZQrCw8Nx4MABK7W6Zvp9unbtmtF8pbUTAObOnYvw8HDExcXZuikW8b///Q8PPvggunTpYniP1eX1v/19WdW/zz//3PI7YGf0r5s9vp+qyrhTp04YPHgwXnjhBRw+fNjWTXQoy5cvR3h4OJYvX26zNkyePBnh4eF477336rT8m2++ifDwcDzxxBMN3mZ1vx9EUlJSgg0bNuDpp5/GHXfcgU6dOqFz584YMmQIZs6ciS1btqC4uNjWzSQiG3O2dQOIyFiXLl3QunVrAEBhYSEyMzNx9uxZHDx4EKtXr0aPHj2wePFitGrVymJtGDx4MJKTk/Hbb7+hZcuWFtuOtcTFxWHevHkYN24clixZYuvmWN3Zs2cxc+ZMaLVa9OrVC02aNIEkSQgMDKzzOiq+L2/Xrl07k9p34MABTJ06FT169MC6detMWheZT79+/dCkSRMAQGZmJuLj4/HTTz9h27ZtmDdvHv7xj3+YZTvh4eEAgPPnz5tlfVR/EydOxOHDh7Fp0ya88MILcHJyqnbZ4uJibN261fA8apjTp09j5syZuHbtGiRJQocOHdCpUydIkoTk5GTs2LED27dvx9KlS/Hjjz/Cw8PDpO2xnxHZLxbsRApz7733Yvz48UbzZFnG7t27sXjxYhw8eBCTJk3Ct99+W6lof/vtt1FQUIDg4GBrNrlan3/+OUpKStC0aVNbN6VWL774Ip544gkEBQXZuilmt2PHDpSUlODJJ5/ECy+80KB1VPW+JMc2bdo09OzZ0/BzQUEBXn75Zfzyyy949913MWLECLvo21S7ESNG4M0338StW7ewe/duDBo0qNplf/vtN2RlZSEgIACDBw+2Yisdx+nTpzF58mQUFBRg0KBBeOWVVyr9Ps/IyMDnn3+O1atXo6SkxOSCnYjsF0+JJ7IDkiRh4MCB+OGHH9CmTRukpaXhn//8Z6XlgoODERYWpphf7CEhIQgLC4OLi4utm1KroKAghIWFwcfHx9ZNMbuUlBQAqPYIOVFdeHh4GD53SkpK8Mcff9i4RWQuHh4euPvuuwGg1ss49I/fc889dvHZrjQlJSV47rnnUFBQgKFDh+KTTz6p8oy5gIAAvPjii/j666/h6upqg5YSkVKwYCeyI76+vpg/fz4AYP/+/YiPjzd6vLprw4uLi/Gf//wH48ePR2xsLKKiotC3b19MmDAB77zzDrKysgDo/hALDw9HcnIyAGDIkCFG17Hq13vgwAGEh4djypQpKCgowIcffoiRI0eic+fORkdc6nKN4sGDB/Hoo4+iR48e6Ny5MyZOnIhNmzZVuWxt175XdS3o4MGDMW/ePADAxo0bjfZnypQphuVqu4b9xx9/xD/+8Q/06NEDUVFRGDRoEObNm4fLly9XuXzFfd+/fz8effRRdO/eHZ06dcK4ceOq3cfalJaW4ptvvsGkSZPQtWtXREdHY9iwYXjzzTdx8+bNKl8P/T7Nmzevyn03l9
peQ/37a+7cuYZ5U6ZMwdSpUwHo3gsV86n4XmpI9rfPT0lJwfz58zFw4EBERkYatQMAfv75Zzz22GPo1asXoqKi0L9/f8yaNQsXL15s0OtRXzdu3MAbb7yBYcOGITo6Gl27djWcTVNWVlblc2RZxvr16zF+/Hh07twZPXv2xOOPP46jR48a9VNzadq0Kfz8/AAA6enpVS5T19dRn43e7dfN6z83arvGu7r9rMvnVMX3pEajwfvvv48777zT8Bk5Z86cSv1Kb+/evXjyySfRp08fREZGonv37hg2bBhmzZqFQ4cO1f5iViE5ORkvv/wy+vXrh+joaAwfPhzLly9HYWGh0XLLli1DeHg4FixYUO26Tp48ifDwcPTv3x+lpaW1blt/evvOnTuRkZFR5TI3b97En3/+abQ8UL/PpdrU9nujus+ZivMTEhLw/PPPo3fv3oiJicGECROwY8cOw7InTpzAk08+iV69eqFTp064//77sW/fvmrbVFhYiNWrV+O+++5Dt27dDNm88847yMzMrNf+/fe//8XVq1fh4uKChQsXQqWq+U/xTp06wd3d3fBzcnIyVqxYgalTp+KOO+5AVFQUunXrhgceeADffvsttFqt0fPr2s/0Ll++jAULFmDo0KGGz6HJkydj8+bN1bYxMzMTb775pqE9gwYNwqJFi5CTk1Pj74WGvG/07QaADRs24P7770fXrl0RHh6Oq1evGv5uOXbsWLXtXbhwIcLDw/HOO+9UuwyRkvCUeCI7M2DAAPj5+SErKwt79+5FVFRUjctrtVpMmzYN+/btg7e3N7p16wZfX19kZGTgypUrWLVqFUaPHg0/Pz+EhIRg3Lhx2L59OzQaDYYPHw5PT0/Dum6/5rmoqAhTpkzBpUuX0K1bN3To0MFQ/NfFr7/+iq+++gqhoaHo168fUlNTceTIEcyZMwfnzp2rVFA1xPDhw3H8+HEcPXoUISEh6Nq1q+Gx0NDQWp8vyzLmzp2LTZs2wdnZGd26dUPjxo1x+vRpxMXFYdu2bVi2bBkGDBhQ5fM3bNiATz/9FB07dkT//v2RnJyM48ePY86cOcjKysLDDz9c530pLi7G9OnTsXfvXri5uaFnz57w9vbGsWPHsG7dOvz3v//FqlWrEBkZCQCIiIjAuHHjcOTIESQlJRldh16XfbeG/v37w9XVFXv27EFgYCD69+9veMzf399s20lMTMS4cePg4uKCLl26QJZlw/pLS0sxa9YsbNu2Da6uroiMjETTpk2RmJiIrVu34tdff8Xy5curzdgcTp48iSeeeAJZWVkIDg7G0KFDkZubi4MHD+LYsWP49ddf8emnn1Y60vb666/jm2++gUqlQrdu3dCkSRNcuHABDz30kNmuMa9Iq9VCo9EAABo3bmz0WH1fR/37c+PGjQCAcePGGa2v4mePKeryOZWbm4tJkybh+vXr6Nq1K9q3b4/jx49j06ZNOHToEDZv3mx09s3GjRsNXwR26tQJPXv2RGFhIW7evImffvoJ/v7+6N69e73aee3aNYwfP97wOVNUVIQDBw7go48+wt69e/H555/Dzc0NAPDAAw9gxYoV2Lp1K2bNmgVfX99K6/vqq68AAPfffz+cnWv/c69Tp05Qq9W4cOECtmzZUuVn08aNG1FWVobOnTujffv2AOr/uWRpZ86cwRtvvIGmTZuid+/eSElJwbFjx/DMM89g6dKlcHZ2xvPPP4/27dujd+/eSEhIwPHjx/H444/jiy++QLdu3YzWd/PmTTz++OO4cOEC/Pz8EB0dDS8vL5w5cwarVq3Czz//jHXr1qFFixZ1at9vv/0GwHiMiPrYvHkzPvzwQ7Rs2RJt2rRBly5dcOvWLRw7dgxHjx7Fn3/+iWXLlkGSJAD162fbtm3DnDlzUFRUhNDQUAwcOBC5ubk4efIkXn75Zezfvx9vvfWW0fNTU1MxefJkJCUlwc/PD4MGDYJWq8XmzZvxxx9/ICwsrMr9MPV988Ybb+Drr7
9GbGws7rjjDly9ehUqlQoPPfQQlixZgi+//BKxsbGVnpeXl4fNmzdDpVLhwQcfrPsLT2RLMhEpwqBBg2S1Wi1v2LC
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1200x700 with 1 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Price Chart with Volume Indicator Overlay ---\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAASlCAYAAABHkZBpAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xl4TGf/BvB7JguJIEQSCaG2xBJN7FJatRRdtKhaiqILXWy1VFWrqLVovZaW+qF21VraV4uWWmvfSSKpPSSCkJBNljm/P+Y9RyYzk8w5M8nMmdyf63LNOLM9z8w9Zybfec7zaARBEEBEREREREREREREREa09m4AEREREREREREREZGjYhGdiIiIiIiIiIiIiMgMFtGJiIiIiIiIiIiIiMxgEZ2IiIiIiIiIiIiIyAwW0YmIiIiIiIiIiIiIzGARnYiIiIiIiIiIiIjIDBbRiYiIiIiIiIiIiIjMYBGdiIiIiIiIiIiIiMgMFtGJiIiIiIiIiIiIiMxwtXcDyP7atWuHW7duFXid8ePHY+DAgcXTICd09OhRvPXWW0bbPT09ERQUhOeeew6DBg2Cj4+P7PsOCQkBAMTExFjdTluxJFP5ValSBX///XcRtcj2+vfvj2PHjmHVqlVo0aKF1fe3YMECLFy4EADw/PPPY8mSJSav9+uvv+KTTz5B8+bNsXr1aqsfV23E95Kp/jvie0G0efNmjB8/Ht26dcPMmTMLvK5Op0OHDh1w69YtTJo0CX369Cn0/j/44AP8/fff6NOnDyZNmqSojY78/JGeuJ8YOnQohg0bZtFtzH3+5Hf8+HGUK1fO2iY6jYI+twMDAxEREYGBAweiatWqdmidc0tPT8eLL76I8uXLY+vWrdBqDcf8REZGYs2aNThx4gQSExOh0WhQsWJF+Pv7o1GjRmjdujVatWplp9YXHSXvf2sfKy+tVouyZcuidu3aePHFF9G7d2+4ubkVaTtExfn5dPPmTbRv396m30vj4+Oxf/9+HDx4EJGRkbh79y7c3NwQFBSE559/HgMHDkTFihXN3j41NRU//PADdu7ciYSEBHh4eCAsLAyDBg1CRESE0fUzMjJw5MgRHDhwACdOnEBcXByys7Ph4+ODxo0bo1+/fmjSpInJx7py5Qr++ecfREZGIjIyEpcvX0Zubi5GjBiBDz/80Orn4sKFC/jhhx9w4sQJPHr0CL6+vmjbti0+/PBDk38H5ebm4q+//pLaExkZieTkZLi4uCAqKsqqthw/fhwnT56U7lf822Xt2rVo2rSprPsaMWIEduzYAQD4+uuv8dprrylq06FDh7BixQqcO3cOGRkZCAwMRKdOnTB48GCUKVPG5G1ycnLw008/4ddff8WlS5eQm5uLatWqoXPnznjnnXdQunRpRW0BgO3bt2PdunW4ePEisrOzUa1aNXTp0gUDBw4s8P0v93V2NDqdDmfOnMGBAwdw5MgRXLlyBampqfDy8kL9+vXRrVs3dOnSBRqNxux9yH0OrNlPiPtIc1566SV8++238p6EPOTmwNJawLBhwzB06FCL22Ht/kDuvtRSSt4nGRkZWLVqFf744w9cv34dGo0GNWvWRNeuXfHmm2/CxcVFUVt0Oh02btyITZs24dKlSwCA2rVro0ePHujZs2eBmZW7/5kwYQK2bNmCLVu2FJpBZ8IiOkkaN26M6tWrm7ysdu3aVt13QYWvkqZbt24AAEEQEB8fjzNnziAmJgZbtmzBqlWrUKtWLTu30HqdOnXCgwcPDLalp6dj586d0uWenp4Gl1eoUKHY2ufo9u7di+PHj6NZs2b2bgrZgVarRffu3bFgwQJs2rSp0CL6vXv3sH//fgBAjx49iqOJpFLi548p1hbDPv30U2zZsgUzZsxA9+7drbovR5P3c/v27ds4e/YsVq9ejU2bNmH58uVo1KiR1Y/B70lPLFmyBLdv38bkyZONCuirV6/G9OnTodPp4O/vjxYtWqBcuX
J48OABIiMjcfr0aRw9etQpi+j2UKlSJTz77LMAgOzsbFy9ehUnT57EyZMn8fvvv2P58uVG3+fI2OjRo3Hq1Cm4urqiXr16CA8PR0pKCs6ePYslS5bg559/xvLly1GvXj2j2yYlJeHNN9/EtWvXpCJcUlIS9u/fj/3792PChAno37+/wW22bduGzz//HIB+kEpERARcXV1x8eJF/PHHH9i+fTtGjBiBDz74wOjx1q9fj1WrVhXJ87Bjxw6MHj0aOTk5aNiwIapWrYoLFy5gzZo12LFjB9atW2f0t2haWhpGjBhRJO2ZOnUqLl68aPX9/PHHH9ixYwc0Gg0EQVB8Pz/++CNmzJgBjUaDpk2bwsfHBydPnsTixYuxc+dOrFu3zqiImpWVhSFDhuDQoUNwd3dHeHg4ypQpg3PnzmH+/Pn4888/sXr1akU/kk+bNg2rVq2Cq6srWrZsCU9PTxw5cgRz5szBnj17sHz5cpMFeiWvc0Hs8fkYFxcnff/29vZGaGgoypUrh7i4OBw6dAiHDh3CH3/8gfnz58Pd3d3o9kqeA2v2EyJz3/Oefvppxc+FkhyYqgWIkpOTsWfPHgBAy5YtZbXFmv2Bkn2pJZQ8P8nJyRgwYAAuXryIMmXKoHHjxtBqtTh79iymTp2KPXv2YPHixSazVZDc3FyMHDkSf/75Jzw8PKTn9/Dhw5g4cSIOHTqEb7/91ui7FaBs/zNs2DD897//xdSpU0vWd1eBSry2bdsKwcHBwqZNm4rsMY4cOSIEBwcL/fr1K7LHcGRi/4ODg40uu3LlivDcc88JwcHBwptvvin7vi9duiRcunTJFs0sUnFxcdJzEBcXZ+/mWK1fv35CcHCwcOTIEZvc3/z584Xg4GAhLCxMCA4OFnr27Gnyelu3buV7yUz/Hfm9sGnTJiE4OFgYN26cRde/deuWULduXSE4OFiIjY0t8LpLly4VgoODhVdffdWqNprbR5HjEPcT8+fPt/g2BX3+2NK4ceOK/LtEcSroeYuPjxc6duwoBAcHCy+99JJNH6+k7ttFt2/fFkJDQ4XXX3/d6LLo6Ghpv7hixQohJyfH4PLc3Fzh2LFjwvfff19czS1WSt7/1j6WqTzu3r1bqFevnhAcHCzMmzevyNsiCMX7+S5+X23btq3N7nPEiBHCihUrhPv37xtsT0pKkr5PduzY0SjTgiAIH3zwgRAcHCwMGDBASE9Pl7bv3btXqFevnlC3bl0hOjra4DabN28Wxo8fL0RGRhps1+l0wvLly6V929GjR40eb+PGjcLMmTOF3377Tbh06ZIwduxYITg4WFi0aJE1T4Fw+/Zt6Tvuhg0bpO05OTnCmDFjhODgYOH1118XdDqdwe3S0tKE0aNHC8uWLRMOHz4sREdHC8HBwUK9evWsao8gCMLMmTOFBQsWCLt37xZu374t/U18/Phxi+/j7t27QvPmzYWuXbsKvXv3FoKDg4WtW7fKbktkZKQQEhIi1KtXT9i7d6+0PT09XRgwYIAQHBwsDBs2zOh2s2bNEoKDg4Vnn31WiImJkbY/evRIGDx4sBAcHCyMGjVKdnv++usvITg4WAgPDxcuXLggbU9KShJeeeUVITg4WJg5c6bR7ZS+zgWxx+fj9evXhbfeekvYt2+f0fvy6NGjQnh4uBAcHCwsWLDA6LZKnwNr9hNF9T1PaQ4K8sMPP0h9kcua/YGSfWlhlD4/w4cPF4KDg4VXXnlFiI+Pl7bfvXtX6NGjhxAcHCx88803stoiCIKwYsUKaX9w48YNafuNGzeE1q1bC8HBwcLq1auNbqd0/yMIgjBlyhQhODhY2LVrl+z2qhXnRCeysxo1aki/qJ44cQJ37tyRdftatWo5xeh10uvQoQMCAgJw5swZ/PXXX/Zujqo403shMDAQzzzzDABg06ZNBV538+bNAIDXX3+9yNtFREBAQIB0+PGlS5cQFxdn5xY5j3Xr1iErK8vkUTU7duyATqdDo0aNMHDgQKNDnb
VaLZo1a4b333+/uJpbIrVr1w6vvvoqAP0h7MVB7Z/v8+bNw8CBA42OuqxYsSJmz54NALh27RpOnz5tcPmlS5ewe/d
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1500x1200 with 3 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
# Load the bar data configured via `file_path` and run the volume-indicator analysis.
df_raw = load_and_preprocess_data(file_path)

if df_raw is None or df_raw.empty:
    # Nothing usable came back from the loader; report and stop.
    print("无法进行分析,请检查数据加载是否成功。")
else:
    # Tunable parameters (per the original author's note; confirm against
    # calculate_stationary_indicators, which is defined elsewhere in the notebook):
    #   volume_window - look-back length, in bars, for the volume moving average
    #   price_lag     - horizon, in bars, used for the forward (future) return
    processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)
    analyze_and_visualize(processed_data)
|
|
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"execution_count": 11,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"id": "f13d0294",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:52.150501Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:52.142821Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"outputs": [],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# 无需数据加载和指标计算函数,假设 processed_df 已经传入并包含所需列\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
def analyze_trend_continuation_probability(processed_df, return_threshold=0.0001, num_bins=20):
    """
    Estimate how often a bar's direction persists, bucketed by volume Z-score.

    A bar is "significant" when ``abs(log_return) > return_threshold``. For every
    significant bar the function checks whether ``future_log_return`` is significant
    in the same direction (a "continuation"). The direction itself (up vs. down) is
    ignored. Bars are grouped into ``num_bins`` equal-width bins of
    ``volume_normalized_zscore``; the per-bin continuation rate and the per-bin
    sample count are printed and plotted.

    The input DataFrame is not modified; all helper columns live on an internal copy.

    Parameters:
        processed_df (pd.DataFrame): Must contain the columns 'log_return',
            'future_log_return' and 'volume_normalized_zscore'.
        return_threshold (float): Minimum absolute log return for a move to count
            as significant; anything smaller (or NaN) is treated as flat.
        num_bins (int): Number of equal-width bins spanning the observed Z-score range.

    Returns:
        None. Results are reported through print() and two matplotlib figures.
    """
    if processed_df.empty:
        print("Processed data is empty. Cannot perform volume-trend analysis.")
        return

    required_cols = ['log_return', 'future_log_return', 'volume_normalized_zscore']
    if not all(col in processed_df.columns for col in required_cols):
        print(f"Error: Missing one or more required columns: {required_cols}. Please ensure they are calculated.")
        return

    print("\n--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---")

    def direction_sign(log_returns):
        """Vectorised sign: 1 = significant up, -1 = significant down, 0 = flat or NaN."""
        moved_up = log_returns.gt(return_threshold).to_numpy()
        moved_down = log_returns.lt(-return_threshold).to_numpy()
        return np.select([moved_up, moved_down], [1, -1], default=0)

    # 1. Direction of the current and the future move.
    # Work on a private copy so the caller's DataFrame keeps its original columns.
    work_df = processed_df[required_cols].copy()
    work_df['current_direction_sign'] = direction_sign(work_df['log_return'])
    work_df['future_direction_sign'] = direction_sign(work_df['future_log_return'])

    # Flat current bars have no trend that could "continue", so drop them.
    df_for_analysis = work_df[work_df['current_direction_sign'] != 0].copy()

    if df_for_analysis.empty:
        print("No significant current moves (Up/Down) to analyze for continuation.")
        return

    # A missing future return is unknown, not a failed continuation. Keeping such
    # rows would count them as 0 and bias every bin's probability downward.
    df_for_analysis = df_for_analysis[df_for_analysis['future_log_return'].notna()].copy()

    # 2. Target variable: 1.0 when the future move has the same non-zero sign as the
    # current move, 0.0 otherwise (future flat or reversed).
    df_for_analysis['is_continuation'] = (
        df_for_analysis['current_direction_sign'] == df_for_analysis['future_direction_sign']
    ).astype(float)

    # 3. Equal-width binning over the full observed Z-score range (no outlier trimming).
    min_z = df_for_analysis['volume_normalized_zscore'].min()
    max_z = df_for_analysis['volume_normalized_zscore'].max()

    if pd.isna(min_z) or pd.isna(max_z) or (max_z - min_z < 0.001):
        print("Warning: Volume Z-score range is too small or contains NaNs for binning.")
        if pd.isna(min_z) or pd.isna(max_z):
            # No usable Z-scores at all: fall back to a nominal range.
            min_z, max_z = -5, 5
        else:
            # Degenerate range: widen it just enough for linspace to produce distinct edges.
            max_z = min_z + 0.001

    bins = np.linspace(min_z, max_z, num_bins + 1)

    # pd.cut rejects duplicate labels. Start with the usual two decimals and add
    # precision only when narrow bins would otherwise format to identical text.
    for decimals in range(2, 16):
        labels = [f'{bins[i]:.{decimals}f} to {bins[i+1]:.{decimals}f}' for i in range(num_bins)]
        if len(set(labels)) == len(labels):
            break

    df_for_analysis['volume_zscore_bin'] = pd.cut(
        df_for_analysis['volume_normalized_zscore'], bins=bins, labels=labels, include_lowest=True
    )

    # 4. Continuation probability per bin. observed=False keeps every bin in the
    # result; empty bins come out as NaN and are removed by dropna() below.
    continuation_prob = df_for_analysis.groupby('volume_zscore_bin', observed=False)['is_continuation'].mean()

    continuation_df = pd.DataFrame({
        'Volume Z-score Bin': continuation_prob.index,
        'Trend Continuation Probability': continuation_prob.values
    }).dropna()

    if continuation_df.empty:
        print("No data points for trend continuation within the bins. Adjust thresholds or data range.")
        return

    print("\nTrend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):")
    print(continuation_df)

    # 5. Plot: continuation probability per bin.
    # Values above the 0.5 reference line mark Z-score levels where the current
    # direction was more often followed by the same direction than not.
    plt.figure(figsize=(14, 8))

    plt.plot(continuation_df['Volume Z-score Bin'], continuation_df['Trend Continuation Probability'],
             marker='o', linestyle='-', color='purple', label='Trend Continuation Probability')

    plt.title('Trend Continuation Probability vs. Volume Z-score (Direction Agnostic, All Data)', fontsize=18)
    plt.xlabel('Volume Z-score Bins', fontsize=14)
    plt.ylabel('Continuation Probability', fontsize=14)
    plt.xticks(rotation=45, ha='right')  # Rotate labels for readability
    plt.ylim(0, 1)  # Probability range
    plt.axhline(0.5, color='gray', linestyle=':', linewidth=1, label='Random (0.5)')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()

    # 6. Plot: number of observations per bin.
    # Sparsely populated bins give less reliable probability estimates.
    print("\n--- Plotting Number of Data Points per Volume Z-score Bin ---")

    bin_counts = df_for_analysis['volume_zscore_bin'].value_counts().sort_index()

    # Align the counts with the bins that actually appear in the probability table.
    bin_counts = bin_counts.reindex(continuation_df['Volume Z-score Bin'])

    bin_counts_df = pd.DataFrame({
        'Volume Z-score Bin': bin_counts.index,
        'Number of Data Points': bin_counts.values
    }).dropna()

    if bin_counts_df.empty:
        print("No data points found for plotting bin counts.")
        return

    plt.figure(figsize=(14, 8))
    sns.barplot(x='Volume Z-score Bin', y='Number of Data Points', data=bin_counts_df, palette='viridis')
    plt.title('Number of Data Points per Volume Z-score Bin', fontsize=18)
    plt.xlabel('Volume Z-score Bins', fontsize=14)
    plt.ylabel('Number of Data Points', fontsize=14)
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"execution_count": 12,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"id": "74770a30",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:52.425440Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:52.170677Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Successfully loaded 7601 rows of data.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"First 5 rows of data:\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
|
|
|
|
|
"2020-12-31 14:00:00 1637.5 1641.0 1617.0 1629.0 38945.0 105378.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1622.5 1655.0 1622.0 1655.0 50393.0 103492.0 \n",
|
|
|
|
|
|
"2021-01-04 10:00:00 1655.0 1671.5 1651.5 1667.5 22096.0 102361.0 \n",
|
|
|
|
|
|
"2021-01-04 11:00:00 1667.5 1673.0 1665.5 1672.0 7241.0 104808.0 \n",
|
|
|
|
|
|
"2021-01-04 13:00:00 1671.5 1684.5 1670.5 1683.5 12756.0 105361.0 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"2020-12-31 14:00:00 103492.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 102361.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 10:00:00 104808.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 11:00:00 105361.0 DCE.jm2105 \n",
|
|
|
|
|
|
"2021-01-04 13:00:00 107994.0 DCE.jm2105 \n",
|
|
|
|
|
|
"Indicators calculated. 7562 rows of data remaining for analysis.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Trend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):\n",
|
|
|
|
|
|
" Volume Z-score Bin Trend Continuation Probability\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"0 -1.75 to -1.10 0.496047\n",
|
|
|
|
|
|
"1 -1.10 to -0.45 0.507095\n",
|
|
|
|
|
|
"2 -0.45 to 0.20 0.497137\n",
|
|
|
|
|
|
"3 0.20 to 0.85 0.511824\n",
|
|
|
|
|
|
"4 0.85 to 1.50 0.480501\n",
|
|
|
|
|
|
"5 1.50 to 2.15 0.525000\n",
|
|
|
|
|
|
"6 2.15 to 2.80 0.500000\n",
|
|
|
|
|
|
"7 2.80 to 3.45 0.422535\n",
|
|
|
|
|
|
"8 3.45 to 4.10 0.375000\n",
|
|
|
|
|
|
"9 4.10 to 4.75 0.833333\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW4AAAMWCAYAAABhlR+IAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdUFNffBvBnl17EghUrahZRwN57id3YYomKsSSWGEuiMRrTNZrEqLGXaOwaG9hiiZpYQSyxixorAgoqIiAiZef9g3fnx15mYYGFXeH5nONJmJ3duTPzMDt89+69KkmSJBARERERERERERGRxVCbuwFEREREREREREREpI+FWyIiIiIiIiIiIiILw8ItERERERERERERkYVh4ZaIiIiIiIiIiIjIwrBwS0RERERERERERGRhWLglIiIiIiIiIiIisjAs3BIRERERERERERFZGBZuiYiIiIiIiIiIiCwMC7dEREREREREREREFoaFWyIyG19fX3h4eGDhwoXmborFmjJlCjw8PDBlyhRzN8Us2rRpAw8PD/j5+Zm7KflGaGgoPDw84OHhgdDQ0Dzfvm7bQUFBJn9+dh+jN0NQUJB8HoneJNu2bYOHhwe++eabXN2On58fPDw80KZNm1zdjrkU9HsiSm/hwoXw8PCAr6+vuZtiUhllnb8HxgkPD4eXlxfefvttJCYmmrs5RDlibe4GEBVkOfnjc9asWejVq5cJW/PmiouLg7+/P06dOoWbN2/i+fPn0Gq1KFKkCDw8PNCkSRN07doVJUqUMHdTZUFBQThz5gzKli1bIM+jn58fwsLC0KBBAzRs2NDczTG5KVOmwN/fP91yOzs7FC9eHD4+PujduzeaN29uhtZRWjExMVi7di0A4P3334eLi4uZW2S5vvzyS2zbtg1FihTBiRMnYGtra9Tz2rdvjwcPHqB169ZYtmxZLrey4DF0vclMgwYNsH79+lxoEaX18uVLzJ8/H7a2thg9enS6x5XOn1qthqOjIwoVKoTy5cvD09MTzZo1Q7NmzaBW579+NwXtniglJQWtWrVCZGQkAOD3339H06ZNzdwqyxIcHIzDhw+jUKFCGDJkiLmbkyN37txB586dAQD29vY4deoUnJ2dzdKWhQsXYtGiRXrLVCoVHB0d4ezsDDc3N3h6eqJhw4Zo06aN0e/zWZUX915ubm7o1asXtmzZgk2bNr3xOaKCjYVbIjMqXry44vL4+HjEx8dnuI69vX2utetNsm3bNsyePRsvXryQl9nb28PW1hYRERGIiIjA8ePHMW/ePIwePVrxjyZzOHPmDBYtWoQGDRpk+EdKiRIl4O7ublFFZ1Pw9/fHmTNn8PHHH2dYuC1fvjxsbW1RqFChPGyd6ajVahQrVkz++cWLFwgLC0NYWBj279+Pd999FzNmzIBKpTJjK98s7u7uAAAHBweTPC8mJkb+I6Znz54s3Gbg3XffxbZt2xAdHY3Dhw/Lf4hm5MyZM3jw4IH8fDI9Z2dng/cKooSEBMTFxQFArv1BTvpWrVqFJ0+eYODAgShdurTB9cT3i/j4eDx69AiPHj3CmTNnsHbtWpQpUwZTp05Fhw4dFF+jUKFCcHd3R6lSpUy+H7mpoN0THT9+XC7aAsCOHTtYuBUEBwdj0aJFKFu2bIYFt6JFi8Ld3R1lypTJu8Zl0fbt2+X/T0hIwN69e9G/f38ztihV2veNhIQEREZGIiIiAhcuXMCmTZtQpEgRTJgwAe+9957Jt51X916jRo2Cn58fli5dil69evEej95YLNwSmdGpU6cUl6f9NNTQOgTMmzdP7r1VpUoVjBgxAs2bN4erqyuA1JuQc+fOYdeuXfjzzz9x4MABiyncGmvixImYOHGiuZthNrpP499UZcqUwd9//y3/nJycjGvXruH777/H1atXsX37dlSvXh0DBw40YyvfLAcOHMjT59H/1KpVC1WrVsXt27fh5+dnVOFWN8xJ8eLF0a
pVq1xuYcH05Zdf4ssvv8x0vcTERAwYMABXrlyBo6MjJk+enAetK9gSEhLkXs2ZFT/E9wsg9ZzdvHkTx44dw+bNm/Ho0SOMGzcOI0eOxKeffpruNd5++228/fbbptsBC5Nf7ol0hbyBAwdi06ZNOHToEKKjo1GkSBHzNuwNNGjQIAwaNMjczTAoKSkJu3btApA6RNz69euxfft2iyjcin9jpqSk4Pbt2wgICMCGDRsQGhqKb7/9FufOncMvv/zyRnYycHNzQ4sWLXDkyBFs27YNw4cPN3eTiLIl/33XhogKhH379slF206dOmHnzp3o0aOHXLQFUnveNmvWDLNnz8bOnTtRtWpVczWXCABgbW2NmjVrYsWKFfIfaBs2bDBvo4iyQNdr9tSpU4iIiMhw3bi4OBw8eBAA0L17d1hbs7+AOX3zzTe4cuUKgNThljhWcO7bt28fYmJi4OnpibfeeivLz7e1tYW3tzc+/vhj7N27V/6GyvLly7Fnzx5TN5fywNOnT3H06FFYWVlh5MiRqF+/PhITE3k+86l//vkHz549Q5UqVfDpp5/C0dERV65cwa1bt8zdtHSsrKzg4eGBoUOHYu/evejSpQsAYO/evVixYoWZW5d977zzDgBg69atkCTJzK0hyh4WboneQGkn2Xn27BlmzZqFDh06oGbNmop/iB09ehRjx45F8+bN4eXlhfr168uf8hsarD3txGGSJGHr1q3o06cP6tSpg9q1a6Nfv37yJ8iGpKSkYP369ejZsydq1aqFBg0awNfXN8c93xITE/Hzzz8DAKpWrYqffvop0698ajQazJ49W/Gx69evY/LkyWjdujW8vb1Rv3599O/fH2vWrDF4fMQJQK5evYrx48ejWbNm8PLyQtu2bTFr1iy9IRyA/00MpetRfebMGfl86v6lnYgrowkIcnqOjJmsKaMJ5B4+fIgVK1Zg+PDh6NChA2rVqoXatWujc+fO+OGHHxAeHm7wuJ05cwYAsGjRonT7n3bCrMwmJ0tJScH27dsxePBgNGzYEF5eXmjevDnGjRtn9H5lN9854erqimbNmgEA7t69i5cvXwJIP/nS9evXMXHiRLRo0QI1atRIN/nGkydP8NNPP6FLly6oVasWatWqhS5duuDnn3/G06dPjWrL/fv3MWXKFLRo0QJeXl5o1aoVvv766wyLchcvXsTs2bMxYMAA+femXr166Nu3L1asWCHvT2aePHmC77//Hm3atIG3tzeaNm2KiRMn4s6dOwafk91JxpSe5+vri7Zt28o/t23bVi+LuuP9ySefwMPDAx9++GGG23jw4AGqVatmdPuSkpLQsGFDeHh4YN26dRmuu337dnh4eKBOnTp49eqVvDw5ORlbtmyBr68vGjZsiBo1aqBhw4bo0KEDJkyYgG3btmXajqzo3r07bGxsoNVqM500cP/+/fKwP71799Z7LDvX3YwYM0FNRpObic8/cuQI3n//fTRs2BB16tRB//79cfjwYb3n7Ny5E/3790f9+vVRu3ZtDBw4EIGBgZm2NTvvyTm1bt06+XyNGjUKHTt2zNbrJCQkYNWqVejXrx/q16+PGjVqoFGjRujcuTM+//xzuVCv5M6dO/juu+/QuXNn1K5dG7Vr10aHDh3wySef4ODBg9Bqteme8/r1a6xZs0Y+zt7e3mjdujUmT56M4OBgg9tK+96hG2O2W7duqF27tuLEjOfPn8ekSZPkPNatWxfvvvtulq5nSrZu3QoA6Nq1a7ZfQ6do0aJYtGiRPAzCr7/+iqSkJL11MpqcTMz4wYMHMWzYMDRu3BjVqlVL9z4fFRWFefPmoUePHqhbty68vb3Rtm1bfPHFF/jvv/8ybKtWq8W+ffvw0UcfyTlv1KgRevXqhdmzZ8tFK1PeE+kEBQVh3Lhx8nYbNmyI999/Hzt27EBKSoric8RjExgYiBEjRqBRo0bw9vZGp06dsGjRIrx+/TrD/TbGzp07kZycjMaNG6NUqVLo2bMngN
ThEoxx9uxZjBo1Cg0bNoSPjw86dOiAefPm4eXLlxmef/HYHThwAL6+vmjQoAFq1qyJ7t27Y+3atYq/h2ll5/gCwKV
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1400x800 with 1 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Plotting Number of Data Points per Volume Z-score Bin ---\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAMWCAYAAACKoqSLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAyR9JREFUeJzs3XeUVeX5NuB7hgEVKYoIimiMhaKiqKBiMMQSTdQkIib2HjUqxthibLGXJNbYNYrRWBJjicbeuwjGEhV7o4iAoICD1PP9wTfnx9AchkF2nOtai7XmvHuffZ5n9pkN3POed1eUSqVSAAAAAAAohMrFXQAAAAAAAP9HaAsAAAAAUCBCWwAAAACAAhHaAgAAAAAUiNAWAAAAAKBAhLYAAAAAAAUitAUAAAAAKBChLQAAAABAgQhtAQAAAAAKRGgLAP+j9txzz3Tu3DkXX3zx4i5lsZo0aVIuvPDC/PjHP866666bzp07p3PnzhkyZMjiLo068l5mbmp+lgcOHLi4S4FvhPc8ALOqWtwFAEBDuvjii3PJJZckSZZccsk8+OCDad++/Vz3HTZsWLbccsskyfXXX5+NN974G6uThnPEEUfkscceSzLznLdt2zZJUlVVt3/m7LnnnnnhhRdqjTVp0iRLL710WrZsmVVXXTVdu3bN5ptvnh49ejRs8bMYMmRIHn744bRs2TL77LPPInud+Zn1Z2JWlZWVadmyZb773e9ms802y2677ZY2bdoshgoXXE0Q3Ldv33Ts2HExV/O/46677soxxxyTJPnnP/+Zbt261el5xx9/fG677bYss8wyeeqpp9KsWbNFWWajc/vtt+e4446r13PfeuutBq6GGvO6diZJ06ZN07p163Tq1CnbbLNN+vXrl6ZNm37DFQLwv0hoC8C31ldffZVLL700p5122uIuhUXkvffeKwe2F1xwQbbddtt6H6vmP9Y1vvzyywwfPjzDhw/PM888k7/85S9ZffXVc8opp2SjjTZa6NpnN2TIkFxyySVZaaWVFltoO6sWLVpkySWXTJJMnTo1X3zxRV5++eW8/PLLuemmm3LllVfWOcj7OiuuuGK++93vZtlll22Q482q5pc4G220kdB2AWyzzTY5/fTTM378+Nx22211OtfV1dW57777kiQ/+9nPBLaLwKy/mPo606dPz7hx45LEufgGzXrtTJKJEydmzJgxGTNmTJ599tn84x//yLXXXptllllmjud+97vfTZIstdRS31S5ABSY0BaAb7Xbbrst++67b/k/Qny7vP3220mSZZZZZqEC2yRZf/31c8MNN9Qa++qrr/LGG2/kwQcfzK233pr33nsve+21V04++eTsuuuuC/V6RXfCCSdkxx13LD/+4osvctNNN+XSSy/NZ599lsMOOywPPPBAllhiiYV+rT/+8Y8LfQwa1hJLLJHtt98+N910U+65554cd9xxX3uu77///lRXVydJ+vXr902U2ehsu+22db7WnXHGGeVr2sknn7woy2IWs187k+TTTz/NNddck7/+9a95/fXXc/7558/1F8r333//N1UmAP8DrGkLwLfSiiuumM6dO2fatGm54IILFnc5LCJfffVVkmTppZdeJMdfcskls8EGG+R3v/td7rrrrnTu3DmlUimnn356Bg8evEhes6hat26dgw8+OL/85S+TJJ988kkeeeSRxVwVi9JOO+2UJBk/fnweeuihr93/tttuS5J069YtnTt3XqS1MX+33357ObDdY489yueSxaN9+/Y5/vjj06tXryTJww8/vJgrAuB/gZm2AHwrVVZW5qijjsqBBx6YBx54IK+++mrWXXfdOj9/1vXpHnnkkXl+rHqLLbbI8OHDc/bZZ9eaWTP78ysqKnL55Zfn6aefzmeffZb27dtnu+22y0EHHZTmzZsnmTlr9KqrrsqgQYMyduzYrLjiitlhhx1ywAEHfO36d1OmTMl1112Xu+++O0OHDk3Tpk2zzj
rrZJ999kmfPn3m+9y33347N9xwQwYOHJhPP/00lZWV6dixY7bYYovsvffec127tGbt4I022ig33HBDHnjggfz973/PkCFDMm7cuBx66KE57LDD5vu6s5o8eXJuvvnm3H///Xnvvffy1VdfpW3btunZs2f23XffdO3ada6vX2P48OG1QqK+ffvmnHPOqfPr18VKK62Uyy67LD/5yU9SXV2d888/PzfddFOtfSZNmpRHHnkkTz75ZN566618+umnmThxYpZZZpmsu+662Xnnned6PmatffZekqR///7l72d9X6Oh/OxnP8vll1+eJPnvf/9ba9bfgp7HGjXrCs/aZ41Zf8a23377XH/99bnrrrvy8ccfp0mTJll77bXzy1/+Mt///vdrPe93v/td7rjjjvLjvfbaq9b2lVZaKY8++mj58ciRI3PttdfmmWeeyfDhwzNt2rQss8wyadeuXXr06JHtt99+oa4h06ZNyxVXXJFnn302Y8eOTdu2bfP9738/hx566DzX3U6SGTNm5N///nfuvvvuvP766xk/fnxatGiRtdZaKzvuuGO22267VFRUzPG8Wb9v22yzTf7yl7/k4YcfzrBhw1JdXT3f61qNtddeO127ds2QIUNy2223Zfvtt5/nvh999FH5FxmzB4SjR4/OtddemyeffDLDhw9PMvP736dPn+y33351/rh/jYEDB5bP5/zWaa35OZp9zfLZn//mm2/mqquuygsvvJDx48dnpZVWyk477ZS99967vDb2iy++mGuuuSavvvpqvvjii3znO9/Jrrvumt12222u3/8a9bm+LqxXX321PLN2o402qvcauEly77335vbbb88bb7yRL774IksttVTatGmT1VZbLZtttll22mmnuc7AHjduXG688cY88cQT+eijjzJp0qQsv/zyWXXVVbPVVlvlJz/5SVq2bDnH8x588MHcdttt+e9//5vx48enVatW6datW3baaaf88Ic/nGuNNT/rffv2zdlnn51//vOfuf322/P+++/n888/n+vfz3/961/z7LPPZsSIEZkxY0ZWXHHF9O7dO/vtt186dOhQ7+/X1+natWuee+658oz02c3rPTv79WTJJZfMFVdckUcffTSjR49Oy5Yts/HGG6d///5ZffXVF1n9AHyzhLYAfGv16dMnG220UV544YWce+65uf766xdLHW+88UZOOOGEctgyffr0DB06NFdccUUGDx6c6667Ls8880x+85vfZNKkSWnZsmWmTp2ajz76KBdddFHeeeed+c4Wnjp1avbdd98MHjw4VVVVad68ecaPH59nn302zz777FyDsBpXX311zj///MyYMSPJzHX0pk6dmrfffjtvv/12brvttlx11VVZa6215vn655xzTgYMGJCKioq0atUqlZUL9kGeTz/9NL/85S/LSx00bdo0Sy65ZEaMGJF//etfufvuu3P88cdnzz33LD+nefPmadu2bb766qtMnDgxlZWVtcKPFi1aLFANddWxY8f07ds3N954Y1588cUMHTo0K6+8cnn7fffdVw5IKioq0qJFi1RVVWX06NF55JFH8sgjj2S//fbLscceW+u48+ulpt+FfY2GssIKK5S/njhxYvnr+pzHBVFdXZ099tgjr7zySpo2bZqmTZtm4sSJGThwYF544YWcccYZtcLCFi1apG3bthkzZkySmTOFZ/3lx6zr57755pvZa6+98sUXXySZeSO6Fi1aZMyYMRk9enQ5LF2Q0HZWr776ak488cR8+eWXad68eZo0aZJPPvkkf//73/PAAw/k2muvzdprrz3H8z7//PP0798/gwYNKo+1bNky48aNyzPPPJNnnnkm99xzTy666KJ5rln6+eefZ8cdd8yHH36Ypk2bLvBamTvttFNOP/30PP/88xkxYsQ8A63bb789yczZ6bOGuy+88EIOPfTQjB8/Psn/vZfffffdvPvuu/nnP/+Zyy67bJHe5G9+nnjiiRx22GGZPHlyWr
ZsmSlTpuT999/PH//4x/LH2G+99dacfPLJmTFjRlq0aJEpU6bknXfeyWmnnZZPPvkkRx999FyP3RDX1wU1ZsyYHHb
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1400x800 with 1 Axes>"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
# Reload the bar data and run the trend-continuation analysis on freshly computed indicators.
df_raw = load_and_preprocess_data(file_path)

if df_raw is None or df_raw.empty:
    print("Analysis cannot proceed. Please check if data loading was successful.")
else:
    # Indicator settings: price_lag is the "N" in "next N K-lines".
    processed_data = calculate_stationary_indicators(df_raw, volume_window=30, price_lag=5)

    # return_threshold is the smallest absolute log return treated as a real
    # up/down move; anything below it is classified as flat.
    analyze_trend_continuation_probability(processed_data, return_threshold=0.0001, num_bins=10)
|
|
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"execution_count": 13,
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"id": "8fa62ad6",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:52.720376Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:52.448750Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Successfully loaded 7601 rows of data.\n",
|
|
|
|
|
|
"Indicators calculated. 7562 rows of data remaining for analysis.\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAIkCAYAAAAH/VqbAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAsgxJREFUeJzs3Xl8U1XeBvDnZGmattBCaQuldQNb9l0RxEEBV8YFmHEF1xHRcXkVRFTccFdwxNERRWUGxg0VXEbUER1xF1AQkLLK0lJoaUsLbZM2yT3vHyGXhm5pkzTJuc/3fecj3Gzn9OkJ+eWce66QUkoQERERERFRyJgi3QAiIiIiIiLVsNAiIiIiIiIKMRZaREREREREIcZCi4iIiIiIKMRYaBEREREREYUYCy0iIiIiIqIQY6FFREREREQUYiy0iIiIiIiIQoyFFhERERERUYhZIt0Aomg1atQo7NmzR/+7EAJ2ux3t2rXDscceiz59+uDcc89Fv379Gn2OSZMmYeXKlVi4cCGGDh3aFs1ukq9PX3zxBbKysvTj0dZOAJgxYwaWLl2Kxx9/HOPHj490c0Luyy+/xCuvvIJNmzahqqoKAAL6+fsyjMWfS25ubr1jNpsNnTp1Qv/+/XHFFVdgyJAhEWiZ+v7617/im2++wX//+1907txZP+4b+3VZrVakpKSgT58+uPjiizFq1KgWv97f//53PP/887j55ptxyy23BN3+cKmursa7776LFStWYPPmzSgvL4fVakV6ejr69u2Lc845B6NGjYLJdOR76cbeR6lxqoz91atX44orrsB1112H6dOnR7o5FANYaBE1Y9CgQTj22GMBAE6nEwcOHEBeXh5WrlyJ1157DSeffDIee+wxZGdnh60Nqv3DvmTJEtx9990YN24cnnjiiUg3p83l5eXh1ltvhaZpOOWUU5CWlgYhBDp16hTpprWJESNGIC0tDQBw4MABbNiwAcuWLcMnn3yCu+++G1dddVVIXsf34W7z5s0heb5Y9f3332P58uW49tpr/Yqsunr06IGePXsC8BYfGzduxP/+9z/873//w6RJkzBz5sy2bHKb+Pbbb3HnnXeirKwMFosFvXv3xpAhQ+DxeLB792589NFH+Oijj9C3b1+8++67kW6uEtpq7Ier0B8yZAhOP/10LFy4EBdffDGOO+64kD03qYmFFlEz/vznP9ebOZBS4uuvv8Zjjz2GlStX4tJLL8Vbb71Vr9h68skn4XA4kJmZ2ZZNbtQ///lPuFwuZGRkRLopzbrjjjtw/fXXIz09PdJNCbnly5fD5XJhypQpuP322yPdnDY3efJkv5k7h8OB6dOn47///S+efvppnHPOOTHxOxorHn/8cdhsNkyePLnR+4wZM8bvA6mmaXj22Wfx0ksvYdGiRRg9ejSGDRsW8GteccUVOO+889ChQ4eg2h4uX331FW666SZ4PB5MmDABU6dORWpqqt99CgsLMW/ePHz66acRaqV6VBj7t9xyC7766ivMnj0bzz//fKSbQ1GO52gRtYIQAiNHjsQ777yD4447DiUlJQ1+45uZmYlu3brBbrdHoJX1HXPMMejWrRusVmukm9Ks9PR0dOvWDe3atYt0U0KusLAQAPSZUqOz2+36+HG5XPjmm28i3CJ1fPfdd9iyZQvGjBnToqLHZDLhtttu0788+uSTT1r0uh07dkS3bt3QsWPHFj2uLRw4cAB33nknPB4PJk2ahMcee6xekQV4379nzZqFF154IQKtNIZYHPt9+vRBjx498MUXX6CgoCDSzaEox0KLKAjt27fHPffcAwD48ccfsWHDBr/bJ02ahNzcXPz0009+x2tra/HKK69g/PjxGDhwIPr06YNTTz0VEyZMwFNPPYXy8nIA3iV2ubm5+rlio0ePRm5urv4/3/P+9NNPyM3NxaRJk+BwODB37lyce+656N+/v9/5FaNGjUJubm6T/zisXLkS1157LU4++WT079
8ff/rTn/D+++83eN/G+ufz97//Hbm5ufj73//u14a7774bALB06VK//kyaNEm/34wZM5Cbm4slS5Y0+Nwff/wxrrrqKpx88sno06cPzjjjDNx9993YsWNHg/ev2/cff/wR1157LU466ST069cP48aNa7SPzXG73XjzzTdx6aWXYvDgwejbty/OOussPPLIIygqKmrw5+Hr0913391g30Nt3759ePjhh3HWWWehb9++GDx4sD4L6/F4GnyMlBLvvvsuxo8fj/79+2Po0KH4y1/+gl9++cXv9y1UMjIykJKSAgAoLS1t8D6ffvoprrvuOpxyyino06cPTjvtNEybNg3btm3zu5/v5+xT93es7u9/Q7+fdTXWz0DGm2/szpgxA9XV1ZgzZw7OPPNMfazfdddd9X4/fL7//ntMmTIFw4cPR+/evXHSSSfhrLPOwrRp07Bq1armf5h1/Pvf/wYAjBs3rkWPAwCz2awvJ6x7vmrdsbR8+XJceeWVOPnkk/3eC5r72e7YsQMPPvggzj77bPTv3x+DBg3CeeedhwcffBBbtmypd/+Kigo899xzuPDCCzFw4ED0798f559/Pv7xj3/A4XC0qF+vv/46Dh48iNTU1IDOsznppJMava0l7yV79uzByy+/jCuvvBKnn346+vTpgyFDhuCyyy7DW2+9BU3T6j2moKAAubm5GDVqFKSUePvttzF+/HgMGDAAgwcPxrXXXos1a9Y02r4tW7bglltuwdChQ/Wf2T//+U9omtbkvwdutxvvvPMOJk2apL/Hjho1Cg888AD27t3b7M+sJZob+//9739x77334o9//CNOOukk9O3bV/935Pfff693/9zcXH2m6fnnn/cb+zNmzAhJP8eNGwdN0/Dmm28G0XMyAi4dJArSH/7wB6SkpKC8vBzff/89+vTp0+T9NU3D5MmT8cMPPyApKQlDhgxB+/btUVZWhl27duHVV1/F+eefj5SUFBxzzDEYN24cPvvsM1RXV+Pss89GQkKC/lxHn9NTU1ODSZMmYfv27RgyZAh69OihF22B+Pzzz/H666/jhBNOwIgRI1BcXIyff/4Zd911FzZt2lTvH6nWOPvss7F27Vr88ssvOOaYYzB48GD9thNOOKHZx0spMWPGDLz//vuwWCwYMmQIUlNT8dtvv2HJkiX45JNP8Nxzz+EPf/hDg49/77338OKLL6JXr1447bTTsGfPHqxduxZ33XUXysvLcfXVVwfcl9raWtxwww34/vvvYbPZMHToUCQlJWHNmjVYtGgR/vOf/+DVV19F7969AQA9e/bEuHHj8PPPP2P37t1+5/8F0vfWWLduHa6//nqUl5cjMzMTY8aMwaFDh7By5UqsWbMGn3/+OV588UXExcX5Pe6hhx7Cm2++CZPJhCFDhiAtLQ1btmzBxIkTQ3YeRV2apqG6uhoA6s0uuN1uTJs2DZ988gni4uLQu3dvZGRkYOfOnfjoo4/w+eef4+9//7ueue/nvHTpUgD1i4y6YygYgYy3Q4cO4dJLL8XevXsxePBgnHjiiVi7di3ef/99rFq1Ch988IHfrO3SpUv1LyL69euHoUOHwul0oqioCMuWLUOHDh2a/OB/dPu+/fZbWK3WgB9ztMrKSgCo9/sBAAsWLMC///1vvegtLi6G2Wxu9jk/+ugj3HPPPaitrUVmZiZGjhwJTdOQn5+Pt956C6mpqcjJydHvv23bNvzlL3/B3r17kZaWhsGDB8NisWD9+vWYO3cu/vvf/2LRokUBz35/8cUXAIDzzjuvwX4FqqXvJR988AHmzp2LrKwsHHfccRg0aBD279+PNWvW4JdffsF3332H5557DkKIBl/v7rvvxn/+8x8MHjwYp59+OvLy8vDdd99h1apV+Pe//43+/fv73X/lypW4/vrr4XQ6ccwxx+DUU09FeXk5Zs+ejV9//bXRflVWVuLGG2/EypUrkZCQgD59+qBDhw7YsmUL3nrrLXz66adYsGABevXq1e
qfXV1NjX0A+L//+z/ExcWhW7duOOWUU+B2u7F161YsWbIEn376KV599VUMGjRIv/+4ceOQl5eHTZs2+Z17CMDv35t
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Statistical summary of Log Returns:\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"count 7562.000000\n",
|
|
|
|
|
|
"mean -0.000090\n",
|
|
|
|
|
|
"std 0.010415\n",
|
|
|
|
|
|
"min -0.209683\n",
|
|
|
|
|
|
"25% -0.004721\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"50% 0.000000\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"75% 0.004706\n",
|
|
|
|
|
|
"max 0.083932\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"Name: log_return, dtype: float64\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Kurtosis of Log Returns: 31.0872\n",
|
|
|
|
|
|
"Skewness of Log Returns: -1.5436\n"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAAPdCAYAAABlRyFLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXm8TdX7xz/n3Hu55nmMJpVkCs1pVmmQDGlAiBTqR5Eh0qhCJGMTJaQoQ4SiQYmQRCRTKVMyZ7jz2b8/9nfds/Y+a+299j77nLPPvc/79fJy7757r73W2s+anvWs5wlomqaBIAiCIAiCIAiCIAiCIAiCIIgIgonOAEEQBEEQBEEQBEEQBEEQBEH4FVKiEwRBEARBEARBEARBEARBEIQEUqITBEEQBEEQBEEQBEEQBEEQhARSohMEQRAEQRAEQRAEQRAEQRCEBFKiEwRBEARBEARBEARBEARBEIQEUqITBEEQBEEQBEEQBEEQBEEQhARSohMEQRAEQRAEQRAEQRAEQRCEBFKiEwRBEARBEARBEARBEARBEIQEUqITBEEQBEEQBEEQBEEQBEEQhITURGeAIAiCIIhIbrzxRuzduxevvPIKWrdunejsOKJ27doR14oWLYqKFSuiYcOGaN++PS655JIE5Kzgs3//fkyfPh0//PAD9u7di4yMDJQvXx7169fH7bffjttvvx2BQCDR2XQNaxdOOOOMM/D111/HKEdy5syZg0GDBqFVq1Z49dVX86+vXr0aDz74IC677DJMmzYt7vlSZeDAgZg7d67hWkpKCkqXLo06deqgZcuWaNmyZVzkac+ePbjpppvi9i3j/Y169eqF77//Hl9++SWqVq0qvGfDhg2YM2cOfvrpJxw4cACZmZkoVaoUatWqhauuugp33303qlevHvO8RkPHjh2xZs0afPDBB7j88ssTnZ2khNXhY489hscffzzm74t32xPRuXNnbNy4EV988QUqVaqUkDwQBEEQBEBKdIIgCIIgYkTTpk3zF7xHjx7Fpk2bsGjRIixevBiDBg1Cp06dPHkPU9pv3brVk/SSlRkzZuDVV19FdnY2ypYtiyZNmqB48eL4+++/8dVXX2HZsmV47733MGHCBFSpUiXR2XXFrbfeiqNHjxqunT59Gl988UX+34sXL274e7ly5eKWv4LImWeeiSZNmgAAsrKysH37dqxcuRIrV67EV199hTFjxiAlJSXBuUxeVq5ciWXLluGhhx4SKtAzMjIwZMgQLFy4EABQqVIlNGnSBCVLlsSxY8ewceNGrF27FhMnTsSYMWPQrFmzeBcBADBu3DiMHz8+bspdovDQt29ftG3bFqNHj8Yrr7yS6OwQBEEQhRhSohMEQRAEERO6d+9usDbMyMhA//798eWXX2LkyJFo3rx50ipz/cbUqVPx8ssvIxgMom/fvujSpQvS0tLy/75z50489dRT+PXXX9G+fXvMmTMHpUuXTmCO3TFgwICIa3v27MlXovfv3x81atSId7Yc0aBBAyxatAjFihVLdFaUaNKkicGSHgA+/PBDPP/88/jyyy8xd+5ctG3bNqZ5qFKlChYtWmSQ6YLCK6+8gqJFi6J79+4Rf8vJyUHXrl2xbt06VKpUCc8//zxuuukmwz25ublYunQpXn/9dezZsyde2XbF8OHDkZGR4XuLecJf1K9fHzfccAPmzp2LTp064cILL0x0lgiCIIhCCvlEJwiCIAgiLhQrVgxDhgwBoCuHvv/++wTnqGCwY8cOjBw5EgAwaNAgdO/ePULZWKtWLUydOhVnnnkmdu/ejRdffDERWSWgt4NatWoltSLxgQcewGWXXQYAWLx4cczfl5aWhlq1auHMM8+M+bviyQ8//IBt27ahWbNmwhMTEydOxLp161C6dGnMnDkzQoEOAKmpqbjtttswd+7c/G/iV6pXr45atWolzQYS4R/atm0LTdMwderURGeFIAiCKMSQEp0gCIIgCgj//P
MPXnzxRdxyyy2oX78+mjRpgvvuuw8fffQR8vLyhM9omoZPPvkErVu3RsOGDXH55ZejW7du+Pnnn7F69WrUrl0bHTt29CyPVapUQdmyZQEAhw8fFt6zZMkSdO3aFVdccQXq1auHa665Bv369cOOHTsM940bN87gf7127dqGf8wqk903btw44ftk5eSvZ2Rk4I033sBtt92Ghg0b4sYbbwSg+72uXbs2Bg4ciNOnT2PUqFG4+eabUa9ePVx99dUYMGAADhw4IHzvypUr8eijj+Kqq65C3bp1cemll+KWW25Bv379sHbtWvvK/B+TJ09GTk6O7bcqVaoU+vfvDwD4/PPPsXv3bgC6lXrt2rVx6aWXIisrS/p869atUbt2bSxbtsxwPTc3F7Nnz0bHjh1x2WWXoV69erjxxhvx7LPPYv/+/RHpqNSrV/Df59ixYxg2bBiaNWuGevXqGepq5cqVePHFF9GyZUtcfvnlqFevHq699lr06dMHGzdulKafm5uL999/Hy1atED9+vVxxRVX4PHHH7d0LSSTtz179qB27dq48cYboWkaPv74Y7Ru3RoXX3wxmjRpgoceegjr16+Xprtt2zY8/vjjuPzyy9GwYUO0aNEC77//PkKhEG688UZDm/CCunXrAkCEj/rjx49j7NixaNmyJRo1apSfl4kTJyIjIyMiHb597tu3D08//TSuu+461K1bFwMHDoyoGxFu+j4AmDdvHtq0aYOGDRvisssuQ9euXfHTTz9ZlturdgsA06dPBwC0atUq4m8nT57EBx98AED3mV6zZk3LtEqUKIGLLroo4vr333+PRx55BFdeeSXq1auHpk2bok+fPvj111+F6XTs2BG1a9fG6tWrsWXLFjz22GP5beL222/HlClToGma4ZnatWtj/PjxAIDx48cb+mH2Dc1p8wwcOBC1a9fGnDlzsHv3bjz11FO4+uqrUa9ePTRr1gyvv/46srOzI/LKPyeCb/8inNaNXTuS5Sc7OxvvvvsuWrdujUaNGuWPD23atMGIESNw7NgxYXpOiGYsAoBvvvkGHTp0QKNGjdCkSRM88MADEX29CCftfcqUKahduzZuvfVWnDx5MiKtWbNmoXbt2rjuuutw5MgRw9+uu+46lCtXDp9//rkn9UUQBEEQbiB3LgRBEARRANi4cSMefvhhHDt2DNWrV0ezZs1w4sQJrFmzBuvXr8fSpUsxadIkFClSxPDc888/j5kzZyIYDOKSSy5BpUqVsG3bNnTo0MEzn+U8oVAIp0+fBgBUqFDB8Lfc3Fz069cPixcvRpEiRVC3bl1UqVIFu3btwoIFC7B06VKMGzcO1157LQCgTp06aNWqVX7wQ7Miyuwb2y1ZWVno2LEjdu7ciUsuuQQXXnhhxCL+xIkTuO+++7B//340adIE559/Pn755RfMmzcPa9euxfz581GqVKn8++fOnYtBgwYB0N17XH755cjMzMSBAwewaNEilCtXDpdeeqlt3jRNyw/2phLk8YYbbkDp0qXx33//4dtvv0XHjh1Rq1YtNGrUCOvXr8eyZctwxx13RDy3detWbN68GRUrVsT111+ff/3kyZPo0aMH1qxZg+LFi6NevXooV64ctm3bho8++ghLlizBe++9J1TuqdSrVxw9ehRt2rTBiRMn0KRJE9StW9dgrc8U/ueffz4aN26M1NRU/PHHH1i8eDGWLl2K0aNH49ZbbzWkGQqF0Lt3byxbtgxpaWm4/PLLUbp0aWzYsAH33HMP2rRp4zq/gwYNwsKFC9GkSRNcf/312LJlC3744QesXbsW06dPR8OGDQ33r1mzBg8//DAyMzNx5pln4uqrr8axY8fw2muvYcOGDa7zYQVTgvF9yo4dO9CtWzfs378/33d3amoqfv31V7zxxhv48ssvMW3aNENbYOzatQutWrVCWloaGjduDE3TlPzZu+37XnrpJUybNg3BYBBNmjRB5cqVsXXrVnTs2BEdOnQQvsurdgvo8r9ixQ
qkpaUJn1m9ejVOnjyJQCCAli1bKqVpZsyYMZg0aRICgQAaNWqE6tWrY+fOnVi8eDG+/PJLvPDCC1JXPCtWrMB7772
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1500x1000 with 2 Axes>"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Analysis focused on price change rate and its dynamics over continuous trading periods.\n",
|
|
|
|
|
|
"Higher volatility periods often provide more opportunities for trend-following strategies.\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 1. Data Loading and Preprocessing (与之前代码相同) ---\n",
|
|
|
|
|
|
"def load_and_preprocess_data(file_path):\n",
"    \"\"\"\n",
"    Read an OHLCV CSV into a DataFrame indexed by its 'datetime' column.\n",
"\n",
"    The 'datetime' column is parsed as dates and becomes the index, rows are\n",
"    sorted by that index, and every row containing a missing value is removed.\n",
"    The frame must contain 'open', 'high', 'low', 'close' and 'volume'.\n",
"\n",
"    Returns:\n",
"        The cleaned DataFrame, or None (after printing the reason) when the\n",
"        file does not exist, a required column is missing, or loading fails.\n",
"    \"\"\"\n",
"    required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
"    try:\n",
"        raw = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
"\n",
"        # Chronological order first, then discard incomplete rows.\n",
"        # (To restrict the analysis to a trading session, filter on\n",
"        # the index hour / dayofweek at this point.)\n",
"        ordered = raw.sort_index()\n",
"        df = ordered.dropna()\n",
"\n",
"        dropped = len(ordered) - len(df)\n",
"        if dropped > 0:\n",
"            print(f\"Warning: Missing values found in data, deleted {dropped} rows.\")\n",
"\n",
"        absent = [col for col in required_columns if col not in df.columns]\n",
"        if absent:\n",
"            raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
"\n",
"        print(f\"Successfully loaded {len(df)} rows of data.\")\n",
"        return df\n",
"    except FileNotFoundError:\n",
"        print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
"        return None\n",
"    except Exception as e:\n",
"        # Best-effort loader: report the problem and let the caller test for None.\n",
"        print(f\"Error during data loading or preprocessing: {e}\")\n",
"        return None\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 2. Stationary Indicator Calculation Function (与之前代码相同) ---\n",
|
|
|
|
|
|
"def calculate_stationary_indicators(df, volume_window=10, price_lag=5):\n",
"    \"\"\"\n",
"    Derive volume- and price-based indicator columns from bar data.\n",
"\n",
"    Args:\n",
"        df: DataFrame with at least 'close' and 'volume' columns. It is copied,\n",
"            not modified.\n",
"        volume_window: Look-back length (in rows) for the volume rate of change,\n",
"            the volume / moving-average ratio and the rolling z-score.\n",
"        price_lag: Number of rows ahead used for 'future_log_return'.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with 'volume_roc', 'volume_ma_ratio',\n",
"        'volume_normalized_zscore', 'log_return', 'future_log_return' and\n",
"        'macd_hist_diff' added, and rows containing NaN removed. If the MACD\n",
"        computation raises, 'macd_hist_diff' stays all-NaN and is ignored by\n",
"        the NaN filter so the remaining indicators are not wiped out with it.\n",
"    \"\"\"\n",
"    df_processed = df.copy()\n",
"    volume = df_processed['volume']\n",
"    close = df_processed['close']\n",
"\n",
"    # Rolling statistics are computed once and shared by the ratio and the z-score.\n",
"    rolling_mean_vol = volume.rolling(window=volume_window).mean()\n",
"    rolling_std_vol = volume.rolling(window=volume_window).std()\n",
"\n",
"    df_processed['volume_roc'] = volume.pct_change(volume_window) * 100\n",
"    df_processed['volume_ma_ratio'] = volume / rolling_mean_vol\n",
"    # A zero rolling std would divide by zero; turn it into NaN so the row is dropped below.\n",
"    df_processed['volume_normalized_zscore'] = (volume - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)\n",
"\n",
"    df_processed['log_return'] = np.log(close / close.shift(1))\n",
"    df_processed['future_log_return'] = np.log(close.shift(-price_lag) / close)\n",
"\n",
"    macd_available = True\n",
"    try:\n",
"        _, _, macdhist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)\n",
"        df_processed['macd_hist_diff'] = macdhist.diff(1)\n",
"    except Exception as e:\n",
"        # Keep going without MACD, but say so instead of failing silently.\n",
"        print(f\"Warning: TA-Lib MACD calculation failed ({e}). 'macd_hist_diff' will contain NaN and is ignored when dropping rows.\")\n",
"        df_processed['macd_hist_diff'] = np.nan\n",
"        macd_available = False\n",
"\n",
"    if macd_available:\n",
"        df_processed = df_processed.dropna()\n",
"    else:\n",
"        # An all-NaN column would make a plain dropna() delete every row.\n",
"        filter_cols = [col for col in df_processed.columns if col != 'macd_hist_diff']\n",
"        df_processed = df_processed.dropna(subset=filter_cols)\n",
"\n",
"    if df_processed.empty:\n",
"        print(\"Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.\")\n",
"    else:\n",
"        print(f\"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.\")\n",
"    return df_processed\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 3. 价格变化率分析与可视化函数 (横轴调整为连续索引) ---\n",
|
|
|
|
|
|
"def analyze_price_change_rate_for_trend(processed_df, rolling_vol_window=30):\n",
"    \"\"\"\n",
"    Plot and summarise the log returns found in `processed_df`.\n",
"\n",
"    Produces a histogram of 'log_return', prints its descriptive statistics,\n",
"    kurtosis and skewness, then draws the return series and its rolling\n",
"    standard deviation against a positional (gap-free) x-axis.\n",
"\n",
"    Args:\n",
"        processed_df: DataFrame containing a 'log_return' column. It is only\n",
"            read; helper series are kept local instead of being written back.\n",
"        rolling_vol_window: Window length (in rows) of the rolling std.\n",
"\n",
"    Returns:\n",
"        None. The results are the figures and the printed summary.\n",
"    \"\"\"\n",
"    if 'log_return' not in processed_df.columns or processed_df['log_return'].isnull().all():\n",
"        print(\"Error: 'log_return' column not found or contains only NaN values. Cannot analyze price change rate for trend.\")\n",
"        return\n",
"\n",
"    print(\"\\n--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\")\n",
"\n",
"    log_returns = processed_df['log_return']\n",
"    # Positional x values: plotting against row number removes the date gaps.\n",
"    bar_positions = range(len(processed_df))\n",
"    rolling_volatility = log_returns.rolling(window=rolling_vol_window).std()\n",
"\n",
"    # 1. Distribution of log returns\n",
"    plt.figure(figsize=(10, 6))\n",
"    sns.histplot(log_returns, bins=100, kde=True, color='purple', alpha=0.7)\n",
"    plt.title('Distribution of Log Returns (Price Change Rate)', fontsize=16)\n",
"    plt.xlabel('Log Return', fontsize=12)\n",
"    plt.ylabel('Frequency', fontsize=12)\n",
"    plt.grid(True, linestyle='--', alpha=0.6)\n",
"    plt.show()\n",
"\n",
"    print(\"\\nStatistical summary of Log Returns:\")\n",
"    print(log_returns.describe())\n",
"    print(f\"Kurtosis of Log Returns: {log_returns.kurtosis():.4f}\")\n",
"    print(f\"Skewness of Log Returns: {log_returns.skew():.4f}\")\n",
"\n",
"    # 2. Log returns and rolling volatility on a shared x-axis\n",
"    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)\n",
"\n",
"    ax1.plot(bar_positions, log_returns, label='Log Returns', color='blue', alpha=0.7, linewidth=0.8)\n",
"    ax1.set_title('Log Returns Over Trading Periods (Continuous Index)', fontsize=16)\n",
"    ax1.set_ylabel('Log Return', fontsize=12)\n",
"    ax1.grid(True, linestyle='--', alpha=0.6)\n",
"    ax1.legend()\n",
"\n",
"    ax2.plot(bar_positions, rolling_volatility, label=f'Rolling Volatility ({rolling_vol_window} periods)', color='red', linewidth=1.5)\n",
"    ax2.set_title(f'Rolling Volatility ({rolling_vol_window}-period Std Dev of Log Returns) Over Trading Periods', fontsize=16)\n",
"    ax2.set_ylabel('Volatility', fontsize=12)\n",
"    ax2.set_xlabel('Trading Period Index', fontsize=12)\n",
"    ax2.grid(True, linestyle='--', alpha=0.6)\n",
"    ax2.legend()\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"    print(\"\\nAnalysis focused on price change rate and its dynamics over continuous trading periods.\")\n",
"    print(\"Higher volatility periods often provide more opportunities for trend-following strategies.\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- Main Execution Flow ---\n",
|
|
|
|
|
|
"if __name__ == \"__main__\":\n",
"    # Load the bars, then run the indicator and log-return analysis only if loading produced data.\n",
"    df_raw = load_and_preprocess_data(file_path)\n",
"\n",
"    if df_raw is None or df_raw.empty:\n",
"        print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"    else:\n",
"        processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"        analyze_price_change_rate_for_trend(processed_data, rolling_vol_window=30)"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"execution_count": 14,
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"id": "20c278fde79da68a",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"end_time": "2025-07-12T14:59:52.975843Z",
|
|
|
|
|
|
"start_time": "2025-07-12T14:59:52.744508Z"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"Indicators calculated. 7562 rows of data remaining for analysis.\n",
|
|
|
|
|
|
"已计算 'pct_change',共 7562 条有效数据。\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"自相关性计算结果 (前5期):\n",
|
|
|
|
|
|
" Lag Autocorrelation\n",
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"0 1 0.546944\n",
|
|
|
|
|
|
"1 2 0.481986\n",
|
|
|
|
|
|
"2 3 0.450359\n",
|
|
|
|
|
|
"3 4 0.454769\n",
|
|
|
|
|
|
"4 5 0.485791\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
2025-07-15 22:45:51 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAScCAYAAADwLq27AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4FOXax/HfphFqIAECoiAtoSQ0KUqVUA9FmoBIQKrAEREVEbBAXlDUY0HAThVQQCBADggqCAKGKh66SBPEUEwIPRA2+/7BtWuWJJCyu5nsfj/X5SWZmX3u59mZ2WzueeYek8VisQgAAAAAAAAAYAheud0BAAAAAAAAAMA/SNoCAAAAAAAAgIGQtAUAAAAAAAAAAyFpCwAAAAAAAAAGQtIWAAAAAAAAAAyEpC0AAAAAAAAAGAhJWwAAAAAAAAAwEJK2AAAAAAAAAGAgJG0BAAAAAAAAwEBI2gIAYBAREREKDQ21+y8sLEyPPvqoRo4cqZ07d2ar3TFjxig0NFTLli1zcI8d68KFC/rss8/Up08fNWrUSGFhYapTp446dOigV199VbGxsWleY32fkD1///23wsLCFBoaqq5du+Z2d5CObdu2KTQ0VH369HFZzLzymZHa7NmzbZ8H8+bNy+3u2Jk2bZpCQ0M1bdo0j4oNAAByhqQtAAAGU6dOHXXp0kVdunRR06ZNlZKSom+//VaRkZGaPXt2bnfPKZYvX66IiAi9//77+vXXX/Xggw+qdevWevjhh3Xr1i1988036tevn5577rnc7qpbWb58uZKTkyVJ+/fv16FDh5wSp0+fPgoNDdW2bduc0j4yb9myZQoNDdWYMWNyuysOtWTJEtu/ly5dmos9AQAAcAyf3O4AAACw1717d7tZjzdu3NDrr7+u5cuX6z//+Y8effRRlS9fPtPtvfDCCxo8eLBKlizpjO7m2Ndff60JEybIZDJp8ODBGjp0qAoVKmS3zZEjRzRt2jT98ccfudRL92RNbgUHB+vs2bNasmSJXn311VzuFXKb0T8z7vTrr7/qyJEjKlKkiG7duqWDBw9q//79ql69em53Ldf17t1b7dq1U7FixXK7KwAAIIuYaQsAgMHly5dPr7/+ugoUKCCz2azvv/8+S68vWbKkKlasqMKFCzuph9l39OhRvfHGG5Ju35I9atSoNAlbSapUqZI+/PBDvfLKK67uotvatWuXjh07poCAAL355puSpJiYGN28eTOXe4bcZuTPjPRYZ9m2b99ebdu2tVvm6QIDA1WxYkUFBgbmdlcAAEAWkbQFACAPKFiwoG127Z9//mlbnrqm69KlS9WzZ0899NBDCg0NtW13r/qU+/bt08svv6yIiAiFh4erfv36euyxx/T222/r9OnTabY/e/asJk+erH/961+qWbOmateurW7dumn+/Pm6detWlsY1Y8YMJScnq0qVKnrqqafuuX29evUyXLd27Vr16tVLderUUa1atfTEE09o48aN6W575MgRTZ06VU888YSaNGmisLAwNWjQQP369dPq1avTfU3q2qLJycn6/PPP1b59e9WoUUMNGjTQ8OHDdfTo0Qz7t3PnTg0cOFB169a1vWfLly+XdPfavElJSZo1a5Z69OihunXrKjw8XG3atNE777yjCxcuZBjvXqxJrY4dO6pRo0YqV66cEhMTM7wo8Oeffyo0NFQREREZtmmty2w99qzv2fbt2yVJffv2tavZfOcxefToUY0dO1bNmzdXWFiY6tevr6eeeirDfWKV1WN4z549eu6559S4cWOFhYXpkUce0dChQ7Vly5Z02099Dh0+fFgjR45U48aNVbVqVVut0NQlIHbu3KmhQ4fq4YcfVpUqVezG6cj9+fPPP2vixInq1KmTGjRooLCwMDVt2lQjR47Unj170mwfERGhsWPHSpKio6Pt9kXqmrn3+sxYtWqVnnrqKdWvX19hYWFq3ry5xo4dq+PHj6e7ferjYuvWrR
owYIDq1aunGjVqqEuXLrbzIDuuXbtmOz4ef/xxdevWTZL03//+Vzdu3Ej3NalLRFy7dk3vvfeeWrVqpbCwMDVq1Egvv/yyzp49m+5rv/vuO73yyivq0KGD6tWrp/DwcNv7euzYsUz3e+rUqQoNDdXrr7+e4TZ79uxRaGiomjRpYvfZ+vPPP2vo0KFq2LChqlevrnr16ql169YaNWqUduzYYdfG3Wrafvvtt+rXr58aNGig6tWrq0GDBmrXrp1effVVp5VKAQAAmUfSFgCAPOLKlSuSJD8/vzTrJk6cqFdffVXe3t569NFHVbNmTZlMpnu2OWPGDHXv3l3Lly+Xr6+vWrRooTp16ujWrVuaNWtWmhqkO3bsUMeOHTVnzhzduHFDDRs2VJ06dXTq1ClNnDhRQ4YMsdVIvReLxaL169dLkjp37pyp/mZk6tSptnq3zZo1U7ly5bR7924NGTIk3STk7Nmz9dFHH+nixYsKCQlRq1atVL58eW3btk3PP/+8Jk+enGGs5ORkPf300/r4449VunRpPfroo8qfP7++//57PfHEE3ZJdatVq1apT58+2rx5s0qXLq2IiAjlz59fY8eO1bvvvpthrLNnz6p79+56++239ccffyg8PFzNmjVTcnKyZs6cqW7duqWblLyXK1euaM2aNZKkbt26yWQy2UpyOLIeaPHixdWlSxcVL15cktS4cWNbveYuXbqobNmytm03bNigLl26aNmyZcqXL59at26tqlWraseOHXr++ec1bty4dGNk9RhevHixevbsqTVr1qhEiRJq06aNypUrpx9//FEDBgzQ9OnTMxzP7t271a1bN+3Zs0d169ZVs2bNVLBgQbtt1qxZoz59+ujUqVNq2LChGjVqZDtnHb0/x48fr0WLFsnLy0t16tRR8+bNVbhwYX377bfq1auX1q5da7d9mzZtVKdOHUlS2bJl7fZFkyZN7hnPYrHo5Zdf1gsvvKCdO3eqatWqat26tfz8/LRs2TJ16dJFP/30U4avX7p0qfr166fExEQ1adJEVatW1YEDB/Tyyy9rzpw5mR53aqtXr9bVq1dtD26sW7euHnzwQV26dEnffffdXV97+fJlPfHEE1q4cKEqVqyopk2bymKxaPny5erVq5cuX76c5jUjR47UqlWrlC9fPj388MNq3LixvLy8tGzZMnXr1k2//PJLpvrdq1cv+fr6KiYmRpcuXUp3mwULFkiSevbsKR+f21XtoqOjNWDAAG3YsEH333+/Wrdurbp166pQoUJavXr1PcdsNX36dI0cOVI7duxQ5cqV1bZtW9WsWVPe3t5asmSJtm7dmql2AACAE1kAAIAhNG/e3BISEmJZunRpmnUHDx60VKlSxRISEmJZsmSJbXlISIglJCTEUqdOHcvu3bvTbffll19Ot90ffvjBEhISYgkPD7esWrUqzet+//13y5EjR2w/nzt3zlK/fn1LaGioZcGCBRaz2Wxbl5CQYOnbt68lJCTEMm3atEyN9+TJk7b+79ixI1OvuZP19XXr1rX8+uuvduumTp1qCQkJsbRu3TrN67Zt22Y5efJkmuVHjx61NG3a1BISEmL53//+Z7du69attnidO3e2nDt3zrYuKSnJMmDAAEtISIjltddes3vdmTNnLLVq1bKEhIRY5s6da7du+/bttnUhISF261JSUixPPPGEJSQkxDJu3DjL5cuXbeuSk5Mtb731liUkJMTSp0+fe7xLaS1atMgSEhJi6dSpk10/q1ataqlSpYrlzz//TPOaU6dOWUJCQizNmzfPsF3rMXzq1Cm75ZGRkZaQkBDL1q1b033d+fPnLQ899JAlJCTE8vHHH1tSUlJs6/bs2WOpV6+eJSQkxLJo0SK712X1GD506JClWrVqltDQUEt0dLTdths2bLBUr17dEhISYtm8ebPdOus5FBISYnn33Xftjv07xxgSEmKZP39+mvXZ3Z/W4y4yMjJNm99//70lMTEx3eXVqlWz1K9f33
L9+nW7dUuXLrWEhIRYXn755TSvu3O8d35mfPXVV5aQkBBLgwYNLAcOHLAbm/V8q1u3riU+Pt7uddbjonr16pb169e
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"text/plain": [
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"<Figure size 1400x1200 with 2 Axes>"
|
|
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"def analyze_price_change_autocorrelation(df: pd.DataFrame,\n",
"                                         price_col: str = 'close',\n",
"                                         max_lags: int = 50,\n",
"                                         plot_specific_lag: int = 1):\n",
"    \"\"\"\n",
"    Compute and plot the autocorrelation of the per-row price range.\n",
"\n",
"    The analysed series is 'high' - 'low', stored under the column name\n",
"    'pct_change' (the name is kept because the printed messages and axis\n",
"    labels refer to it). `price_col` is only checked for presence; it does\n",
"    not take part in the calculation.\n",
"\n",
"    Args:\n",
"        df: Bar data in time order containing 'high', 'low' and `price_col`.\n",
"            It is copied, not modified.\n",
"        price_col: Column that must exist in `df`; defaults to 'close'.\n",
"        max_lags: Largest lag for which the autocorrelation is computed.\n",
"        plot_specific_lag: Lag shown in the scatter/regression panel. When it\n",
"            is None or outside [1, max_lags] the panel shows a notice instead.\n",
"\n",
"    Returns:\n",
"        A copy of `df` with the 'pct_change' column added, or None when a\n",
"        required column is missing, no data is left, or the autocorrelation\n",
"        cannot be computed.\n",
"    \"\"\"\n",
"    # --- 1. Data preparation ---\n",
"    for required_col in (price_col, 'high', 'low'):\n",
"        if required_col not in df.columns:\n",
"            print(f\"错误: DataFrame中找不到列 '{required_col}'\")\n",
"            return None\n",
"\n",
"    # Work on a copy so the caller's DataFrame is left untouched.\n",
"    df_analysis = df.copy()\n",
"    df_analysis['pct_change'] = (df_analysis['high'] - df_analysis['low'])\n",
"    df_analysis = df_analysis.dropna(subset=['pct_change'])\n",
"\n",
"    if df_analysis.empty:\n",
"        print(\"错误: 计算'pct_change'后DataFrame为空,无法进行分析。\")\n",
"        return None\n",
"\n",
"    print(f\"已计算 'pct_change',共 {len(df_analysis)} 条有效数据。\")\n",
"\n",
"    # --- 2. Autocorrelation for lags 1..max_lags ---\n",
"    lags = range(1, max_lags + 1)\n",
"    try:\n",
"        autocorrs = [df_analysis['pct_change'].autocorr(lag=n) for n in lags]\n",
"    except Exception as e:\n",
"        print(f\"计算自相关性时出错: {e}\")\n",
"        return None\n",
"\n",
"    autocorr_df = pd.DataFrame({'Lag': lags, 'Autocorrelation': autocorrs})\n",
"    print(\"\\n自相关性计算结果 (前5期):\")\n",
"    print(autocorr_df.head())\n",
"\n",
"    # --- 3. Visualisation ---\n",
"    plt.style.use('seaborn-v0_8-whitegrid')\n",
"    fig, axes = plt.subplots(2, 1, figsize=(14, 12))\n",
"    fig.suptitle('Price Change Autocorrelation Analysis', fontsize=16)\n",
"\n",
"    # a) Autocorrelation (ACF) stem plot\n",
"    ax1 = axes[0]\n",
"    ax1.stem(autocorr_df['Lag'], autocorr_df['Autocorrelation'])\n",
"    ax1.set_title(f'Autocorrelation of Daily Price Changes (Lags 1-{max_lags})')\n",
"    ax1.set_xlabel('Lag (Number of Previous K-lines)')\n",
"    ax1.set_ylabel('Correlation Coefficient')\n",
"\n",
"    # Approximate 95% band for a white-noise series; the labelled line is\n",
"    # also what gives ax1.legend() an entry to show.\n",
"    conf_interval = 1.96 / np.sqrt(len(df_analysis))\n",
"    ax1.axhline(y=conf_interval, color='red', linestyle='--', label='95% Confidence Interval')\n",
"    ax1.axhline(y=-conf_interval, color='red', linestyle='--')\n",
"    ax1.legend()\n",
"    ax1.grid(True)\n",
"\n",
"    # b) Scatter plot of the series against one chosen lag\n",
"    ax2 = axes[1]\n",
"    if plot_specific_lag is not None and 1 <= plot_specific_lag <= max_lags:\n",
"        lag_col_name = f'pct_change_lag{plot_specific_lag}'\n",
"        # Build the scatter data separately so NaNs in unrelated columns\n",
"        # cannot remove points and the returned frame gains no helper column.\n",
"        df_scatter = df_analysis[['pct_change']].copy()\n",
"        df_scatter[lag_col_name] = df_scatter['pct_change'].shift(plot_specific_lag)\n",
"        df_scatter = df_scatter.dropna()\n",
"\n",
"        sns.regplot(x=lag_col_name, y='pct_change', data=df_scatter, ax=ax2,\n",
"                    scatter_kws={'alpha': 0.5, 's': 20},\n",
"                    line_kws={'color': 'red', 'linestyle': '--'})\n",
"        ax2.set_title(f'Current vs. Lag-{plot_specific_lag} Price Change')\n",
"        ax2.set_xlabel(f'Previous K-line\\'s pct_change (t-{plot_specific_lag})')\n",
"        ax2.set_ylabel('Current K-line\\'s pct_change (t)')\n",
"        ax2.grid(True)\n",
"        ax2.axhline(0, color='grey', lw=0.5)\n",
"        ax2.axvline(0, color='grey', lw=0.5)\n",
"    else:\n",
"        ax2.text(0.5, 0.5, 'No specific lag plot requested or lag is out of range.',\n",
"                 ha='center', va='center', transform=ax2.transAxes)\n",
"        ax2.set_axis_off()\n",
"\n",
"    plt.tight_layout()  # adjust the layout to make room for the figure title\n",
"    plt.show()\n",
"\n",
"    return df_analysis\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Run the autocorrelation analysis on freshly computed indicators when bar data is available.\n",
"if df_raw is None or df_raw.empty:\n",
"    print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"else:\n",
"    processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"    analyzed_df = analyze_price_change_autocorrelation(processed_data, plot_specific_lag=50)\n"
|
2025-07-10 15:07:31 +08:00
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "quant",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
"version": "3.12.11"
|
|
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|