2025-06-22 23:03:50 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
2025-07-10 15:07:31 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "b93c7ca1",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:18.240669Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:18.238159Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"import talib as ta # Make sure TA-Lib is installed: pip install TA-Lib\n",
|
|
|
|
|
|
"import statsmodels.api as sm\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# Ignore all warnings\n",
|
|
|
|
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 0. Configure your file path ---\n",
|
|
|
|
|
|
"# Set file_path below to the location of your CSV file of price bars\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"file_path = '/mnt/d/PyProject/NewQuant/data/data/KQ_m@SHFE_rb/KQ_m@SHFE_rb_min15.csv'\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 15
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "60a48bac",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:18.260496Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:18.252924Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 1. Data Loading and Preprocessing ---\n",
|
|
|
|
|
"def load_and_preprocess_data(file_path):\n",
"    \"\"\"\n",
"    Loads historical futures bar data from a CSV file and performs basic cleaning.\n",
"\n",
"    The CSV must contain a 'datetime' column (parsed and used as the index) plus\n",
"    'open', 'high', 'low', 'close' and 'volume' columns. Other columns are kept as-is.\n",
"\n",
"    Parameters:\n",
"    file_path (str): Path to the CSV file.\n",
"\n",
"    Returns:\n",
"    pd.DataFrame or None: Rows sorted by time with no missing OHLCV values, or None\n",
"        if the file is missing, lacks a required column, or cannot be parsed\n",
"        (the reason is printed).\n",
"    \"\"\"\n",
"    required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
"    try:\n",
"        df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
"\n",
"        # Validate the schema before touching the rows, so a missing column is reported\n",
"        # with a clear message instead of a KeyError from the cleaning step below.\n",
"        if not all(col in df.columns for col in required_columns):\n",
"            raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
"\n",
"        # Ensure data is sorted by time\n",
"        df = df.sort_index()\n",
"\n",
"        # Drop only rows with gaps in the required columns: a NaN in an auxiliary column\n",
"        # must not discard an otherwise valid bar.\n",
"        initial_rows = len(df)\n",
"        df = df.dropna(subset=required_columns)\n",
"        if len(df) < initial_rows:\n",
"            print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
"\n",
"        print(f\"Successfully loaded {len(df)} rows of data.\")\n",
"        print(\"First 5 rows of data:\")\n",
"        print(df.head())\n",
"        return df\n",
"    except FileNotFoundError:\n",
"        print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
"        return None\n",
"    except Exception as e:\n",
"        # Deliberate best-effort: any other failure is reported and signalled with None.\n",
"        print(f\"Error during data loading or preprocessing: {e}\")\n",
"        return None\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 2. Stationary Indicator Calculation Function ---\n",
|
|
|
|
|
"def calculate_stationary_indicators(df, volume_window=10, price_lag=5):\n",
"    \"\"\"\n",
"    Calculates stationary indicators based on volume and price.\n",
"\n",
"    Adds 'volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore', 'log_return',\n",
"    'future_log_return' and 'macd_hist_diff' to a copy of the input, then drops the\n",
"    rows for which a value is undefined (rolling-window warm-up, the last\n",
"    price_lag rows, divisions by zero).\n",
"\n",
"    Parameters:\n",
"    df (pd.DataFrame): K-line data containing 'close' and 'volume' columns.\n",
"    volume_window (int): Window size, in bars, for the volume indicators (e.g., 10 for the past 10 bars).\n",
"    price_lag (int): Horizon, in bars, of the future return (e.g., 5 for the return over the next 5 bars).\n",
"\n",
"    Returns:\n",
"    pd.DataFrame: New DataFrame with the indicator columns; the input is not modified.\n",
"        If the MACD calculation fails, 'macd_hist_diff' is all NaN and is ignored\n",
"        when dropping incomplete rows.\n",
"    \"\"\"\n",
"    df_processed = df.copy()\n",
"    volume = df_processed['volume']\n",
"    close = df_processed['close']\n",
"\n",
"    # --- Stationary Volume Indicators ---\n",
"    # 1. Volume Rate of Change (VROC)\n",
"    df_processed['volume_roc'] = volume.pct_change(volume_window) * 100\n",
"\n",
"    # 2. Volume to Moving Average Ratio\n",
"    rolling_mean_vol = volume.rolling(window=volume_window).mean()\n",
"    df_processed['volume_ma_ratio'] = volume / rolling_mean_vol\n",
"\n",
"    # 3. Normalized Volume (Z-score standardization)\n",
"    # Rolling statistics only use the current and past bars, which avoids look-ahead bias\n",
"    rolling_std_vol = volume.rolling(window=volume_window).std()\n",
"    # Avoid division by zero\n",
"    df_processed['volume_normalized_zscore'] = (volume - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)\n",
"\n",
"    # --- Stationary Price Indicators ---\n",
"    # 1. Current Period Log Return\n",
"    df_processed['log_return'] = np.log(close / close.shift(1))\n",
"\n",
"    # 2. Future N-period Log Return (Our target variable for research)\n",
"    # shift(-price_lag) moves future data up to align with the current row for future return calculation\n",
"    df_processed['future_log_return'] = np.log(close.shift(-price_lag) / close)\n",
"\n",
"    # 3. MACD Histogram Difference (Measures momentum change rate, potentially capturing trend initiation)\n",
"    macd_available = True\n",
"    try:\n",
"        _, _, macdhist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)\n",
"        df_processed['macd_hist_diff'] = macdhist.diff(1)\n",
"    except Exception as e:\n",
"        print(f\"TA-Lib MACD calculation failed, possibly due to installation or data issues: {e}. 'macd_hist_diff' will contain NaN.\")\n",
"        df_processed['macd_hist_diff'] = np.nan\n",
"        macd_available = False\n",
"\n",
"    # A ratio against a zero base (e.g. zero volume volume_window bars ago) is +/-inf.\n",
"    # Treat it as undefined: inf would otherwise make means and correlations inf/NaN.\n",
"    indicator_columns = ['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore',\n",
"                         'log_return', 'future_log_return', 'macd_hist_diff']\n",
"    df_processed[indicator_columns] = df_processed[indicator_columns].replace([np.inf, -np.inf], np.nan)\n",
"\n",
"    # Drop rows with NaN values resulting from rolling windows and shift operations.\n",
"    # When MACD failed its column is entirely NaN, so it is left out of the check;\n",
"    # otherwise every row would be removed.\n",
"    columns_to_check = [col for col in df_processed.columns if macd_available or col != 'macd_hist_diff']\n",
"    df_processed = df_processed.dropna(subset=columns_to_check)\n",
"    if df_processed.empty:\n",
"        print(\"Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.\")\n",
"    else:\n",
"        print(f\"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.\")\n",
"    return df_processed\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 3. Analysis and Visualization Function ---\n",
|
|
|
|
|
"def analyze_and_visualize(processed_df):\n",
"    \"\"\"\n",
"    Prints summary statistics and plots how the volume indicators relate to future returns.\n",
"\n",
"    Produces, in order: descriptive statistics, the correlation of each volume indicator\n",
"    with 'future_log_return', a correlation heatmap, a box plot and a histogram of future\n",
"    returns by volume category, and a three-panel price / normalized volume / future\n",
"    return chart for the most recent bars.\n",
"\n",
"    Parameters:\n",
"    processed_df (pd.DataFrame): Frame with a datetime index and the columns 'close',\n",
"        'volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore', 'log_return' and\n",
"        'future_log_return'. It is not modified; derived columns go to an internal copy.\n",
"\n",
"    Returns:\n",
"    None\n",
"    \"\"\"\n",
"    if processed_df.empty:\n",
"        print(\"No data available for analysis. Please check data loading and indicator calculation steps.\")\n",
"        return\n",
"\n",
"    # Work on a sanitized copy: the caller's frame stays untouched, and +/-inf values\n",
"    # (e.g. a rate of change from a zero base) no longer turn the statistics into inf/NaN.\n",
"    analysis_df = processed_df.replace([np.inf, -np.inf], np.nan)\n",
"    volume_indicators = ['volume_roc', 'volume_ma_ratio', 'volume_normalized_zscore']\n",
"    has_zscore = 'volume_normalized_zscore' in analysis_df.columns\n",
"\n",
"    print(\"\\n--- Statistical Description of Indicators ---\")\n",
"    print(analysis_df[volume_indicators + ['log_return', 'future_log_return']].describe())\n",
"\n",
"    # --- Correlation Analysis ---\n",
"    print(\"\\n--- Correlation between Volume Indicators and Future Returns ---\")\n",
"    for indicator in volume_indicators:\n",
"        if indicator in analysis_df.columns and 'future_log_return' in analysis_df.columns:\n",
"            correlation = analysis_df[indicator].corr(analysis_df['future_log_return'])\n",
"            print(f\"Correlation between '{indicator}' and 'future_log_return': {correlation:.4f}\")\n",
"        else:\n",
"            print(f\"Column '{indicator}' or 'future_log_return' does not exist. Skipping correlation calculation.\")\n",
"\n",
"    # Plot correlation heatmap\n",
"    plt.figure(figsize=(9, 7))\n",
"    sns.heatmap(analysis_df[volume_indicators + ['future_log_return']].corr(), annot=True, cmap='coolwarm', fmt=\".2f\")\n",
"    plt.title('Correlation Matrix: Volume Indicators vs. Future Log Returns', fontsize=16)\n",
"    plt.show()\n",
"\n",
"    # --- Conditional Analysis: Future Returns based on Volume Anomaly ---\n",
"    # Thresholds are the 20% / 80% quantiles of the z-score, so they adapt to the data\n",
"    if has_zscore:\n",
"        zscore = analysis_df['volume_normalized_zscore']\n",
"        low_vol_threshold = zscore.quantile(0.2)\n",
"        high_vol_threshold = zscore.quantile(0.8)\n",
"\n",
"        # Vectorized bucketing: the low bucket wins a tie, anything else is 'Normal Volume'.\n",
"        analysis_df['volume_category'] = np.select(\n",
"            [zscore <= low_vol_threshold, zscore >= high_vol_threshold],\n",
"            ['Low Volume', 'High Volume'],\n",
"            default='Normal Volume',\n",
"        )\n",
"\n",
"        print(\"\\n--- Statistics of Future Log Returns by Volume Category ---\")\n",
"        print(analysis_df.groupby('volume_category')['future_log_return'].describe())\n",
"\n",
"        # Plot box plot of future returns by volume category\n",
"        plt.figure(figsize=(10, 6))\n",
"        sns.boxplot(x='volume_category', y='future_log_return', data=analysis_df, order=['Low Volume', 'Normal Volume', 'High Volume'], palette='viridis')\n",
"        plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)\n",
"        plt.xlabel('Volume Category', fontsize=12)\n",
"        plt.ylabel('Future Log Return', fontsize=12)\n",
"        plt.grid(True, linestyle='--', alpha=0.7)\n",
"        plt.show()\n",
"\n",
"        # Plot histogram of future returns, categorized by volume\n",
"        plt.figure(figsize=(12, 7))\n",
"        sns.histplot(data=analysis_df, x='future_log_return', hue='volume_category', kde=True, bins=70,\n",
"                     palette={'Low Volume': 'red', 'Normal Volume': 'blue', 'High Volume': 'green'},\n",
"                     alpha=0.6, line_kws={'linewidth':2})\n",
"        plt.title('Distribution of Future Log Returns by Volume Category', fontsize=16)\n",
"        plt.xlabel('Future Log Return', fontsize=12)\n",
"        plt.ylabel('Frequency', fontsize=12)\n",
"        plt.grid(True, linestyle='--', alpha=0.7)\n",
"        plt.show()\n",
"    else:\n",
"        print(\"Column 'volume_normalized_zscore' not found. Skipping volume category analysis.\")\n",
"\n",
"    # --- Price Chart with Indicator Overlay (Simplified to line plot; consider mplfinance for OHLC charts) ---\n",
"    print(\"\\n--- Price Chart with Volume Indicator Overlay ---\")\n",
"    # Plot one contiguous window (the most recent bars). Randomly sampled rows would be\n",
"    # joined by lines across unrelated dates and misrepresent the price path.\n",
"    if len(analysis_df) > 100:\n",
"        sample_size = min(200, len(analysis_df) // 2)  # Show max 200 data points or half of data\n",
"        plot_df = analysis_df.iloc[-sample_size:]\n",
"    else:\n",
"        plot_df = analysis_df  # If data volume is small, plot all\n",
"\n",
"    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1, 1]})\n",
"\n",
"    # Subplot 1: Price Trend\n",
"    ax1.plot(plot_df.index, plot_df['close'], label='Close Price', color='blue', linewidth=1.5)\n",
"    ax1.set_title(f'Futures Price Trend, Normalized Volume, and Future Returns (Sample Period: {plot_df.index.min().strftime(\"%Y-%m-%d %H:%M\")} to {plot_df.index.max().strftime(\"%Y-%m-%d %H:%M\")})', fontsize=16)\n",
"    ax1.set_ylabel('Price', fontsize=12)\n",
"    ax1.grid(True, linestyle='--', alpha=0.6)\n",
"    ax1.legend()\n",
"\n",
"    # Subplot 2: Normalized Volume Indicator\n",
"    # Vertical lines instead of bars: the default bar width is 0.8 days on a date axis,\n",
"    # which would smear intraday bars over each other.\n",
"    if has_zscore:\n",
"        ax2.vlines(plot_df.index, 0, plot_df['volume_normalized_zscore'], colors='grey', alpha=0.7, label='Normalized Volume (Z-score)')\n",
"        ax2.axhline(high_vol_threshold, color='green', linestyle='--', linewidth=0.8, label=f'High Vol Threshold ({high_vol_threshold:.2f})')\n",
"        ax2.axhline(low_vol_threshold, color='red', linestyle='--', linewidth=0.8, label=f'Low Vol Threshold ({low_vol_threshold:.2f})')\n",
"        ax2.legend()\n",
"    ax2.set_ylabel('Normalized Volume', fontsize=12)\n",
"    ax2.grid(True, linestyle='--', alpha=0.6)\n",
"\n",
"    # Subplot 3: Future Log Return\n",
"    ax3.plot(plot_df.index, plot_df['future_log_return'], label='Future Log Return', color='purple', linewidth=1.5)\n",
"    ax3.axhline(0, color='black', linestyle='--', linewidth=0.8)  # Zero return line\n",
"    ax3.set_ylabel('Future Log Return', fontsize=12)\n",
"    ax3.set_xlabel('Time', fontsize=12)\n",
"    ax3.grid(True, linestyle='--', alpha=0.6)\n",
"    ax3.legend()\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
|
|
|
|
|
|
"\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 16
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "9ab3d054",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:20.928078Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:18.270022Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
"df_raw = load_and_preprocess_data(file_path)\n",
"\n",
"if df_raw is None or df_raw.empty:\n",
"    print(\"无法进行分析,请检查数据加载是否成功。\")\n",
"else:\n",
"    # Tune volume_window and price_lag here. Both are counted in bars, not minutes.\n",
"    # volume_window: look-back of the volume moving average (on 15-minute bars, 10 means the last 150 minutes)\n",
"    # price_lag: horizon of the future return (on 15-minute bars, 5 means the next 75 minutes)\n",
"    processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"    analyze_and_visualize(processed_data)"
|
|
|
|
|
|
],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Successfully loaded 25090 rows of data.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"First 5 rows of data:\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"Indicators calculated. 25051 rows of data remaining for analysis.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Statistical Description of Indicators ---\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
" volume_roc volume_ma_ratio volume_normalized_zscore log_return \\\n",
|
|
|
|
|
|
"count 2.505100e+04 25051.000000 25051.000000 25051.000000 \n",
|
|
|
|
|
|
"mean inf 1.009302 -0.002007 -0.000012 \n",
|
|
|
|
|
|
"std NaN 0.558621 0.974367 0.003080 \n",
|
|
|
|
|
|
"min -1.000000e+02 0.000000 -2.408660 -0.059487 \n",
|
|
|
|
|
|
"25% -4.074090e+01 0.626202 -0.717890 -0.001388 \n",
|
|
|
|
|
|
"50% 5.627264e-02 0.869196 -0.265236 0.000000 \n",
|
|
|
|
|
|
"75% 6.852800e+01 1.239913 0.520413 0.001386 \n",
|
|
|
|
|
|
"max inf 9.482381 2.845738 0.046782 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
" future_log_return \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"count 25051.000000 \n",
|
|
|
|
|
|
"mean -0.000058 \n",
|
|
|
|
|
|
"std 0.006933 \n",
|
|
|
|
|
|
"min -0.060541 \n",
|
|
|
|
|
|
"25% -0.003403 \n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"50% 0.000000 \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"75% 0.003397 \n",
|
|
|
|
|
|
"max 0.055575 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Correlation between Volume Indicators and Future Returns ---\n",
|
|
|
|
|
|
"Correlation between 'volume_roc' and 'future_log_return': nan\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Correlation between 'volume_ma_ratio' and 'future_log_return': -0.0057\n",
|
|
|
|
|
|
"Correlation between 'volume_normalized_zscore' and 'future_log_return': -0.0040\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 900x700 with 2 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAMHCAYAAAAgsia3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAt7xJREFUeJzs3Xd8zdcfx/F3NknIlIi9mlixiVUqFFW0qFYpNYtS1WVVldpqK23VKFqrNWq1SlE79o4ttYkMK4ms+/sjv1yuJAT3Jsbr+XjcB/l+z/mec773e8e5n3PO18pgMBgEAAAAADAb68yuAAAAAAA8b+hoAQAAAICZ0dECAAAAADOjowUAAAAAZkZHCwAAAADMjI4WAAAAAJgZHS0AAAAAMDM6WgAAAABgZnS0AAAAAMDM6GjBYrZs2aK+ffuqXr16KleunEqWLKnq1aurXbt2+vnnnxUeHp7ZVXxikyZNkp+fnyZNmpRhZQYGBsrPz0/nz5/PsDIfVevWreXn5yc/Pz917dr1gWn//PNPY1o/Pz9dvnw5g2qZPsn1yiytWrWSn5+fRo8ena70Q4YMkZ+fnzp16vTYZT4L15glnD9/3vh8Z1Tb03oPWbx4sfz8/NSnT58MqQcyx73vfWk91q5dm9nVfOokn5ugoKDMrsojufc95t5H6dKlVbduXfXr10/Hjh3L7GrCjOhowezCw8PVrl07tW/fXosXL1ZcXJwCAgJUr149FSpUSHv37tXw4cNVu3Zt7d+/P7Or+1Tp06eP/Pz8tHjx4syuitls3LhR165dS3P/77//bpFyM7uDZC5vvfWWJGnp0qVKSEh4YNrY2FgtX77cJB/wOIKCguTn56fWrVtndlVeCNWrV1eTJk1Sffj4+DzRsem0P53q1atnfI4rVKigiIgILVq0SE2bNtXff/9tljKSO3aBgYFmOR4enW1mVwDPl5s3b6ply5Y6c+aMChUqpMGDB6tChQomaWJjY7VkyRJNmjRJoaGhmVTTZ9fPP/+suLg4eXt7Z3ZVHqpkyZI6dOiQli5dqo4dO6bYf+nSJW3dulX+/v46ePBgJtTw4VatWpWp5devX19DhgxRaGioNm7cqFq1aqWZ9p9//lFkZKTc3d35YH3GvfrqqypdurSyZcuW2VVBBvjggw8UEBCQ2dVABurVq5fy5Mlj/DsiIkJdu3bV3r17NWDAANWoUUNZsmTJxBrCHIhowawGDx6sM2fOKHfu3Jo3b16KTpYk2dvb65133tHSpUtVqFChTKjlsy1fvnwqXLiw7OzsMrsqD9W4cWPZ2dmlGaFbvHixEhMT1axZswyuWfoVLlxYhQsXzrTys2bNqtdff12SHhrpTN6ffN7x7MqWLZsKFy4sLy+vzK4KgAzg5uamXr16SUrqdO3duzeTawRzoKMFszl37pxWrFghSerbt69cXV0fmN7T0zPVjtbKlSv1/vvvq1KlSipZsqRq1aqlvn376syZM6ke5975JGvXrlWbNm1UqVIlk/Hb9w4jW7Rokd555x2VL18+xVyMK1euaPjw4XrttddUunRplS1bVs2aNdMvv/yi+Pj4dJ+LuLg4/fHHH/rss89Uv359lStXTqVKlVK9evU0ZMgQXblyxSR9cnh/yZIlxvN37/jte+dvPGj+THR0tKZOnaomTZqobNmyKl26tF5//XWNGzdO169fT5H+3mEFBoNBCxYsUNOmTVWmTBmVL19e7du3f6I3e1dXVwUGBurUqVMpjmMwGLRkyRJlyZJFDRs2TPMYFy5c0NSpU9WmTRu98sorKlmypCpUqKB3331X8+fPV2Jiokn65Dkvye4fC5983u4dThMZGamhQ4eqTp06KlmypMlwqdSGIM6YMUN+fn6qV6+ebt26laLOCxculJ+fn2rWrGmWuYjJwwDXr1+f5vGuXLmiLVu2mKSXpPj4eM2bN08tWrRQ+fLl5e/vr7p166Z6HT7Mw+ZupTX09d7tp0+fVs
+ePVWlShWVKVNGzZo1M5mDsn//fnXp0kWVK1dWqVKl9M4772jbtm1p1ikmJkYzZszQ22+/rQoVKsjf31/16tXTqFGjFBER8Ujte5DkeYdBQUEKDg5W9+7dFRAQoJIlS6pBgwaaMWOGDAZDmnWcNGmS6tata5yr2rt3b128eDHN8h423OvKlSsaOXKkGjVqpLJly6pMmTKqV6+e+vTpoz179pikPXDggEaNGqW33npL1apVU8mSJVW1alV16dJFW7duTbWtbdq0kSTt2LHD5PVzf6T0ca6v9Lwf37x5U+PGjVOjRo1UpkwZ43lr0aKFJkyYoLi4uDTPXbJTp07Jz89PFStW1J07d9JM17Rp0xRzoa5evaohQ4aoXr168vf3V+nSpVWzZk29//77mj59+kPLtoR7r8HUpDbfLzAwUH379pUkLVmyxOS5vPd9zhyv7ePHj6tnz56qXr26ihUrZlKP+Ph4/fbbb2rdurXxsz0wMFBff/21Ll269Njn5FGcOnVKffv2Va1atVSyZElVqlRJ77///gNHLcTHx2vGjBlq2LCh/P39VaVKFfXo0UMnT560yJDMez9rwsLCUq1Pes9jnz59VLt2bUlJn6P3fxbem+5BUxbSamd6PkPvvSbDw8M1aNAg1axZUyVLllTNmjU1ePBg3bhxI9Vy//zzT7Vt21YBAQEqUaKEAgIC1KBBA/Xv319Hjx5Nx9l8OjB0EGazfv16JSQkKHv27I81bMlgMKhPnz5aunSpbG1tVaFCBXl4eOjw4cNavHix/vzzT02cOFE1atRINf/MmTP1yy+/qGTJknr55Zd19epV2djYmKQZPHiw5s6dq7Jly+qVV17RuXPnZGVlJUnauXOnunXrpuvXryt37tyqWrWqYmNjdfDgQQ0ePFjr16/XDz/8kK5IQVhYmHr16mX8VdrPz0/R0dEKDg7WnDlztHLlSs2fP1/58+eXJDk6OqpJkybavXu3zp49q3Llyhn3SVKxYsUeWmZkZKTatm2r4OBgOTs7q3LlyrKzs9OOHTv0ww8/aMWKFZo1a5bJUIV79e3bVytWrFD58uX1yiuvKDg4WFu2bNHOnTv1yy+/qHTp0g+tQ2qaNWum1atXa9GiRSpbtqxx+/bt23Xu3Dk1atTogcOj/vjjD02YMEF58uRRgQIFVK5cOYWGhmrv3r3as2ePtmzZookTJxqfx2LFiqlJkybGTmuTJk1Mjufo6Gjyd0REhJo1a6abN2+qfPnyKlGixEOf4/bt22vnzp1at26dBgwYoLFjxxr3HT16VEOGDJGtra3GjRsnd3d3477z588bP/j++eefNJ+L+5UqVUq+vr46fvy4li1bprZt26ZIs2TJEiUkJKh06dJ66aWXJCUN0+3cubO2bt0qBwcHBQQEyNnZWXv37tWcOXO0YsUKTZ8+XSVKlEhXPZ7UkSNHNHjwYHl7e6tKlSq6ePGi9u7dq+7du2v8+PGytbVVz5499dJLL6lKlSo6ffq09u3bp44dO2rWrFkpIuRXrlxRx44ddfz4cbm6usrf319OTk46cuSIpk+frr/++ktz5sxR7ty5zdaGzZs3a+bMmcqXL5+qVaum0NBQ7d69WyNHjtSlS5f05ZdfmqSPjo5W27ZttW/fPjk6Oqp69epycHDQ5s2btWHDBr3yyiuPXIdt27apR48eunHjhjw8PFSlShXZ2dnpwoULxh+7ypUrZ0w/duxYBQUFqUiRIipRooSyZs2qc+fOaf369Vq/fr369eun999/35j+5Zdflr29vTZv3ixPT0+9/PLLxn1ubm7G/z/p9ZXW+3F0dLRatmyp48ePy93dXZUrV5ajo6NCQ0N15swZTZkyRe3atXvo67Rw4cIqW7as9u7dq7Vr1xojw/c6duyYDh8+LE9PT+NzERoaqmbNmunq1avKlSuXXn75ZTk4OOjq1as6evSoDh8+rA4dOjz8iXoK1KtXT/v27dOePXuUL18+lS9f3rjPnCNK9u7dq6
+//lo5cuRQhQoVFBMTIycnJ0nSrVu31LVrV+3YsUOOjo4qWbKk3NzcdPz4cc2fP19//fWXZs6cqeLFi5utPvfbsGG
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Statistics of Future Log Returns by Volume Category ---\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
" count mean std min 25% 50% \\\n",
|
|
|
|
|
|
"volume_category \n",
|
|
|
|
|
|
"High Volume 5011.0 -0.000047 0.007066 -0.055483 -0.003457 0.0 \n",
|
|
|
|
|
|
"Low Volume 5011.0 -0.000054 0.006773 -0.060541 -0.003249 0.0 \n",
|
|
|
|
|
|
"Normal Volume 15029.0 -0.000063 0.006941 -0.058407 -0.003438 0.0 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
" 75% max \n",
|
|
|
|
|
|
"volume_category \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"High Volume 0.003508 0.041379 \n",
|
|
|
|
|
|
"Low Volume 0.003338 0.050018 \n",
|
|
|
|
|
|
"Normal Volume 0.003374 0.055575 \n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2EAAAIkCAYAAACX7iNUAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAvKdJREFUeJzs3XlYVGX/P/D3zLAIKgIiCq6BgSgggksqaqiZ2uZeLqDmVvqo9OSCPj2p2ZMYVphmVmQuaW5oZmmLSeYuiqgobriDKKtgIMvM+f3hb86XYQaYGQaGmXm/rstLuM89w+fmzNzM55x7kQiCIICIiIiIiIhqhdTYARAREREREVkSJmFERERERES1iEkYERERERFRLWISRkREREREVIuYhBEREREREdUiJmFERERERES1iEkYERERERFRLWISRkREREREVIuYhBEREREREdUiJmFUJ/Xt2xfe3t7iv3bt2qFTp07o3bs3QkNDsXz5cpw/f77S5wgNDYW3tzdOnjxZS1FXTtmme/fuqZTXtTgBICIiAt7e3ti1a5exQ6kRBw8exJgxYxAYGCi+xrT5/Zd/XWr6t379+ppvgIlR/t5M8fWk6Rz7+/ujb9++eOedd3D69Gljh2hWVq1aBW9vb6xatcpoMYwdOxbe3t5YsWKFVvU//PBDeHt7Y8qUKXr/zIr+PliSkpISxMbGYvr06Xj++efh7++Pjh07ol+/fpg1axZ++uknFBcXGztMIoOxMnYARJUJDAxE69atAQBPnjxBTk4OkpOTcerUKaxbtw5du3bFRx99hJYtW9ZYDH379kVqair+/PNPtGjRosZ+Tm3ZtWsXFixYgKFDhyIyMtLY4dS65ORkzJo1CwqFAs899xyaNGkCiUQCFxcXrZ+j7OuyvLZt21YrvpMnTyIsLAxdu3bFpk2bqvVcZDjBwcFo0qQJACAnJwdJSUnYt28f9u/fjwULFmD8+PEG+Tne3t4AgCtXrhjk+Uh3I0aMwOnTp/Hjjz/inXfegUwmq7BucXEx9u7dKz6O9HPx4kXMmjUL9+7dg0QiQbt27eDv7w+JRILU1FQcOHAAv/32G6Kjo/HLL7/Azs6uWj+P7zOqC5iEUZ02cuRIDBs2TKVMEAT8/fff+Oijj3Dq1Cm88cYb2Lp1q1oitnz5chQWFsLd3b02Q67Q+vXrUVJSgqZNmxo7lCr9+9//xpQpU+Dq6mrsUAzuwIEDKCkpwVtvvYV33nlHr+fQ9Lok8zZ16lR069ZN/L6wsBDz5s3D77//jqioKAwcONAk3ttUtYEDB+LDDz9ERkYG/v77b4SEhFRY988//0Rubi6cnZ3Rt2/fWozSfFy8eBFjx45FYWEhQkJC8J///Eft73l2djbWr1+PdevWoaSkpNpJGFFdwOGIZHIkEgn69OmDHTt2oE2bNsjMzMR7772nVs/d3R2enp51prNu1aoVPD09YW1tbexQquTq6gpPT080bNjQ2KEYXFpaGgBUeCeLSBt2dnZiv1NSUoLDhw8bOSIyFDs7O7z00ksAUOUQWuXxV1991ST69rqmpKQEs2fPRmFhIfr37481a9ZoHNni7OyMf//739iyZQtsbGyMECmR4TEJI5Pl4OCAhQsXAgBOnDiBpKQkleMVzbUqLi5GTEwMhg0bhk6dOsHX1xc9e/bE8OHD8fHHHyM3NxfA0z+u3t7eSE1NBQD069dPZV6I8nlPnjwJb29vhIaGorCwECtXrsSgQYPQsWNHlSuj2oz5P3XqFN5880107doVHTt2xIgRI/Djjz9qrFvVXDJNcyv69u2LBQsWAAB2796t0p7Q0FCxXlVzwn755ReMHz8eXbt2ha+vL0JCQrBgwQLcvHlTY/2ybT9x4gTefPNNdOnSBf7+/hg6dGiFbaxKaWkpfvjhB7zxxhsICgqCn58fBgwYgA8//BAPHjzQ+PtQtmnBggUa224oVf0Ola+viIgIsSw0NBRhYWEAnr4Wyp
6fsq8lfc59+fK0tDQsXLgQffr0QYcOHVTiAIBff/0VkyZNwnPPPQdfX1/06tULc+bMwfXr1/X6fegqPT0dS5cuxYABA+Dn54egoCDxrrdcLtf4GEEQsHPnTgwbNgwdO3ZEt27dMHnyZCQkJKi8Tw2ladOmcHR0BABkZWVprKPt71F5bpTKz0NT9htVzZmqqJ3a9FNlX5MFBQX45JNP8MILL4h95Pz589XeV0rHjh3DW2+9hR49eqBDhw7o0qULBgwYgDlz5iA+Pr7qX6YGqampmDdvHoKDg+Hn54cXX3wRq1atwpMnT1Tqff755/D29sb7779f4XOdP38e3t7e6NWrF0pLS6v82cqhhXFxccjOztZY58GDBzh69KhKfUC3fqkqVf3dqKifKVt+48YNhIeHo3v37ggICMDw4cNx4MABse65c+fw1ltv4bnnnoO/vz9ef/11HD9+vMKYnjx5gnXr1mHUqFHo3LmzeG4+/vhj5OTk6NS+n3/+GXfv3oW1tTUWL14MqbTyj6X+/v6oV6+e+H1qaiq+/vprhIWF4fnnn4evry86d+6M0aNHY+vWrVAoFCqP1/Z9pnTz5k28//776N+/v9gPjR07Fnv27KkwxpycHHz44YdiPCEhIfjf//6HvLy8Sv8u6PO6UcYNALGxsXj99dcRFBQEb29v3L17V/zccvbs2QrjXbx4Mby9vfHxxx9XWIdqBocjkknr3bs3HB0dkZubi2PHjsHX17fS+gqFAlOnTsXx48fRoEEDdO7cGQ4ODsjOzsbt27fx7bff4pVXXoGjoyNatWqFoUOH4rfffkNBQQFefPFF2Nvbi89Vfg5RUVERQkNDkZKSgs6dO6Ndu3ZiQqeNP/74A5s3b4aHhweCg4Px8OFDnDlzBvPnz8fly5fVPiTr48UXX0RiYiISEhLQqlUrBAUFicc8PDyqfLwgCIiIiMCPP/4IKysrdO7cGY0bN8bFixexa9cu7N+/H59//jl69+6t8fGxsbH48ssv0b59e/Tq1QupqalITEzE/PnzkZubiwkTJmjdluLiYkybNg3Hjh2Dra0tunXrhgYNGuDs2bPYtGkTfv75Z3z77bfo0KEDAMDHxwdDhw7FmTNncOfOHZV5Xdq0vTb06tULNjY2OHLkCFxcXNCrVy/xmJOTk8F+zq1btzB06FBYW1sjMDAQgiCIz19aWoo5c+Zg//79sLGxQYcOHdC0aVPcunULe/fuxR9//IFVq1ZVeI4N4fz585gyZQpyc3Ph7u6O/v37Iz8/H6dOncLZs2fxxx9/4Msvv1S7Ir5kyRL88MMPkEql6Ny5M5o0aYKrV69i3LhxBpuzVZZCoUBBQQEAoHHjxirHdP09Kl+fu3fvBgAMHTpU5fnK9j3VoU0/lZ+fjzfeeAP3799HUFAQnn32WSQmJuLHH39EfHw89uzZo3KXfPfu3eLFHX9/f3Tr1g1PnjzBgwcPsG/fPjg5OaFLly46xXnv3j0MGzZM7GeKiopw8uRJrF69GseOHcP69etha2sLABg9ejS+/vpr7N27F3PmzIGDg4Pa823evBkA8Prrr8PKquqPPv7+/vDy8sLVq1fx008/aeybdu/eDblcjo4dO+LZZ58FoHu/VNMuXbqEpUuXomnTpujevTvS0tJw9uxZ/Otf/0J0dDSsrKwQHh6OZ599Ft27d8eNGzeQmJiIyZMnY8OGDejcubPK8z148ACTJ0/G1atX4ejoCD8/P9SvXx+XLl3Ct99+i19//RWbNm1C8+bNtYrvzz//BKA651IXe/bswcqVK9GiRQu0adMGgYGByMjIwNmzZ5GQkICjR4/i888/h0QiAaDb+2z//v2YP38+ioqK4OHhgT59+iA/Px/nz5/HvHnzcOLECSxbtkzl8Q8fPsTYsWNx584dODo6IiQkBAqFAnv27MHhw4fh6empsR3Vfd0sXboUW7ZsQadOnfD888/j7t27kEqlGDduHCIjI/H999+jU6dOao97/Pgx9u
zZA6lUijFjxmj/iyfDEIjqoJCQEMHLy0uIjY2tsu6ECRMELy8vYc6cOSrl48aNE7y8vIQTJ06IZadOnRK8vLyEIUO
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1200x700 with 1 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/oAAAJxCAYAAADy5skOAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XlYVFUfB/DvHfZVQERFRQFj3HBF0UTNJbXUXMtyy7TU1KzezLXMFpfKylyyMpf0zbIUXEpb7C1tc99SQVNEBERERPZ17vvHNJcZGba5lxlm+H6ex8fhzJ17z5n74zC/OeeeK4iiKIKIiIiIiIiIbILK0hUgIiIiIiIiIuUw0SciIiIiIiKyIUz0iYiIiIiIiGwIE30iIiIiIiIiG8JEn4iIiIiIiMiGMNEnIiIiIiIisiFM9ImIiIiIiIhsCBN9IiIiIiIiIhvCRJ+IiIiIiIjIhjDRJ7Iiffr0gVqtlv61aNECHTp0QM+ePTF+/Hi8/fbbOHv2bLn7GD9+PNRqNY4cOWKmWpdP16aEhASD8ppWTwCYN28e1Go1IiMjLV2VavG///0PY8aMQceOHaUYq8z7f29cGvu3efPm6m+AldG9b9YYT8bOcdu2bdGnTx+8+OKLOH78uKWraFNWr14NtVqN1atXW6wOY8eOhVqtxooVKyq1/VtvvQW1Wo1nnnnG5GOW9fehNiksLMTOnTsxffp0PPDAA2jbti3atWuHvn37YtasWdizZw8KCgosXU0iqoHsLV0BIqq6jh07omnTpgCAvLw83LlzB9HR0Th69Cg2btyILl26YOnSpWjSpEm11aFPnz5ITEzEzz//jMaNG1fbccwlMjIS8+fPx/Dhw7F8+XJLV8fsoqOjMWvWLGg0GnTt2hX16tWDIAjw9fWt9D704/JezZs3l1W/I0eOYMKECejSpQu2bt0qa1+knIiICNSrVw8AcOfOHZw7dw779u3D/v37MX/+fDz55JOKHEetVgMALl68qMj+qOpGjRqF48ePY9euXXjxxRdhZ2dX5rYFBQXYu3ev9Doyzfnz5zFr1iwkJCRAEAS0aNECbdu2hSAISExMxIEDB/DDDz9g5cqV+O677+Di4iLrePw9I7ItTPSJrNCjjz6KESNGGJSJoohDhw5h6dKlOHr0KB5//HF89dVXpZL9t99+G7m5ufD39zdnlcu0efNmFBYWon79+pauSoX+85//4JlnnoGfn5+lq6K4AwcOoLCwENOmTcOLL75o0j6MxSXZtilTpiA8PFz6OTc3F3PmzMGPP/6Id999FwMHDrSK322q2MCBA/HWW2/h1q1bOHToEHr37l3mtj///DPS09Ph4+ODPn36mLGWtuP8+fMYO3YscnNz0bt3byxcuLDU3/O0tDRs3rwZGzduRGFhoexEn4hsC6fuE9kIQRDQq1cvfPPNN2jWrBlSU1PxyiuvlNrO398fwcHBNeYDQUBAAIKDg+Hg4GDpqlTIz88PwcHB8PDwsHRVFJeUlAQAZY7IE1WGi4uL1O8UFhbit99+s3CNSCkuLi4YNGgQAFR4uYnu+UceecQq+vaaprCwEM8//zxyc3PRr18/fPTRR0Zn6Pn4+OA///kPtm3bBkdHRwvUlIhqMib6RDbG09MTCxYsAAAcPnwY586dM3i+rGvfCwoK8Nlnn2HEiBHo0KED2rRpg+7du2PkyJF45513kJ6eDkD7AU6tViMxMREA0LdvX4PrdHX7PXLkCNRqNcaPH4/c3Fx8+OGHeOihh9CuXTuDEZ7KXIN59OhRTJo0CV26dEG7du0watQo7Nq1y+i2FV3bb+xa1z59+mD+/PkAgKioKIP2jB8/Xtquomv0v/vuOzz55JPo0qUL2rRpg969e2P+/Pm4evWq0e3123748GFMmjQJnTt3Rtu2bTF8+PAy21iRoqIifPnll3j88cfRqVMnhIaGon///njrrbdw8+ZNo++Hrk3z58832nalVPQe6uJr3r
x5Utn48eMxYcIEANpY0D8/+rFkyrm/tzwpKQkLFixAr1690Lp1a4N6AMD333+PyZMno2vXrmjTpg169OiB2bNn4/Llyya9H1WVnJyMN998E/3790doaCg6deokzd4pLi42+hpRFLFjxw6MGDEC7dq1Q3h4OJ5++mmcPHnS4PdUKfXr14eXlxcA4Pbt20a3qez7qDs3OveuC6DrNyq6hr2sdlamn9KPyZycHLz33nt48MEHpT5y7ty5pX6vdP78809MmzYN999/P1q3bo3OnTujf//+mD17No4dO1bxm2lEYmIi5syZg4iICISGhmLAgAFYvXo18vLyDLZbtWoV1Go1Fi1aVOa+zp49C7VajR49eqCoqKjCY+um4f/yyy9IS0szus3Nmzfxxx9/GGwPVK1fqkhFfzfK6mf0y2NjY/HCCy+gW7duaN++PUaOHIkDBw5I2545cwbTpk1D165d0bZtW4wePRp//fVXmXXKy8vDxo0b8dhjjyEsLEw6N++88w7u3LlTpfZ9++23uH79OhwcHLB48WKoVOV/XG/bti2cnZ2lnxMTE/Hpp59iwoQJeOCBB9CmTRuEhYXhiSeewFdffQWNRmPw+sr+nulcvXoVixYtQr9+/aR+aOzYsdi9e3eZdbxz5w7eeustqT69e/fGkiVLkJGRUe7fBVPiRldvANi5cydGjx6NTp06Qa1W4/r169LnllOnTpVZ38WLF0OtVuOdd94pcxuimo5T94lsUM+ePeHl5YX09HT8+eefaNOmTbnbazQaTJkyBX/99Rfc3d0RFhYGT09PpKWl4dq1a9iwYQOGDBkCLy8vBAQEYPjw4fjhhx+Qk5ODAQMGwNXVVdrXvdd05+fnY/z48bhy5QrCwsLQokUL6UuDyvjpp5/wxRdfICgoCBEREUhJScGJEycwd+5cxMTElErETDFgwACcPn0aJ0+eREBAADp16iQ9FxQUVOHrRVHEvHnzsGvXLtjb2yMsLAx169bF+fPnERkZif3792PVqlXo2bOn0dfv3LkT69atQ6tWrdCjRw8kJibi9OnTmDt3LtLT0zFx4sRKt6WgoABTp07Fn3/+CScnJ4SHh8Pd3R2nTp3C1q1b8e2332LDhg1o3bo1AKBly5YYPnw4Tpw4gfj4eIPr7CvTdnPo0aMHHB0d8fvvv8PX1xc9evSQnvP29lbsOHFxcRg+fDgcHBzQsWNHiKIo7b+oqAizZ8/G/v374ejoiNatW6N+/fqIi4vD3r178dNPP2H16tVlnmMlnD17Fs888wzS09Ph7++Pfv36ITMzE0ePHsWpU6fw008/Yd26daVG9l5//XV8+eWXUKlUCAsLQ7169XDp0iWMGzdOsWvo9Wk0GuTk5AAA6tata/BcVd9HXXxGRUUBAIYPH26wP/2+R47K9FOZmZl4/PHHcePGDXTq1An33XcfTp8+jV27duHYsWPYvXu3wWyfqKgo6QvEtm3bIjw8HHl5ebh58yb27dsHb29vdO7cuUr1TEhIwIgRI6R+Jj8/H0eOHMGaNWvw559/YvPmzXBycgIAPPHEE/j000+xd+9ezJ49G56enqX298UXXwAARo8eDXv7ij8Stm3bFiEhIbh06RL27NljtG+KiopCcXEx2rVrh/vuuw9A1ful6nbhwgW8+eabqF+/Prp164akpCScOnUKM2fOxMqVK2Fvb48XXngB9913H7p164bY2FicPn0aTz/9ND7//HOEhYUZ7O/mzZt4+umncenSJXh5eSE0NBRubm64cOECNmzYgO+//x5bt25Fo0aNKlW/n3/+GYDhGhhVsXv3bnz44Ydo3LgxmjVrho4dO+LWrVs4deoUTp48iT/++AOrVq2CIAgAqvZ7tn//fsydOxf5+fkICgpCr169kJmZibNnz2LOnDk4fPgwli1bZvD6lJQUjB07FvHx8fDy8kLv3r2h0Wiwe/du/PbbbwgODjbaDrlx8+abb2Lbtm3o0KEDHnjgAVy/fh0qlQrjxo
3D8uXL8d///hcdOnQo9bqsrCzs3r0bKpUKY8aMqfwbT1TTiERkNXr37i2GhISIO3furHDbiRMniiEhIeLs2bMNyse
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Price Chart with Volume Indicator Overlay ---\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1500x1200 with 3 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdIAAASlCAYAAACspitqAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xl4TGf/BvB7JpsQpCIhIXaCRC2xNLXVUnRT1Nailra06sVbqqgqtdXWatGir11bVUu1FC211r4TCbVVCEFIiOyZ+f1xfmeSSWaSzJxnZjJz7s91uWbM+pzMnTOT7zzn+2j0er0eRERERERERERERERkktbRAyAiIiIiIiIiIiIiKspYSCciIiIiIiIiIiIiygcL6URERERERERERERE+WAhnYiIiIiIiIiIiIgoHyykExERERERERERERHlg4V0IiIiIiIiIiIiIqJ8sJBORERERERERERERJQPFtKJiIiIiIiIiIiIiPLBQjoRERERERERERERUT7cHT0Acry2bdvi1q1b+d5m3LhxGDBggH0G5IKOHDmCN998M8/lxYsXR3BwMFq1aoWBAwfCz8/P4scOCQkBAFy8eFHxOEUpTKZyq1ChAv766y8bjUi8fv364ejRo1i1ahWaNWum+PHmz5+PBQsWAACee+45LF682OTtNm/ejDFjxqBp06ZYvXq14ud1NvLvkqntL4q/C7KNGzdi3Lhx6Nq1Kz7//PN8b6vT6dC+fXvcunULkyZNwuuvv17g47/33nv466+/8Prrr2PSpElWjbEo//xIIu8nhg0bhv/85z+Fuo+595/cjh07hlKlSikdosvI7307KCgIERERGDBgACpWrOiA0bm25ORkvPDCCyhdujR++eUXaLXG834iIyOxZs0aHD9+HHFxcdBoNChTpgzKlSuHhg0bokWLFmjevLmDRm871vz+K32unLRaLUqWLIkaNWrghRdeQO/eveHh4WHTccjs+f508+ZNtGvXTujn0tjYWOzbtw8HDhxAZGQk7t27Bw8PDwQHB+O5557DgAEDUKZMGbP3T0pKwpIlS7Bjxw7cvn0b3t7eqF+/PgYOHIiIiIg8t09JScHhw4exf/9+HD9+HDExMcjIyICfnx8aNWqEvn37Ijw83ORzXb16FX///TciIyMRGRmJK1euICsrCyNGjMDQoUMV/yzOnz+PJUuW4Pjx43j8+DH8/f3Rpk0bDB061OTfQVlZWfjzzz8N44mMjERCQgLc3Nxw4cIFq8fx+PFjHDhwAPv378fp06dx69Yt6HQ6BAQEoGnTphgwYIAhd7nJfwOYU7ZsWfz9998Wjefhw4fYvXu3YRujoqKQmpqKiIgIrFixwqLHio6ORvfu3ZGRkYFKlSrhzz//tOj+OW3btg0//PADoqOjDY/3yiuvYMCAAfn+/lv6Ohc1Op0Op0+fxv79+3H48GFcvXoVSUlJ8PHxQd26ddG1a1e88sor0Gg0Zh/D0p+Bkv2EuazKXnzxRXz55ZeW/RBysDYHALBz506sX78e586dQ2JiIkqWLInKlSujRYsWGDZsmEXjELE/iImJwYoVK3DgwAHcuXMHbm5uCAgIQMOGDTF06FAEBwcXejyRkZE4fPiwYSz//vsv9Ho9Zs2ahVdffdWibZs1axaWLl0KAML2t0+ePEHnzp1x8+ZNAMDevXtRvnz5PLcrqHZTv359rFu3zuiyx48fo3379qhUqRLWrVuX7++CK2IhnQwaNWqEypUrm7yuRo0aih47v+KX2nTt2hUAoNfrERsbi9OnT+PixYvYtGkTVq1aherVqzt4hMp17NgRDx8+NLosOTkZO3bsMFxfvHhxo+ufeuopu42vqNuzZw+OHTuGJk2aOHoo5ABarRbdunXD/PnzsWHDhgIL6ffv38e+ffsAAN27d7fHEMlJye8/pigtiI0dOxabNm3CjBkz0K1bN0WPVdTkfN++c+cOzpw5g9WrV2PDhg1YtmwZGj
ZsqPg5+Dkp2+LFi3Hnzh1Mnjw5TxF99erVmD59OnQ6HcqVK4dmzZqhVKlSePjwISIjI3Hq1CkcOXLEJQvpjlC2bFm0bNkSAJCRkYFr167hxIkTOHHiBLZu3Yply5bl+TxHeY0aNQonT56Eu7s76tSpgwYNGiAxMRFnzpzB4sWL8fPPP2PZsmWoU6dOnvvGx8fjjTfewPXr1w2FuPj4eOzbtw/79u3Dxx9/jH79+hndZ8uWLZgwYQIAaaJKREQE3N3dER0djd9//x3btm3DiBEj8N577+V5vh9//BGrVq2yyc9h+/btGDVqFDIzM1GvXj1UrFgR58+fx5o1a7B9+3b88MMPef4WffLkCUaMGCF8LP/73/+waNEiAECVKlXQqlUrZGVlITIyEhs3bsRvv/2GKVOm5Pu+2aJFC/j7++e53MfHx+LxnDhxAuPGjbP4frmlp6djzJgxyMzMVPxY06ZNw6pVq+Du7o5nnnkGxYsXx+HDhzFnzhzs3r0by5YtQ7FixfLcz5rXOT+OeH+MiYkxfP729fVFWFgYSpUqhZiYGBw8eBAHDx7E77//jq+//hqenp557m/Nz0DJfkJmLq9PP/201T8La3OQnp6ODz/8ENu3b0exYsXQoEEDlC1bFvfu3cPly5exevVqiwvpSvcHW7Zswfjx45GWloZatWqhTZs2SE1NRUxMDDZu3IgOHTpYVEhfuHAhdu3aZfV4ZCdPnsTy5cuh0Wig1+sVP55s1qxZFk1uNFWjAWDyZ1KyZEkMHjwYs2bNwi+//JLvvtIVsZBOBj169HC5P36LotyzUa9du4YBAwbgzp07mDhxIr7//nuLHu/3338XOTwhPvroozyX3bx501BIHzNmDGfxmeHt7Y2UlBTMmTMHP/30k6OH41SK4u+Ctbp164aFCxfi3Llz+Oeff1CzZk2zt/3ll1+QmZmJ2rVrIywszI6jJGdT0NEQZFrun9vt27cxYMAAXL9+HRMmTMDWrVsdNDLXExcXh2XLlqFevXp47rnnjK6Ljo42FNHHjRuHfv36wc3NzXC9TqczFHlJjGrVquXJ/19//YVhw4bh1KlT+O6772xS5MzN2d/fy5Urh3HjxuHVV181mjjy4MEDjBgxAkePHsXIkSPx+++/G2UaAD755BNcv34dERER+Pbbb+Ht7Q1Amln43nvvYfr06WjSpAlq165tuI+7uztee+019O3bF3Xr1jVcrtfrsWLFCnz++eeYN28ewsPD0bRpU6Pnq1WrFgYNGoS6deuibt26WLx4MTZv3qz4ZxAXF4exY8ciMzMTn332GXr16gVAmmE6duxY/Prrrxg1ahR+/vlno5mN7u7ueOWVVwzj8fX1tXimpynFixfHwIED0bt3b1SpUsVweUZGBubMmYMVK1bgk08+yXei2eDBg4UclQoAfn5+6NWrF0JDQ1G3bl1ERkbi008/tfhxFi5ciIsXL6Jv375Ys2aN1ePZuXMnVq1aheLFi2PNmjUIDQ0FIGW2f//+OHHiBL766qs8f/NZ+zoXNRqNBs888wzeeustNG/e3Oj38ujRoxgyZAh2796NJUuW5CkGW/szULKfkIn+nGdtDgBp37V9+3a0b98eU6ZMMZpNr9PpcPbsWYvHo2R/cOjQIXz44Yfw8/PDvHnz0LhxY6Prb968afJLkfw0aNAANWvWNIxn/Pjx+R6tYkpKSgrGjRsHf39/1KtXDzt37rTo/ub8/fffWLt2rUX7AktrNH379sX//vc/zJ07Fy+99JLFPz9nxh7pRA5WtWpVwx8hx48fx927dy26f/Xq1V1iFjtJ2rdvj8DAQJw+fVrRoZhq5Eq/C0FBQXj22WcBABs2bMj3ths3bgQAvPbaazYfFxEBgYGBhj+cL1++jJiYGAePyHX88MMPSE9PN3l0zfbt26HT6dCwYUMMGDAgTyFBq9WiSZMmePfdd+01XFVq27YtOnfuDEA63N8enP39fd68eRgwYECeoy/LlCmD2bNnAw
CuX7+OU6dOGV1/+fJl7Nq1C25ubpg2bZqhiA4ArVu3RteuXaHT6bBkyRKj+3Xt2hXTp083KqIDUnEwZzsYUwXyHj1
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 17
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "f13d0294",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:20.957896Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:20.952513Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
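"# No data-loading or indicator-calculation functions are needed here; processed_df is assumed to be passed in already containing the required columns\n",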
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"def analyze_trend_continuation_probability(processed_df, return_threshold=0.0001, num_bins=20):\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" Analyzes the probability of trend continuation (direction consistency) as volume change rate (Z-score) varies.\n",
|
|
|
|
|
|
" This version ignores the specific direction (Up/Down) of the trend, focusing only on whether it continues.\n",
|
|
|
|
|
|
" It plots the individual bin probabilities and a bar chart showing the number of data points in each bin.\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" Parameters:\n",
|
|
|
|
|
|
" processed_df (pd.DataFrame): DataFrame with calculated indicators.\n",
|
|
|
|
|
|
" return_threshold (float): Minimum absolute log_return to classify current/future as 'significant' move.\n",
|
|
|
|
|
|
" num_bins (int): Number of bins to divide the volume_normalized_zscore range.\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" if processed_df.empty:\n",
|
|
|
|
|
|
" print(\"Processed data is empty. Cannot perform volume-trend analysis.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" required_cols = ['log_return', 'future_log_return', 'volume_normalized_zscore']\n",
|
|
|
|
|
|
" if not all(col in processed_df.columns for col in required_cols):\n",
|
|
|
|
|
|
" print(f\"Error: Missing one or more required columns: {required_cols}. Please ensure they are calculated.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" print(\"\\n--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 1. Define Current K-line Direction (significant move)\n",
|
|
|
|
|
|
" # 1 if significant UP, -1 if significant DOWN, 0 if Flat\n",
|
|
|
|
|
|
" def get_direction_sign(log_ret):\n",
|
|
|
|
|
|
" if log_ret > return_threshold:\n",
|
|
|
|
|
|
" return 1 # Up\n",
|
|
|
|
|
|
" elif log_ret < -return_threshold:\n",
|
|
|
|
|
|
" return -1 # Down\n",
|
|
|
|
|
|
" else:\n",
|
|
|
|
|
|
" return 0 # Flat\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" processed_df['current_direction_sign'] = processed_df['log_return'].apply(get_direction_sign)\n",
|
|
|
|
|
|
" processed_df['future_direction_sign'] = processed_df['future_log_return'].apply(get_direction_sign)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 2. Define 'Is Continuation' (Target Variable)\n",
|
|
|
|
|
|
" # A continuation occurs if current_direction_sign is not 0 AND future_direction_sign is the same\n",
|
|
|
|
|
|
" processed_df['is_continuation'] = np.nan # Initialize with NaN\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" # Cases where current move is Up and future is also Up\n",
|
|
|
|
|
|
" processed_df.loc[(processed_df['current_direction_sign'] == 1) & (processed_df['future_direction_sign'] == 1), 'is_continuation'] = 1\n",
|
|
|
|
|
|
" # Cases where current move is Down and future is also Down\n",
|
|
|
|
|
|
" processed_df.loc[(processed_df['current_direction_sign'] == -1) & (processed_df['future_direction_sign'] == -1), 'is_continuation'] = 1\n",
|
|
|
|
|
|
" # Cases where current move is significant but future is not in the same direction (e.g., flat, reverse)\n",
|
|
|
|
|
|
" processed_df.loc[((processed_df['current_direction_sign'] != 0) & (processed_df['is_continuation'].isna())), 'is_continuation'] = 0\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Filter out rows where current K-line was flat, as there's no trend to \"continue\"\n",
|
|
|
|
|
|
" df_for_analysis = processed_df[processed_df['current_direction_sign'] != 0].copy()\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" if df_for_analysis.empty:\n",
|
|
|
|
|
|
" print(\"No significant current moves (Up/Down) to analyze for continuation.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # --- REMOVED: Filtering out 1% and 99% Z-score outliers ---\n",
|
|
|
|
|
|
" # Now using the full range of df_for_analysis for binning\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 3. Binning Volume Normalized Z-score (using unfiltered data)\n",
|
|
|
|
|
|
" min_z = df_for_analysis['volume_normalized_zscore'].min()\n",
|
|
|
|
|
|
" max_z = df_for_analysis['volume_normalized_zscore'].max()\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" if pd.isna(min_z) or pd.isna(max_z) or (max_z - min_z < 0.001):\n",
|
|
|
|
|
|
" print(\"Warning: Volume Z-score range is too small or contains NaNs for binning.\")\n",
|
|
|
|
|
|
" # Fallback for very small ranges to prevent errors\n",
|
|
|
|
|
|
" if pd.isna(min_z) or pd.isna(max_z):\n",
|
|
|
|
|
|
" min_z = -5\n",
|
|
|
|
|
|
" max_z = 5\n",
|
|
|
|
|
|
" elif (max_z - min_z < 0.001):\n",
|
|
|
|
|
|
" max_z = min_z + 0.001\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" bins = np.linspace(min_z, max_z, num_bins + 1)\n",
|
|
|
|
|
|
" labels = [f'{bins[i]:.2f} to {bins[i+1]:.2f}' for i in range(num_bins)]\n",
|
|
|
|
|
|
" # Use pd.cut for binning\n",
|
|
|
|
|
|
" df_for_analysis['volume_zscore_bin'] = pd.cut(df_for_analysis['volume_normalized_zscore'], bins=bins, labels=labels, include_lowest=True)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 4. Calculate Continuation Probability for each bin\n",
|
|
|
|
|
|
" continuation_prob = df_for_analysis.groupby('volume_zscore_bin')['is_continuation'].mean()\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" continuation_df = pd.DataFrame({\n",
|
|
|
|
|
|
" 'Volume Z-score Bin': continuation_prob.index,\n",
|
|
|
|
|
|
" 'Trend Continuation Probability': continuation_prob.values\n",
|
|
|
|
|
|
" }).dropna() # Drop NA if a bin has no data\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" if continuation_df.empty:\n",
|
|
|
|
|
|
" print(\"No data points for trend continuation within the bins. Adjust thresholds or data range.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" print(\"\\nTrend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):\")\n",
|
|
|
|
|
|
" print(continuation_df)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # 5. Visualization - Individual Bin Probabilities (Original Plot, without filtering)\n",
|
|
|
|
|
|
" plt.figure(figsize=(14, 8))\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" plt.plot(continuation_df['Volume Z-score Bin'], continuation_df['Trend Continuation Probability'],\n",
|
|
|
|
|
|
" marker='o', linestyle='-', color='purple', label='Trend Continuation Probability')\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" plt.title('Trend Continuation Probability vs. Volume Z-score (Direction Agnostic, All Data)', fontsize=18)\n",
|
|
|
|
|
|
" plt.xlabel('Volume Z-score Bins', fontsize=14)\n",
|
|
|
|
|
|
" plt.ylabel('Continuation Probability', fontsize=14)\n",
|
|
|
|
|
|
" plt.xticks(rotation=45, ha='right') # Rotate labels for readability\n",
|
|
|
|
|
|
" plt.ylim(0, 1) # Probability range\n",
|
|
|
|
|
|
" plt.axhline(0.5, color='gray', linestyle=':', linewidth=1, label='Random (0.5)') # Reference line for 0.5 probability\n",
|
|
|
|
|
|
" plt.grid(True, linestyle='--', alpha=0.7)\n",
|
|
|
|
|
|
" plt.legend(fontsize=12)\n",
|
|
|
|
|
|
" plt.tight_layout()\n",
|
|
|
|
|
|
" plt.show()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # print(\"\\nThis plot shows the probability of ANY trend (up or down) continuing, across different levels of volume change rate.\")\n",
|
|
|
|
|
|
" # print(\"Peaks above 0.5 indicate where current direction is more likely to be followed by the same direction in the future.\")\n",
|
|
|
|
|
|
" # print(\"No Z-score outliers have been removed in this plot.\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # --- NEW PLOT: Number of Data Points per Bin ---\n",
|
|
|
|
|
|
" print(\"\\n--- Plotting Number of Data Points per Volume Z-score Bin ---\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Count the number of data points in each bin\n",
|
|
|
|
|
|
" bin_counts = df_for_analysis['volume_zscore_bin'].value_counts().sort_index()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # Ensure the order of bins in bin_counts matches that in continuation_df\n",
|
|
|
|
|
|
" # (pd.cut with labels usually handles this, but explicitly reindexing can ensure consistency)\n",
|
|
|
|
|
|
" bin_counts = bin_counts.reindex(continuation_df['Volume Z-score Bin'])\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" # Convert to DataFrame for plotting\n",
|
|
|
|
|
|
" bin_counts_df = pd.DataFrame({\n",
|
|
|
|
|
|
" 'Volume Z-score Bin': bin_counts.index,\n",
|
|
|
|
|
|
" 'Number of Data Points': bin_counts.values\n",
|
|
|
|
|
|
" }).dropna()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" if bin_counts_df.empty:\n",
|
|
|
|
|
|
" print(\"No data points found for plotting bin counts.\")\n",
|
|
|
|
|
|
" return\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" plt.figure(figsize=(14, 8))\n",
|
|
|
|
|
|
" sns.barplot(x='Volume Z-score Bin', y='Number of Data Points', data=bin_counts_df, palette='viridis')\n",
|
|
|
|
|
|
" plt.title('Number of Data Points per Volume Z-score Bin', fontsize=18)\n",
|
|
|
|
|
|
" plt.xlabel('Volume Z-score Bins', fontsize=14)\n",
|
|
|
|
|
|
" plt.ylabel('Number of Data Points', fontsize=14)\n",
|
|
|
|
|
|
" plt.xticks(rotation=45, ha='right')\n",
|
|
|
|
|
|
" plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
|
|
|
|
|
|
" plt.tight_layout()\n",
|
|
|
|
|
|
" plt.show()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" # print(\"\\nThis plot shows the raw count of data points falling into each Volume Z-score bin.\")\n",
|
|
|
|
|
|
" # print(\"Bins with very low counts might produce less reliable trend continuation probability estimates.\")\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 18
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "74770a30",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:21.200422Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:20.979098Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"df_raw = load_and_preprocess_data(file_path)\n",
|
|
|
|
|
|
"if df_raw is not None and not df_raw.empty:\n",
|
|
|
|
|
|
" # volume_window and price_lag parameters for indicator calculation\n",
|
|
|
|
|
|
" # price_lag defines \"N\" in \"next N K-lines\"\n",
|
|
|
|
|
|
" processed_data = calculate_stationary_indicators(df_raw, volume_window=30, price_lag=5)\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" # Analyze the impact of volume change rate and current K-line direction on future trend\n",
|
|
|
|
|
|
" # return_threshold: set a small threshold to define 'significant' up/down move, otherwise it's 'flat'\n",
|
|
|
|
|
|
" analyze_trend_continuation_probability(processed_data, return_threshold=0.0001, num_bins=10)\n",
|
|
|
|
|
|
"else:\n",
|
|
|
|
|
|
" print(\"Analysis cannot proceed. Please check if data loading was successful.\") "
|
|
|
|
|
|
],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Successfully loaded 25090 rows of data.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"First 5 rows of data:\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
" open high low close volume open_oi \\\n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
" close_oi underlying_symbol \n",
|
|
|
|
|
|
"datetime \n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n",
|
|
|
|
|
|
"Indicators calculated. 25051 rows of data remaining for analysis.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Analyzing Trend Continuation Probability by Volume Z-score (Ignoring Overall Trend Bias) ---\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Trend Continuation Probabilities by Volume Z-score Bin (Direction Agnostic, All Data):\n",
|
|
|
|
|
|
" Volume Z-score Bin Trend Continuation Probability\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"0 -1.76 to -1.07 0.495211\n",
|
|
|
|
|
|
"1 -1.07 to -0.37 0.487784\n",
|
|
|
|
|
|
"2 -0.37 to 0.32 0.486844\n",
|
|
|
|
|
|
"3 0.32 to 1.01 0.492180\n",
|
|
|
|
|
|
"4 1.01 to 1.71 0.486222\n",
|
|
|
|
|
|
"5 1.71 to 2.40 0.496089\n",
|
|
|
|
|
|
"6 2.40 to 3.09 0.490526\n",
|
|
|
|
|
|
"7 3.09 to 3.79 0.488095\n",
|
|
|
|
|
|
"8 3.79 to 4.48 0.503650\n",
|
|
|
|
|
|
"9 4.48 to 5.18 0.583333\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1400x800 with 1 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW4AAAMWCAYAAABhlR+IAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9f/B/B3wlAQceDEidoginvvWbd11S3WUVeto7W1Wltbq9VWa627Wq2rWie46qjaunHWLdUqKgIKKiAgICP39we/3C853EAIgcTwfj2PT8vNTe45uW9uwicn56gkSZJARERERERERERERFZDbekGEBEREREREREREZE+Fm6JiIiIiIiIiIiIrAwLt0RERERERERERERWhoVbIiIiIiIiIiIiIivDwi0RERERERERERGRlWHhloiIiIiIiIiIiMjKsHBLREREREREREREZGVYuCUiIiIiIiIiIiKyMizcEhEREREREREREVkZFm6JyGJ8fHzg6emJpUuXWropVmvatGnw9PTEtGnTLN0Ui2jbti08PT3h6+tr6abYjODgYHh6esLT0xPBwcG5fnzdsc+fP2/2+5t6G70Zzp8/L59HojfJjh074Onpia+++ipHj+Pr6wtPT0+0bds2R49jKXn9PRGlt3TpUnh6esLHx8fSTTGrjLLO3wPjhIaGwtvbG2+//TYSExMt3RyibLG3dAOI8rLs/PE5b9489O7d24yteXPFxsbCz88PZ86cwZ07dxAZGQmtVovChQvD09MTTZs2Rbdu3VC8eHFLN1V2/vx5XLhwAWXKlMmT59HX1xchISFo2LAhGjVqZOnmmN20adPg5+eXbnu+fPlQrFgx1KxZE3369EGLFi0s0DpKKzo6Ghs2bAAAvPfee3B1dbVwi6zXF198gR07dqBw4cI4deoUHB0djbpfhw4d8OjRI7Rp0wY///xzDrcy7zF0vclMw4YNsWnTphxoEaX16tUrLF68GI6Ojhg3bly625XOn1qthrOzMwoWLIhy5crBy8sLzZs3R/PmzaFW2964m7z2niglJQWtW7dGeHg4AODXX39Fs2bNLNwq6xIQEICjR4+iYMGCGDZsmKWbky33799Hly5dAAD58+fHmTNn4OLiYpG2LF26FMuWLdPbplKp4OzsDBcXF7i7u8PLywuNGjVC27ZtjX6dz6rceO/l7u6O3r17Y9u2bdiyZcsbnyPK21i4JbKgYsWKKW6Pi4tDXFxchvvkz58/x9r1JtmxYwcWLFiAly9fytvy588PR0dHhIWFISwsDCdPnsSiRYswbtw4xT+aLOHChQtYtmwZGjZsmOEfKcWLF4eHh4dVFZ3Nwc/PDxcuXMCHH36YYeG2XLlycHR0RMGCBXOxdeajVqtRtGhR+eeXL18iJCQEISEhOHjwIN59913MmTMHKpXKgq18s3h4eAAAnJyczHK/6Oho+Y+YXr16sXCbgXfffRc7duxAVFQUjh49Kv8hmpELFy7g0aNH8v3J/FxcXAy+VxAlJCQgNjYWAHLsD3LSt3btWjx79gyDBw9GqVKlDO4nvl7ExcXhyZMnePLkCS5cuIANGzagdOnSmD59Ojp27Kj4GAULFoSHhwdKlixp9n7kpLz2nujkyZNy0RYAdu3axcKtICAgAMuWLUOZMmUyLLgVKVIEHh4eKF26dO41Lot27twp/39CQgL279+PAQMGWLBFqdK+biQkJCA8PBxhYWG4cuUKtmzZgsKFC2Py5MkYOHCg2Y+dW++9xo4dC19fX6xcuRK9e/fmezx6Y7FwS2RBZ86cUdye9tNQQ/sQsGjRInn0VuXKlTF69Gi0aNECbm5uAFLfhFy6dAl79uzBH3/8gUOHDllN4dZYU6ZMwZQpUyzdDIvRfRr/pipdujT++usv+efk5GTcunUL33zzDW7evImdO3eiWrVqGDx4sAVb+WY5dOhQrt6P/qd27dqoUqUK7t27B19fX6MKt7ppTo
oVK4bWrVvncAvzpi+++AJffPFFpvslJiZi0KBBuHHjBpydnTF16tRcaF3elpCQII9qzqz4Ib5eAKnn7M6dOzhx4gR+//13PHnyBBMnTsSYMWPw8ccfp3uMt99+G2+//bb5OmBlbOU9ka6QN3jwYGzZsgVHjhxBVFQUChcubNmGvYGGDBmCIUOGWLoZBiUlJWHPnj0AUqeI27RpE3bu3GkVhVvxb8yUlBTcu3cPZ8+exW+//Ybg4GB8/fXXuHTpEn744Yc3cpCBu7s7WrZsiWPHjmHHjh0YOXKkpZtEZBLb+64NEeUJBw4ckIu2nTt3xu7du9GzZ0+5aAukjrxt3rw5FixYgN27d6NKlSqWai4RAMDe3h61atXC6tWr5T/QfvvtN8s2iigLdKNmz5w5g7CwsAz3jY2NxeHDhwEAPXr0gL09xwtY0ldffYUbN24ASJ1uiXMF57wDBw4gOjoaXl5eeOutt7J8f0dHR9SoUQMffvgh9u/fL39DZdWqVdi3b5+5m0u54Pnz5zh+/Djs7OwwZswYNGjQAImJiTyfNurvv//GixcvULlyZXz88cdwdnbGjRs3cPfuXUs3LR07Ozt4enpi+PDh2L9/P7p27QoA2L9/P1avXm3h1pnunXfeAQBs374dkiRZuDVEpmHhlugNlHaRnRcvXmDevHno2LEjatWqpfiH2PHjxzFhwgS0aNEC3t7eaNCggfwpv6HJ2tMuHCZJErZv346+ffuibt26qFOnDvr37y9/gmxISkoKNm3ahF69eqF27dpo2LAhfHx8sj3yLTExEfPnzwcAVKlSBd9//32mX/nUaDRYsGCB4m23b9/G1KlT0aZNG9SoUQMNGjTAgAEDsH79eoPPj7gAyM2bNzFp0iQ0b94c3t7eaNeuHebNm6c3hQPwv4WhdCOqL1y4IJ9P3b+0C3FltABBds+RMYs1ZbSA3OPHj7F69WqMHDkSHTt2RO3atVGnTh106dIF3377LUJDQw0+bxcuXAAALFu2LF3/0y6YldniZCkpKdi5cyeGDh2KRo0awdvbGy1atMDEiRON7pep+c4ONzc3NG/eHAAQGBiIV69eAUi/+NLt27cxZcoUtGzZEtWrV0+3+MazZ8/w/fffo2vXrqhduzZq166Nrl27Yv78+Xj+/LlRbXn48CGmTZuGli1bwtvbG61bt8bMmTMzLMpdvXoVCxYswKBBg+Tfm/r166Nfv35YvXq13J/MPHv2DN988w3atm2LGjVqoFmzZpgyZQru379v8D6mLjKmdD8fHx+0a9dO/rldu3Z6WdQ93x999BE8PT0xatSoDI/x6NEjVK1a1ej2JSUloVGjRvD09MTGjRsz3Hfnzp3w9PRE3bp1ER8fL29PTk7Gtm3b4OPjg0aNGqF69epo1KgROnbsiMmTJ2PHjh2ZtiMrevToAQcHB2i12kwXDTx48KA87U+fPn30bjPlupsRYxaoyWhxM/H+x44dw3vvvYdGjRqhbt26GDBgAI4ePap3n927d2PAgAFo0KAB6tSpg8GDB8Pf3z/TtprympxdGzdulM/X2LFj0alTJ5MeJyEhAWvXrkX//v3RoEEDVK9eHY0bN0aXLl3w2WefyYV6Jffv38esWbPQpUsX1KlTB3Xq1EHHjh3x0Ucf4fDhw9Bqtenu8/r1a6xfv15+nmvUqIE2bdpg6tSpCAgIMHistK8dujlmu3fvjjp16iguzHj58mV88sknch7r1auHd999N0vXMyXbt28HAHTr1s3kx9ApUqQIli1bJk+D8NNPPyEpKUlvn4wWJxMzfvjwYYwYMQJNmjRB1apV073OR0REYNGiRejZsyfq1auHGjVqoF27dvj888/x33//ZdhWrVaLAwcO4IMPPpBz3rhxY/Tu3RsLFiyQi1bmfE+kc/78eUycOFE+bqNGjfDee+9h165dSElJUbyP+Nz4+/tj9OjRaNy4MWrUqIHOnTtj2bJleP36dYb9Nsbu3buRnJyMJk2aoGTJku
jVqxeA1OkSjHHx4kWMHTsWjRo1Qs2aNdGxY0csWrQIr169yvD8i8/doUOH4OPjg4YNG6JWrVro0aMHNmzYoPh7mJY
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"--- Plotting Number of Data Points per Volume Z-score Bin ---\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1400x800 with 1 Axes>"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAMWCAYAAACKoqSLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAztNJREFUeJzs3XeUVeX5NuB7hqZIUURAUGNFULGCqLHEkphEk4iYWGI3aowYY40tsaMxMbGLFaOxx5IYe+8FFDUq9kaRjgKCtDnfH3xzfgzNYRhkx7mutVyLs/d79nmes8/ZyD3vvLuiVCqVAgAAAABAIVQu6QIAAAAAAPg/QlsAAAAAgAIR2gIAAAAAFIjQFgAAAACgQIS2AAAAAAAFIrQFAAAAACgQoS0AAAAAQIEIbQEAAAAACkRoCwAAAABQIEJbAPgftc8++2TttdfOxRdfvKRLWaKmTJmSCy64ID/60Y+y/vrrZ+21187aa6+dwYMHL+nSqCWfZeal+rv84osvLulS4BvhMw/A7Bov6QIAoD5dfPHFueSSS5IkSy21VB566KG0b99+nmOHDh2a7bffPkly/fXXp2fPnt9YndSfo446Ko8//niSWee8bdu2SZLGjWv3vzn77LNPXnrppRrbGjVqlGWWWSYtW7bMqquumq5du2bbbbdN9+7d67f42QwePDiPPPJIWrZsmf3333+xvc6CzP6dmF1lZWVatmyZ1VZbLVtttVX22muvtGnTZglUuPCqg+BevXplpZVWWsLV/O/497//neOOOy5J8s9//jPdunWr1fNOOumk3HHHHVl22WXz9NNPp2nTpouzzAbnzjvvzIknnlin577zzjv1XA3V5nftTJImTZqkdevW6dy5c3bcccf07t07TZo0+YYrBOB/kdAWgG+tr776KpdeemnOOOOMJV0Ki8kHH3xQDmz/9re/5cc//nGdj1X9D+tqX375ZYYNG5Zhw4bl2WefzdVXX5011lgjp512WjbddNNFrn1OgwcPziWXXJJOnTotsdB2di1atMhSSy2VJJk+fXq++OKLvPrqq3n11Vdz00035Yorrqh1kPd1Vlxxxay22mpZbrnl6uV4s6v+Ic6mm24qtF0IO+64Y84888xMmDAhd9xxR63O9eTJk3P//fcnSX72s58JbBeD2X8w9XVmzpyZ8ePHJ4lz8Q2a/dqZJJMmTcqYMWMyZsyYPPfcc7ntttty7bXXZtlll53ruauttlqSZOmll/6mygWgwIS2AHyr3XHHHTnggAPK/xDi2+Xdd99Nkiy77LKLFNgmyUYbbZQbbrihxravvvoqb731Vh566KHcfvvt+eCDD7Lvvvvm1FNPzZ577rlIr1d0J598cnbdddfy4y+++CI33XRTLr300owdOzZHHHFEHnzwwTRr1myRX+u8885b5GNQv5o1a5add945N910U+69996ceOKJX3uuH3jggUyePDlJ0rt372+izAbnxz/+ca2vdWeddVb5mnbqqacuzrKYzZzXziQZOXJkrrnmmvz973/Pm2++mb/+9a/z/IHyAw888E2VCcD/AGvaAvCttOKKK2bttdfOjBkz8re//W1Jl8Ni8tVXXyVJlllmmcVy/KWWWiobb7xxTjjhhPz73//O2muvnVKplDPPPDMDBw5cLK9ZVK1bt85hhx2WX/3qV0mSzz77LI8++ugSrorFabfddkuSTJgwIQ8//PDXjr/jjjuSJN26dcvaa6+9WGtjwe68885yYLv33nuXzyVLRvv27XPSSSdl8803T5I88sgjS7giAP4XmGkLwLdSZWVljjnmmBxyyCF58MEH8/rrr2f99dev9fNnX5/u0Ucfne+vVW+33XYZNmxYzjnnnBoza+Z8fkVFRS6//PI888wzGTt2bNq3b5+ddtophx56aJo3b55k1qzRK6+8MgMGDMi4ceOy4oorZpdddsnBBx/8tevfTZs2Ldddd13uueeeDBkyJE2aNMl666
2X/fffP9tss80Cn/vuu+/mhhtuyIsvvpiRI0emsrIyK620Urbbbrvst99+81y7tHrt4E033TQ33HBDHnzwwdx6660ZPHhwxo8fn8MPPzxHHHHEAl93dlOnTs3NN9+cBx54IB988EG++uqrtG3bNj169MgBBxyQrl27zvP1qw0bNqxGSNSrV6+ce+65tX792ujUqVMuu+yy/OQnP8nkyZPz17/+NTfddFONMVOmTMmjjz6ap556Ku+8805GjhyZSZMmZdlll83666+f3XfffZ7nY/ba5+wlSfr06VN+P+v6GvXlZz/7WS6//PIkyX//+98as/4W9jxWq15XePY+q83+Hdt5551z/fXX59///nc+/fTTNGrUKOuuu25+9atfZeutt67xvBNOOCF33XVX+fG+++5bY3+nTp3y2GOPlR+PGDEi1157bZ599tkMGzYsM2bMyLLLLpt27dqle/fu2XnnnRfpGjJjxoz069cvzz33XMaNG5e2bdtm6623zuGHHz7fdbeTpKqqKv/5z39yzz335M0338yECRPSokWLrLPOOtl1112z0047paKiYq7nzf6+7bjjjrn66qvzyCOPZOjQoZk8efICr2vV1l133XTt2jWDBw/OHXfckZ133nm+Yz/55JPyDzLmDAhHjx6da6+9Nk899VSGDRuWZNb7v8022+TAAw+s9a/7V3vxxRfL53NB67RWf4/mXLN8zue//fbbufLKK/PSSy9lwoQJ6dSpU3bbbbfst99+5bWxX3755VxzzTV5/fXX88UXX+Q73/lO9txzz+y1117zfP+r1eX6uqhef/318szaTTfdtM5r4CbJfffdlzvvvDNvvfVWvvjiiyy99NJp06ZNVl999Wy11VbZbbfd5jkDe/z48bnxxhvz5JNP5pNPPsmUKVOywgorZNVVV80OO+yQn/zkJ2nZsuVcz3vooYdyxx135L///W8mTJiQVq1apVu3btltt93y/e9/f541Vn/Xe/XqlXPOOSf//Oc/c+edd+bDDz/M559/Ps+/n//+97/nueeey/Dhw1NVVZUVV1wxW265ZQ488MB07Nixzu/X1+natWuef/758oz0Oc3vMzvn9WSppZZKv3798thjj2X06NFp2bJlevbsmT59+mSNNdZYbPUD8M0S2gLwrbXNNttk0003zUsvvZS//OUvuf7665dIHW+99VZOPvnkctgyc+bMDBkyJP369cvAgQNz3XXX5dlnn83vfve7TJkyJS1btsz06dPzySef5MILL8x77723wNnC06dPzwEHHJCBAwemcePGad68eSZMmJDnnnsuzz333DyDsGpXXXVV/vrXv6aqqirJrHX0pk+fnnfffTfvvvtu7rjjjlx55ZVZZ5115vv65557bvr375+Kioq0atUqlZUL94s8I0eOzK9+9avyUgdNmjTJUkstleHDh+df//pX7rnnnpx00knZZ599ys9p3rx52rZtm6+++iqTJk1KZWVljfCjRYsWC1VDba200krp1atXbrzxxrz88ssZMmRIVl555fL++++/vxyQVFRUpEWLFmncuHFGjx6dRx99NI8++mgOPPDA/P73v69x3AX1Ut3vor5GfenQoUP5z5MmTSr/uS7ncWFMnjw5e++9d1577bU0adIkTZo0yaRJk/Liiy/mpZdeyllnnVUjLGzRokXatm2bMWPGJJk1U3j2H37Mvn7u22+/nX333TdffPFFklk3omvRokXGjBmT0aNHl8PShQltZ/f666/nlFNOyZdffpnmzZunUaNG+eyzz3LrrbfmwQcfzLXXXpt11113rud9/vnn6dOnTwYMGFDe1rJly4wfPz7PPvtsnn322dx777258MIL57tm6eeff55dd901H3/8cZo0abLQa2XutttuOfPMM/PCCy9k+PDh8w207rzzziSzZqfPHu6+9NJLOfzwwzNhwoQk//dZfv/99/P+++/nn//8Zy677LLFepO/BXnyySdzxBFHZOrUqWnZsm
WmTZuWDz/8MOedd17519hvv/32nHrqqamqqkqLFi0ybdq0vPfeeznjjDPy2Wef5dhjj53nsevj+rqwxowZkyOOOCL
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 19
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "8fa62ad6",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:21.550402Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:21.220570Z"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 1. Data Loading and Preprocessing (same as the earlier code) ---\n",
|
|
|
|
|
|
"def load_and_preprocess_data(file_path):\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" Loads historical futures data and performs basic preprocessing.\n",
|
|
|
|
|
|
" Assumes data contains 'datetime', 'open', 'high', 'low', 'close', 'volume' columns.\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" try:\n",
|
|
|
|
|
|
" df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
|
|
|
|
|
|
" # Ensure data is sorted by time\n",
|
|
|
|
|
|
" df = df.sort_index()\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" # --- NEW: Optional filtering for typical trading hours ---\n",
|
|
|
|
|
|
" # If your data includes non-trading hours (e.g., overnight, weekends in daily data)\n",
|
|
|
|
|
|
" # and you only want to analyze main trading sessions, uncomment and adjust.\n",
|
|
|
|
|
|
" # Example for typical daytime futures trading:\n",
|
|
|
|
|
|
" # df = df[(df.index.hour >= 9) & (df.index.hour < 15)] # Filter 9:00 to 14:59 for example\n",
|
|
|
|
|
|
" # df = df[df.index.dayofweek < 5] # Exclude Saturday (5) and Sunday (6) if using daily data with weekends\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" initial_rows = len(df)\n",
|
|
|
|
|
|
" df.dropna(inplace=True)\n",
|
|
|
|
|
|
" if len(df) < initial_rows:\n",
|
|
|
|
|
|
" print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" # Check if necessary columns exist\n",
|
|
|
|
|
|
" required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
|
|
|
|
|
|
" if not all(col in df.columns for col in required_columns):\n",
|
|
|
|
|
|
" raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
" print(f\"Successfully loaded {len(df)} rows of data.\")\n",
|
|
|
|
|
|
" return df\n",
|
|
|
|
|
|
" except FileNotFoundError:\n",
|
|
|
|
|
|
" print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
|
|
|
|
|
|
" return None\n",
|
|
|
|
|
|
" except Exception as e:\n",
|
|
|
|
|
|
" print(f\"Error during data loading or preprocessing: {e}\")\n",
|
|
|
|
|
|
" return None\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# --- 2. Stationary Indicator Calculation Function (same as the earlier code) ---\n",
|
|
|
|
|
|
"def calculate_stationary_indicators(df, volume_window=10, price_lag=5):\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" Calculates stationary indicators based on volume and price.\n",
|
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
|
" df_processed = df.copy() \n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" df_processed['volume_roc'] = df_processed['volume'].pct_change(volume_window) * 100\n",
|
|
|
|
|
|
" df_processed['volume_ma_ratio'] = df_processed['volume'] / df_processed['volume'].rolling(window=volume_window).mean()\n",
|
|
|
|
|
|
" rolling_mean_vol = df_processed['volume'].rolling(window=volume_window).mean()\n",
|
|
|
|
|
|
" rolling_std_vol = df_processed['volume'].rolling(window=volume_window).std()\n",
|
|
|
|
|
|
" df_processed['volume_normalized_zscore'] = (df_processed['volume'] - rolling_mean_vol) / rolling_std_vol.replace(0, np.nan)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" df_processed['log_return'] = np.log(df_processed['close'] / df_processed['close'].shift(1))\n",
|
|
|
|
|
|
" df_processed['future_log_return'] = np.log(df_processed['close'].shift(-price_lag) / df_processed['close'])\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" try:\n",
|
|
|
|
|
|
" macd, macdsignal, macdhist = ta.MACD(df_processed['close'], fastperiod=12, slowperiod=26, signalperiod=9)\n",
|
|
|
|
|
|
" df_processed['macd_hist_diff'] = macdhist.diff(1) \n",
|
|
|
|
|
|
" except Exception as e:\n",
|
|
|
|
|
|
" # print(f\"TA-Lib MACD calculation failed, possibly due to installation or data issues: {e}. 'macd_hist_diff' will contain NaN.\")\n",
|
|
|
|
|
|
" df_processed['macd_hist_diff'] = np.nan \n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" df_processed.dropna(inplace=True)\n",
|
|
|
|
|
|
" if df_processed.empty:\n",
|
|
|
|
|
|
" print(\"Warning: Data is empty after indicator calculation. Check original data volume or adjust window parameters.\")\n",
|
|
|
|
|
|
" else:\n",
|
|
|
|
|
|
" print(f\"Indicators calculated. {len(df_processed)} rows of data remaining for analysis.\")\n",
|
|
|
|
|
|
" return df_processed\n",
|
|
|
|
|
|
"\n",
|
"# --- 3. Price change rate analysis and visualisation (x-axis uses a continuous index) ---\n",
"def analyze_price_change_rate_for_trend(processed_df, rolling_vol_window=30):\n",
"    \"\"\"\n",
"    Plots and summarises the 'log_return' column of `processed_df`.\n",
"\n",
"    Draws a histogram of the log returns, prints their summary statistics\n",
"    (describe, kurtosis, skewness) and then draws two stacked charts: the log\n",
"    returns and their rolling standard deviation, both against a 0..n-1\n",
"    position index so that gaps in the original index are not shown.\n",
"\n",
"    `processed_df` is only read; no columns are added to it.\n",
"\n",
"    Args:\n",
"        processed_df (pd.DataFrame): Frame containing a 'log_return' column.\n",
"        rolling_vol_window (int): Window length (in rows) of the rolling\n",
"            standard deviation plotted as volatility.\n",
"\n",
"    Returns:\n",
"        None. Prints an error and returns early when 'log_return' is missing\n",
"        or contains only NaN.\n",
"    \"\"\"\n",
"    if 'log_return' not in processed_df.columns or processed_df['log_return'].isnull().all():\n",
"        print(\"Error: 'log_return' column not found or contains only NaN values. Cannot analyze price change rate for trend.\")\n",
"        return\n",
"\n",
"    print(\"\\n--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\")\n",
"\n",
"    log_returns = processed_df['log_return']\n",
"    # Position-based x values that ignore date gaps; kept local so the caller's frame stays unchanged.\n",
"    continuous_index = range(len(log_returns))\n",
"\n",
"    # 1. Distribution of Log Returns (Price Change Rate)\n",
"    plt.figure(figsize=(10, 6))\n",
"    sns.histplot(log_returns, bins=100, kde=True, color='purple', alpha=0.7)\n",
"    plt.title('Distribution of Log Returns (Price Change Rate)', fontsize=16)\n",
"    plt.xlabel('Log Return', fontsize=12)\n",
"    plt.ylabel('Frequency', fontsize=12)\n",
"    plt.grid(True, linestyle='--', alpha=0.6)\n",
"    plt.show()\n",
"\n",
"    print(\"\\nStatistical summary of Log Returns:\")\n",
"    print(log_returns.describe())\n",
"    print(f\"Kurtosis of Log Returns: {log_returns.kurtosis():.4f}\")\n",
"    print(f\"Skewness of Log Returns: {log_returns.skew():.4f}\")\n",
"\n",
"    # 2. Time Series of Log Returns and Rolling Volatility\n",
"    rolling_volatility = log_returns.rolling(window=rolling_vol_window).std()\n",
"\n",
"    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)\n",
"\n",
"    # Subplot 1: log returns over continuous trading periods\n",
"    ax1.plot(continuous_index, log_returns, label='Log Returns', color='blue', alpha=0.7, linewidth=0.8)\n",
"    ax1.set_title('Log Returns Over Trading Periods (Continuous Index)', fontsize=16)\n",
"    ax1.set_ylabel('Log Return', fontsize=12)\n",
"    ax1.grid(True, linestyle='--', alpha=0.6)\n",
"    ax1.legend()\n",
"\n",
"    # Subplot 2: rolling volatility over the same continuous index\n",
"    ax2.plot(continuous_index, rolling_volatility, label=f'Rolling Volatility ({rolling_vol_window} periods)', color='red', linewidth=1.5)\n",
"    ax2.set_title(f'Rolling Volatility ({rolling_vol_window}-period Std Dev of Log Returns) Over Trading Periods', fontsize=16)\n",
"    ax2.set_ylabel('Volatility', fontsize=12)\n",
"    ax2.set_xlabel('Trading Period Index', fontsize=12)\n",
"    ax2.grid(True, linestyle='--', alpha=0.6)\n",
"    ax2.legend()\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"    print(\"\\nAnalysis focused on price change rate and its dynamics over continuous trading periods.\")\n",
"    print(\"Higher volatility periods often provide more opportunities for trend-following strategies.\")\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
"# --- Main Execution Flow ---\n",
"if __name__ == \"__main__\":\n",
"    df_raw = load_and_preprocess_data(file_path)\n",
"\n",
"    # Guard first: there is nothing to analyse when loading returned None or an empty frame.\n",
"    if df_raw is None or df_raw.empty:\n",
"        print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"    else:\n",
"        processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"        analyze_price_change_rate_for_trend(processed_data, rolling_vol_window=30)"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Successfully loaded 25090 rows of data.\n",
|
|
|
|
|
|
"Indicators calculated. 25051 rows of data remaining for analysis.\n",
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"--- Analyzing Price Change Rate (Log Returns) for Trend Characteristics ---\n"
|
2025-06-22 23:03:50 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAIkCAYAAAAH/VqbAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAApFZJREFUeJzs3Xl8E2X+B/DPTJKeoS30gkIRBVuOcqMVreKFeCvguYK6HuCq6M9VuVZFXAV0dddzvRZRXBQPEA/AVTzw4FSpUuRWobRAaaH3lWSe3x8hw2R6N+lkknzerxcv7WSaPE/yydN888w8IwkhBIiIiIiIiMhv5EA3gIiIiIiIKNSw0CIiIiIiIvIzFlpERERERER+xkKLiIiIiIjIz1hoERERERER+RkLLSIiIiIiIj9joUVERERERORnLLSIiIiIiIj8jIUWERERERGRn1kD3QAiszr77LNRUFCg/ixJEqKjo9GpUyccd9xxyMrKwgUXXIBBgwY1eR8TJ07Ehg0bsHDhQmRnZxvR7GZ5+vTFF1+gR48e6naztRMApk+fjg8++ABz587FuHHjAt0cv/vyyy/xn//8B9u2bUNVVRUAtOr597yGwfi8ZGZmNtgWGRmJpKQkDB48GNdddx1GjBgRgJaFvjvuuAPffvstPvvsM3Tt2lXd7nnva9lsNiQkJCArKwtXXXUVzj777DY/3nPPPYfnn38ed955J6ZMmeJz+ztKdXU13n//faxevRrbt29HaWkpbDYbUlJSMHDgQJx//vk4++yzIcvHvpduahylpoXKe/+HH37Addddh5tvvhlTp04NdHMoCLDQImrBsGHDcNxxxwEAamtrceTIEWzduhUbNmzAa6+9hpNPPhlz5sxBenp6h7Uh1P6wL126FDNmzMDYsWMxb968QDfHcFu3bsVdd90FRVFwyimnIDk5GZIkISkpKdBNM0ROTg6Sk5MBAEeOHEFeXh5WrFiBlStXYsaMGbjhhhv88jieD3fbt2/3y/0FqzVr1mDVqlW46aabvIosrb59+6Jfv34A3MXHr7/+iq+++gpfffUVJk6ciAceeMDIJhviu+++w/3334/Dhw/DarViwIABGDFiBFwuF/bu3YuPP/4YH3/8MQYOHIj3338/0M0NCUa99zuq0B8xYgTOPPNMLFy4EFdddRV69erlt/um0MRCi6gFV155ZYOZAyEEvvnmG8yZMwcbNmzANddcg8WLFzcoth5//HHU1NQgLS3NyCY36fXXX4fD4UBqamqgm9Kiv/71r7j11luRkpIS6Kb43apVq+BwOHDbbbfhnnvuCXRzDDdp0iSvmbuamhpMnToVn332Gf7xj3/g/PPPD4qMBou5c+ciMjISkyZNanKfc8891+sDqaIoePrpp/Hyyy/jzTffxDnnnIORI0e2+jGvu+46XHjhhejcubNPbe8oX3/9NW6//Xa4XC6MHz8e9957LxITE732KSwsxEsvvYRPP/00QK0MPaHw3p8yZQq+/vprPPnkk3j++ecD3RwyOZ6jRdQOkiRh1KhReO+999CrVy8UFxc3+o1vWloaevfujejo6AC0sqGePXuid+/esNlsgW5Ki1JSUtC7d2906tQp0E3xu8LCQgBQZ0rDXXR0tPr+cTgc+PbbbwPcotDx/fffY8eOHTj33HPbVPTIsoy7775b/fJo5cqVbXrcLl26oHfv3ujSpUubfs8IR44cwf333w+Xy4WJEydizpw5DYoswD1+P/LII3jhhRcC0MrwEIzv/aysLPTt2xdffPEF9u3bF+jmkMmx0CLyQVxcHGbOnAkAWLduHfLy8rxunzhxIjIzM7F+/Xqv7fX19fjPf/6DcePGYejQocjKysJpp52G8ePH44knnkBpaSkA9yF2mZmZ6rli55xzDjIzM9V/nvtdv349MjMzMXHiRNTU1OCZZ57BBRdcgMGDB3udX3H22WcjMzOz2T8OGzZswE033YSTTz4ZgwcPxh
VXXIFly5Y1um9T/fN47rnnkJmZieeee86rDTNmzAAAfPDBB179mThxorrf9OnTkZmZiaVLlzZ638uXL8cNN9yAk08+GVlZWTjrrLMwY8YM/P77743ur+37unXrcNNNN+Gkk07CoEGDMHbs2Cb72BKn04m3334b11xzDYYPH46BAwfivPPOw6OPPoqDBw82+nx4+jRjxoxG++5vBw4cwN///necd955GDhwIIYPH67OwrpcrkZ/RwiB999/H+PGjcPgwYORnZ2NW265BT/99JNX3vwlNTUVCQkJAICSkpJG9/n0009x880345RTTkFWVhZOP/103Hfffdi1a5fXfp7n2UObMW3+G8unVlP9bM37zfPenT59Oqqrq/HUU09h9OjR6nt92rRpDfLhsWbNGtx222049dRTMWDAAJx00kk477zzcN9992Hjxo0tP5ka//3vfwEAY8eObdPvAYDFYlEPJ9Ser6p9L61atQrXX389Tj75ZK+xoKXn9vfff8fDDz+MMWPGYPDgwRg2bBguvPBCPPzww9ixY0eD/cvKyvDss8/isssuw9ChQzF48GBccskl+Pe//42ampo29WvRokUoLy9HYmJiq86zOemkk5q8rS1jSUFBAV555RVcf/31OPPMM5GVlYURI0bg2muvxeLFi6EoSoPf2bdvHzIzM3H22WdDCIF33nkH48aNw5AhQzB8+HDcdNNN2LRpU5Pt27FjB6ZMmYLs7Gz1OXv99dehKEqzfw+cTifee+89TJw4UR1jzz77bMyaNQv79+9v8Tlri5be+5999hn+9re/4eKLL8ZJJ52EgQMHqn9Hfvvttwb7Z2ZmqjNNzz//vNd7f/r06X7p59ixY6EoCt5++20fek7hgIcOEvnojDPOQEJCAkpLS7FmzRpkZWU1u7+iKJg0aRLWrl0Lu92OESNGIC4uDocPH8aePXswf/58XHLJJUhISEDPnj0xduxY/O9//0N1dTXGjBmDmJgY9b705/TU1dVh4sSJ2L17N0aMGIG+ffuqRVtrfP7551i0aBFOOOEE5OTkoKioCD/++COmTZuGbdu2Nfgj1R5jxoxBbm4ufvrpJ/Ts2RPDhw9XbzvhhBNa/H0hBKZPn45ly5bBarVixIgRSExMxJYtW7B06VKsXLkSzz77LM4444xGf3/JkiV48cUX0b9/f5x++ukoKChAbm4upk2bhtLSUtx4442t7kt9fT0mT56MNWvWIDIyEtnZ2bDb7di0aRPefPNNfPLJJ5g/fz4GDBgAAOjXrx/Gjh2LH3/8EXv37vU6/681fW+PX375BbfeeitKS0uRlpaGc889FxUVFdiwYQM2bdqEzz//HC+++CIiIiK8fm/27Nl4++23IcsyRowYgeTkZOzYsQMTJkzw23kUWoqioLq6GgAazC44nU7cd999WLlyJSIiIjBgwACkpqbijz/+wMcff4zPP/8czz33nPqae57nDz74AEDDIkP7HvJFa95vFRUVuOaaa7B//34MHz4cJ554InJzc7Fs2TJs3LgRH374odes7QcffKB+ETFo0CBkZ2ejtrYWBw8exIoVK9C5c+dmP/jr2/fdd9/BZrO1+nf0KisrAaBBPgBgwYIF+O9//6sWvUVFRbBYLC3e58cff4yZM2eivr4eaWlpGDVqFBRFQX5+PhYvXozExERkZGSo++/atQu33HIL9u/fj+TkZAwfPhxWqxWbN2/GM888g88++wxvvvlmq2e/v/jiCwDAhRde2Gi/WqutY8mHH36IZ555Bj169ECvXr0wbNgwHDp0CJs2bcJPP/2E77//Hs8++ywkSWr08WbMmIFPPvkEw4cPx5lnnomtW7fi+++/x8aNG/Hf//4XgwcP9tp/w4YNuPXWW1FbW4uePXvitNNOQ2lpKZ588kn8/PPPTfarsrISf/nLX7BhwwbExMQgKysLnTt3xo4dO7B48WJ8+umnWLBgAfr379/u506rufc+APzf//0fIiIi0Lt3b5xyyi
lwOp3YuXMnli5dik8//RTz58/HsGHD1P3Hjh2LrVu3Ytu2bV7nHgLw+nvjSz9PO+00AO7DwO+//36/PA8UogQRNeq
|
2025-06-22 23:03:50 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Statistical summary of Log Returns:\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"count 25051.000000\n",
|
|
|
|
|
|
"mean -0.000012\n",
|
|
|
|
|
|
"std 0.003080\n",
|
|
|
|
|
|
"min -0.059487\n",
|
|
|
|
|
|
"25% -0.001388\n",
|
|
|
|
|
|
"50% 0.000000\n",
|
|
|
|
|
|
"75% 0.001386\n",
|
|
|
|
|
|
"max 0.046782\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"Name: log_return, dtype: float64\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Kurtosis of Log Returns: 23.4175\n",
|
|
|
|
|
|
"Skewness of Log Returns: -0.4220\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1500x1000 with 2 Axes>"
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAAPdCAYAAABlRyFLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd81PT/B/BXri1llL1kuVAB2eAW9Yug4gAUcLJBQZagIsOBOFCWiCwHQwQRBWSIggriQvYQBNmIlmGBIoVCS1suvz/yy10ul9wludzM6/l48KC93uU+Ge+M92cJoiiKICIiIiIiIiIiIiIiP65oF4CIiIiIiIiIiIiIKFYxiU5EREREREREREREpINJdCIiIiIiIiIiIiIiHUyiExERERERERERERHpYBKdiIiIiIiIiIiIiEgHk+hERERERERERERERDqYRCciIiIiIiIiIiIi0sEkOhERERERERERERGRDibRiYiIiIiIiIiIiIh0JEe7AEREROTvzjvvxJEjR/D222+jTZs20S6OKTVq1PB7LTU1FeXKlUP9+vXRvn17XHfddVEoWeI7duwYPv30U/z22284cuQIcnJyUKZMGdStWxf33Xcf7rvvPgiCEO1iWibHhRlVqlTBqlWrwlQifQsXLsTQoUPx0EMPYeTIkZ7X169fj06dOuGGG27A7NmzI14uo4YMGYJFixb5vJaUlIQSJUqgVq1aaN26NVq3bh2R4+nw4cNo1qxZxPZlpPdRnz598Ouvv+L777/HJZdcovmebdu2YeHChdi0aRMyMjKQm5uL4sWLo3r16rjlllvw4IMPonLlymEvayg6duyIDRs2YNasWbjxxhujXZy4JG/Dvn37ol+/fmH/vkjHnpYuXbpg+/bt+O6771C+fPmolIGIiAhgEp2IiIjCpEmTJp4H3v/++w87duzAsmXLsHz5cgwdOhSdO3e25XvkpP2ePXtsWV68mjNnDkaOHIm8vDyUKlUKjRs3RtGiRfHPP//ghx9+wMqVK/Hxxx9j8uTJqFixYrSLa8k999yD//77z+e18+fP47vvvvP8vWjRoj5/L126dMTKl4guvfRSNG7cGABw4cIF7Nu3D2vWrMGaNWvwww8/YPz48UhKSopyKePXmjVrsHLlSnTr1k0zgZ6Tk4OXX34ZX3/9NQCgfPnyaNy4MdLS0nD69Gls374dGzduxJQpUzB+/Hg0b9480qsAAJg4cSImTZoUseQuOcfzzz+Pdu3aYdy4cXj77bejXRwiInIwJtGJiIgoLHr06OHT2jAnJweDBg3C999/jzFjxqBFixZxm8yNNZ988gneeustuFwuPP/88+jatStSUlI8fz9w4ABeeOEF/PHHH2jfvj0WLlyIEiVKRLHE1gwePNjvtcOHD3uS6IMGDULVqlUjXSxT6tWrh2XLlqFIkSLRLoohjRs39mlJDwCfffYZXnvtNXz//fdYtGgR2rVrF9YyVKxYEcuWLfM5phPF22+/jdTUVPTo0cPvb/n5+ejevTs2b96M8uXL47XXXkOzZs183lNQUIAVK1bg3XffxeHDhyNVbEtGjRqFnJycmG8xT7Glbt26aNq0KRYtWoTOnTujZs2a0S4SERE5FMdEJyIioogoUqQIXn75ZQBScujXX3+NcokSw/79+zFmzBgAwNChQ9GjRw+/ZGP16tXxySef4NJLL0V6ejreeOONaBSVIMVB9erV4zqR+MQTT+CGG24AACxfvjzs35eSkoLq1avj0ksvDft3RdJvv/2GvXv3onnz5po9JqZMmYLNmzejRIkSmDt3rl8CHQCSk5Nx7733YtGiRZ59EqsqV66M6tWrx00FEsWOdu3aQRRFfPLJJ9EuChERORiT6ERERAni33//xRtvvIG7774bdevWRePGjfHYY4/h888/x8WLFzU/I4oiFixYgDZt2qB+/fq48cYb8eSTT2LLli1Yv349atSogY4dO9pWxooVK6JUqV
IAgMzMTM33fPvtt+jevTtuuukm1KlTB7fddhsGDhyI/fv3+7xv4sSJPuOv16hRw+ef3CpTft/EiRM1v09vPZWv5+Tk4L333sO9996L+vXr48477wQgjXtdo0YNDBkyBOfPn8c777yDu+66C3Xq1MGtt96KwYMHIyMjQ/N716xZg6effhq33HILateujeuvvx533303Bg4ciI0bNwbfmP9v+vTpyM/PD7qvihcvjkGDBgEAvvnmG6SnpwOQWqnXqFED119/PS5cuKD7+TZt2qBGjRpYuXKlz+sFBQWYP38+OnbsiBtuuAF16tTBnXfeiVdffRXHjh3zW46R7WoX5f45ffo0RowYgebNm6NOnTo+22rNmjV444030Lp1a9x4442oU6cObr/9dgwYMADbt2/XXX5BQQFmzpyJli1bom7durjpppvQr1+/gEML6R1vhw8fRo0aNXDnnXdCFEV88cUXaNOmDRo0aIDGjRujW7du2Lp1q+5y9+7di379+uHGG29E/fr10bJlS8ycORNutxt33nmnT0zYoXbt2gDgN0Z9VlYWJkyYgNatW6Nhw4aeskyZMgU5OTl+y1HG59GjR/Hiiy/ijjvuQO3atTFkyBC/baPFyrkPABYvXoy2bduifv36uOGGG9C9e3ds2rQp4HrbFbcA8OmnnwIAHnroIb+/ZWdnY9asWQCkMdOrVasWcFnFihXDtdde6/f6r7/+ip49e+Lmm29GnTp10KRJEwwYMAB//PGH5nI6duyIGjVqYP369di1axf69u3riYn77rsPM2bMgCiKPp+pUaMGJk2aBACYNGmSz3lY3ofqZSsNGTIENWrUwMKFC5Geno4XXngBt956K+rUqYPmzZvj3XffRV5enl9ZlZ/Toox/LWa3TbA40itPXl4epk2bhjZt2qBhw4ae60Pbtm0xevRonD59WnN5ZoRyLQKAH3/8ER06dEDDhg3RuHFjPPHEE37nei1m4n3GjBmoUaMG7rnnHmRnZ/sta968eahRowbuuOMOnDp1yudvd9xxB0qXLo1vvvnGlu1FRERkBYdzISIiSgDbt2/HU089hdOnT6Ny5cpo3rw5zp49iw0bNmDr1q1YsWIF3n//fRQqVMjnc6+99hrmzp0Ll8uF6667DuXLl8fevXvRoUMH28YsV3K73Th//jwAoGzZsj5/KygowMCBA7F8+XIUKlQItWvXRsWKFXHo0CEsXboUK1aswMSJE3H77bcDAGrVqoWHHnrIM/mhOhGlHhvbqgsXLqBjx444cOAArrvuOtSsWdPvIf7s2bN47LHHcOzYMTRu3BhXX301fv/9dyxevBgbN27EkiVLULx4cc/7Fy1ahKFDhwKQhve48cYbkZubi4yMDCxbtgylS5fG9ddfH7Rsoih6JnszMslj06ZNUaJECZw5cwY//fQTOnbsiOrVq6Nhw4bYunUrVq5cifvvv9/vc3v27MHOnTtRrlw5/O9///O8np2djV69emHDhg0oWrQo6tSpg9KlS2Pv3r34/PPP8e233+Ljjz/WTO4Z2a52+e+//9C2bVucPXsWjRs3Ru3atX1a68sJ/6uvvhqNGjVCcnIyDh48iOXLl2PFihUYN24c7rnnHp9lut1u9O/fHytXrkRKSgpuvPFGlChRAtu2bcPDDz+Mtm3bWi7v0KFD8fXXX6Nx48b43//+h127duG3337Dxo0b8emnn6J+/fo+79+wYQOeeuop5Obm4tJLL8Wtt96K06dPY+zYsdi2bZvlcgQiJ8GU55T9+/fjySefxLFjxzxjdycnJ+OPP/7Ae++9h++//x6zZ8/2iQXZoUOH8NBDDyElJQWNGjWCKIqGxrO3eu578803MXv2bLhcLjRu3BgVKlTAnj170LFjR3To0EHzu+yKW0A6/levXo2UlBTNz6xfvx7Z2dkQBAGtW7c2tEy18ePH4/3334cgCGjYsCEqV66MAwcOYPny5fj+++/x+uuv6w7Fs3r1anz88cee4+nEiR
PYvHkzRo0ahWPHjuGll17yvPehhx7Crl27sHv3btSsWRO1atXy/E0eT9+IXbt2YcSIEShZsiSuv/56ZGVlYcuWLfj
|
2025-07-28 14:36:58 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"Analysis focused on price change rate and its dynamics over continuous trading periods.\n",
|
|
|
|
|
|
"Higher volatility periods often provide more opportunities for trend-following strategies.\n"
|
|
|
|
|
|
]
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 20
|
2025-07-28 14:36:58 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "20c278fde79da68a",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"end_time": "2025-07-28T11:47:22.087217Z",
|
|
|
|
|
|
"start_time": "2025-07-28T11:47:21.570010Z"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-06-22 23:03:50 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
|
"\n",
|
"def analyze_price_change_autocorrelation(df: pd.DataFrame,\n",
"                                         price_col: str = 'close',\n",
"                                         max_lags: int = 50,\n",
"                                         plot_specific_lag: int = 1):\n",
"    \"\"\"\n",
"    Measures and plots the autocorrelation of each bar's high-low range.\n",
"\n",
"    The analysed series is `high - low` per row. It is stored in a working\n",
"    column named 'pct_change'; despite the name it is an absolute range, not a\n",
"    percentage. `price_col` is only checked for presence and is not used in\n",
"    the calculation.\n",
"\n",
"    Args:\n",
"        df (pd.DataFrame): Bar data with 'high' and 'low' columns, indexed by\n",
"            date or already sorted in time order. Not modified.\n",
"        price_col (str): Column that must exist in `df`. Defaults to 'close'.\n",
"        max_lags (int): Largest lag to evaluate. Defaults to 50.\n",
"        plot_specific_lag (int): Lag shown in the scatter/regression panel.\n",
"            Outside 1..max_lags (or None) a placeholder text is drawn instead.\n",
"            Defaults to 1.\n",
"\n",
"    Returns:\n",
"        pd.DataFrame with 'Lag' and 'Autocorrelation' columns, or None when a\n",
"        required column is missing, no valid rows remain, or the\n",
"        autocorrelation calculation raises.\n",
"    \"\"\"\n",
"    # --- 1. Data preparation ---\n",
"    # 'high' and 'low' are checked as well because the range below reads them.\n",
"    for col in (price_col, 'high', 'low'):\n",
"        if col not in df.columns:\n",
"            print(f\"错误: DataFrame中找不到列 '{col}'\")\n",
"            return None\n",
"\n",
"    # Work on a copy so the caller's DataFrame is left untouched\n",
"    df_analysis = df.copy()\n",
"\n",
"    # Bar range (high - low); the column keeps its historical name 'pct_change'\n",
"    df_analysis['pct_change'] = (df_analysis['high'] - df_analysis['low'])\n",
"\n",
"    # Drop rows where the range could not be computed\n",
"    df_analysis = df_analysis.dropna(subset=['pct_change'])\n",
"\n",
"    if df_analysis.empty:\n",
"        print(\"错误: 计算'pct_change'后DataFrame为空,无法进行分析。\")\n",
"        return None\n",
"\n",
"    print(f\"已计算 'pct_change',共 {len(df_analysis)} 条有效数据。\")\n",
"\n",
"    # --- 2. Autocorrelation for lags 1..max_lags ---\n",
"    lags = range(1, max_lags + 1)\n",
"    try:\n",
"        autocorrs = [df_analysis['pct_change'].autocorr(lag=n) for n in lags]\n",
"    except Exception as e:\n",
"        print(f\"计算自相关性时出错: {e}\")\n",
"        return None\n",
"\n",
"    autocorr_df = pd.DataFrame({'Lag': lags, 'Autocorrelation': autocorrs})\n",
"    print(\"\\n自相关性计算结果 (前5期):\")\n",
"    print(autocorr_df.head())\n",
"\n",
"    # --- 3. Visualisation ---\n",
"\n",
"    # a) Autocorrelation (ACF) stem plot\n",
"    plt.style.use('seaborn-v0_8-whitegrid')\n",
"    fig, axes = plt.subplots(2, 1, figsize=(14, 12))  # two stacked panels\n",
"    fig.suptitle('Price Change Autocorrelation Analysis', fontsize=16)\n",
"\n",
"    ax1 = axes[0]\n",
"    # The label gives ax1.legend() an artist to list.\n",
"    ax1.stem(autocorr_df['Lag'], autocorr_df['Autocorrelation'], label='Autocorrelation')\n",
"    ax1.set_title(f'Autocorrelation of K-line Price Changes (Lags 1-{max_lags})')\n",
"    ax1.set_xlabel('Lag (Number of Previous K-lines)')\n",
"    ax1.set_ylabel('Correlation Coefficient')\n",
"\n",
"    # Approximate 95% bounds, +/- 1.96 / sqrt(N), drawn as dashed reference lines\n",
"    conf_interval = 1.96 / np.sqrt(len(df_analysis))\n",
"    ax1.axhline(y=conf_interval, color='red', linestyle='--', label='95% Confidence Interval')\n",
"    ax1.axhline(y=-conf_interval, color='red', linestyle='--')\n",
"    ax1.legend()\n",
"    ax1.grid(True)\n",
"\n",
"    # b) Scatter plot with regression line for one chosen lag\n",
"    ax2 = axes[1]\n",
"    if plot_specific_lag is not None and 1 <= plot_specific_lag <= max_lags:\n",
"        lag_col_name = f'pct_change_lag{plot_specific_lag}'\n",
"        df_analysis[lag_col_name] = df_analysis['pct_change'].shift(plot_specific_lag)\n",
"        # Only the two plotted columns decide which rows are usable.\n",
"        df_scatter = df_analysis.dropna(subset=['pct_change', lag_col_name])\n",
"\n",
"        sns.regplot(x=lag_col_name, y='pct_change', data=df_scatter, ax=ax2,\n",
"                    scatter_kws={'alpha': 0.5, 's': 20},\n",
"                    line_kws={'color': 'red', 'linestyle': '--'})\n",
"        ax2.set_title(f'Current vs. Lag-{plot_specific_lag} Price Change')\n",
"        ax2.set_xlabel(f'Previous K-line\\'s pct_change (t-{plot_specific_lag})')\n",
"        ax2.set_ylabel('Current K-line\\'s pct_change (t)')\n",
"        ax2.grid(True)\n",
"        ax2.axhline(0, color='grey', lw=0.5)\n",
"        ax2.axvline(0, color='grey', lw=0.5)\n",
"    else:\n",
"        ax2.text(0.5, 0.5, 'No specific lag plot requested or lag is out of range.',\n",
"                 ha='center', va='center', transform=ax2.transAxes)\n",
"        ax2.set_axis_off()\n",
"\n",
"    plt.tight_layout()  # fit both panels under the figure title\n",
"    plt.show()\n",
"\n",
"    return autocorr_df\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
"# Run the autocorrelation analysis on the bars loaded earlier (df_raw).\n",
"if df_raw is None or df_raw.empty:\n",
"    print(\"Analysis cannot proceed. Please check if data loading was successful.\")\n",
"else:\n",
"    processed_data = calculate_stationary_indicators(df_raw, volume_window=10, price_lag=5)\n",
"    analyzed_df = analyze_price_change_autocorrelation(processed_data, plot_specific_lag=50)\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"Indicators calculated. 25051 rows of data remaining for analysis.\n",
|
|
|
|
|
|
"已计算 'pct_change',共 25051 条有效数据。\n",
|
2025-07-28 14:36:58 +08:00
|
|
|
|
"\n",
|
|
|
|
|
|
"自相关性计算结果 (前5期):\n",
|
|
|
|
|
|
" Lag Autocorrelation\n",
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"0 1 0.591365\n",
|
|
|
|
|
|
"1 2 0.523844\n",
|
|
|
|
|
|
"2 3 0.495323\n",
|
|
|
|
|
|
"3 4 0.497299\n",
|
|
|
|
|
|
"4 5 0.477663\n"
|
2025-07-28 14:36:58 +08:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"<Figure size 1400x1200 with 2 Axes>"
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAScCAYAAADwLq27AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4FFXbx/HfpgGhBBKqNGkJJaEXpUqoUqQJiASkKjwioiLNBi8odhFQQaUJSBEIRRBUeEDAUMWHIog0QaSZEHpI2Oz7B9euLEkgZXczu/v9XJeXZGb23OfM7Elm7z1zjslisVgEAAAAAAAAADAEn+yuAAAAAAAAAADgXyRtAQAAAAAAAMBASNoCAAAAAAAAgIGQtAUAAAAAAAAAAyFpCwAAAAAAAAAGQtIWAAAAAAAAAAyEpC0AAAAAAAAAGAhJWwAAAAAAAAAwEJK2AAAAAAAAAGAgJG0BADCIyMhIhYWF2f0XHh6uRx55RMOGDdOuXbsyVe6oUaMUFhamZcuWObjGjnXx4kVNnz5dvXr1UoMGDRQeHq6aNWuqXbt2evXVVxUTE5PiNdbzhMz5559/FB4errCwMHXu3Dm7q4NUbN++XWFhYerVq5fLYrrL74w7zZo1y/b7YO7cudldHTtTpkxRWFiYpkyZ4lWxAQBA1pC0BQDAYGrWrKlOnTqpU6dOaty4sZKTk/Xdd98pKipKs2bNyu7qOcXy5csVGRmpDz/8UL/++qsefPBBtWzZUg899JBu3bqlb775Rn369NHzzz+f3VX1KMuXL1dSUpIk6cCBAzp06JBT4vTq1UthYWHavn27U8pH+i1btkxhYWEaNWpUdlfFoZYsWWL799KlS7OxJgAAAI7hl90VAAAA9rp27Wo36vHmzZt6/fXXtXz5cr333nt65JFHVKZMmXSX9+KLL2rgwIEqXLiwM6qbZQsWLNDYsWNlMpk0cOBADRo0SHny5LE75siRI5oyZYr+/PPPbKqlZ7Imt4oUKaJz585pyZIlevXVV7O5VshuRv+dcbdff/1VR44cUb58+XTr1i0dPHhQBw4cUJUqVbK7atmuZ8+eatOmjQoUKJDdVQEAABnESFsAAAwuR44cev311xUYGCiz2awffvghQ68vXLiwypUrp7x58zqphpl39OhRvfnmm5JuP5I9fPjwFAlbSSpfvrw+/vhjvfLKK66uosfavXu3jh07pqCgIL311luSpFWrVikxMTGba4bsZuTfGamxjrJt27atWrdubbfN2wUHB6tcuXIKDg7O7qoAAIAMImkLAIAbyJ07t2107V9//WXbfuecrkuXLlX37t1Vq1YthYWF2Y673/yU+/fv18iRIxUZGamIiAjVrVtXjz32mN555x2dPn06xfHnzp3TxIkT9eijj6patWqqUaOGunTponnz5unWrVsZateXX36ppKQkVaxYUU899dR9j69Tp06a+9atW6cePXqoZs2aql69up544glt2rQp1WOPHDmiyZMn64knnlCjRo0UHh6uevXqqU+fPlqzZk2qr7lzbtGkpCR9/vnnatu2rapWrap69eppyJAhOnr0aJr127Vrl/r376/atWvbztny5csl3Xtu3oSEBM2cOVPdunVT7dq1FRERoVatWundd9/VxYsX04x3P9akVvv27dWgQQOVLl1a8fHxaX4p8NdffyksLEyRkZFplmmdl9n63rOesx07dkiSevfubTdn893vyaNHj2r06NFq2rSpwsPDVbduXT311FNpXhOrjL6H9+7dq+eff14NGzZUeHi4Hn74YQ0aNEhbt25Ntfw7+9Dhw4c1bNgwNWzYUJUqVbLNFXrnFBC7du3SoEGD9NBDD6lixYp27XTk9fz55581fvx4dejQQfXq1VN4eLgaN26sYcOGae/evSmOj4yM1OjRoyVJ0dHRdtfizjlz7/c7Y/Xq1XrqqadUt25dhYeHq2nTpho9erSOHz+e6vF3vi+2bdumfv
36qU6dOqpatao6depk6weZcf36ddv74/HHH1eXLl0kSd9++61u3ryZ6mvunCLi+vXr+uCDD9SiRQuFh4erQYMGGjlypM6dO5fqa7///nu98sorateunerUqaOIiAjbeT127Fi66z158mSFhYXp9ddfT/OYvXv3KiwsTI0aNbL73frzzz9r0KBBql+/vqpUqaI6deqoZcuWGj58uHbu3GlXxr3mtP3uu+/Up08f1atXT1WqVFG9evXUpk0bvfrqq06bKgUAAKQfSVsAANzE1atXJUkBAQEp9o0fP16vvvqqfH199cgjj6hatWoymUz3LfPLL79U165dtXz5cvn7+6tZs2aqWbOmbt26pZkzZ6aYg3Tnzp1q3769Zs+erZs3b6p+/fqqWbOmTp06pfHjx+uZZ56xzZF6PxaLRRs2bJAkdezYMV31TcvkyZNt8902adJEpUuX1p49e/TMM8+kmoScNWuWPvnkE126dEmhoaFq0aKFypQpo+3bt+uFF17QxIkT04yVlJSkp59+Wp9++qmKFSumRx55RLly5dIPP/ygJ554wi6pbrV69Wr16tVLW7ZsUbFixRQZGalcuXJp9OjRev/999OMde7cOXXt2lXvvPOO/vzzT0VERKhJkyZKSkrSjBkz1KVLl1STkvdz9epVrV27VpLUpUsXmUwm25QcjpwPtGDBgurUqZMKFiwoSWrYsKFtvuZOnTqpVKlStmM3btyoTp06admyZcqRI4datmypSpUqaefOnXrhhRc0ZsyYVGNk9D28ePFide/eXWvXrlWhQoXUqlUrlS5dWv/973/Vr18/TZ06Nc327NmzR126dNHevXtVu3ZtNWnSRLlz57Y7Zu3aterVq5dOnTql+vXrq0GDBrY+6+jr+cYbb2jRokXy8fFRzZo11bRpU+XNm1ffffedevTooXXr1tkd36pVK9WsWVOSVKpUKbtr0ahRo/vGs1gsGjlypF588UXt2rVLlSpVUsuWLRUQEKBly5apU6dO+umnn9J8/dKlS9WnTx/Fx8erUaNGqlSpkn777TeNHDlSs2fPTne777RmzRpdu3bNtnBj7dq19eCDD+ry5cv6/vvv7/naK1eu6IknntDChQtVrlw5NW7cWBaLRcuXL1ePHj105cqVFK8ZNmyYVq9erRw5cuihhx5Sw4YN5ePjo2XLlqlLly765Zdf0lXvHj16yN/fX6tWrdLly5dTPWb+/PmSpO7du8vP7/asdtHR0erXr582btyoEiVKqGXLlqpdu7by5MmjNWvW3LfNVlOnTtWwYcO0c+dOVahQQa1bt1a1atXk6+urJUuWaNu2bekqBwAAOJEFAAAYQtOmTS2hoaGWpUuXpth38OBBS8WKFS2hoaGWJUuW2LaHhoZaQkNDLTVr1rTs2bMn1XJHjhyZark//vijJTQ01BIREWFZvXp1itf98ccfliNHjth+Pn/+vKVu3bqWsLAwy/z58y1ms9m2Ly4uztK7d29LaGioZcqUKelq78mTJ23137lzZ7peczfr62vXrm359ddf7fZNnjzZEhoaamnZsmWK123fvt1y8uTJFNuPHj1qady4sSU0NNTyv//9z27ftm3bbPE6duxoOX/+vG1fQkKCpV+/fpbQ0FDLa6+9Zve6s2fPWqpXr24JDQ21zJkzx27fjh07bPtCQ0Pt9iUnJ1ueeOIJS2hoqGXMmDGWK1eu2PYlJSVZ3n77bUtoaKilV69e9zlLKS1atMgSGhpq6dChg109K1WqZKlYsaLlr7/+SvGaU6dOWUJDQy1NmzZNs1zre/jUqVN226OioiyhoaGWbdu2pfq6CxcuWGrVqmUJDQ21fPrpp5bk5GTbvr1791rq1KljCQ0NtSxatMjudRl9Dx86dMhSuXJlS1hYmCU6Otru2I0bN1qqVKliCQ0NtWzZssVun7UPhYaGWt5//3279/7dbQwNDbXMmzcvxf7MXk/r+y4qKipFmT/88IMlPj4+1e2VK1e21K1b13
Ljxg27fUuXLrWEhoZaRo4cmeJ1d7f37t8ZX3/9tSU0NNRSr149y2+//WbXNmt/q127tiU2Ntbuddb3RZUqVSwbNmx
|
2025-07-28 14:36:58 +08:00
|
|
|
|
},
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-07-30 15:11:48 +08:00
|
|
|
|
"execution_count": 21
|
2025-06-22 23:03:50 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "quant",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
"version": "3.12.11"
|
|
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|