Files
NewQuant/data/ analysis/Rsi.ipynb
2025-09-16 09:59:38 +08:00

242 lines
9.2 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-09-04T03:09:51.064325Z",
"start_time": "2025-09-04T03:09:50.789242Z"
}
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import talib as ta # Make sure TA-Lib is installed: pip install TA-Lib\n",
"import statsmodels.api as sm\n",
"\n",
"import warnings\n",
"\n",
"# 忽略所有警告\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"# --- 0. Configure your file path ---\n",
"# Please replace 'your_futures_data.csv' with the actual path to your CSV file\n",
"file_path = '/mnt/d/PyProject/NewQuant/data/data/KQ_m@SHFE_rb/KQ_m@SHFE_rb_min15.csv'\n"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-04T03:09:51.149053Z",
"start_time": "2025-09-04T03:09:51.081682Z"
}
},
"cell_type": "code",
"source": [
"\n",
"# --- 1. Data Loading and Preprocessing ---\n",
"def load_and_preprocess_data(file_path):\n",
" \"\"\"\n",
" Loads historical futures data and performs basic preprocessing.\n",
" Assumes data contains 'datetime', 'open', 'high', 'low', 'close', 'volume' columns.\n",
" \"\"\"\n",
" try:\n",
" df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
" # Ensure data is sorted by time\n",
" df = df.sort_index()\n",
" # Check and handle missing values\n",
" initial_rows = len(df)\n",
" df.dropna(inplace=True)\n",
" if len(df) < initial_rows:\n",
" print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
"\n",
" # Check if necessary columns exist\n",
" required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
" if not all(col in df.columns for col in required_columns):\n",
" raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
"\n",
" print(f\"Successfully loaded {len(df)} rows of data.\")\n",
" print(\"First 5 rows of data:\")\n",
" print(df.head())\n",
" return df\n",
" except FileNotFoundError:\n",
" print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
" return None\n",
" except Exception as e:\n",
" print(f\"Error during data loading or preprocessing: {e}\")\n",
" return None\n",
"\n",
"\n",
"df_raw = load_and_preprocess_data(file_path)"
],
"id": "548c68daa68af8c1",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully loaded 25470 rows of data.\n",
"First 5 rows of data:\n",
" open high low close volume open_oi \\\n",
"datetime \n",
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
"\n",
" close_oi underlying_symbol \n",
"datetime \n",
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n"
]
}
],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-04T03:12:29.108246Z",
"start_time": "2025-09-04T03:12:26.136815Z"
}
},
"cell_type": "code",
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import talib\n",
"from scipy.stats import jarque_bera, bootstrap\n",
"from statsmodels.tsa.stattools import adfuller, acf\n",
"from statsmodels.stats.diagnostic import het_arch\n",
"\n",
"\n",
"def rsi_stats(df_raw: pd.DataFrame,\n",
" period: int = 14,\n",
" lookback: int = 252 * 4):\n",
" \"\"\"\n",
" 返回 RSI 多维度统计量\n",
" \"\"\"\n",
" rsi = talib.RSI(df_raw['close'], timeperiod=period)\n",
" rsi = rsi[~np.isnan(rsi)]\n",
" if len(rsi) < 100:\n",
" raise ValueError('RSI 样本不足')\n",
"\n",
" # 基本描述\n",
" stats = {\n",
" 'period': period,\n",
" 'mean': np.mean(rsi),\n",
" 'std': np.std(rsi, ddof=1),\n",
" 'skew': pd.Series(rsi).skew(),\n",
" 'kurt': pd.Series(rsi).kurtosis(),\n",
" 'q10': np.percentile(rsi, 10),\n",
" 'q90': np.percentile(rsi, 90),\n",
" }\n",
"\n",
" # 均值回归 / 动量\n",
" adf_res = adfuller(rsi, regression='c')\n",
" stats['adf_stat'] = adf_res[0]\n",
" stats['adf_p'] = adf_res[1]\n",
" stats['acf_1'] = acf(rsi, nlags=1, fft=False)[1]\n",
"\n",
" # 波动聚集RSI 变化率的 ARCH\n",
" delta_rsi = np.diff(rsi)\n",
" arch_res = het_arch(delta_rsi)\n",
" arch_lm, arch_p = arch_res[0], arch_res[1]\n",
"\n",
" stats['arch_lm'] = arch_lm\n",
" stats['arch_p'] = arch_p\n",
"\n",
" # 重尾指数Hill\n",
" tail = sorted(rsi)[-int(np.ceil(0.05 * len(rsi))):]\n",
" stats['hill_tail'] = len(tail) / np.sum(np.log(tail) - np.log(min(tail)))\n",
"\n",
" # 均值回归区间收益\n",
" extreme_low = stats['q10']\n",
" extreme_high = stats['q90']\n",
" # 事件RSI 穿越极值后 5-bar 收益\n",
" events = []\n",
" for i in range(1, len(rsi)):\n",
" if rsi[i - 1] < extreme_low and rsi[i] >= extreme_low:\n",
" events.append(i)\n",
" elif rsi[i - 1] > extreme_high and rsi[i] <= extreme_high:\n",
" events.append(-i)\n",
"\n",
" if len(events) > 20:\n",
" rets = []\n",
" for idx in events:\n",
" idx_abs = abs(idx)\n",
" if idx_abs + 5 >= len(rsi):\n",
" continue\n",
" direc = 1 if idx > 0 else -1\n",
" start = df_raw['close'].iloc[idx_abs]\n",
" end = df_raw['close'].iloc[idx_abs + 5]\n",
" rets.append((end - start) * direc)\n",
" rets = np.array(rets)\n",
" stats['mean_ret_5'] = np.mean(rets)\n",
" stats['win_rate_5'] = np.mean(rets > 0)\n",
" stats['events_5'] = len(rets)\n",
" else:\n",
" stats.update({'mean_ret_5': np.nan, 'win_rate_5': np.nan, 'events_5': 0})\n",
"\n",
" return pd.Series(stats)\n",
"\n",
"\n",
"# ---------- 一键 ----------\n",
"for p in [6, 14, 21]:\n",
" try:\n",
" res = rsi_stats(df_raw, period=p)\n",
" print(res.to_frame().T.to_string(index=False))\n",
" except ValueError as e:\n",
" e.with_traceback().print_exc()"
],
"id": "c566a4757b4f6456",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
" 6.0 50.076761 18.780546 -0.007016 -0.633046 25.069754 75.082648 -39.303166 0.0 0.848566 360.978412 1.861253e-71 16.95949 -36.215042 0.005014 1795.0\n",
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
" 14.0 50.091278 12.856068 -0.0578 -0.260539 32.889392 66.554807 -27.311287 0.0 0.933965 102.255697 1.925344e-17 16.462544 -16.821584 0.240209 1149.0\n",
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
" 21.0 50.067891 10.675715 -0.080938 -0.156658 35.637464 63.528908 -22.804102 0.0 0.955594 55.768629 2.268265e-08 16.935726 -13.032393 0.30721 957.0\n"
]
}
],
"execution_count": 7
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}