242 lines
9.2 KiB
Plaintext
242 lines
9.2 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-09-04T03:09:51.064325Z",
|
||
"start_time": "2025-09-04T03:09:50.789242Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import os\n",
|
||
"import traceback\n",
|
||
"import warnings\n",
|
||
"\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"import seaborn as sns\n",
|
||
"import statsmodels.api as sm\n",
|
||
"import talib as ta # Make sure TA-Lib is installed: pip install TA-Lib\n",
|
||
"\n",
|
||
"# Silence every warning to keep cell output readable. This also hides\n",
|
||
"# deprecation notices from pandas/NumPy, so disable it while debugging.\n",
|
||
"warnings.filterwarnings(\"ignore\")\n",
|
||
"\n",
|
||
"# --- 0. Configure your file path ---\n",
|
||
"# Input CSV of bars. Set the NEWQUANT_DATA_FILE environment variable to load\n",
|
||
"# a different file without editing the notebook; the literal below is only\n",
|
||
"# the default used when that variable is not set.\n",
|
||
"file_path = os.environ.get(\n",
|
||
"    'NEWQUANT_DATA_FILE',\n",
|
||
"    '/mnt/d/PyProject/NewQuant/data/data/KQ_m@SHFE_rb/KQ_m@SHFE_rb_min15.csv',\n",
|
||
")\n"
|
||
],
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-09-04T03:09:51.149053Z",
|
||
"start_time": "2025-09-04T03:09:51.081682Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"\n",
|
||
"# --- 1. Data Loading and Preprocessing ---\n",
|
||
"def load_and_preprocess_data(file_path):\n",
|
||
"    \"\"\"\n",
|
||
"    Read a futures bar CSV into a DataFrame indexed by its 'datetime' column.\n",
|
||
"\n",
|
||
"    Rows are put in ascending index order and every row containing a missing\n",
|
||
"    value is removed (the number removed is reported). The columns 'open',\n",
|
||
"    'high', 'low', 'close' and 'volume' must all be present.\n",
|
||
"\n",
|
||
"    Returns the cleaned DataFrame. On a missing file or any other failure the\n",
|
||
"    problem is printed and None is returned instead of raising.\n",
|
||
"    \"\"\"\n",
|
||
"    required_columns = ['open', 'high', 'low', 'close', 'volume']\n",
|
||
"    try:\n",
|
||
"        df = pd.read_csv(file_path, parse_dates=['datetime'], index_col='datetime')\n",
|
||
"        initial_rows = len(df)\n",
|
||
"        # Chronological order first, then discard incomplete rows\n",
|
||
"        df = df.sort_index().dropna()\n",
|
||
"        if initial_rows > len(df):\n",
|
||
"            print(f\"Warning: Missing values found in data, deleted {initial_rows - len(df)} rows.\")\n",
|
||
"\n",
|
||
"        # Reject files lacking any of the OHLCV columns\n",
|
||
"        if any(col not in df.columns for col in required_columns):\n",
|
||
"            raise ValueError(f\"CSV file is missing required columns. Please ensure it contains: {required_columns}\")\n",
|
||
"\n",
|
||
"        print(f\"Successfully loaded {len(df)} rows of data.\")\n",
|
||
"        print(\"First 5 rows of data:\")\n",
|
||
"        print(df.head())\n",
|
||
"        return df\n",
|
||
"    except FileNotFoundError:\n",
|
||
"        print(f\"Error: File '{file_path}' not found. Please check the path.\")\n",
|
||
"    except Exception as e:\n",
|
||
"        print(f\"Error during data loading or preprocessing: {e}\")\n",
|
||
"    # Both handlers fall through to here\n",
|
||
"    return None\n",
|
||
"\n",
|
||
"\n",
|
||
"df_raw = load_and_preprocess_data(file_path)"
|
||
],
|
||
"id": "548c68daa68af8c1",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Successfully loaded 25470 rows of data.\n",
|
||
"First 5 rows of data:\n",
|
||
" open high low close volume open_oi \\\n",
|
||
"datetime \n",
|
||
"2020-12-31 14:45:00 4352.0 4400.0 4345.0 4388.0 213731.0 1221661.0 \n",
|
||
"2021-01-04 09:00:00 4356.0 4368.0 4309.0 4336.0 338332.0 1217327.0 \n",
|
||
"2021-01-04 09:15:00 4336.0 4342.0 4307.0 4318.0 144479.0 1197881.0 \n",
|
||
"2021-01-04 09:30:00 4318.0 4329.0 4312.0 4317.0 85679.0 1194567.0 \n",
|
||
"2021-01-04 09:45:00 4317.0 4338.0 4316.0 4338.0 66461.0 1194592.0 \n",
|
||
"\n",
|
||
" close_oi underlying_symbol \n",
|
||
"datetime \n",
|
||
"2020-12-31 14:45:00 1217327.0 SHFE.rb2105 \n",
|
||
"2021-01-04 09:00:00 1197881.0 SHFE.rb2105 \n",
|
||
"2021-01-04 09:15:00 1194567.0 SHFE.rb2105 \n",
|
||
"2021-01-04 09:30:00 1194592.0 SHFE.rb2105 \n",
|
||
"2021-01-04 09:45:00 1198035.0 SHFE.rb2105 \n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 2
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-09-04T03:12:29.108246Z",
|
||
"start_time": "2025-09-04T03:12:26.136815Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"import talib\n",
|
||
"from scipy.stats import jarque_bera, bootstrap\n",
|
||
"from statsmodels.tsa.stattools import adfuller, acf\n",
|
||
"from statsmodels.stats.diagnostic import het_arch\n",
|
||
"\n",
|
||
"\n",
|
||
"def rsi_stats(df_raw: pd.DataFrame,\n",
|
||
"              period: int = 14,\n",
|
||
"              lookback: int = 252 * 4):\n",
|
||
"    \"\"\"\n",
|
||
"    Summarise the statistical behaviour of the RSI of the 'close' column.\n",
|
||
"\n",
|
||
"    Parameters\n",
|
||
"    ----------\n",
|
||
"    df_raw : pd.DataFrame\n",
|
||
"        Bar data; only the 'close' column is read. Assumed to be in\n",
|
||
"        chronological order.\n",
|
||
"    period : int\n",
|
||
"        RSI window, passed to talib.RSI as timeperiod.\n",
|
||
"    lookback : int\n",
|
||
"        Currently unused; kept so existing calls remain valid.\n",
|
||
"\n",
|
||
"    Returns\n",
|
||
"    -------\n",
|
||
"    pd.Series\n",
|
||
"        period, mean, std, skew, kurt, q10, q90: distribution of the RSI.\n",
|
||
"        adf_stat, adf_p: ADF test (constant only) on the RSI level.\n",
|
||
"        acf_1: lag-1 autocorrelation of the RSI.\n",
|
||
"        arch_lm, arch_p: ARCH LM test on the first difference of the RSI.\n",
|
||
"        hill_tail: Hill-style tail index over the largest 5% of RSI values.\n",
|
||
"        mean_ret_5, win_rate_5, events_5: direction-adjusted close change\n",
|
||
"        (price points) 5 bars after the RSI re-enters the q10-q90 band,\n",
|
||
"        the share of positive outcomes, and the number of events measured.\n",
|
||
"        These are NaN, NaN and 0 when 20 or fewer events are found.\n",
|
||
"\n",
|
||
"    Raises\n",
|
||
"    ------\n",
|
||
"    ValueError\n",
|
||
"        If fewer than 100 non-NaN RSI values are available.\n",
|
||
"    \"\"\"\n",
|
||
"    horizon = 5  # bars held after an event; fixed because the keys end in _5\n",
|
||
"\n",
|
||
"    close = df_raw['close'].to_numpy(dtype=float)\n",
|
||
"    rsi_full = np.asarray(talib.RSI(close, timeperiod=period), dtype=float)\n",
|
||
"\n",
|
||
"    # Remove the NaN warm-up values from the RSI and the matching bars from\n",
|
||
"    # close with one mask, so position i means the same bar in both arrays.\n",
|
||
"    # Indexing the untrimmed close with positions of the trimmed RSI would\n",
|
||
"    # measure each return `period` bars before its event.\n",
|
||
"    valid = ~np.isnan(rsi_full)\n",
|
||
"    rsi = rsi_full[valid]\n",
|
||
"    close = close[valid]\n",
|
||
"    if len(rsi) < 100:\n",
|
||
"        raise ValueError('RSI 样本不足')\n",
|
||
"\n",
|
||
"    # Basic description\n",
|
||
"    stats = {\n",
|
||
"        'period': period,\n",
|
||
"        'mean': np.mean(rsi),\n",
|
||
"        'std': np.std(rsi, ddof=1),\n",
|
||
"        'skew': pd.Series(rsi).skew(),\n",
|
||
"        'kurt': pd.Series(rsi).kurtosis(),\n",
|
||
"        'q10': np.percentile(rsi, 10),\n",
|
||
"        'q90': np.percentile(rsi, 90),\n",
|
||
"    }\n",
|
||
"\n",
|
||
"    # Mean reversion / momentum\n",
|
||
"    adf_res = adfuller(rsi, regression='c')\n",
|
||
"    stats['adf_stat'] = adf_res[0]\n",
|
||
"    stats['adf_p'] = adf_res[1]\n",
|
||
"    stats['acf_1'] = acf(rsi, nlags=1, fft=False)[1]\n",
|
||
"\n",
|
||
"    # Volatility clustering: ARCH test on bar-to-bar RSI changes\n",
|
||
"    arch_res = het_arch(np.diff(rsi))\n",
|
||
"    stats['arch_lm'] = arch_res[0]\n",
|
||
"    stats['arch_p'] = arch_res[1]\n",
|
||
"\n",
|
||
"    # Heavy-tail index (Hill) over the largest 5% of RSI values\n",
|
||
"    n_tail = int(np.ceil(0.05 * len(rsi)))\n",
|
||
"    tail = np.sort(rsi)[-n_tail:]\n",
|
||
"    stats['hill_tail'] = len(tail) / np.sum(np.log(tail) - np.log(tail[0]))\n",
|
||
"\n",
|
||
"    # Mean-reversion events: the RSI crosses back inside an extreme quantile.\n",
|
||
"    extreme_low = stats['q10']\n",
|
||
"    extreme_high = stats['q90']\n",
|
||
"    prev_rsi, curr_rsi = rsi[:-1], rsi[1:]\n",
|
||
"    cross_up = (prev_rsi < extreme_low) & (curr_rsi >= extreme_low)        # long\n",
|
||
"    cross_down = (prev_rsi > extreme_high) & (curr_rsi <= extreme_high)    # short\n",
|
||
"    event_pos = np.flatnonzero(cross_up | cross_down) + 1  # bar on which the cross completes\n",
|
||
"    direction = np.where(cross_up[event_pos - 1], 1, -1)\n",
|
||
"\n",
|
||
"    if len(event_pos) > 20:\n",
|
||
"        # Skip events too close to the end to have a bar `horizon` steps ahead\n",
|
||
"        measurable = event_pos + horizon < len(close)\n",
|
||
"        entry = event_pos[measurable]\n",
|
||
"        rets = (close[entry + horizon] - close[entry]) * direction[measurable]\n",
|
||
"        stats['mean_ret_5'] = np.mean(rets)\n",
|
||
"        stats['win_rate_5'] = np.mean(rets > 0)\n",
|
||
"        stats['events_5'] = len(rets)\n",
|
||
"    else:\n",
|
||
"        stats.update({'mean_ret_5': np.nan, 'win_rate_5': np.nan, 'events_5': 0})\n",
|
||
"\n",
|
||
"    return pd.Series(stats)\n",
|
||
"\n",
|
||
"\n",
|
||
"# ---------- Run the summary for several RSI periods ----------\n",
|
||
"for p in [6, 14, 21]:\n",
|
||
"    try:\n",
|
||
"        res = rsi_stats(df_raw, period=p)\n",
|
||
"        print(res.to_frame().T.to_string(index=False))\n",
|
||
"    except ValueError:\n",
|
||
"        # Report the failure for this period and continue with the next one.\n",
|
||
"        traceback.print_exc()"
|
||
],
|
||
"id": "c566a4757b4f6456",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
|
||
" 6.0 50.076761 18.780546 -0.007016 -0.633046 25.069754 75.082648 -39.303166 0.0 0.848566 360.978412 1.861253e-71 16.95949 -36.215042 0.005014 1795.0\n",
|
||
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
|
||
" 14.0 50.091278 12.856068 -0.0578 -0.260539 32.889392 66.554807 -27.311287 0.0 0.933965 102.255697 1.925344e-17 16.462544 -16.821584 0.240209 1149.0\n",
|
||
" period mean std skew kurt q10 q90 adf_stat adf_p acf_1 arch_lm arch_p hill_tail mean_ret_5 win_rate_5 events_5\n",
|
||
" 21.0 50.067891 10.675715 -0.080938 -0.156658 35.637464 63.528908 -22.804102 0.0 0.955594 55.768629 2.268265e-08 16.935726 -13.032393 0.30721 957.0\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 7
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|