Merge branch 'dev'
# Conflicts: # .gitignore # main/train/Classify2.ipynb
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -18,4 +18,7 @@ model
|
||||
|
||||
!.gitignore
|
||||
!.git
|
||||
!/.vscode
|
||||
!.env
|
||||
|
||||
**/mlruns/
|
||||
**/mnt/
|
||||
BIN
main/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
main/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
5903
main/data/daily_data_none.ipynb
Normal file
5903
main/data/daily_data_none.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@@ -83,32 +83,32 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close open high low \\\n",
|
||||
"0 000905.SH 20251010 7398.2241 7499.3917 7509.1161 7373.9841 \n",
|
||||
"1 000905.SH 20251009 7548.9226 7470.0474 7559.0920 7437.3242 \n",
|
||||
"2 000905.SH 20250930 7412.3684 7372.5240 7428.0307 7372.0634 \n",
|
||||
"3 000905.SH 20250929 7350.5599 7251.5221 7377.2217 7216.7357 \n",
|
||||
"4 000905.SH 20250926 7240.9114 7311.8433 7351.7931 7237.0459 \n",
|
||||
"0 000905.SH 20251121 6817.4103 6955.7485 6986.7784 6817.4103 \n",
|
||||
"1 000905.SH 20251120 7061.9497 7174.1046 7180.7320 7056.9003 \n",
|
||||
"2 000905.SH 20251119 7122.7465 7141.2641 7178.1495 7086.1232 \n",
|
||||
"3 000905.SH 20251118 7151.0176 7215.0302 7230.5416 7118.4085 \n",
|
||||
"4 000905.SH 20251117 7235.3512 7248.9216 7262.3306 7202.5932 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"13810 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
|
||||
"13811 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
|
||||
"13812 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
|
||||
"13813 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
|
||||
"13814 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
|
||||
"13900 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
|
||||
"13901 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
|
||||
"13902 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
|
||||
"13903 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
|
||||
"13904 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
|
||||
"\n",
|
||||
" pre_close change pct_chg vol amount \n",
|
||||
"0 7548.9226 -150.6985 -1.9963 2.622566e+08 5.021274e+08 \n",
|
||||
"1 7412.3684 136.5542 1.8422 2.831308e+08 5.357568e+08 \n",
|
||||
"2 7350.5599 61.8085 0.8409 2.207075e+08 4.449564e+08 \n",
|
||||
"3 7240.9114 109.6485 1.5143 2.335394e+08 4.338645e+08 \n",
|
||||
"4 7341.3238 -100.4124 -1.3678 2.114441e+08 4.301976e+08 \n",
|
||||
"0 7061.9497 -244.5394 -3.4628 2.089334e+08 3.109687e+08 \n",
|
||||
"1 7122.7465 -60.7968 -0.8536 1.596187e+08 2.541582e+08 \n",
|
||||
"2 7151.0176 -28.2711 -0.3953 1.627866e+08 2.567551e+08 \n",
|
||||
"3 7235.3512 -84.3336 -1.1656 2.022141e+08 3.065400e+08 \n",
|
||||
"4 7235.4617 -0.1105 -0.0015 2.030506e+08 3.108232e+08 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"13810 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
|
||||
"13811 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
|
||||
"13812 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
|
||||
"13813 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
|
||||
"13814 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
|
||||
"13900 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
|
||||
"13901 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
|
||||
"13902 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
|
||||
"13903 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
|
||||
"13904 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
|
||||
"\n",
|
||||
"[13815 rows x 11 columns]\n"
|
||||
"[13905 rows x 11 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
179
main/data/qlib.ipynb
Normal file
179
main/data/qlib.ipynb
Normal file
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2d9eb12f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from operator import index\n",
|
||||
"\n",
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0c5a87ba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date his_low his_high cost_5pct cost_15pct \\\n",
|
||||
"0 000001.SZ 20180104 0.2 12.3 7.2 8.7 \n",
|
||||
"1 000002.SZ 20180104 0.2 25.6 15.0 17.6 \n",
|
||||
"2 000004.SZ 20180104 0.8 53.2 21.6 22.0 \n",
|
||||
"3 000008.SZ 20180104 0.1 13.9 7.2 7.8 \n",
|
||||
"4 000009.SZ 20180104 0.3 15.0 5.8 5.9 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"3095 603991.SH 20180104 12.0 67.8 26.4 27.0 \n",
|
||||
"3096 603993.SH 20180104 1.4 8.6 5.4 5.6 \n",
|
||||
"3097 603997.SH 20180104 5.4 31.5 9.9 10.2 \n",
|
||||
"3098 603998.SH 20180104 3.8 18.3 9.5 9.8 \n",
|
||||
"3099 603999.SH 20180104 3.6 30.6 6.9 6.9 \n",
|
||||
"\n",
|
||||
" cost_50pct cost_85pct cost_95pct weight_avg winner_rate \n",
|
||||
"0 10.8 11.8 12.1 10.39 44.59 \n",
|
||||
"1 22.2 24.4 24.8 21.31 97.14 \n",
|
||||
"2 23.6 27.6 29.6 24.71 45.41 \n",
|
||||
"3 8.6 9.2 10.5 8.64 47.04 \n",
|
||||
"4 6.6 7.6 7.8 6.74 38.85 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"3095 27.6 30.6 34.2 28.54 57.36 \n",
|
||||
"3096 6.1 6.9 7.3 6.15 72.78 \n",
|
||||
"3097 10.5 11.7 11.7 10.84 11.28 \n",
|
||||
"3098 11.5 13.0 15.2 11.72 18.44 \n",
|
||||
"3099 7.8 9.3 9.9 8.00 31.89 \n",
|
||||
"\n",
|
||||
"[3100 rows x 11 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"df = pro.cyq_perf(trade_date='20180104')\n",
|
||||
"print(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "500292d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250820')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal['cal_date'].tolist()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ae3cb65",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"✅ 日历文件已保存至: /mnt/d/PyProject/NewStock/data/qlib/calendar/day.txt\n",
|
||||
"📅 共 2097 个交易日\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"calendar_dir = \"/mnt/d/PyProject/NewStock/data/qlib/calendars\"\n",
|
||||
"os.makedirs(calendar_dir, exist_ok=True) # 自动创建目录(包括父目录)\n",
|
||||
"\n",
|
||||
"# 排序为升序(Qlib 要求日历按时间升序)\n",
|
||||
"trade_dates_sorted = sorted(trade_dates)\n",
|
||||
"\n",
|
||||
"# 写入 day.txt\n",
|
||||
"day_txt_path = os.path.join(calendar_dir, \"day.txt\")\n",
|
||||
"with open(day_txt_path, \"w\") as f:\n",
|
||||
" for date_str in trade_dates_sorted:\n",
|
||||
" f.write(date_str + \"\\n\")\n",
|
||||
"\n",
|
||||
"print(f\"✅ 日历文件已保存至: {day_txt_path}\")\n",
|
||||
"print(f\"📅 共 {len(trade_dates_sorted)} 个交易日\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "7a6e529b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"✅ all.txt 已生成,共 5685 只股票\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"stocks_df = pd.read_csv('/mnt/d/PyProject/NewStock/stocks_list.csv', encoding='utf-8-sig')\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# 假设你有一个包含所有股票代码的列表(来自 stocks_df['ts_code'])\n",
|
||||
"# 例如:\n",
|
||||
"# instrument_list = ['600000.SH', '000001.SZ', '300001.SZ', ...]\n",
|
||||
"\n",
|
||||
"instrument_list = stocks_df['ts_code'].unique().tolist()\n",
|
||||
"\n",
|
||||
"# 获取你的数据时间范围(从 trade_dates)\n",
|
||||
"start_date = min(trade_dates) # e.g., '20201106'\n",
|
||||
"end_date = max(trade_dates) # e.g., '20210125'\n",
|
||||
"\n",
|
||||
"# 创建 instruments 目录\n",
|
||||
"instr_dir = \"/mnt/d/PyProject/NewStock/data/qlib/instruments\"\n",
|
||||
"os.makedirs(instr_dir, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# 写入 all.txt\n",
|
||||
"with open(os.path.join(instr_dir, \"all.txt\"), \"w\") as f:\n",
|
||||
" for inst in instrument_list:\n",
|
||||
" f.write(f\"{inst}\\t{start_date}\\t{end_date}\\n\")\n",
|
||||
"\n",
|
||||
"print(f\"✅ all.txt 已生成,共 {len(instrument_list)} 只股票\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "stock",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
80
main/data/test.ipynb
Normal file
80
main/data/test.ipynb
Normal file
@@ -0,0 +1,80 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T07:14:47.275Z",
|
||||
"start_time": "2025-10-17T07:14:46.966401Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from operator import index\n",
|
||||
"\n",
|
||||
"import tushare as ts\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T07:15:47.631705Z",
|
||||
"start_time": "2025-10-17T07:15:47.491485Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"pro = ts.pro_api()\n",
|
||||
"\n",
|
||||
"#获取单个股票数据\n",
|
||||
"df = pro.stk_limit(ts_code='603106.SH', start_date='20240924', end_date='20240928')\n",
|
||||
"\n",
|
||||
"print(df)"
|
||||
],
|
||||
"id": "72dcf1a049d09818",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" trade_date ts_code up_limit down_limit\n",
|
||||
"0 20240927 603106.SH 7.71 6.31\n",
|
||||
"1 20240926 603106.SH 7.01 5.73\n",
|
||||
"2 20240925 603106.SH 6.37 5.21\n",
|
||||
"3 20240924 603106.SH 5.79 4.73\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 6
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -19,7 +19,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -39,15 +39,15 @@
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"27111 920445.BJ 20250922\n",
|
||||
"27112 920489.BJ 20250922\n",
|
||||
"27113 920682.BJ 20250922\n",
|
||||
"27114 920799.BJ 20250922\n",
|
||||
"27115 920819.BJ 20250922\n",
|
||||
"21755 920978.BJ 20251117\n",
|
||||
"21756 920981.BJ 20251117\n",
|
||||
"21757 920982.BJ 20251117\n",
|
||||
"21758 920985.BJ 20251117\n",
|
||||
"21759 920992.BJ 20251117\n",
|
||||
"\n",
|
||||
"[8205543 rows x 2 columns]\n",
|
||||
"20250926\n",
|
||||
"start_date: 20250929\n"
|
||||
"[8385278 rows x 2 columns]\n",
|
||||
"20251120\n",
|
||||
"start_date: 20251121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -64,7 +64,7 @@
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
@@ -73,7 +73,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -86,16 +86,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20251020 完成\n",
|
||||
"任务 20251017 完成\n",
|
||||
"任务 20251016 完成\n",
|
||||
"任务 20251015 完成\n",
|
||||
"任务 20251014 完成\n",
|
||||
"任务 20251013 完成\n",
|
||||
"任务 20251010 完成\n",
|
||||
"任务 20251009 完成\n",
|
||||
"任务 20250930 完成\n",
|
||||
"任务 20250929 完成\n"
|
||||
"任务 20251219 完成\n",
|
||||
"任务 20251218 完成\n",
|
||||
"任务 20251216 完成\n",
|
||||
"任务 20251217 完成\n",
|
||||
"任务 20251215 完成\n",
|
||||
"任务 20251212 完成\n",
|
||||
"任务 20251211 完成\n",
|
||||
"任务 20251210 完成\n",
|
||||
"任务 20251209 完成\n",
|
||||
"任务 20251208 完成\n",
|
||||
"任务 20251205 完成\n",
|
||||
"任务 20251204 完成\n",
|
||||
"任务 20251203 完成\n",
|
||||
"任务 20251202 完成\n",
|
||||
"任务 20251201 完成\n",
|
||||
"任务 20251128 完成\n",
|
||||
"任务 20251127 完成\n",
|
||||
"任务 20251126 完成\n",
|
||||
"任务 20251125 完成\n",
|
||||
"任务 20251124 完成\n",
|
||||
"任务 20251121 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -132,7 +143,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -177,7 +188,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -38,16 +38,16 @@
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
"... ... ...\n",
|
||||
"2190 859811.SI 20250922\n",
|
||||
"2191 859821.SI 20250922\n",
|
||||
"2192 859822.SI 20250922\n",
|
||||
"2193 859852.SI 20250922\n",
|
||||
"2194 859951.SI 20250922\n",
|
||||
".. ... ...\n",
|
||||
"873 859811.SI 20251120\n",
|
||||
"874 859821.SI 20251120\n",
|
||||
"875 859822.SI 20251120\n",
|
||||
"876 859852.SI 20251120\n",
|
||||
"877 859951.SI 20251120\n",
|
||||
"\n",
|
||||
"[1110243 rows x 2 columns]\n",
|
||||
"20250926\n",
|
||||
"start_date: 20250929\n"
|
||||
"[1123852 rows x 2 columns]\n",
|
||||
"20251120\n",
|
||||
"start_date: 20251121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -64,7 +64,7 @@
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
@@ -86,16 +86,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20251020 完成\n",
|
||||
"任务 20251017 完成\n",
|
||||
"任务 20251016 完成\n",
|
||||
"任务 20251015 完成\n",
|
||||
"任务 20251014 完成\n",
|
||||
"任务 20251013 完成\n",
|
||||
"任务 20251010 完成\n",
|
||||
"任务 20251009 完成\n",
|
||||
"任务 20250930 完成\n",
|
||||
"任务 20250929 完成\n"
|
||||
"任务 20251218 完成\n",
|
||||
"任务 20251219 完成\n",
|
||||
"任务 20251217 完成\n",
|
||||
"任务 20251216 完成\n",
|
||||
"任务 20251215 完成\n",
|
||||
"任务 20251212 完成\n",
|
||||
"任务 20251211 完成\n",
|
||||
"任务 20251210 完成\n",
|
||||
"任务 20251209 完成\n",
|
||||
"任务 20251208 完成\n",
|
||||
"任务 20251204 完成\n",
|
||||
"任务 20251205 完成\n",
|
||||
"任务 20251202 完成\n",
|
||||
"任务 20251203 完成\n",
|
||||
"任务 20251201 完成\n",
|
||||
"任务 20251128 完成\n",
|
||||
"任务 20251127 完成\n",
|
||||
"任务 20251126 完成\n",
|
||||
"任务 20251125 完成\n",
|
||||
"任务 20251124 完成\n",
|
||||
"任务 20251121 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -94,17 +94,17 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 9155905 entries, 0 to 27115\n",
|
||||
"Index: 9335158 entries, 0 to 21759\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 209.6+ MB\n",
|
||||
"memory usage: 213.7+ MB\n",
|
||||
"None\n",
|
||||
"20250926\n",
|
||||
"20250929\n"
|
||||
"20251120\n",
|
||||
"20251121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -121,7 +121,7 @@
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
@@ -144,16 +144,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20251017 完成\n",
|
||||
"任务 20251020 完成\n",
|
||||
"任务 20251015 完成\n",
|
||||
"任务 20251016 完成\n",
|
||||
"任务 20251014 完成\n",
|
||||
"任务 20251013 完成\n",
|
||||
"任务 20251010 完成\n",
|
||||
"任务 20251009 完成\n",
|
||||
"任务 20250930 完成\n",
|
||||
"任务 20250929 完成\n"
|
||||
"任务 20251219 完成\n",
|
||||
"任务 20251218 完成\n",
|
||||
"任务 20251217 完成\n",
|
||||
"任务 20251216 完成\n",
|
||||
"任务 20251215 完成\n",
|
||||
"任务 20251212 完成\n",
|
||||
"任务 20251211 完成\n",
|
||||
"任务 20251210 完成\n",
|
||||
"任务 20251209 完成\n",
|
||||
"任务 20251208 完成\n",
|
||||
"任务 20251205 完成\n",
|
||||
"任务 20251204 完成\n",
|
||||
"任务 20251203 完成\n",
|
||||
"任务 20251202 完成\n",
|
||||
"任务 20251201 完成\n",
|
||||
"任务 20251128 完成\n",
|
||||
"任务 20251127 完成\n",
|
||||
"任务 20251126 完成\n",
|
||||
"任务 20251125 完成\n",
|
||||
"任务 20251124 完成\n",
|
||||
"任务 20251121 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -224,58 +235,58 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"0 600642.SH 20251010 8.03 0.4806 1.3835 \n",
|
||||
"1 600295.SH 20251010 10.76 0.8549 3.7056 \n",
|
||||
"2 600444.SH 20251010 19.00 9.6611 17.4605 \n",
|
||||
"3 605100.SH 20251010 28.72 3.4770 7.6902 \n",
|
||||
"4 301399.SZ 20251010 19.53 3.9562 4.6772 \n",
|
||||
"0 000559.SZ 20251121 11.64 4.8762 13.4563 \n",
|
||||
"1 002981.SZ 20251121 27.84 1.5833 4.5574 \n",
|
||||
"2 301053.SZ 20251121 32.50 1.0110 2.9907 \n",
|
||||
"3 603093.SH 20251121 18.29 0.7403 3.2151 \n",
|
||||
"4 600269.SH 20251121 5.25 0.8423 1.8459 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"21679 600653.SH 20250929 2.13 2.1746 2.9589 \n",
|
||||
"21680 002344.SZ 20250929 4.49 1.7080 3.6338 \n",
|
||||
"21681 301162.SZ 20250929 60.30 2.8491 3.5744 \n",
|
||||
"21682 920077.BJ 20250929 14.43 1.1113 1.6410 \n",
|
||||
"21683 300283.SZ 20250929 7.04 4.8583 5.7018 \n",
|
||||
"5439 600243.SH 20251121 4.78 1.7524 2.1078 \n",
|
||||
"5440 300759.SZ 20251121 28.39 1.0514 1.6405 \n",
|
||||
"5441 600054.SH 20251121 11.10 1.3130 3.1101 \n",
|
||||
"5442 603579.SH 20251121 23.85 2.2265 4.3412 \n",
|
||||
"5443 002528.SZ 20251121 3.03 1.9087 4.0726 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"0 1.49 9.9635 10.2617 1.1073 1.3268 1.3600 4.9816 \n",
|
||||
"1 1.56 16.3053 16.4683 1.4839 1.0603 1.1230 7.4349 \n",
|
||||
"2 2.84 69.2746 55.7147 3.8398 3.6313 3.5392 0.5263 \n",
|
||||
"3 0.55 66.7896 123.2961 2.7276 5.3634 6.7180 2.0794 \n",
|
||||
"4 0.94 60.7990 75.8958 2.7675 6.8812 7.1828 1.2177 \n",
|
||||
"0 1.09 40.5790 38.2942 4.1055 2.9989 2.7785 1.2842 \n",
|
||||
"1 1.44 33.9003 28.1141 3.4000 2.2070 1.9328 0.9280 \n",
|
||||
"2 1.24 56.6010 98.7688 4.0251 4.4406 4.0870 0.2389 \n",
|
||||
"3 1.21 24.3641 24.7359 2.5390 1.9536 5.0927 0.3609 \n",
|
||||
"4 1.32 9.5849 6.9841 0.6165 2.0486 2.1055 3.0476 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"21679 0.72 107.4073 227.6354 5.4498 0.9887 0.9724 0.0000 \n",
|
||||
"21680 0.70 64.8238 75.9239 0.6834 5.5516 5.5560 0.9577 \n",
|
||||
"21681 0.96 85.4251 76.2427 5.3380 14.5424 12.3677 0.5586 \n",
|
||||
"21682 0.51 90.3399 82.4861 3.3572 5.2895 4.1636 NaN \n",
|
||||
"21683 0.94 NaN NaN 3.2821 1.1161 0.9970 0.2499 \n",
|
||||
"5439 1.37 NaN NaN 3.3110 8.8659 8.4702 0.0000 \n",
|
||||
"5440 0.86 28.1501 33.3780 3.4547 4.1124 3.7273 0.7056 \n",
|
||||
"5441 1.53 25.7012 28.5474 1.6912 4.1924 3.9403 1.8829 \n",
|
||||
"5442 1.23 25.2677 30.2644 1.7649 3.0372 3.0683 3.8598 \n",
|
||||
"5443 0.61 NaN NaN 35.8962 3.8438 6.1411 0.0000 \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"0 5.6040 489407.9376 489381.3156 170006.8520 3.929946e+06 \n",
|
||||
"1 5.5762 279877.6254 197557.6254 45577.9458 3.011483e+06 \n",
|
||||
"2 0.5789 14642.1932 14642.1932 8101.7360 2.782017e+05 \n",
|
||||
"3 1.0446 17113.2000 16993.2000 7683.2000 4.914911e+05 \n",
|
||||
"4 1.0594 18502.0000 5468.3586 4625.5000 3.613441e+05 \n",
|
||||
"0 1.5410 331535.8444 331454.4214 120110.9588 3.859077e+06 \n",
|
||||
"1 0.9187 13748.6115 11941.3915 4148.6777 3.827613e+05 \n",
|
||||
"2 0.8961 8421.7803 7749.4689 2619.7738 2.737079e+05 \n",
|
||||
"3 0.4117 61006.5893 61006.5893 14046.4993 1.115811e+06 \n",
|
||||
"4 3.2381 233540.7014 233540.7014 106564.7107 1.226089e+06 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"21679 NaN 194638.0317 194638.0317 143048.5612 4.145790e+05 \n",
|
||||
"21680 0.8463 128261.6960 128145.0092 60233.0025 5.758950e+05 \n",
|
||||
"21681 0.9704 13258.3724 8522.5548 6793.1764 7.994799e+05 \n",
|
||||
"21682 NaN 58768.1817 31695.6817 21464.7599 8.480249e+05 \n",
|
||||
"21683 NaN 49697.8222 36721.8502 31289.2680 3.498727e+05 \n",
|
||||
"5439 NaN 43885.0000 43885.0000 36485.0000 2.097703e+05 \n",
|
||||
"5440 0.7045 177819.5525 141938.4613 90967.4278 5.048297e+06 \n",
|
||||
"5441 1.5495 72937.9440 51330.0000 21670.4250 8.096112e+05 \n",
|
||||
"5442 1.2636 20335.5564 20335.5564 10429.5044 4.850030e+05 \n",
|
||||
"5443 NaN 119867.5082 105021.9577 49219.1551 3.631985e+05 \n",
|
||||
"\n",
|
||||
" circ_mv is_st \n",
|
||||
"0 3.929732e+06 False \n",
|
||||
"1 2.125720e+06 False \n",
|
||||
"2 2.782017e+05 False \n",
|
||||
"3 4.880447e+05 False \n",
|
||||
"4 1.067970e+05 False \n",
|
||||
"0 3.858129e+06 False \n",
|
||||
"1 3.324483e+05 False \n",
|
||||
"2 2.518577e+05 False \n",
|
||||
"3 1.115811e+06 False \n",
|
||||
"4 1.226089e+06 False \n",
|
||||
"... ... ... \n",
|
||||
"21679 4.145790e+05 False \n",
|
||||
"21680 5.753711e+05 False \n",
|
||||
"21681 5.139101e+05 False \n",
|
||||
"21682 4.573687e+05 False \n",
|
||||
"21683 2.585218e+05 False \n",
|
||||
"5439 2.097703e+05 True \n",
|
||||
"5440 4.029633e+06 False \n",
|
||||
"5441 5.697630e+05 False \n",
|
||||
"5442 4.850030e+05 False \n",
|
||||
"5443 3.182165e+05 True \n",
|
||||
"\n",
|
||||
"[21684 rows x 19 columns]\n"
|
||||
"[5444 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -300,45 +311,58 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"9 300313.SZ 20251010 8.84 3.1146 6.4625 \n",
|
||||
"20 603838.SH 20251010 7.80 0.5503 1.5146 \n",
|
||||
"29 603813.SH 20251010 24.06 1.5835 4.5173 \n",
|
||||
"48 002742.SZ 20251010 4.65 1.0473 1.2924 \n",
|
||||
"69 603559.SH 20251010 8.50 0.2072 0.2945 \n",
|
||||
"55 000909.SZ 20251121 5.63 0.5785 0.9877 \n",
|
||||
"62 002485.SZ 20251121 4.61 0.9593 3.9009 \n",
|
||||
"134 300096.SZ 20251121 7.31 1.6490 1.9675 \n",
|
||||
"154 300343.SZ 20251121 5.48 4.1298 4.7019 \n",
|
||||
"166 600525.SH 20251121 3.53 1.8869 2.7053 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"21466 603021.SH 20250929 4.62 1.3860 2.3418 \n",
|
||||
"21552 300020.SZ 20250929 3.58 1.5031 1.6828 \n",
|
||||
"21554 000506.SZ 20250929 10.88 10.5560 15.7565 \n",
|
||||
"21603 600636.SH 20250929 8.29 0.4693 0.7963 \n",
|
||||
"21661 603843.SH 20250929 5.17 0.3798 0.5364 \n",
|
||||
"5340 300368.SZ 20251121 14.86 7.3423 10.4878 \n",
|
||||
"5381 300020.SZ 20251121 3.63 1.9995 2.2386 \n",
|
||||
"5383 000506.SZ 20251121 11.55 2.5685 3.8339 \n",
|
||||
"5439 600243.SH 20251121 4.78 1.7524 2.1078 \n",
|
||||
"5443 002528.SZ 20251121 3.03 1.9087 4.0726 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio dv_ttm \\\n",
|
||||
"9 1.30 NaN NaN NaN 20.1067 20.9731 0.0000 NaN \n",
|
||||
"20 0.57 NaN NaN 2.6121 8.7517 6.9304 0.0000 NaN \n",
|
||||
"29 1.88 NaN NaN 4.5222 8.4776 7.5124 1.0313 NaN \n",
|
||||
"48 1.28 NaN NaN NaN 1.6800 2.1226 0.0000 NaN \n",
|
||||
"69 0.60 NaN NaN 3.5043 9.5964 8.2315 0.0000 NaN \n",
|
||||
"... ... .. ... ... ... ... ... ... \n",
|
||||
"21466 0.80 NaN NaN NaN 3.5891 3.7851 0.0000 NaN \n",
|
||||
"21552 1.00 NaN NaN 0.9812 5.1924 18.4036 0.0000 NaN \n",
|
||||
"21554 3.17 NaN NaN 16.4257 30.3341 23.4860 0.0000 NaN \n",
|
||||
"21603 0.81 NaN NaN 1.7909 12.8512 11.0116 0.4825 0.6031 \n",
|
||||
"21661 0.05 NaN NaN 12.5612 2.6558 3.1369 0.0000 NaN \n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"55 0.99 NaN NaN 2.4818 7.6504 7.4923 0.0 \n",
|
||||
"62 0.51 NaN NaN 2.1295 3.0458 3.2777 0.0 \n",
|
||||
"134 0.81 NaN 50.1694 8.9654 5.6290 6.2215 0.0 \n",
|
||||
"154 0.72 267.9489 106.2988 3.0411 6.7430 6.5207 0.0 \n",
|
||||
"166 0.72 NaN NaN 1.2373 0.5912 0.5968 0.0 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5340 0.94 NaN NaN 42.1875 42.9123 57.8502 0.0 \n",
|
||||
"5381 1.00 NaN NaN 1.0776 5.2649 21.5375 0.0 \n",
|
||||
"5383 0.78 NaN 239.4225 16.7572 32.2021 20.7023 0.0 \n",
|
||||
"5439 1.37 NaN NaN 3.3110 8.8659 8.4702 0.0 \n",
|
||||
"5443 0.61 NaN NaN 35.8962 3.8438 6.1411 0.0 \n",
|
||||
"\n",
|
||||
" total_share float_share free_share total_mv circ_mv is_st \n",
|
||||
"9 31297.7396 19735.2789 9511.5479 2.766720e+05 1.744599e+05 True \n",
|
||||
"20 32001.6000 32001.6000 11627.0468 2.496125e+05 2.496125e+05 True \n",
|
||||
"29 10501.5000 10501.5000 3681.2000 2.526661e+05 2.526661e+05 True \n",
|
||||
"48 43200.0000 43185.8082 34994.8239 2.008800e+05 2.008140e+05 True \n",
|
||||
"69 40127.6979 40127.6979 28231.9697 3.410854e+05 3.410854e+05 True \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"21466 31994.8070 31994.8070 18936.7934 1.478160e+05 1.478160e+05 True \n",
|
||||
"21552 79467.7974 76663.9584 68475.6577 2.844947e+05 2.744570e+05 True \n",
|
||||
"21554 92901.7761 92858.4361 62210.1427 1.010771e+06 1.010300e+06 True \n",
|
||||
"21603 43863.6802 43863.6802 25849.6552 3.636299e+05 3.636299e+05 True \n",
|
||||
"21661 69962.3237 69962.3237 49541.4702 3.617052e+05 3.617052e+05 True \n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"55 NaN 43771.4245 43771.0570 25634.2299 2.464331e+05 \n",
|
||||
"62 NaN 54400.0000 54400.0000 13377.7333 2.507840e+05 \n",
|
||||
"134 NaN 43000.0000 43000.0000 36039.3251 3.143300e+05 \n",
|
||||
"154 NaN 106896.9119 106621.9389 93649.7579 5.857951e+05 \n",
|
||||
"166 NaN 131878.0152 131878.0152 91981.1744 4.655294e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5340 NaN 52894.3475 52894.3475 37030.2475 7.860100e+05 \n",
|
||||
"5381 NaN 79467.7974 76663.9584 68475.6577 2.884681e+05 \n",
|
||||
"5383 NaN 92901.7761 92858.4361 62210.1427 1.073016e+06 \n",
|
||||
"5439 NaN 43885.0000 43885.0000 36485.0000 2.097703e+05 \n",
|
||||
"5443 NaN 119867.5082 105021.9577 49219.1551 3.631985e+05 \n",
|
||||
"\n",
|
||||
"[749 rows x 19 columns]\n"
|
||||
" circ_mv is_st \n",
|
||||
"55 2.464311e+05 True \n",
|
||||
"62 2.507840e+05 True \n",
|
||||
"134 3.143300e+05 True \n",
|
||||
"154 5.842882e+05 True \n",
|
||||
"166 4.655294e+05 True \n",
|
||||
"... ... ... \n",
|
||||
"5340 7.860100e+05 True \n",
|
||||
"5381 2.782902e+05 True \n",
|
||||
"5383 1.072515e+06 True \n",
|
||||
"5439 2.097703e+05 True \n",
|
||||
"5443 3.182165e+05 True \n",
|
||||
"\n",
|
||||
"[186 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -388,7 +412,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 9177589 entries, 0 to 21683\n",
|
||||
"Index: 9340602 entries, 0 to 5443\n",
|
||||
"Data columns (total 3 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
@@ -396,7 +420,7 @@
|
||||
" 1 trade_date object\n",
|
||||
" 2 is_st bool \n",
|
||||
"dtypes: bool(1), object(2)\n",
|
||||
"memory usage: 218.8+ MB\n",
|
||||
"memory usage: 222.7+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
@@ -424,7 +448,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "17cc645336d4eb18",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -18,7 +18,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"id": "48ae71ed02d61819",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -26,14 +26,27 @@
|
||||
"start_time": "2025-02-08T16:55:19.882313Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "FileNotFoundError",
|
||||
"evalue": "File ../../../data/daily_basic.h5 does not exist",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m daily_basic = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../../data/daily_basic.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mdaily_basic\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.12/site-packages/pandas/io/pytables.py:437\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 434\u001b[39m exists = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 436\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m exists:\n\u001b[32m--> \u001b[39m\u001b[32m437\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_buf\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not exist\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 439\u001b[39m store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)\n\u001b[32m 440\u001b[39m \u001b[38;5;66;03m# can't auto open/close if we are using an iterator\u001b[39;00m\n\u001b[32m 441\u001b[39m \u001b[38;5;66;03m# so delegate to the iterator\u001b[39;00m\n",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m: File ../../../data/daily_basic.h5 does not exist"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"daily_basic = pd.read_hdf('../../../data/daily_basic.h5', key='daily_basic')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "e6606a96e5728b8",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -93,7 +106,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "41bc125d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -163,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -209,7 +222,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "new_trader",
|
||||
"display_name": "stock",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -223,7 +236,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -34,17 +34,17 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8964780 entries, 0 to 25739\n",
|
||||
"Index: 9134824 entries, 0 to 20632\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 205.2+ MB\n",
|
||||
"memory usage: 209.1+ MB\n",
|
||||
"None\n",
|
||||
"20250926\n",
|
||||
"start_date: 20250929\n"
|
||||
"20251120\n",
|
||||
"start_date: 20251121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -61,7 +61,7 @@
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
@@ -84,16 +84,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20251020 完成\n",
|
||||
"任务 20251017 完成\n",
|
||||
"任务 20251016 完成\n",
|
||||
"任务 20251015 完成\n",
|
||||
"任务 20251014 完成\n",
|
||||
"任务 20251013 完成\n",
|
||||
"任务 20251009 完成\n",
|
||||
"任务 20251010 完成\n",
|
||||
"任务 20250929 完成\n",
|
||||
"任务 20250930 完成\n"
|
||||
"任务 20251218 完成\n",
|
||||
"任务 20251219 完成\n",
|
||||
"任务 20251217 完成\n",
|
||||
"任务 20251216 完成\n",
|
||||
"任务 20251215 完成\n",
|
||||
"任务 20251212 完成\n",
|
||||
"任务 20251211 完成\n",
|
||||
"任务 20251210 完成\n",
|
||||
"任务 20251209 完成\n",
|
||||
"任务 20251208 完成\n",
|
||||
"任务 20251205 完成\n",
|
||||
"任务 20251204 完成\n",
|
||||
"任务 20251203 完成\n",
|
||||
"任务 20251202 完成\n",
|
||||
"任务 20251201 完成\n",
|
||||
"任务 20251128 完成\n",
|
||||
"任务 20251127 完成\n",
|
||||
"任务 20251126 完成\n",
|
||||
"任务 20251125 完成\n",
|
||||
"任务 20251124 完成\n",
|
||||
"任务 20251121 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -183,71 +194,58 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n",
|
||||
"0 603290.SH 20251009 45532 52028.67 42778 \n",
|
||||
"1 600936.SH 20251009 42537 1545.21 42382 \n",
|
||||
"2 300429.SZ 20251009 81914 11768.07 64063 \n",
|
||||
"3 300879.SZ 20251009 15330 5366.90 11651 \n",
|
||||
"4 300031.SZ 20251009 51381 12650.70 43869 \n",
|
||||
"0 002593.SZ 20251121 369428 21109.32 239444 \n",
|
||||
"1 300405.SZ 20251121 173424 11775.01 115988 \n",
|
||||
"2 001336.SZ 20251121 11378 2729.92 10423 \n",
|
||||
"3 002403.SZ 20251121 24219 3104.96 19841 \n",
|
||||
"4 688268.SH 20251121 12369 7423.62 12330 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"20574 688083.SH 20250930 13247 10094.95 11236 \n",
|
||||
"20575 002939.SZ 20250930 372609 43083.12 232240 \n",
|
||||
"20576 688303.SH 20250930 62478 18094.19 55086 \n",
|
||||
"20577 300146.SZ 20250930 50078 5792.85 35214 \n",
|
||||
"20578 688351.SH 20250930 15096 3333.84 14017 \n",
|
||||
"5156 000881.SZ 20251121 146959 11936.56 155068 \n",
|
||||
"5157 300676.SZ 20251121 21428 9913.61 15092 \n",
|
||||
"5158 603138.SH 20251121 31243 4558.85 30559 \n",
|
||||
"5159 301526.SZ 20251121 172815 9552.38 105860 \n",
|
||||
"5160 300903.SZ 20251121 124772 20586.88 96098 \n",
|
||||
"\n",
|
||||
" sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n",
|
||||
"0 48942.98 53824 61495.85 54076 61851.39 \n",
|
||||
"1 1538.97 24175 878.06 31948 1160.07 \n",
|
||||
"2 9211.49 88583 12730.36 88244 12682.05 \n",
|
||||
"3 4089.33 15591 5464.12 17057 5976.94 \n",
|
||||
"4 10822.65 56173 13836.60 49423 12190.63 \n",
|
||||
"0 13673.67 256325 14655.03 298786 17088.39 \n",
|
||||
"1 7859.14 154296 10473.88 176589 11973.97 \n",
|
||||
"2 2498.94 5274 1266.93 5893 1415.57 \n",
|
||||
"3 2546.44 17292 2218.64 18180 2333.03 \n",
|
||||
"4 7430.97 16104 9682.18 16670 10042.76 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"20574 8561.02 10482 7994.12 9858 7514.37 \n",
|
||||
"20575 26867.01 279904 32371.96 324997 37595.57 \n",
|
||||
"20576 15952.67 55867 16177.83 53776 15573.61 \n",
|
||||
"20577 4076.10 46159 5337.00 39420 4560.91 \n",
|
||||
"20578 3095.89 6482 1430.69 6675 1474.59 \n",
|
||||
"5156 12623.78 107103 8717.66 97089 7896.18 \n",
|
||||
"5157 6975.73 17857 8249.34 16607 7679.15 \n",
|
||||
"5158 4458.47 15126 2208.57 11879 1733.73 \n",
|
||||
"5159 5855.69 155749 8607.76 160962 8892.48 \n",
|
||||
"5160 15867.99 92082 15223.39 105748 17449.56 \n",
|
||||
"\n",
|
||||
" buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n",
|
||||
"0 36150 41253.53 36789 41932.43 10514 \n",
|
||||
"1 11158 405.04 9212 334.60 5672 \n",
|
||||
"2 64282 9239.06 72904 10475.38 8221 \n",
|
||||
"3 10167 3562.24 12327 4313.59 3221 \n",
|
||||
"4 40306 9938.01 41035 10103.23 6112 \n",
|
||||
"0 125303 7153.65 190306 10868.03 13733 \n",
|
||||
"1 68396 4621.42 100633 6820.12 12166 \n",
|
||||
"2 326 77.32 662 159.66 0 \n",
|
||||
"3 7131 916.27 8891 1137.58 0 \n",
|
||||
"4 9155 5523.81 9780 5877.77 2793 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"20574 6674 5082.80 8224 6273.43 3329 \n",
|
||||
"20575 204229 23631.31 285167 32986.98 132696 \n",
|
||||
"20576 33304 9638.04 34809 10074.64 5032 \n",
|
||||
"20577 47161 5454.07 36321 4202.88 8662 \n",
|
||||
"20578 2513 555.48 3398 749.54 0 \n",
|
||||
"5156 63727 5186.84 54928 4460.74 8415 \n",
|
||||
"5157 12528 5781.44 16425 7596.83 3906 \n",
|
||||
"5158 5884 857.88 8048 1175.32 0 \n",
|
||||
"5159 63089 3481.66 115498 6376.52 13568 \n",
|
||||
"5160 58186 9624.92 77536 12811.46 25445 \n",
|
||||
"\n",
|
||||
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol \\\n",
|
||||
"0 12073.88 12377 14125.13 20027 \n",
|
||||
"1 205.33 0 0.00 -21182 \n",
|
||||
"2 1183.11 17790 2551.67 -840 \n",
|
||||
"3 1133.90 3275 1147.29 -4996 \n",
|
||||
"4 1507.28 19645 4816.08 1531 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"20574 2538.01 4413 3361.05 7612 \n",
|
||||
"20575 15366.29 147033 17003.12 84949 \n",
|
||||
"20576 1459.24 13010 3768.39 15188 \n",
|
||||
"20577 1000.95 41105 4744.98 -16754 \n",
|
||||
"20578 0.00 0 0.00 3406 \n",
|
||||
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol net_mf_amount \n",
|
||||
"0 781.20 36253 2069.12 -103672 -5866.51 \n",
|
||||
"1 813.01 15071 1030.08 -34131 -2297.62 \n",
|
||||
"2 0.00 0 0.00 -1180 -271.00 \n",
|
||||
"3 0.00 1730 222.81 194 30.22 \n",
|
||||
"4 1708.30 1640 986.41 476 282.30 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5156 686.43 19119 1546.77 -50922 -4113.23 \n",
|
||||
"5157 1805.21 7595 3497.90 -4085 -1873.36 \n",
|
||||
"5158 0.00 1768 257.78 713 110.42 \n",
|
||||
"5159 744.87 22900 1261.99 -64224 -3539.76 \n",
|
||||
"5160 4179.40 21103 3485.60 -29335 -4855.38 \n",
|
||||
"\n",
|
||||
" net_mf_amount \n",
|
||||
"0 22734.35 \n",
|
||||
"1 -766.75 \n",
|
||||
"2 -90.83 \n",
|
||||
"3 -1741.72 \n",
|
||||
"4 385.00 \n",
|
||||
"... ... \n",
|
||||
"20574 5816.07 \n",
|
||||
"20575 9927.60 \n",
|
||||
"20576 4417.72 \n",
|
||||
"20577 -1928.39 \n",
|
||||
"20578 752.20 \n",
|
||||
"\n",
|
||||
"[20579 rows x 20 columns]\n"
|
||||
"[5161 rows x 20 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -272,7 +270,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -34,23 +34,23 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"4872 600206.SH 20250926\n",
|
||||
"4873 600207.SH 20250926\n",
|
||||
"4874 600208.SH 20250926\n",
|
||||
"4876 600211.SH 20250926\n",
|
||||
"7280 920037.BJ 20250926\n",
|
||||
"4915 600221.SH 20251120\n",
|
||||
"4916 600222.SH 20251120\n",
|
||||
"4917 600223.SH 20251120\n",
|
||||
"4919 600227.SH 20251120\n",
|
||||
"3693 301448.SZ 20251120\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 11170571 entries, 0 to 36462\n",
|
||||
"Index: 11412627 entries, 0 to 29456\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 255.7+ MB\n",
|
||||
"memory usage: 261.2+ MB\n",
|
||||
"None\n",
|
||||
"20250926\n",
|
||||
"20250929\n"
|
||||
"20251120\n",
|
||||
"20251121\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -68,7 +68,7 @@
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
@@ -91,16 +91,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20251020 完成\n",
|
||||
"任务 20251017 完成\n",
|
||||
"任务 20251015 完成\n",
|
||||
"任务 20251016 完成\n",
|
||||
"任务 20251013 完成\n",
|
||||
"任务 20251014 完成\n",
|
||||
"任务 20251010 完成\n",
|
||||
"任务 20251009 完成\n",
|
||||
"任务 20250929 完成\n",
|
||||
"任务 20250930 完成\n"
|
||||
"任务 20251219 完成\n",
|
||||
"任务 20251218 完成\n",
|
||||
"任务 20251217 完成\n",
|
||||
"任务 20251216 完成\n",
|
||||
"任务 20251215 完成\n",
|
||||
"任务 20251212 完成\n",
|
||||
"任务 20251211 完成\n",
|
||||
"任务 20251210 完成\n",
|
||||
"任务 20251209 完成\n",
|
||||
"任务 20251208 完成\n",
|
||||
"任务 20251205 完成\n",
|
||||
"任务 20251204 完成\n",
|
||||
"任务 20251203 完成\n",
|
||||
"任务 20251202 完成\n",
|
||||
"任务 20251201 完成\n",
|
||||
"任务 20251128 完成\n",
|
||||
"任务 20251127 完成\n",
|
||||
"任务 20251126 完成\n",
|
||||
"任务 20251125 完成\n",
|
||||
"任务 20251124 完成\n",
|
||||
"任务 20251121 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -152,58 +163,19 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[ trade_date ts_code up_limit down_limit\n",
|
||||
"0 20251010 000001.SZ 12.54 10.26\n",
|
||||
"1 20251010 000002.SZ 7.47 6.11\n",
|
||||
"2 20251010 000004.SZ 12.26 11.10\n",
|
||||
"3 20251010 000006.SZ 11.94 9.77\n",
|
||||
"4 20251010 000007.SZ 8.12 6.64\n",
|
||||
"0 20251121 000001.SZ 13.04 10.67\n",
|
||||
"1 20251121 000002.SZ 6.82 5.58\n",
|
||||
"2 20251121 000004.SZ 11.64 10.54\n",
|
||||
"3 20251121 000006.SZ 12.07 9.87\n",
|
||||
"4 20251121 000007.SZ 11.00 9.00\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7309 20251010 920978.BJ 50.08 26.98\n",
|
||||
"7310 20251010 920981.BJ 48.04 25.88\n",
|
||||
"7311 20251010 920982.BJ 354.64 190.96\n",
|
||||
"7312 20251010 920985.BJ 11.86 6.40\n",
|
||||
"7313 20251010 920992.BJ 27.87 15.01\n",
|
||||
"7363 20251121 920978.BJ 49.06 26.42\n",
|
||||
"7364 20251121 920981.BJ 46.99 25.31\n",
|
||||
"7365 20251121 920982.BJ 300.67 161.91\n",
|
||||
"7366 20251121 920985.BJ 11.75 6.33\n",
|
||||
"7367 20251121 920992.BJ 24.06 12.96\n",
|
||||
"\n",
|
||||
"[7314 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||||
"0 20251009 000001.SZ 12.47 10.21\n",
|
||||
"1 20251009 000002.SZ 7.58 6.20\n",
|
||||
"2 20251009 000004.SZ 11.68 10.56\n",
|
||||
"3 20251009 000006.SZ 11.32 9.26\n",
|
||||
"4 20251009 000007.SZ 8.02 6.56\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7306 20251009 920978.BJ 50.44 27.16\n",
|
||||
"7307 20251009 920981.BJ 48.11 25.91\n",
|
||||
"7308 20251009 920982.BJ 366.06 197.12\n",
|
||||
"7309 20251009 920985.BJ 12.01 6.47\n",
|
||||
"7310 20251009 920992.BJ 27.39 14.75\n",
|
||||
"\n",
|
||||
"[7311 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||||
"0 20250929 000001.SZ 12.54 10.26\n",
|
||||
"1 20250929 000002.SZ 7.48 6.12\n",
|
||||
"2 20250929 000004.SZ 11.00 9.96\n",
|
||||
"3 20250929 000006.SZ 10.46 8.56\n",
|
||||
"4 20250929 000007.SZ 7.63 6.25\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7302 20250929 920445.BJ 14.37 7.75\n",
|
||||
"7303 20250929 920489.BJ 29.34 15.80\n",
|
||||
"7304 20250929 920682.BJ 13.10 7.06\n",
|
||||
"7305 20250929 920799.BJ 70.78 38.12\n",
|
||||
"7306 20250929 920819.BJ 5.52 2.98\n",
|
||||
"\n",
|
||||
"[7307 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||||
"0 20250930 000001.SZ 12.51 10.23\n",
|
||||
"1 20250930 000002.SZ 7.49 6.13\n",
|
||||
"2 20250930 000004.SZ 11.12 10.06\n",
|
||||
"3 20250930 000006.SZ 10.29 8.42\n",
|
||||
"4 20250930 000007.SZ 7.92 6.48\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7305 20250930 920445.BJ 14.67 7.91\n",
|
||||
"7306 20250930 920489.BJ 29.26 15.76\n",
|
||||
"7307 20250930 920682.BJ 12.92 6.96\n",
|
||||
"7308 20250930 920799.BJ 73.19 39.41\n",
|
||||
"7309 20250930 920819.BJ 5.55 2.99\n",
|
||||
"\n",
|
||||
"[7310 rows x 4 columns]]\n"
|
||||
"[7368 rows x 4 columns]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -271,7 +243,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
因子模块初始化文件
|
||||
"""
|
||||
|
||||
# from .operator_framework import (
|
||||
# StockWiseFactor,
|
||||
# DateWiseFactor
|
||||
# )
|
||||
|
||||
# 导入所有因子类
|
||||
from .technical_factors import (
|
||||
SMAFactor,
|
||||
EMAFactor,
|
||||
ATRFactor,
|
||||
OBVFactor,
|
||||
MACDFactor,
|
||||
RSI_Factor,
|
||||
CrossSectionalRankFactor
|
||||
)
|
||||
|
||||
from .money_flow_factors import (
|
||||
LGFlowFactor,
|
||||
FlowIntensityFactor,
|
||||
FlowDivergenceFactor,
|
||||
FlowStructureFactor,
|
||||
FlowAccelerationFactor,
|
||||
CostSqueeze,
|
||||
HighCostSelling,
|
||||
LowCostAccumulation,
|
||||
InstNetAccum,
|
||||
ChipLockin,
|
||||
RetailOutInstIn,
|
||||
AccumAccel
|
||||
)
|
||||
|
||||
from .chip_factors import (
|
||||
ChipConcentrationFactor,
|
||||
ChipSkewnessFactor,
|
||||
FloatingChipFactor,
|
||||
CostSupportFactor,
|
||||
WinnerPriceZoneFactor
|
||||
)
|
||||
|
||||
from .sentiment_factors import (
|
||||
SentimentPanicGreedFactor,
|
||||
SentimentBreadthFactor,
|
||||
SentimentReversalFactor,
|
||||
PriceDeductionFactor,
|
||||
PriceDeductionRatioFactor,
|
||||
IndustryMomentumLeadership,
|
||||
LeadershipPersistenceScore,
|
||||
DynamicIndustryLeadership
|
||||
)
|
||||
|
||||
from .industry_factors import (
|
||||
IndustryMomentumFactor,
|
||||
MarketBreadthFactor,
|
||||
SectorRotationFactor
|
||||
)
|
||||
|
||||
from .financial_factors import (
|
||||
CashflowToEVFactor,
|
||||
BookToPriceFactor,
|
||||
DebtToEquityFactor,
|
||||
ProfitMarginFactor,
|
||||
BMFactor
|
||||
)
|
||||
|
||||
from .special_factors import (
|
||||
LimitFactor,
|
||||
VolumeRatioFactor,
|
||||
BBI_RATIO_FACTOR,
|
||||
VolatilitySlopeFactor,
|
||||
PriceVolumeTrendFactor
|
||||
)
|
||||
from .momentum_factors import (
|
||||
ReturnFactor,
|
||||
VolatilityFactor,
|
||||
MomentumFactor,
|
||||
MomentumAcceleration,
|
||||
TrendEfficiency
|
||||
)
|
||||
|
||||
|
||||
# 导入统一因子计算模块
|
||||
from .all_factors import calculate_all_factors, compute_factors
|
||||
|
||||
# 导入算子框架
|
||||
from .operator_framework import StockWiseFactor, DateWiseFactor, FactorGraph
|
||||
|
||||
# 定义所有因子类的列表,便于统一管理
|
||||
ALL_STOCK_FACTORS = [
|
||||
SMAFactor,
|
||||
EMAFactor,
|
||||
ATRFactor,
|
||||
OBVFactor,
|
||||
MACDFactor,
|
||||
RSI_Factor,
|
||||
LGFlowFactor,
|
||||
FlowIntensityFactor,
|
||||
FlowDivergenceFactor,
|
||||
FlowStructureFactor,
|
||||
FlowAccelerationFactor,
|
||||
ChipConcentrationFactor,
|
||||
ChipSkewnessFactor,
|
||||
FloatingChipFactor,
|
||||
CostSupportFactor,
|
||||
WinnerPriceZoneFactor,
|
||||
SentimentPanicGreedFactor,
|
||||
SentimentBreadthFactor,
|
||||
SentimentReversalFactor,
|
||||
PriceDeductionFactor,
|
||||
PriceDeductionRatioFactor,
|
||||
CashflowToEVFactor,
|
||||
BookToPriceFactor,
|
||||
DebtToEquityFactor,
|
||||
ProfitMarginFactor,
|
||||
LimitFactor,
|
||||
VolumeRatioFactor,
|
||||
BBI_RATIO_FACTOR,
|
||||
VolatilitySlopeFactor,
|
||||
PriceVolumeTrendFactor
|
||||
]
|
||||
|
||||
ALL_DATE_FACTORS = [
|
||||
CrossSectionalRankFactor,
|
||||
IndustryMomentumFactor,
|
||||
MarketBreadthFactor,
|
||||
SectorRotationFactor
|
||||
]
|
||||
|
||||
__all__ = [
|
||||
# 技术指标因子
|
||||
'SMAFactor', 'EMAFactor', 'ATRFactor', 'OBVFactor', 'MACDFactor', 'RSI_Factor',
|
||||
|
||||
# 资金流因子
|
||||
'LGFlowFactor', 'FlowIntensityFactor', 'FlowDivergenceFactor',
|
||||
'FlowStructureFactor', 'FlowAccelerationFactor',
|
||||
|
||||
# 筹码分布因子
|
||||
'ChipConcentrationFactor', 'ChipSkewnessFactor', 'FloatingChipFactor',
|
||||
'CostSupportFactor', 'WinnerPriceZoneFactor',
|
||||
|
||||
# 市场情绪因子
|
||||
'SentimentPanicGreedFactor', 'SentimentBreadthFactor', 'SentimentReversalFactor',
|
||||
'PriceDeductionFactor', 'PriceDeductionRatioFactor',
|
||||
|
||||
# 行业/横截面因子
|
||||
'CrossSectionalRankFactor', 'IndustryMomentumFactor', 'MarketBreadthFactor',
|
||||
'SectorRotationFactor',
|
||||
|
||||
# 财务因子
|
||||
'CashflowToEVFactor', 'BookToPriceFactor', 'ROEFactor', 'DebtToEquityFactor',
|
||||
'ProfitMarginFactor',
|
||||
|
||||
# 特殊因子
|
||||
'LimitFactor', 'VolumeRatioFactor', 'BBI_RATIO_FACTOR',
|
||||
'VolatilitySlopeFactor', 'PriceVolumeTrendFactor',
|
||||
|
||||
# 统一因子计算
|
||||
'calculate_all_factors', 'compute_factors', 'get_available_stock_factors', 'get_available_date_factors',
|
||||
|
||||
# # 算子框架
|
||||
# 'StockWiseFactor', 'DateWiseFactor'
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
256
main/factor/all_factors.py
Normal file
256
main/factor/all_factors.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
统一因子计算模块
|
||||
提供统一接口来计算所有类型的因子
|
||||
"""
|
||||
|
||||
import polars as pl
|
||||
from typing import List, Dict, Any
|
||||
from main.factor.operator_framework import FactorGraph
|
||||
from main.factor import (
|
||||
# 技术指标因子
|
||||
SMAFactor,
|
||||
EMAFactor,
|
||||
ATRFactor,
|
||||
OBVFactor,
|
||||
MACDFactor,
|
||||
RSI_Factor,
|
||||
CrossSectionalRankFactor,
|
||||
# 动量因子
|
||||
ReturnFactor,
|
||||
VolatilityFactor,
|
||||
MomentumFactor,
|
||||
MomentumAcceleration,
|
||||
TrendEfficiency,
|
||||
# 资金流因子
|
||||
LGFlowFactor,
|
||||
FlowIntensityFactor,
|
||||
FlowDivergenceFactor,
|
||||
FlowStructureFactor,
|
||||
FlowAccelerationFactor,
|
||||
# 筹码分布因子
|
||||
ChipConcentrationFactor,
|
||||
ChipSkewnessFactor,
|
||||
FloatingChipFactor,
|
||||
CostSupportFactor,
|
||||
WinnerPriceZoneFactor,
|
||||
CostSqueeze,
|
||||
HighCostSelling,
|
||||
LowCostAccumulation,
|
||||
InstNetAccum,
|
||||
ChipLockin,
|
||||
RetailOutInstIn,
|
||||
AccumAccel,
|
||||
# 市场情绪因子
|
||||
SentimentPanicGreedFactor,
|
||||
SentimentBreadthFactor,
|
||||
SentimentReversalFactor,
|
||||
PriceDeductionFactor,
|
||||
PriceDeductionRatioFactor,
|
||||
IndustryMomentumLeadership,
|
||||
LeadershipPersistenceScore,
|
||||
DynamicIndustryLeadership,
|
||||
# 行业/横截面因子
|
||||
IndustryMomentumFactor,
|
||||
MarketBreadthFactor,
|
||||
SectorRotationFactor,
|
||||
# 财务因子
|
||||
CashflowToEVFactor,
|
||||
BookToPriceFactor,
|
||||
DebtToEquityFactor,
|
||||
ProfitMarginFactor,
|
||||
BMFactor,
|
||||
# 特殊因子
|
||||
LimitFactor,
|
||||
VolumeRatioFactor,
|
||||
BBI_RATIO_FACTOR,
|
||||
VolatilitySlopeFactor,
|
||||
PriceVolumeTrendFactor,
|
||||
)
|
||||
|
||||
|
||||
def calculate_all_factors(
    df: pl.DataFrame,
    stock_factor_configs: List[Dict[str, Any]] = None,
    date_factor_configs: List[Dict[str, Any]] = None,
):
    """Compute all configured factors on the input table.

    Parameters:
        df (pl.DataFrame): input per-stock daily data table.
        stock_factor_configs (List[Dict]): stock-wise factor configs, each
            shaped ``{"class": FactorClass, "params": {...}}``. When ``None``,
            the full built-in default set is used.
        date_factor_configs (List[Dict]): date-wise (cross-sectional) factor
            configs, same shape; defaults to the built-in set when ``None``.

    Returns:
        tuple: ``(result_df, all_ids)`` — the DataFrame with all factor
        columns appended, and the list of computed factor ids (stock-wise
        ids first, then date-wise ids).
    """
    # Factor dependency graph that instantiated factors are registered into.
    factor_graph = FactorGraph()

    # Fall back to the built-in default factor sets when no configs given.
    if stock_factor_configs is None:
        stock_factor_configs = [
            # Technical indicators
            {"class": SMAFactor, "params": {"window": 5}},
            {"class": SMAFactor, "params": {"window": 20}},
            {"class": EMAFactor, "params": {"window": 12}},
            {"class": EMAFactor, "params": {"window": 26}},
            {"class": ATRFactor, "params": {"window": 14}},
            {"class": OBVFactor, "params": {}},
            {
                "class": MACDFactor,
                "params": {"fast_period": 12, "slow_period": 26, "signal_period": 9},
            },
            {"class": RSI_Factor, "params": {"window": 14}},
            # Money-flow factors
            {"class": LGFlowFactor, "params": {}},
            {"class": FlowIntensityFactor, "params": {}},
            {"class": FlowDivergenceFactor, "params": {}},
            {"class": FlowStructureFactor, "params": {}},
            {"class": FlowAccelerationFactor, "params": {}},
            {"class": InstNetAccum, "params": {}},
            {"class": ChipLockin, "params": {}},
            {"class": RetailOutInstIn, "params": {}},
            {"class": AccumAccel, "params": {}},
            # Chip-distribution factors
            {"class": ChipConcentrationFactor, "params": {}},
            {"class": ChipSkewnessFactor, "params": {}},
            {"class": FloatingChipFactor, "params": {}},
            {"class": CostSupportFactor, "params": {}},
            {"class": WinnerPriceZoneFactor, "params": {}},
            {"class": LowCostAccumulation, "params": {}},
            {"class": HighCostSelling, "params": {}},
            {"class": CostSqueeze, "params": {}},
            # Market-sentiment factors
            {
                "class": SentimentPanicGreedFactor,
                "params": {"window_atr": 14, "window_smooth": 5},
            },
            {
                "class": SentimentBreadthFactor,
                "params": {"window_vol": 20, "window_smooth": 3},
            },
            {
                "class": SentimentReversalFactor,
                "params": {"window_ret": 5, "window_vol": 5},
            },
            {"class": PriceDeductionFactor, "params": {"n": 10}},
            {"class": PriceDeductionRatioFactor, "params": {"n": 10}},
            {"class": IndustryMomentumLeadership, "params": {}},
            {"class": LeadershipPersistenceScore, "params": {}},
            # {"class": DynamicIndustryLeadership, "params": {}},

            # Financial factors (most disabled by default)
            # {"class": CashflowToEVFactor, "params": {}},
            # {"class": BookToPriceFactor, "params": {}},
            # {"class": ROEFactor, "params": {}},
            # {"class": DebtToEquityFactor, "params": {}},
            # {"class": ProfitMarginFactor, "params": {}},
            {"class": BMFactor, "params": {}},
            # Special factors
            {"class": LimitFactor, "params": {}},
            {"class": VolumeRatioFactor, "params": {}},
            {"class": BBI_RATIO_FACTOR, "params": {}},
            {
                "class": VolatilitySlopeFactor,
                "params": {"window_vol": 20, "window_slope": 5},
            },
            {"class": PriceVolumeTrendFactor, "params": {}},
            # Momentum factors (20-day and 5-day returns, volatility, ...)
            {"class": ReturnFactor, "params": {"period": 20}},
            {"class": ReturnFactor, "params": {"period": 5}},
            {"class": VolatilityFactor, "params": {"period": 10}},
            {
                "class": MomentumAcceleration,
                "params": {"short_period": 5, "long_period": 60},
            },
            {"class": TrendEfficiency, "params": {"period": 10}},
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "circ_mv", "name": "size_rank"},
            },
        ]

    if date_factor_configs is None:
        # Fixed: the original default list contained the "return_5" rank
        # config twice; the duplicate is removed.
        date_factor_configs = [
            {"class": CrossSectionalRankFactor, "params": {"column": "return_5"}},
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "return_20"},
            },
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "volatility_10"},
            },
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "circ_mv"},
            },
            # {
            #     "class": CrossSectionalRankFactor,
            #     "params": {"factor_name": "momentum_10"},
            # },
        ]

    # Instantiate and register stock-wise factors. A failure to build one
    # factor is reported but does not abort the rest (best-effort).
    stock_factors = []
    for config in stock_factor_configs:
        factor_class = config["class"]
        params = config["params"]
        try:
            factor = factor_class(**params)
            factor_graph.add_factor(factor)
            stock_factors.append(factor)
        except Exception as e:
            print(f"创建股票因子 {factor_class.__name__} 时出错: {e}")

    # Instantiate and register date-wise factors, same best-effort policy.
    date_factors = []
    for config in date_factor_configs:
        factor_class = config["class"]
        params = config["params"]
        try:
            factor = factor_class(**params)
            factor_graph.add_factor(factor)
            date_factors.append(factor)
        except Exception as e:
            print(f"创建日期因子 {factor_class.__name__} 时出错: {e}")

    # Work on a copy so the caller's frame is not mutated.
    result_df = df.clone()

    # Collect the factor ids to compute for each group.
    stock_factor_ids = [factor.get_factor_id() for factor in stock_factors]
    date_factor_ids = [factor.get_factor_id() for factor in date_factors]

    # Compute stock-wise factors first, then date-wise factors (the
    # cross-sectional ranks may depend on stock-wise columns).
    if stock_factor_ids:
        result_df = factor_graph.compute(result_df, stock_factor_ids)

    if date_factor_ids:
        result_df = factor_graph.compute(result_df, date_factor_ids)

    # All computed factor ids, stock-wise first (same order as computation).
    all_ids = stock_factor_ids + date_factor_ids

    return result_df, all_ids
|
||||
|
||||
|
||||
# 为了兼容旧的函数调用方式,提供一个简化的统一接口
|
||||
def compute_factors(df: pl.DataFrame):
    """
    Simplified unified factor-computation interface (kept for backward
    compatibility with the old call style).

    Delegates to ``calculate_all_factors`` with its default factor
    configurations.

    Parameters:
        df (pl.DataFrame): input per-stock daily data table.

    Returns:
        tuple: ``(result_df, all_ids)`` as returned by
        ``calculate_all_factors`` — the DataFrame with all factor columns
        appended, and the list of computed factor ids.
    """
    return calculate_all_factors(df)
|
||||
123
main/factor/chip_factors.py
Normal file
123
main/factor/chip_factors.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
筹码分布因子模块
|
||||
包含基于股票截面的筹码分布因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import StockWiseFactor
|
||||
|
||||
|
||||
class ChipConcentrationFactor(StockWiseFactor):
    """Chip-concentration factor.

    Measures how tightly the holder cost distribution is packed by taking
    the 5%–95% cost-percentile spread relative to the close price.
    """

    def __init__(self):
        super().__init__(
            name="chip_concentration",
            parameters={},
            required_factor_ids=["cost_95pct", "cost_5pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return ``(cost_95pct - cost_5pct) / close`` for each row."""
        spread = group_df["cost_95pct"] - group_df["cost_5pct"]
        # Small epsilon in the denominator guards against division by zero.
        concentration = spread / (group_df["close"] + 1e-8)
        return concentration.alias(self.factor_id)
|
||||
|
||||
|
||||
class ChipSkewnessFactor(StockWiseFactor):
    """Chip-distribution skewness factor.

    Compares the volume-weighted average cost with the median (50th
    percentile) cost: a positive value means the cost distribution is
    skewed toward costs above the median.
    """

    def __init__(self):
        super().__init__(
            name="chip_skewness",
            parameters={},
            # Fixed: the original listed "cost_50pct" twice; only the two
            # distinct inputs are actually required.
            required_factor_ids=["weight_avg", "cost_50pct"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return ``(weight_avg - cost_50pct) / cost_50pct`` per row."""
        weight_avg = group_df["weight_avg"]
        cost_50 = group_df["cost_50pct"]

        # Small epsilon in the denominator guards against division by zero.
        chip_skewness = (weight_avg - cost_50) / (cost_50 + 1e-8)
        return chip_skewness.alias(self.factor_id)
|
||||
|
||||
|
||||
class FloatingChipFactor(StockWiseFactor):
    """Floating-chip ratio factor.

    Combines the winner rate with how far the close sits above the 15th
    cost percentile; the distance is floored at zero, so the factor is
    non-negative whenever ``winner_rate`` is.
    """

    def __init__(self):
        super().__init__(
            name="floating_chip",
            parameters={},
            required_factor_ids=["winner_rate", "cost_15pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Compute the floating-chip ratio per row.
        winner_rate = group_df["winner_rate"]
        cost_15 = group_df["cost_15pct"]
        close = group_df["close"]

        # Relative distance of the close above the 15th cost percentile;
        # the epsilon guards against division by zero.
        price_dist_cost15 = (close - cost_15) / (close + 1e-8)
        # Negative distances are clipped to 0 before weighting by winner_rate.
        # NOTE(review): np.maximum on a polars Series typically yields a
        # Series already; the extra pl.Series(...) wrap looks redundant but
        # may be version-dependent — confirm before simplifying.
        floating_chip = winner_rate * pl.Series(np.maximum(0, price_dist_cost15))

        return floating_chip.alias(self.factor_id)
|
||||
|
||||
|
||||
class CostSupportFactor(StockWiseFactor):
    """Cost-support strength factor.

    Tracks the period-over-period percentage change of the 15th cost
    percentile, i.e. how the lower support level of the holder cost
    distribution is shifting.
    """

    def __init__(self):
        super().__init__(
            name="cost_support",
            parameters={},
            # "close" is kept in the required ids even though calc_factor
            # does not read it, so the framework's input selection is
            # unchanged.
            required_factor_ids=["cost_15pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return the % change of ``cost_15pct`` between consecutive rows."""
        cost_15 = group_df["cost_15pct"]

        # diff() yields the row-over-row change; the epsilon guards
        # against division by zero. (Removed an unused `close` local.)
        cost_support_change = (cost_15.diff() / (cost_15 + 1e-8) * 100).alias(self.factor_id)
        return cost_support_change
|
||||
|
||||
|
||||
class WinnerPriceZoneFactor(StockWiseFactor):
    """Winner price-zone classification factor.

    Buckets each row into a discrete zone code from the close's position
    relative to the cost percentiles and the winner rate:

      1 = strong winners zone (close > cost_85pct and winner_rate > 0.8)
      2 = deep losers zone    (close < cost_15pct and winner_rate < 0.2)
      3 = above-median zone   (close > cost_50pct and winner_rate > 0.5)
      4 = below-median zone   (close < cost_50pct and winner_rate < 0.5)
      0 = none of the above

    Conditions are evaluated in order; the first match wins.
    """

    def __init__(self):
        super().__init__(
            name="winner_price_zone",
            parameters={},
            required_factor_ids=["close", "cost_85pct", "cost_15pct", "cost_50pct", "winner_rate"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close = group_df["close"]
        cost_85 = group_df["cost_85pct"]
        cost_15 = group_df["cost_15pct"]
        cost_50 = group_df["cost_50pct"]
        winner_rate = group_df["winner_rate"]

        # Polars when/then/otherwise chain; first matching branch wins.
        # NOTE(review): pl.when is fed boolean Series here, so the chain
        # yields an expression-like object rather than a plain Series —
        # confirm the framework evaluates it in a valid context.
        winner_zone = (
            pl.when((close > cost_85) & (winner_rate > 0.8))
            .then(1)
            .when((close < cost_15) & (winner_rate < 0.2))
            .then(2)
            .when((close > cost_50) & (winner_rate > 0.5))
            .then(3)
            .when((close < cost_50) & (winner_rate < 0.5))
            .then(4)
            .otherwise(0)
            .alias(self.factor_id)
        )

        return winner_zone
|
||||
File diff suppressed because it is too large
Load Diff
116
main/factor/financial_factors.py
Normal file
116
main/factor/financial_factors.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
财务因子模块
|
||||
包含基于股票截面的财务因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
|
||||
|
||||
|
||||
class CashflowToEVFactor(StockWiseFactor):
    """Operating-cashflow-to-enterprise-value factor."""

    def __init__(self):
        super().__init__(
            name="cashflow_to_ev",
            parameters={},
            required_factor_ids=["n_cashflow_act", "total_liab", "money_cap", "total_mv"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return n_cashflow_act / EV, EV = total_mv + total_liab - money_cap."""
        # Enterprise value = market cap + total liabilities - cash on hand.
        enterprise_value = (
            group_df["total_mv"]
            + group_df["total_liab"]
            - group_df["money_cap"]
        )

        # Epsilon in the denominator guards against division by zero.
        ratio = group_df["n_cashflow_act"] / (enterprise_value + 1e-8)

        return ratio.alias(self.factor_id)
|
||||
|
||||
|
||||
class BookToPriceFactor(StockWiseFactor):
    """Book-value-per-share to price (B/P) factor."""

    def __init__(self):
        super().__init__(
            name="book_to_price",
            parameters={},
            required_factor_ids=["bps", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return bps / close; epsilon avoids division by zero."""
        ratio = group_df["bps"] / (group_df["close"] + 1e-8)
        return ratio.alias(self.factor_id)
|
||||
|
||||
|
||||
class DebtToEquityFactor(StockWiseFactor):
    """Debt-to-equity factor: total liabilities / shareholders' equity.

    NOTE(review): the original docstring called this the debt-to-asset
    ratio (资产负债率), but the formula divides by equity, which is a
    debt-to-equity (gearing) ratio — documented here as implemented.
    """

    def __init__(self):
        super().__init__(
            name="debt_to_equity",
            parameters={},
            required_factor_ids=["total_liab", "equity"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return total_liab / equity; epsilon guards against zero equity."""
        ratio = group_df["total_liab"] / (group_df["equity"] + 1e-8)
        return ratio.alias(self.factor_id)
|
||||
|
||||
|
||||
class ProfitMarginFactor(StockWiseFactor):
    """Net profit margin factor: net profit over revenue."""

    def __init__(self):
        super().__init__(
            name="profit_margin",
            parameters={},
            required_factor_ids=["net_profit", "revenue"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return net_profit / revenue; epsilon avoids division by zero."""
        margin = group_df["net_profit"] / (group_df["revenue"] + 1e-8)
        return margin.alias(self.factor_id)
|
||||
|
||||
|
||||
class BMFactor(StockWiseFactor):
    """Book-to-market (BM) factor.

    Ratio of shareholders' equity (excluding minority interest) to
    total market value.
    """

    def __init__(self):
        super().__init__(
            name="bm",
            parameters={},
            required_factor_ids=["total_hldr_eqy_exc_min_int", "total_mv"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        equity = group_df["total_hldr_eqy_exc_min_int"]
        market_value = group_df["total_mv"]

        ratio = equity / (market_value + 1e-8)  # epsilon avoids division by zero

        # Optional filtering of invalid rows (e.g. negative equity):
        # ratio = pl.when((equity > 0) & (market_value > 0)).then(ratio).otherwise(None)

        return ratio.alias(self.factor_id)
|
||||
@@ -16,7 +16,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"e:\\PyProject\\NewStock\\main\\factor\n"
|
||||
"/mnt/d/PyProject/NewStock\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -62,8 +62,8 @@
|
||||
"cyq perf\n",
|
||||
"left merge on ['ts_code', 'trade_date']\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 5123740 entries, 0 to 5123739\n",
|
||||
"Data columns (total 31 columns):\n",
|
||||
"RangeIndex: 8713571 entries, 0 to 8713570\n",
|
||||
"Data columns (total 33 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object \n",
|
||||
@@ -74,57 +74,248 @@
|
||||
" 5 low float64 \n",
|
||||
" 6 vol float64 \n",
|
||||
" 7 pct_chg float64 \n",
|
||||
" 8 turnover_rate float64 \n",
|
||||
" 9 pe_ttm float64 \n",
|
||||
" 10 circ_mv float64 \n",
|
||||
" 11 volume_ratio float64 \n",
|
||||
" 12 is_st bool \n",
|
||||
" 13 up_limit float64 \n",
|
||||
" 14 down_limit float64 \n",
|
||||
" 15 buy_sm_vol float64 \n",
|
||||
" 16 sell_sm_vol float64 \n",
|
||||
" 17 buy_lg_vol float64 \n",
|
||||
" 18 sell_lg_vol float64 \n",
|
||||
" 19 buy_elg_vol float64 \n",
|
||||
" 20 sell_elg_vol float64 \n",
|
||||
" 21 net_mf_vol float64 \n",
|
||||
" 22 his_low float64 \n",
|
||||
" 23 his_high float64 \n",
|
||||
" 24 cost_5pct float64 \n",
|
||||
" 25 cost_15pct float64 \n",
|
||||
" 26 cost_50pct float64 \n",
|
||||
" 27 cost_85pct float64 \n",
|
||||
" 28 cost_95pct float64 \n",
|
||||
" 29 weight_avg float64 \n",
|
||||
" 30 winner_rate float64 \n",
|
||||
"dtypes: bool(1), datetime64[ns](1), float64(28), object(1)\n",
|
||||
"memory usage: 1.2+ GB\n",
|
||||
"None\n",
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n",
|
||||
" 8 amount float64 \n",
|
||||
" 9 turnover_rate float64 \n",
|
||||
" 10 pe_ttm float64 \n",
|
||||
" 11 circ_mv float64 \n",
|
||||
" 12 total_mv float64 \n",
|
||||
" 13 volume_ratio float64 \n",
|
||||
" 14 is_st bool \n",
|
||||
" 15 up_limit float64 \n",
|
||||
" 16 down_limit float64 \n",
|
||||
" 17 buy_sm_vol float64 \n",
|
||||
" 18 sell_sm_vol float64 \n",
|
||||
" 19 buy_lg_vol float64 \n",
|
||||
" 20 sell_lg_vol float64 \n",
|
||||
" 21 buy_elg_vol float64 \n",
|
||||
" 22 sell_elg_vol float64 \n",
|
||||
" 23 net_mf_vol float64 \n",
|
||||
" 24 his_low float64 \n",
|
||||
" 25 his_high float64 \n",
|
||||
" 26 cost_5pct float64 \n",
|
||||
" 27 cost_15pct float64 \n",
|
||||
" 28 cost_50pct float64 \n",
|
||||
" 29 cost_85pct float64 \n",
|
||||
" 30 cost_95pct float64 \n",
|
||||
" 31 weight_avg float64 \n",
|
||||
" 32 winner_rate float64 \n",
|
||||
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
|
||||
"memory usage: 2.1+ GB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from main.utils.utils import read_and_merge_h5_data\n",
|
||||
"\n",
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_basic.h5', key='daily_basic',\n",
|
||||
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio',\n",
|
||||
" 'is_st'], df=df, join='inner')\n",
|
||||
"\n",
|
||||
"print('stk limit')\n",
|
||||
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_limit.h5', key='stk_limit',\n",
|
||||
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
|
||||
" df=df)\n",
|
||||
"print('money flow')\n",
|
||||
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/money_flow.h5', key='money_flow',\n",
|
||||
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
|
||||
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
|
||||
" df=df)\n",
|
||||
"print('cyq perf')\n",
|
||||
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cyq_perf.h5', key='cyq_perf',\n",
|
||||
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
|
||||
" 'cost_50pct',\n",
|
||||
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
|
||||
" df=df)\n",
|
||||
"print(df.info())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0acb6625",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"origin_columns = df.columns.tolist()\n",
|
||||
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
|
||||
"print(origin_columns)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "820a6b50",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fina_indicator_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/fina_indicator.h5', key='fina_indicator',\n",
|
||||
" columns=['ts_code', 'ann_date', 'undist_profit_ps', 'ocfps', 'bps'],\n",
|
||||
" df=None)\n",
|
||||
"cashflow_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cashflow.h5', key='cashflow',\n",
|
||||
" columns=['ts_code', 'ann_date', 'n_cashflow_act'],\n",
|
||||
" df=None)\n",
|
||||
"balancesheet_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/balancesheet.h5', key='balancesheet',\n",
|
||||
" columns=['ts_code', 'ann_date', 'money_cap', 'total_liab'],\n",
|
||||
" df=None)\n",
|
||||
"top_list_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/top_list.h5', key='top_list',\n",
|
||||
" columns=['ts_code', 'trade_date', 'reason'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"top_list_df = top_list_df.sort_values(by='trade_date', ascending=False).drop_duplicates(subset=['ts_code', 'trade_date'], keep='first').sort_values(by='trade_date')\n",
|
||||
"\n",
|
||||
"stk_holdertrade_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_holdertrade.h5', key='stk_holdertrade',\n",
|
||||
" columns=['ts_code', 'ann_date', 'in_de', 'change_ratio'],\n",
|
||||
" df=None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "903469a7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"✅ 成功从 Redis Hash 'concept_stocks_daily_lists_pickle' 读取 1794 条每日概念股票数据。\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import redis\n",
|
||||
"import pickle\n",
|
||||
"from datetime import date, datetime\n",
|
||||
"\n",
|
||||
"# --- 配置 Redis 连接 ---\n",
|
||||
"REDIS_HOST = '140.143.91.66'\n",
|
||||
"REDIS_PORT = 6389\n",
|
||||
"REDIS_DB = 0\n",
|
||||
"\n",
|
||||
"# --- 定义 Redis 键名 ---\n",
|
||||
"HASH_KEY = \"concept_stocks_daily_lists_pickle\" # 区分之前的 JSON 版本\n",
|
||||
"MAX_DATE_KEY = \"concept_stocks_max_date_pickle\" # 区分之前的 JSON 版本\n",
|
||||
"\n",
|
||||
"concept_dict = {}\n",
|
||||
"\n",
|
||||
"# --- 连接 Redis ---\n",
|
||||
"try:\n",
|
||||
" r = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, password='Redis520102')\n",
|
||||
"\n",
|
||||
" all_data_from_redis = r.hgetall(HASH_KEY) # 返回的是字典,键是字节,值是字节\n",
|
||||
" \n",
|
||||
" if all_data_from_redis:\n",
|
||||
" for date_bytes, stocks_bytes in all_data_from_redis.items(): # 将变量名改为 date_bytes 更清晰\n",
|
||||
" try:\n",
|
||||
" # *** 修正点:将日期字节解码为字符串 ***\n",
|
||||
" date_str = date_bytes.decode('utf-8') \n",
|
||||
" date_obj = datetime.strptime(date_str, '%Y%m%d').date()\n",
|
||||
" \n",
|
||||
" stocks_list = pickle.loads(stocks_bytes)\n",
|
||||
" concept_dict[date_obj] = stocks_list\n",
|
||||
" except (ValueError, pickle.UnpicklingError) as e:\n",
|
||||
" print(f\"⚠️ 警告: 解析 Redis 数据时出错 (日期键: '{date_bytes.decode('utf-8', errors='ignore')}'),跳过此条数据: {e}\") # 打印警告时也解码一下\n",
|
||||
" print(f\"✅ 成功从 Redis Hash '{HASH_KEY}' 读取 {len(concept_dict)} 条每日概念股票数据。\")\n",
|
||||
" else:\n",
|
||||
" print(f\"ℹ️ Redis Hash '{HASH_KEY}' 中没有找到任何数据。\")\n",
|
||||
"\n",
|
||||
"except redis.exceptions.ConnectionError as e:\n",
|
||||
" print(f\"❌ 错误: 无法连接到 Redis 服务器,请检查 Redis 是否正在运行或连接配置: {e}\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"❌ 从 Redis 读取数据时发生未知错误: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "afb8da3d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"4566757\n",
|
||||
"开始生成概念相关因子...\n",
|
||||
"开始计算概念内截面排序因子,基于: ['pct_chg', 'turnover_rate', 'volume_ratio']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Ranking Features in Concepts: 100%|██████████| 3/3 [00:00<00:00, 15.82it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"概念相关因子生成完毕。\n",
|
||||
"4566757\n",
|
||||
"开始计算股东增减持因子...\n",
|
||||
"警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。\n",
|
||||
"股东增减持因子计算完成。\n",
|
||||
"Calculating cat_senti_mom_vol_spike...\n",
|
||||
"Finished cat_senti_mom_vol_spike.\n",
|
||||
"Calculating cat_senti_pre_breakout...\n",
|
||||
"Calculating atr_10 as it's missing...\n",
|
||||
"Calculating atr_40 as it's missing...\n",
|
||||
"Finished cat_senti_pre_breakout.\n",
|
||||
"计算因子 ts_turnover_rate_acceleration_5_20\n",
|
||||
"计算因子 ts_vol_sustain_10_30\n",
|
||||
"计算因子 cs_amount_outlier_10\n",
|
||||
"计算因子 ts_ff_to_total_turnover_ratio\n",
|
||||
"计算因子 ts_price_volume_trend_coherence_5_20\n",
|
||||
"计算因子 ts_ff_turnover_rate_surge_10\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
|
||||
"开始计算因子: AR, BR (原地修改)...\n",
|
||||
"因子 AR, BR 计算成功。\n",
|
||||
"因子 AR, BR 计算流程结束。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"使用 'ann_date' 作为财务数据生效日期。\n",
|
||||
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
|
||||
"计算 BBI...\n",
|
||||
"--- 计算日级别偏离度 (使用 pct_chg) ---\n",
|
||||
"--- 计算日级别动量基准 (使用 pct_chg) ---\n",
|
||||
"日级别动量基准计算完成 (使用 pct_chg)。\n",
|
||||
"日级别偏离度计算完成 (使用 pct_chg)。\n",
|
||||
"--- 计算日级别行业偏离度 (使用 pct_chg 和行业基准) ---\n",
|
||||
"--- 计算日级别行业动量基准 (使用 pct_chg 和 cat_l2_code) ---\n",
|
||||
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
|
||||
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
|
||||
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
|
||||
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
|
||||
" 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol',\n",
|
||||
" 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol',\n",
|
||||
" 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
|
||||
" 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate',\n",
|
||||
" 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol',\n",
|
||||
" 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol',\n",
|
||||
" 'lg_elg_buy_prop', 'flow_struct_buy_change',\n",
|
||||
" 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel',\n",
|
||||
" 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy',\n",
|
||||
" 'cost_support_15pct_change', 'cat_winner_price_zone',\n",
|
||||
" 'flow_chip_consistency', 'profit_taking_vs_absorb', '_is_positive',\n",
|
||||
" '_is_negative', 'cat_is_positive', '_pos_returns', '_neg_returns',\n",
|
||||
" '_pos_returns_sq', '_neg_returns_sq', 'upside_vol', 'downside_vol',\n",
|
||||
" 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate',\n",
|
||||
" 'pct_chg', 'amount', 'turnover_rate',\n",
|
||||
" ...\n",
|
||||
" 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike',\n",
|
||||
" 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike',\n",
|
||||
" 'vol_std_5', 'atr_14', 'atr_6', 'obv'],\n",
|
||||
" dtype='object')\n",
|
||||
" dtype='object', length=103)\n",
|
||||
"Calculating senti_strong_inflow...\n",
|
||||
"Finished senti_strong_inflow.\n",
|
||||
"Calculating lg_flow_mom_corr_20_60...\n",
|
||||
"Finished lg_flow_mom_corr_20_60.\n",
|
||||
"Calculating lg_buy_consolidation_20...\n",
|
||||
"Finished lg_buy_consolidation_20.\n",
|
||||
"Calculating lg_flow_accel...\n",
|
||||
"Finished lg_flow_accel.\n",
|
||||
"Calculating profit_pressure...\n",
|
||||
@@ -155,58 +346,73 @@
|
||||
"Finished vol_wgt_hist_pos_20.\n",
|
||||
"Calculating vol_adj_roc_20...\n",
|
||||
"Finished vol_adj_roc_20.\n",
|
||||
"Calculating intraday_lg_flow_corr_20 (Placeholder - complex implementation)...\n",
|
||||
"Finished intraday_lg_flow_corr_20 (Placeholder).\n",
|
||||
"Calculating cap_neutral_cost_metric (Placeholder - requires statsmodels)...\n",
|
||||
"Finished cap_neutral_cost_metric (Placeholder).\n"
|
||||
"Calculating cs_rank_net_lg_flow_val...\n",
|
||||
"Finished cs_rank_net_lg_flow_val.\n",
|
||||
"Calculating cs_rank_flow_divergence...\n",
|
||||
"Finished cs_rank_flow_divergence.\n",
|
||||
"Calculating cs_rank_ind_adj_lg_flow...\n",
|
||||
"Error calculating cs_rank_ind_adj_lg_flow: Missing 'cat_l2_code' column. Assigning NaN.\n",
|
||||
"Calculating cs_rank_elg_buy_ratio...\n",
|
||||
"Finished cs_rank_elg_buy_ratio.\n",
|
||||
"Calculating cs_rank_rel_profit_margin...\n",
|
||||
"Finished cs_rank_rel_profit_margin.\n",
|
||||
"Calculating cs_rank_cost_breadth...\n",
|
||||
"Finished cs_rank_cost_breadth.\n",
|
||||
"Calculating cs_rank_dist_to_upper_cost...\n",
|
||||
"Finished cs_rank_dist_to_upper_cost.\n",
|
||||
"Calculating cs_rank_winner_rate...\n",
|
||||
"Finished cs_rank_winner_rate.\n",
|
||||
"Calculating cs_rank_intraday_range...\n",
|
||||
"Finished cs_rank_intraday_range.\n",
|
||||
"Calculating cs_rank_close_pos_in_range...\n",
|
||||
"Finished cs_rank_close_pos_in_range.\n",
|
||||
"Calculating cs_rank_opening_gap...\n",
|
||||
"Error calculating cs_rank_opening_gap: Missing 'pre_close' column. Assigning NaN.\n",
|
||||
"Calculating cs_rank_pos_in_hist_range...\n",
|
||||
"Finished cs_rank_pos_in_hist_range.\n",
|
||||
"Calculating cs_rank_vol_x_profit_margin...\n",
|
||||
"Finished cs_rank_vol_x_profit_margin.\n",
|
||||
"Calculating cs_rank_lg_flow_price_concordance...\n",
|
||||
"Finished cs_rank_lg_flow_price_concordance.\n",
|
||||
"Calculating cs_rank_turnover_per_winner...\n",
|
||||
"Finished cs_rank_turnover_per_winner.\n",
|
||||
"Calculating cs_rank_ind_cap_neutral_pe (Placeholder - requires statsmodels)...\n",
|
||||
"Finished cs_rank_ind_cap_neutral_pe (Placeholder).\n",
|
||||
"Calculating cs_rank_volume_ratio...\n",
|
||||
"Finished cs_rank_volume_ratio.\n",
|
||||
"Calculating cs_rank_elg_buy_sell_sm_ratio...\n",
|
||||
"Finished cs_rank_elg_buy_sell_sm_ratio.\n",
|
||||
"Calculating cs_rank_cost_dist_vol_ratio...\n",
|
||||
"Finished cs_rank_cost_dist_vol_ratio.\n",
|
||||
"Calculating cs_rank_size...\n",
|
||||
"Finished cs_rank_size.\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 4566757 entries, 0 to 4566756\n",
|
||||
"Columns: 197 entries, ts_code to cs_rank_size\n",
|
||||
"dtypes: bool(10), datetime64[ns](1), float64(176), int64(6), int8(1), object(3)\n",
|
||||
"memory usage: 6.4+ GB\n",
|
||||
"None\n",
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_hot_concept_stock', 'concept_rank_pct_chg', 'concept_rank_turnover_rate', 'concept_rank_volume_ratio', 'holder_net_change_sum_10d', 'holder_increase_days_10d', 'holder_decrease_days_10d', 'holder_any_increase_flag_10d', 'holder_any_decrease_flag_10d', 'holder_direction_score_10d', 'cat_senti_mom_vol_spike', 'cat_senti_pre_breakout', 'ts_turnover_rate_acceleration_5_20', 'ts_vol_sustain_10_30', 'cs_amount_outlier_10', 'ts_ff_to_total_turnover_ratio', 'ts_price_volume_trend_coherence_5_20', 'ts_ff_turnover_rate_surge_10', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 
'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'senti_strong_inflow', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print('daily data')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
|
||||
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
|
||||
" df=None)\n",
|
||||
"\n",
|
||||
"print('daily basic')\n",
|
||||
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
|
||||
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
|
||||
" 'is_st'], df=df, join='inner')\n",
|
||||
"df = df[df['trade_date'] >= '2021-01-01']\n",
|
||||
"\n",
|
||||
"print('stk limit')\n",
|
||||
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
|
||||
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
|
||||
" df=df)\n",
|
||||
"print('money flow')\n",
|
||||
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
|
||||
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol',\n",
|
||||
" 'sell_lg_vol',\n",
|
||||
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
|
||||
" df=df)\n",
|
||||
"print('cyq perf')\n",
|
||||
"df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',\n",
|
||||
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
|
||||
" 'cost_50pct',\n",
|
||||
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
|
||||
" df=df)\n",
|
||||
"print(df.info())\n",
|
||||
"\n",
|
||||
"origin_columns = df.columns.tolist()\n",
|
||||
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
|
||||
"print(origin_columns)\n",
|
||||
"import numpy as np\n",
|
||||
"from main.factor.factor import *\n",
|
||||
"from main.factor.money_factor import * \n",
|
||||
"from main.factor.concept_factor import * \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def filter_data(df):\n",
|
||||
" # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n",
|
||||
" df = df[~df['is_st']]\n",
|
||||
" df = df[~df['ts_code'].str.endswith('BJ')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('30')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('68')]\n",
|
||||
" df = df[~df['ts_code'].str.startswith('8')]\n",
|
||||
" df = df[df['trade_date'] >= '2022-01-01']\n",
|
||||
" if 'in_date' in df.columns:\n",
|
||||
" df = df.drop(columns=['in_date'])\n",
|
||||
" df = df[~df[\"is_st\"]]\n",
|
||||
" df = df[~df[\"ts_code\"].str.endswith(\"BJ\")]\n",
|
||||
" df = df[~df[\"ts_code\"].str.startswith(\"30\")]\n",
|
||||
" df = df[~df[\"ts_code\"].str.startswith(\"68\")]\n",
|
||||
" df = df[~df[\"ts_code\"].str.startswith(\"8\")]\n",
|
||||
" df = df[df[\"trade_date\"] >= \"2019-01-01\"]\n",
|
||||
" if \"in_date\" in df.columns:\n",
|
||||
" df = df.drop(columns=[\"in_date\"])\n",
|
||||
" df = df.reset_index(drop=True)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
@@ -214,11 +420,70 @@
|
||||
"gc.collect()\n",
|
||||
"\n",
|
||||
"df = filter_data(df)\n",
|
||||
"df = df.sort_values(by=[\"ts_code\", \"trade_date\"])\n",
|
||||
"\n",
|
||||
"# df = price_minus_deduction_price(df, n=120)\n",
|
||||
"# df = price_deduction_price_diff_ratio_to_sma(df, n=120)\n",
|
||||
"# df = cat_price_vs_sma_vs_deduction_price(df, n=120)\n",
|
||||
"# df = cat_reason(df, top_list_df)\n",
|
||||
"# df = cat_is_on_top_list(df, top_list_df)\n",
|
||||
"print(len(df))\n",
|
||||
"df = generate_concept_factors(df, concept_dict)\n",
|
||||
"print(len(df))\n",
|
||||
"\n",
|
||||
"df = holder_trade_factors(df, stk_holdertrade_df)\n",
|
||||
"\n",
|
||||
"df = cat_senti_mom_vol_spike(\n",
|
||||
" df,\n",
|
||||
" return_period=3,\n",
|
||||
" return_threshold=0.03, # 近3日涨幅超3%\n",
|
||||
" volume_ratio_threshold=1.3,\n",
|
||||
" current_pct_chg_min=0.0, # 当日必须收红\n",
|
||||
" current_pct_chg_max=0.05,\n",
|
||||
") # 当日涨幅不宜过大\n",
|
||||
"\n",
|
||||
"df = cat_senti_pre_breakout(\n",
|
||||
" df,\n",
|
||||
" atr_short_N=10,\n",
|
||||
" atr_long_M=40,\n",
|
||||
" vol_atrophy_N=10,\n",
|
||||
" vol_atrophy_M=40,\n",
|
||||
" price_stab_N=5,\n",
|
||||
" price_stab_threshold=0.06,\n",
|
||||
" current_pct_chg_min_signal=0.002,\n",
|
||||
" current_pct_chg_max_signal=0.05,\n",
|
||||
" volume_ratio_signal_threshold=1.1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"df = ts_turnover_rate_acceleration_5_20(df)\n",
|
||||
"df = ts_vol_sustain_10_30(df)\n",
|
||||
"# df = cs_turnover_rate_relative_strength_20(df)\n",
|
||||
"df = cs_amount_outlier_10(df)\n",
|
||||
"df = ts_ff_to_total_turnover_ratio(df)\n",
|
||||
"df = ts_price_volume_trend_coherence_5_20(df)\n",
|
||||
"# df = ts_turnover_rate_trend_strength_5(df)\n",
|
||||
"df = ts_ff_turnover_rate_surge_10(df)\n",
|
||||
"\n",
|
||||
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"undist_profit_ps\")\n",
|
||||
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"ocfps\")\n",
|
||||
"calculate_arbr(df, N=26)\n",
|
||||
"df[\"log_circ_mv\"] = np.log(df[\"circ_mv\"])\n",
|
||||
"df = calculate_cashflow_to_ev_factor(df, cashflow_df, balancesheet_df)\n",
|
||||
"df = caculate_book_to_price_ratio(df, fina_indicator_df)\n",
|
||||
"df = turnover_rate_n(df, n=5)\n",
|
||||
"df = variance_n(df, n=20)\n",
|
||||
"df = bbi_ratio_factor(df)\n",
|
||||
"df = daily_deviation(df)\n",
|
||||
"df = daily_industry_deviation(df)\n",
|
||||
"df, _ = get_rolling_factor(df)\n",
|
||||
"df, _ = get_simple_factor(df)\n",
|
||||
"from main.factor.factor import *\n",
|
||||
"\n",
|
||||
"df = calculate_strong_inflow_signal(df)\n",
|
||||
"\n",
|
||||
"df = df.rename(columns={\"l1_code\": \"cat_l1_code\"})\n",
|
||||
"df = df.rename(columns={\"l2_code\": \"cat_l2_code\"})\n",
|
||||
"\n",
|
||||
"lg_flow_mom_corr(df, N=20, M=60)\n",
|
||||
"lg_buy_consolidation(df, N=20)\n",
|
||||
"lg_flow_accel(df)\n",
|
||||
"profit_pressure(df)\n",
|
||||
"underwater_resistance(df)\n",
|
||||
@@ -234,12 +499,57 @@
|
||||
"pullback_strong(df, N=20, M=20)\n",
|
||||
"vol_wgt_hist_pos(df, N=20)\n",
|
||||
"vol_adj_roc(df, N=20)\n",
|
||||
"intraday_lg_flow_corr(df, N=20) # Placeholder\n",
|
||||
"cap_neutral_cost_metric(df) # Placeholder\n",
|
||||
"# hurst_exponent_flow(df, N=60) # Placeholder\n",
|
||||
"# df['test'] = 1\n",
|
||||
"# df['test2'] = 2\n",
|
||||
"# df = df.merge(industry_df, on=['l2_code', 'trade_date'], how='left')\n",
|
||||
"\n",
|
||||
"cs_rank_net_lg_flow_val(df)\n",
|
||||
"cs_rank_flow_divergence(df)\n",
|
||||
"cs_rank_industry_adj_lg_flow(df) # Needs cat_l2_code\n",
|
||||
"cs_rank_elg_buy_ratio(df)\n",
|
||||
"cs_rank_rel_profit_margin(df)\n",
|
||||
"cs_rank_cost_breadth(df)\n",
|
||||
"cs_rank_dist_to_upper_cost(df)\n",
|
||||
"cs_rank_winner_rate(df)\n",
|
||||
"cs_rank_intraday_range(df)\n",
|
||||
"cs_rank_close_pos_in_range(df)\n",
|
||||
"cs_rank_opening_gap(df) # Needs pre_close\n",
|
||||
"cs_rank_pos_in_hist_range(df) # Needs his_low, his_high\n",
|
||||
"cs_rank_vol_x_profit_margin(df)\n",
|
||||
"cs_rank_lg_flow_price_concordance(df)\n",
|
||||
"cs_rank_turnover_per_winner(df)\n",
|
||||
"cs_rank_ind_cap_neutral_pe(df) # Placeholder - needs external libraries\n",
|
||||
"cs_rank_volume_ratio(df) # Needs volume_ratio\n",
|
||||
"cs_rank_elg_buy_sell_sm_ratio(df)\n",
|
||||
"cs_rank_cost_dist_vol_ratio(df) # Needs volume_ratio\n",
|
||||
"cs_rank_size(df) # Needs circ_mv\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# df = df.merge(index_data, on='trade_date', how='left')\n",
|
||||
"\n",
|
||||
"print(df.info())\n",
|
||||
"print(df.columns.tolist())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "48712034",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "FileNotFoundError",
|
||||
"evalue": "File ../../data/industry_data.h5 does not exist",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m l2_df = \u001b[43mread_and_merge_h5_data\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../data/industry_data.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mindustry_data\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43ml2_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43min_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mleft\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m df = merge_with_industry_data(df, l2_df)\n\u001b[32m 5\u001b[39m df = df.rename(columns={\u001b[33m'\u001b[39m\u001b[33ml2_code\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mcat_l2_code\u001b[39m\u001b[33m'\u001b[39m})\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/utils/utils.py:14\u001b[39m, in \u001b[36mread_and_merge_h5_data\u001b[39m\u001b[34m(h5_filename, key, columns, df, join, on, prefix)\u001b[39m\n\u001b[32m 11\u001b[39m processed_columns.append(col)\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# 从 HDF5 文件读取数据,选择需要的列\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m data = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh5_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprocessed_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# 修改列名,如果列名以前有 _,加上 _\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m data.columns:\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:424\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 421\u001b[39m exists = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 423\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m exists:\n\u001b[32m--> \u001b[39m\u001b[32m424\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_buf\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not exist\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 426\u001b[39m store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)\n\u001b[32m 427\u001b[39m \u001b[38;5;66;03m# can't auto open/close if we are using an iterator\u001b[39;00m\n\u001b[32m 428\u001b[39m \u001b[38;5;66;03m# so delegate to the iterator\u001b[39;00m\n",
|
||||
"\u001b[31mFileNotFoundError\u001b[39m: File ../../data/industry_data.h5 does not exist"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"l2_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',\n",
|
||||
" columns=['ts_code', 'l2_code', 'in_date'],\n",
|
||||
" df=None, on=['ts_code'], join='left')\n",
|
||||
@@ -247,7 +557,7 @@
|
||||
"df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
|
||||
"# df = df.merge(index_data, on='trade_date', how='left')\n",
|
||||
"\n",
|
||||
"days = 2\n",
|
||||
"days = 5\n",
|
||||
"df = df.sort_values(by=['ts_code', 'trade_date'])\n",
|
||||
"# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
|
||||
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
|
||||
@@ -265,7 +575,7 @@
|
||||
"\n",
|
||||
"def select_pre_zt_stocks_dynamic(stock_df):\n",
|
||||
" def select_stocks(group):\n",
|
||||
" return group.nlargest(1000, 'return_5') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
|
||||
" return group.nsmallest(1000, 'total_mv') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
|
||||
"\n",
|
||||
" stock_df = stock_df.groupby('trade_date', group_keys=False).apply(select_stocks)\n",
|
||||
" return stock_df\n",
|
||||
@@ -281,7 +591,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "1c1dd3d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -316,7 +626,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "2c60c1ea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -541,7 +851,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "e088bd8a357e815a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -785,7 +1095,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"id": "a0b3d7551ef0c81f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -1006,7 +1316,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "new_trader",
|
||||
"display_name": "stock",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -1020,7 +1330,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
90
main/factor/industry_factors.py
Normal file
90
main/factor/industry_factors.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
行业/横截面因子模块
|
||||
包含基于日期截面的行业/横截面因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import DateWiseFactor
|
||||
|
||||
|
||||
class IndustryMomentumFactor(DateWiseFactor):
    """Industry momentum factor: a stock's deviation from its L2-industry mean.

    For each trade-date group, computes the mean of ``factor_name`` within
    each industry (``cat_l2_code``) and returns ``stock value - industry mean``.
    """

    def __init__(self, factor_name: str):
        # Fix: `name` was a placeholder-less f-string (f"industry_momentum");
        # a plain literal is equivalent and clearer. The factor_name parameter
        # still differentiates the generated factor IDs.
        super().__init__(
            name="industry_momentum",
            parameters={"factor_name": factor_name},
            required_factor_ids=[factor_name, "cat_l2_code"]
        )
        self.factor_name = factor_name

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return the per-stock deviation from the industry mean for one date group."""
        if self.factor_name in group_df.columns and "cat_l2_code" in group_df.columns:
            # Mean of the target factor within each L2 industry.
            industry_means = group_df.group_by("cat_l2_code").agg([
                pl.col(self.factor_name).mean().alias("industry_mean")
            ])

            # Broadcast the industry mean back onto each row.
            # NOTE(review): this assumes the left join preserves group_df row
            # order, which the caller's with_columns relies on — confirm for
            # the installed polars version.
            result_df = group_df.join(industry_means, on="cat_l2_code", how="left")

            # Deviation of the stock's value from its industry mean.
            deviation = result_df[self.factor_name] - result_df["industry_mean"]
            return deviation.alias(self.factor_id)
        else:
            # Missing required columns: emit an all-null column of matching length.
            return pl.Series([None] * len(group_df)).alias(self.factor_id)
|
||||
|
||||
|
||||
class MarketBreadthFactor(DateWiseFactor):
    """Market breadth: fraction of stocks with a positive daily change on each date."""

    def __init__(self):
        super().__init__(
            name="market_breadth",
            parameters={},
            required_factor_ids=["pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Count advancers on this trade date.
        n_advancing = (group_df["pct_chg"] > 0).sum()
        n_stocks = len(group_df)

        # Epsilon keeps the division defined even for a degenerate empty group.
        ratio = n_advancing / (n_stocks + 1e-8)

        # Every row on this date receives the same breadth value.
        return pl.Series([ratio] * n_stocks).alias(self.factor_id)
|
||||
|
||||
|
||||
class SectorRotationFactor(DateWiseFactor):
    """Sector rotation factor: a stock's deviation from its sector's mean of a given factor."""

    def __init__(self, sector_factor: str):
        super().__init__(
            name=f"sector_rotation_{sector_factor}",
            parameters={"sector_factor": sector_factor},
            required_factor_ids=[sector_factor, "cat_l2_code"]
        )
        self.sector_factor = sector_factor

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Bail out to an all-null column when the required inputs are absent.
        if self.sector_factor not in group_df.columns or "cat_l2_code" not in group_df.columns:
            return pl.Series([None] * len(group_df)).alias(self.factor_id)

        # Per-sector mean of the chosen factor on this date.
        sector_means = group_df.group_by("cat_l2_code").agg([
            pl.col(self.sector_factor).mean().alias("sector_mean")
        ])

        # Attach each row's sector mean, then measure the stock's deviation from it.
        joined = group_df.join(sector_means, on="cat_l2_code", how="left")
        gap = joined[self.sector_factor] - joined["sector_mean"]
        return gap.alias(self.factor_id)
|
||||
174
main/factor/momentum_factors.py
Normal file
174
main/factor/momentum_factors.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
动量因子模块
|
||||
包含基于股票截面和日期截面的动量因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import StockWiseFactor, DateWiseFactor
|
||||
|
||||
|
||||
# -------------------- 股票截面因子:基于时间序列的动量因子 --------------------
|
||||
class ReturnFactor(StockWiseFactor):
    """N-day simple return factor (per-stock time series)."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="return",
            parameters={"period": period},
            required_factor_ids=["close"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Percent change of the close over `period` trading days.
        closes = group_df["close"]
        return closes.pct_change(self.period).alias(self.factor_id)
|
||||
|
||||
|
||||
class VolatilityFactor(StockWiseFactor):
    """N-day volatility factor: rolling standard deviation of daily percent changes."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="volatility",
            parameters={"period": period},
            required_factor_ids=["pct_chg"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Rolling std of daily returns over the configured window.
        daily_returns = group_df["pct_chg"]
        return daily_returns.rolling_std(self.period).alias(self.factor_id)
|
||||
|
||||
|
||||
class MomentumFactor(StockWiseFactor):
    """Momentum factor: cumulative percent change over the past N days."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="momentum",
            parameters={"period": period},
            required_factor_ids=["pct_chg"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Rolling sum of daily returns approximates N-day cumulative momentum.
        daily_returns = group_df["pct_chg"]
        return daily_returns.rolling_sum(self.period).alias(self.factor_id)
|
||||
|
||||
class MomentumAcceleration(StockWiseFactor):
    """Momentum acceleration factor.

    Difference between short-window and long-window volatility-adjusted
    momentum; flags stocks whose trend is forming or strengthening.
    """

    def __init__(self, short_period: int = 20, long_period: int = 60):
        super().__init__(
            name="momentum_acceleration",
            parameters={"short_period": short_period, "long_period": long_period},
            required_factor_ids=["pct_chg"]
        )
        self.short_period = short_period
        self.long_period = long_period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        eps = 1e-9
        daily_returns = group_df["pct_chg"]

        def vol_adjusted_momentum(window: int) -> pl.Series:
            # Cumulative return over the window, scaled by its own volatility
            # (epsilon guards against a zero-std window).
            total = daily_returns.rolling_sum(window)
            spread = daily_returns.rolling_std(window)
            return total / (spread + eps)

        # Short-horizon strength minus long-horizon strength.
        accel = vol_adjusted_momentum(self.short_period) - vol_adjusted_momentum(self.long_period)
        return accel.alias(self.factor_id)
|
||||
|
||||
|
||||
class TrendEfficiency(StockWiseFactor):
    """Trend efficiency factor (signal-to-noise of the price path).

    Ratio of the absolute net price change over N days to the sum of the
    absolute daily price changes over the same window; values near 1 mean a
    clean, low-noise trend.
    """

    def __init__(self, period: int = 20):
        super().__init__(
            name="trend_efficiency",
            parameters={"period": period},
            # The computation only needs the close price.
            required_factor_ids=["close"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        closes = group_df["close"]

        # Signal: absolute net move over the full window (close vs N days ago).
        signal = closes.diff(self.period).abs()

        # Noise: total path length — sum of absolute day-to-day moves.
        noise = closes.diff(1).abs().rolling_sum(self.period)

        # Epsilon avoids division by zero on flat windows.
        eps = 1e-9
        return (signal / (noise + eps)).alias(self.factor_id)
|
||||
|
||||
# -------------------- 统一计算函数 --------------------
|
||||
def calculate_momentum_factors(df: pl.DataFrame) -> pl.DataFrame:
    """Compute the momentum factor set on *df* and return the augmented frame.

    Parameters:
        df (pl.DataFrame): input stock table; expected to contain at least
            ts_code, trade_date, close, pct_chg.

    Returns:
        pl.DataFrame: *df* with one column per successfully computed factor.
    """
    result_df = df.clone()

    # Fix: the original built the operator lists eagerly; `RSI_Factor` and
    # `CrossSectionalRanking` are not defined in this module, so a NameError
    # was raised before ANY factor was computed and the per-operator
    # try/except never got a chance to run. Constructors are now deferred
    # behind lambdas and guarded, so the defined operators still execute.
    # NOTE(review): the CrossSectionalRanking inputs ("return_5d", ...) do not
    # match the IDs generated here ("return_5", ...) — confirm intended names.

    # Stock-wise (time-series) operators first.
    stock_factories = [
        lambda: ReturnFactor(5),
        lambda: ReturnFactor(20),
        lambda: VolatilityFactor(10),
        lambda: VolatilityFactor(30),
        lambda: MomentumFactor(10),
        lambda: MomentumFactor(30),
        lambda: RSI_Factor(14),  # not defined in this module; skipped if missing
    ]

    # Date-wise (cross-sectional ranking) operators second, so they can see
    # the time-series columns produced above.
    date_factories = [
        lambda: CrossSectionalRanking("return_5d"),
        lambda: CrossSectionalRanking("return_20d"),
        lambda: CrossSectionalRanking("volatility_10d"),
        lambda: CrossSectionalRanking("momentum_10d"),
    ]

    def _run(factories, error_fmt):
        # Construct each operator lazily; skip ones whose class is undefined,
        # and report (but survive) per-operator computation failures.
        nonlocal result_df
        for make in factories:
            try:
                operator = make()
            except NameError as e:
                print(f"构建因子算子失败: {e}")
                continue
            try:
                result_df = operator.apply(result_df)
            except Exception as e:
                print(error_fmt.format(factor_id=operator.factor_id, error=e))

    _run(stock_factories, "计算股票截面因子 {factor_id} 时出错: {error}")
    _run(date_factories, "计算日期截面因子 {factor_id} 时出错: {error}")

    return result_df
|
||||
@@ -33,7 +33,7 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
|
||||
# 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确
|
||||
stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map)
|
||||
# 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理
|
||||
if stk_trade_processed_df['_direction'].isnull().any():
|
||||
if stk_trade_processed_df['_direction'].is_null().any():
|
||||
print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。")
|
||||
# 可以选择填充NaN,例如用0填充,或者移除这些行
|
||||
# stk_trade_processed_df['_direction'].fillna(0, inplace=True)
|
||||
@@ -109,4 +109,3 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
|
||||
|
||||
print("股东增减持因子计算完成。")
|
||||
return df_merged
|
||||
|
||||
|
||||
348
main/factor/money_flow_factors.py
Normal file
348
main/factor/money_flow_factors.py
Normal file
@@ -0,0 +1,348 @@
|
||||
"""
|
||||
资金流因子模块
|
||||
包含基于股票截面的资金流因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import StockWiseFactor
|
||||
|
||||
|
||||
class LGFlowFactor(StockWiseFactor):
    """Net large-order buy volume: (large + extra-large buys) minus (large + extra-large sells)."""

    def __init__(self):
        super().__init__(
            name="lg_flow",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Aggregate both "large" tiers on each side, then take the difference.
        big_buys = group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        big_sells = group_df["sell_lg_vol"] + group_df["sell_elg_vol"]
        return (big_buys - big_sells).alias(self.factor_id)
|
||||
|
||||
|
||||
class FlowIntensityFactor(StockWiseFactor):
    """Money-flow intensity: net large-order flow normalized by total volume."""

    def __init__(self):
        super().__init__(
            name="flow_intensity",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Net flow from the two large-order tiers.
        big_buys = group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        big_sells = group_df["sell_lg_vol"] + group_df["sell_elg_vol"]
        net_big_flow = big_buys - big_sells

        # Scale by total volume; epsilon avoids division by zero.
        intensity = net_big_flow / (group_df["vol"] + 1e-8)
        return intensity.alias(self.factor_id)
|
||||
|
||||
|
||||
class FlowDivergenceFactor(StockWiseFactor):
    """Money-flow divergence: net small-order flow minus net large-order flow."""

    def __init__(self):
        super().__init__(
            name="flow_divergence",
            parameters={},
            required_factor_ids=["buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Net flow of the small-order tier.
        small_net = group_df["buy_sm_vol"] - group_df["sell_sm_vol"]

        # Net flow of the combined large tiers.
        big_net = (group_df["buy_lg_vol"] + group_df["buy_elg_vol"]) \
            - (group_df["sell_lg_vol"] + group_df["sell_elg_vol"])

        # Positive when retail buys while large orders sell (and vice versa).
        return (small_net - big_net).alias(self.factor_id)
|
||||
|
||||
|
||||
class FlowStructureFactor(StockWiseFactor):
    """Money-flow structure: day-over-day change in the large-order share of total buys."""

    def __init__(self):
        super().__init__(
            name="flow_structure",
            parameters={},
            required_factor_ids=["buy_sm_vol", "buy_lg_vol", "buy_elg_vol", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Total buy volume across small, large and extra-large tiers.
        total_buys = group_df["buy_sm_vol"] + group_df["buy_lg_vol"] + group_df["buy_elg_vol"]

        # Share of buys coming from the large tiers (epsilon avoids div-by-zero).
        big_share = (group_df["buy_lg_vol"] + group_df["buy_elg_vol"]) / (total_buys + 1e-8)

        # The factor is the first difference of that share.
        return big_share.diff().alias(self.factor_id)
|
||||
|
||||
|
||||
class FlowAccelerationFactor(StockWiseFactor):
    """Money-flow acceleration: second difference of net large-order flow."""

    def __init__(self):
        super().__init__(
            name="flow_acceleration",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Net large-order flow for each day.
        net_big_flow = (group_df["buy_lg_vol"] + group_df["buy_elg_vol"]) \
            - (group_df["sell_lg_vol"] + group_df["sell_elg_vol"])

        # First difference = velocity of flow; second difference = acceleration.
        velocity = net_big_flow.diff()
        acceleration = velocity.diff()
        return acceleration.alias(self.factor_id)
|
||||
|
||||
|
||||
class CostSqueeze(StockWiseFactor):
    """Cost-squeeze factor.

    High score when the holder cost distribution is narrow, the price sits
    near the median cost, and volume is shrinking versus its 5-day average.
    """

    factor_id = "factor_cost_squeeze"
    required_factor_ids = ["close", "cost_15pct", "cost_50pct", "cost_85pct", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        price = g["close"]
        volume = g["vol"]

        # Width of the 15%-85% holder cost band, and price position inside it.
        band_width = g["cost_85pct"] - g["cost_15pct"]
        centered = (price - g["cost_50pct"]) / (band_width + 1e-6)

        # Today's volume relative to the recent 5-day average.
        avg_vol_5d = volume.rolling_mean(window_size=5, min_periods=1)
        rel_volume = volume / (avg_vol_5d + 1e-6)

        # Core logic: narrow cost band + centered price + shrinking volume
        # → high squeeze score. Structure preserved exactly from the spec.
        raw_score = (
            (1.0 / (band_width / (price + 1e-6) + 1e-6))
            * (1.0 - centered.abs())
            * (1.0 / (rel_volume + 1e-6))
        )

        # Log transform to tame the heavy right tail.
        return (raw_score + 1.0).log().alias(self.factor_id)
|
||||
|
||||
|
||||
class HighCostSelling(StockWiseFactor):
    """High-cost-zone distribution factor.

    Active only when the close is above the 85% holder-cost level; then
    measures small-order buying against large-order selling — a pattern of
    retail absorbing institutional distribution.
    """

    factor_id = "factor_high_cost_selling"
    required_factor_ids = ["close", "cost_85pct", "buy_sm_vol", "sell_lg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        total_vol = g["vol"]

        # 1/0 gate: is the price above the 85th-percentile holder cost?
        above_band = (g["close"] > g["cost_85pct"]).cast(pl.Float64)

        # Small buys and large sells as fractions of total volume.
        small_buy_frac = g["buy_sm_vol"] / (total_vol + 1e-6)
        large_sell_frac = g["sell_lg_vol"] / (total_vol + 1e-6)

        distribution = above_band * small_buy_frac * large_sell_frac

        # Log transform (with epsilon) to dampen extreme values.
        return (distribution + 1e-6).log().alias(self.factor_id)
|
||||
|
||||
|
||||
class LowCostAccumulation(StockWiseFactor):
    """Low-cost accumulation factor.

    Active when the close is below the 15% holder-cost level but is NOT
    making a fresh 5-day low; then weighted by the large-order share of
    volume — a pattern of institutional accumulation in the low-cost zone.
    """

    factor_id = "factor_low_cost_accumulation"
    # NOTE(review): "his_low" is kept in the declared inputs for interface
    # stability, but the computation below no longer reads it (see fix note).
    required_factor_ids = ["close", "his_low", "cost_15pct", "buy_lg_vol", "buy_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},  # no parameters; extensible later
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        close = g["close"]
        vol = g["vol"]

        # Fix: the original also computed a rolling min of `his_low` that was
        # never used — its own comment noted the close-based rolling min is
        # the correct basis. The dead locals are removed; behavior unchanged.

        # 1/0 gate: price below the 15th-percentile holder cost.
        is_below_15 = (close < g["cost_15pct"]).cast(pl.Float64)

        # 1/0 gate: close is not a fresh 5-day low (including today).
        close_rolling_min_5 = close.rolling_min(window_size=5, min_periods=1)
        not_new_low = (close >= close_rolling_min_5).cast(pl.Float64)

        # Large-order buying as a share of total volume.
        big_buy_ratio = (g["buy_lg_vol"] + g["buy_elg_vol"]) / (vol + 1e-6)

        lc_am = is_below_15 * not_new_low * big_buy_ratio

        # Log transform (with epsilon) for numerical stability.
        return (lc_am + 1e-6).log().alias(self.factor_id)
|
||||
|
||||
|
||||
|
||||
class InstNetAccum(StockWiseFactor):
    """Institutional net accumulation factor.

    Net large-order volume as a fraction of floating shares, zeroed out on
    days whose 3-day return exceeds 5% in magnitude (suppresses chasing).
    """

    factor_id = "inst_net_accum"
    required_factor_ids = ["close", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "circ_mv"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        close = g["close"]

        # Net large-order volume, normalized by an implied float-share count.
        net_big = (g["buy_lg_vol"] + g["buy_elg_vol"]) - (g["sell_lg_vol"] + g["sell_elg_vol"])
        implied_float_shares = g["circ_mv"] / (close + 1e-6)
        accum = net_big / (implied_float_shares + 1e-6)

        # 3-day return, forward-filled then zero-filled for the leading nulls.
        ret_3d = close / close.shift(3) - 1
        ret_3d = ret_3d.fill_null(strategy="forward").fill_null(0.0)

        # Keep the signal only on quiet days (|3d return| < 5%).
        # NOTE(review): pl.when on a Series yields an Expr, not a Series;
        # with_columns accepts it, but confirm against the polars version used.
        accum = pl.when(ret_3d.abs() < 0.05).then(accum).otherwise(0.0)

        return accum.log1p().alias(self.factor_id)
|
||||
|
||||
class ChipLockin(StockWiseFactor):
    """Chip lock-in factor.

    High when the 5%-95% holder-cost band is contracting (5d avg width below
    10d avg width) and the winner rate is stable (low 5-day std).
    """

    factor_id = "chip_lockin"
    required_factor_ids = ["cost_5pct", "cost_95pct", "winner_rate"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Width of the holder cost distribution and its short/medium averages.
        band_width = g["cost_95pct"] - g["cost_5pct"]
        avg_width_5d = band_width.rolling_mean(window_size=5, min_periods=1)
        avg_width_10d = band_width.rolling_mean(window_size=10, min_periods=1)

        # Relative contraction of the band (epsilon avoids division by zero).
        contraction = (avg_width_10d - avg_width_5d) / (avg_width_10d + 1e-6)

        # Stability of the winner rate over 5 days.
        winner_stability = g["winner_rate"].rolling_std(window_size=5, min_periods=1).fill_null(1e-6)

        lockin = contraction * (1.0 / (winner_stability + 1e-6))
        return lockin.log1p().alias(self.factor_id)
|
||||
|
||||
|
||||
class RetailOutInstIn(StockWiseFactor):
    """Retail-out / institution-in factor.

    Product of net small-order outflow and net large-order inflow — high
    when retail sells into institutional buying.
    """

    factor_id = "retail_out_inst_in"
    # NOTE(review): "close" stays in the declared inputs for interface
    # stability, but the computation below no longer reads it (see fix note).
    required_factor_ids = ["close", "buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Fix: the original computed a 5-day rolling min of close for a
        # "price resilience" filter but never applied it — the dead locals
        # are removed and the misleading comment dropped. If that filter is
        # intended, it must be multiplied into the signal below (TODO confirm
        # with the factor's author); behavior here is unchanged.

        # Net small-order outflow (retail selling).
        small_net_out = g["sell_sm_vol"] - g["buy_sm_vol"]

        # Net large-order inflow (institutional buying).
        big_net_in = (g["buy_lg_vol"] + g["buy_elg_vol"]) - (g["sell_lg_vol"] + g["sell_elg_vol"])

        roii = small_net_out * big_net_in
        return roii.log1p().alias(self.factor_id)
|
||||
|
||||
|
||||
class AccumAccel(StockWiseFactor):
    """Accumulation acceleration factor.

    Change in the 5-day average net-large-order ratio versus its value five
    days earlier — positive when accumulation is speeding up.
    """

    factor_id = "accum_accel"
    required_factor_ids = ["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Net large-order volume as a fraction of total volume.
        net_big = (g["buy_lg_vol"] + g["buy_elg_vol"]) - (g["sell_lg_vol"] + g["sell_elg_vol"])
        net_ratio = net_big / (g["vol"] + 1e-6)

        # 5-day smoothed ratio, compared against itself 5 days back.
        smoothed = net_ratio.rolling_mean(window_size=5, min_periods=1)
        baseline = smoothed.shift(5).fill_null(strategy="forward").fill_null(0.0)
        accel = smoothed - baseline

        return accel.log1p().alias(self.factor_id)
|
||||
190
main/factor/operator_framework.py
Normal file
190
main/factor/operator_framework.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""
|
||||
因子算子框架 - Polars 实现(最终精简版)
|
||||
- 因子自行生成 ID
|
||||
- parameters 仅含计算参数(不含因子引用)
|
||||
- required_factor_ids 是因子ID字符串列表
|
||||
- calc_factor 通过 self.parameters 和 self.required_factor_ids 获取所需信息
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Literal, Dict, Any
|
||||
from collections import defaultdict, deque
|
||||
import json
|
||||
import polars as pl
|
||||
|
||||
|
||||
def _normalize_params(params: Dict[str, Any]) -> str:
|
||||
if not params:
|
||||
return ""
|
||||
return json.dumps(sorted(params.items()), separators=(",", ":"))
|
||||
|
||||
def _simple_factor_id(name: str, params: Dict[str, Any]) -> str:
|
||||
"""
|
||||
生成简洁因子ID,如:
|
||||
("sma", {"window": 5}) → "sma_5"
|
||||
("return", {"days": 20}) → "return_20"
|
||||
("rank", {"input": "sma_5"}) → "rank_sma_5"
|
||||
|
||||
要求: params 的值必须是简单类型(str/int/float/bool)
|
||||
"""
|
||||
if not params:
|
||||
return name
|
||||
|
||||
# 提取所有参数值,按 key 排序保证一致性
|
||||
parts = []
|
||||
for k in sorted(params.keys()):
|
||||
v = params[k]
|
||||
if isinstance(v, (str, int, float, bool)):
|
||||
# 布尔转小写字符串
|
||||
if isinstance(v, bool):
|
||||
v = str(v).lower()
|
||||
parts.append(str(v))
|
||||
else:
|
||||
raise ValueError(f"Unsupported parameter type for '{k}': {type(v)}. "
|
||||
f"Only str/int/float/bool allowed for simple ID.")
|
||||
|
||||
return f"{name}_{'_'.join(parts)}"
|
||||
|
||||
|
||||
class BaseFactor(ABC):
    """Abstract base class for all factor operators.

    A factor is identified by its generated ``factor_id`` (name plus
    parameter values) and declares the input columns it needs through
    ``required_factor_ids``.
    """

    def __init__(self, name: str, parameters: Dict[str, Any], required_factor_ids: List[str]):
        self.name = name
        self.parameters = parameters
        self.required_factor_ids = required_factor_ids
        # Derive the stable ID once, up front, from name + parameters.
        self.factor_id = self._generate_factor_id()

    def _generate_factor_id(self) -> str:
        # Delegates to the module-level helper for a deterministic ID.
        return _simple_factor_id(self.name, self.parameters)

    def get_factor_id(self) -> str:
        """Public accessor for the generated factor ID."""
        return self.factor_id

    @abstractmethod
    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Compute this factor for one group; implemented by subclasses."""

    @property
    @abstractmethod
    def operator_type(self) -> Literal["stock", "date"]:
        """Whether the factor groups by stock ("stock") or by date ("date")."""
|
||||
|
||||
|
||||
class StockWiseFactor(BaseFactor):
    """Factor computed per stock over its own time series (grouped by ts_code)."""

    @property
    def operator_type(self) -> Literal["stock"]:
        return "stock"

    def _sectional_roll(self, df: pl.DataFrame) -> pl.DataFrame:
        # Sort first so each per-stock group is chronological before the
        # time-series computation runs inside map_groups.
        ordered = df.sort(["ts_code", "trade_date"])
        augmented = (
            ordered
            .group_by("ts_code", maintain_order=True)
            .map_groups(lambda g: g.with_columns(self.calc_factor(g)))
        )
        return augmented.select(["ts_code", "trade_date", self.factor_id])

    def apply(self, df: pl.DataFrame) -> pl.DataFrame:
        """Attach this factor's column to *df*.

        Raises:
            ValueError: if any required input column is absent from *df*.
        """
        missing = [fid for fid in self.required_factor_ids if fid not in df.columns]
        if missing:
            raise ValueError(f"Missing dependencies for {self.factor_id}: {missing}")
        long_table = self._sectional_roll(df)
        # Left join keeps df's row set intact; only the factor column is added.
        return df.join(
            long_table.select(["ts_code", "trade_date", self.factor_id]),
            on=["ts_code", "trade_date"],
            how="left"
        )
|
||||
|
||||
|
||||
class DateWiseFactor(BaseFactor):
    """Factor computed cross-sectionally, one trade date at a time."""

    @property
    def operator_type(self) -> Literal["date"]:
        return "date"

    def _sectional_roll(self, df: pl.DataFrame) -> pl.DataFrame:
        """Run calc_factor inside each trade_date group (the cross-section)."""
        ordered = df.sort(["trade_date", "ts_code"])
        return (
            ordered
            .group_by("trade_date", maintain_order=True)
            .map_groups(lambda grp: grp.with_columns(self.calc_factor(grp)))
            .select(["ts_code", "trade_date", self.factor_id])
        )

    def apply(self, df: pl.DataFrame) -> pl.DataFrame:
        """Compute the factor and left-join it back onto df by (ts_code, trade_date).

        Raises:
            ValueError: if any required dependency column is absent from df.
        """
        missing = [fid for fid in self.required_factor_ids if fid not in df.columns]
        if missing:
            raise ValueError(f"Missing dependencies for {self.factor_id}: {missing}")
        factor_table = self._sectional_roll(df)
        return df.join(
            factor_table.select(["ts_code", "trade_date", self.factor_id]),
            on=["ts_code", "trade_date"],
            how="left"
        )
|
||||
|
||||
|
||||
class FactorGraph:
    """Dependency-aware factor registry with topologically-ordered execution.

    Factors are registered by id; `compute` resolves transitive dependencies
    of the requested targets and applies each factor exactly once, in
    dependency order. Ids that are not registered are assumed to be raw
    DataFrame columns.
    """

    def __init__(self):
        self._factors = {}  # factor_id -> BaseFactor instance

    def add_factor(self, factor):
        """Register a factor.

        Raises:
            ValueError: if a factor with the same id is already registered.
        """
        fid = factor.get_factor_id()
        if fid in self._factors:
            raise ValueError(f"Factor '{fid}' already registered.")
        self._factors[fid] = factor

    def _topological_sort(self, target_ids: List[str]) -> List[str]:
        """Return an execution order covering targets and all their dependencies.

        Uses a BFS to collect the reachable set, then Kahn's algorithm over
        the registered subset (unregistered ids are raw columns and need no
        computation).

        Raises:
            RuntimeError: if the dependency graph contains a cycle.
        """
        reachable = set()
        queue = deque(target_ids)
        while queue:
            fid = queue.popleft()
            if fid in reachable:
                continue
            reachable.add(fid)
            if fid in self._factors:
                queue.extend(dep for dep in self._factors[fid].required_factor_ids
                             if dep not in reachable)

        # Only registered factors are scheduled; compute this set once.
        to_compute = {f for f in reachable if f in self._factors}
        indegree = {f: 0 for f in to_compute}
        adj = defaultdict(list)
        for f in to_compute:
            for dep in self._factors[f].required_factor_ids:
                if dep in to_compute:
                    adj[dep].append(f)
                    indegree[f] += 1

        # Kahn's algorithm: repeatedly schedule zero-indegree nodes.
        ready = deque(f for f in to_compute if indegree[f] == 0)
        order = []
        while ready:
            node = ready.popleft()
            order.append(node)
            for nb in adj[node]:
                indegree[nb] -= 1
                if indegree[nb] == 0:
                    ready.append(nb)

        print("\n=== Factor Dependency Graph ===")
        for fid in sorted(to_compute):
            deps = [d for d in self._factors[fid].required_factor_ids if d in to_compute]
            print(f"{fid} -> {deps}")
        print("================================\n")

        if len(order) != len(to_compute):
            # Anything not scheduled sits on a cycle; name it in the error.
            unresolved = sorted(to_compute - set(order))
            raise RuntimeError(f"Circular dependency! Unresolved factors: {unresolved}")
        return order

    def compute(self, df: "pl.DataFrame", target_factor_ids: List[str]) -> "pl.DataFrame":
        """Apply every factor needed for `target_factor_ids` to a copy of df.

        Columns already present in df (raw data or precomputed factors) are
        skipped; the input DataFrame itself is never mutated.
        """
        exec_order = self._topological_sort(target_factor_ids)
        current_df = df.clone()
        for fid in exec_order:
            if fid in current_df.columns:
                continue  # already materialized
            current_df = self._factors[fid].apply(current_df)
        return current_df
|
||||
86
main/factor/qlib_utils.py
Normal file
86
main/factor/qlib_utils.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import polars as pl
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import qlib
|
||||
from qlib.data.dataset.handler import DataHandlerLP
|
||||
from qlib.contrib.report import analysis_model, analysis_position
|
||||
from qlib.constant import REG_CN
|
||||
from typing import List
|
||||
|
||||
import polars as pl
|
||||
import pandas as pd
|
||||
|
||||
def prepare_data(
    polars_df: pl.DataFrame,
    label_horizon: int = 5,
    open_col: str = "open",
    date_col: str = "trade_date",
    code_col: str = "ts_code",
) -> pd.DataFrame:
    """Build a qlib-style pandas frame indexed by (datetime, instrument).

    Adds a forward-return `label` column: buy at the next session's open,
    sell `label_horizon` sessions after that at the open, i.e.
    label = open[t+1+horizon] / open[t+1] - 1 (per instrument).

    Raises:
        ValueError: if any of the required columns is missing.
    """
    required = [date_col, code_col, open_col]
    missing = [col for col in required if col not in polars_df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    ordered = polars_df.sort([code_col, date_col])

    # Per-instrument future open prices: T+1 buy, T+1+horizon sell.
    buy_price = pl.col(open_col).shift(-1).over(code_col)
    sell_price = pl.col(open_col).shift(-(1 + label_horizon)).over(code_col)
    labeled = ordered.with_columns((sell_price / buy_price - 1).alias("label"))

    # Convert to pandas with the (datetime, instrument) MultiIndex qlib expects.
    out = labeled.to_pandas()
    out = out.rename(columns={date_col: "datetime", code_col: "instrument"})
    out["datetime"] = pd.to_datetime(out["datetime"])
    out = out.set_index(["datetime", "instrument"])
    return out.sort_index()
|
||||
|
||||
# 2. Qlib初始化
|
||||
def initialize_qlib(provider_uri: str = "/mnt/d/PyProject/NewStock/data/qlib"):
    """Initialize qlib for daily China A-share data.

    Args:
        provider_uri: path to the qlib data bundle. Previously this was
            hard-coded to a machine-specific path; it is now a parameter
            with that path as the backward-compatible default.

    Uses REG_CN so the China A-share trading calendar and cost settings apply.
    NOTE(review): despite the printed message, this points qlib at a real
    provider directory rather than a purely in-memory setup — confirm the
    intended mode.
    """
    qlib.init(provider_uri=provider_uri, region=REG_CN, freq="day")
    print("Qlib has been initialized in memory mode.")
|
||||
|
||||
|
||||
import pandas as pd
|
||||
import lightgbm as lgb
|
||||
from qlib.workflow import R
|
||||
from qlib.workflow.record_temp import PortAnaRecord # SignalRecord 在此场景下未被直接使用
|
||||
|
||||
def train_and_backtest_from_df(
    df: pd.DataFrame,
    all_features: list,
    label_col: str = "label",
    topk: int = 50,
    start_train: str = "2019-01-01",
    end_train: str = "2021-12-31",
    start_valid: str = "2022-01-01",
    end_valid: str = "2022-12-31",
    start_test: str = "2023-01-01",
    end_test: str = "2023-12-31",
):
    """
    Train a model and run a backtest directly from a prepared pandas DataFrame.

    NOTE(review): the visible body only normalizes the index, builds a
    DataHandlerLP and prints diagnostics — `all_features`, `label_col`,
    `topk` and all date-range parameters are not used here; the actual
    training/backtest steps appear unimplemented. Also mutates `df` in
    place (index renaming and in-place sort) — confirm callers expect that.

    Args:
        df: MultiIndex (datetime, instrument) frame with features and label.
        all_features: feature column names (currently unused in this body).
        label_col: label column name (currently unused in this body).
        topk: portfolio size for the backtest (currently unused in this body).
        start_train..end_test: split boundaries (currently unused in this body).

    Raises:
        ValueError: if df does not carry a pandas MultiIndex.
    """
    # === 1. Manually prepare the data ===
    if not isinstance(df.index, pd.MultiIndex):
        raise ValueError("df 必须是 MultiIndex (datetime, instrument)")
    df.index = df.index.set_names(["datetime", "instrument"])
    # Coerce the datetime level to real timestamps in case it arrived as str.
    df.index = df.index.set_levels(pd.to_datetime(df.index.levels[0]), level='datetime')
    df.sort_index(inplace=True)

    dh = DataHandlerLP.from_df(df)
    print(dh.fetch())
    # NOTE(review): _infer/_learn are private DataHandlerLP attributes —
    # printed here for debugging only.
    print(dh._infer)
    print(dh._learn)
|
||||
156
main/factor/select_factor.py
Normal file
156
main/factor/select_factor.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import spearmanr
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.inspection import permutation_importance
|
||||
|
||||
def select_factors(
    df,
    all_features,
    label_col='label',
    ic_threshold=0.01,
    corr_threshold=0.5,
    ir_threshold=0.3,
    sign_consistency_threshold=0.3,
    perm_imp_threshold=0.0,
    n_perm=5,
    random_state=42,
    verbose=True  # print a log line for every filtering stage
):
    """
    Factor-selection pipeline with detailed per-stage filtering logs.

    Stages:
      0. Flatten the (datetime, instrument) frame.
      1. Univariate screen: keep factors with |Spearman IC| >= ic_threshold.
      2. Redundancy removal: greedy de-correlation, keeping the higher-|IC|
         member of any pair correlated above corr_threshold.
      3. Permutation importance on a RandomForestRegressor
         (> perm_imp_threshold survives).
      4. Time-series stability: per-date IC must show |IR| >= ir_threshold
         and a consistent sign.

    Stages 3 and 4 fall back to their input set rather than returning empty.

    Returns:
        (final_features, log) where log maps stage name -> surviving count.
    """
    log = {}  # surviving factor count after each stage
    if verbose:
        print(f"🔍 开始因子筛选 | 初始因子数: {len(all_features)}")

    # --- Step 0: flatten ---
    needed_cols = all_features + [label_col]
    df_flat = df[needed_cols].reset_index()
    X = df_flat[all_features]
    y = df_flat[label_col]

    # --- Step 1: univariate IC screen ---
    ic_series = X.apply(lambda col: spearmanr(col, y, nan_policy='omit')[0])
    valid_features = ic_series[ic_series.abs() >= ic_threshold].index.tolist()
    log['after_univariate'] = len(valid_features)
    if verbose:
        dropped = len(all_features) - len(valid_features)
        print(f" ✅ 单变量筛选 (|IC| ≥ {ic_threshold}) → 保留 {len(valid_features)} 个 (+{dropped} 被过滤)")

    if not valid_features:
        return [], log

    del X
    X_valid = df_flat[valid_features]

    # --- Step 2: redundancy removal ---
    corr_mat = X_valid.corr(method='spearman').abs()
    selected = []
    for f in valid_features:
        if not selected:
            selected.append(f)
        else:
            max_corr = corr_mat.loc[f, selected].max()
            if max_corr < corr_threshold:
                selected.append(f)
            else:
                # Correlated with an already-selected factor: keep whichever
                # has the stronger univariate IC.
                existing = corr_mat.loc[f, selected].idxmax()
                if abs(ic_series[f]) > abs(ic_series[existing]):
                    selected.remove(existing)
                    selected.append(f)
    del corr_mat, X_valid

    log['after_redundancy'] = len(selected)
    if verbose:
        dropped = len(valid_features) - len(selected)
        print(f" 🔗 去冗余 (corr < {corr_threshold}) → 保留 {len(selected)} 个 (+{dropped} 被过滤)")

    if not selected:
        return [], log

    # --- Step 3: permutation importance ---
    X_sel = df_flat[selected]
    model = RandomForestRegressor(
        n_estimators=50,
        max_depth=10,
        random_state=random_state,
        n_jobs=-1
    )
    model.fit(X_sel, y)

    perm_result = permutation_importance(
        model, X_sel, y,
        n_repeats=n_perm,
        random_state=random_state,
        n_jobs=-1
    )
    perm_imp = pd.Series(perm_result.importances_mean, index=selected)
    candidates = perm_imp[perm_imp > perm_imp_threshold].index.tolist()
    del model, perm_result, X_sel

    # Fall back to the de-correlated set if everything was filtered out.
    if not candidates:
        candidates = selected
        if verbose:
            print(" ⚠️ Permutation 全过滤,回退到去冗余结果")
    log['after_permutation'] = len(candidates)
    if verbose and len(candidates) != len(selected):
        dropped = len(selected) - len(candidates)
        print(f" 📊 Permutation Importance (> {perm_imp_threshold}) → 保留 {len(candidates)} 个 (+{dropped} 被过滤)")

    # --- Step 4: time-series stability ---
    grouped = df_flat.groupby('datetime')
    ic_records = []
    for date, group in grouped:
        if len(group) < 10:  # too few names for a meaningful cross-sectional IC
            continue
        row = {'datetime': date}
        for f in candidates:
            try:
                ic, _ = spearmanr(group[f], group[label_col], nan_policy='omit')
                row[f] = ic if np.isfinite(ic) else 0.0
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; catch Exception only.
            except Exception:
                row[f] = 0.0
        ic_records.append(row)

    if not ic_records:
        log['final'] = len(candidates)
        if verbose:
            print(" ⏳ 无足够时间窗口,跳过稳定性验证")
        return candidates, log

    ic_df = pd.DataFrame(ic_records).set_index('datetime')
    del ic_records

    mean_ic = ic_df.mean()
    std_ic = ic_df.std().replace(0, np.nan)
    ir = mean_ic / std_ic
    sign_consistency = (ic_df > 0).mean()

    # NOTE(review): with the default sign_consistency_threshold=0.3 the sign
    # clause (>= 0.3 OR <= 0.7) is always true, so only the IR test filters;
    # a threshold above 0.5 is needed for the sign test to be selective.
    stable_mask = (
        ir.abs() >= ir_threshold
    ) & (
        (sign_consistency >= sign_consistency_threshold) |
        (sign_consistency <= 1 - sign_consistency_threshold)
    )
    final_features = stable_mask[stable_mask].index.tolist()

    if not final_features:
        final_features = candidates
        if verbose:
            print(" ⚠️ 稳定性全过滤,回退到 Permutation 结果")

    log['final'] = len(final_features)
    if verbose and len(final_features) != len(candidates):
        dropped = len(candidates) - len(final_features)
        print(f" 🕰️ 稳定性验证 (IR ≥ {ir_threshold}, 符号一致性 ≥ {sign_consistency_threshold}) → 保留 {len(final_features)} 个 (+{dropped} 被过滤)")

    del df_flat, ic_df, mean_ic, std_ic, ir, sign_consistency

    if verbose:
        print(f"🎯 最终因子数: {len(final_features)}")
        if len(final_features) <= 5:
            print("💡 提示: 因子过少,建议降低 ic_threshold 或 corr_threshold")

    return final_features, log
|
||||
259
main/factor/sentiment_factors.py
Normal file
259
main/factor/sentiment_factors.py
Normal file
@@ -0,0 +1,259 @@
|
||||
"""
|
||||
市场情绪因子模块
|
||||
包含基于股票截面的市场情绪因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import talib
|
||||
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
|
||||
|
||||
|
||||
class SentimentPanicGreedFactor(StockWiseFactor):
    """Panic/greed sentiment factor.

    Combines a volatility "surprise" (today's true range relative to ATR)
    with the day's percentage change, amplifies it, and smooths with a
    short SMA.
    """

    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        super().__init__(
            name="sentiment_panic_greed",
            parameters={"window_atr": window_atr, "window_smooth": window_smooth},
            required_factor_ids=["open", "high", "low", "close", "vol", "pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close_array = group_df["close"].to_numpy()
        high_array = group_df["high"].to_numpy()
        low_array = group_df["low"].to_numpy()
        prev_close = group_df["close"].shift(1).to_numpy()  # NaN on the first row

        window_atr = self.parameters["window_atr"]
        window_smooth = self.parameters["window_smooth"]

        # ATR via talib (NaN during its warm-up window).
        atr_values = talib.ATR(high_array, low_array, close_array, timeperiod=window_atr)

        # True range = max of (H-L, |H-prevC|, |L-prevC|).
        # BUGFIX: np.maximum(a, b, c) treats the THIRD positional argument as
        # `out=`, silently writing max(a, b) into it instead of taking a
        # three-way maximum. Use ufunc reduce for the correct element-wise max.
        tr = np.maximum.reduce([
            high_array - low_array,
            np.abs(high_array - prev_close),
            np.abs(low_array - prev_close),
        ])

        # Volatility surprise: how much today's range exceeds its ATR norm,
        # signed by the day's return.
        volatility_surprise = (tr / (atr_values + 1e-8) - 1) * group_df["pct_chg"].to_numpy()

        sentiment = volatility_surprise * 2  # amplify gap/shock impact

        # Smooth the raw signal.
        smoothed_sentiment = talib.SMA(sentiment, timeperiod=window_smooth)

        return pl.Series(smoothed_sentiment).alias(self.factor_id)
|
||||
|
||||
|
||||
class SentimentBreadthFactor(StockWiseFactor):
    """Market-breadth sentiment proxy: daily return scaled by relative volume."""

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        super().__init__(
            name="sentiment_breadth",
            parameters={"window_vol": window_vol, "window_smooth": window_smooth},
            required_factor_ids=["pct_chg", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        vol_window = self.parameters["window_vol"]
        smooth_window = self.parameters["window_smooth"]

        # Baseline volume: rolling SMA over the volume window.
        volume = group_df["vol"].to_numpy()
        baseline_volume = talib.SMA(volume, timeperiod=vol_window)

        # Price/volume agreement: today's return weighted by volume relative
        # to its baseline (epsilon avoids division by zero).
        daily_return = group_df["pct_chg"].to_numpy()
        raw_breadth = daily_return * (volume / (baseline_volume + 1e-8))

        # Final smoothing pass.
        smoothed = talib.SMA(raw_breadth, timeperiod=smooth_window)
        return pl.Series(smoothed).alias(self.factor_id)
|
||||
|
||||
|
||||
class SentimentReversalFactor(StockWiseFactor):
    """Sentiment reversal factor: negative trailing cumulative return scaled
    by recent volatility (recent winners score low, losers score high)."""

    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        super().__init__(
            name="sentiment_reversal",
            parameters={"window_ret": window_ret, "window_vol": window_vol},
            required_factor_ids=["pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        window_ret = self.parameters["window_ret"]
        window_vol = self.parameters["window_vol"]

        # Compounded return over a trailing window of window_ret bars.
        # NOTE(review): compounds as (1 + pct_chg); if pct_chg is quoted in
        # percent (e.g. 2.5 for 2.5%) rather than as a fraction, this
        # massively overstates returns — confirm the unit at the data source.
        pct_chg = group_df["pct_chg"].to_numpy()
        return_period = window_ret
        cum_return = np.array([np.prod(1 + pct_chg[i:i+return_period]) - 1
                               for i in range(len(pct_chg) - return_period + 1)])
        # Left-pad with NaN so each value aligns with the LAST row of its window.
        cum_return = np.pad(cum_return, (return_period - 1, 0), constant_values=np.nan)

        # Rolling standard deviation of daily moves.
        volatility = talib.STDDEV(pct_chg, timeperiod=window_vol)

        # Reversal: penalize recent gains, amplified by volatility.
        reversal = -cum_return * volatility

        return pl.Series(reversal).alias(self.factor_id)
|
||||
|
||||
|
||||
class PriceDeductionFactor(StockWiseFactor):
    """Price-deduction factor: today's close minus the close (n-1) bars earlier."""

    def __init__(self, n: int = 10):
        super().__init__(
            name="price_deduction",
            parameters={"n": n},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        lookback = self.parameters["n"]
        close = group_df["close"]
        # "Deduction price" = the close from (n-1) periods ago.
        gap = close - close.shift(lookback - 1)
        return gap.alias(self.factor_id)
|
||||
|
||||
|
||||
class PriceDeductionRatioFactor(StockWiseFactor):
    """Price-deduction ratio: the deduction gap normalized by the n-period SMA."""

    def __init__(self, n: int = 10):
        super().__init__(
            name="price_deduction_ratio",
            parameters={"n": n},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        lookback = self.parameters["n"]
        close = group_df["close"]

        # n-period simple moving average as the normalization base.
        baseline = close.rolling_mean(lookback)
        # Deduction gap: close vs. the close (n-1) periods ago.
        deduction_gap = close - close.shift(lookback - 1)

        # Epsilon guards against division by zero.
        ratio = deduction_gap / (baseline + 1e-8)
        return ratio.alias(self.factor_id)
|
||||
|
||||
|
||||
class IndustryMomentumLeadership(StockWiseFactor):
    """Industry momentum "leadership" score: blended industry-return
    percentiles scaled by a log-damped ROE quality term."""

    # Class-level defaults; the base __init__ re-assigns instance attributes
    # with the same names.
    factor_id = "industry_momentum_leadership"
    required_factor_ids = [
        "industry_return_5_percentile",
        "industry_return_20_percentile",
        "roe"
    ]

    def __init__(self):
        super(IndustryMomentumLeadership, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        pct5 = g["industry_return_5_percentile"]
        pct20 = g["industry_return_20_percentile"]
        roe = g["roe"]

        # Momentum blend: 5-day percentile weighted higher (short-term leaders).
        momentum_score = 0.7 * pct5 + 0.3 * pct20

        # Quality: log1p(ROE) when ROE > 0, else 0 (log1p damps extremes).
        # NOTE(review): pl.when(...) over Series operands builds an
        # expression, so quality_score (and the product below) is an Expr,
        # not a Series; the base class feeds it to with_columns, where that
        # works — confirm against the pinned polars version.
        quality_score = pl.when(roe > 0).then(roe.log1p()).otherwise(0.0)

        # Leadership = momentum × (1 + quality).
        leadership = momentum_score * (quality_score + 1.0)

        return leadership.alias(self.factor_id)
|
||||
|
||||
|
||||
class LeadershipPersistenceScore(StockWiseFactor):
    """Leadership persistence score: blended industry momentum percentiles
    scaled by a log-damped fundamental composite (undistributed profit per
    share, ROE, book value per share)."""

    # Class-level defaults; the base __init__ re-assigns instance attributes
    # with the same names.
    factor_id = "leadership_persistence_score"
    required_factor_ids = [
        "industry_return_5_percentile",
        "industry_return_20_percentile",
        "undist_profit_ps",
        "roe",
        "bps"
    ]

    def __init__(self):
        super(LeadershipPersistenceScore, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        pct5 = g["industry_return_5_percentile"]
        pct20 = g["industry_return_20_percentile"]
        undist = g["undist_profit_ps"]
        roe = g["roe"]
        bps = g["bps"]

        # Momentum blend: short-term percentile weighted slightly higher.
        momentum = 0.6 * pct5 + 0.4 * pct20

        # Fundamental quality: log1p of each positive metric, 0 otherwise
        # (log1p damps extreme values).
        # NOTE(review): pl.when(...) over Series operands builds expressions,
        # so `quality` and `score` are Exprs, not Series; the base class
        # passes the result to with_columns, where that works — confirm
        # against the pinned polars version.
        quality = (
            pl.when(undist > 0).then(undist.log1p()).otherwise(0.0) +
            pl.when(roe > 0).then(roe.log1p()).otherwise(0.0) +
            pl.when(bps > 0).then(bps.log1p()).otherwise(0.0)
        )

        # Score = momentum × (1 + quality).
        score = momentum * (quality + 1.0)
        return score.alias(self.factor_id)
|
||||
|
||||
|
||||
class DynamicIndustryLeadership(DateWiseFactor):
    """Per-date industry leadership: sum of within-industry z-scores of
    5-day return, large-order flow, and log turnover."""

    factor_id = "dynamic_industry_leadership"
    required_factor_ids = ["l2_code", "return_5", "lg_flow", "turnover_rate"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        def industry_z(expr):
            # z-score within each l2_code industry; fall back to 0 when the
            # in-industry std is ~0 to avoid division blow-ups.
            center = expr.mean().over("l2_code")
            spread = expr.std().over("l2_code")
            return pl.when(spread > 1e-8).then((expr - center) / spread).otherwise(0.0)

        # Composite leadership = momentum z + flow z + log-turnover z.
        composite = (
            industry_z(pl.col("return_5"))
            + industry_z(pl.col("lg_flow"))
            + industry_z(pl.col("turnover_rate").log1p())
        )

        # Materialize the expression against this date's cross-section.
        return g.select(composite.alias(self.factor_id)).to_series()
|
||||
134
main/factor/special_factors.py
Normal file
134
main/factor/special_factors.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
特殊因子模块
|
||||
包含基于股票截面的特殊因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from main.factor.operator_framework import StockWiseFactor
|
||||
|
||||
|
||||
class LimitFactor(StockWiseFactor):
    """Limit-move flag: +1 for a limit-up close, -1 for limit-down, 0 otherwise."""

    def __init__(self):
        super().__init__(
            name="limit",
            parameters={},
            required_factor_ids=["close", "up_limit", "down_limit"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close = group_df["close"]
        # Exact equality with the exchange limit prices flags a limit move.
        hit_upper = (close == group_df["up_limit"]).cast(pl.Int32)
        hit_lower = (close == group_df["down_limit"]).cast(pl.Int32)
        # +1 / -1 / 0 encoding in a single column.
        return (hit_upper - hit_lower).alias(self.factor_id)
|
||||
|
||||
|
||||
class VolumeRatioFactor(StockWiseFactor):
    """Volume ratio: today's volume relative to its 5-day rolling average."""

    def __init__(self):
        super().__init__(
            name="volume_ratio",
            parameters={},
            required_factor_ids=["vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        volume = group_df["vol"]
        baseline = volume.rolling_mean(5)
        # Epsilon guards against division by zero.
        return (volume / (baseline + 1e-8)).alias(self.factor_id)
|
||||
|
||||
|
||||
class BBI_RATIO_FACTOR(StockWiseFactor):
    """BBI ratio: the Bull-Bear Index (mean of 3/6/12/24-period SMAs) over close."""

    def __init__(self):
        super().__init__(
            name="bbi_ratio",
            parameters={},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close = group_df["close"]

        # BBI = average of the four standard SMA windows.
        sma3, sma6, sma12, sma24 = (close.rolling_mean(w) for w in (3, 6, 12, 24))
        bbi = (sma3 + sma6 + sma12 + sma24) / 4

        # Normalize by the close; epsilon guards against division by zero.
        return (bbi / (close + 1e-8)).alias(self.factor_id)
|
||||
|
||||
|
||||
class VolatilitySlopeFactor(StockWiseFactor):
    """Volatility slope: first difference of the rolling return volatility.

    NOTE: `window_slope` is stored for future use; the current
    implementation approximates the slope with a one-step difference rather
    than a windowed regression.
    """

    def __init__(self, window_vol: int = 20, window_slope: int = 5):
        super().__init__(
            name="volatility_slope",
            parameters={"window_vol": window_vol, "window_slope": window_slope},
            required_factor_ids=["pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        vol_window = self.parameters["window_vol"]

        # Rolling standard deviation of daily percentage moves.
        rolling_vol = group_df["pct_chg"].rolling_std(vol_window)

        # Simplified slope: day-over-day change of the rolling volatility
        # (window_slope is intentionally not consumed here).
        return rolling_vol.diff().alias(self.factor_id)
|
||||
|
||||
|
||||
class PriceVolumeTrendFactor(StockWiseFactor):
    """Price/volume trend: product of the daily price change and volume change."""

    def __init__(self):
        super().__init__(
            name="price_volume_trend",
            parameters={},
            required_factor_ids=["close", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Day-over-day deltas of price and volume.
        price_delta = group_df["close"].diff()
        volume_delta = group_df["vol"].diff()
        # Positive when price and volume move in the same direction.
        return (price_delta * volume_delta).alias(self.factor_id)
|
||||
144
main/factor/technical_factors.py
Normal file
144
main/factor/technical_factors.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
技术指标因子模块
|
||||
包含基于股票截面的技术指标因子实现
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import talib
|
||||
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
|
||||
|
||||
|
||||
class SMAFactor(StockWiseFactor):
    """Simple moving average of the close price."""

    def __init__(self, window: int):
        super().__init__(
            name="SMA",
            parameters={"window": window},   # numeric parameters only
            required_factor_ids=["close"]    # depends on the raw close column
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        span = self.parameters["window"]
        return group_df["close"].rolling_mean(window_size=span).alias(self.factor_id)
|
||||
|
||||
|
||||
class EMAFactor(StockWiseFactor):
    """Exponential moving average of the close price."""

    def __init__(self, window: int):
        super().__init__(
            name="EMA",
            parameters={"window": window},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        span = self.parameters["window"]
        return group_df["close"].ewm_mean(span=span).alias(self.factor_id)
|
||||
|
||||
|
||||
class ATRFactor(StockWiseFactor):
    """Average True Range over a configurable window (via talib)."""

    def __init__(self, window: int):
        super().__init__(
            name="ATR",
            parameters={"window": window},
            required_factor_ids=["high", "low", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        period = self.parameters["window"]
        # talib operates on numpy arrays.
        highs = group_df["high"].to_numpy()
        lows = group_df["low"].to_numpy()
        closes = group_df["close"].to_numpy()
        atr = talib.ATR(highs, lows, closes, timeperiod=period)
        return pl.Series(atr).alias(self.factor_id)
|
||||
|
||||
|
||||
class OBVFactor(StockWiseFactor):
    """On-Balance Volume indicator (via talib)."""

    def __init__(self):
        super().__init__(
            name="OBV",
            parameters={},
            required_factor_ids=["close", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # talib operates on numpy arrays.
        closes = group_df["close"].to_numpy()
        volumes = group_df["vol"].to_numpy()
        obv = talib.OBV(closes, volumes)
        return pl.Series(obv).alias(self.factor_id)
|
||||
|
||||
|
||||
class MACDFactor(StockWiseFactor):
    """MACD indicator; exposes the MACD line itself as the factor value."""

    def __init__(self, fast_period: int = 12, slow_period: int = 26, signal_period: int = 9):
        super().__init__(
            name="MACD",
            parameters={"fast_period": fast_period, "slow_period": slow_period, "signal_period": signal_period},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        params = self.parameters
        # talib returns (macd, signal, histogram); only the MACD line is kept.
        macd_line, _signal, _hist = talib.MACD(
            group_df["close"].to_numpy(),
            fastperiod=params["fast_period"],
            slowperiod=params["slow_period"],
            signalperiod=params["signal_period"],
        )
        return pl.Series(macd_line).alias(self.factor_id)
|
||||
|
||||
|
||||
class RSI_Factor(StockWiseFactor):
    """Relative Strength Index over a configurable window (via talib)."""

    def __init__(self, window: int = 14):
        super().__init__(
            name="RSI",
            parameters={"window": window},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        period = self.parameters["window"]
        # talib operates on numpy arrays.
        closes = group_df["close"].to_numpy()
        rsi = talib.RSI(closes, timeperiod=period)
        return pl.Series(rsi).alias(self.factor_id)
|
||||
|
||||
|
||||
|
||||
class CrossSectionalRankFactor(DateWiseFactor):
    """Cross-sectional percentile rank of a column, centered and rescaled.

    The (rank_pct - 0.5) * 3.46 transform maps the uniform [0, 1] percentile
    to roughly zero mean / unit variance (3.46 ≈ sqrt(12), the inverse std
    of a uniform distribution).
    """

    def __init__(self, column: str, name: str = None, ascending: bool = True):
        self.target_column = column
        self.ascending = ascending
        factor_name = name or f"{column}_rank"
        super().__init__(
            name=factor_name,
            parameters={"column": column, "ascending": ascending},
            required_factor_ids=[column]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        column = group_df[self.target_column]
        # Average-rank percentile within the date's cross-section.
        rank_pct = column.rank(method="average", descending=not self.ascending) / len(column)
        centered = (rank_pct - 0.5) * 3.46
        return centered.alias(self.factor_id)
|
||||
146
main/factor/utils.py
Normal file
146
main/factor/utils.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import pandas as pd
|
||||
|
||||
def add_financial_factor(
    main_df: pd.DataFrame,
    financial_df: pd.DataFrame,
    factor_value_col: str,  # column in financial_df holding the indicator value
    ts_code_col: str = "ts_code",
    trade_date_col: str = "trade_date",
    ann_date_col: str = "ann_date",  # announcement date (fallback)
    f_ann_date_col: str = "f_ann_date",  # actual announcement date (preferred)
) -> pd.DataFrame:
    """Attach a financial-statement indicator to a daily trading DataFrame.

    For every (stock, trade date) row in ``main_df`` the most recently
    announced value of ``factor_value_col`` from ``financial_df`` is joined
    in via ``pd.merge_asof`` (direction="backward"): only statements whose
    announcement date is on or before the trade date are considered.

    Args:
        main_df: Daily time-series data; must contain ``ts_code_col`` and
            ``trade_date_col``.
        financial_df: Financial-indicator data; must contain ``ts_code_col``,
            ``factor_value_col`` and at least one of ``f_ann_date_col`` /
            ``ann_date_col``.
        factor_value_col: Indicator column to merge; the new column keeps
            this name.
        ts_code_col: Stock-code column name shared by both frames.
        trade_date_col: Trade-date column name in ``main_df``.
        ann_date_col: Announcement-date column, used only when the preferred
            ``f_ann_date_col`` is absent.
        f_ann_date_col: Actual announcement-date column (used when present).

    Returns:
        A copy of ``main_df`` (sorted by date then code) with the indicator
        column added. If the column already exists, the input frame is
        returned unchanged.

    Raises:
        ValueError: If required columns are missing from either frame.
    """
    # Idempotence: nothing to do if the factor is already present.
    if factor_value_col in main_df.columns:
        return main_df

    # --- validation ---
    required_main_cols = [ts_code_col, trade_date_col]
    if not all(col in main_df.columns for col in required_main_cols):
        raise ValueError(f"主 DataFrame 必须包含列: {required_main_cols}")

    # Prefer the actual announcement date; fall back to the plain one.
    if f_ann_date_col and f_ann_date_col in financial_df.columns:
        effective_date_col = f_ann_date_col
        print(f"使用 '{f_ann_date_col}' 作为财务数据生效日期。")
    elif ann_date_col and ann_date_col in financial_df.columns:
        effective_date_col = ann_date_col
        print(f"使用 '{ann_date_col}' 作为财务数据生效日期。")
    else:
        raise ValueError(
            f"财务指标 DataFrame 必须包含列 '{f_ann_date_col}' 或 '{ann_date_col}' 作为数据生效日期"
        )

    required_financial_cols = [ts_code_col, factor_value_col, effective_date_col]
    if not all(col in financial_df.columns for col in required_financial_cols):
        raise ValueError(f"财务指标 DataFrame 必须包含列: {required_financial_cols}")

    # --- normalization (work on copies so caller frames are not mutated) ---
    main_df = main_df.copy()
    financial_df = financial_df.copy()
    main_df[trade_date_col] = pd.to_datetime(main_df[trade_date_col], errors="coerce")
    financial_df[effective_date_col] = pd.to_datetime(
        financial_df[effective_date_col], errors="coerce"
    )
    main_df[ts_code_col] = main_df[ts_code_col].astype(str)
    financial_df[ts_code_col] = financial_df[ts_code_col].astype(str)

    financial_data_subset = financial_df[
        [ts_code_col, effective_date_col, factor_value_col]
    ].copy()

    # merge_asof cannot tolerate NaN join keys on the right side: drop them.
    initial_rows_financial = len(financial_data_subset)
    financial_data_subset = financial_data_subset.dropna(
        subset=[ts_code_col, effective_date_col]
    )
    rows_dropped = initial_rows_financial - len(financial_data_subset)
    if rows_dropped > 0:
        print(
            f"警告: 从 financial_data_subset 中移除了 {rows_dropped} 行,因为其 '{ts_code_col}' 或 '{effective_date_col}' 列存在空值。"
        )

    if financial_data_subset.empty:
        print(
            f"警告: 清理空值后 financial_data_subset 为空,无法添加因子 '{factor_value_col}'。将填充 NaN。"
        )
        # BUGFIX: the original referenced np.nan without importing numpy,
        # which raised NameError on this path; float("nan") is equivalent.
        main_df[factor_value_col] = float("nan")
        return main_df

    # merge_asof requires both frames sorted by the "on" key (date first).
    main_df = main_df.sort_values(by=[trade_date_col, ts_code_col])
    financial_data_subset = financial_data_subset.sort_values(
        by=[effective_date_col, ts_code_col]
    )

    # --- as-of join: latest announcement at or before each trade date ---
    try:
        df_with_factor = pd.merge_asof(
            main_df,
            financial_data_subset,
            left_on=trade_date_col,
            right_on=effective_date_col,
            by=ts_code_col,
            direction="backward",
        )
    except Exception as e:
        print(f"merge_asof 执行失败: {e}")
        # BUGFIX: was an undefined np.nan in the original.
        main_df[factor_value_col] = float("nan")
        return main_df

    # Drop the right-hand date key when it is a distinct column.
    if (
        effective_date_col in df_with_factor.columns
        and effective_date_col != trade_date_col
    ):
        df_with_factor = df_with_factor.drop(columns=[effective_date_col])

    # The merged column keeps factor_value_col's name (the original's rename
    # branch was dead code: old and new names were always identical). Guard
    # against the unexpected case where merge_asof did not carry it over.
    if factor_value_col not in df_with_factor.columns:
        print(f"警告: 合并后未找到目标因子列 '{factor_value_col}'。填充 NaN。")
        df_with_factor[factor_value_col] = float("nan")

    return df_with_factor
|
||||
|
||||
|
||||
69586
main/test.txt
69586
main/test.txt
File diff suppressed because one or more lines are too long
8505
main/train/Classify/Classify2.ipynb
Normal file
8505
main/train/Classify/Classify2.ipynb
Normal file
File diff suppressed because one or more lines are too long
66
main/train/Classify/Classify2.py
Normal file
66
main/train/Classify/Classify2.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import numpy as np
import pandas as pd
from qlib.data.dataset import DatasetH

# 1. Build a tiny synthetic panel indexed by (datetime, instrument) so the
#    qlib data pipeline can be exercised without an on-disk data provider.
dates = pd.to_datetime(pd.date_range("2020-01-01", "2020-01-10"))
instruments = ["SH600000", "SH600001"]
index = pd.MultiIndex.from_product([dates, instruments], names=["datetime", "instrument"])

data = {
    "feature_1": np.random.randn(len(index)),
    "feature_2": np.random.randn(len(index)),
    "label": np.random.randn(len(index)) * 0.01
}
my_df = pd.DataFrame(data, index=index)
my_df.iloc[1, 0] = np.nan  # deliberately inject a missing feature value
my_df.iloc[5, 2] = np.nan  # deliberately inject a missing label value

print("----------- 原始 DataFrame -----------")
print(my_df.head())

# 2. Full handler config: a StaticDataLoader that serves the in-memory
#    DataFrame, plus processor lists (infer processors currently disabled).
data_handler_config = {
    "class": "DataHandlerLP",
    "module_path": "qlib.data.dataset.handler",
    "kwargs": {
        # Core part: the data loader configuration.
        "data_loader": {
            "class": "StaticDataLoader",
            "module_path": "qlib.data.dataset.loader",
            "kwargs": {
                "config": my_df,  # pass the in-memory DataFrame here
            }
        },

        "shared_processors": [

        ],
        "infer_processors": [
            # {"class": "DropnaLabel", "module_path": "qlib.data.dataset.processor"},

        ],
        "learn_processors": [
            {"class": "Fillna", "module_path": "qlib.data.dataset.processor", "kwargs": {"fill_value": 0}},
        ]
    },
}

from qlib.utils import init_instance_by_config

# 3. Instantiate the DataHandler from the config: this loads the
#    StaticDataLoader data and runs every configured processor.
dh = init_instance_by_config(data_handler_config)
ds = DatasetH(
    dh,
    segments={
        # NOTE(review): segment ranges extend far beyond the 10 synthetic
        # days above — only 2020-01-01..10 can actually appear in "train".
        "train": ("20190101", "20221231"),
        "valid": ("20220101", "20231231"),
        "test": ("20240101", "20250101"),
    },
)
# 4. Inspect the result: the "learn" view has been passed through the
#    learn_processors (currently just Fillna with fill_value=0).
learn_data = ds.prepare("all", data_key='learn', segments='train')
print("----------- train DataFrame -----------")

print(learn_data)
|
||||
1211
main/train/Classify/predictions_test.tsv
Normal file
1211
main/train/Classify/predictions_test.tsv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -99,7 +99,7 @@
|
||||
"cyq perf\n",
|
||||
"left merge on ['ts_code', 'trade_date']\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 9162612 entries, 0 to 9162611\n",
|
||||
"RangeIndex: 9315967 entries, 0 to 9315966\n",
|
||||
"Data columns (total 33 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
@@ -688,10 +688,10 @@
|
||||
"Calculating cs_rank_size...\n",
|
||||
"Finished cs_rank_size.\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 4819708 entries, 0 to 4819707\n",
|
||||
"RangeIndex: 4910010 entries, 0 to 4910009\n",
|
||||
"Columns: 181 entries, ts_code to cs_rank_size\n",
|
||||
"dtypes: bool(10), datetime64[ns](1), float64(165), int64(3), object(2)\n",
|
||||
"memory usage: 6.2+ GB\n",
|
||||
"memory usage: 6.3+ GB\n",
|
||||
"None\n",
|
||||
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 
'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
|
||||
]
|
||||
@@ -1583,7 +1583,14 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.77it/s]\n"
|
||||
"MAD Filtering: 62%|██████▏ | 81/131 [00:08<00:05, 9.28it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.63it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1598,14 +1605,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 82%|████████▏ | 107/131 [00:12<00:02, 9.41it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.60it/s]\n"
|
||||
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.97it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1645,13 +1645,13 @@
|
||||
"截面 MAD 去极值处理完成。\n",
|
||||
"feature_columns: ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', 
'399006.SZ_volume_change_rate']\n",
|
||||
"df最小日期: 2019-01-02\n",
|
||||
"df最大日期: 2025-10-10\n",
|
||||
"2056336\n",
|
||||
"df最大日期: 2025-11-21\n",
|
||||
"2056030\n",
|
||||
"train_data最小日期: 2020-01-02\n",
|
||||
"train_data最大日期: 2022-12-30\n",
|
||||
"2045675\n",
|
||||
"2135782\n",
|
||||
"test_data最小日期: 2023-01-03\n",
|
||||
"test_data最大日期: 2025-10-10\n",
|
||||
"test_data最大日期: 2025-11-21\n",
|
||||
" ts_code trade_date log_circ_mv\n",
|
||||
"0 000001.SZ 2019-01-02 16.574219\n",
|
||||
"1 000001.SZ 2019-01-03 16.583965\n",
|
||||
@@ -1954,7 +1954,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<catboost.core.CatBoostClassifier at 0x707ccc5ac1a0>"
|
||||
"<catboost.core.CatBoostClassifier at 0x7602293f6030>"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
@@ -2068,7 +2068,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"5588 2056336\n",
|
||||
"5587 2056030\n",
|
||||
" ts_code trade_date turnover_rate\n",
|
||||
"0 000001.SZ 2023-01-03 1.1307\n",
|
||||
"1 000001.SZ 2023-01-04 1.1284\n",
|
||||
@@ -2076,13 +2076,13 @@
|
||||
"3 000001.SZ 2023-01-06 0.6162\n",
|
||||
"4 000001.SZ 2023-01-09 0.5450\n",
|
||||
"... ... ... ...\n",
|
||||
"2045670 605599.SH 2025-09-26 0.3434\n",
|
||||
"2045671 605599.SH 2025-09-29 0.3943\n",
|
||||
"2045672 605599.SH 2025-09-30 0.4982\n",
|
||||
"2045673 605599.SH 2025-10-09 1.0319\n",
|
||||
"2045674 605599.SH 2025-10-10 0.8859\n",
|
||||
"2135777 605599.SH 2025-11-17 0.3820\n",
|
||||
"2135778 605599.SH 2025-11-18 0.3565\n",
|
||||
"2135779 605599.SH 2025-11-19 0.3748\n",
|
||||
"2135780 605599.SH 2025-11-20 0.3132\n",
|
||||
"2135781 605599.SH 2025-11-21 0.4580\n",
|
||||
"\n",
|
||||
"[2045675 rows x 3 columns]\n"
|
||||
"[2135782 rows x 3 columns]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2117,7 +2117,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
BIN
main/utils/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
main/utils/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
main/utils/__pycache__/utils.cpython-312.pyc
Normal file
BIN
main/utils/__pycache__/utils.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1337,3 +1337,63 @@ trade_date,score,ts_code
|
||||
2025-10-09,0.42154288661517764,002591.SZ
|
||||
2025-10-10,0.2807003627051253,002193.SZ
|
||||
2025-10-10,0.31259694334979216,002719.SZ
|
||||
2025-10-13,0.2951270845176498,002856.SZ
|
||||
2025-10-13,0.3389617298778848,002193.SZ
|
||||
2025-10-14,0.3625108344766833,002591.SZ
|
||||
2025-10-14,0.3876832217571092,600735.SH
|
||||
2025-10-15,0.3684329251797533,002591.SZ
|
||||
2025-10-15,0.4012537108164919,600735.SH
|
||||
2025-10-16,0.35194813783938456,600735.SH
|
||||
2025-10-16,0.47588040898459993,002591.SZ
|
||||
2025-10-17,0.4434119771003001,002591.SZ
|
||||
2025-10-17,0.4575670347860125,000890.SZ
|
||||
2025-10-20,0.45163257702571646,000890.SZ
|
||||
2025-10-20,0.4546352741401101,002591.SZ
|
||||
2025-10-21,0.4653630650575277,002591.SZ
|
||||
2025-10-21,0.5032400321085797,600137.SH
|
||||
2025-10-22,0.4575629388073922,000632.SZ
|
||||
2025-10-22,0.46613086209932875,002591.SZ
|
||||
2025-10-23,0.45544805256749116,002591.SZ
|
||||
2025-10-23,0.493066390947383,000632.SZ
|
||||
2025-10-24,0.43331145575224883,000632.SZ
|
||||
2025-10-24,0.45895240962905315,002591.SZ
|
||||
2025-10-27,0.3534800509634666,002193.SZ
|
||||
2025-10-27,0.3687633209705822,600493.SH
|
||||
2025-10-28,0.39020626605234376,001259.SZ
|
||||
2025-10-28,0.432622484773604,600493.SH
|
||||
2025-10-29,0.388162649474833,600493.SH
|
||||
2025-10-29,0.5899817836722746,600847.SH
|
||||
2025-10-30,0.3644512652312262,603616.SH
|
||||
2025-10-30,0.48605588959390245,600847.SH
|
||||
2025-10-31,0.3442043952469046,002591.SZ
|
||||
2025-10-31,0.472699300825448,600847.SH
|
||||
2025-11-03,0.3598403659472199,002856.SZ
|
||||
2025-11-03,0.36028418615974944,600847.SH
|
||||
2025-11-04,0.4098368013275336,603356.SH
|
||||
2025-11-04,0.4157902513122031,002494.SZ
|
||||
2025-11-05,0.4496784204531746,002193.SZ
|
||||
2025-11-05,0.6170797393826642,002856.SZ
|
||||
2025-11-06,0.3743222641474193,002193.SZ
|
||||
2025-11-06,0.5151993158736353,002856.SZ
|
||||
2025-11-07,0.3821400244102041,002591.SZ
|
||||
2025-11-07,0.6416337293101521,002856.SZ
|
||||
2025-11-10,0.4158022301310274,002193.SZ
|
||||
2025-11-10,0.5280653468274031,002856.SZ
|
||||
2025-11-11,0.38888774123241365,002193.SZ
|
||||
2025-11-11,0.5205128900613243,002856.SZ
|
||||
2025-11-12,0.4207243532849393,002856.SZ
|
||||
2025-11-12,0.42295391752723305,002193.SZ
|
||||
2025-11-13,0.4223119822473308,002193.SZ
|
||||
2025-11-13,0.4433093518799348,002856.SZ
|
||||
2025-11-14,0.4228213225112463,002856.SZ
|
||||
2025-11-14,0.5240311394195624,002193.SZ
|
||||
2025-11-17,0.4804005424470699,002494.SZ
|
||||
2025-11-17,0.5081206933698182,002193.SZ
|
||||
2025-11-18,0.45993815526511217,002494.SZ
|
||||
2025-11-18,0.5519071143747787,600493.SH
|
||||
2025-11-19,0.4269366250940664,000890.SZ
|
||||
2025-11-19,0.4707763880425218,600847.SH
|
||||
2025-11-20,0.43476759399773307,600847.SH
|
||||
2025-11-20,0.46185367833556545,600493.SH
|
||||
2025-11-21,0.5033641001654292,600561.SH
|
||||
2025-11-21,0.5181437273273019,603880.SH
|
||||
|
||||
|
0
qmt/__init__.py
Normal file
0
qmt/__init__.py
Normal file
35
qmt/qmt_test.py
Normal file
35
qmt/qmt_test.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from xtquant import xttrader
from xtquant.xttype import StockAccount
import random

# Smoke test: connect to a local QMT trading terminal, subscribe one stock
# account, and print its available cash.

# Path to the QMT mini userdata directory plus a random session id.
userdata_path = r"D:\QMT\国金证券QMT交易端\userdata_mini"
session = int(random.randint(100000, 999999))

# Create the trader instance and start its worker threads.
trader = xttrader.XtQuantTrader(userdata_path, session)
trader.start()

# connect() returns 0 when the terminal accepted the connection.
if trader.connect() == 0:
    print('连接成功')
else:
    print('连接失败')
    trader.stop()
    exit()

# Account to operate on.
stock_account = StockAccount('8886100517')

# Subscribe to account events; 0 means success.
if trader.subscribe(stock_account) == 0:
    print('订阅成功')
else:
    print('订阅失败')


asset = trader.query_stock_asset(stock_account)
print(asset.cash)
|
||||
182
qmt/qmt_trader.py
Normal file
182
qmt/qmt_trader.py
Normal file
@@ -0,0 +1,182 @@
|
||||
# coding:utf-8
|
||||
import time, datetime, traceback, sys, json
|
||||
import redis
|
||||
from xtquant import xtdata
|
||||
from xtquant.xttrader import XtQuantTrader, XtQuantTraderCallback
|
||||
from xtquant.xttype import StockAccount
|
||||
from xtquant import xtconstant
|
||||
|
||||
# ================= 配置区域 =================
|
||||
QMT_PATH = r'D:\qmt\投研\迅投极速交易终端睿智融科版\userdata'
|
||||
ACCOUNT_ID = '2000128'
|
||||
ACCOUNT_TYPE = 'STOCK'
|
||||
|
||||
REDIS_HOST = '127.0.0.1'
|
||||
REDIS_PORT = 6379
|
||||
REDIS_PASS = None
|
||||
|
||||
# 策略基础名称 (不需要加 _real,代码会自动加)
|
||||
STRATEGY_BASE_NAME = 'default_strategy'
|
||||
# ===========================================
|
||||
|
||||
# 定义监听的队列名称 (只监听实盘队列,物理屏蔽回测数据)
|
||||
LISTEN_QUEUE = f"{STRATEGY_BASE_NAME}_real"
|
||||
|
||||
|
||||
class MyXtQuantTraderCallback(XtQuantTraderCallback):
    """Minimal trade-session callbacks: log connection loss, order updates,
    fills, and order errors to stdout."""

    def on_disconnected(self):
        # Invoked by the SDK when the terminal connection drops.
        print("连接断开")

    def on_stock_order(self, order):
        # Order status update (委托回报).
        print(f"委托回报: {order.order_id} {order.order_remark}")

    def on_stock_trade(self, trade):
        # Fill notification (成交回报).
        print(f"成交: {trade.stock_code} {trade.traded_volume}")

    def on_order_error(self, order_error):
        # Order rejection / failure.
        print(f"下单失败: {order_error.error_msg}")
|
||||
|
||||
|
||||
def init_redis():
    """Open a Redis connection (using module-level settings) and verify it
    with PING.

    Returns:
        The connected client, or None (after logging) when unreachable.
    """
    try:
        client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASS,
            decode_responses=True,
        )
        client.ping()
    except Exception as e:
        print(f"Redis连接失败: {e}")
        return None
    return client
|
||||
|
||||
|
||||
def is_msg_valid(data):
    """[Safety core] Validate a signal message's freshness and legitimacy.

    A message is accepted only when it is not flagged as backtest output,
    carries a '%Y-%m-%d %H:%M:%S' timestamp, and that timestamp falls on the
    current server date. Any parse failure rejects the message.
    """
    try:
        # Defensive check: backtest-tagged messages are discarded even though
        # the real-trading queue is already physically separated.
        if data.get('is_backtest', False):
            print(f"警报:拦截到回测数据,已丢弃!")
            return False

        stamp = data.get('timestamp')
        if not stamp:
            print("数据缺失时间戳,丢弃")
            return False

        # Format must match what the strategy side sends.
        msg_date = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').date()
        today = datetime.date.today()

        # Core rule: only same-day messages may trigger trading.
        if msg_date != today:
            print(f"拦截过期消息: 消息日期[{msg_date}] != 今日[{today}]")
            return False

        # Optionally the age could also be bounded (e.g. <= 5 minutes) here.
        return True
    except Exception as e:
        print(f"校验逻辑异常: {e}")
        return False
|
||||
|
||||
|
||||
def process_redis_signal(r_client, xt_trader, acc):
    """Pop one signal from the real-trading Redis queue and execute it.

    BUY sizes the order as a cash slice (available cash / div_count); SELL
    liquidates the whole sellable position. Malformed, stale or backtest
    messages are discarded by is_msg_valid. All failures are logged, never
    raised.
    """
    try:
        # Non-blocking pop; nothing queued means nothing to do this tick.
        msg_json = r_client.lpop(LISTEN_QUEUE)
        if not msg_json: return

        print(f"收到信号: {msg_json}")
        data = json.loads(msg_json)

        if not is_msg_valid(data): return  # freshness / backtest filtering

        stock_code = data['stock_code']
        action = data['action']
        price = float(data['price'])

        # Number of cash slices; falls back to legacy 'weight' key, then 1.
        div_count = float(data.get('div_count', data.get('weight', 1)))

        # =========================================================
        # BUY: cash-slice sizing
        # =========================================================
        if action == 'BUY':
            # 1. Always query the latest available cash before sizing.
            asset = xt_trader.query_stock_asset(acc)
            if not asset:
                print("错误:无法查询资产")
                return

            current_cash = asset.cash

            # 2. Order amount = cash / div_count (guard against divide-by-0).
            if div_count <= 0: div_count = 1

            target_amount = current_cash / div_count

            # 3. Debug trace of the sizing decision (important for audit).
            print(f"【资金分配】可用现金:{current_cash:.2f} / 切分份数:{div_count} = 下单金额:{target_amount:.2f}")

            # 4. Convert amount to shares.
            # NOTE(review): price <= 0 is silently clamped to 1.0 — presumably
            # to avoid a zero division, but it yields an arbitrary volume;
            # confirm upstream always sends a real price.
            if price <= 0: price = 1.0

            # Skip dust orders below 2000 (currency units, presumably CNY).
            if target_amount < 2000:
                print(f"忽略:金额过小 ({target_amount:.2f})")
                return

            # Round down to whole board lots of 100 shares.
            vol = int(target_amount / price / 100) * 100

            if vol >= 100:
                xt_trader.order_stock(acc, stock_code, xtconstant.STOCK_BUY, vol, xtconstant.FIX_PRICE, price,
                                      STRATEGY_BASE_NAME, 'PyBuy')
                print(f"买入下单: {stock_code} {vol}股")
            else:
                print(f"计算股数不足100股")

        # =========================================================
        # SELL: liquidate the entire sellable position
        # =========================================================
        elif action == 'SELL':
            positions = xt_trader.query_stock_positions(acc)
            target_pos = next((p for p in positions if p.stock_code == stock_code), None)

            if target_pos and target_pos.can_use_volume > 0:
                xt_trader.order_stock(acc, stock_code, xtconstant.STOCK_SELL, target_pos.can_use_volume,
                                      xtconstant.FIX_PRICE, price, STRATEGY_BASE_NAME, 'PySell')
                print(f"卖出下单: {stock_code} {target_pos.can_use_volume}股")
            else:
                print(f"无可用持仓: {stock_code}")

    except Exception as e:
        # Best-effort loop worker: log and keep the polling loop alive.
        print(f"处理异常: {e}")
        traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Wire everything up: Redis queue client + QMT trader session, then poll
    # the signal queue forever.
    r_client = init_redis()
    session_id = int(time.time())  # unique-enough session id per run
    xt_trader = XtQuantTrader(QMT_PATH, session_id)
    acc = StockAccount(ACCOUNT_ID, ACCOUNT_TYPE)
    callback = MyXtQuantTraderCallback()
    xt_trader.register_callback(callback)
    xt_trader.start()
    xt_trader.connect()
    xt_trader.subscribe(acc)

    print(f"=== 启动监听: {LISTEN_QUEUE} ===")
    print("只处理当日的实盘/模拟信号,自动过滤回测数据及历史遗留数据。")

    # Poll once a minute; processing is skipped entirely if Redis is down.
    while True:
        if r_client:
            process_redis_signal(r_client, xt_trader, acc)
        time.sleep(60)
|
||||
Reference in New Issue
Block a user