Merge branch 'dev'

# Conflicts:
#	.gitignore
#	main/train/Classify2.ipynb
This commit is contained in:
2025-11-29 00:25:23 +08:00
50 changed files with 97041 additions and 4941 deletions

1
.env Normal file
View File

@@ -0,0 +1 @@
PYTHONPATH=${PYTHONPATH}:${workspaceFolder}

5
.gitignore vendored
View File

@@ -18,4 +18,7 @@ model
!.gitignore
!.git
!/.vscode
!.env
**/mlruns/
**/mnt/

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -83,32 +83,32 @@
"output_type": "stream",
"text": [
" ts_code trade_date close open high low \\\n",
"0 000905.SH 20251010 7398.2241 7499.3917 7509.1161 7373.9841 \n",
"1 000905.SH 20251009 7548.9226 7470.0474 7559.0920 7437.3242 \n",
"2 000905.SH 20250930 7412.3684 7372.5240 7428.0307 7372.0634 \n",
"3 000905.SH 20250929 7350.5599 7251.5221 7377.2217 7216.7357 \n",
"4 000905.SH 20250926 7240.9114 7311.8433 7351.7931 7237.0459 \n",
"0 000905.SH 20251121 6817.4103 6955.7485 6986.7784 6817.4103 \n",
"1 000905.SH 20251120 7061.9497 7174.1046 7180.7320 7056.9003 \n",
"2 000905.SH 20251119 7122.7465 7141.2641 7178.1495 7086.1232 \n",
"3 000905.SH 20251118 7151.0176 7215.0302 7230.5416 7118.4085 \n",
"4 000905.SH 20251117 7235.3512 7248.9216 7262.3306 7202.5932 \n",
"... ... ... ... ... ... ... \n",
"13810 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13811 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13812 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13813 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13814 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"13900 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13901 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13902 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13903 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13904 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"\n",
" pre_close change pct_chg vol amount \n",
"0 7548.9226 -150.6985 -1.9963 2.622566e+08 5.021274e+08 \n",
"1 7412.3684 136.5542 1.8422 2.831308e+08 5.357568e+08 \n",
"2 7350.5599 61.8085 0.8409 2.207075e+08 4.449564e+08 \n",
"3 7240.9114 109.6485 1.5143 2.335394e+08 4.338645e+08 \n",
"4 7341.3238 -100.4124 -1.3678 2.114441e+08 4.301976e+08 \n",
"0 7061.9497 -244.5394 -3.4628 2.089334e+08 3.109687e+08 \n",
"1 7122.7465 -60.7968 -0.8536 1.596187e+08 2.541582e+08 \n",
"2 7151.0176 -28.2711 -0.3953 1.627866e+08 2.567551e+08 \n",
"3 7235.3512 -84.3336 -1.1656 2.022141e+08 3.065400e+08 \n",
"4 7235.4617 -0.1105 -0.0015 2.030506e+08 3.108232e+08 \n",
"... ... ... ... ... ... \n",
"13810 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13811 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13812 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13813 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13814 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"13900 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13901 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13902 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13903 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13904 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"\n",
"[13815 rows x 11 columns]\n"
"[13905 rows x 11 columns]\n"
]
}
],

179
main/data/qlib.ipynb Normal file
View File

@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "2d9eb12f",
"metadata": {},
"outputs": [],
"source": [
"from operator import index\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"import time\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0c5a87ba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date his_low his_high cost_5pct cost_15pct \\\n",
"0 000001.SZ 20180104 0.2 12.3 7.2 8.7 \n",
"1 000002.SZ 20180104 0.2 25.6 15.0 17.6 \n",
"2 000004.SZ 20180104 0.8 53.2 21.6 22.0 \n",
"3 000008.SZ 20180104 0.1 13.9 7.2 7.8 \n",
"4 000009.SZ 20180104 0.3 15.0 5.8 5.9 \n",
"... ... ... ... ... ... ... \n",
"3095 603991.SH 20180104 12.0 67.8 26.4 27.0 \n",
"3096 603993.SH 20180104 1.4 8.6 5.4 5.6 \n",
"3097 603997.SH 20180104 5.4 31.5 9.9 10.2 \n",
"3098 603998.SH 20180104 3.8 18.3 9.5 9.8 \n",
"3099 603999.SH 20180104 3.6 30.6 6.9 6.9 \n",
"\n",
" cost_50pct cost_85pct cost_95pct weight_avg winner_rate \n",
"0 10.8 11.8 12.1 10.39 44.59 \n",
"1 22.2 24.4 24.8 21.31 97.14 \n",
"2 23.6 27.6 29.6 24.71 45.41 \n",
"3 8.6 9.2 10.5 8.64 47.04 \n",
"4 6.6 7.6 7.8 6.74 38.85 \n",
"... ... ... ... ... ... \n",
"3095 27.6 30.6 34.2 28.54 57.36 \n",
"3096 6.1 6.9 7.3 6.15 72.78 \n",
"3097 10.5 11.7 11.7 10.84 11.28 \n",
"3098 11.5 13.0 15.2 11.72 18.44 \n",
"3099 7.8 9.3 9.9 8.00 31.89 \n",
"\n",
"[3100 rows x 11 columns]\n"
]
}
],
"source": [
"\n",
"df = pro.cyq_perf(trade_date='20180104')\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "500292d5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250820')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal['cal_date'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ae3cb65",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 日历文件已保存至: /mnt/d/PyProject/NewStock/data/qlib/calendar/day.txt\n",
"📅 共 2097 个交易日\n"
]
}
],
"source": [
"import os\n",
"\n",
"calendar_dir = \"/mnt/d/PyProject/NewStock/data/qlib/calendars\"\n",
"os.makedirs(calendar_dir, exist_ok=True) # 自动创建目录(包括父目录)\n",
"\n",
"# 排序为升序Qlib 要求日历按时间升序)\n",
"trade_dates_sorted = sorted(trade_dates)\n",
"\n",
"# 写入 day.txt\n",
"day_txt_path = os.path.join(calendar_dir, \"day.txt\")\n",
"with open(day_txt_path, \"w\") as f:\n",
" for date_str in trade_dates_sorted:\n",
" f.write(date_str + \"\\n\")\n",
"\n",
"print(f\"✅ 日历文件已保存至: {day_txt_path}\")\n",
"print(f\"📅 共 {len(trade_dates_sorted)} 个交易日\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "7a6e529b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ all.txt 已生成,共 5685 只股票\n"
]
}
],
"source": [
"stocks_df = pd.read_csv('/mnt/d/PyProject/NewStock/stocks_list.csv', encoding='utf-8-sig')\n",
"\n",
"import os\n",
"\n",
"# 假设你有一个包含所有股票代码的列表(来自 stocks_df['ts_code']\n",
"# 例如:\n",
"# instrument_list = ['600000.SH', '000001.SZ', '300001.SZ', ...]\n",
"\n",
"instrument_list = stocks_df['ts_code'].unique().tolist()\n",
"\n",
"# 获取你的数据时间范围(从 trade_dates\n",
"start_date = min(trade_dates) # e.g., '20201106'\n",
"end_date = max(trade_dates) # e.g., '20210125'\n",
"\n",
"# 创建 instruments 目录\n",
"instr_dir = \"/mnt/d/PyProject/NewStock/data/qlib/instruments\"\n",
"os.makedirs(instr_dir, exist_ok=True)\n",
"\n",
"# 写入 all.txt\n",
"with open(os.path.join(instr_dir, \"all.txt\"), \"w\") as f:\n",
" for inst in instrument_list:\n",
" f.write(f\"{inst}\\t{start_date}\\t{end_date}\\n\")\n",
"\n",
"print(f\"✅ all.txt 已生成,共 {len(instrument_list)} 只股票\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

80
main/data/test.ipynb Normal file
View File

@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-10-17T07:14:47.275Z",
"start_time": "2025-10-17T07:14:46.966401Z"
}
},
"source": [
"from operator import index\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"import time\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-17T07:15:47.631705Z",
"start_time": "2025-10-17T07:15:47.491485Z"
}
},
"cell_type": "code",
"source": [
"pro = ts.pro_api()\n",
"\n",
"#获取单个股票数据\n",
"df = pro.stk_limit(ts_code='603106.SH', start_date='20240924', end_date='20240928')\n",
"\n",
"print(df)"
],
"id": "72dcf1a049d09818",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" trade_date ts_code up_limit down_limit\n",
"0 20240927 603106.SH 7.71 6.31\n",
"1 20240926 603106.SH 7.01 5.73\n",
"2 20240925 603106.SH 6.37 5.21\n",
"3 20240924 603106.SH 5.79 4.73\n"
]
}
],
"execution_count": 6
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
@@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
@@ -39,15 +39,15 @@
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"27111 920445.BJ 20250922\n",
"27112 920489.BJ 20250922\n",
"27113 920682.BJ 20250922\n",
"27114 920799.BJ 20250922\n",
"27115 920819.BJ 20250922\n",
"21755 920978.BJ 20251117\n",
"21756 920981.BJ 20251117\n",
"21757 920982.BJ 20251117\n",
"21758 920985.BJ 20251117\n",
"21759 920992.BJ 20251117\n",
"\n",
"[8205543 rows x 2 columns]\n",
"20250926\n",
"start_date: 20250929\n"
"[8385278 rows x 2 columns]\n",
"20251120\n",
"start_date: 20251121\n"
]
}
],
@@ -64,7 +64,7 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
@@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
@@ -86,16 +86,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251016 完成\n",
"任务 20251015 完成\n",
"任务 20251014 完成\n",
"任务 20251013 完成\n",
"任务 20251010 完成\n",
"任务 20251009 完成\n",
"任务 20250930 完成\n",
"任务 20250929 完成\n"
"任务 20251219 完成\n",
"任务 20251218 完成\n",
"任务 20251216 完成\n",
"任务 20251217 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
]
}
],
@@ -132,7 +143,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
@@ -177,7 +188,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
"version": "3.12.11"
}
},
"nbformat": 4,

View File

@@ -38,16 +38,16 @@
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"2190 859811.SI 20250922\n",
"2191 859821.SI 20250922\n",
"2192 859822.SI 20250922\n",
"2193 859852.SI 20250922\n",
"2194 859951.SI 20250922\n",
".. ... ...\n",
"873 859811.SI 20251120\n",
"874 859821.SI 20251120\n",
"875 859822.SI 20251120\n",
"876 859852.SI 20251120\n",
"877 859951.SI 20251120\n",
"\n",
"[1110243 rows x 2 columns]\n",
"20250926\n",
"start_date: 20250929\n"
"[1123852 rows x 2 columns]\n",
"20251120\n",
"start_date: 20251121\n"
]
}
],
@@ -64,7 +64,7 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
@@ -86,16 +86,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251016 完成\n",
"任务 20251015 完成\n",
"任务 20251014 完成\n",
"任务 20251013 完成\n",
"任务 20251010 完成\n",
"任务 20251009 完成\n",
"任务 20250930 完成\n",
"任务 20250929 完成\n"
"任务 20251218 完成\n",
"任务 20251219 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251204 完成\n",
"任务 20251205 完成\n",
"任务 20251202 完成\n",
"任务 20251203 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
]
}
],

View File

@@ -94,17 +94,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9155905 entries, 0 to 27115\n",
"Index: 9335158 entries, 0 to 21759\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 209.6+ MB\n",
"memory usage: 213.7+ MB\n",
"None\n",
"20250926\n",
"20250929\n"
"20251120\n",
"20251121\n"
]
}
],
@@ -121,7 +121,7 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
@@ -144,16 +144,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251017 完成\n",
"任务 20251020 完成\n",
"任务 20251015 完成\n",
"任务 20251016 完成\n",
"任务 20251014 完成\n",
"任务 20251013 完成\n",
"任务 20251010 完成\n",
"任务 20251009 完成\n",
"任务 20250930 完成\n",
"任务 20250929 完成\n"
"任务 20251219 完成\n",
"任务 20251218 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
]
}
],
@@ -224,58 +235,58 @@
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 600642.SH 20251010 8.03 0.4806 1.3835 \n",
"1 600295.SH 20251010 10.76 0.8549 3.7056 \n",
"2 600444.SH 20251010 19.00 9.6611 17.4605 \n",
"3 605100.SH 20251010 28.72 3.4770 7.6902 \n",
"4 301399.SZ 20251010 19.53 3.9562 4.6772 \n",
"0 000559.SZ 20251121 11.64 4.8762 13.4563 \n",
"1 002981.SZ 20251121 27.84 1.5833 4.5574 \n",
"2 301053.SZ 20251121 32.50 1.0110 2.9907 \n",
"3 603093.SH 20251121 18.29 0.7403 3.2151 \n",
"4 600269.SH 20251121 5.25 0.8423 1.8459 \n",
"... ... ... ... ... ... \n",
"21679 600653.SH 20250929 2.13 2.1746 2.9589 \n",
"21680 002344.SZ 20250929 4.49 1.7080 3.6338 \n",
"21681 301162.SZ 20250929 60.30 2.8491 3.5744 \n",
"21682 920077.BJ 20250929 14.43 1.1113 1.6410 \n",
"21683 300283.SZ 20250929 7.04 4.8583 5.7018 \n",
"5439 600243.SH 20251121 4.78 1.7524 2.1078 \n",
"5440 300759.SZ 20251121 28.39 1.0514 1.6405 \n",
"5441 600054.SH 20251121 11.10 1.3130 3.1101 \n",
"5442 603579.SH 20251121 23.85 2.2265 4.3412 \n",
"5443 002528.SZ 20251121 3.03 1.9087 4.0726 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.49 9.9635 10.2617 1.1073 1.3268 1.3600 4.9816 \n",
"1 1.56 16.3053 16.4683 1.4839 1.0603 1.1230 7.4349 \n",
"2 2.84 69.2746 55.7147 3.8398 3.6313 3.5392 0.5263 \n",
"3 0.55 66.7896 123.2961 2.7276 5.3634 6.7180 2.0794 \n",
"4 0.94 60.7990 75.8958 2.7675 6.8812 7.1828 1.2177 \n",
"0 1.09 40.5790 38.2942 4.1055 2.9989 2.7785 1.2842 \n",
"1 1.44 33.9003 28.1141 3.4000 2.2070 1.9328 0.9280 \n",
"2 1.24 56.6010 98.7688 4.0251 4.4406 4.0870 0.2389 \n",
"3 1.21 24.3641 24.7359 2.5390 1.9536 5.0927 0.3609 \n",
"4 1.32 9.5849 6.9841 0.6165 2.0486 2.1055 3.0476 \n",
"... ... ... ... ... ... ... ... \n",
"21679 0.72 107.4073 227.6354 5.4498 0.9887 0.9724 0.0000 \n",
"21680 0.70 64.8238 75.9239 0.6834 5.5516 5.5560 0.9577 \n",
"21681 0.96 85.4251 76.2427 5.3380 14.5424 12.3677 0.5586 \n",
"21682 0.51 90.3399 82.4861 3.3572 5.2895 4.1636 NaN \n",
"21683 0.94 NaN NaN 3.2821 1.1161 0.9970 0.2499 \n",
"5439 1.37 NaN NaN 3.3110 8.8659 8.4702 0.0000 \n",
"5440 0.86 28.1501 33.3780 3.4547 4.1124 3.7273 0.7056 \n",
"5441 1.53 25.7012 28.5474 1.6912 4.1924 3.9403 1.8829 \n",
"5442 1.23 25.2677 30.2644 1.7649 3.0372 3.0683 3.8598 \n",
"5443 0.61 NaN NaN 35.8962 3.8438 6.1411 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 5.6040 489407.9376 489381.3156 170006.8520 3.929946e+06 \n",
"1 5.5762 279877.6254 197557.6254 45577.9458 3.011483e+06 \n",
"2 0.5789 14642.1932 14642.1932 8101.7360 2.782017e+05 \n",
"3 1.0446 17113.2000 16993.2000 7683.2000 4.914911e+05 \n",
"4 1.0594 18502.0000 5468.3586 4625.5000 3.613441e+05 \n",
"0 1.5410 331535.8444 331454.4214 120110.9588 3.859077e+06 \n",
"1 0.9187 13748.6115 11941.3915 4148.6777 3.827613e+05 \n",
"2 0.8961 8421.7803 7749.4689 2619.7738 2.737079e+05 \n",
"3 0.4117 61006.5893 61006.5893 14046.4993 1.115811e+06 \n",
"4 3.2381 233540.7014 233540.7014 106564.7107 1.226089e+06 \n",
"... ... ... ... ... ... \n",
"21679 NaN 194638.0317 194638.0317 143048.5612 4.145790e+05 \n",
"21680 0.8463 128261.6960 128145.0092 60233.0025 5.758950e+05 \n",
"21681 0.9704 13258.3724 8522.5548 6793.1764 7.994799e+05 \n",
"21682 NaN 58768.1817 31695.6817 21464.7599 8.480249e+05 \n",
"21683 NaN 49697.8222 36721.8502 31289.2680 3.498727e+05 \n",
"5439 NaN 43885.0000 43885.0000 36485.0000 2.097703e+05 \n",
"5440 0.7045 177819.5525 141938.4613 90967.4278 5.048297e+06 \n",
"5441 1.5495 72937.9440 51330.0000 21670.4250 8.096112e+05 \n",
"5442 1.2636 20335.5564 20335.5564 10429.5044 4.850030e+05 \n",
"5443 NaN 119867.5082 105021.9577 49219.1551 3.631985e+05 \n",
"\n",
" circ_mv is_st \n",
"0 3.929732e+06 False \n",
"1 2.125720e+06 False \n",
"2 2.782017e+05 False \n",
"3 4.880447e+05 False \n",
"4 1.067970e+05 False \n",
"0 3.858129e+06 False \n",
"1 3.324483e+05 False \n",
"2 2.518577e+05 False \n",
"3 1.115811e+06 False \n",
"4 1.226089e+06 False \n",
"... ... ... \n",
"21679 4.145790e+05 False \n",
"21680 5.753711e+05 False \n",
"21681 5.139101e+05 False \n",
"21682 4.573687e+05 False \n",
"21683 2.585218e+05 False \n",
"5439 2.097703e+05 True \n",
"5440 4.029633e+06 False \n",
"5441 5.697630e+05 False \n",
"5442 4.850030e+05 False \n",
"5443 3.182165e+05 True \n",
"\n",
"[21684 rows x 19 columns]\n"
"[5444 rows x 19 columns]\n"
]
}
],
@@ -300,45 +311,58 @@
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"9 300313.SZ 20251010 8.84 3.1146 6.4625 \n",
"20 603838.SH 20251010 7.80 0.5503 1.5146 \n",
"29 603813.SH 20251010 24.06 1.5835 4.5173 \n",
"48 002742.SZ 20251010 4.65 1.0473 1.2924 \n",
"69 603559.SH 20251010 8.50 0.2072 0.2945 \n",
"55 000909.SZ 20251121 5.63 0.5785 0.9877 \n",
"62 002485.SZ 20251121 4.61 0.9593 3.9009 \n",
"134 300096.SZ 20251121 7.31 1.6490 1.9675 \n",
"154 300343.SZ 20251121 5.48 4.1298 4.7019 \n",
"166 600525.SH 20251121 3.53 1.8869 2.7053 \n",
"... ... ... ... ... ... \n",
"21466 603021.SH 20250929 4.62 1.3860 2.3418 \n",
"21552 300020.SZ 20250929 3.58 1.5031 1.6828 \n",
"21554 000506.SZ 20250929 10.88 10.5560 15.7565 \n",
"21603 600636.SH 20250929 8.29 0.4693 0.7963 \n",
"21661 603843.SH 20250929 5.17 0.3798 0.5364 \n",
"5340 300368.SZ 20251121 14.86 7.3423 10.4878 \n",
"5381 300020.SZ 20251121 3.63 1.9995 2.2386 \n",
"5383 000506.SZ 20251121 11.55 2.5685 3.8339 \n",
"5439 600243.SH 20251121 4.78 1.7524 2.1078 \n",
"5443 002528.SZ 20251121 3.03 1.9087 4.0726 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio dv_ttm \\\n",
"9 1.30 NaN NaN NaN 20.1067 20.9731 0.0000 NaN \n",
"20 0.57 NaN NaN 2.6121 8.7517 6.9304 0.0000 NaN \n",
"29 1.88 NaN NaN 4.5222 8.4776 7.5124 1.0313 NaN \n",
"48 1.28 NaN NaN NaN 1.6800 2.1226 0.0000 NaN \n",
"69 0.60 NaN NaN 3.5043 9.5964 8.2315 0.0000 NaN \n",
"... ... .. ... ... ... ... ... ... \n",
"21466 0.80 NaN NaN NaN 3.5891 3.7851 0.0000 NaN \n",
"21552 1.00 NaN NaN 0.9812 5.1924 18.4036 0.0000 NaN \n",
"21554 3.17 NaN NaN 16.4257 30.3341 23.4860 0.0000 NaN \n",
"21603 0.81 NaN NaN 1.7909 12.8512 11.0116 0.4825 0.6031 \n",
"21661 0.05 NaN NaN 12.5612 2.6558 3.1369 0.0000 NaN \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"55 0.99 NaN NaN 2.4818 7.6504 7.4923 0.0 \n",
"62 0.51 NaN NaN 2.1295 3.0458 3.2777 0.0 \n",
"134 0.81 NaN 50.1694 8.9654 5.6290 6.2215 0.0 \n",
"154 0.72 267.9489 106.2988 3.0411 6.7430 6.5207 0.0 \n",
"166 0.72 NaN NaN 1.2373 0.5912 0.5968 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5340 0.94 NaN NaN 42.1875 42.9123 57.8502 0.0 \n",
"5381 1.00 NaN NaN 1.0776 5.2649 21.5375 0.0 \n",
"5383 0.78 NaN 239.4225 16.7572 32.2021 20.7023 0.0 \n",
"5439 1.37 NaN NaN 3.3110 8.8659 8.4702 0.0 \n",
"5443 0.61 NaN NaN 35.8962 3.8438 6.1411 0.0 \n",
"\n",
" total_share float_share free_share total_mv circ_mv is_st \n",
"9 31297.7396 19735.2789 9511.5479 2.766720e+05 1.744599e+05 True \n",
"20 32001.6000 32001.6000 11627.0468 2.496125e+05 2.496125e+05 True \n",
"29 10501.5000 10501.5000 3681.2000 2.526661e+05 2.526661e+05 True \n",
"48 43200.0000 43185.8082 34994.8239 2.008800e+05 2.008140e+05 True \n",
"69 40127.6979 40127.6979 28231.9697 3.410854e+05 3.410854e+05 True \n",
"... ... ... ... ... ... ... \n",
"21466 31994.8070 31994.8070 18936.7934 1.478160e+05 1.478160e+05 True \n",
"21552 79467.7974 76663.9584 68475.6577 2.844947e+05 2.744570e+05 True \n",
"21554 92901.7761 92858.4361 62210.1427 1.010771e+06 1.010300e+06 True \n",
"21603 43863.6802 43863.6802 25849.6552 3.636299e+05 3.636299e+05 True \n",
"21661 69962.3237 69962.3237 49541.4702 3.617052e+05 3.617052e+05 True \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"55 NaN 43771.4245 43771.0570 25634.2299 2.464331e+05 \n",
"62 NaN 54400.0000 54400.0000 13377.7333 2.507840e+05 \n",
"134 NaN 43000.0000 43000.0000 36039.3251 3.143300e+05 \n",
"154 NaN 106896.9119 106621.9389 93649.7579 5.857951e+05 \n",
"166 NaN 131878.0152 131878.0152 91981.1744 4.655294e+05 \n",
"... ... ... ... ... ... \n",
"5340 NaN 52894.3475 52894.3475 37030.2475 7.860100e+05 \n",
"5381 NaN 79467.7974 76663.9584 68475.6577 2.884681e+05 \n",
"5383 NaN 92901.7761 92858.4361 62210.1427 1.073016e+06 \n",
"5439 NaN 43885.0000 43885.0000 36485.0000 2.097703e+05 \n",
"5443 NaN 119867.5082 105021.9577 49219.1551 3.631985e+05 \n",
"\n",
"[749 rows x 19 columns]\n"
" circ_mv is_st \n",
"55 2.464311e+05 True \n",
"62 2.507840e+05 True \n",
"134 3.143300e+05 True \n",
"154 5.842882e+05 True \n",
"166 4.655294e+05 True \n",
"... ... ... \n",
"5340 7.860100e+05 True \n",
"5381 2.782902e+05 True \n",
"5383 1.072515e+06 True \n",
"5439 2.097703e+05 True \n",
"5443 3.182165e+05 True \n",
"\n",
"[186 rows x 19 columns]\n"
]
}
],
@@ -388,7 +412,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9177589 entries, 0 to 21683\n",
"Index: 9340602 entries, 0 to 5443\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -396,7 +420,7 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 218.8+ MB\n",
"memory usage: 222.7+ MB\n",
"None\n"
]
}
@@ -424,7 +448,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
"version": "3.12.11"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"id": "48ae71ed02d61819",
"metadata": {
"ExecuteTime": {
@@ -26,14 +26,27 @@
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"outputs": [],
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "File ../../../data/daily_basic.h5 does not exist",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m daily_basic = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../../data/daily_basic.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mdaily_basic\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.12/site-packages/pandas/io/pytables.py:437\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 434\u001b[39m exists = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 436\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m exists:\n\u001b[32m--> \u001b[39m\u001b[32m437\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_buf\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not exist\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 439\u001b[39m store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)\n\u001b[32m 440\u001b[39m \u001b[38;5;66;03m# can't auto open/close if we are using an iterator\u001b[39;00m\n\u001b[32m 441\u001b[39m \u001b[38;5;66;03m# so delegate to the iterator\u001b[39;00m\n",
"\u001b[31mFileNotFoundError\u001b[39m: File ../../../data/daily_basic.h5 does not exist"
]
}
],
"source": [
"daily_basic = pd.read_hdf('../../../data/daily_basic.h5', key='daily_basic')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "e6606a96e5728b8",
"metadata": {
"ExecuteTime": {
@@ -93,7 +106,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "41bc125d",
"metadata": {},
"outputs": [
@@ -163,7 +176,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
@@ -209,7 +222,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "new_trader",
"display_name": "stock",
"language": "python",
"name": "python3"
},
@@ -223,7 +236,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.13.2"
}
},
"nbformat": 4,

View File

@@ -34,17 +34,17 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8964780 entries, 0 to 25739\n",
"Index: 9134824 entries, 0 to 20632\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 205.2+ MB\n",
"memory usage: 209.1+ MB\n",
"None\n",
"20250926\n",
"start_date: 20250929\n"
"20251120\n",
"start_date: 20251121\n"
]
}
],
@@ -61,7 +61,7 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
@@ -84,16 +84,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251016 完成\n",
"任务 20251015 完成\n",
"任务 20251014 完成\n",
"任务 20251013 完成\n",
"任务 20251009 完成\n",
"任务 20251010 完成\n",
"任务 20250929 完成\n",
"任务 20250930 完成\n"
"任务 20251218 完成\n",
"任务 20251219 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
]
}
],
@@ -183,71 +194,58 @@
"output_type": "stream",
"text": [
" ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n",
"0 603290.SH 20251009 45532 52028.67 42778 \n",
"1 600936.SH 20251009 42537 1545.21 42382 \n",
"2 300429.SZ 20251009 81914 11768.07 64063 \n",
"3 300879.SZ 20251009 15330 5366.90 11651 \n",
"4 300031.SZ 20251009 51381 12650.70 43869 \n",
"0 002593.SZ 20251121 369428 21109.32 239444 \n",
"1 300405.SZ 20251121 173424 11775.01 115988 \n",
"2 001336.SZ 20251121 11378 2729.92 10423 \n",
"3 002403.SZ 20251121 24219 3104.96 19841 \n",
"4 688268.SH 20251121 12369 7423.62 12330 \n",
"... ... ... ... ... ... \n",
"20574 688083.SH 20250930 13247 10094.95 11236 \n",
"20575 002939.SZ 20250930 372609 43083.12 232240 \n",
"20576 688303.SH 20250930 62478 18094.19 55086 \n",
"20577 300146.SZ 20250930 50078 5792.85 35214 \n",
"20578 688351.SH 20250930 15096 3333.84 14017 \n",
"5156 000881.SZ 20251121 146959 11936.56 155068 \n",
"5157 300676.SZ 20251121 21428 9913.61 15092 \n",
"5158 603138.SH 20251121 31243 4558.85 30559 \n",
"5159 301526.SZ 20251121 172815 9552.38 105860 \n",
"5160 300903.SZ 20251121 124772 20586.88 96098 \n",
"\n",
" sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n",
"0 48942.98 53824 61495.85 54076 61851.39 \n",
"1 1538.97 24175 878.06 31948 1160.07 \n",
"2 9211.49 88583 12730.36 88244 12682.05 \n",
"3 4089.33 15591 5464.12 17057 5976.94 \n",
"4 10822.65 56173 13836.60 49423 12190.63 \n",
"0 13673.67 256325 14655.03 298786 17088.39 \n",
"1 7859.14 154296 10473.88 176589 11973.97 \n",
"2 2498.94 5274 1266.93 5893 1415.57 \n",
"3 2546.44 17292 2218.64 18180 2333.03 \n",
"4 7430.97 16104 9682.18 16670 10042.76 \n",
"... ... ... ... ... ... \n",
"20574 8561.02 10482 7994.12 9858 7514.37 \n",
"20575 26867.01 279904 32371.96 324997 37595.57 \n",
"20576 15952.67 55867 16177.83 53776 15573.61 \n",
"20577 4076.10 46159 5337.00 39420 4560.91 \n",
"20578 3095.89 6482 1430.69 6675 1474.59 \n",
"5156 12623.78 107103 8717.66 97089 7896.18 \n",
"5157 6975.73 17857 8249.34 16607 7679.15 \n",
"5158 4458.47 15126 2208.57 11879 1733.73 \n",
"5159 5855.69 155749 8607.76 160962 8892.48 \n",
"5160 15867.99 92082 15223.39 105748 17449.56 \n",
"\n",
" buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n",
"0 36150 41253.53 36789 41932.43 10514 \n",
"1 11158 405.04 9212 334.60 5672 \n",
"2 64282 9239.06 72904 10475.38 8221 \n",
"3 10167 3562.24 12327 4313.59 3221 \n",
"4 40306 9938.01 41035 10103.23 6112 \n",
"0 125303 7153.65 190306 10868.03 13733 \n",
"1 68396 4621.42 100633 6820.12 12166 \n",
"2 326 77.32 662 159.66 0 \n",
"3 7131 916.27 8891 1137.58 0 \n",
"4 9155 5523.81 9780 5877.77 2793 \n",
"... ... ... ... ... ... \n",
"20574 6674 5082.80 8224 6273.43 3329 \n",
"20575 204229 23631.31 285167 32986.98 132696 \n",
"20576 33304 9638.04 34809 10074.64 5032 \n",
"20577 47161 5454.07 36321 4202.88 8662 \n",
"20578 2513 555.48 3398 749.54 0 \n",
"5156 63727 5186.84 54928 4460.74 8415 \n",
"5157 12528 5781.44 16425 7596.83 3906 \n",
"5158 5884 857.88 8048 1175.32 0 \n",
"5159 63089 3481.66 115498 6376.52 13568 \n",
"5160 58186 9624.92 77536 12811.46 25445 \n",
"\n",
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol \\\n",
"0 12073.88 12377 14125.13 20027 \n",
"1 205.33 0 0.00 -21182 \n",
"2 1183.11 17790 2551.67 -840 \n",
"3 1133.90 3275 1147.29 -4996 \n",
"4 1507.28 19645 4816.08 1531 \n",
"... ... ... ... ... \n",
"20574 2538.01 4413 3361.05 7612 \n",
"20575 15366.29 147033 17003.12 84949 \n",
"20576 1459.24 13010 3768.39 15188 \n",
"20577 1000.95 41105 4744.98 -16754 \n",
"20578 0.00 0 0.00 3406 \n",
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol net_mf_amount \n",
"0 781.20 36253 2069.12 -103672 -5866.51 \n",
"1 813.01 15071 1030.08 -34131 -2297.62 \n",
"2 0.00 0 0.00 -1180 -271.00 \n",
"3 0.00 1730 222.81 194 30.22 \n",
"4 1708.30 1640 986.41 476 282.30 \n",
"... ... ... ... ... ... \n",
"5156 686.43 19119 1546.77 -50922 -4113.23 \n",
"5157 1805.21 7595 3497.90 -4085 -1873.36 \n",
"5158 0.00 1768 257.78 713 110.42 \n",
"5159 744.87 22900 1261.99 -64224 -3539.76 \n",
"5160 4179.40 21103 3485.60 -29335 -4855.38 \n",
"\n",
" net_mf_amount \n",
"0 22734.35 \n",
"1 -766.75 \n",
"2 -90.83 \n",
"3 -1741.72 \n",
"4 385.00 \n",
"... ... \n",
"20574 5816.07 \n",
"20575 9927.60 \n",
"20576 4417.72 \n",
"20577 -1928.39 \n",
"20578 752.20 \n",
"\n",
"[20579 rows x 20 columns]\n"
"[5161 rows x 20 columns]\n"
]
}
],
@@ -272,7 +270,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
"version": "3.12.11"
}
},
"nbformat": 4,

View File

@@ -34,23 +34,23 @@
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4872 600206.SH 20250926\n",
"4873 600207.SH 20250926\n",
"4874 600208.SH 20250926\n",
"4876 600211.SH 20250926\n",
"7280 920037.BJ 20250926\n",
"4915 600221.SH 20251120\n",
"4916 600222.SH 20251120\n",
"4917 600223.SH 20251120\n",
"4919 600227.SH 20251120\n",
"3693 301448.SZ 20251120\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 11170571 entries, 0 to 36462\n",
"Index: 11412627 entries, 0 to 29456\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 255.7+ MB\n",
"memory usage: 261.2+ MB\n",
"None\n",
"20250926\n",
"20250929\n"
"20251120\n",
"20251121\n"
]
}
],
@@ -68,7 +68,7 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251020')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20251220')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
@@ -91,16 +91,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251015 完成\n",
"任务 20251016 完成\n",
"任务 20251013 完成\n",
"任务 20251014 完成\n",
"任务 20251010 完成\n",
"任务 20251009 完成\n",
"任务 20250929 完成\n",
"任务 20250930 完成\n"
"任务 20251219 完成\n",
"任务 20251218 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
]
}
],
@@ -152,58 +163,19 @@
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20251010 000001.SZ 12.54 10.26\n",
"1 20251010 000002.SZ 7.47 6.11\n",
"2 20251010 000004.SZ 12.26 11.10\n",
"3 20251010 000006.SZ 11.94 9.77\n",
"4 20251010 000007.SZ 8.12 6.64\n",
"0 20251121 000001.SZ 13.04 10.67\n",
"1 20251121 000002.SZ 6.82 5.58\n",
"2 20251121 000004.SZ 11.64 10.54\n",
"3 20251121 000006.SZ 12.07 9.87\n",
"4 20251121 000007.SZ 11.00 9.00\n",
"... ... ... ... ...\n",
"7309 20251010 920978.BJ 50.08 26.98\n",
"7310 20251010 920981.BJ 48.04 25.88\n",
"7311 20251010 920982.BJ 354.64 190.96\n",
"7312 20251010 920985.BJ 11.86 6.40\n",
"7313 20251010 920992.BJ 27.87 15.01\n",
"7363 20251121 920978.BJ 49.06 26.42\n",
"7364 20251121 920981.BJ 46.99 25.31\n",
"7365 20251121 920982.BJ 300.67 161.91\n",
"7366 20251121 920985.BJ 11.75 6.33\n",
"7367 20251121 920992.BJ 24.06 12.96\n",
"\n",
"[7314 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20251009 000001.SZ 12.47 10.21\n",
"1 20251009 000002.SZ 7.58 6.20\n",
"2 20251009 000004.SZ 11.68 10.56\n",
"3 20251009 000006.SZ 11.32 9.26\n",
"4 20251009 000007.SZ 8.02 6.56\n",
"... ... ... ... ...\n",
"7306 20251009 920978.BJ 50.44 27.16\n",
"7307 20251009 920981.BJ 48.11 25.91\n",
"7308 20251009 920982.BJ 366.06 197.12\n",
"7309 20251009 920985.BJ 12.01 6.47\n",
"7310 20251009 920992.BJ 27.39 14.75\n",
"\n",
"[7311 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250929 000001.SZ 12.54 10.26\n",
"1 20250929 000002.SZ 7.48 6.12\n",
"2 20250929 000004.SZ 11.00 9.96\n",
"3 20250929 000006.SZ 10.46 8.56\n",
"4 20250929 000007.SZ 7.63 6.25\n",
"... ... ... ... ...\n",
"7302 20250929 920445.BJ 14.37 7.75\n",
"7303 20250929 920489.BJ 29.34 15.80\n",
"7304 20250929 920682.BJ 13.10 7.06\n",
"7305 20250929 920799.BJ 70.78 38.12\n",
"7306 20250929 920819.BJ 5.52 2.98\n",
"\n",
"[7307 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250930 000001.SZ 12.51 10.23\n",
"1 20250930 000002.SZ 7.49 6.13\n",
"2 20250930 000004.SZ 11.12 10.06\n",
"3 20250930 000006.SZ 10.29 8.42\n",
"4 20250930 000007.SZ 7.92 6.48\n",
"... ... ... ... ...\n",
"7305 20250930 920445.BJ 14.67 7.91\n",
"7306 20250930 920489.BJ 29.26 15.76\n",
"7307 20250930 920682.BJ 12.92 6.96\n",
"7308 20250930 920799.BJ 73.19 39.41\n",
"7309 20250930 920819.BJ 5.55 2.99\n",
"\n",
"[7310 rows x 4 columns]]\n"
"[7368 rows x 4 columns]]\n"
]
}
],
@@ -271,7 +243,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
"version": "3.12.11"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,165 @@
"""
因子模块初始化文件
"""
# from .operator_framework import (
# StockWiseFactor,
# DateWiseFactor
# )
# 导入所有因子类
from .technical_factors import (
SMAFactor,
EMAFactor,
ATRFactor,
OBVFactor,
MACDFactor,
RSI_Factor,
CrossSectionalRankFactor
)
from .money_flow_factors import (
LGFlowFactor,
FlowIntensityFactor,
FlowDivergenceFactor,
FlowStructureFactor,
FlowAccelerationFactor,
CostSqueeze,
HighCostSelling,
LowCostAccumulation,
InstNetAccum,
ChipLockin,
RetailOutInstIn,
AccumAccel
)
from .chip_factors import (
ChipConcentrationFactor,
ChipSkewnessFactor,
FloatingChipFactor,
CostSupportFactor,
WinnerPriceZoneFactor
)
from .sentiment_factors import (
SentimentPanicGreedFactor,
SentimentBreadthFactor,
SentimentReversalFactor,
PriceDeductionFactor,
PriceDeductionRatioFactor,
IndustryMomentumLeadership,
LeadershipPersistenceScore,
DynamicIndustryLeadership
)
from .industry_factors import (
IndustryMomentumFactor,
MarketBreadthFactor,
SectorRotationFactor
)
from .financial_factors import (
CashflowToEVFactor,
BookToPriceFactor,
DebtToEquityFactor,
ProfitMarginFactor,
BMFactor
)
from .special_factors import (
LimitFactor,
VolumeRatioFactor,
BBI_RATIO_FACTOR,
VolatilitySlopeFactor,
PriceVolumeTrendFactor
)
from .momentum_factors import (
ReturnFactor,
VolatilityFactor,
MomentumFactor,
MomentumAcceleration,
TrendEfficiency
)
# 导入统一因子计算模块
from .all_factors import calculate_all_factors, compute_factors
# 导入算子框架
from .operator_framework import StockWiseFactor, DateWiseFactor, FactorGraph
# Registry of all factor classes, for unified management.
# Per-stock (time-series) factors:
ALL_STOCK_FACTORS = [
    SMAFactor,
    EMAFactor,
    ATRFactor,
    OBVFactor,
    MACDFactor,
    RSI_Factor,
    LGFlowFactor,
    FlowIntensityFactor,
    FlowDivergenceFactor,
    FlowStructureFactor,
    FlowAccelerationFactor,
    ChipConcentrationFactor,
    ChipSkewnessFactor,
    FloatingChipFactor,
    CostSupportFactor,
    WinnerPriceZoneFactor,
    SentimentPanicGreedFactor,
    SentimentBreadthFactor,
    SentimentReversalFactor,
    PriceDeductionFactor,
    PriceDeductionRatioFactor,
    CashflowToEVFactor,
    BookToPriceFactor,
    DebtToEquityFactor,
    ProfitMarginFactor,
    LimitFactor,
    VolumeRatioFactor,
    BBI_RATIO_FACTOR,
    VolatilitySlopeFactor,
    PriceVolumeTrendFactor
]
# Per-date (cross-sectional) factors:
ALL_DATE_FACTORS = [
    CrossSectionalRankFactor,
    IndustryMomentumFactor,
    MarketBreadthFactor,
    SectorRotationFactor
]
# Public API of the package. Fix: the previous list exported 'ROEFactor',
# 'get_available_stock_factors' and 'get_available_date_factors', none of
# which are imported or defined here, so `from main.factor import *` raised
# AttributeError. Those entries are removed and names that ARE imported
# above (momentum, money-flow extras, sentiment extras, BMFactor, operator
# framework) are now exported as well.
__all__ = [
    # Technical indicator factors
    'SMAFactor', 'EMAFactor', 'ATRFactor', 'OBVFactor', 'MACDFactor', 'RSI_Factor',
    # Momentum factors
    'ReturnFactor', 'VolatilityFactor', 'MomentumFactor',
    'MomentumAcceleration', 'TrendEfficiency',
    # Money-flow factors
    'LGFlowFactor', 'FlowIntensityFactor', 'FlowDivergenceFactor',
    'FlowStructureFactor', 'FlowAccelerationFactor',
    'CostSqueeze', 'HighCostSelling', 'LowCostAccumulation',
    'InstNetAccum', 'ChipLockin', 'RetailOutInstIn', 'AccumAccel',
    # Chip distribution factors
    'ChipConcentrationFactor', 'ChipSkewnessFactor', 'FloatingChipFactor',
    'CostSupportFactor', 'WinnerPriceZoneFactor',
    # Market sentiment factors
    'SentimentPanicGreedFactor', 'SentimentBreadthFactor', 'SentimentReversalFactor',
    'PriceDeductionFactor', 'PriceDeductionRatioFactor',
    'IndustryMomentumLeadership', 'LeadershipPersistenceScore',
    'DynamicIndustryLeadership',
    # Industry / cross-sectional factors
    'CrossSectionalRankFactor', 'IndustryMomentumFactor', 'MarketBreadthFactor',
    'SectorRotationFactor',
    # Financial factors
    'CashflowToEVFactor', 'BookToPriceFactor', 'DebtToEquityFactor',
    'ProfitMarginFactor', 'BMFactor',
    # Special factors
    'LimitFactor', 'VolumeRatioFactor', 'BBI_RATIO_FACTOR',
    'VolatilitySlopeFactor', 'PriceVolumeTrendFactor',
    # Unified factor computation
    'calculate_all_factors', 'compute_factors',
    # Operator framework
    'StockWiseFactor', 'DateWiseFactor', 'FactorGraph',
]

256
main/factor/all_factors.py Normal file
View File

@@ -0,0 +1,256 @@
"""
统一因子计算模块
提供统一接口来计算所有类型的因子
"""
import polars as pl
from typing import List, Dict, Any
from main.factor.operator_framework import FactorGraph
from main.factor import (
# 技术指标因子
SMAFactor,
EMAFactor,
ATRFactor,
OBVFactor,
MACDFactor,
RSI_Factor,
CrossSectionalRankFactor,
# 动量因子
ReturnFactor,
VolatilityFactor,
MomentumFactor,
MomentumAcceleration,
TrendEfficiency,
# 资金流因子
LGFlowFactor,
FlowIntensityFactor,
FlowDivergenceFactor,
FlowStructureFactor,
FlowAccelerationFactor,
# 筹码分布因子
ChipConcentrationFactor,
ChipSkewnessFactor,
FloatingChipFactor,
CostSupportFactor,
WinnerPriceZoneFactor,
CostSqueeze,
HighCostSelling,
LowCostAccumulation,
InstNetAccum,
ChipLockin,
RetailOutInstIn,
AccumAccel,
# 市场情绪因子
SentimentPanicGreedFactor,
SentimentBreadthFactor,
SentimentReversalFactor,
PriceDeductionFactor,
PriceDeductionRatioFactor,
IndustryMomentumLeadership,
LeadershipPersistenceScore,
DynamicIndustryLeadership,
# 行业/横截面因子
IndustryMomentumFactor,
MarketBreadthFactor,
SectorRotationFactor,
# 财务因子
CashflowToEVFactor,
BookToPriceFactor,
DebtToEquityFactor,
ProfitMarginFactor,
BMFactor,
# 特殊因子
LimitFactor,
VolumeRatioFactor,
BBI_RATIO_FACTOR,
VolatilitySlopeFactor,
PriceVolumeTrendFactor,
)
def calculate_all_factors(
    df: pl.DataFrame,
    stock_factor_configs: List[Dict[str, Any]] = None,
    date_factor_configs: List[Dict[str, Any]] = None,
) -> "tuple[pl.DataFrame, list]":
    """
    Compute every configured factor on the input table.

    Parameters:
        df (pl.DataFrame): input stock data table.
        stock_factor_configs (List[Dict]): per-stock (time-series) factor
            configs, each {"class": FactorClass, "params": {...}}; a default
            set is used when None.
        date_factor_configs (List[Dict]): per-date (cross-sectional) factor
            configs; a default set is used when None.

    Returns:
        tuple: (result_df, factor_ids) — the DataFrame with all computed
        factor columns appended, and the list of computed factor ids
        (stock-wise factors first, then date-wise factors).
        NOTE: earlier docs claimed a bare DataFrame return; the function has
        always returned this tuple.
    """
    # Initialize the factor dependency graph.
    factor_graph = FactorGraph()
    # Fall back to the default per-stock factor configuration.
    if stock_factor_configs is None:
        stock_factor_configs = [
            {"class": SMAFactor, "params": {"window": 5}},
            {"class": SMAFactor, "params": {"window": 20}},
            {"class": EMAFactor, "params": {"window": 12}},
            {"class": EMAFactor, "params": {"window": 26}},
            {"class": ATRFactor, "params": {"window": 14}},
            {"class": OBVFactor, "params": {}},
            {
                "class": MACDFactor,
                "params": {"fast_period": 12, "slow_period": 26, "signal_period": 9},
            },
            {"class": RSI_Factor, "params": {"window": 14}},
            # Money-flow factors
            {"class": LGFlowFactor, "params": {}},
            {"class": FlowIntensityFactor, "params": {}},
            {"class": FlowDivergenceFactor, "params": {}},
            {"class": FlowStructureFactor, "params": {}},
            {"class": FlowAccelerationFactor, "params": {}},
            {"class": InstNetAccum, "params": {}},
            {"class": ChipLockin, "params": {}},
            {"class": RetailOutInstIn, "params": {}},
            {"class": AccumAccel, "params": {}},
            # Chip distribution factors
            {"class": ChipConcentrationFactor, "params": {}},
            {"class": ChipSkewnessFactor, "params": {}},
            {"class": FloatingChipFactor, "params": {}},
            {"class": CostSupportFactor, "params": {}},
            {"class": WinnerPriceZoneFactor, "params": {}},
            {"class": LowCostAccumulation, "params": {}},
            {"class": HighCostSelling, "params": {}},
            {"class": CostSqueeze, "params": {}},
            # Market sentiment factors
            {
                "class": SentimentPanicGreedFactor,
                "params": {"window_atr": 14, "window_smooth": 5},
            },
            {
                "class": SentimentBreadthFactor,
                "params": {"window_vol": 20, "window_smooth": 3},
            },
            {
                "class": SentimentReversalFactor,
                "params": {"window_ret": 5, "window_vol": 5},
            },
            {"class": PriceDeductionFactor, "params": {"n": 10}},
            {"class": PriceDeductionRatioFactor, "params": {"n": 10}},
            {"class": IndustryMomentumLeadership, "params": {}},
            {"class": LeadershipPersistenceScore, "params": {}},
            # {"class": DynamicIndustryLeadership, "params": {}},
            # Financial factors
            # {"class": CashflowToEVFactor, "params": {}},
            # {"class": BookToPriceFactor, "params": {}},
            # {"class": ROEFactor, "params": {}},
            # {"class": DebtToEquityFactor, "params": {}},
            # {"class": ProfitMarginFactor, "params": {}},
            {"class": BMFactor, "params": {}},
            # Special factors
            {"class": LimitFactor, "params": {}},
            {"class": VolumeRatioFactor, "params": {}},
            {"class": BBI_RATIO_FACTOR, "params": {}},
            {
                "class": VolatilitySlopeFactor,
                "params": {"window_vol": 20, "window_slope": 5},
            },
            {"class": PriceVolumeTrendFactor, "params": {}},
            # Momentum factors — includes the 20-day return factor
            {"class": ReturnFactor, "params": {"period": 20}},
            {"class": ReturnFactor, "params": {"period": 5}},
            {"class": VolatilityFactor, "params": {"period": 10}},
            {
                "class": MomentumAcceleration,
                "params": {"short_period": 5, "long_period": 60},
            },
            {"class": TrendEfficiency, "params": {"period": 10}},
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "circ_mv", "name": "size_rank"},
            },
        ]
    # Fall back to the default per-date factor configuration.
    # Fix: the previous defaults listed the "return_5" rank factor twice,
    # computing the same factor redundantly; the duplicate is removed.
    if date_factor_configs is None:
        date_factor_configs = [
            {"class": CrossSectionalRankFactor, "params": {"column": "return_5"}},
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "return_20"},
            },
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "volatility_10"},
            },
            {
                "class": CrossSectionalRankFactor,
                "params": {"column": "circ_mv"},
            },
            # {
            #     "class": CrossSectionalRankFactor,
            #     "params": {"factor_name": "momentum_10"},
            # },
        ]
    # Instantiate and register stock-wise factors; creation failures are
    # reported but do not abort the rest of the pipeline.
    stock_factors = []
    for config in stock_factor_configs:
        factor_class = config["class"]
        params = config["params"]
        try:
            factor = factor_class(**params)
            factor_graph.add_factor(factor)
            stock_factors.append(factor)
        except Exception as e:
            print(f"创建股票因子 {factor_class.__name__} 时出错: {e}")
    # Instantiate and register date-wise (cross-sectional) factors.
    date_factors = []
    for config in date_factor_configs:
        factor_class = config["class"]
        params = config["params"]
        try:
            factor = factor_class(**params)
            factor_graph.add_factor(factor)
            date_factors.append(factor)
        except Exception as e:
            print(f"创建日期因子 {factor_class.__name__} 时出错: {e}")
    # Work on a copy so the caller's frame is untouched.
    result_df = df.clone()
    # Collect the factor ids to request from the graph.
    stock_factor_ids = [factor.get_factor_id() for factor in stock_factors]
    date_factor_ids = [factor.get_factor_id() for factor in date_factors]
    # Stock-wise factors are computed first: date-wise (cross-sectional)
    # factors may rank columns the stock-wise pass produced.
    if stock_factor_ids:
        result_df = factor_graph.compute(result_df, stock_factor_ids)
    if date_factor_ids:
        result_df = factor_graph.compute(result_df, date_factor_ids)
    all_ids = stock_factor_ids + date_factor_ids
    return result_df, all_ids
# Backward-compatible simplified wrapper around calculate_all_factors.
def compute_factors(df: pl.DataFrame):
    """
    Simplified factor-computation interface.

    Parameters:
        df (pl.DataFrame): input stock data table.

    Returns:
        tuple: (result_df, factor_ids) exactly as returned by
        calculate_all_factors — the DataFrame with all computed factor
        columns and the list of computed factor ids.
        NOTE(review): earlier docs claimed a bare DataFrame return; callers
        must unpack the tuple.
    """
    return calculate_all_factors(df)

123
main/factor/chip_factors.py Normal file
View File

@@ -0,0 +1,123 @@
"""
筹码分布因子模块
包含基于股票截面的筹码分布因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import StockWiseFactor
class ChipConcentrationFactor(StockWiseFactor):
    """Chip concentration factor.

    Measures the relative width of the holder-cost distribution: the spread
    between the 95th and 5th percentile costs, normalized by the close price.
    Smaller values mean chips are concentrated in a narrow price band.
    """

    def __init__(self):
        super().__init__(
            name="chip_concentration",
            parameters={},
            required_factor_ids=["cost_95pct", "cost_5pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return (cost_95pct - cost_5pct) / close for each row."""
        # Cost-band spread scaled by price; epsilon guards division by zero.
        spread = group_df["cost_95pct"] - group_df["cost_5pct"]
        concentration = spread / (group_df["close"] + 1e-8)
        return concentration.alias(self.factor_id)
class ChipSkewnessFactor(StockWiseFactor):
    """Chip-distribution skewness factor.

    Measures how far the volume-weighted average holder cost sits above or
    below the median (50th percentile) cost, relative to the median itself.
    Positive values indicate a cost distribution skewed toward higher prices.
    """

    def __init__(self):
        # Fix: the original dependency list declared "cost_50pct" twice;
        # the factor reads only weight_avg and cost_50pct.
        super().__init__(
            name="chip_skewness",
            parameters={},
            required_factor_ids=["weight_avg", "cost_50pct"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return (weight_avg - cost_50pct) / cost_50pct for each row."""
        weight_avg = group_df["weight_avg"]
        cost_50 = group_df["cost_50pct"]
        # Epsilon guards against division by zero.
        chip_skewness = (weight_avg - cost_50) / (cost_50 + 1e-8)
        return chip_skewness.alias(self.factor_id)
class FloatingChipFactor(StockWiseFactor):
    """Floating-chip (loose supply) proxy factor.

    Combines the winner rate with how far the close sits above the 15th
    percentile holder cost: when price is above the low-cost band and many
    holders are in profit, more chips are likely to float.
    """

    def __init__(self):
        super().__init__(
            name="floating_chip",
            parameters={},
            required_factor_ids=["winner_rate", "cost_15pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return winner_rate * max(0, (close - cost_15pct) / close)."""
        close_px = group_df["close"]
        # Relative distance of the close above the 15th-percentile cost;
        # epsilon guards against division by zero.
        dist_above_low_cost = (close_px - group_df["cost_15pct"]) / (close_px + 1e-8)
        # Clamp negative distances to zero, then scale by the winner rate.
        clamped = pl.Series(np.maximum(0, dist_above_low_cost))
        floating = group_df["winner_rate"] * clamped
        return floating.alias(self.factor_id)
class CostSupportFactor(StockWiseFactor):
    """Cost-support strength factor.

    Tracks the row-over-row percentage change of the 15th percentile holder
    cost; a rising low-cost band suggests strengthening support below price.
    """

    def __init__(self):
        super().__init__(
            name="cost_support",
            parameters={},
            required_factor_ids=["cost_15pct", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return diff(cost_15pct) / cost_15pct * 100 for each row."""
        support_level = group_df["cost_15pct"]
        # Percentage change of the support level; epsilon guards /0.
        pct_change = support_level.diff() / (support_level + 1e-8) * 100
        return pct_change.alias(self.factor_id)
class WinnerPriceZoneFactor(StockWiseFactor):
    """Profit-taking price-zone factor.

    Classifies each row into a discrete zone from the close price's position
    in the cost distribution and the winner rate:
      1 — close above the 85th-pct cost with winner_rate > 0.8
      2 — close below the 15th-pct cost with winner_rate < 0.2
      3 — close above the median cost with winner_rate > 0.5
      4 — close below the median cost with winner_rate < 0.5
      0 — none of the above
    """

    def __init__(self):
        super().__init__(
            name="winner_price_zone",
            parameters={},
            required_factor_ids=["close", "cost_85pct", "cost_15pct", "cost_50pct", "winner_rate"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        px = group_df["close"]
        wr = group_df["winner_rate"]
        upper_cost = group_df["cost_85pct"]
        lower_cost = group_df["cost_15pct"]
        median_cost = group_df["cost_50pct"]
        # The branch order matters: zones 1/2 are strict subsets of 3/4 and
        # must be tested first.
        zone = (
            pl.when((px > upper_cost) & (wr > 0.8)).then(1)
            .when((px < lower_cost) & (wr < 0.2)).then(2)
            .when((px > median_cost) & (wr > 0.5)).then(3)
            .when((px < median_cost) & (wr < 0.5)).then(4)
            .otherwise(0)
            .alias(self.factor_id)
        )
        return zone

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,116 @@
"""
财务因子模块
包含基于股票截面的财务因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
class CashflowToEVFactor(StockWiseFactor):
    """Operating-cash-flow to enterprise-value factor.

    Enterprise value is approximated as market cap plus total liabilities
    minus cash on hand (money_cap); the factor is operating cash flow
    (n_cashflow_act) divided by that EV.
    """

    def __init__(self):
        super().__init__(
            name="cashflow_to_ev",
            parameters={},
            required_factor_ids=["n_cashflow_act", "total_liab", "money_cap", "total_mv"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return n_cashflow_act / (total_mv + total_liab - money_cap)."""
        # Enterprise value = market cap + total liabilities - cash on hand.
        ev = group_df["total_mv"] + group_df["total_liab"] - group_df["money_cap"]
        # Epsilon guards against division by zero.
        ratio = group_df["n_cashflow_act"] / (ev + 1e-8)
        return ratio.alias(self.factor_id)
class BookToPriceFactor(StockWiseFactor):
    """Book-to-price factor: book value per share divided by close price."""

    def __init__(self):
        super().__init__(
            name="book_to_price",
            parameters={},
            required_factor_ids=["bps", "close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return bps / close for each row."""
        # Epsilon guards against division by zero.
        ratio = group_df["bps"] / (group_df["close"] + 1e-8)
        return ratio.alias(self.factor_id)
class DebtToEquityFactor(StockWiseFactor):
    """Debt-to-equity factor: total liabilities over shareholder equity.

    NOTE(review): the original comment labeled this "资产负债率"
    (liabilities / total assets), but the formula is liabilities / equity —
    confirm which ratio is actually intended.
    """

    def __init__(self):
        super().__init__(
            name="debt_to_equity",
            parameters={},
            required_factor_ids=["total_liab", "equity"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return total_liab / equity for each row."""
        liabilities = group_df["total_liab"]
        equity = group_df["equity"]
        # Epsilon guards against zero equity.
        ratio = liabilities / (equity + 1e-8)
        return ratio.alias(self.factor_id)
class ProfitMarginFactor(StockWiseFactor):
    """Net profit margin factor: net profit divided by revenue."""

    def __init__(self):
        super().__init__(
            name="profit_margin",
            parameters={},
            required_factor_ids=["net_profit", "revenue"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return net_profit / revenue for each row."""
        # Epsilon guards against division by zero.
        margin = group_df["net_profit"] / (group_df["revenue"] + 1e-8)
        return margin.alias(self.factor_id)
class BMFactor(StockWiseFactor):
    """Book-to-market (BM) factor.

    Equity attributable to parent shareholders
    (total_hldr_eqy_exc_min_int) divided by total market value.
    """

    def __init__(self):
        super().__init__(
            name="bm",
            parameters={},
            required_factor_ids=["total_hldr_eqy_exc_min_int", "total_mv"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return book equity / market cap for each row."""
        equity_book = group_df["total_hldr_eqy_exc_min_int"]
        mv = group_df["total_mv"]
        # Epsilon guards against zero market value.
        # Optional: mask rows with non-positive book value or market cap
        # via pl.when(...) if negative equity should be excluded.
        bm_ratio = equity_book / (mv + 1e-8)
        return bm_ratio.alias(self.factor_id)

View File

@@ -16,7 +16,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"e:\\PyProject\\NewStock\\main\\factor\n"
"/mnt/d/PyProject/NewStock\n"
]
}
],
@@ -62,8 +62,8 @@
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5123740 entries, 0 to 5123739\n",
"Data columns (total 31 columns):\n",
"RangeIndex: 8713571 entries, 0 to 8713570\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object \n",
@@ -74,57 +74,248 @@
" 5 low float64 \n",
" 6 vol float64 \n",
" 7 pct_chg float64 \n",
" 8 turnover_rate float64 \n",
" 9 pe_ttm float64 \n",
" 10 circ_mv float64 \n",
" 11 volume_ratio float64 \n",
" 12 is_st bool \n",
" 13 up_limit float64 \n",
" 14 down_limit float64 \n",
" 15 buy_sm_vol float64 \n",
" 16 sell_sm_vol float64 \n",
" 17 buy_lg_vol float64 \n",
" 18 sell_lg_vol float64 \n",
" 19 buy_elg_vol float64 \n",
" 20 sell_elg_vol float64 \n",
" 21 net_mf_vol float64 \n",
" 22 his_low float64 \n",
" 23 his_high float64 \n",
" 24 cost_5pct float64 \n",
" 25 cost_15pct float64 \n",
" 26 cost_50pct float64 \n",
" 27 cost_85pct float64 \n",
" 28 cost_95pct float64 \n",
" 29 weight_avg float64 \n",
" 30 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(28), object(1)\n",
"memory usage: 1.2+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n",
" 8 amount float64 \n",
" 9 turnover_rate float64 \n",
" 10 pe_ttm float64 \n",
" 11 circ_mv float64 \n",
" 12 total_mv float64 \n",
" 13 volume_ratio float64 \n",
" 14 is_st bool \n",
" 15 up_limit float64 \n",
" 16 down_limit float64 \n",
" 17 buy_sm_vol float64 \n",
" 18 sell_sm_vol float64 \n",
" 19 buy_lg_vol float64 \n",
" 20 sell_lg_vol float64 \n",
" 21 buy_elg_vol float64 \n",
" 22 sell_elg_vol float64 \n",
" 23 net_mf_vol float64 \n",
" 24 his_low float64 \n",
" 25 his_high float64 \n",
" 26 cost_5pct float64 \n",
" 27 cost_15pct float64 \n",
" 28 cost_50pct float64 \n",
" 29 cost_85pct float64 \n",
" 30 cost_95pct float64 \n",
" 31 weight_avg float64 \n",
" 32 winner_rate float64 \n",
"dtypes: bool(1), datetime64[ns](1), float64(30), object(1)\n",
"memory usage: 2.1+ GB\n",
"None\n"
]
}
],
"source": [
"from main.utils.utils import read_and_merge_h5_data\n",
"\n",
"print('daily data')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0acb6625",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate']\n"
]
}
],
"source": [
"\n",
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "820a6b50",
"metadata": {},
"outputs": [],
"source": [
"fina_indicator_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/fina_indicator.h5', key='fina_indicator',\n",
" columns=['ts_code', 'ann_date', 'undist_profit_ps', 'ocfps', 'bps'],\n",
" df=None)\n",
"cashflow_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/cashflow.h5', key='cashflow',\n",
" columns=['ts_code', 'ann_date', 'n_cashflow_act'],\n",
" df=None)\n",
"balancesheet_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/balancesheet.h5', key='balancesheet',\n",
" columns=['ts_code', 'ann_date', 'money_cap', 'total_liab'],\n",
" df=None)\n",
"top_list_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/top_list.h5', key='top_list',\n",
" columns=['ts_code', 'trade_date', 'reason'],\n",
" df=None)\n",
"\n",
"top_list_df = top_list_df.sort_values(by='trade_date', ascending=False).drop_duplicates(subset=['ts_code', 'trade_date'], keep='first').sort_values(by='trade_date')\n",
"\n",
"stk_holdertrade_df = read_and_merge_h5_data('/mnt/d/PyProject/NewStock/data/stk_holdertrade.h5', key='stk_holdertrade',\n",
" columns=['ts_code', 'ann_date', 'in_de', 'change_ratio'],\n",
" df=None)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "903469a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 成功从 Redis Hash 'concept_stocks_daily_lists_pickle' 读取 1794 条每日概念股票数据。\n"
]
}
],
"source": [
"import os\n",
"import redis\n",
"import pickle\n",
"from datetime import date, datetime\n",
"\n",
"# --- 配置 Redis 连接 ---\n",
"REDIS_HOST = '140.143.91.66'\n",
"REDIS_PORT = 6389\n",
"REDIS_DB = 0\n",
"\n",
"# --- 定义 Redis 键名 ---\n",
"HASH_KEY = \"concept_stocks_daily_lists_pickle\" # 区分之前的 JSON 版本\n",
"MAX_DATE_KEY = \"concept_stocks_max_date_pickle\" # 区分之前的 JSON 版本\n",
"\n",
"concept_dict = {}\n",
"\n",
"# --- 连接 Redis ---\n",
"try:\n",
"    # SECURITY: hardcoded password removed from source; set REDIS_PASSWORD in the environment.\n",
"    r = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, password=os.environ.get('REDIS_PASSWORD'))\n",
"\n",
" all_data_from_redis = r.hgetall(HASH_KEY) # 返回的是字典,键是字节,值是字节\n",
" \n",
" if all_data_from_redis:\n",
" for date_bytes, stocks_bytes in all_data_from_redis.items(): # 将变量名改为 date_bytes 更清晰\n",
" try:\n",
" # *** 修正点:将日期字节解码为字符串 ***\n",
" date_str = date_bytes.decode('utf-8') \n",
" date_obj = datetime.strptime(date_str, '%Y%m%d').date()\n",
" \n",
" stocks_list = pickle.loads(stocks_bytes)\n",
" concept_dict[date_obj] = stocks_list\n",
" except (ValueError, pickle.UnpicklingError) as e:\n",
" print(f\"⚠️ 警告: 解析 Redis 数据时出错 (日期键: '{date_bytes.decode('utf-8', errors='ignore')}'),跳过此条数据: {e}\") # 打印警告时也解码一下\n",
" print(f\"✅ 成功从 Redis Hash '{HASH_KEY}' 读取 {len(concept_dict)} 条每日概念股票数据。\")\n",
" else:\n",
" print(f\" Redis Hash '{HASH_KEY}' 中没有找到任何数据。\")\n",
"\n",
"except redis.exceptions.ConnectionError as e:\n",
" print(f\"❌ 错误: 无法连接到 Redis 服务器,请检查 Redis 是否正在运行或连接配置: {e}\")\n",
"except Exception as e:\n",
" print(f\"❌ 从 Redis 读取数据时发生未知错误: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "afb8da3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4566757\n",
"开始生成概念相关因子...\n",
"开始计算概念内截面排序因子,基于: ['pct_chg', 'turnover_rate', 'volume_ratio']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Ranking Features in Concepts: 100%|██████████| 3/3 [00:00<00:00, 15.82it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"概念相关因子生成完毕。\n",
"4566757\n",
"开始计算股东增减持因子...\n",
"警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。\n",
"股东增减持因子计算完成。\n",
"Calculating cat_senti_mom_vol_spike...\n",
"Finished cat_senti_mom_vol_spike.\n",
"Calculating cat_senti_pre_breakout...\n",
"Calculating atr_10 as it's missing...\n",
"Calculating atr_40 as it's missing...\n",
"Finished cat_senti_pre_breakout.\n",
"计算因子 ts_turnover_rate_acceleration_5_20\n",
"计算因子 ts_vol_sustain_10_30\n",
"计算因子 cs_amount_outlier_10\n",
"计算因子 ts_ff_to_total_turnover_ratio\n",
"计算因子 ts_price_volume_trend_coherence_5_20\n",
"计算因子 ts_ff_turnover_rate_surge_10\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"开始计算因子: AR, BR (原地修改)...\n",
"因子 AR, BR 计算成功。\n",
"因子 AR, BR 计算流程结束。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"使用 'ann_date' 作为财务数据生效日期。\n",
"警告: 从 financial_data_subset 中移除了 366 行,因为其 'ts_code' 或 'ann_date' 列存在空值。\n",
"计算 BBI...\n",
"--- 计算日级别偏离度 (使用 pct_chg) ---\n",
"--- 计算日级别动量基准 (使用 pct_chg) ---\n",
"日级别动量基准计算完成 (使用 pct_chg)。\n",
"日级别偏离度计算完成 (使用 pct_chg)。\n",
"--- 计算日级别行业偏离度 (使用 pct_chg 和行业基准) ---\n",
"--- 计算日级别行业动量基准 (使用 pct_chg 和 cat_l2_code) ---\n",
"错误: 计算日级别行业动量基准需要以下列: ['pct_chg', 'cat_l2_code', 'trade_date', 'ts_code']。\n",
"错误: 计算日级别行业偏离度需要以下列: ['pct_chg', 'daily_industry_positive_benchmark', 'daily_industry_negative_benchmark']。请先运行 daily_industry_momentum_benchmark(df)。\n",
"Index(['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol',\n",
" 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
" 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol',\n",
" 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol',\n",
" 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate',\n",
" 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol',\n",
" 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol',\n",
" 'lg_elg_buy_prop', 'flow_struct_buy_change',\n",
" 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel',\n",
" 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy',\n",
" 'cost_support_15pct_change', 'cat_winner_price_zone',\n",
" 'flow_chip_consistency', 'profit_taking_vs_absorb', '_is_positive',\n",
" '_is_negative', 'cat_is_positive', '_pos_returns', '_neg_returns',\n",
" '_pos_returns_sq', '_neg_returns_sq', 'upside_vol', 'downside_vol',\n",
" 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate',\n",
" 'pct_chg', 'amount', 'turnover_rate',\n",
" ...\n",
" 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike',\n",
" 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike',\n",
" 'vol_std_5', 'atr_14', 'atr_6', 'obv'],\n",
" dtype='object')\n",
" dtype='object', length=103)\n",
"Calculating senti_strong_inflow...\n",
"Finished senti_strong_inflow.\n",
"Calculating lg_flow_mom_corr_20_60...\n",
"Finished lg_flow_mom_corr_20_60.\n",
"Calculating lg_buy_consolidation_20...\n",
"Finished lg_buy_consolidation_20.\n",
"Calculating lg_flow_accel...\n",
"Finished lg_flow_accel.\n",
"Calculating profit_pressure...\n",
@@ -155,58 +346,73 @@
"Finished vol_wgt_hist_pos_20.\n",
"Calculating vol_adj_roc_20...\n",
"Finished vol_adj_roc_20.\n",
"Calculating intraday_lg_flow_corr_20 (Placeholder - complex implementation)...\n",
"Finished intraday_lg_flow_corr_20 (Placeholder).\n",
"Calculating cap_neutral_cost_metric (Placeholder - requires statsmodels)...\n",
"Finished cap_neutral_cost_metric (Placeholder).\n"
"Calculating cs_rank_net_lg_flow_val...\n",
"Finished cs_rank_net_lg_flow_val.\n",
"Calculating cs_rank_flow_divergence...\n",
"Finished cs_rank_flow_divergence.\n",
"Calculating cs_rank_ind_adj_lg_flow...\n",
"Error calculating cs_rank_ind_adj_lg_flow: Missing 'cat_l2_code' column. Assigning NaN.\n",
"Calculating cs_rank_elg_buy_ratio...\n",
"Finished cs_rank_elg_buy_ratio.\n",
"Calculating cs_rank_rel_profit_margin...\n",
"Finished cs_rank_rel_profit_margin.\n",
"Calculating cs_rank_cost_breadth...\n",
"Finished cs_rank_cost_breadth.\n",
"Calculating cs_rank_dist_to_upper_cost...\n",
"Finished cs_rank_dist_to_upper_cost.\n",
"Calculating cs_rank_winner_rate...\n",
"Finished cs_rank_winner_rate.\n",
"Calculating cs_rank_intraday_range...\n",
"Finished cs_rank_intraday_range.\n",
"Calculating cs_rank_close_pos_in_range...\n",
"Finished cs_rank_close_pos_in_range.\n",
"Calculating cs_rank_opening_gap...\n",
"Error calculating cs_rank_opening_gap: Missing 'pre_close' column. Assigning NaN.\n",
"Calculating cs_rank_pos_in_hist_range...\n",
"Finished cs_rank_pos_in_hist_range.\n",
"Calculating cs_rank_vol_x_profit_margin...\n",
"Finished cs_rank_vol_x_profit_margin.\n",
"Calculating cs_rank_lg_flow_price_concordance...\n",
"Finished cs_rank_lg_flow_price_concordance.\n",
"Calculating cs_rank_turnover_per_winner...\n",
"Finished cs_rank_turnover_per_winner.\n",
"Calculating cs_rank_ind_cap_neutral_pe (Placeholder - requires statsmodels)...\n",
"Finished cs_rank_ind_cap_neutral_pe (Placeholder).\n",
"Calculating cs_rank_volume_ratio...\n",
"Finished cs_rank_volume_ratio.\n",
"Calculating cs_rank_elg_buy_sell_sm_ratio...\n",
"Finished cs_rank_elg_buy_sell_sm_ratio.\n",
"Calculating cs_rank_cost_dist_vol_ratio...\n",
"Finished cs_rank_cost_dist_vol_ratio.\n",
"Calculating cs_rank_size...\n",
"Finished cs_rank_size.\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 4566757 entries, 0 to 4566756\n",
"Columns: 197 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(176), int64(6), int8(1), object(3)\n",
"memory usage: 6.4+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg', 'amount', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_hot_concept_stock', 'concept_rank_pct_chg', 'concept_rank_turnover_rate', 'concept_rank_volume_ratio', 'holder_net_change_sum_10d', 'holder_increase_days_10d', 'holder_decrease_days_10d', 'holder_any_increase_flag_10d', 'holder_any_decrease_flag_10d', 'holder_direction_score_10d', 'cat_senti_mom_vol_spike', 'cat_senti_pre_breakout', 'ts_turnover_rate_acceleration_5_20', 'ts_vol_sustain_10_30', 'cs_amount_outlier_10', 'ts_ff_to_total_turnover_ratio', 'ts_price_volume_trend_coherence_5_20', 'ts_ff_turnover_rate_surge_10', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 
'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'senti_strong_inflow', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
]
}
],
"source": [
"print('daily data')\n",
"df = read_and_merge_h5_data('../../data/daily_data.h5', key='daily_data',\n",
" columns=['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'pct_chg'],\n",
" df=None)\n",
"\n",
"print('daily basic')\n",
"df = read_and_merge_h5_data('../../data/daily_basic.h5', key='daily_basic',\n",
" columns=['ts_code', 'trade_date', 'turnover_rate', 'pe_ttm', 'circ_mv', 'volume_ratio',\n",
" 'is_st'], df=df, join='inner')\n",
"df = df[df['trade_date'] >= '2021-01-01']\n",
"\n",
"print('stk limit')\n",
"df = read_and_merge_h5_data('../../data/stk_limit.h5', key='stk_limit',\n",
" columns=['ts_code', 'trade_date', 'pre_close', 'up_limit', 'down_limit'],\n",
" df=df)\n",
"print('money flow')\n",
"df = read_and_merge_h5_data('../../data/money_flow.h5', key='money_flow',\n",
" columns=['ts_code', 'trade_date', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol',\n",
" 'sell_lg_vol',\n",
" 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol'],\n",
" df=df)\n",
"print('cyq perf')\n",
"df = read_and_merge_h5_data('../../data/cyq_perf.h5', key='cyq_perf',\n",
" columns=['ts_code', 'trade_date', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct',\n",
" 'cost_50pct',\n",
" 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate'],\n",
" df=df)\n",
"print(df.info())\n",
"\n",
"origin_columns = df.columns.tolist()\n",
"origin_columns = [col for col in origin_columns if 'cyq' not in col]\n",
"print(origin_columns)\n",
"import numpy as np\n",
"from main.factor.factor import *\n",
"from main.factor.money_factor import * \n",
"from main.factor.concept_factor import * \n",
"\n",
"\n",
"def filter_data(df):\n",
" # df = df.groupby('trade_date').apply(lambda x: x.nlargest(1000, 'act_factor1'))\n",
" df = df[~df['is_st']]\n",
" df = df[~df['ts_code'].str.endswith('BJ')]\n",
" df = df[~df['ts_code'].str.startswith('30')]\n",
" df = df[~df['ts_code'].str.startswith('68')]\n",
" df = df[~df['ts_code'].str.startswith('8')]\n",
" df = df[df['trade_date'] >= '2022-01-01']\n",
" if 'in_date' in df.columns:\n",
" df = df.drop(columns=['in_date'])\n",
" df = df[~df[\"is_st\"]]\n",
" df = df[~df[\"ts_code\"].str.endswith(\"BJ\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"30\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"68\")]\n",
" df = df[~df[\"ts_code\"].str.startswith(\"8\")]\n",
" df = df[df[\"trade_date\"] >= \"2019-01-01\"]\n",
" if \"in_date\" in df.columns:\n",
" df = df.drop(columns=[\"in_date\"])\n",
" df = df.reset_index(drop=True)\n",
" return df\n",
"\n",
@@ -214,11 +420,70 @@
"gc.collect()\n",
"\n",
"df = filter_data(df)\n",
"df = df.sort_values(by=[\"ts_code\", \"trade_date\"])\n",
"\n",
"# df = price_minus_deduction_price(df, n=120)\n",
"# df = price_deduction_price_diff_ratio_to_sma(df, n=120)\n",
"# df = cat_price_vs_sma_vs_deduction_price(df, n=120)\n",
"# df = cat_reason(df, top_list_df)\n",
"# df = cat_is_on_top_list(df, top_list_df)\n",
"print(len(df))\n",
"df = generate_concept_factors(df, concept_dict)\n",
"print(len(df))\n",
"\n",
"df = holder_trade_factors(df, stk_holdertrade_df)\n",
"\n",
"df = cat_senti_mom_vol_spike(\n",
" df,\n",
" return_period=3,\n",
" return_threshold=0.03, # 近3日涨幅超3%\n",
" volume_ratio_threshold=1.3,\n",
" current_pct_chg_min=0.0, # 当日必须收红\n",
" current_pct_chg_max=0.05,\n",
") # 当日涨幅不宜过大\n",
"\n",
"df = cat_senti_pre_breakout(\n",
" df,\n",
" atr_short_N=10,\n",
" atr_long_M=40,\n",
" vol_atrophy_N=10,\n",
" vol_atrophy_M=40,\n",
" price_stab_N=5,\n",
" price_stab_threshold=0.06,\n",
" current_pct_chg_min_signal=0.002,\n",
" current_pct_chg_max_signal=0.05,\n",
" volume_ratio_signal_threshold=1.1,\n",
")\n",
"\n",
"df = ts_turnover_rate_acceleration_5_20(df)\n",
"df = ts_vol_sustain_10_30(df)\n",
"# df = cs_turnover_rate_relative_strength_20(df)\n",
"df = cs_amount_outlier_10(df)\n",
"df = ts_ff_to_total_turnover_ratio(df)\n",
"df = ts_price_volume_trend_coherence_5_20(df)\n",
"# df = ts_turnover_rate_trend_strength_5(df)\n",
"df = ts_ff_turnover_rate_surge_10(df)\n",
"\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"undist_profit_ps\")\n",
"df = add_financial_factor(df, fina_indicator_df, factor_value_col=\"ocfps\")\n",
"calculate_arbr(df, N=26)\n",
"df[\"log_circ_mv\"] = np.log(df[\"circ_mv\"])\n",
"df = calculate_cashflow_to_ev_factor(df, cashflow_df, balancesheet_df)\n",
"df = caculate_book_to_price_ratio(df, fina_indicator_df)\n",
"df = turnover_rate_n(df, n=5)\n",
"df = variance_n(df, n=20)\n",
"df = bbi_ratio_factor(df)\n",
"df = daily_deviation(df)\n",
"df = daily_industry_deviation(df)\n",
"df, _ = get_rolling_factor(df)\n",
"df, _ = get_simple_factor(df)\n",
"from main.factor.factor import *\n",
"\n",
"df = calculate_strong_inflow_signal(df)\n",
"\n",
"df = df.rename(columns={\"l1_code\": \"cat_l1_code\"})\n",
"df = df.rename(columns={\"l2_code\": \"cat_l2_code\"})\n",
"\n",
"lg_flow_mom_corr(df, N=20, M=60)\n",
"lg_buy_consolidation(df, N=20)\n",
"lg_flow_accel(df)\n",
"profit_pressure(df)\n",
"underwater_resistance(df)\n",
@@ -234,12 +499,57 @@
"pullback_strong(df, N=20, M=20)\n",
"vol_wgt_hist_pos(df, N=20)\n",
"vol_adj_roc(df, N=20)\n",
"intraday_lg_flow_corr(df, N=20) # Placeholder\n",
"cap_neutral_cost_metric(df) # Placeholder\n",
"# hurst_exponent_flow(df, N=60) # Placeholder\n",
"# df['test'] = 1\n",
"# df['test2'] = 2\n",
"# df = df.merge(industry_df, on=['l2_code', 'trade_date'], how='left')\n",
"\n",
"cs_rank_net_lg_flow_val(df)\n",
"cs_rank_flow_divergence(df)\n",
"cs_rank_industry_adj_lg_flow(df) # Needs cat_l2_code\n",
"cs_rank_elg_buy_ratio(df)\n",
"cs_rank_rel_profit_margin(df)\n",
"cs_rank_cost_breadth(df)\n",
"cs_rank_dist_to_upper_cost(df)\n",
"cs_rank_winner_rate(df)\n",
"cs_rank_intraday_range(df)\n",
"cs_rank_close_pos_in_range(df)\n",
"cs_rank_opening_gap(df) # Needs pre_close\n",
"cs_rank_pos_in_hist_range(df) # Needs his_low, his_high\n",
"cs_rank_vol_x_profit_margin(df)\n",
"cs_rank_lg_flow_price_concordance(df)\n",
"cs_rank_turnover_per_winner(df)\n",
"cs_rank_ind_cap_neutral_pe(df) # Placeholder - needs external libraries\n",
"cs_rank_volume_ratio(df) # Needs volume_ratio\n",
"cs_rank_elg_buy_sell_sm_ratio(df)\n",
"cs_rank_cost_dist_vol_ratio(df) # Needs volume_ratio\n",
"cs_rank_size(df) # Needs circ_mv\n",
"\n",
"\n",
"# df = df.merge(index_data, on='trade_date', how='left')\n",
"\n",
"print(df.info())\n",
"print(df.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "48712034",
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "File ../../data/industry_data.h5 does not exist",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m l2_df = \u001b[43mread_and_merge_h5_data\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m../../data/industry_data.h5\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mindustry_data\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43ml2_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43min_date\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mts_code\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mleft\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m df = merge_with_industry_data(df, l2_df)\n\u001b[32m 5\u001b[39m df = df.rename(columns={\u001b[33m'\u001b[39m\u001b[33ml2_code\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mcat_l2_code\u001b[39m\u001b[33m'\u001b[39m})\n",
"\u001b[36mFile \u001b[39m\u001b[32m/mnt/d/PyProject/NewStock/main/utils/utils.py:14\u001b[39m, in \u001b[36mread_and_merge_h5_data\u001b[39m\u001b[34m(h5_filename, key, columns, df, join, on, prefix)\u001b[39m\n\u001b[32m 11\u001b[39m processed_columns.append(col)\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# 从 HDF5 文件读取数据,选择需要的列\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m data = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_hdf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh5_filename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprocessed_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# 修改列名,如果列名以前有 _加上 _\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m data.columns:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/stock/lib/python3.13/site-packages/pandas/io/pytables.py:424\u001b[39m, in \u001b[36mread_hdf\u001b[39m\u001b[34m(path_or_buf, key, mode, errors, where, start, stop, columns, iterator, chunksize, **kwargs)\u001b[39m\n\u001b[32m 421\u001b[39m exists = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 423\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m exists:\n\u001b[32m--> \u001b[39m\u001b[32m424\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFile \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_buf\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not exist\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 426\u001b[39m store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)\n\u001b[32m 427\u001b[39m \u001b[38;5;66;03m# can't auto open/close if we are using an iterator\u001b[39;00m\n\u001b[32m 428\u001b[39m \u001b[38;5;66;03m# so delegate to the iterator\u001b[39;00m\n",
"\u001b[31mFileNotFoundError\u001b[39m: File ../../data/industry_data.h5 does not exist"
]
}
],
"source": [
"\n",
"l2_df = read_and_merge_h5_data('../../data/industry_data.h5', key='industry_data',\n",
" columns=['ts_code', 'l2_code', 'in_date'],\n",
" df=None, on=['ts_code'], join='left')\n",
@@ -247,7 +557,7 @@
"df = df.rename(columns={'l2_code': 'cat_l2_code'})\n",
"# df = df.merge(index_data, on='trade_date', how='left')\n",
"\n",
"days = 2\n",
"days = 5\n",
"df = df.sort_values(by=['ts_code', 'trade_date'])\n",
"# df['future_return'] = df.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
@@ -265,7 +575,7 @@
"\n",
"def select_pre_zt_stocks_dynamic(stock_df):\n",
" def select_stocks(group):\n",
" return group.nlargest(1000, 'return_5') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
" return group.nsmallest(1000, 'total_mv') # 如果循环结束仍未找到足够标签,则返回最大数量的股票\n",
"\n",
" stock_df = stock_df.groupby('trade_date', group_keys=False).apply(select_stocks)\n",
" return stock_df\n",
@@ -281,7 +591,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "1c1dd3d6",
"metadata": {},
"outputs": [
@@ -316,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "2c60c1ea",
"metadata": {},
"outputs": [
@@ -541,7 +851,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "e088bd8a357e815a",
"metadata": {
"ExecuteTime": {
@@ -785,7 +1095,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "a0b3d7551ef0c81f",
"metadata": {
"ExecuteTime": {
@@ -1006,7 +1316,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "new_trader",
"display_name": "stock",
"language": "python",
"name": "python3"
},
@@ -1020,7 +1330,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.13.2"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,90 @@
"""
行业/横截面因子模块
包含基于日期截面的行业/横截面因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import DateWiseFactor
class IndustryMomentumFactor(DateWiseFactor):
    """Industry-momentum factor: per-date deviation of a stock's factor
    value from the mean of its L2-industry peers."""

    def __init__(self, factor_name: str):
        super().__init__(
            name="industry_momentum",
            parameters={"factor_name": factor_name},
            required_factor_ids=[factor_name, "cat_l2_code"]
        )
        self.factor_name = factor_name

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return `factor - mean(factor within industry)` for one trade date.

        Emits an all-null series when the target factor column or the
        industry-code column is absent, so downstream shapes are preserved.
        """
        needed = (self.factor_name, "cat_l2_code")
        if any(col not in group_df.columns for col in needed):
            return pl.Series([None] * len(group_df)).alias(self.factor_id)

        # Average the target factor within each L2 industry.
        per_industry = group_df.group_by("cat_l2_code").agg([
            pl.col(self.factor_name).mean().alias("industry_mean")
        ])
        # Left join keeps the row order of group_df.
        joined = group_df.join(per_industry, on="cat_l2_code", how="left")
        # Signed deviation of each stock from its industry average.
        return (joined[self.factor_name] - joined["industry_mean"]).alias(self.factor_id)
class MarketBreadthFactor(DateWiseFactor):
    """Market-breadth factor: fraction of stocks advancing on the day."""

    def __init__(self):
        super().__init__(
            name="market_breadth",
            parameters={},
            required_factor_ids=["pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Broadcast the share of up-stocks to every row of the date group."""
        n_stocks = len(group_df)
        n_advancing = (group_df["pct_chg"] > 0).sum()
        # Epsilon keeps the ratio defined for an (unexpected) empty group.
        breadth = n_advancing / (n_stocks + 1e-8)
        return pl.Series([breadth] * n_stocks).alias(self.factor_id)
class SectorRotationFactor(DateWiseFactor):
    """Sector-rotation factor: per-date deviation of a stock's value in
    `sector_factor` from its L2-sector average."""

    def __init__(self, sector_factor: str):
        super().__init__(
            name=f"sector_rotation_{sector_factor}",
            parameters={"sector_factor": sector_factor},
            required_factor_ids=[sector_factor, "cat_l2_code"]
        )
        self.sector_factor = sector_factor

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Return `value - sector mean(value)` for one trade date; all-null
        when required columns are missing."""
        needed = (self.sector_factor, "cat_l2_code")
        if any(col not in group_df.columns for col in needed):
            return pl.Series([None] * len(group_df)).alias(self.factor_id)

        # Mean of the chosen factor within each sector on this date.
        sector_avg = group_df.group_by("cat_l2_code").agg([
            pl.col(self.sector_factor).mean().alias("sector_mean")
        ])
        # Left join keeps the row order of group_df.
        joined = group_df.join(sector_avg, on="cat_l2_code", how="left")
        return (joined[self.sector_factor] - joined["sector_mean"]).alias(self.factor_id)

View File

@@ -0,0 +1,174 @@
"""
动量因子模块
包含基于股票截面和日期截面的动量因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import StockWiseFactor, DateWiseFactor
# -------------------- 股票截面因子:基于时间序列的动量因子 --------------------
class ReturnFactor(StockWiseFactor):
    """N-day simple-return factor, computed along each stock's history."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="return",
            parameters={"period": period},
            required_factor_ids=["close"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Percentage change of close over `period` trading days."""
        close_series = group_df["close"]
        return close_series.pct_change(self.period).alias(self.factor_id)
class VolatilityFactor(StockWiseFactor):
    """N-day return-volatility factor (rolling std of daily pct_chg)."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="volatility",
            parameters={"period": period},
            required_factor_ids=["pct_chg"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Rolling standard deviation of daily returns over `period` days."""
        daily_returns = group_df["pct_chg"]
        return daily_returns.rolling_std(self.period).alias(self.factor_id)
class MomentumFactor(StockWiseFactor):
    """Momentum factor: cumulative daily return over the past N days."""

    def __init__(self, period: int = 20):
        super().__init__(
            name="momentum",
            parameters={"period": period},
            required_factor_ids=["pct_chg"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Rolling sum of daily returns over `period` days."""
        daily_returns = group_df["pct_chg"]
        return daily_returns.rolling_sum(self.period).alias(self.factor_id)
class MomentumAcceleration(StockWiseFactor):
    """Momentum-acceleration factor.

    Difference between short-horizon and long-horizon volatility-adjusted
    momentum; positive readings flag trends that are forming or strengthening.
    """

    def __init__(self, short_period: int = 20, long_period: int = 60):
        super().__init__(
            name="momentum_acceleration",
            parameters={"short_period": short_period, "long_period": long_period},
            required_factor_ids=["pct_chg"]
        )
        self.short_period = short_period
        self.long_period = long_period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Short-window minus long-window risk-adjusted momentum."""
        eps = 1e-9
        returns = group_df["pct_chg"]

        def risk_adjusted_momentum(window: int) -> pl.Series:
            # Rolling cumulative return scaled by its rolling dispersion.
            return returns.rolling_sum(window) / (returns.rolling_std(window) + eps)

        short_leg = risk_adjusted_momentum(self.short_period)
        long_leg = risk_adjusted_momentum(self.long_period)
        return (short_leg - long_leg).alias(self.factor_id)
class TrendEfficiency(StockWiseFactor):
    """Trend-efficiency factor.

    Net N-day price change divided by the sum of absolute daily changes over
    the same window — a signal-to-noise ratio for the trend. Values near 1
    indicate a clean, low-noise trend.
    """

    def __init__(self, period: int = 20):
        super().__init__(
            name="trend_efficiency",
            parameters={"period": period},
            # Computed purely from closing prices.
            required_factor_ids=["close"]
        )
        self.period = period

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Efficiency ratio = |net move| / (total path length + eps)."""
        eps = 1e-9
        close = group_df["close"]
        # Signal: absolute net change vs. `period` days ago.
        signal = close.diff(self.period).abs()
        # Noise: rolling sum of absolute day-to-day moves (total path).
        noise = close.diff(1).abs().rolling_sum(self.period)
        return (signal / (noise + eps)).alias(self.factor_id)
# -------------------- 统一计算函数 --------------------
def calculate_momentum_factors(df: pl.DataFrame) -> pl.DataFrame:
    """Apply the momentum factor pipeline to a stock panel.

    Parameters:
        df (pl.DataFrame): input table; expected to contain at least
            ts_code, trade_date, close, pct_chg (and high/low/vol for
            some downstream factors).

    Returns:
        pl.DataFrame: a clone of `df` with every successfully computed
        factor column added. A factor whose class is unavailable or whose
        computation fails is skipped with a printed warning instead of
        aborting the whole pipeline.
    """
    result_df = df.clone()

    # Stock-wise (time-series) factors first.
    # Each entry is (label, zero-arg factory). Construction is deferred into
    # the try block: `RSI_Factor` (and `CrossSectionalRanking` below) are not
    # imported in this module, so eager list construction would raise a
    # NameError BEFORE the per-operator error handling could run, killing the
    # whole pipeline. Lazy construction turns that into a per-factor warning.
    stock_factories = [
        ("ReturnFactor(5)", lambda: ReturnFactor(5)),
        ("ReturnFactor(20)", lambda: ReturnFactor(20)),
        ("VolatilityFactor(10)", lambda: VolatilityFactor(10)),
        ("VolatilityFactor(30)", lambda: VolatilityFactor(30)),
        ("MomentumFactor(10)", lambda: MomentumFactor(10)),
        ("MomentumFactor(30)", lambda: MomentumFactor(30)),
        # NOTE(review): RSI_Factor is not defined/imported in this module —
        # confirm its source module and import it explicitly.
        ("RSI_Factor(14)", lambda: RSI_Factor(14)),
    ]
    for label, factory in stock_factories:
        try:
            operator = factory()
            result_df = operator.apply(result_df)
        except Exception as e:
            print(f"计算股票截面因子 {label} 时出错: {e}")

    # Date-wise (cross-sectional ranking) factors second.
    # NOTE(review): CrossSectionalRanking is not defined/imported in this
    # module either — confirm its source module.
    date_factories = [
        ("CrossSectionalRanking('return_5d')", lambda: CrossSectionalRanking("return_5d")),
        ("CrossSectionalRanking('return_20d')", lambda: CrossSectionalRanking("return_20d")),
        ("CrossSectionalRanking('volatility_10d')", lambda: CrossSectionalRanking("volatility_10d")),
        ("CrossSectionalRanking('momentum_10d')", lambda: CrossSectionalRanking("momentum_10d")),
    ]
    for label, factory in date_factories:
        try:
            operator = factory()
            result_df = operator.apply(result_df)
        except Exception as e:
            print(f"计算日期截面因子 {label} 时出错: {e}")

    return result_df

View File

@@ -33,7 +33,7 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
# 或者如果 'in_de' 已经是 1 和 -1 (或类似数值),则可以跳过映射,但要确保类型正确
stk_trade_processed_df['_direction'] = stk_trade_processed_df['in_de'].map(in_de_map)
# 如果 _direction 列在映射后可能产生NaN (因为in_de中有未覆盖的值),需要处理
if stk_trade_processed_df['_direction'].isnull().any():
if stk_trade_processed_df['_direction'].is_null().any():
print("警告: 'in_de' 列中存在未映射的值,可能导致 _direction 列出现NaN。")
# 可以选择填充NaN例如用0填充或者移除这些行
# stk_trade_processed_df['_direction'].fillna(0, inplace=True)
@@ -109,4 +109,3 @@ def holder_trade_factors(all_data_df: pd.DataFrame,
print("股东增减持因子计算完成。")
return df_merged

View File

@@ -0,0 +1,348 @@
"""
资金流因子模块
包含基于股票截面的资金流因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import StockWiseFactor
class LGFlowFactor(StockWiseFactor):
    """Net large-order volume: (large + extra-large buys) minus
    (large + extra-large sells)."""

    def __init__(self):
        super().__init__(
            name="lg_flow",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Large-order net inflow per row."""
        big_buys = group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        big_sells = group_df["sell_lg_vol"] + group_df["sell_elg_vol"]
        return (big_buys - big_sells).alias(self.factor_id)
class FlowIntensityFactor(StockWiseFactor):
    """Flow-intensity factor: large-order net inflow scaled by total volume."""

    def __init__(self):
        super().__init__(
            name="flow_intensity",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Net large-order volume divided by (vol + eps)."""
        big_buys = group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        big_sells = group_df["sell_lg_vol"] + group_df["sell_elg_vol"]
        net_large_flow = big_buys - big_sells
        # Epsilon guards against zero-volume rows.
        return (net_large_flow / (group_df["vol"] + 1e-8)).alias(self.factor_id)
class FlowDivergenceFactor(StockWiseFactor):
    """Flow-divergence factor: retail (small-order) net flow minus
    institutional (large + extra-large) net flow."""

    def __init__(self):
        super().__init__(
            name="flow_divergence",
            parameters={},
            required_factor_ids=["buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Signed gap between small-order and large-order net volume."""
        retail_net = group_df["buy_sm_vol"] - group_df["sell_sm_vol"]
        institutional_net = (
            group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        ) - (group_df["sell_lg_vol"] + group_df["sell_elg_vol"])
        return (retail_net - institutional_net).alias(self.factor_id)
class FlowStructureFactor(StockWiseFactor):
    """Flow-structure factor: day-over-day change in the large-order share
    of total buy volume."""

    def __init__(self):
        super().__init__(
            name="flow_structure",
            parameters={},
            required_factor_ids=["buy_sm_vol", "buy_lg_vol", "buy_elg_vol", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """diff of (large+extra-large buys) / (all buys + eps)."""
        all_buys = group_df["buy_sm_vol"] + group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        # Epsilon guards against zero total buy volume.
        large_share = (group_df["buy_lg_vol"] + group_df["buy_elg_vol"]) / (all_buys + 1e-8)
        return large_share.diff().alias(self.factor_id)
class FlowAccelerationFactor(StockWiseFactor):
    """Flow-acceleration factor: second difference of the large-order
    net-inflow series."""

    def __init__(self):
        super().__init__(
            name="flow_acceleration",
            parameters={},
            required_factor_ids=["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """diff(diff(net large-order flow)) along the stock's history."""
        net_large_flow = (
            group_df["buy_lg_vol"] + group_df["buy_elg_vol"]
        ) - (group_df["sell_lg_vol"] + group_df["sell_elg_vol"])
        return net_large_flow.diff().diff().alias(self.factor_id)
class CostSqueeze(StockWiseFactor):
    """Chip-squeeze factor: a narrow holder-cost band, price near the median
    cost, and shrinking volume jointly produce a high squeeze score."""

    # Output column name.
    factor_id = "factor_cost_squeeze"
    # Input columns read from the per-stock frame.
    required_factor_ids = ["close", "cost_15pct", "cost_50pct", "cost_85pct", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Log-transformed squeeze score for one stock's time-ordered frame."""
        close = g["close"]
        band_width = g["cost_85pct"] - g["cost_15pct"]
        # Price position relative to the median holder cost, in band units.
        rel_pos = (close - g["cost_50pct"]) / (band_width + 1e-6)
        # Volume relative to its trailing 5-day average (contraction < 1).
        vol_vs_5d = g["vol"] / (g["vol"].rolling_mean(window_size=5, min_periods=1) + 1e-6)
        # Narrow band * centered price * shrinking volume -> high squeeze.
        raw_score = (
            (1.0 / (band_width / (close + 1e-6) + 1e-6))
            * (1.0 - rel_pos.abs())
            * (1.0 / (vol_vs_5d + 1e-6))
        )
        # Log transform tames the heavy right tail.
        return (raw_score + 1.0).log().alias(self.factor_id)
class HighCostSelling(StockWiseFactor):
    """High-zone distribution factor: price above the 85% holder-cost level
    while small orders buy and large orders sell."""

    # Output column name.
    factor_id = "factor_high_cost_selling"
    # Input columns read from the per-stock frame.
    required_factor_ids = ["close", "cost_85pct", "buy_sm_vol", "sell_lg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Log of (above-band flag * retail buy share * large sell share)."""
        total_vol = g["vol"]
        above_band = (g["close"] > g["cost_85pct"]).cast(pl.Float64)
        retail_buy_share = g["buy_sm_vol"] / (total_vol + 1e-6)
        large_sell_share = g["sell_lg_vol"] / (total_vol + 1e-6)
        raw = above_band * retail_buy_share * large_sell_share
        # Log transform keeps extreme products on a usable scale.
        return (raw + 1e-6).log().alias(self.factor_id)
class LowCostAccumulation(StockWiseFactor):
    """Low-zone accumulation factor.

    Flags rows where price sits below the 15% holder-cost level, is NOT
    making a fresh 5-day closing low, and large/extra-large orders account
    for a high share of volume — i.e. big money absorbing chips near a base.
    """
    # Output column name.
    factor_id = "factor_low_cost_accumulation"
    # NOTE(review): "his_low" is listed as required but no longer used by the
    # computation (the 5-day low is taken from close instead); kept to avoid
    # changing the operator-framework contract.
    required_factor_ids = ["close", "his_low", "cost_15pct", "buy_lg_vol", "buy_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},  # no parameters; extensible
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Log of (below-cost flag * not-new-low flag * big-buy share)."""
        close = g["close"]
        vol = g["vol"]
        is_below_15 = (close < g["cost_15pct"]).cast(pl.Float64)
        # A fresh 5-day closing low disqualifies the row: accumulation should
        # coincide with price stabilizing, not breaking down.
        # (An earlier draft also computed a rolling min of his_low here and
        # then superseded it; that dead computation has been removed.)
        close_rolling_min_5 = close.rolling_min(window_size=5, min_periods=1)
        not_new_low = (close >= close_rolling_min_5).cast(pl.Float64)
        big_buy_vol = g["buy_lg_vol"] + g["buy_elg_vol"]
        big_buy_ratio = big_buy_vol / (vol + 1e-6)
        lc_am = is_below_15 * not_new_low * big_buy_ratio
        # Log transform for scale stability.
        return (lc_am + 1e-6).log().alias(self.factor_id)
class InstNetAccum(StockWiseFactor):
    """Institutional net-accumulation factor.

    Big-order net traded volume scaled by an approximate free-float share
    count, zeroed out after sharp 3-day price moves so the signal captures
    quiet accumulation rather than momentum chasing.
    """
    # Output column name.
    factor_id = "inst_net_accum"
    # Input columns read from the per-stock frame.
    required_factor_ids = ["close", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "circ_mv"]
    def __init__(self):
        super(InstNetAccum, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )
    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        """Compute the factor for one stock's time-ordered frame `g`."""
        close = g["close"]
        buy_lg = g["buy_lg_vol"]
        buy_elg = g["buy_elg_vol"]
        sell_lg = g["sell_lg_vol"]
        sell_elg = g["sell_elg_vol"]
        circ_mv = g["circ_mv"]
        # Net volume from large + extra-large orders.
        big_net_vol = (buy_lg + buy_elg) - (sell_lg + sell_elg)
        # Approximate free-float share count.
        # NOTE(review): assumes circ_mv and close are in compatible units so
        # circ_mv / close ~ shares — confirm the unit convention upstream.
        circ_shares = circ_mv / (close + 1e-6)
        ina = big_net_vol / (circ_shares + 1e-6)
        # 3-day return: used to suppress the signal after sharp moves.
        ret3 = close / close.shift(3) - 1
        ret3 = ret3.fill_null(strategy="forward").fill_null(0.0)
        # Keep the raw value only when |3-day move| < 5%; otherwise zero.
        # NOTE(review): pl.when(...) builds an expression, so this may yield a
        # pl.Expr rather than a pl.Series — confirm the framework accepts it.
        ina = pl.when(ret3.abs() < 0.05).then(ina).otherwise(0.0)
        # NOTE(review): log1p is NaN for values < -1 (heavy net selling) —
        # confirm that is the intended treatment of extreme outflows.
        return ina.log1p().alias(self.factor_id)
class ChipLockin(StockWiseFactor):
    """Chip lock-in: cost-distribution band contraction weighted by the
    stability of the winner rate."""
    factor_id = "chip_lockin"
    required_factor_ids = ["cost_5pct", "cost_95pct", "winner_rate"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Width of the chip-cost distribution (95th minus 5th percentile cost).
        band = g["cost_95pct"] - g["cost_5pct"]
        short_avg = band.rolling_mean(window_size=5, min_periods=1)
        long_avg = band.rolling_mean(window_size=10, min_periods=1)
        # Positive when the recent band is tighter than the longer-run band.
        contraction = (long_avg - short_avg) / (long_avg + 1e-6)  # epsilon avoids /0
        # Dispersion of the winner rate over 5 days; nulls become a tiny epsilon.
        winner_vol = g["winner_rate"].rolling_std(window_size=5, min_periods=1).fill_null(1e-6)
        score = contraction * (1.0 / (winner_vol + 1e-6))
        return score.log1p().alias(self.factor_id)
class RetailOutInstIn(StockWiseFactor):
    """Retail outflow coincident with institutional inflow.

    Signal = (small-order net sell volume) x (large + extra-large net buy
    volume), log1p-compressed.

    Fix: the original read ``close`` and computed a 5-day rolling minimum
    (``close_min_5``) that was never used in the result; the dead computation
    is removed. ``close`` stays declared in ``required_factor_ids`` for
    backward compatibility.
    """
    factor_id = "retail_out_inst_in"
    required_factor_ids = ["close", "buy_sm_vol", "sell_sm_vol", "buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol"]

    def __init__(self):
        super(RetailOutInstIn, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Retail (small-order) net outflow.
        small_net_out = g["sell_sm_vol"] - g["buy_sm_vol"]
        # Institutional (large + extra-large) net inflow.
        big_net_in = (g["buy_lg_vol"] + g["buy_elg_vol"]) - (g["sell_lg_vol"] + g["sell_elg_vol"])
        roii = small_net_out * big_net_in
        # log1p of a product below -1 yields NaN — presumably acceptable here.
        return roii.log1p().alias(self.factor_id)
class AccumAccel(StockWiseFactor):
    """Acceleration of big-order net accumulation: the change in the 5-day
    smoothed net big-order volume ratio versus its value 5 days ago."""
    factor_id = "accum_accel"
    required_factor_ids = ["buy_lg_vol", "buy_elg_vol", "sell_lg_vol", "sell_elg_vol", "vol"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids,
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        # Net big-order volume as a share of total volume.
        net_big = (g["buy_lg_vol"] + g["buy_elg_vol"]) - (g["sell_lg_vol"] + g["sell_elg_vol"])
        net_share = net_big / (g["vol"] + 1e-6)
        smoothed = net_share.rolling_mean(window_size=5, min_periods=1)
        # Same quantity 5 days earlier; leading nulls (no history) fall back to 0.
        lagged = smoothed.shift(5).fill_null(strategy="forward").fill_null(0.0)
        return (smoothed - lagged).log1p().alias(self.factor_id)

View File

@@ -0,0 +1,190 @@
"""
因子算子框架 - Polars 实现(最终精简版)
- 因子自行生成 ID
- parameters 仅含计算参数(不含因子引用)
- required_factor_ids 是因子ID字符串列表
- calc_factor 通过 self.parameters 和 self.required_factor_ids 获取所需信息
"""
from abc import ABC, abstractmethod
from typing import List, Literal, Dict, Any
from collections import defaultdict, deque
import json
import polars as pl
def _normalize_params(params: Dict[str, Any]) -> str:
if not params:
return ""
return json.dumps(sorted(params.items()), separators=(",", ":"))
def _simple_factor_id(name: str, params: Dict[str, Any]) -> str:
"""
生成简洁因子ID如:
("sma", {"window": 5}) → "sma_5"
("return", {"days": 20}) → "return_20"
("rank", {"input": "sma_5"}) → "rank_sma_5"
要求: params 的值必须是简单类型str/int/float/bool
"""
if not params:
return name
# 提取所有参数值,按 key 排序保证一致性
parts = []
for k in sorted(params.keys()):
v = params[k]
if isinstance(v, (str, int, float, bool)):
# 布尔转小写字符串
if isinstance(v, bool):
v = str(v).lower()
parts.append(str(v))
else:
raise ValueError(f"Unsupported parameter type for '{k}': {type(v)}. "
f"Only str/int/float/bool allowed for simple ID.")
return f"{name}_{'_'.join(parts)}"
class BaseFactor(ABC):
    """Abstract factor: a name, its calculation parameters, and the factor ids
    it depends on. The concrete id is derived once at construction time."""

    def __init__(self, name: str, parameters: Dict[str, Any], required_factor_ids: List[str]):
        self.name = name
        self.parameters = parameters
        self.required_factor_ids = required_factor_ids
        # Derived identifier, e.g. "sma_5"; stable for identical parameters.
        self.factor_id = self._generate_factor_id()

    def _generate_factor_id(self) -> str:
        """Delegate id construction to the module-level helper."""
        return _simple_factor_id(self.name, self.parameters)

    def get_factor_id(self) -> str:
        """Accessor for the derived factor id."""
        return self.factor_id

    @abstractmethod
    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        """Compute this factor's values for one group of rows."""
        pass

    @property
    @abstractmethod
    def operator_type(self) -> Literal["stock", "date"]:
        """Whether the factor groups by stock (time-series) or by date (cross-section)."""
        pass
class StockWiseFactor(BaseFactor):
    """Factor computed independently per stock over its date-ordered history."""

    @property
    def operator_type(self) -> Literal["stock"]:
        return "stock"

    def _sectional_roll(self, df: pl.DataFrame) -> pl.DataFrame:
        """Run calc_factor once per ts_code group, rows ordered by trade_date."""
        ordered = df.sort(["ts_code", "trade_date"])
        return (
            ordered
            .group_by("ts_code", maintain_order=True)
            .map_groups(lambda g: g.with_columns(self.calc_factor(g)))
            .select(["ts_code", "trade_date", self.factor_id])
        )

    def apply(self, df: pl.DataFrame) -> pl.DataFrame:
        """Compute the factor and left-join it back onto the input frame.

        Raises:
            ValueError: if any declared dependency column is absent from df.
        """
        unmet = [fid for fid in self.required_factor_ids if fid not in df.columns]
        if unmet:
            raise ValueError(f"Missing dependencies for {self.factor_id}: {unmet}")
        factor_table = self._sectional_roll(df)
        return df.join(
            factor_table.select(["ts_code", "trade_date", self.factor_id]),
            on=["ts_code", "trade_date"],
            how="left",
        )
class DateWiseFactor(BaseFactor):
    """Factor computed cross-sectionally: one group per trade_date."""

    @property
    def operator_type(self) -> Literal["date"]:
        return "date"

    def _sectional_roll(self, df: pl.DataFrame) -> pl.DataFrame:
        """Run calc_factor once per trade_date group, rows ordered by ts_code."""
        ordered = df.sort(["trade_date", "ts_code"])
        return (
            ordered
            .group_by("trade_date", maintain_order=True)
            .map_groups(lambda g: g.with_columns(self.calc_factor(g)))
            .select(["ts_code", "trade_date", self.factor_id])
        )

    def apply(self, df: pl.DataFrame) -> pl.DataFrame:
        """Compute the factor and left-join it back onto the input frame.

        Raises:
            ValueError: if any declared dependency column is absent from df.
        """
        unmet = [fid for fid in self.required_factor_ids if fid not in df.columns]
        if unmet:
            raise ValueError(f"Missing dependencies for {self.factor_id}: {unmet}")
        factor_table = self._sectional_roll(df)
        return df.join(
            factor_table.select(["ts_code", "trade_date", self.factor_id]),
            on=["ts_code", "trade_date"],
            how="left",
        )
class FactorGraph:
    """Registry plus DAG executor for factor computation.

    Factors register by id; :meth:`compute` resolves transitive dependencies,
    topologically orders the registered ones, and applies them in sequence.
    Ids appearing in ``required_factor_ids`` that are not registered are
    treated as raw input columns already present in the DataFrame.

    Fix: the set of computable factors (``to_compute``) was previously rebuilt
    a second time just before printing the dependency graph; the redundant
    recomputation is removed.
    """
    def __init__(self):
        self._factors = {}  # factor_id -> factor instance

    def add_factor(self, factor: BaseFactor):
        """Register a factor; duplicate ids are rejected."""
        fid = factor.get_factor_id()
        if fid in self._factors:
            raise ValueError(f"Factor '{fid}' already registered.")
        self._factors[fid] = factor

    def _topological_sort(self, target_ids: List[str]) -> List[str]:
        """Return an execution order covering the targets and their deps.

        Raises:
            RuntimeError: if the registered factors form a dependency cycle.
        """
        # BFS to collect every id reachable from the targets (raw columns included).
        all_factors = set()
        queue = deque(target_ids)
        while queue:
            f = queue.popleft()
            if f not in all_factors:
                all_factors.add(f)
                if f in self._factors:
                    for dep in self._factors[f].required_factor_ids:
                        if dep not in all_factors:
                            queue.append(dep)
        # Only registered factors are computed; the rest are raw input columns.
        to_compute = {f for f in all_factors if f in self._factors}
        # Kahn's algorithm over the computable subgraph.
        indegree = {f: 0 for f in to_compute}
        adj = defaultdict(list)
        for f in to_compute:
            for dep in self._factors[f].required_factor_ids:
                if dep in to_compute:
                    adj[dep].append(f)
                    indegree[f] += 1
        queue = deque([f for f in to_compute if indegree[f] == 0])
        order = []
        while queue:
            node = queue.popleft()
            order.append(node)
            for nb in adj[node]:
                indegree[nb] -= 1
                if indegree[nb] == 0:
                    queue.append(nb)
        # Debug dump of the computable dependency graph.
        print("\n=== Factor Dependency Graph ===")
        for fid in sorted(to_compute):
            deps = self._factors[fid].required_factor_ids
            compute_deps = [d for d in deps if d in to_compute]  # show computable deps only
            print(f"{fid} -> {compute_deps}")
        print("================================\n")
        if len(order) != len(to_compute):
            print(len(order), len(to_compute))
            raise RuntimeError("Circular dependency!")
        return order

    def compute(self, df: pl.DataFrame, target_factor_ids: List[str]) -> pl.DataFrame:
        """Apply all required factors to ``df`` in dependency order."""
        exec_order = self._topological_sort(target_factor_ids)
        current_df = df.clone()
        for fid in exec_order:
            print(fid)  # progress trace
            if fid in current_df.columns:
                continue  # already materialized (e.g. precomputed column)
            factor = self._factors[fid]
            current_df = factor.apply(current_df)
        return current_df

86
main/factor/qlib_utils.py Normal file
View File

@@ -0,0 +1,86 @@
import polars as pl
import pandas as pd
import numpy as np
import qlib
from qlib.data.dataset.handler import DataHandlerLP
from qlib.contrib.report import analysis_model, analysis_position
from qlib.constant import REG_CN
from typing import List
import polars as pl
import pandas as pd
def prepare_data(
    polars_df: pl.DataFrame,
    label_horizon: int = 5,
    open_col: str = "open",
    date_col: str = "trade_date",
    code_col: str = "ts_code",
) -> pd.DataFrame:
    """Build a qlib-style (datetime, instrument) pandas frame with a label.

    The label is the open-to-open return from buying at the next day's open
    and selling at the open ``label_horizon`` days after that.

    Raises:
        ValueError: if any of the required columns is missing.
    """
    absent = [col for col in (date_col, code_col, open_col) if col not in polars_df.columns]
    if absent:
        raise ValueError(f"Missing columns: {absent}")
    ordered = polars_df.sort([code_col, date_col])
    # Buy at T+1 open; sell at the open label_horizon days later.
    labeled = (
        ordered
        .with_columns([
            pl.col(open_col).shift(-1).over(code_col).alias("__buy_price"),
            pl.col(open_col).shift(-(1 + label_horizon)).over(code_col).alias("__sell_price"),
        ])
        .with_columns([
            (pl.col("__sell_price") / pl.col("__buy_price") - 1).alias("label")
        ])
        .drop(["__buy_price", "__sell_price"])
    )
    # Convert to the (datetime, instrument) MultiIndex layout qlib expects.
    out = labeled.to_pandas()
    out.rename(columns={date_col: "datetime", code_col: "instrument"}, inplace=True)
    out["datetime"] = pd.to_datetime(out["datetime"])
    out.set_index(["datetime", "instrument"], inplace=True)
    out.sort_index(inplace=True)
    return out
# 2. Qlib initialization
def initialize_qlib(provider_uri: str = "/mnt/d/PyProject/NewStock/data/qlib"):
    """Initialize qlib with the China A-share region settings.

    Args:
        provider_uri: Path to the qlib data bundle. Parameterized (previously
            hardcoded) so other machines can point at their own bundle; the
            default preserves the original behavior.

    Side effects:
        Calls ``qlib.init`` (global state) and prints a confirmation line.
    """
    # region=REG_CN selects the A-share trading calendar and cost settings.
    qlib.init(provider_uri=provider_uri, region=REG_CN, freq="day")
    print("Qlib has been initialized in memory mode.")
import pandas as pd
import lightgbm as lgb
from qlib.workflow import R
from qlib.workflow.record_temp import PortAnaRecord # SignalRecord 在此场景下未被直接使用
def train_and_backtest_from_df(
    df: pd.DataFrame,
    all_features: list,
    label_col: str = "label",
    topk: int = 50,
    start_train: str = "2019-01-01",
    end_train: str = "2021-12-31",
    start_valid: str = "2022-01-01",
    end_valid: str = "2022-12-31",
    start_test: str = "2023-01-01",
    end_test: str = "2023-12-31",
):
    """Train a model and run a backtest straight from a prepared pandas frame.

    NOTE(review): work in progress — only the data-handler setup and debug
    prints are implemented; ``all_features``, ``label_col``, ``topk`` and the
    date-range parameters are currently unused. Confirm before relying on the
    backtest behavior implied by the name.

    Args:
        df: MultiIndex (datetime, instrument) frame of features plus label.

    Raises:
        ValueError: if ``df`` does not carry a MultiIndex.
    """
    # === 1. Manually prepare the data ===
    if not isinstance(df.index, pd.MultiIndex):
        raise ValueError("df 必须是 MultiIndex (datetime, instrument)")
    # Normalize index level names and ensure the datetime level is datetime64.
    df.index = df.index.set_names(["datetime", "instrument"])
    df.index = df.index.set_levels(pd.to_datetime(df.index.levels[0]), level='datetime')
    df.sort_index(inplace=True)
    # Wrap the frame in a qlib DataHandlerLP and dump its views for inspection.
    dh = DataHandlerLP.from_df(df)
    print(dh.fetch())
    # NOTE(review): _infer/_learn are private attributes — debug output only.
    print(dh._infer)
    print(dh._learn)

View File

@@ -0,0 +1,156 @@
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
def select_factors(
    df,
    all_features,
    label_col='label',
    ic_threshold=0.01,
    corr_threshold=0.5,
    ir_threshold=0.3,
    sign_consistency_threshold=0.3,
    perm_imp_threshold=0.0,
    n_perm=5,
    random_state=42,
    verbose=True  # whether to print a log line for every filtering step
):
    """Multi-stage factor screening with per-step logging.

    Pipeline: univariate Spearman IC filter -> greedy correlation de-dup ->
    permutation-importance filter (RandomForest) -> per-date IC stability
    (IR and sign-consistency) check. Each stage falls back to the previous
    stage's survivors if it would filter everything out.

    Fix: the per-date IC loop previously used a bare ``except:`` which also
    swallowed KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.

    Args:
        df: MultiIndex (datetime, instrument) frame with features and label.
        all_features: candidate factor column names.

    Returns:
        (final_features, log) where log maps stage name -> surviving count.
    """
    log = {}  # surviving factor count after each stage
    if verbose:
        print(f"🔍 开始因子筛选 | 初始因子数: {len(all_features)}")
    # --- Step 0: flatten the MultiIndex so rows are plain observations ---
    needed_cols = all_features + [label_col]
    df_flat = df[needed_cols].reset_index()
    X = df_flat[all_features]
    y = df_flat[label_col]
    # --- Step 1: univariate IC filter (pooled Spearman rank correlation) ---
    ic_series = X.apply(lambda col: spearmanr(col, y, nan_policy='omit')[0])
    valid_features = ic_series[ic_series.abs() >= ic_threshold].index.tolist()
    log['after_univariate'] = len(valid_features)
    if verbose:
        dropped = len(all_features) - len(valid_features)
        print(f" ✅ 单变量筛选 (|IC| ≥ {ic_threshold}) → 保留 {len(valid_features)} 个 (+{dropped} 被过滤)")
    if not valid_features:
        return [], log
    del X
    X_valid = df_flat[valid_features]
    # --- Step 2: greedy redundancy removal on |Spearman| correlation ---
    # Keeps a factor if it is weakly correlated with everything selected so
    # far; otherwise it replaces its most-correlated rival when its |IC| wins.
    corr_mat = X_valid.corr(method='spearman').abs()
    selected = []
    for f in valid_features:
        if not selected:
            selected.append(f)
        else:
            max_corr = corr_mat.loc[f, selected].max()
            if max_corr < corr_threshold:
                selected.append(f)
            else:
                existing = corr_mat.loc[f, selected].idxmax()
                if abs(ic_series[f]) > abs(ic_series[existing]):
                    selected.remove(existing)
                    selected.append(f)
    del corr_mat, X_valid
    log['after_redundancy'] = len(selected)
    if verbose:
        dropped = len(valid_features) - len(selected)
        print(f" 🔗 去冗余 (corr < {corr_threshold}) → 保留 {len(selected)} 个 (+{dropped} 被过滤)")
    if not selected:
        return [], log
    # --- Step 3: permutation importance via a small random forest ---
    X_sel = df_flat[selected]
    model = RandomForestRegressor(
        n_estimators=50,
        max_depth=10,
        random_state=random_state,
        n_jobs=-1
    )
    model.fit(X_sel, y)
    perm_result = permutation_importance(
        model, X_sel, y,
        n_repeats=n_perm,
        random_state=random_state,
        n_jobs=-1
    )
    perm_imp = pd.Series(perm_result.importances_mean, index=selected)
    candidates = perm_imp[perm_imp > perm_imp_threshold].index.tolist()
    del model, perm_result, X_sel
    # Fall back to the de-duplicated set if everything was filtered out.
    if not candidates:
        candidates = selected
        if verbose:
            print(" ⚠️ Permutation 全过滤,回退到去冗余结果")
    log['after_permutation'] = len(candidates)
    if verbose and len(candidates) != len(selected):
        dropped = len(selected) - len(candidates)
        print(f" 📊 Permutation Importance (> {perm_imp_threshold}) → 保留 {len(candidates)} 个 (+{dropped} 被过滤)")
    # --- Step 4: time-series stability (per-date IC -> IR + sign consistency) ---
    grouped = df_flat.groupby('datetime')
    ic_records = []
    for date, group in grouped:
        if len(group) < 10:
            continue  # too few names on this date for a meaningful IC
        row = {'datetime': date}
        for f in candidates:
            try:
                ic, _ = spearmanr(group[f], group[label_col], nan_policy='omit')
                row[f] = ic if np.isfinite(ic) else 0.0
            except Exception:
                # Degenerate cross-sections (e.g. constant columns) count as 0 IC.
                row[f] = 0.0
        ic_records.append(row)
    if not ic_records:
        log['final'] = len(candidates)
        if verbose:
            print(" ⏳ 无足够时间窗口,跳过稳定性验证")
        return candidates, log
    ic_df = pd.DataFrame(ic_records).set_index('datetime')
    del ic_records
    mean_ic = ic_df.mean()
    std_ic = ic_df.std().replace(0, np.nan)  # avoid /0 in the IR ratio
    ir = mean_ic / std_ic
    sign_consistency = (ic_df > 0).mean()
    # Stable = strong IR AND a consistent sign in either direction.
    stable_mask = (
        ir.abs() >= ir_threshold
    ) & (
        (sign_consistency >= sign_consistency_threshold) |
        (sign_consistency <= 1 - sign_consistency_threshold)
    )
    final_features = stable_mask[stable_mask].index.tolist()
    if not final_features:
        final_features = candidates
        if verbose:
            print(" ⚠️ 稳定性全过滤,回退到 Permutation 结果")
    log['final'] = len(final_features)
    if verbose and len(final_features) != len(candidates):
        dropped = len(candidates) - len(final_features)
        print(f" 🕰️ 稳定性验证 (IR ≥ {ir_threshold}, 符号一致性 ≥ {sign_consistency_threshold}) → 保留 {len(final_features)} 个 (+{dropped} 被过滤)")
    del df_flat, ic_df, mean_ic, std_ic, ir, sign_consistency
    if verbose:
        print(f"🎯 最终因子数: {len(final_features)}")
        if len(final_features) <= 5:
            print("💡 提示: 因子过少,建议降低 ic_threshold 或 corr_threshold")
    return final_features, log

View File

@@ -0,0 +1,259 @@
"""
市场情绪因子模块
包含基于股票截面的市场情绪因子实现
"""
import numpy as np
import polars as pl
import talib
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
class SentimentPanicGreedFactor(StockWiseFactor):
    """Panic/greed sentiment: volatility surprise (TR vs. ATR) signed and
    scaled by the day's return, then smoothed.

    Bug fix: the true range was computed as ``np.maximum(a, b, c)`` — but
    ``np.maximum`` is a binary ufunc whose third positional argument is
    ``out``, so the low-vs-prev-close leg was silently dropped and its array
    overwritten. The maximum is now taken with nested calls.
    """
    def __init__(self, window_atr: int = 14, window_smooth: int = 5):
        super().__init__(
            name="sentiment_panic_greed",
            parameters={"window_atr": window_atr, "window_smooth": window_smooth},
            required_factor_ids=["open", "high", "low", "close", "vol", "pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # Work in numpy since talib consumes ndarrays.
        close_array = group_df["close"].to_numpy()
        high_array = group_df["high"].to_numpy()
        low_array = group_df["low"].to_numpy()
        prev_close = group_df["close"].shift(1).to_numpy()
        window_atr = self.parameters["window_atr"]
        window_smooth = self.parameters["window_smooth"]
        # ATR as the volatility baseline.
        atr_values = talib.ATR(high_array, low_array, close_array, timeperiod=window_atr)
        # True range: max of the three candidate ranges. np.maximum is binary,
        # so nest the calls (a third positional argument would be `out`).
        tr = np.maximum(
            high_array - low_array,
            np.maximum(np.abs(high_array - prev_close),
                       np.abs(low_array - prev_close)),
        )
        # Volatility surprise relative to ATR, signed by the day's return.
        volatility_surprise = (tr / (atr_values + 1e-8) - 1) * group_df["pct_chg"].to_numpy()
        sentiment = volatility_surprise * 2  # amplify gap/shock days
        # Smooth to reduce day-to-day noise.
        smoothed_sentiment = talib.SMA(sentiment, timeperiod=window_smooth)
        return pl.Series(smoothed_sentiment).alias(self.factor_id)
class SentimentBreadthFactor(StockWiseFactor):
    """Market-breadth sentiment proxy: the day's return weighted by relative
    volume versus its rolling baseline, then smoothed."""

    def __init__(self, window_vol: int = 20, window_smooth: int = 3):
        super().__init__(
            name="sentiment_breadth",
            parameters={"window_vol": window_vol, "window_smooth": window_smooth},
            required_factor_ids=["pct_chg", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        w_vol = self.parameters["window_vol"]
        w_smooth = self.parameters["window_smooth"]
        volume = group_df["vol"].to_numpy()
        # Rolling average volume as the baseline for "relative" volume.
        baseline = talib.SMA(volume, timeperiod=w_vol)
        # Price/volume agreement: return scaled by relative volume.
        breadth = group_df["pct_chg"].to_numpy() * (volume / (baseline + 1e-8))
        smoothed = talib.SMA(breadth, timeperiod=w_smooth)
        return pl.Series(smoothed).alias(self.factor_id)
class SentimentReversalFactor(StockWiseFactor):
    """Sentiment reversal: negative trailing compound return times trailing
    volatility (big recent run-ups with high volatility score most negative).

    Fix: for a series shorter than ``window_ret`` the original built an empty
    cumulative-return array and padded it to the wrong length, producing a
    Series whose length did not match the group — now such groups return
    all-NaN of the correct length.
    """
    def __init__(self, window_ret: int = 5, window_vol: int = 5):
        super().__init__(
            name="sentiment_reversal",
            parameters={"window_ret": window_ret, "window_vol": window_vol},
            required_factor_ids=["pct_chg"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        window_ret = self.parameters["window_ret"]
        window_vol = self.parameters["window_vol"]
        pct_chg = group_df["pct_chg"].to_numpy()
        n = len(pct_chg)
        # Guard: not enough history for even one trailing window.
        if n < window_ret:
            return pl.Series(np.full(n, np.nan)).alias(self.factor_id)
        # Trailing compound return over window_ret bars (window ends at i).
        # NOTE(review): assumes pct_chg is a fractional return; if it is in
        # percent (tushare convention) the 1 + pct_chg scale is off — confirm.
        cum_return = np.array([np.prod(1 + pct_chg[i:i + window_ret]) - 1
                               for i in range(n - window_ret + 1)])
        # Left-pad with NaN so position i aligns with the window ending at i.
        cum_return = np.pad(cum_return, (window_ret - 1, 0), constant_values=np.nan)
        # Trailing volatility of the return series.
        volatility = talib.STDDEV(pct_chg, timeperiod=window_vol)
        reversal = -cum_return * volatility
        return pl.Series(reversal).alias(self.factor_id)
class PriceDeductionFactor(StockWiseFactor):
    """Price-deduction factor: today's close minus the close from n-1 bars ago
    (the price about to "roll out" of an n-period moving average)."""

    def __init__(self, n: int = 10):
        super().__init__(
            name="price_deduction",
            parameters={"n": n},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        n = self.parameters["n"]
        close = group_df["close"]
        # Deduction price is the close n-1 periods back.
        return (close - close.shift(n - 1)).alias(self.factor_id)
class PriceDeductionRatioFactor(StockWiseFactor):
    """Price-deduction ratio: the close-vs-deduction-price gap normalized by
    the n-period simple moving average."""

    def __init__(self, n: int = 10):
        super().__init__(
            name="price_deduction_ratio",
            parameters={"n": n},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        n = self.parameters["n"]
        close = group_df["close"]
        baseline = close.rolling_mean(n)          # n-period SMA
        gap = close - close.shift(n - 1)          # close minus deduction price
        # Epsilon guards against a zero SMA.
        return (gap / (baseline + 1e-8)).alias(self.factor_id)
class IndustryMomentumLeadership(StockWiseFactor):
    """Industry leadership: blended momentum percentile scaled by ROE quality."""
    factor_id = "industry_momentum_leadership"
    required_factor_ids = [
        "industry_return_5_percentile",
        "industry_return_20_percentile",
        "roe"
    ]
    def __init__(self):
        super(IndustryMomentumLeadership, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )
    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        pct5 = g["industry_return_5_percentile"]
        pct20 = g["industry_return_20_percentile"]
        roe = g["roe"]
        # Momentum blend: 5-day percentile weighted higher (short-term leaders).
        momentum_score = 0.7 * pct5 + 0.3 * pct20
        # Quality: log1p of positive ROE to damp extremes; non-positive ROE -> 0.
        # NOTE(review): pl.when/.then on a Series yields an Expr, not a Series;
        # the Series * Expr product below relies on polars accepting the mix —
        # confirm it evaluates per-row as intended.
        quality_score = pl.when(roe > 0).then(roe.log1p()).otherwise(0.0)
        # Leadership = momentum x (quality + 1); zero quality keeps pure momentum.
        leadership = momentum_score * (quality_score + 1.0)
        return leadership.alias(self.factor_id)
class LeadershipPersistenceScore(StockWiseFactor):
    """Leadership persistence: momentum blend scaled by an additive
    fundamental-quality score (undistributed profit, ROE, book value)."""
    factor_id = "leadership_persistence_score"
    required_factor_ids = [
        "industry_return_5_percentile",
        "industry_return_20_percentile",
        "undist_profit_ps",
        "roe",
        "bps"
    ]
    def __init__(self):
        super(LeadershipPersistenceScore, self).__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )
    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        pct5 = g["industry_return_5_percentile"]
        pct20 = g["industry_return_20_percentile"]
        undist = g["undist_profit_ps"]
        roe = g["roe"]
        bps = g["bps"]
        # Momentum blend favoring the shorter 5-day percentile.
        momentum = 0.6 * pct5 + 0.4 * pct20
        # Fundamental quality: log1p of each positive metric (damps extremes),
        # non-positive values contribute 0.
        # NOTE(review): pl.when/.then on Series builds Exprs; the Series * Expr
        # arithmetic below relies on polars accepting the mix — confirm.
        quality = (
            pl.when(undist > 0).then(undist.log1p()).otherwise(0.0) +
            pl.when(roe > 0).then(roe.log1p()).otherwise(0.0) +
            pl.when(bps > 0).then(bps.log1p()).otherwise(0.0)
        )
        # Score = momentum x (quality + 1); zero quality keeps pure momentum.
        score = momentum * (quality + 1.0)
        return score.alias(self.factor_id)
class DynamicIndustryLeadership(DateWiseFactor):
    """Cross-sectional leadership score: the sum of within-industry z-scores of
    5-day return, large-order flow, and log-turnover, computed per trade date."""
    factor_id = "dynamic_industry_leadership"
    required_factor_ids = ["l2_code", "return_5", "lg_flow", "turnover_rate"]

    def __init__(self):
        super().__init__(
            name=self.factor_id,
            parameters={},
            required_factor_ids=self.required_factor_ids
        )

    def calc_factor(self, g: pl.DataFrame) -> pl.Series:
        def _industry_zscore(expr: pl.Expr) -> pl.Expr:
            """Z-score within each L2 industry; degenerate (std~0) groups -> 0."""
            mu = expr.mean().over("l2_code")
            sd = expr.std().over("l2_code")
            return pl.when(sd > 1e-8).then((expr - mu) / sd).otherwise(0.0)

        # Momentum, institutional flow, and liquidity, each industry-normalized.
        leadership = (
            _industry_zscore(pl.col("return_5"))
            + _industry_zscore(pl.col("lg_flow"))
            + _industry_zscore(pl.col("turnover_rate").log1p())
        )
        # Materialize the expression against the group and hand back a Series.
        return g.select(leadership.alias(self.factor_id)).to_series()

View File

@@ -0,0 +1,134 @@
"""
特殊因子模块
包含基于股票截面的特殊因子实现
"""
import numpy as np
import polars as pl
from main.factor.operator_framework import StockWiseFactor
class LimitFactor(StockWiseFactor):
    """Limit-move factor: +1 on limit-up closes, -1 on limit-down, 0 otherwise."""

    def __init__(self):
        super().__init__(
            name="limit",
            parameters={},
            required_factor_ids=["close", "up_limit", "down_limit"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close = group_df["close"]
        # Exact equality against the exchange-published limit prices.
        hit_upper = (close == group_df["up_limit"]).cast(pl.Int32)
        hit_lower = (close == group_df["down_limit"]).cast(pl.Int32)
        # Collapse to a single ternary signal: +1 / 0 / -1.
        return (hit_upper - hit_lower).alias(self.factor_id)
class VolumeRatioFactor(StockWiseFactor):
    """Volume ratio: today's volume relative to its 5-day rolling average."""

    def __init__(self):
        super().__init__(
            name="volume_ratio",
            parameters={},
            required_factor_ids=["vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        volume = group_df["vol"]
        baseline = volume.rolling_mean(5)
        # Epsilon guards against a zero rolling average.
        return (volume / (baseline + 1e-8)).alias(self.factor_id)
class BBI_RATIO_FACTOR(StockWiseFactor):
    """BBI ratio: the Bull-and-Bear Index (mean of the 3/6/12/24-period SMAs)
    divided by the close price."""

    def __init__(self):
        super().__init__(
            name="bbi_ratio",
            parameters={},
            required_factor_ids=["close"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        close = group_df["close"]
        # BBI = equal-weight average of four SMAs at standard BBI windows.
        windows = (3, 6, 12, 24)
        bbi = sum(close.rolling_mean(w) for w in windows) / len(windows)
        # Epsilon guards against a zero close.
        return (bbi / (close + 1e-8)).alias(self.factor_id)
class VolatilitySlopeFactor(StockWiseFactor):
    """Volatility-slope factor: day-over-day change of rolling volatility."""
    def __init__(self, window_vol: int = 20, window_slope: int = 5):
        super().__init__(
            name="volatility_slope",
            parameters={"window_vol": window_vol, "window_slope": window_slope},
            required_factor_ids=["pct_chg"]
        )
    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        window_vol = self.parameters["window_vol"]
        # NOTE(review): window_slope is read but never used — the original
        # deliberately simplifies the slope to a 1-step diff (see comment
        # below); a regression slope over window_slope is a possible upgrade.
        window_slope = self.parameters["window_slope"]
        # Rolling standard deviation of returns as the volatility estimate.
        volatility = group_df["pct_chg"].rolling_std(window_vol)
        # Simplification: take the one-step difference of the last window's
        # volatility instead of a full linear-regression slope.
        volatility_slope = volatility.diff().alias(self.factor_id)
        return volatility_slope
class PriceVolumeTrendFactor(StockWiseFactor):
    """Price-volume trend: the product of the day-over-day price change and
    the day-over-day volume change (positive when both move together)."""

    def __init__(self):
        super().__init__(
            name="price_volume_trend",
            parameters={},
            required_factor_ids=["close", "vol"]
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        price_delta = group_df["close"].diff()
        volume_delta = group_df["vol"].diff()
        return (price_delta * volume_delta).alias(self.factor_id)

View File

@@ -0,0 +1,144 @@
"""
技术指标因子模块
包含基于股票截面的技术指标因子实现
"""
import numpy as np
import polars as pl
import talib
from main.factor.operator_framework import DateWiseFactor, StockWiseFactor
class SMAFactor(StockWiseFactor):
    """Simple moving average of the close price."""

    def __init__(self, window: int):
        super().__init__(
            name="SMA",
            parameters={"window": window},   # numeric parameters only
            required_factor_ids=["close"],   # depends on the raw close column
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        span = self.parameters["window"]  # read straight from self.parameters
        return group_df["close"].rolling_mean(window_size=span).alias(self.factor_id)
class EMAFactor(StockWiseFactor):
    """Exponential moving average of the close price."""

    def __init__(self, window: int):
        super().__init__(
            name="EMA",
            parameters={"window": window},
            required_factor_ids=["close"],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        span = self.parameters["window"]
        return group_df["close"].ewm_mean(span=span).alias(self.factor_id)
class ATRFactor(StockWiseFactor):
    """Average True Range (ATR) computed via talib."""

    def __init__(self, window: int):
        super().__init__(
            name="ATR",
            parameters={"window": window},
            required_factor_ids=["high", "low", "close"],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        period = self.parameters["window"]
        # talib works on numpy arrays, so materialize the three inputs.
        highs = group_df["high"].to_numpy()
        lows = group_df["low"].to_numpy()
        closes = group_df["close"].to_numpy()
        atr = talib.ATR(highs, lows, closes, timeperiod=period)
        return pl.Series(atr).alias(self.factor_id)
class OBVFactor(StockWiseFactor):
    """On-Balance Volume (OBV) computed via talib."""

    def __init__(self):
        super().__init__(
            name="OBV",
            parameters={},
            required_factor_ids=["close", "vol"],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        # talib works on numpy arrays, so materialize both inputs.
        closes = group_df["close"].to_numpy()
        volumes = group_df["vol"].to_numpy()
        obv = talib.OBV(closes, volumes)
        return pl.Series(obv).alias(self.factor_id)
class MACDFactor(StockWiseFactor):
    """MACD line computed via talib (signal and histogram are discarded)."""

    def __init__(self, fast_period: int = 12, slow_period: int = 26, signal_period: int = 9):
        super().__init__(
            name="MACD",
            parameters={"fast_period": fast_period, "slow_period": slow_period, "signal_period": signal_period},
            required_factor_ids=["close"],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        params = self.parameters
        macd_line, _signal, _hist = talib.MACD(
            group_df["close"].to_numpy(),
            fastperiod=params["fast_period"],
            slowperiod=params["slow_period"],
            signalperiod=params["signal_period"],
        )
        # Only the MACD line itself is exposed as the factor value.
        return pl.Series(macd_line).alias(self.factor_id)
class RSI_Factor(StockWiseFactor):
    """Relative Strength Index (RSI) computed via talib."""

    def __init__(self, window: int = 14):
        super().__init__(
            name="RSI",
            parameters={"window": window},
            required_factor_ids=["close"],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        period = self.parameters["window"]
        closes = group_df["close"].to_numpy()
        rsi = talib.RSI(closes, timeperiod=period)
        return pl.Series(rsi).alias(self.factor_id)
class CrossSectionalRankFactor(DateWiseFactor):
    """Cross-sectional percentile rank of a column, centered and rescaled."""

    def __init__(self, column: str, name: str = None, ascending: bool = True):
        # Stash targets before super().__init__ derives the factor id.
        self.target_column = column
        self.ascending = ascending
        super().__init__(
            name=name or f"{column}_rank",
            parameters={"column": column, "ascending": ascending},
            required_factor_ids=[column],
        )

    def calc_factor(self, group_df: pl.DataFrame) -> pl.Series:
        col = group_df[self.target_column]
        # Average-tie percentile rank in (0, 1]; direction follows `ascending`.
        pct = col.rank(method="average", descending=not self.ascending) / len(col)
        # 3.46 ~ sqrt(12): rescales a centered uniform toward unit variance.
        return ((pct - 0.5) * 3.46).alias(self.factor_id)

146
main/factor/utils.py Normal file
View File

@@ -0,0 +1,146 @@
import pandas as pd
def add_financial_factor(
    main_df: pd.DataFrame,
    financial_df: pd.DataFrame,
    factor_value_col: str,  # column holding the financial indicator value
    ts_code_col: str = "ts_code",
    trade_date_col: str = "trade_date",
    ann_date_col: str = "ann_date",  # announcement date (fallback)
    f_ann_date_col: str = "f_ann_date",  # actual announcement date (preferred)
) -> pd.DataFrame:
    """Attach the latest-known financial indicator to each trading day.

    Uses ``pd.merge_asof`` (direction="backward") keyed by stock code so every
    trade date picks up the most recent value announced on or before it.

    Bug fix: the error/empty-data branches assigned ``np.nan`` although this
    module only imports pandas, raising NameError exactly when the fallback
    was needed; ``float("nan")`` is used instead (no new import required).

    Args:
        main_df: Time-series trading data with ``ts_code_col`` and
            ``trade_date_col``.
        financial_df: Financial data with ``ts_code_col``, the value column
            and at least one of the announcement-date columns.
        factor_value_col: Column in ``financial_df`` holding the factor value;
            it becomes the new column name in the result.

    Returns:
        ``main_df`` (sorted by date, then code) with the factor column added.

    Raises:
        ValueError: if required columns are missing from either frame.
    """
    # Idempotence: if the factor column already exists, do nothing.
    if factor_value_col in main_df.columns:
        return main_df
    new_factor_col_name = factor_value_col
    # --- Validation ---
    required_main_cols = [ts_code_col, trade_date_col]
    if not all(col in main_df.columns for col in required_main_cols):
        raise ValueError(f"主 DataFrame 必须包含列: {required_main_cols}")
    required_financial_cols = [ts_code_col, factor_value_col]
    # Prefer the actual announcement date; fall back to the plain one.
    if f_ann_date_col and f_ann_date_col in financial_df.columns:
        effective_date_col = f_ann_date_col
        print(f"使用 '{f_ann_date_col}' 作为财务数据生效日期。")
    elif ann_date_col and ann_date_col in financial_df.columns:
        effective_date_col = ann_date_col
        print(f"使用 '{ann_date_col}' 作为财务数据生效日期。")
    else:
        raise ValueError(
            f"财务指标 DataFrame 必须包含列 '{f_ann_date_col}''{ann_date_col}' 作为数据生效日期"
        )
    required_financial_cols.append(effective_date_col)
    if not all(col in financial_df.columns for col in required_financial_cols):
        raise ValueError(f"财务指标 DataFrame 必须包含列: {required_financial_cols}")
    # --- Preparation and cleaning ---
    # Copy to avoid SettingWithCopyWarning on callers' frames.
    main_df = main_df.copy()
    financial_df = financial_df.copy()
    # Coerce date columns to datetime (bad values become NaT).
    main_df[trade_date_col] = pd.to_datetime(main_df[trade_date_col], errors="coerce")
    financial_df[effective_date_col] = pd.to_datetime(
        financial_df[effective_date_col], errors="coerce"
    )
    # Normalize the join key to string on both sides.
    main_df[ts_code_col] = main_df[ts_code_col].astype(str)
    financial_df[ts_code_col] = financial_df[ts_code_col].astype(str)
    # Keep only the columns we need from the financial frame.
    financial_data_subset = financial_df[
        [ts_code_col, effective_date_col, factor_value_col]
    ].copy()
    # Drop rows whose merge keys are null — merge_asof rejects them.
    initial_rows_financial = len(financial_data_subset)
    financial_data_subset = financial_data_subset.dropna(
        subset=[ts_code_col, effective_date_col]
    )
    rows_dropped = initial_rows_financial - len(financial_data_subset)
    if rows_dropped > 0:
        print(
            f"警告: 从 financial_data_subset 中移除了 {rows_dropped} 行,因为其 '{ts_code_col}''{effective_date_col}' 列存在空值。"
        )
    if financial_data_subset.empty:
        print(
            f"警告: 清理空值后 financial_data_subset 为空,无法添加因子 '{new_factor_col_name}'。将填充 NaN。"
        )
        main_df[new_factor_col_name] = float("nan")
        return main_df
    # merge_asof requires both frames sorted by the "on" key; sort by date
    # first, then code (the "by" grouping handles per-stock matching).
    main_df = main_df.sort_values(by=[trade_date_col, ts_code_col])
    financial_data_subset = financial_data_subset.sort_values(
        by=[effective_date_col, ts_code_col]
    )
    # --- As-of merge: latest announcement on or before each trade date ---
    try:
        df_with_factor = pd.merge_asof(
            main_df,
            financial_data_subset,
            left_on=trade_date_col,
            right_on=effective_date_col,
            by=ts_code_col,
            direction="backward",
        )
    except Exception as e:
        print(f"merge_asof 执行失败: {e}")
        # Best-effort fallback: return the frame with a NaN factor column.
        main_df[new_factor_col_name] = float("nan")
        return main_df
    # --- Cleanup and renaming ---
    # Drop the right-hand date column unless it collides with the trade date.
    if (
        effective_date_col in df_with_factor.columns
        and effective_date_col != trade_date_col
    ):
        df_with_factor = df_with_factor.drop(columns=[effective_date_col])
    # NOTE: new_factor_col_name equals factor_value_col today, so this branch
    # is currently dead; kept for safety if the two ever diverge.
    if factor_value_col != new_factor_col_name:
        if factor_value_col in df_with_factor.columns:
            df_with_factor = df_with_factor.rename(
                columns={factor_value_col: new_factor_col_name}
            )
        else:
            # Should not happen: merge_asof carries over non-key right columns.
            print(f"警告: 合并后未找到原始因子列 '{factor_value_col}',无法重命名。")
            if new_factor_col_name not in df_with_factor.columns:
                df_with_factor[new_factor_col_name] = float("nan")
    elif new_factor_col_name not in df_with_factor.columns:
        print(f"警告: 合并后未找到目标因子列 '{new_factor_col_name}'。填充 NaN。")
        df_with_factor[new_factor_col_name] = float("nan")
    return df_with_factor

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,66 @@
"""Minimal qlib demo: wrap an in-memory DataFrame in a DataHandlerLP via
StaticDataLoader, run the processor pipeline, and inspect the "learn" view
of the train segment."""
import numpy as np
import pandas as pd
from qlib.data.dataset import DatasetH
from qlib.utils import init_instance_by_config  # moved up: keep all imports at the top

# Reproducibility: the demo data is random, so seed the generator.
np.random.seed(42)

# 1. Build a toy (datetime, instrument) panel with two features and a label.
dates = pd.to_datetime(pd.date_range("2020-01-01", "2020-01-10"))
instruments = ["SH600000", "SH600001"]
index = pd.MultiIndex.from_product([dates, instruments], names=["datetime", "instrument"])
data = {
    "feature_1": np.random.randn(len(index)),
    "feature_2": np.random.randn(len(index)),
    "label": np.random.randn(len(index)) * 0.01,
}
my_df = pd.DataFrame(data, index=index)
my_df.iloc[1, 0] = np.nan  # deliberately inject a missing feature value
my_df.iloc[5, 2] = np.nan  # deliberately inject a missing label value
print("----------- 原始 DataFrame -----------")
print(my_df.head())

# 2. Full handler config: StaticDataLoader serves the DataFrame directly and
#    the learn-time pipeline fills remaining NaNs with 0.
data_handler_config = {
    "class": "DataHandlerLP",
    "module_path": "qlib.data.dataset.handler",
    "kwargs": {
        # Core part: the data-loader configuration.
        "data_loader": {
            "class": "StaticDataLoader",
            "module_path": "qlib.data.dataset.loader",
            "kwargs": {
                "config": my_df,  # <--- pass your DataFrame here
            },
        },
        "shared_processors": [],
        "infer_processors": [
            # {"class": "DropnaLabel", "module_path": "qlib.data.dataset.processor"},
        ],
        "learn_processors": [
            {"class": "Fillna", "module_path": "qlib.data.dataset.processor", "kwargs": {"fill_value": 0}},
        ],
    },
}

# 3. Instantiate the DataHandler: this loads the StaticDataLoader data and
#    runs every configured processor.
dh = init_instance_by_config(data_handler_config)
ds = DatasetH(
    dh,
    segments={
        "train": ("20190101", "20221231"),
        "valid": ("20220101", "20231231"),
        "test": ("20240101", "20250101"),
    },
)

# 4. Inspect: the learn (DK_L) view of the train segment.
# BUGFIX: the original call `ds.prepare("all", data_key='learn', segments='train')`
# passed `segments` both positionally ("all") and as a keyword, which raises
# "TypeError: prepare() got multiple values for argument 'segments'".
learn_data = ds.prepare("train", data_key="learn")
print("----------- train DataFrame -----------")
print(learn_data)

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -99,7 +99,7 @@
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 9162612 entries, 0 to 9162611\n",
"RangeIndex: 9315967 entries, 0 to 9315966\n",
"Data columns (total 33 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -688,10 +688,10 @@
"Calculating cs_rank_size...\n",
"Finished cs_rank_size.\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 4819708 entries, 0 to 4819707\n",
"RangeIndex: 4910010 entries, 0 to 4910009\n",
"Columns: 181 entries, ts_code to cs_rank_size\n",
"dtypes: bool(10), datetime64[ns](1), float64(165), int64(3), object(2)\n",
"memory usage: 6.2+ GB\n",
"memory usage: 6.3+ GB\n",
"None\n",
"['ts_code', 'trade_date', 'open', 'close', 'high', 'low', 'vol', 'amount', 'pct_chg', 'turnover_rate', 'pe_ttm', 'circ_mv', 'total_mv', 'volume_ratio', 'is_st', 'up_limit', 'down_limit', 'buy_sm_vol', 'sell_sm_vol', 'buy_lg_vol', 'sell_lg_vol', 'buy_elg_vol', 'sell_elg_vol', 'net_mf_vol', 'his_low', 'his_high', 'cost_5pct', 'cost_15pct', 'cost_50pct', 'cost_85pct', 'cost_95pct', 'weight_avg', 'winner_rate', 'cat_l2_code', 'undist_profit_ps', 'ocfps', 'roa', 'roe', 'AR', 'BR', 'AR_BR', 'log_circ_mv', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'flow_divergence_diff', 'flow_divergence_ratio', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'price_cost_divergence', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 
'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_flow_divergence', 'cs_rank_ind_adj_lg_flow', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_opening_gap', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_ind_cap_neutral_pe', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size']\n"
]
@@ -1583,7 +1583,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.77it/s]\n"
"MAD Filtering: 62%|██████▏ | 81/131 [00:08<00:05, 9.28it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.63it/s]\n"
]
},
{
@@ -1598,14 +1605,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 82%|████████| 107/131 [00:12<00:02, 9.41it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"MAD Filtering: 100%|██████████| 131/131 [00:13<00:00, 9.60it/s]\n"
"MAD Filtering: 100%|██████████| 131/131 [00:14<00:00, 8.97it/s]\n"
]
},
{
@@ -1645,13 +1645,13 @@
"截面 MAD 去极值处理完成。\n",
"feature_columns: ['vol', 'pct_chg', 'turnover_rate', 'volume_ratio', 'winner_rate', 'undist_profit_ps', 'ocfps', 'AR', 'BR', 'AR_BR', 'cashflow_to_ev_factor', 'book_to_price_ratio', 'turnover_rate_mean_5', 'variance_20', 'bbi_ratio_factor', 'daily_deviation', 'lg_elg_net_buy_vol', 'flow_lg_elg_intensity', 'sm_net_buy_vol', 'total_buy_vol', 'lg_elg_buy_prop', 'flow_struct_buy_change', 'lg_elg_net_buy_vol_change', 'flow_lg_elg_accel', 'chip_concentration_range', 'chip_skewness', 'floating_chip_proxy', 'cost_support_15pct_change', 'cat_winner_price_zone', 'flow_chip_consistency', 'profit_taking_vs_absorb', 'cat_is_positive', 'upside_vol', 'downside_vol', 'vol_ratio', 'return_skew', 'return_kurtosis', 'volume_change_rate', 'cat_volume_breakout', 'turnover_deviation', 'cat_turnover_spike', 'avg_volume_ratio', 'cat_volume_ratio_breakout', 'vol_spike', 'vol_std_5', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'return_5', 'return_20', 'std_return_5', 'std_return_90', 'std_return_90_2', 'act_factor1', 'act_factor2', 'act_factor3', 'act_factor4', 'rank_act_factor1', 'rank_act_factor2', 'rank_act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'alpha_003', 'alpha_007', 'alpha_013', 'vol_break', 'weight_roc5', 'smallcap_concentration', 'cost_stability', 'high_cost_break_days', 'liquidity_risk', 'turnover_std', 'mv_volatility', 'volume_growth', 'mv_growth', 'momentum_factor', 'resonance_factor', 'log_close', 'cat_vol_spike', 'up', 'down', 'obv_maobv_6', 'std_return_5_over_std_return_90', 'std_return_90_minus_std_return_90_2', 'cat_af2', 'cat_af3', 'cat_af4', 'act_factor5', 'act_factor6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'ctrl_strength', 'low_cost_dev', 'asymmetry', 'lock_factor', 'cat_vol_break', 'cost_atr_adj', 'cat_golden_resonance', 'mv_turnover_ratio', 'mv_adjusted_volume', 'mv_weighted_turnover', 'nonlinear_mv_volume', 'mv_volume_ratio', 'mv_momentum', 
'lg_flow_mom_corr_20_60', 'lg_flow_accel', 'profit_pressure', 'underwater_resistance', 'cost_conc_std_20', 'profit_decay_20', 'vol_amp_loss_20', 'vol_drop_profit_cnt_5', 'lg_flow_vol_interact_20', 'cost_break_confirm_cnt_5', 'atr_norm_channel_pos_14', 'turnover_diff_skew_20', 'lg_sm_flow_diverge_20', 'pullback_strong_20_20', 'vol_wgt_hist_pos_20', 'vol_adj_roc_20', 'cs_rank_net_lg_flow_val', 'cs_rank_elg_buy_ratio', 'cs_rank_rel_profit_margin', 'cs_rank_cost_breadth', 'cs_rank_dist_to_upper_cost', 'cs_rank_winner_rate', 'cs_rank_intraday_range', 'cs_rank_close_pos_in_range', 'cs_rank_pos_in_hist_range', 'cs_rank_vol_x_profit_margin', 'cs_rank_lg_flow_price_concordance', 'cs_rank_turnover_per_winner', 'cs_rank_volume_ratio', 'cs_rank_elg_buy_sell_sm_ratio', 'cs_rank_cost_dist_vol_ratio', 'cs_rank_size', 'cat_up_limit', 'industry_obv', 'industry_return_5', 'industry_return_20', 'industry__ema_5', 'industry__ema_13', 'industry__ema_20', 'industry__ema_60', 'industry_act_factor1', 'industry_act_factor2', 'industry_act_factor3', 'industry_act_factor4', 'industry_act_factor5', 'industry_act_factor6', 'industry_rank_act_factor1', 'industry_rank_act_factor2', 'industry_rank_act_factor3', 'industry_return_5_percentile', 'industry_return_20_percentile', '000852.SH_MACD', '000905.SH_MACD', '399006.SZ_MACD', '000852.SH_MACD_hist', '000905.SH_MACD_hist', '399006.SZ_MACD_hist', '000852.SH_RSI', '000905.SH_RSI', '399006.SZ_RSI', '000852.SH_Signal_line', '000905.SH_Signal_line', '399006.SZ_Signal_line', '000852.SH_amount_change_rate', '000905.SH_amount_change_rate', '399006.SZ_amount_change_rate', '000852.SH_amount_mean', '000905.SH_amount_mean', '399006.SZ_amount_mean', '000852.SH_daily_return', '000905.SH_daily_return', '399006.SZ_daily_return', '000852.SH_up_ratio_20d', '000905.SH_up_ratio_20d', '399006.SZ_up_ratio_20d', '000852.SH_volatility', '000905.SH_volatility', '399006.SZ_volatility', '000852.SH_volume_change_rate', '000905.SH_volume_change_rate', 
'399006.SZ_volume_change_rate']\n",
"df最小日期: 2019-01-02\n",
"df最大日期: 2025-10-10\n",
"2056336\n",
"df最大日期: 2025-11-21\n",
"2056030\n",
"train_data最小日期: 2020-01-02\n",
"train_data最大日期: 2022-12-30\n",
"2045675\n",
"2135782\n",
"test_data最小日期: 2023-01-03\n",
"test_data最大日期: 2025-10-10\n",
"test_data最大日期: 2025-11-21\n",
" ts_code trade_date log_circ_mv\n",
"0 000001.SZ 2019-01-02 16.574219\n",
"1 000001.SZ 2019-01-03 16.583965\n",
@@ -1954,7 +1954,7 @@
{
"data": {
"text/plain": [
"<catboost.core.CatBoostClassifier at 0x707ccc5ac1a0>"
"<catboost.core.CatBoostClassifier at 0x7602293f6030>"
]
},
"execution_count": 19,
@@ -2068,7 +2068,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"5588 2056336\n",
"5587 2056030\n",
" ts_code trade_date turnover_rate\n",
"0 000001.SZ 2023-01-03 1.1307\n",
"1 000001.SZ 2023-01-04 1.1284\n",
@@ -2076,13 +2076,13 @@
"3 000001.SZ 2023-01-06 0.6162\n",
"4 000001.SZ 2023-01-09 0.5450\n",
"... ... ... ...\n",
"2045670 605599.SH 2025-09-26 0.3434\n",
"2045671 605599.SH 2025-09-29 0.3943\n",
"2045672 605599.SH 2025-09-30 0.4982\n",
"2045673 605599.SH 2025-10-09 1.0319\n",
"2045674 605599.SH 2025-10-10 0.8859\n",
"2135777 605599.SH 2025-11-17 0.3820\n",
"2135778 605599.SH 2025-11-18 0.3565\n",
"2135779 605599.SH 2025-11-19 0.3748\n",
"2135780 605599.SH 2025-11-20 0.3132\n",
"2135781 605599.SH 2025-11-21 0.4580\n",
"\n",
"[2045675 rows x 3 columns]\n"
"[2135782 rows x 3 columns]\n"
]
},
{
@@ -2117,7 +2117,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
"version": "3.12.11"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1337,3 +1337,63 @@ trade_date,score,ts_code
2025-10-09,0.42154288661517764,002591.SZ
2025-10-10,0.2807003627051253,002193.SZ
2025-10-10,0.31259694334979216,002719.SZ
2025-10-13,0.2951270845176498,002856.SZ
2025-10-13,0.3389617298778848,002193.SZ
2025-10-14,0.3625108344766833,002591.SZ
2025-10-14,0.3876832217571092,600735.SH
2025-10-15,0.3684329251797533,002591.SZ
2025-10-15,0.4012537108164919,600735.SH
2025-10-16,0.35194813783938456,600735.SH
2025-10-16,0.47588040898459993,002591.SZ
2025-10-17,0.4434119771003001,002591.SZ
2025-10-17,0.4575670347860125,000890.SZ
2025-10-20,0.45163257702571646,000890.SZ
2025-10-20,0.4546352741401101,002591.SZ
2025-10-21,0.4653630650575277,002591.SZ
2025-10-21,0.5032400321085797,600137.SH
2025-10-22,0.4575629388073922,000632.SZ
2025-10-22,0.46613086209932875,002591.SZ
2025-10-23,0.45544805256749116,002591.SZ
2025-10-23,0.493066390947383,000632.SZ
2025-10-24,0.43331145575224883,000632.SZ
2025-10-24,0.45895240962905315,002591.SZ
2025-10-27,0.3534800509634666,002193.SZ
2025-10-27,0.3687633209705822,600493.SH
2025-10-28,0.39020626605234376,001259.SZ
2025-10-28,0.432622484773604,600493.SH
2025-10-29,0.388162649474833,600493.SH
2025-10-29,0.5899817836722746,600847.SH
2025-10-30,0.3644512652312262,603616.SH
2025-10-30,0.48605588959390245,600847.SH
2025-10-31,0.3442043952469046,002591.SZ
2025-10-31,0.472699300825448,600847.SH
2025-11-03,0.3598403659472199,002856.SZ
2025-11-03,0.36028418615974944,600847.SH
2025-11-04,0.4098368013275336,603356.SH
2025-11-04,0.4157902513122031,002494.SZ
2025-11-05,0.4496784204531746,002193.SZ
2025-11-05,0.6170797393826642,002856.SZ
2025-11-06,0.3743222641474193,002193.SZ
2025-11-06,0.5151993158736353,002856.SZ
2025-11-07,0.3821400244102041,002591.SZ
2025-11-07,0.6416337293101521,002856.SZ
2025-11-10,0.4158022301310274,002193.SZ
2025-11-10,0.5280653468274031,002856.SZ
2025-11-11,0.38888774123241365,002193.SZ
2025-11-11,0.5205128900613243,002856.SZ
2025-11-12,0.4207243532849393,002856.SZ
2025-11-12,0.42295391752723305,002193.SZ
2025-11-13,0.4223119822473308,002193.SZ
2025-11-13,0.4433093518799348,002856.SZ
2025-11-14,0.4228213225112463,002856.SZ
2025-11-14,0.5240311394195624,002193.SZ
2025-11-17,0.4804005424470699,002494.SZ
2025-11-17,0.5081206933698182,002193.SZ
2025-11-18,0.45993815526511217,002494.SZ
2025-11-18,0.5519071143747787,600493.SH
2025-11-19,0.4269366250940664,000890.SZ
2025-11-19,0.4707763880425218,600847.SH
2025-11-20,0.43476759399773307,600847.SH
2025-11-20,0.46185367833556545,600493.SH
2025-11-21,0.5033641001654292,600561.SH
2025-11-21,0.5181437273273019,603880.SH
1 trade_date,score,ts_code
1337 2025-10-09,0.42154288661517764,002591.SZ
1338 2025-10-10,0.2807003627051253,002193.SZ
1339 2025-10-10,0.31259694334979216,002719.SZ
1340 2025-10-13,0.2951270845176498,002856.SZ
1341 2025-10-13,0.3389617298778848,002193.SZ
1342 2025-10-14,0.3625108344766833,002591.SZ
1343 2025-10-14,0.3876832217571092,600735.SH
1344 2025-10-15,0.3684329251797533,002591.SZ
1345 2025-10-15,0.4012537108164919,600735.SH
1346 2025-10-16,0.35194813783938456,600735.SH
1347 2025-10-16,0.47588040898459993,002591.SZ
1348 2025-10-17,0.4434119771003001,002591.SZ
1349 2025-10-17,0.4575670347860125,000890.SZ
1350 2025-10-20,0.45163257702571646,000890.SZ
1351 2025-10-20,0.4546352741401101,002591.SZ
1352 2025-10-21,0.4653630650575277,002591.SZ
1353 2025-10-21,0.5032400321085797,600137.SH
1354 2025-10-22,0.4575629388073922,000632.SZ
1355 2025-10-22,0.46613086209932875,002591.SZ
1356 2025-10-23,0.45544805256749116,002591.SZ
1357 2025-10-23,0.493066390947383,000632.SZ
1358 2025-10-24,0.43331145575224883,000632.SZ
1359 2025-10-24,0.45895240962905315,002591.SZ
1360 2025-10-27,0.3534800509634666,002193.SZ
1361 2025-10-27,0.3687633209705822,600493.SH
1362 2025-10-28,0.39020626605234376,001259.SZ
1363 2025-10-28,0.432622484773604,600493.SH
1364 2025-10-29,0.388162649474833,600493.SH
1365 2025-10-29,0.5899817836722746,600847.SH
1366 2025-10-30,0.3644512652312262,603616.SH
1367 2025-10-30,0.48605588959390245,600847.SH
1368 2025-10-31,0.3442043952469046,002591.SZ
1369 2025-10-31,0.472699300825448,600847.SH
1370 2025-11-03,0.3598403659472199,002856.SZ
1371 2025-11-03,0.36028418615974944,600847.SH
1372 2025-11-04,0.4098368013275336,603356.SH
1373 2025-11-04,0.4157902513122031,002494.SZ
1374 2025-11-05,0.4496784204531746,002193.SZ
1375 2025-11-05,0.6170797393826642,002856.SZ
1376 2025-11-06,0.3743222641474193,002193.SZ
1377 2025-11-06,0.5151993158736353,002856.SZ
1378 2025-11-07,0.3821400244102041,002591.SZ
1379 2025-11-07,0.6416337293101521,002856.SZ
1380 2025-11-10,0.4158022301310274,002193.SZ
1381 2025-11-10,0.5280653468274031,002856.SZ
1382 2025-11-11,0.38888774123241365,002193.SZ
1383 2025-11-11,0.5205128900613243,002856.SZ
1384 2025-11-12,0.4207243532849393,002856.SZ
1385 2025-11-12,0.42295391752723305,002193.SZ
1386 2025-11-13,0.4223119822473308,002193.SZ
1387 2025-11-13,0.4433093518799348,002856.SZ
1388 2025-11-14,0.4228213225112463,002856.SZ
1389 2025-11-14,0.5240311394195624,002193.SZ
1390 2025-11-17,0.4804005424470699,002494.SZ
1391 2025-11-17,0.5081206933698182,002193.SZ
1392 2025-11-18,0.45993815526511217,002494.SZ
1393 2025-11-18,0.5519071143747787,600493.SH
1394 2025-11-19,0.4269366250940664,000890.SZ
1395 2025-11-19,0.4707763880425218,600847.SH
1396 2025-11-20,0.43476759399773307,600847.SH
1397 2025-11-20,0.46185367833556545,600493.SH
1398 2025-11-21,0.5033641001654292,600561.SH
1399 2025-11-21,0.5181437273273019,603880.SH

0
qmt/__init__.py Normal file
View File

35
qmt/qmt_test.py Normal file
View File

@@ -0,0 +1,35 @@
"""Smoke test for the QMT (xtquant) trading connection.

Connects to a local QMT mini-terminal, subscribes to a stock account and
prints the available cash.  Exits early — and releases the trader thread —
if either the connection or the subscription fails.
"""
from xtquant import xttrader
from xtquant.xttype import StockAccount
import random

# Path to the QMT client's mini userdata directory, plus a random session id
# (each concurrent XtQuantTrader instance needs its own session id).
min_path = r"D:\QMT\国金证券QMT交易端\userdata_mini"
session_id = int(random.randint(100000, 999999))

# Create the XtQuantTrader instance and start its worker thread.
xt_trader = xttrader.XtQuantTrader(min_path, session_id)
xt_trader.start()

# Connect to the QMT terminal; 0 means success.
connect_result = xt_trader.connect()
if connect_result == 0:
    print('连接成功')
else:
    print('连接失败')
    xt_trader.stop()
    exit()

# Subscribe to the target account.
account = StockAccount('8886100517')
res = xt_trader.subscribe(account)
if res == 0:
    print('订阅成功')
else:
    print('订阅失败')
    # BUGFIX: the original fell through and queried assets even after a
    # failed subscription; stop the trader and bail out instead.
    xt_trader.stop()
    exit()

asset = xt_trader.query_stock_asset(account)
print(asset.cash)
xt_trader.stop()  # release the trading thread before the script ends

182
qmt/qmt_trader.py Normal file
View File

@@ -0,0 +1,182 @@
# coding:utf-8
import time, datetime, traceback, sys, json
import redis
from xtquant import xtdata
from xtquant.xttrader import XtQuantTrader, XtQuantTraderCallback
from xtquant.xttype import StockAccount
from xtquant import xtconstant
# ================= Configuration =================
QMT_PATH = r'D:\qmt\投研\迅投极速交易终端睿智融科版\userdata'  # QMT terminal userdata path
ACCOUNT_ID = '2000128'      # trading account id
ACCOUNT_TYPE = 'STOCK'      # account type passed to StockAccount
REDIS_HOST = '127.0.0.1'
REDIS_PORT = 6379
REDIS_PASS = None           # no auth by default
# Base strategy name (do NOT append "_real" here — the code adds it below)
STRATEGY_BASE_NAME = 'default_strategy'
# =================================================
# Queue to listen on: only the live ("_real") queue is consumed, which
# physically isolates this process from back-test signal traffic.
LISTEN_QUEUE = f"{STRATEGY_BASE_NAME}_real"
class MyXtQuantTraderCallback(XtQuantTraderCallback):
    """Minimal trade-event callback: logs every event to stdout."""
    def on_disconnected(self):
        # Fired when the connection to the QMT terminal drops.
        print("连接断开")
    def on_stock_order(self, order):
        # Order status report (委托回报).
        print(f"委托回报: {order.order_id} {order.order_remark}")
    def on_stock_trade(self, trade):
        # Fill report (成交回报).
        print(f"成交: {trade.stock_code} {trade.traded_volume}")
    def on_order_error(self, order_error):
        # Order placement failure.
        print(f"下单失败: {order_error.error_msg}")
def init_redis():
    """Build a Redis client from the module-level config and verify it with PING.

    Returns:
        The connected ``redis.Redis`` client, or ``None`` when the connection
        attempt fails (the failure is logged to stdout).
    """
    try:
        client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASS,
            decode_responses=True,
        )
        client.ping()  # fail fast if the server is unreachable
    except Exception as e:
        print(f"Redis连接失败: {e}")
        return None
    return client
def is_msg_valid(data):
    """[Safety core] Validate a signal message's freshness and legitimacy.

    A message is accepted only when all of the following hold:
      * it is not flagged as back-test data (``is_backtest``),
      * it carries a ``timestamp`` in ``'%Y-%m-%d %H:%M:%S'`` format,
      * that timestamp falls on the current server date.

    Any parsing or validation error is logged and the message is rejected.

    Args:
        data: Decoded signal dict popped from the Redis queue.

    Returns:
        True when the message may be acted upon, False otherwise.
    """
    try:
        # Defensive check: back-test messages should never reach the live
        # queue (it is physically separated), but drop them if one slips in.
        if data.get('is_backtest', False):
            print(f"警报:拦截到回测数据,已丢弃!")
            return False
        stamp = data.get('timestamp')
        if not stamp:
            print("数据缺失时间戳,丢弃")
            return False
        # Format must match what the strategy side sends: '%Y-%m-%d %H:%M:%S'.
        signal_date = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').date()
        today = datetime.date.today()
        # Core rule: only same-day messages are actionable.
        if signal_date != today:
            print(f"拦截过期消息: 消息日期[{signal_date}] != 今日[{today}]")
            return False
        # Optional tightening (not enabled): also reject messages older than
        # a few minutes by comparing against datetime.datetime.now().
        return True
    except Exception as e:
        print(f"校验逻辑异常: {e}")
        return False
def process_redis_signal(r_client, xt_trader, acc):
    """Pop one signal from the live Redis queue and route it to QMT.

    BUY signals use a cash-slicing scheme: the latest available cash is split
    into ``div_count`` slices and one slice is spent on the order.  SELL
    signals liquidate the entire sellable position of the stock.  All failures
    are logged to stdout; exceptions never propagate to the polling loop.

    Args:
        r_client: Connected redis client (``decode_responses=True`` assumed).
        xt_trader: Started/connected ``XtQuantTrader`` instance.
        acc: ``StockAccount`` the orders are placed against.
    """
    try:
        msg_json = r_client.lpop(LISTEN_QUEUE)
        if not msg_json: return
        print(f"收到信号: {msg_json}")
        data = json.loads(msg_json)
        if not is_msg_valid(data): return  # freshness / legitimacy check (see is_msg_valid)
        stock_code = data['stock_code']
        action = data['action']
        price = float(data['price'])
        # Number of cash slices; fall back to the legacy key 'weight' for
        # compatibility with older messages that may still sit in Redis.
        div_count = float(data.get('div_count', data.get('weight', 1)))
        # =========================================================
        # BUY: cash-slicing allocation
        # =========================================================
        if action == 'BUY':
            # 1. Always query the *latest* available cash before sizing.
            asset = xt_trader.query_stock_asset(acc)
            if not asset:
                print("错误:无法查询资产")
                return
            current_cash = asset.cash
            # 2. Order amount = available cash / number of slices.
            if div_count <= 0: div_count = 1  # guard against division by zero
            target_amount = current_cash / div_count
            # 3. Debug trace of the allocation (important for audit).
            print(f"【资金分配】可用现金:{current_cash:.2f} / 切分份数:{div_count} = 下单金额:{target_amount:.2f}")
            # 4. Convert amount to shares, rounded down to a 100-share lot.
            if price <= 0: price = 1.0
            # Skip tiny residual orders.
            if target_amount < 2000:
                print(f"忽略:金额过小 ({target_amount:.2f})")
                return
            vol = int(target_amount / price / 100) * 100
            if vol >= 100:
                xt_trader.order_stock(acc, stock_code, xtconstant.STOCK_BUY, vol, xtconstant.FIX_PRICE, price,
                                      STRATEGY_BASE_NAME, 'PyBuy')
                print(f"买入下单: {stock_code} {vol}")
            else:
                print(f"计算股数不足100股")
        # =========================================================
        # SELL: liquidate the full sellable position
        # =========================================================
        elif action == 'SELL':
            positions = xt_trader.query_stock_positions(acc)
            target_pos = next((p for p in positions if p.stock_code == stock_code), None)
            if target_pos and target_pos.can_use_volume > 0:
                xt_trader.order_stock(acc, stock_code, xtconstant.STOCK_SELL, target_pos.can_use_volume,
                                      xtconstant.FIX_PRICE, price, STRATEGY_BASE_NAME, 'PySell')
                print(f"卖出下单: {stock_code} {target_pos.can_use_volume}")
            else:
                print(f"无可用持仓: {stock_code}")
    except Exception as e:
        # Broad on purpose: a malformed message must not kill the daemon loop.
        print(f"处理异常: {e}")
        traceback.print_exc()
if __name__ == '__main__':
    # Redis is mandatory: without it there is nothing to poll, so fail fast
    # instead of spinning forever (the original looped doing nothing).
    r_client = init_redis()
    if r_client is None:
        sys.exit("Redis 不可用,退出")
    session_id = int(time.time())  # unique-enough session id per launch
    xt_trader = XtQuantTrader(QMT_PATH, session_id)
    acc = StockAccount(ACCOUNT_ID, ACCOUNT_TYPE)
    callback = MyXtQuantTraderCallback()
    xt_trader.register_callback(callback)
    xt_trader.start()
    # BUGFIX: the original ignored the connect/subscribe return codes and
    # would happily poll with a dead trading link; 0 means success for both.
    if xt_trader.connect() != 0:
        xt_trader.stop()
        sys.exit("QMT 连接失败,退出")
    if xt_trader.subscribe(acc) != 0:
        xt_trader.stop()
        sys.exit("账户订阅失败,退出")
    print(f"=== 启动监听: {LISTEN_QUEUE} ===")
    print("只处理当日的实盘/模拟信号,自动过滤回测数据及历史遗留数据。")
    while True:
        process_redis_signal(r_client, xt_trader, acc)
        # NOTE(review): a 60 s polling interval may be too coarse for live
        # trading signals — confirm the intended latency with the strategy side.
        time.sleep(60)