(exception)勉强赚钱rank

This commit is contained in:
liaozhaorun
2025-03-31 23:08:03 +08:00
parent ee35513935
commit 01092b8cae
14 changed files with 5561 additions and 2922 deletions

View File

@@ -5,8 +5,8 @@
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:55:46.122736Z",
"start_time": "2025-02-11T15:55:46.106368Z"
"end_time": "2025-03-02T09:47:08.470810Z",
"start_time": "2025-03-02T09:47:07.512525Z"
}
},
"source": [
@@ -16,19 +16,35 @@
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 3
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:55:46.689986Z",
"start_time": "2025-02-11T15:55:46.130840Z"
"end_time": "2025-03-02T09:47:10.242731Z",
"start_time": "2025-03-02T09:47:08.470810Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
@@ -56,11 +72,11 @@
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
" name_change_dict[ts_code] = filter_rows(st_data)"
],
"id": "bc8f03e027027004",
"outputs": [],
"execution_count": 4
"execution_count": 3
},
{
"cell_type": "code",
@@ -68,8 +84,8 @@
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T16:15:26.024182Z",
"start_time": "2025-02-11T15:55:46.721189Z"
"end_time": "2025-03-02T08:33:15.997350Z",
"start_time": "2025-03-02T08:17:08.727232Z"
}
},
"source": [
@@ -135,6 +151,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250331 完成\n",
"任务 20250401 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n",
"任务 20250321 完成\n",
"任务 20250320 完成\n",
"任务 20250319 完成\n",
"任务 20250318 完成\n",
"任务 20250317 完成\n",
"任务 20250314 完成\n",
"任务 20250313 完成\n",
"任务 20250311 完成\n",
"任务 20250312 完成\n",
"任务 20250310 完成\n",
"任务 20250307 完成\n",
"任务 20250306 完成\n",
"任务 20250305 完成\n",
"任务 20250303 完成\n",
"任务 20250304 完成\n",
"任务 20250227 完成\n",
"任务 20250228 完成\n",
"任务 20250226 完成\n",
@@ -174,8 +212,8 @@
"任务 20241231 完成\n",
"任务 20241230 完成\n",
"任务 20241227 完成\n",
"任务 20241226 完成\n",
"任务 20241225 完成\n",
"任务 20241226 完成\n",
"任务 20241224 完成\n",
"任务 20241223 完成\n",
"任务 20241220 完成\n",
@@ -188,8 +226,8 @@
"任务 20241211 完成\n",
"任务 20241210 完成\n",
"任务 20241209 完成\n",
"任务 20241206 完成\n",
"任务 20241205 完成\n",
"任务 20241206 完成\n",
"任务 20241204 完成\n",
"任务 20241203 完成\n",
"任务 20241202 完成\n",
@@ -230,8 +268,8 @@
"任务 20241014 完成\n",
"任务 20241011 完成\n",
"任务 20241010 完成\n",
"任务 20241008 完成\n",
"任务 20241009 完成\n",
"任务 20241008 完成\n",
"任务 20240930 完成\n",
"任务 20240927 完成\n",
"任务 20240926 完成\n",
@@ -239,8 +277,8 @@
"任务 20240924 完成\n",
"任务 20240923 完成\n",
"任务 20240920 完成\n",
"任务 20240918 完成\n",
"任务 20240919 完成\n",
"任务 20240918 完成\n",
"任务 20240913 完成\n",
"任务 20240912 完成\n",
"任务 20240911 完成\n",
@@ -255,8 +293,8 @@
"任务 20240829 完成\n",
"任务 20240828 完成\n",
"任务 20240827 完成\n",
"任务 20240823 完成\n",
"任务 20240826 完成\n",
"任务 20240823 完成\n",
"任务 20240822 完成\n",
"任务 20240821 完成\n",
"任务 20240820 完成\n",
@@ -274,13 +312,13 @@
"任务 20240802 完成\n",
"任务 20240801 完成\n",
"任务 20240731 完成\n",
"任务 20240729 完成\n",
"任务 20240730 完成\n",
"任务 20240726 完成\n",
"任务 20240729 完成\n",
"任务 20240724 完成\n",
"任务 20240725 完成\n",
"任务 20240722 完成\n",
"任务 20240724 完成\n",
"任务 20240723 完成\n",
"任务 20240722 完成\n",
"任务 20240719 完成\n",
"任务 20240718 完成\n",
"任务 20240717 完成\n",
@@ -292,26 +330,26 @@
"任务 20240709 完成\n",
"任务 20240708 完成\n",
"任务 20240705 完成\n",
"任务 20240703 完成\n",
"任务 20240704 完成\n",
"任务 20240703 完成\n",
"任务 20240702 完成\n",
"任务 20240701 完成\n",
"任务 20240628 完成\n",
"任务 20240627 完成\n",
"任务 20240626 完成\n",
"任务 20240625 完成\n",
"任务 20240624 完成\n",
"任务 20240628 完成\n",
"任务 20240621 完成\n",
"任务 20240620 完成\n",
"任务 20240619 完成\n",
"任务 20240618 完成\n",
"任务 20240617 完成\n",
"任务 20240613 完成\n",
"任务 20240614 完成\n",
"任务 20240613 完成\n",
"任务 20240612 完成\n",
"任务 20240611 完成\n",
"任务 20240606 完成\n",
"任务 20240607 完成\n",
"任务 20240606 完成\n",
"任务 20240605 完成\n",
"任务 20240604 完成\n",
"任务 20240603 完成\n",
@@ -322,12 +360,12 @@
"任务 20240527 完成\n",
"任务 20240524 完成\n",
"任务 20240523 完成\n",
"任务 20240521 完成\n",
"任务 20240522 完成\n",
"任务 20240521 完成\n",
"任务 20240520 完成\n",
"任务 20240517 完成\n",
"任务 20240516 完成\n",
"任务 20240515 完成\n",
"任务 20240516 完成\n",
"任务 20240514 完成\n",
"任务 20240513 完成\n",
"任务 20240510 完成\n",
@@ -339,23 +377,23 @@
"任务 20240429 完成\n",
"任务 20240426 完成\n",
"任务 20240425 完成\n",
"任务 20240424 完成\n",
"任务 20240423 完成\n",
"任务 20240424 完成\n",
"任务 20240422 完成\n",
"任务 20240419 完成\n",
"任务 20240418 完成\n",
"任务 20240417 完成\n",
"任务 20240418 完成\n",
"任务 20240416 完成\n",
"任务 20240415 完成\n",
"任务 20240411 完成\n",
"任务 20240412 完成\n",
"任务 20240411 完成\n",
"任务 20240410 完成\n",
"任务 20240409 完成\n",
"任务 20240408 完成\n",
"任务 20240403 完成\n",
"任务 20240402 完成\n",
"任务 20240329 完成\n",
"任务 20240401 完成\n",
"任务 20240329 完成\n",
"任务 20240328 完成\n",
"任务 20240327 完成\n",
"任务 20240326 完成\n",
@@ -384,8 +422,8 @@
"任务 20240222 完成\n",
"任务 20240221 完成\n",
"任务 20240220 完成\n",
"任务 20240208 完成\n",
"任务 20240219 完成\n",
"任务 20240208 完成\n",
"任务 20240207 完成\n",
"任务 20240206 完成\n",
"任务 20240205 完成\n",
@@ -405,12 +443,12 @@
"任务 20240116 完成\n",
"任务 20240115 完成\n",
"任务 20240112 完成\n",
"任务 20240110 完成\n",
"任务 20240111 完成\n",
"任务 20240110 完成\n",
"任务 20240109 完成\n",
"任务 20240108 完成\n",
"任务 20240104 完成\n",
"任务 20240105 完成\n",
"任务 20240104 完成\n",
"任务 20240103 完成\n",
"任务 20240102 完成\n",
"任务 20231229 完成\n",
@@ -424,16 +462,16 @@
"任务 20231219 完成\n",
"任务 20231218 完成\n",
"任务 20231215 完成\n",
"任务 20231213 完成\n",
"任务 20231214 完成\n",
"任务 20231213 完成\n",
"任务 20231212 完成\n",
"任务 20231211 完成\n",
"任务 20231207 完成\n",
"任务 20231208 完成\n",
"任务 20231205 完成\n",
"任务 20231207 完成\n",
"任务 20231206 完成\n",
"任务 20231201 完成\n",
"任务 20231205 完成\n",
"任务 20231204 完成\n",
"任务 20231201 完成\n",
"任务 20231130 完成\n",
"任务 20231129 完成\n",
"任务 20231128 完成\n",
@@ -455,18 +493,18 @@
"任务 20231106 完成\n",
"任务 20231103 完成\n",
"任务 20231102 完成\n",
"任务 20231101 完成\n",
"任务 20231031 完成\n",
"任务 20231101 完成\n",
"任务 20231030 完成\n",
"任务 20231027 完成\n",
"任务 20231026 完成\n",
"任务 20231025 完成\n",
"任务 20231023 完成\n",
"任务 20231024 完成\n",
"任务 20231020 完成\n",
"任务 20231023 完成\n",
"任务 20231019 完成\n",
"任务 20231018 完成\n",
"任务 20231020 完成\n",
"任务 20231017 完成\n",
"任务 20231018 完成\n",
"任务 20231016 完成\n",
"任务 20231013 完成\n",
"任务 20231012 完成\n",
@@ -480,39 +518,39 @@
"任务 20230922 完成\n",
"任务 20230921 完成\n",
"任务 20230920 完成\n",
"任务 20230918 完成\n",
"任务 20230919 完成\n",
"任务 20230914 完成\n",
"任务 20230918 完成\n",
"任务 20230915 完成\n",
"任务 20230912 完成\n",
"任务 20230914 完成\n",
"任务 20230913 完成\n",
"任务 20230908 完成\n",
"任务 20230912 完成\n",
"任务 20230911 完成\n",
"任务 20230906 完成\n",
"任务 20230908 完成\n",
"任务 20230907 完成\n",
"任务 20230906 完成\n",
"任务 20230905 完成\n",
"任务 20230904 完成\n",
"任务 20230901 完成\n",
"任务 20230831 完成\n",
"任务 20230830 完成\n",
"任务 20230828 完成\n",
"任务 20230829 完成\n",
"任务 20230828 完成\n",
"任务 20230825 完成\n",
"任务 20230824 完成\n",
"任务 20230823 完成\n",
"任务 20230822 完成\n",
"任务 20230821 完成\n",
"任务 20230817 完成\n",
"任务 20230818 完成\n",
"任务 20230817 完成\n",
"任务 20230816 完成\n",
"任务 20230815 完成\n",
"任务 20230814 完成\n",
"任务 20230811 完成\n",
"任务 20230810 完成\n",
"任务 20230811 完成\n",
"任务 20230809 完成\n",
"任务 20230808 完成\n",
"任务 20230804 完成\n",
"任务 20230807 完成\n",
"任务 20230804 完成\n",
"任务 20230803 完成\n",
"任务 20230802 完成\n",
"任务 20230801 完成\n",
@@ -520,10 +558,10 @@
"任务 20230728 完成\n",
"任务 20230727 完成\n",
"任务 20230726 完成\n",
"任务 20230724 完成\n",
"任务 20230725 完成\n",
"任务 20230720 完成\n",
"任务 20230724 完成\n",
"任务 20230721 完成\n",
"任务 20230720 完成\n",
"任务 20230719 完成\n",
"任务 20230718 完成\n",
"任务 20230717 完成\n",
@@ -533,8 +571,8 @@
"任务 20230711 完成\n",
"任务 20230710 完成\n",
"任务 20230707 完成\n",
"任务 20230705 完成\n",
"任务 20230706 完成\n",
"任务 20230705 完成\n",
"任务 20230704 完成\n",
"任务 20230703 完成\n",
"任务 20230630 完成\n",
@@ -554,8 +592,8 @@
"任务 20230608 完成\n",
"任务 20230607 完成\n",
"任务 20230606 完成\n",
"任务 20230605 完成\n",
"任务 20230602 完成\n",
"任务 20230605 完成\n",
"任务 20230601 完成\n",
"任务 20230531 完成\n",
"任务 20230530 完成\n",
@@ -569,8 +607,8 @@
"任务 20230518 完成\n",
"任务 20230517 完成\n",
"任务 20230516 完成\n",
"任务 20230512 完成\n",
"任务 20230515 完成\n",
"任务 20230512 完成\n",
"任务 20230511 完成\n",
"任务 20230510 完成\n",
"任务 20230509 完成\n",
@@ -579,8 +617,8 @@
"任务 20230504 完成\n",
"任务 20230428 完成\n",
"任务 20230427 完成\n",
"任务 20230426 完成\n",
"任务 20230425 完成\n",
"任务 20230426 完成\n",
"任务 20230424 完成\n",
"任务 20230421 完成\n",
"任务 20230420 完成\n",
@@ -593,14 +631,14 @@
"任务 20230411 完成\n",
"任务 20230410 完成\n",
"任务 20230407 完成\n",
"任务 20230406 完成\n",
"任务 20230404 完成\n",
"任务 20230406 完成\n",
"任务 20230403 完成\n",
"任务 20230331 完成\n",
"任务 20230330 完成\n",
"任务 20230329 完成\n",
"任务 20230327 完成\n",
"任务 20230328 完成\n",
"任务 20230327 完成\n",
"任务 20230324 完成\n",
"任务 20230323 完成\n",
"任务 20230322 完成\n",
@@ -618,8 +656,8 @@
"任务 20230306 完成\n",
"任务 20230303 完成\n",
"任务 20230302 完成\n",
"任务 20230228 完成\n",
"任务 20230301 完成\n",
"任务 20230228 完成\n",
"任务 20230227 完成\n",
"任务 20230224 完成\n",
"任务 20230223 完成\n",
@@ -630,13 +668,13 @@
"任务 20230216 完成\n",
"任务 20230215 完成\n",
"任务 20230214 完成\n",
"任务 20230210 完成\n",
"任务 20230213 完成\n",
"任务 20230210 完成\n",
"任务 20230209 完成\n",
"任务 20230208 完成\n",
"任务 20230206 完成\n",
"任务 20230207 完成\n",
"任务 20230203 完成\n",
"任务 20230206 完成\n",
"任务 20230202 完成\n",
"任务 20230201 完成\n",
"任务 20230131 完成\n",
@@ -656,8 +694,8 @@
"任务 20230104 完成\n",
"任务 20230103 完成\n",
"任务 20221230 完成\n",
"任务 20221228 完成\n",
"任务 20221229 完成\n",
"任务 20221228 完成\n",
"任务 20221227 完成\n",
"任务 20221226 完成\n",
"任务 20221223 完成\n",
@@ -675,8 +713,8 @@
"任务 20221207 完成\n",
"任务 20221206 完成\n",
"任务 20221205 完成\n",
"任务 20221201 完成\n",
"任务 20221202 完成\n",
"任务 20221201 完成\n",
"任务 20221130 完成\n",
"任务 20221129 完成\n",
"任务 20221128 完成\n",
@@ -951,8 +989,8 @@
"任务 20211021 完成\n",
"任务 20211020 完成\n",
"任务 20211019 完成\n",
"任务 20211015 完成\n",
"任务 20211018 完成\n",
"任务 20211015 完成\n",
"任务 20211014 完成\n",
"任务 20211013 完成\n",
"任务 20211012 完成\n",
@@ -989,8 +1027,8 @@
"任务 20210819 完成\n",
"任务 20210818 完成\n",
"任务 20210817 完成\n",
"任务 20210813 完成\n",
"任务 20210816 完成\n",
"任务 20210813 完成\n",
"任务 20210812 完成\n",
"任务 20210811 完成\n",
"任务 20210810 完成\n",
@@ -1045,8 +1083,8 @@
"任务 20210601 完成\n",
"任务 20210531 完成\n",
"任务 20210528 完成\n",
"任务 20210527 完成\n",
"任务 20210526 完成\n",
"任务 20210527 完成\n",
"任务 20210525 完成\n",
"任务 20210524 完成\n",
"任务 20210521 完成\n",
@@ -1089,8 +1127,8 @@
"任务 20210325 完成\n",
"任务 20210324 完成\n",
"任务 20210323 完成\n",
"任务 20210319 完成\n",
"任务 20210322 完成\n",
"任务 20210319 完成\n",
"任务 20210318 完成\n",
"任务 20210317 完成\n",
"任务 20210316 完成\n",
@@ -1155,8 +1193,8 @@
"任务 20201215 完成\n",
"任务 20201214 完成\n",
"任务 20201211 完成\n",
"任务 20201209 完成\n",
"任务 20201210 完成\n",
"任务 20201209 完成\n",
"任务 20201208 完成\n",
"任务 20201207 完成\n",
"任务 20201204 完成\n",
@@ -1276,8 +1314,8 @@
"任务 20200617 完成\n",
"任务 20200616 完成\n",
"任务 20200615 完成\n",
"任务 20200611 完成\n",
"任务 20200612 完成\n",
"任务 20200611 完成\n",
"任务 20200610 完成\n",
"任务 20200609 完成\n",
"任务 20200608 完成\n",
@@ -1288,16 +1326,16 @@
"任务 20200601 完成\n",
"任务 20200529 完成\n",
"任务 20200528 完成\n",
"任务 20200526 完成\n",
"任务 20200527 完成\n",
"任务 20200526 完成\n",
"任务 20200525 完成\n",
"任务 20200522 完成\n",
"任务 20200521 完成\n",
"任务 20200520 完成\n",
"任务 20200519 完成\n",
"任务 20200518 完成\n",
"任务 20200514 完成\n",
"任务 20200515 完成\n",
"任务 20200514 完成\n",
"任务 20200513 完成\n",
"任务 20200512 完成\n",
"任务 20200511 完成\n",
@@ -1306,8 +1344,8 @@
"任务 20200506 完成\n",
"任务 20200430 完成\n",
"任务 20200429 完成\n",
"任务 20200427 完成\n",
"任务 20200428 完成\n",
"任务 20200427 完成\n",
"任务 20200424 完成\n",
"任务 20200423 完成\n",
"任务 20200422 完成\n",
@@ -1318,8 +1356,8 @@
"任务 20200415 完成\n",
"任务 20200414 完成\n",
"任务 20200413 完成\n",
"任务 20200409 完成\n",
"任务 20200410 完成\n",
"任务 20200409 完成\n",
"任务 20200408 完成\n",
"任务 20200407 完成\n",
"任务 20200403 完成\n",
@@ -1426,8 +1464,8 @@
"任务 20191105 完成\n",
"任务 20191104 完成\n",
"任务 20191101 完成\n",
"任务 20191030 完成\n",
"任务 20191031 完成\n",
"任务 20191030 完成\n",
"任务 20191029 完成\n",
"任务 20191028 完成\n",
"任务 20191025 完成\n",
@@ -1504,10 +1542,10 @@
"任务 20190710 完成\n",
"任务 20190709 完成\n",
"任务 20190708 完成\n",
"任务 20190704 完成\n",
"任务 20190705 完成\n",
"任务 20190702 完成\n",
"任务 20190704 完成\n",
"任务 20190703 完成\n",
"任务 20190702 完成\n",
"任务 20190701 完成\n",
"任务 20190628 完成\n",
"任务 20190627 完成\n",
@@ -1595,10 +1633,10 @@
"任务 20190226 完成\n",
"任务 20190225 完成\n",
"任务 20190222 完成\n",
"任务 20190221 完成\n",
"任务 20190220 完成\n",
"任务 20190219 完成\n",
"任务 20190218 完成\n",
"任务 20190221 完成\n",
"任务 20190215 完成\n",
"任务 20190214 完成\n",
"任务 20190213 完成\n",
@@ -1624,8 +1662,8 @@
"任务 20190109 完成\n",
"任务 20190108 完成\n",
"任务 20190107 完成\n",
"任务 20190103 完成\n",
"任务 20190104 完成\n",
"任务 20190103 完成\n",
"任务 20190102 完成\n",
"任务 20181228 完成\n",
"任务 20181227 完成\n",
@@ -1664,8 +1702,8 @@
"任务 20181112 完成\n",
"任务 20181109 完成\n",
"任务 20181108 完成\n",
"任务 20181107 完成\n",
"任务 20181106 完成\n",
"任务 20181107 完成\n",
"任务 20181105 完成\n",
"任务 20181102 完成\n",
"任务 20181101 完成\n",
@@ -1822,8 +1860,8 @@
"任务 20180320 完成\n",
"任务 20180319 完成\n",
"任务 20180316 完成\n",
"任务 20180314 完成\n",
"任务 20180315 完成\n",
"任务 20180314 完成\n",
"任务 20180313 完成\n",
"任务 20180312 完成\n",
"任务 20180309 完成\n",
@@ -1842,10 +1880,10 @@
"任务 20180213 完成\n",
"任务 20180212 完成\n",
"任务 20180209 完成\n",
"任务 20180208 完成\n",
"任务 20180207 完成\n",
"任务 20180206 完成\n",
"任务 20180208 完成\n",
"任务 20180205 完成\n",
"任务 20180206 完成\n",
"任务 20180202 完成\n",
"任务 20180201 完成\n",
"任务 20180131 完成\n",
@@ -1890,8 +1928,8 @@
"任务 20171206 完成\n",
"任务 20171205 完成\n",
"任务 20171204 完成\n",
"任务 20171130 完成\n",
"任务 20171201 完成\n",
"任务 20171130 完成\n",
"任务 20171129 完成\n",
"任务 20171128 完成\n",
"任务 20171127 完成\n",
@@ -1904,8 +1942,8 @@
"任务 20171116 完成\n",
"任务 20171115 完成\n",
"任务 20171114 完成\n",
"任务 20171113 完成\n",
"任务 20171110 完成\n",
"任务 20171113 完成\n",
"任务 20171109 完成\n",
"任务 20171108 完成\n",
"任务 20171107 完成\n",
@@ -1934,8 +1972,8 @@
"任务 20170928 完成\n",
"任务 20170927 完成\n",
"任务 20170926 完成\n",
"任务 20170925 完成\n",
"任务 20170922 完成\n",
"任务 20170925 完成\n",
"任务 20170921 完成\n",
"任务 20170920 完成\n",
"任务 20170919 完成\n",
@@ -1948,18 +1986,18 @@
"任务 20170908 完成\n",
"任务 20170907 完成\n",
"任务 20170906 完成\n",
"任务 20170904 完成\n",
"任务 20170905 完成\n",
"任务 20170904 完成\n",
"任务 20170901 完成\n",
"任务 20170831 完成\n",
"任务 20170830 完成\n",
"任务 20170829 完成\n",
"任务 20170828 完成\n",
"任务 20170825 完成\n",
"任务 20170824 完成\n",
"任务 20170823 完成\n",
"任务 20170822 完成\n",
"任务 20170824 完成\n",
"任务 20170821 完成\n",
"任务 20170822 完成\n",
"任务 20170818 完成\n",
"任务 20170817 完成\n",
"任务 20170816 完成\n",
@@ -1998,8 +2036,8 @@
"任务 20170630 完成\n",
"任务 20170629 完成\n",
"任务 20170628 完成\n",
"任务 20170627 完成\n",
"任务 20170626 完成\n",
"任务 20170627 完成\n",
"任务 20170623 完成\n",
"任务 20170622 完成\n",
"任务 20170621 完成\n",
@@ -2028,10 +2066,10 @@
"任务 20170517 完成\n",
"任务 20170516 完成\n",
"任务 20170515 完成\n",
"任务 20170511 完成\n",
"任务 20170512 完成\n",
"任务 20170509 完成\n",
"任务 20170511 完成\n",
"任务 20170510 完成\n",
"任务 20170509 完成\n",
"任务 20170508 完成\n",
"任务 20170505 完成\n",
"任务 20170504 完成\n",
@@ -2050,8 +2088,8 @@
"任务 20170414 完成\n",
"任务 20170413 完成\n",
"任务 20170412 完成\n",
"任务 20170410 完成\n",
"任务 20170411 完成\n",
"任务 20170410 完成\n",
"任务 20170407 完成\n",
"任务 20170406 完成\n",
"任务 20170405 完成\n",
@@ -2070,16 +2108,16 @@
"任务 20170315 完成\n",
"任务 20170314 完成\n",
"任务 20170313 完成\n",
"任务 20170310 完成\n",
"任务 20170309 完成\n",
"任务 20170310 完成\n",
"任务 20170308 完成\n",
"任务 20170307 完成\n",
"任务 20170306 完成\n",
"任务 20170303 完成\n",
"任务 20170302 完成\n",
"任务 20170301 完成\n",
"任务 20170228 完成\n",
"任务 20170227 完成\n",
"任务 20170228 完成\n",
"任务 20170224 完成\n",
"任务 20170223 完成\n",
"任务 20170222 完成\n",
@@ -2117,15 +2155,15 @@
]
}
],
"execution_count": 5
"execution_count": 3
},
{
"cell_type": "code",
"id": "97fdf8be-a86c-4404-bf0c-701f002cd81c",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:15:26.646658Z",
"start_time": "2025-02-11T16:15:26.120701Z"
"end_time": "2025-03-02T08:33:16.498221Z",
"start_time": "2025-03-02T08:33:16.033912Z"
}
},
"source": [
@@ -2138,70 +2176,70 @@
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 002512.SZ 20250211 5.03 5.9759 7.8713 \n",
"1 600966.SH 20250211 4.83 0.6904 1.3494 \n",
"2 600358.SH 20250211 3.68 8.5826 11.3780 \n",
"3 002893.SZ 20250211 9.73 1.9217 2.6415 \n",
"4 300648.SZ 20250211 22.90 1.7775 2.3188 \n",
"0 002977.SZ 20250227 27.20 2.2311 3.0411 \n",
"1 688065.SH 20250227 48.17 0.7081 1.4224 \n",
"2 002563.SZ 20250227 6.30 0.7054 2.2058 \n",
"3 300044.SZ 20250227 7.29 11.8529 13.2447 \n",
"4 603219.SH 20250227 17.96 5.9145 28.1559 \n",
"... ... ... ... ... ... \n",
"8291965 600707.SH 20170103 9.12 0.9482 1.3437 \n",
"8291966 600708.SH 20170103 9.03 0.7694 1.0169 \n",
"8291967 600712.SH 20170103 10.29 0.5859 0.8028 \n",
"8291968 001872.SZ 20170103 19.33 1.0970 5.4258 \n",
"8291969 001914.SZ 20170103 12.37 3.2627 6.6991 \n",
"8372851 600708.SH 20170103 9.03 0.7694 1.0169 \n",
"8372852 600712.SH 20170103 10.29 0.5859 0.8028 \n",
"8372853 001872.SZ 20170103 19.33 1.0970 5.4258 \n",
"8372854 001914.SZ 20170103 12.37 3.2627 6.6991 \n",
"8372855 302132.SZ 20170103 23.28 0.4912 1.5149 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm \\\n",
"0 0.87 NaN NaN 12.8888 2.9340 3.0625 \n",
"1 1.16 35.5101 15.2315 0.9534 0.3454 0.3402 \n",
"2 1.38 NaN NaN 15.2661 3.4220 4.2041 \n",
"3 0.85 48.9883 41.5405 2.2074 2.3641 2.3637 \n",
"4 0.69 NaN NaN 4.1442 3.7325 3.3186 \n",
"... ... ... ... ... ... ... \n",
"8291965 1.18 133.8070 35.6525 4.5692 28.6047 27.5926 \n",
"8291966 0.85 23.3367 22.2458 1.4847 0.9613 0.9248 \n",
"8291967 0.67 202.4855 287.1454 5.1852 2.3682 2.5386 \n",
"8291968 0.77 23.6158 23.1883 2.7052 6.6556 6.5584 \n",
"8291969 1.02 20.5631 15.1595 2.1186 1.4950 1.2600 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.98 65.2824 275.8084 3.0077 19.5374 20.5874 0.6618 \n",
"1 0.79 76.6697 70.6389 2.4679 13.2919 10.1678 NaN \n",
"2 1.53 15.1340 16.2489 1.4830 1.2425 1.1986 4.7619 \n",
"3 0.79 NaN NaN 9.0179 12.4251 19.5540 0.0000 \n",
"4 1.76 38.6030 47.7249 6.4916 3.7441 3.8039 1.6629 \n",
"... ... ... ... ... ... ... ... \n",
"8372851 0.85 23.3367 22.2458 1.4847 0.9613 0.9248 1.1074 \n",
"8372852 0.67 202.4855 287.1454 5.1852 2.3682 2.5386 0.1555 \n",
"8372853 0.77 23.6158 23.1883 2.7052 6.6556 6.5584 2.1211 \n",
"8372854 1.02 20.5631 15.1595 2.1186 1.4950 1.2600 0.4042 \n",
"8372855 0.74 91.3908 84.6980 6.9391 8.9531 8.8570 0.2291 \n",
"\n",
" dv_ratio dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.0000 NaN 114709.4532 104845.4958 79597.9456 5.769885e+05 \n",
"1 0.5633 0.5633 133684.4288 133684.4288 68397.8451 6.456958e+05 \n",
"2 0.0000 NaN 50493.6660 50493.6660 38088.2934 1.858167e+05 \n",
"3 0.8222 0.8222 26364.0000 20277.8618 14751.7331 2.565217e+05 \n",
"4 0.0000 NaN 14778.3896 10618.9439 8140.0483 3.384251e+05 \n",
"... ... ... ... ... ... ... \n",
"8291965 0.0000 NaN 73675.7688 73603.7688 51940.2015 6.719230e+05 \n",
"8291966 1.1074 1.1074 131871.9966 75088.9215 56812.2811 1.190804e+06 \n",
"8291967 0.1555 0.1555 54465.5360 53795.9475 39266.3119 5.604504e+05 \n",
"8291968 2.1211 2.1211 64476.3730 46486.6050 9398.8050 1.246328e+06 \n",
"8291969 0.4042 0.4042 66696.1416 66678.0666 32475.1786 8.250313e+05 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.6618 12012.0000 6662.0170 4887.5170 3.267264e+05 \n",
"1 NaN 58337.8039 58337.8039 29040.9774 2.810132e+06 \n",
"2 4.7619 269409.0160 220844.2340 70621.6215 1.697277e+06 \n",
"3 NaN 76386.9228 76377.3438 68351.0115 5.568607e+05 \n",
"4 1.6629 56140.0000 56140.0000 11792.9916 1.008274e+06 \n",
"... ... ... ... ... ... \n",
"8372851 1.1074 131871.9966 75088.9215 56812.2811 1.190804e+06 \n",
"8372852 0.1555 54465.5360 53795.9475 39266.3119 5.604504e+05 \n",
"8372853 2.1211 64476.3730 46486.6050 9398.8050 1.246328e+06 \n",
"8372854 0.4042 66696.1416 66678.0666 32475.1786 8.250313e+05 \n",
"8372855 0.2291 39384.0333 30419.3588 9862.3809 9.168603e+05 \n",
"\n",
" circ_mv is_st \n",
"0 527372.8439 False \n",
"1 645695.7911 False \n",
"2 185816.6909 True \n",
"3 197303.5953 False \n",
"4 243173.8153 False \n",
"... ... ... \n",
"8291965 671266.3715 False \n",
"8291966 678052.9611 False \n",
"8291967 553560.2998 False \n",
"8291968 898586.0747 False \n",
"8291969 824807.6838 False \n",
" circ_mv is_st \n",
"0 1.812069e+05 False \n",
"1 2.810132e+06 False \n",
"2 1.391319e+06 False \n",
"3 5.567908e+05 False \n",
"4 1.008274e+06 False \n",
"... ... ... \n",
"8372851 6.780530e+05 False \n",
"8372852 5.535603e+05 False \n",
"8372853 8.985861e+05 False \n",
"8372854 8.248077e+05 False \n",
"8372855 7.081627e+05 False \n",
"\n",
"[8291970 rows x 19 columns]\n"
"[8372856 rows x 19 columns]\n"
]
}
],
"execution_count": 6
"execution_count": 4
},
{
"cell_type": "code",
"id": "2b58a8bf-ffc5-4482-8e4d-bf24da9277de",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:17:03.779399Z",
"start_time": "2025-02-11T16:15:26.662526Z"
"end_time": "2025-03-02T08:34:49.733727Z",
"start_time": "2025-03-02T08:33:16.498221Z"
}
},
"source": [
@@ -2219,15 +2257,15 @@
]
}
],
"execution_count": 7
"execution_count": 5
},
{
"cell_type": "code",
"id": "57ac1d86-5ce8-4bc9-812f-b45dcc2a3b4c",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:17:03.857407Z",
"start_time": "2025-02-11T16:17:03.843423Z"
"end_time": "2025-03-02T08:34:49.778164Z",
"start_time": "2025-03-02T08:34:49.775512Z"
}
},
"source": [],

View File

@@ -5,8 +5,8 @@
"id": "94412ea8-aad7-47fb-8597-d80adef21a8b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:41:07.699531Z",
"start_time": "2025-02-11T15:41:06.959399Z"
"end_time": "2025-03-01T09:19:24.564409Z",
"start_time": "2025-03-01T09:19:23.930364Z"
}
},
"source": [
@@ -23,8 +23,8 @@
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-02-11T16:18:37.728208Z",
"start_time": "2025-02-11T15:41:07.705536Z"
"end_time": "2025-03-01T09:56:42.369757Z",
"start_time": "2025-03-01T09:19:24.709524Z"
}
},
"source": [
@@ -228,7 +228,7 @@
"成功获取 000572.SZ 的数据\n",
"成功获取 000573.SZ 的数据\n",
"成功获取 000576.SZ 的数据\n",
"已调用300次API等待 31.79 秒以满足速率限制...\n",
"已调用300次API等待 41.14 秒以满足速率限制...\n",
"成功获取 000581.SZ 的数据\n",
"成功获取 000582.SZ 的数据\n",
"成功获取 000584.SZ 的数据\n",
@@ -379,7 +379,7 @@
"成功获取 000811.SZ 的数据\n",
"成功获取 000812.SZ 的数据\n",
"成功获取 000813.SZ 的数据\n",
"已调用300次API等待 30.82 秒以满足速率限制...\n",
"已调用300次API等待 40.78 秒以满足速率限制...\n",
"成功获取 000815.SZ 的数据\n",
"成功获取 000816.SZ 的数据\n",
"成功获取 000818.SZ 的数据\n",
@@ -530,7 +530,7 @@
"成功获取 001238.SZ 的数据\n",
"成功获取 001239.SZ 的数据\n",
"成功获取 001255.SZ 的数据\n",
"已调用300次API等待 31.41 秒以满足速率限制...\n",
"已调用300次API等待 40.77 秒以满足速率限制...\n",
"成功获取 001256.SZ 的数据\n",
"成功获取 001258.SZ 的数据\n",
"成功获取 001259.SZ 的数据\n",
@@ -681,7 +681,7 @@
"成功获取 002085.SZ 的数据\n",
"成功获取 002086.SZ 的数据\n",
"成功获取 002088.SZ 的数据\n",
"已调用300次API等待 31.38 秒以满足速率限制...\n",
"已调用300次API等待 40.70 秒以满足速率限制...\n",
"成功获取 002090.SZ 的数据\n",
"成功获取 002091.SZ 的数据\n",
"成功获取 002092.SZ 的数据\n",
@@ -832,7 +832,7 @@
"成功获取 002242.SZ 的数据\n",
"成功获取 002243.SZ 的数据\n",
"成功获取 002244.SZ 的数据\n",
"已调用300次API等待 31.86 秒以满足速率限制...\n",
"已调用300次API等待 40.20 秒以满足速率限制...\n",
"成功获取 002245.SZ 的数据\n",
"成功获取 002246.SZ 的数据\n",
"成功获取 002247.SZ 的数据\n",
@@ -983,7 +983,7 @@
"成功获取 002400.SZ 的数据\n",
"成功获取 002401.SZ 的数据\n",
"成功获取 002402.SZ 的数据\n",
"已调用300次API等待 31.95 秒以满足速率限制...\n",
"已调用300次API等待 40.84 秒以满足速率限制...\n",
"成功获取 002403.SZ 的数据\n",
"成功获取 002404.SZ 的数据\n",
"成功获取 002405.SZ 的数据\n",
@@ -1134,7 +1134,7 @@
"成功获取 002566.SZ 的数据\n",
"成功获取 002567.SZ 的数据\n",
"成功获取 002568.SZ 的数据\n",
"已调用300次API等待 31.12 秒以满足速率限制...\n",
"已调用300次API等待 41.66 秒以满足速率限制...\n",
"成功获取 002569.SZ 的数据\n",
"成功获取 002570.SZ 的数据\n",
"成功获取 002571.SZ 的数据\n",
@@ -1285,7 +1285,7 @@
"成功获取 002729.SZ 的数据\n",
"成功获取 002730.SZ 的数据\n",
"成功获取 002731.SZ 的数据\n",
"已调用300次API等待 29.77 秒以满足速率限制...\n",
"已调用300次API等待 40.74 秒以满足速率限制...\n",
"成功获取 002732.SZ 的数据\n",
"成功获取 002733.SZ 的数据\n",
"成功获取 002734.SZ 的数据\n",
@@ -1436,7 +1436,7 @@
"成功获取 002896.SZ 的数据\n",
"成功获取 002897.SZ 的数据\n",
"成功获取 002898.SZ 的数据\n",
"已调用300次API等待 32.34 秒以满足速率限制...\n",
"已调用300次API等待 41.14 秒以满足速率限制...\n",
"成功获取 002899.SZ 的数据\n",
"成功获取 002900.SZ 的数据\n",
"成功获取 002901.SZ 的数据\n",
@@ -1587,7 +1587,7 @@
"成功获取 300014.SZ 的数据\n",
"成功获取 300015.SZ 的数据\n",
"成功获取 300016.SZ 的数据\n",
"已调用300次API等待 32.02 秒以满足速率限制...\n",
"已调用300次API等待 40.57 秒以满足速率限制...\n",
"成功获取 300017.SZ 的数据\n",
"成功获取 300018.SZ 的数据\n",
"成功获取 300019.SZ 的数据\n",
@@ -1738,7 +1738,7 @@
"成功获取 300174.SZ 的数据\n",
"成功获取 300175.SZ 的数据\n",
"成功获取 300176.SZ 的数据\n",
"已调用300次API等待 31.35 秒以满足速率限制...\n",
"已调用300次API等待 41.05 秒以满足速率限制...\n",
"成功获取 300177.SZ 的数据\n",
"成功获取 300179.SZ 的数据\n",
"成功获取 300180.SZ 的数据\n",
@@ -1889,7 +1889,7 @@
"成功获取 300337.SZ 的数据\n",
"成功获取 300338.SZ 的数据\n",
"成功获取 300339.SZ 的数据\n",
"已调用300次API等待 31.84 秒以满足速率限制...\n",
"已调用300次API等待 40.69 秒以满足速率限制...\n",
"成功获取 300340.SZ 的数据\n",
"成功获取 300341.SZ 的数据\n",
"成功获取 300342.SZ 的数据\n",
@@ -2040,7 +2040,7 @@
"成功获取 300494.SZ 的数据\n",
"成功获取 300496.SZ 的数据\n",
"成功获取 300497.SZ 的数据\n",
"已调用300次API等待 27.83 秒以满足速率限制...\n",
"已调用300次API等待 40.51 秒以满足速率限制...\n",
"成功获取 300498.SZ 的数据\n",
"成功获取 300499.SZ 的数据\n",
"成功获取 300500.SZ 的数据\n",
@@ -2191,7 +2191,7 @@
"成功获取 300650.SZ 的数据\n",
"成功获取 300651.SZ 的数据\n",
"成功获取 300652.SZ 的数据\n",
"已调用300次API等待 31.79 秒以满足速率限制...\n",
"已调用300次API等待 39.15 秒以满足速率限制...\n",
"成功获取 300653.SZ 的数据\n",
"成功获取 300654.SZ 的数据\n",
"成功获取 300655.SZ 的数据\n",
@@ -2342,7 +2342,7 @@
"成功获取 300810.SZ 的数据\n",
"成功获取 300811.SZ 的数据\n",
"成功获取 300812.SZ 的数据\n",
"已调用300次API等待 31.09 秒以满足速率限制...\n",
"已调用300次API等待 38.87 秒以满足速率限制...\n",
"成功获取 300813.SZ 的数据\n",
"成功获取 300814.SZ 的数据\n",
"成功获取 300815.SZ 的数据\n",
@@ -2493,7 +2493,7 @@
"成功获取 300966.SZ 的数据\n",
"成功获取 300967.SZ 的数据\n",
"成功获取 300968.SZ 的数据\n",
"已调用300次API等待 31.39 秒以满足速率限制...\n",
"已调用300次API等待 40.54 秒以满足速率限制...\n",
"成功获取 300969.SZ 的数据\n",
"成功获取 300970.SZ 的数据\n",
"成功获取 300971.SZ 的数据\n",
@@ -2644,7 +2644,7 @@
"成功获取 301128.SZ 的数据\n",
"成功获取 301129.SZ 的数据\n",
"成功获取 301130.SZ 的数据\n",
"已调用300次API等待 31.44 秒以满足速率限制...\n",
"已调用300次API等待 41.03 秒以满足速率限制...\n",
"成功获取 301131.SZ 的数据\n",
"成功获取 301132.SZ 的数据\n",
"成功获取 301133.SZ 的数据\n",
@@ -2795,7 +2795,7 @@
"成功获取 301313.SZ 的数据\n",
"成功获取 301314.SZ 的数据\n",
"成功获取 301315.SZ 的数据\n",
"已调用300次API等待 31.43 秒以满足速率限制...\n",
"已调用300次API等待 40.99 秒以满足速率限制...\n",
"成功获取 301316.SZ 的数据\n",
"成功获取 301317.SZ 的数据\n",
"成功获取 301318.SZ 的数据\n",
@@ -2946,7 +2946,7 @@
"成功获取 301618.SZ 的数据\n",
"成功获取 301622.SZ 的数据\n",
"成功获取 301626.SZ 的数据\n",
"已调用300次API等待 31.51 秒以满足速率限制...\n",
"已调用300次API等待 41.17 秒以满足速率限制...\n",
"成功获取 301628.SZ 的数据\n",
"成功获取 301631.SZ 的数据\n",
"成功获取 301633.SZ 的数据\n",
@@ -3097,7 +3097,7 @@
"成功获取 600170.SH 的数据\n",
"成功获取 600171.SH 的数据\n",
"成功获取 600172.SH 的数据\n",
"已调用300次API等待 31.08 秒以满足速率限制...\n",
"已调用300次API等待 40.74 秒以满足速率限制...\n",
"成功获取 600173.SH 的数据\n",
"成功获取 600176.SH 的数据\n",
"成功获取 600177.SH 的数据\n",
@@ -3248,7 +3248,7 @@
"成功获取 600366.SH 的数据\n",
"成功获取 600367.SH 的数据\n",
"成功获取 600368.SH 的数据\n",
"已调用300次API等待 32.11 秒以满足速率限制...\n",
"已调用300次API等待 41.16 秒以满足速率限制...\n",
"成功获取 600369.SH 的数据\n",
"成功获取 600370.SH 的数据\n",
"成功获取 600371.SH 的数据\n",
@@ -3399,7 +3399,7 @@
"成功获取 600572.SH 的数据\n",
"成功获取 600573.SH 的数据\n",
"成功获取 600575.SH 的数据\n",
"已调用300次API等待 32.27 秒以满足速率限制...\n",
"已调用300次API等待 40.45 秒以满足速率限制...\n",
"成功获取 600576.SH 的数据\n",
"成功获取 600577.SH 的数据\n",
"成功获取 600578.SH 的数据\n",
@@ -3550,7 +3550,7 @@
"成功获取 600748.SH 的数据\n",
"成功获取 600749.SH 的数据\n",
"成功获取 600750.SH 的数据\n",
"已调用300次API等待 30.57 秒以满足速率限制...\n",
"已调用300次API等待 41.00 秒以满足速率限制...\n",
"成功获取 600751.SH 的数据\n",
"成功获取 600753.SH 的数据\n",
"成功获取 600754.SH 的数据\n",
@@ -3701,7 +3701,7 @@
"成功获取 600956.SH 的数据\n",
"成功获取 600958.SH 的数据\n",
"成功获取 600959.SH 的数据\n",
"已调用300次API等待 29.71 秒以满足速率限制...\n",
"已调用300次API等待 41.08 秒以满足速率限制...\n",
"成功获取 600960.SH 的数据\n",
"成功获取 600961.SH 的数据\n",
"成功获取 600962.SH 的数据\n",
@@ -3852,7 +3852,7 @@
"成功获取 601519.SH 的数据\n",
"成功获取 601528.SH 的数据\n",
"成功获取 601555.SH 的数据\n",
"已调用300次API等待 32.29 秒以满足速率限制...\n",
"已调用300次API等待 41.02 秒以满足速率限制...\n",
"成功获取 601566.SH 的数据\n",
"成功获取 601567.SH 的数据\n",
"成功获取 601568.SH 的数据\n",
@@ -4003,7 +4003,7 @@
"成功获取 603041.SH 的数据\n",
"成功获取 603042.SH 的数据\n",
"成功获取 603043.SH 的数据\n",
"已调用300次API等待 30.99 秒以满足速率限制...\n",
"已调用300次API等待 40.67 秒以满足速率限制...\n",
"成功获取 603045.SH 的数据\n",
"成功获取 603048.SH 的数据\n",
"成功获取 603050.SH 的数据\n",
@@ -4154,7 +4154,7 @@
"成功获取 603228.SH 的数据\n",
"成功获取 603229.SH 的数据\n",
"成功获取 603230.SH 的数据\n",
"已调用300次API等待 30.34 秒以满足速率限制...\n",
"已调用300次API等待 41.24 秒以满足速率限制...\n",
"成功获取 603231.SH 的数据\n",
"成功获取 603232.SH 的数据\n",
"成功获取 603233.SH 的数据\n",
@@ -4305,7 +4305,7 @@
"成功获取 603530.SH 的数据\n",
"成功获取 603533.SH 的数据\n",
"成功获取 603535.SH 的数据\n",
"已调用300次API等待 30.84 秒以满足速率限制...\n",
"已调用300次API等待 40.73 秒以满足速率限制...\n",
"成功获取 603536.SH 的数据\n",
"成功获取 603538.SH 的数据\n",
"成功获取 603551.SH 的数据\n",
@@ -4456,7 +4456,7 @@
"成功获取 603819.SH 的数据\n",
"成功获取 603822.SH 的数据\n",
"成功获取 603823.SH 的数据\n",
"已调用300次API等待 30.10 秒以满足速率限制...\n",
"已调用300次API等待 41.30 秒以满足速率限制...\n",
"成功获取 603825.SH 的数据\n",
"成功获取 603826.SH 的数据\n",
"成功获取 603828.SH 的数据\n",
@@ -4607,7 +4607,7 @@
"成功获取 605167.SH 的数据\n",
"成功获取 605168.SH 的数据\n",
"成功获取 605169.SH 的数据\n",
"已调用300次API等待 32.36 秒以满足速率限制...\n",
"已调用300次API等待 40.75 秒以满足速率限制...\n",
"成功获取 605177.SH 的数据\n",
"成功获取 605178.SH 的数据\n",
"成功获取 605179.SH 的数据\n",
@@ -4758,7 +4758,7 @@
"成功获取 688097.SH 的数据\n",
"成功获取 688098.SH 的数据\n",
"成功获取 688099.SH 的数据\n",
"已调用300次API等待 30.11 秒以满足速率限制...\n",
"已调用300次API等待 41.17 秒以满足速率限制...\n",
"成功获取 688100.SH 的数据\n",
"成功获取 688101.SH 的数据\n",
"成功获取 688102.SH 的数据\n",
@@ -4909,7 +4909,7 @@
"成功获取 688271.SH 的数据\n",
"成功获取 688272.SH 的数据\n",
"成功获取 688273.SH 的数据\n",
"已调用300次API等待 32.68 秒以满足速率限制...\n",
"已调用300次API等待 41.28 秒以满足速率限制...\n",
"成功获取 688275.SH 的数据\n",
"成功获取 688276.SH 的数据\n",
"成功获取 688277.SH 的数据\n",
@@ -5060,7 +5060,7 @@
"成功获取 688486.SH 的数据\n",
"成功获取 688488.SH 的数据\n",
"成功获取 688489.SH 的数据\n",
"已调用300次API等待 30.08 秒以满足速率限制...\n",
"已调用300次API等待 41.23 秒以满足速率限制...\n",
"成功获取 688496.SH 的数据\n",
"成功获取 688498.SH 的数据\n",
"成功获取 688499.SH 的数据\n",
@@ -5211,7 +5211,7 @@
"成功获取 688689.SH 的数据\n",
"成功获取 688690.SH 的数据\n",
"成功获取 688691.SH 的数据\n",
"已调用300次API等待 29.34 秒以满足速率限制...\n",
"已调用300次API等待 40.17 秒以满足速率限制...\n",
"成功获取 688692.SH 的数据\n",
"成功获取 688693.SH 的数据\n",
"成功获取 688695.SH 的数据\n",
@@ -5362,7 +5362,7 @@
"成功获取 835184.BJ 的数据\n",
"成功获取 835185.BJ 的数据\n",
"成功获取 835207.BJ 的数据\n",
"已调用300次API等待 31.92 秒以满足速率限制...\n",
"已调用300次API等待 41.36 秒以满足速率限制...\n",
"成功获取 835237.BJ 的数据\n",
"成功获取 835305.BJ 的数据\n",
"成功获取 835368.BJ 的数据\n",
@@ -5513,7 +5513,7 @@
"成功获取 000005.SZ 的数据\n",
"成功获取 000013.SZ 的数据\n",
"成功获取 000015.SZ 的数据\n",
"已调用300次API等待 31.12 秒以满足速率限制...\n",
"已调用300次API等待 40.98 秒以满足速率限制...\n",
"成功获取 000018.SZ 的数据\n",
"成功获取 000023.SZ 的数据\n",
"成功获取 000024.SZ 的数据\n",
@@ -5615,14 +5615,14 @@
"成功获取 002341.SZ 的数据\n",
"成功获取 002359.SZ 的数据\n",
"成功获取 002411.SZ 的数据\n",
"成功获取 002450.SZ 的数据\n",
"成功获取 002464.SZ 的数据\n",
"成功获取 002473.SZ 的数据\n",
"成功获取 002477.SZ 的数据\n",
"成功获取 002417.SZ 的数据\n",
"成功获取 002433.SZ 的数据\n",
"成功获取 002435.SZ 的数据\n",
"成功获取 002447.SZ 的数据\n",
"成功获取 002450.SZ 的数据\n",
"成功获取 002464.SZ 的数据\n",
"成功获取 002473.SZ 的数据\n",
"成功获取 002477.SZ 的数据\n",
"成功获取 002499.SZ 的数据\n",
"成功获取 002502.SZ 的数据\n",
"成功获取 002503.SZ 的数据\n",
@@ -5664,7 +5664,7 @@
"成功获取 300309.SZ 的数据\n",
"成功获取 300312.SZ 的数据\n",
"成功获取 300325.SZ 的数据\n",
"已调用300次API等待 32.31 秒以满足速率限制...\n",
"已调用300次API等待 40.90 秒以满足速率限制...\n",
"成功获取 300330.SZ 的数据\n",
"成功获取 300336.SZ 的数据\n",
"成功获取 300356.SZ 的数据\n",
@@ -5806,7 +5806,7 @@
"2 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"3 000001.SZ 深发展A 20070620 20120801 完成股改\n",
"4 000001.SZ S深发展A 20061009 20070619 未股改加S\n",
"名称变化记录总数: 31891\n"
"名称变化记录总数: 31934\n"
]
}
],
@@ -5817,8 +5817,8 @@
"id": "4d5524b8-2a90-44bb-b5ef-e59cfa232ff0",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:18:37.959516Z",
"start_time": "2025-02-11T16:18:37.821725Z"
"end_time": "2025-03-01T09:56:42.543882Z",
"start_time": "2025-03-01T09:56:42.431891Z"
}
},
"source": [
@@ -5845,8 +5845,8 @@
"id": "1e920791-e8de-4a51-a39b-283f54132b44",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:18:37.974954Z",
"start_time": "2025-02-11T16:18:37.964501Z"
"end_time": "2025-03-01T09:56:42.552436Z",
"start_time": "2025-03-01T09:56:42.545392Z"
}
},
"source": [
@@ -5873,8 +5873,8 @@
"id": "4f5651f7-0910-4df5-9c3f-79d6ce033d53",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T16:18:38.006078Z",
"start_time": "2025-02-11T16:18:37.991781Z"
"end_time": "2025-03-01T09:56:42.579674Z",
"start_time": "2025-03-01T09:56:42.569013Z"
}
},
"source": [],

View File

@@ -2,35 +2,49 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:43:54.745322Z",
"start_time": "2025-02-11T15:43:53.837662Z"
"end_time": "2025-03-30T16:42:34.194992Z",
"start_time": "2025-03-30T16:42:33.440178Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:08.235573Z",
"start_time": "2025-02-11T15:53:07.753701Z"
"end_time": "2025-03-30T16:42:36.432691Z",
"start_time": "2025-03-30T16:42:34.197998Z"
}
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
@@ -58,39 +72,20 @@
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
]
" name_change_dict[ts_code] = filter_rows(st_data)"
],
"outputs": [],
"execution_count": 2
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:19.812860Z",
"start_time": "2025-02-11T15:53:09.614377Z"
"end_time": "2025-03-30T16:43:03.790361Z",
"start_time": "2025-03-30T16:42:36.633554Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8291970 entries, 0 to 8291969\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 189.8+ MB\n",
"None\n",
"20250211\n",
"20250212\n"
]
}
],
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
@@ -104,39 +99,44 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:24.100612Z",
"start_time": "2025-02-11T15:53:22.361257Z"
},
"scrolled": true
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8453605 entries, 0 to 32308\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 193.5+ MB\n",
"None\n",
"20250321\n",
"20250324\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:07.947442Z",
"start_time": "2025-03-30T16:43:03.827519Z"
}
},
"source": [
"\n",
"\n",
@@ -186,169 +186,202 @@
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
]
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250331 完成\n",
"任务 20250401 完成\n",
"任务 20250327 完成\n",
"任务 20250328 完成\n",
"任务 20250326 完成\n",
"任务 20250324 完成\n",
"任务 20250325 完成\n"
]
}
],
"execution_count": 4
},
{
"cell_type": "code",
"execution_count": 5,
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:25.913933Z",
"start_time": "2025-02-11T15:53:25.902629Z"
"end_time": "2025-03-30T16:43:07.962318Z",
"start_time": "2025-03-30T16:43:07.951757Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 601162.SH 20250212 4.77 7.3760 9.7054 \n",
"1 603216.SH 20250212 11.42 8.8711 8.8711 \n",
"2 872808.BJ 20250212 74.36 4.1219 15.3296 \n",
"3 601881.SH 20250212 14.43 0.5617 1.9533 \n",
"4 002837.SZ 20250212 42.25 3.8199 5.7136 \n",
"... ... ... ... ... ... \n",
"5380 603931.SH 20250212 23.83 1.4692 4.6843 \n",
"5381 688567.SH 20250212 12.35 1.3091 2.1970 \n",
"5382 688530.SH 20250212 19.30 6.6093 6.6093 \n",
"5383 301363.SZ 20250212 31.99 2.1990 2.1990 \n",
"5384 833533.BJ 20250212 46.02 27.7269 27.7597 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 2.00 134.5633 NaN 1.7935 12.0634 19.0461 0.0000 \n",
"1 2.09 26.5657 27.5224 1.4454 1.9304 1.9996 2.6270 \n",
"2 1.20 142.3485 196.0315 22.9124 22.8711 25.8281 NaN \n",
"3 0.84 20.0264 15.5707 1.4245 4.6898 4.4609 2.1067 \n",
"4 0.65 91.3544 64.5935 11.2259 8.9056 7.2600 0.3621 \n",
"... ... ... ... ... ... ... ... \n",
"5380 1.16 27.1631 29.0662 3.0982 6.8392 6.9124 1.1120 \n",
"5381 1.01 NaN NaN 1.4955 0.9183 1.0469 NaN \n",
"5382 0.99 62.5995 198.4906 3.6879 6.4857 7.9319 NaN \n",
"5383 0.98 41.5226 47.9900 3.8396 9.7258 8.9664 0.4982 \n",
"5384 0.84 52.3997 62.1858 13.3582 6.6261 5.9638 NaN \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 NaN 8.665757e+05 866575.7464 658594.7570 4.133566e+06 \n",
"1 2.6270 2.226900e+04 5669.0000 5669.0000 2.543120e+05 \n",
"2 NaN 2.000000e+04 19461.9464 5233.0650 1.487200e+06 \n",
"3 2.1067 1.093440e+06 724341.7623 208280.6759 1.577834e+07 \n",
"4 0.3621 7.438227e+04 64662.2002 43230.4691 3.142651e+06 \n",
"... ... ... ... ... ... \n",
"5380 1.1120 1.995584e+04 19955.8380 6258.8392 4.755476e+05 \n",
"5381 NaN 1.222104e+05 122210.3885 72818.9706 1.509298e+06 \n",
"5382 NaN 1.600448e+04 3200.8966 3200.8966 3.088865e+05 \n",
"5383 0.4982 4.066600e+04 11215.9100 11215.9100 1.300905e+06 \n",
"5384 NaN 1.005826e+04 3796.0235 3791.5280 4.628809e+05 \n",
"\n",
" circ_mv is_st \n",
"0 4.133566e+06 False \n",
"1 6.473998e+04 False \n",
"2 1.447190e+06 False \n",
"3 1.045225e+07 False \n",
"4 2.731978e+06 False \n",
"... ... ... \n",
"5380 4.755476e+05 False \n",
"5381 1.509298e+06 False \n",
"5382 6.177730e+04 False \n",
"5383 3.587970e+05 False \n",
"5384 1.746930e+05 False \n",
"\n",
"[5385 rows x 19 columns]\n"
]
}
],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:42.062142Z",
"start_time": "2025-02-11T15:53:42.044324Z"
}
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"10 002366.SZ 20250212 5.10 3.8029 4.1742 \n",
"48 002124.SZ 20250212 2.80 1.8388 1.9195 \n",
"57 000504.SZ 20250212 9.32 0.9666 1.5370 \n",
"63 603007.SH 20250212 10.03 2.0477 2.7581 \n",
"91 300201.SZ 20250212 5.33 2.3317 3.1604 \n",
"... ... ... ... ... ... \n",
"5303 002316.SZ 20250212 3.52 3.1023 3.3580 \n",
"5335 600568.SH 20250212 1.30 0.3996 0.6514 \n",
"5364 002168.SZ 20250212 2.48 0.8869 1.0824 \n",
"5367 300600.SZ 20250212 7.19 0.7517 1.4024 \n",
"5369 000972.SZ 20250212 3.38 4.6979 7.2993 \n",
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 603328.SH 20250327 10.44 1.0910 2.6596 \n",
"1 603989.SH 20250327 15.66 0.9036 2.6145 \n",
"2 603194.SH 20250327 38.03 14.0348 14.0348 \n",
"3 600884.SH 20250327 7.13 1.9769 2.1153 \n",
"4 688325.SH 20250327 47.26 1.5250 1.8078 \n",
"... ... ... ... ... ... \n",
"26946 688539.SH 20250325 26.70 1.0257 1.3011 \n",
"26947 688479.SH 20250325 18.73 0.9840 1.2588 \n",
"26948 000552.SZ 20250325 2.63 1.8147 3.0665 \n",
"26949 688719.SH 20250325 31.64 4.2998 5.1737 \n",
"26950 002709.SZ 20250325 19.50 1.2468 1.4268 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"10 0.92 52.0324 56.8856 2.2889 14.2486 11.9214 0.0000 \n",
"48 0.97 NaN 260.7218 1.7484 0.6080 0.6154 0.0000 \n",
"57 0.83 NaN NaN 12.3702 22.4855 24.7156 0.0000 \n",
"63 0.86 NaN NaN 24.6750 55.2244 76.4853 0.0000 \n",
"91 0.75 26.1255 26.1088 4.2311 3.9774 4.2028 0.6431 \n",
"... ... ... ... ... ... ... ... \n",
"5303 0.95 NaN NaN 19.4146 2.2930 2.3153 0.0000 \n",
"5335 0.76 NaN NaN 1.1378 4.0571 4.0379 0.0000 \n",
"5364 0.88 1024.9794 NaN NaN 7.6515 7.4299 0.0000 \n",
"5367 1.18 NaN NaN 2.2914 10.7845 8.9952 0.0000 \n",
"5369 0.77 24.0853 120.2360 16.2931 4.5277 4.9137 0.0000 \n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 0.79 29.3625 23.3887 2.5786 3.2807 2.9727 1.8582 \n",
"1 0.79 17.8968 27.7940 1.7060 1.8591 1.6666 1.6823 \n",
"2 1.87 18.9266 18.3213 3.2891 2.5755 2.4322 NaN \n",
"3 0.52 20.9930 NaN 0.7305 0.8425 0.9106 2.7224 \n",
"4 0.93 67.1638 50.1073 2.3433 16.1029 10.2149 NaN \n",
"... ... ... ... ... ... ... ... \n",
"26946 0.56 51.5254 83.3548 2.8475 14.5500 13.9718 NaN \n",
"26947 0.61 23.5448 33.4921 1.4043 3.6736 4.5444 NaN \n",
"26948 1.42 8.0989 11.6324 0.8431 1.2501 1.3463 3.8023 \n",
"26949 1.64 26.3323 49.9921 2.0474 4.4195 3.6954 NaN \n",
"26950 0.76 19.7447 78.2248 2.9106 2.4233 3.0741 1.5444 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"10 NaN 208093.7640 125646.4390 114472.2056 1.061278e+06 \n",
"48 NaN 222193.3832 197428.3498 189130.4452 6.221415e+05 \n",
"57 NaN 33002.3098 31066.8701 19536.7046 3.075815e+05 \n",
"63 NaN 87689.6101 49983.0778 37108.5778 8.795268e+05 \n",
"91 0.6431 100904.3607 100450.7422 74110.3317 5.378202e+05 \n",
"... ... ... ... ... ... \n",
"5303 NaN 39312.0000 31500.7500 29101.6694 1.383782e+05 \n",
"5335 NaN 199286.9681 166906.7279 102374.4773 2.590731e+05 \n",
"5364 NaN 78416.3368 78416.3368 64258.0991 1.944725e+05 \n",
"5367 NaN 29423.4480 24616.3436 13195.4382 2.115546e+05 \n",
"5369 NaN 77128.3579 77128.3579 49641.0760 2.606938e+05 \n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 1.8582 99844.2611 99844.2611 40955.5563 1.042374e+06 \n",
"1 1.6823 40113.0603 40113.0603 13863.2102 6.281705e+05 \n",
"2 NaN 40100.0000 4982.8436 4982.8436 1.525003e+06 \n",
"3 2.7224 225339.6168 175723.6492 164220.4548 1.606671e+06 \n",
"4 NaN 8494.7740 3830.4117 3231.0886 4.014630e+05 \n",
"... ... ... ... ... ... \n",
"26946 NaN 18592.0000 10286.0800 8109.0800 4.964064e+05 \n",
"26947 NaN 14431.7400 6087.4224 4758.2224 2.703065e+05 \n",
"26948 3.8023 535180.1936 372577.7383 220477.9354 1.407524e+06 \n",
"26949 NaN 11538.5418 7349.9938 6108.5305 3.650795e+05 \n",
"26950 1.5444 191434.3762 138501.6891 121034.9868 3.732970e+06 \n",
"\n",
" circ_mv is_st \n",
"10 640796.8389 True \n",
"48 552799.3794 True \n",
"57 289543.2293 True \n",
"63 501330.2703 True \n",
"91 535402.4559 True \n",
"... ... ... \n",
"5303 110882.6400 True \n",
"5335 216978.7463 True \n",
"5364 194472.5153 True \n",
"5367 176991.5105 True \n",
"5369 260693.8497 True \n",
" circ_mv is_st \n",
"0 1.042374e+06 False \n",
"1 6.281705e+05 False \n",
"2 1.894975e+05 False \n",
"3 1.252910e+06 False \n",
"4 1.810253e+05 False \n",
"... ... ... \n",
"26946 2.746383e+05 False \n",
"26947 1.140174e+05 False \n",
"26948 9.798795e+05 False \n",
"26949 2.325538e+05 False \n",
"26950 2.700783e+06 False \n",
"\n",
"[318 rows x 19 columns]\n"
"[26951 rows x 19 columns]\n"
]
}
],
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
]
"execution_count": 5
},
{
"cell_type": "code",
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:08.000073Z",
"start_time": "2025-03-30T16:43:07.984082Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"100 002528.SZ 20250327 2.53 0.6855 1.4642 \n",
"128 300163.SZ 20250327 3.15 3.0563 3.2999 \n",
"129 300205.SZ 20250327 4.34 0.9211 1.5246 \n",
"147 000851.SZ 20250327 2.53 2.2990 2.6472 \n",
"299 300097.SZ 20250327 4.88 3.1648 3.6912 \n",
"... ... ... ... ... ... \n",
"26750 000506.SZ 20250325 5.21 1.2689 1.8939 \n",
"26770 002592.SZ 20250325 5.22 1.0547 1.6712 \n",
"26786 600603.SH 20250325 7.63 0.4610 1.0776 \n",
"26828 002528.SZ 20250325 2.51 0.9799 2.0928 \n",
"26906 300097.SZ 20250325 4.92 3.2717 3.8159 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"100 0.43 NaN NaN 7.3528 2.1714 2.7257 0.0000 \n",
"128 0.87 NaN NaN 3.0547 5.9187 5.8999 0.0000 \n",
"129 0.63 94.7108 NaN 1.3743 1.0976 1.5538 0.4608 \n",
"147 0.64 NaN NaN 1.0360 0.4939 0.8666 0.0000 \n",
"299 0.70 10.0614 NaN 2.2055 2.9549 3.1999 0.0000 \n",
"... ... ... ... ... ... ... ... \n",
"26750 0.37 725.4828 NaN 8.2869 17.0204 21.9262 0.0000 \n",
"26770 0.94 14.0192 61.1217 1.6387 2.7253 2.3121 0.0000 \n",
"26786 0.56 15.6086 24.2223 1.3160 1.8461 2.4398 0.0000 \n",
"26828 0.58 NaN NaN 7.2947 2.1542 2.7042 0.0000 \n",
"26906 0.53 10.1438 NaN 2.2236 2.9791 3.2261 0.0000 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
"100 NaN 119867.5082 105021.9577 49171.2582 303264.7957 265705.5530 \n",
"128 NaN 47400.0000 41596.4553 38525.5904 149310.0000 131028.8342 \n",
"129 0.4608 43005.6000 42599.1218 25737.4813 186644.3040 184880.1886 \n",
"147 NaN 115786.0020 113197.7266 98311.5254 292938.5851 286390.2483 \n",
"299 NaN 28854.9669 27000.9948 23150.5534 140812.2385 131764.8546 \n",
"... ... ... ... ... ... ... \n",
"26750 NaN 92901.7761 92867.0961 62218.8027 484018.2535 483837.5707 \n",
"26770 NaN 28333.1157 26271.6370 16580.1814 147898.8640 137137.9451 \n",
"26786 NaN 119332.9151 119332.9151 51048.6002 910510.1422 910510.1422 \n",
"26828 NaN 119867.5082 105021.9577 49171.2582 300867.4456 263605.1138 \n",
"26906 NaN 28854.9669 27000.9948 23150.5534 141966.4371 132844.8944 \n",
"\n",
" is_st \n",
"100 True \n",
"128 True \n",
"129 True \n",
"147 True \n",
"299 True \n",
"... ... \n",
"26750 True \n",
"26770 True \n",
"26786 True \n",
"26828 True \n",
"26906 True \n",
"\n",
"[540 rows x 19 columns]\n"
]
}
],
"execution_count": 6
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:53:33.693894Z",
"start_time": "2025-02-11T15:53:33.609884Z"
"end_time": "2025-03-30T16:43:08.703938Z",
"start_time": "2025-03-30T16:43:08.021067Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
@@ -358,30 +391,29 @@
]
}
],
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
]
"execution_count": 7
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:54:27.868021Z",
"start_time": "2025-02-11T15:54:18.853803Z"
"end_time": "2025-03-30T16:43:15.188800Z",
"start_time": "2025-03-30T16:43:08.725449Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8297355 entries, 0 to 5384\n",
"Index: 8480556 entries, 0 to 26950\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -389,16 +421,12 @@
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 197.8+ MB\n",
"memory usage: 202.2+ MB\n",
"None\n"
]
}
],
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
]
"execution_count": 8
}
],
"metadata": {
@@ -417,7 +445,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.11.11"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load Diff

View File

@@ -2,52 +2,31 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:54.821950Z",
"start_time": "2025-02-11T15:21:54.050569Z"
"end_time": "2025-03-30T16:42:37.847407Z",
"start_time": "2025-03-30T16:42:36.773187Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"execution_count": 2,
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:32.726905Z",
"start_time": "2025-02-11T15:22:25.018135Z"
"end_time": "2025-03-30T16:42:59.016187Z",
"start_time": "2025-03-30T16:42:37.850022Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8153941 entries, 0 to 5120\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 186.6+ MB\n",
"None\n",
"20250211\n",
"start_date: 20250212\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -61,39 +40,44 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:14.513527Z",
"start_time": "2025-02-11T15:22:12.973331Z"
},
"scrolled": true
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250213 完成\n",
"任务 20250214 完成\n",
"任务 20250212 完成\n"
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8297316 entries, 0 to 30724\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 189.9+ MB\n",
"None\n",
"20250321\n",
"start_date: 20250324\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.168764Z",
"start_time": "2025-03-30T16:42:59.422934Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -123,33 +107,69 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:16.656650Z",
"start_time": "2025-02-11T15:22:16.639271Z"
"end_time": "2025-03-30T16:43:03.181032Z",
"start_time": "2025-03-30T16:43:03.173867Z"
}
},
"outputs": [],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
]
],
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:22:20.447350Z",
"start_time": "2025-02-11T15:22:19.145561Z"
"end_time": "2025-03-30T16:43:05.401668Z",
"start_time": "2025-03-30T16:43:03.197033Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -159,15 +179,7 @@
]
}
],
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
"execution_count": 5
}
],
"metadata": {
@@ -186,7 +198,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.11.11"
}
},
"nbformat": 4,

View File

@@ -2,58 +2,31 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:18:36.892437Z",
"start_time": "2025-02-11T15:18:36.020822Z"
"end_time": "2025-03-30T16:42:39.056767Z",
"start_time": "2025-03-30T16:42:37.817887Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:12.573607Z",
"start_time": "2025-02-11T15:20:00.110127Z"
"end_time": "2025-03-30T16:42:59.784780Z",
"start_time": "2025-03-30T16:42:39.056767Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4682 600310.SH 20250211\n",
"4683 600312.SH 20250211\n",
"4684 600313.SH 20250211\n",
"4673 600299.SH 20250211\n",
"0 000001.SZ 20250211\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10040878 entries, 0 to 10040877\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 229.8+ MB\n",
"None\n",
"20250211\n",
"20250212\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -68,39 +41,50 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250220')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:27.831699Z",
"start_time": "2025-02-11T15:21:26.665039Z"
},
"scrolled": true
},
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250219 完成\n",
"任务 20250220 完成\n",
"任务 20250217 完成\n",
"任务 20250218 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n"
" ts_code trade_date\n",
"4705 600289.SH 20250321\n",
"4706 600292.SH 20250321\n",
"4707 600293.SH 20250321\n",
"4696 600279.SH 20250321\n",
"7051 920116.BJ 20250321\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10237887 entries, 0 to 35266\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 234.3+ MB\n",
"None\n",
"20250321\n",
"20250324\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.372001Z",
"start_time": "2025-03-30T16:43:00.012140Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -131,69 +115,143 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250409 完成\n",
"任务 20250410 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250327 完成\n",
"任务 20250328 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:21:29.294283Z",
"start_time": "2025-02-11T15:21:29.247112Z"
"end_time": "2025-03-30T16:43:03.397757Z",
"start_time": "2025-03-30T16:43:03.384786Z"
}
},
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250213 000001.SZ 12.56 10.28\n",
"1 20250213 000002.SZ 8.76 7.16\n",
"2 20250213 000004.SZ 15.40 12.60\n",
"3 20250213 000006.SZ 7.92 6.48\n",
"4 20250213 000007.SZ 7.39 6.05\n",
"0 20250327 000001.SZ 12.52 10.24\n",
"1 20250327 000002.SZ 7.92 6.48\n",
"2 20250327 000004.SZ 11.40 9.32\n",
"3 20250327 000006.SZ 7.44 6.08\n",
"4 20250327 000007.SZ 7.00 5.72\n",
"... ... ... ... ...\n",
"7014 20250213 920108.BJ 27.22 14.66\n",
"7015 20250213 920111.BJ 35.98 19.38\n",
"7016 20250213 920116.BJ 80.44 43.32\n",
"7017 20250213 920118.BJ 34.46 18.56\n",
"7018 20250213 920128.BJ 39.84 21.46\n",
"7059 20250327 920108.BJ 33.56 18.08\n",
"7060 20250327 920111.BJ 40.57 21.85\n",
"7061 20250327 920116.BJ 126.29 68.01\n",
"7062 20250327 920118.BJ 44.14 23.78\n",
"7063 20250327 920128.BJ 47.35 25.51\n",
"\n",
"[7019 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250212 000001.SZ 12.56 10.28\n",
"1 20250212 000002.SZ 7.96 6.52\n",
"2 20250212 000004.SZ 15.07 12.33\n",
"3 20250212 000006.SZ 7.74 6.34\n",
"4 20250212 000007.SZ 7.40 6.06\n",
"[7064 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250328 000001.SZ 12.53 10.25\n",
"1 20250328 000002.SZ 7.89 6.45\n",
"2 20250328 000004.SZ 11.19 9.15\n",
"3 20250328 000006.SZ 8.18 6.70\n",
"4 20250328 000007.SZ 6.99 5.72\n",
"... ... ... ... ...\n",
"7014 20250212 920108.BJ 27.41 14.77\n",
"7015 20250212 920111.BJ 34.51 18.59\n",
"7016 20250212 920116.BJ 79.66 42.90\n",
"7017 20250212 920118.BJ 34.81 18.75\n",
"7018 20250212 920128.BJ 38.98 21.00\n",
"7060 20250328 920108.BJ 31.03 16.71\n",
"7061 20250328 920111.BJ 39.65 21.35\n",
"7062 20250328 920116.BJ 115.67 62.29\n",
"7063 20250328 920118.BJ 41.00 22.08\n",
"7064 20250328 920128.BJ 44.83 24.15\n",
"\n",
"[7019 rows x 4 columns]]\n"
"[7065 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250326 000001.SZ 12.57 10.29\n",
"1 20250326 000002.SZ 7.91 6.47\n",
"2 20250326 000004.SZ 11.28 9.23\n",
"3 20250326 000006.SZ 7.17 5.87\n",
"4 20250326 000007.SZ 6.67 5.45\n",
"... ... ... ... ...\n",
"7056 20250326 920108.BJ 33.96 18.30\n",
"7057 20250326 920111.BJ 41.92 22.58\n",
"7058 20250326 920116.BJ 133.64 71.96\n",
"7059 20250326 920118.BJ 41.93 22.59\n",
"7060 20250326 920128.BJ 49.40 26.60\n",
"\n",
"[7061 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250325 000001.SZ 12.52 10.24\n",
"1 20250325 000002.SZ 7.90 6.46\n",
"2 20250325 000004.SZ 11.55 9.45\n",
"3 20250325 000006.SZ 7.13 5.83\n",
"4 20250325 000007.SZ 6.60 5.40\n",
"... ... ... ... ...\n",
"7055 20250325 920108.BJ 33.30 17.94\n",
"7056 20250325 920111.BJ 39.97 21.53\n",
"7057 20250325 920116.BJ 137.78 74.20\n",
"7058 20250325 920118.BJ 39.52 21.28\n",
"7059 20250325 920128.BJ 46.22 24.90\n",
"\n",
"[7060 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
"0 20250324 000001.SZ 12.56 10.28\n",
"1 20250324 000002.SZ 8.10 6.62\n",
"2 20250324 000004.SZ 12.82 10.49\n",
"3 20250324 000006.SZ 7.44 6.08\n",
"4 20250324 000007.SZ 6.89 5.63\n",
"... ... ... ... ...\n",
"7053 20250324 920108.BJ 34.84 18.76\n",
"7054 20250324 920111.BJ 40.41 21.77\n",
"7055 20250324 920116.BJ 134.55 72.45\n",
"7056 20250324 920118.BJ 38.67 20.83\n",
"7057 20250324 920128.BJ 45.86 24.70\n",
"\n",
"[7058 rows x 4 columns]]\n"
]
}
],
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
]
"execution_count": 4
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-11T15:20:37.999493Z",
"start_time": "2025-02-11T15:20:37.375220Z"
"end_time": "2025-03-30T16:43:03.696614Z",
"start_time": "2025-03-30T16:43:03.411036Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -203,22 +261,20 @@
]
}
],
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
"execution_count": 5
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-30T16:43:03.713628Z",
"start_time": "2025-03-30T16:43:03.711521Z"
}
},
"source": [],
"outputs": [],
"source": []
"execution_count": null
}
],
"metadata": {
@@ -237,7 +293,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.11.11"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

1215
code/train/Regression.ipynb Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -8,8 +8,8 @@
"source_hidden": true
},
"ExecuteTime": {
"end_time": "2025-03-29T17:43:30.876671Z",
"start_time": "2025-03-29T17:43:30.425776Z"
"end_time": "2025-03-31T14:33:30.607252Z",
"start_time": "2025-03-31T14:33:30.170544Z"
}
},
"source": [
@@ -32,8 +32,8 @@
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-29T17:44:18.824363Z",
"start_time": "2025-03-29T17:43:30.876671Z"
"end_time": "2025-03-31T14:34:19.160370Z",
"start_time": "2025-03-31T14:33:30.794750Z"
}
},
"source": [
@@ -73,15 +73,11 @@
"text": [
"daily data\n",
"daily basic\n",
"inner merge on ['ts_code', 'trade_date']\n",
"stk limit\n",
"left merge on ['ts_code', 'trade_date']\n",
"money flow\n",
"left merge on ['ts_code', 'trade_date']\n",
"cyq perf\n",
"left merge on ['ts_code', 'trade_date']\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 8450470 entries, 0 to 8450469\n",
"RangeIndex: 8477357 entries, 0 to 8477356\n",
"Data columns (total 31 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
@@ -132,8 +128,8 @@
"source_hidden": true
},
"ExecuteTime": {
"end_time": "2025-03-29T17:44:28.421215Z",
"start_time": "2025-03-29T17:44:19.106345Z"
"end_time": "2025-03-31T14:34:30.996034Z",
"start_time": "2025-03-31T14:34:19.168375Z"
}
},
"source": [
@@ -200,8 +196,8 @@
"id": "c4e9e1d31da6dba6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:44:28.620721Z",
"start_time": "2025-03-29T17:44:28.436697Z"
"end_time": "2025-03-31T14:34:31.276589Z",
"start_time": "2025-03-31T14:34:31.060910Z"
}
},
"source": [
@@ -292,8 +288,8 @@
"source_hidden": true
},
"ExecuteTime": {
"end_time": "2025-03-29T17:44:28.706766Z",
"start_time": "2025-03-29T17:44:28.650141Z"
"end_time": "2025-03-31T14:34:31.348068Z",
"start_time": "2025-03-31T14:34:31.304847Z"
}
},
"source": [
@@ -609,8 +605,8 @@
},
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-03-29T17:44:33.959917Z",
"start_time": "2025-03-29T17:44:28.720764Z"
"end_time": "2025-03-31T14:34:36.714777Z",
"start_time": "2025-03-31T14:34:31.369443Z"
}
},
"source": [
@@ -668,8 +664,8 @@
"id": "dbe2fd8021b9417f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:44:33.985360Z",
"start_time": "2025-03-29T17:44:33.975319Z"
"end_time": "2025-03-31T14:34:36.727797Z",
"start_time": "2025-03-31T14:34:36.724265Z"
}
},
"source": [
@@ -696,8 +692,8 @@
"id": "85c3e3d0235ffffa",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:27.764400Z",
"start_time": "2025-03-29T17:44:34.016244Z"
"end_time": "2025-03-31T14:37:04.071963Z",
"start_time": "2025-03-31T14:34:36.756415Z"
}
},
"source": [
@@ -736,7 +732,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 5102787 entries, 0 to 5102786\n",
"Index: 5118212 entries, 0 to 5118211\n",
"Columns: 115 entries, ts_code to mv_momentum\n",
"dtypes: bool(12), datetime64[ns](1), float64(98), int32(1), int64(1), object(2)\n",
"memory usage: 4.0+ GB\n",
@@ -751,8 +747,8 @@
"id": "92d84ce15a562ec6",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:29.644814Z",
"start_time": "2025-03-29T17:46:28.384214Z"
"end_time": "2025-03-31T14:37:05.401297Z",
"start_time": "2025-03-31T14:37:04.287413Z"
}
},
"source": [
@@ -795,8 +791,8 @@
"source_hidden": true
},
"ExecuteTime": {
"end_time": "2025-03-29T17:46:29.655148Z",
"start_time": "2025-03-29T17:46:29.646857Z"
"end_time": "2025-03-31T14:37:05.435586Z",
"start_time": "2025-03-31T14:37:05.429705Z"
}
},
"source": [
@@ -844,8 +840,8 @@
"id": "40e6b68a91b30c79",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:30.303881Z",
"start_time": "2025-03-29T17:46:29.698776Z"
"end_time": "2025-03-31T14:37:05.994210Z",
"start_time": "2025-03-31T14:37:05.479565Z"
}
},
"source": [
@@ -1013,8 +1009,8 @@
"id": "1c46817a-b5dd-4bec-8bb4-e6e80bfd9d66",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:30.320706Z",
"start_time": "2025-03-29T17:46:30.307889Z"
"end_time": "2025-03-31T14:37:06.026489Z",
"start_time": "2025-03-31T14:37:06.024035Z"
}
},
"source": "# print(test_data.head()[['act_factor1', 'act_factor2', 'ts_code', 'trade_date']])",
@@ -1026,8 +1022,8 @@
"id": "da2bb202843d9275",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:30.929501Z",
"start_time": "2025-03-29T17:46:30.343347Z"
"end_time": "2025-03-31T14:37:06.597135Z",
"start_time": "2025-03-31T14:37:06.031495Z"
}
},
"source": [
@@ -1133,14 +1129,50 @@
"execution_count": 13
},
{
"cell_type": "code",
"id": "20b7836efae720a3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:31.021203Z",
"start_time": "2025-03-29T17:46:30.953273Z"
"end_time": "2025-03-31T14:37:11.087120Z",
"start_time": "2025-03-31T14:37:06.619979Z"
}
},
"cell_type": "code",
"source": [
"\n",
"days = 2\n",
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
" df.groupby('ts_code')['open'].shift(-1)\n",
"df['future_volatility'] = (\n",
" df.groupby('ts_code')['future_return']\n",
" .transform(lambda x: x.rolling(days).std())\n",
")\n",
"\n",
"df['future_score'] = (\n",
" 0.7 * df['future_return'] +\n",
" 0.3 * df['future_volatility']\n",
")\n",
"\n",
"filter_index = df['future_return'].between(df['future_return'].quantile(0.01), df['future_return'].quantile(0.99))\n",
"filter_index = df['future_volatility'].between(df['future_volatility'].quantile(0.01),\n",
" df['future_volatility'].quantile(0.99)) | filter_index\n",
"filter_index2 = df['future_return'].between(df['future_return'].quantile(0.01), df['future_return'].quantile(0.99))\n",
"filter_index2 = df['future_volatility'].between(df['future_volatility'].quantile(0.01),\n",
" df['future_volatility'].quantile(0.99)) | filter_index2\n",
"df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
" lambda x: pd.qcut(x, q=50, labels=False, duplicates='drop')\n",
")\n"
],
"id": "81d4570663ae21d7",
"outputs": [],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T14:39:16.314466Z",
"start_time": "2025-03-31T14:39:15.609756Z"
}
},
"cell_type": "code",
"source": [
"# print('train data size: ', len(train_data))\n",
"\n",
@@ -1170,54 +1202,20 @@
"\n",
"gc.collect()"
],
"id": "92428d543f4727ad",
"outputs": [
{
"data": {
"text/plain": [
"0"
"6302"
]
},
"execution_count": 14,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:46:35.629560Z",
"start_time": "2025-03-29T17:46:31.046580Z"
}
},
"cell_type": "code",
"source": [
"\n",
"days = 2\n",
"df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
" df.groupby('ts_code')['open'].shift(-1)\n",
"df['future_volatility'] = (\n",
" df.groupby('ts_code')['future_return']\n",
" .transform(lambda x: x.rolling(days).std())\n",
")\n",
"\n",
"df['future_score'] = (\n",
" 0.7 * df['future_return'] +\n",
" 0.3 * df['future_volatility']\n",
")\n",
"\n",
"filter_index = df['future_return'].between(df['future_return'].quantile(0.01), df['future_return'].quantile(0.99))\n",
"filter_index = df['future_volatility'].between(df['future_volatility'].quantile(0.01),\n",
" df['future_volatility'].quantile(0.99)) | filter_index\n",
"\n",
"df['label'] = df.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
" lambda x: pd.qcut(x, q=50, labels=False, duplicates='drop')\n",
")\n"
],
"id": "81d4570663ae21d7",
"outputs": [],
"execution_count": 15
"execution_count": 21
},
{
"cell_type": "code",
@@ -1227,8 +1225,8 @@
"source_hidden": true
},
"ExecuteTime": {
"end_time": "2025-03-29T17:46:35.745784Z",
"start_time": "2025-03-29T17:46:35.675465Z"
"end_time": "2025-03-31T14:39:16.430821Z",
"start_time": "2025-03-31T14:39:16.321471Z"
}
},
"source": [
@@ -1255,7 +1253,7 @@
"\n",
" # 根据日期筛选数据\n",
" train_data = df[filter_index & df['trade_date'].isin(train_dates)]\n",
" test_data = df[filter_index & df['trade_date'].isin(test_dates)]\n",
" test_data = df[filter_index2 & df['trade_date'].isin(test_dates)]\n",
"\n",
" train_data = train_data.sort_values('trade_date')\n",
" test_data = test_data.sort_values('trade_date')\n",
@@ -1403,15 +1401,15 @@
" return final_predictions\n"
],
"outputs": [],
"execution_count": 16
"execution_count": 22
},
{
"cell_type": "code",
"id": "63235069-dc59-48fb-961a-e80373e41a61",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:52:27.309071Z",
"start_time": "2025-03-29T17:46:35.756531Z"
"end_time": "2025-03-31T14:45:27.262907Z",
"start_time": "2025-03-31T14:39:16.454548Z"
}
},
"source": [
@@ -1429,172 +1427,171 @@
"text": [
"去极值\n",
"去极值\n",
"检测到 20 个可能漂移的特征: ['pct_chg', 'turnover_rate', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'log_close', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2020-03-11\n",
"最大日期: 2022-03-30\n",
"最小日期: 2022-03-31\n",
"最大日期: 2022-06-30\n",
"原始训练集大小: 402534\n",
"划分后的训练集大小: 307694, 验证集大小: 94840\n",
"检测到 21 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'log_close', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2020-03-18\n",
"最大日期: 2022-04-08\n",
"最小日期: 2022-04-11\n",
"最大日期: 2022-07-07\n",
"原始训练集大小: 402509\n",
"划分后的训练集大小: 307874, 验证集大小: 94635\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[15]\ttrain's ndcg@1: 0.662414\tvalid's ndcg@1: 0.613612\n",
"[1]\ttrain's ndcg@1: 0.519951\tvalid's ndcg@1: 0.628242\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 26 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_lg_vol_minus_sell_lg_vol', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2020-06-09\n",
"最大日期: 2022-06-30\n",
"最小日期: 2022-07-01\n",
"最大日期: 2022-09-23\n",
"原始训练集大小: 400974\n",
"划分后的训练集大小: 306295, 验证集大小: 94679\n",
"最小日期: 2020-06-16\n",
"最大日期: 2022-07-07\n",
"最小日期: 2022-07-08\n",
"最大日期: 2022-09-30\n",
"原始训练集大小: 401052\n",
"划分后的训练集大小: 306109, 验证集大小: 94943\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[29]\ttrain's ndcg@1: 0.705891\tvalid's ndcg@1: 0.608979\n",
"[19]\ttrain's ndcg@1: 0.656354\tvalid's ndcg@1: 0.628231\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 14 个可能漂移的特征: ['turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'log(circ_mv)', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'log_close', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small']\n",
"最小日期: 2020-09-03\n",
"最大日期: 2022-09-23\n",
"最小日期: 2022-09-26\n",
"最大日期: 2022-12-23\n",
"原始训练集大小: 398352\n",
"划分后的训练集大小: 303767, 验证集大小: 94585\n",
"检测到 14 个可能漂移的特征: ['vol', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'log(circ_mv)', 'alpha_22_improved', 'turnover_std', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_elg_vol_minus_sell_elg_vol']\n",
"最小日期: 2020-09-10\n",
"最大日期: 2022-09-30\n",
"最小日期: 2022-10-10\n",
"最大日期: 2022-12-30\n",
"原始训练集大小: 398374\n",
"划分后的训练集大小: 303799, 验证集大小: 94575\n",
"Training until validation scores don't improve for 50 rounds\n",
"[100]\ttrain's ndcg@1: 0.827415\tvalid's ndcg@1: 0.630578\n",
"Early stopping, best iteration is:\n",
"[71]\ttrain's ndcg@1: 0.78919\tvalid's ndcg@1: 0.66253\n",
"[8]\ttrain's ndcg@1: 0.624175\tvalid's ndcg@1: 0.60186\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 18 个可能漂移的特征: ['vol', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'log(circ_mv)', 'turnover_std', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2020-12-04\n",
"最大日期: 2022-12-23\n",
"最小日期: 2022-12-26\n",
"最大日期: 2023-03-27\n",
"原始训练集大小: 395407\n",
"划分后的训练集大小: 305189, 验证集大小: 90218\n",
"检测到 17 个可能漂移的特征: ['vol', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'act_factor3', 'log(circ_mv)', 'turnover_std', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2020-12-11\n",
"最大日期: 2022-12-30\n",
"最小日期: 2023-01-03\n",
"最大日期: 2023-04-03\n",
"原始训练集大小: 395305\n",
"划分后的训练集大小: 305409, 验证集大小: 89896\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[49]\ttrain's ndcg@1: 0.808651\tvalid's ndcg@1: 0.637922\n",
"[23]\ttrain's ndcg@1: 0.711527\tvalid's ndcg@1: 0.58914\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 19 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_skew', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'resonance_factor', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2021-03-08\n",
"最大日期: 2023-03-27\n",
"最小日期: 2023-03-28\n",
"最大日期: 2023-06-27\n",
"原始训练集大小: 393886\n",
"划分后的训练集大小: 303266, 验证集大小: 90620\n",
"检测到 18 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'delta_cov', 'resonance_factor', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'buy_elg_vol_minus_sell_elg_vol', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2021-03-15\n",
"最大日期: 2023-04-03\n",
"最小日期: 2023-04-04\n",
"最大日期: 2023-07-04\n",
"原始训练集大小: 394279\n",
"划分后的训练集大小: 303561, 验证集大小: 90718\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[8]\ttrain's ndcg@1: 0.622492\tvalid's ndcg@1: 0.606022\n",
"[1]\ttrain's ndcg@1: 0.570726\tvalid's ndcg@1: 0.62316\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 23 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_skew', 'return_kurtosis', 'vol_spike', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'log(circ_mv)', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2021-06-04\n",
"最大日期: 2023-06-27\n",
"最小日期: 2023-06-28\n",
"最大日期: 2023-09-19\n",
"原始训练集大小: 393201\n",
"划分后的训练集大小: 300541, 验证集大小: 92660\n",
"检测到 25 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_skew', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor2', 'act_factor3', 'log(circ_mv)', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'active_buy_volume_large', 'active_buy_volume_big', 'active_buy_volume_small', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2021-06-11\n",
"最大日期: 2023-07-04\n",
"最小日期: 2023-07-05\n",
"最大日期: 2023-09-26\n",
"原始训练集大小: 392902\n",
"划分后的训练集大小: 300091, 验证集大小: 92811\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[3]\ttrain's ndcg@1: 0.623432\tvalid's ndcg@1: 0.604043\n",
"[4]\ttrain's ndcg@1: 0.623501\tvalid's ndcg@1: 0.608889\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 14 个可能漂移的特征: ['vol', 'pct_chg', 'return_kurtosis', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'log(circ_mv)', 'cov', 'alpha_22_improved', 'resonance_factor', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2021-08-30\n",
"最大日期: 2023-09-19\n",
"最小日期: 2023-09-20\n",
"最大日期: 2023-12-20\n",
"原始训练集大小: 386612\n",
"划分后的训练集大小: 296498, 验证集大小: 90114\n",
"检测到 11 个可能漂移的特征: ['pct_chg', 'return_skew', 'return_kurtosis', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'delta_cov', 'alpha_22_improved', 'resonance_factor']\n",
"最小日期: 2021-09-06\n",
"最大日期: 2023-09-26\n",
"最小日期: 2023-09-27\n",
"最大日期: 2023-12-27\n",
"原始训练集大小: 386164\n",
"划分后的训练集大小: 296301, 验证集大小: 89863\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[12]\ttrain's ndcg@1: 0.663139\tvalid's ndcg@1: 0.652339\n",
"[38]\ttrain's ndcg@1: 0.76403\tvalid's ndcg@1: 0.675597\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 14 个可能漂移的特征: ['pct_chg', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'log(circ_mv)', 'cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close']\n",
"最小日期: 2021-12-01\n",
"最大日期: 2023-12-20\n",
"最小日期: 2023-12-21\n",
"最大日期: 2024-03-22\n",
"原始训练集大小: 379352\n",
"划分后的训练集大小: 293416, 验证集大小: 85936\n",
"检测到 14 个可能漂移的特征: ['pct_chg', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'log_close']\n",
"最小日期: 2021-12-08\n",
"最大日期: 2023-12-27\n",
"最小日期: 2023-12-28\n",
"最大日期: 2024-03-29\n",
"原始训练集大小: 379125\n",
"划分后的训练集大小: 293170, 验证集大小: 85955\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[5]\ttrain's ndcg@1: 0.634019\tvalid's ndcg@1: 0.638831\n",
"[17]\ttrain's ndcg@1: 0.707121\tvalid's ndcg@1: 0.6345\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 12 个可能漂移的特征: ['vol', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'log(circ_mv)', 'cov', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-03-03\n",
"最大日期: 2024-03-22\n",
"最小日期: 2024-03-25\n",
"最大日期: 2024-06-24\n",
"原始训练集大小: 379932\n",
"划分后的训练集大小: 290249, 验证集大小: 89683\n",
"检测到 13 个可能漂移的特征: ['vol', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'log(circ_mv)', 'turnover_std', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-03-10\n",
"最大日期: 2024-03-29\n",
"最小日期: 2024-04-01\n",
"最大日期: 2024-07-01\n",
"原始训练集大小: 379627\n",
"划分后的训练集大小: 290158, 验证集大小: 89469\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[34]\ttrain's ndcg@1: 0.759349\tvalid's ndcg@1: 0.64614\n",
"[44]\ttrain's ndcg@1: 0.776797\tvalid's ndcg@1: 0.610802\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 18 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'vol_spike', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-06-02\n",
"最大日期: 2024-06-24\n",
"最小日期: 2024-06-25\n",
"最大日期: 2024-09-18\n",
"原始训练集大小: 381303\n",
"划分后的训练集大小: 284860, 验证集大小: 96443\n",
"检测到 18 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'atr_6', 'maobv_6', 'rsi_3', 'act_factor3', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-06-10\n",
"最大日期: 2024-07-01\n",
"最小日期: 2024-07-02\n",
"最大日期: 2024-09-25\n",
"原始训练集大小: 381403\n",
"划分后的训练集大小: 284687, 验证集大小: 96716\n",
"Training until validation scores don't improve for 50 rounds\n",
"Early stopping, best iteration is:\n",
"[40]\ttrain's ndcg@1: 0.745108\tvalid's ndcg@1: 0.589855\n",
"[49]\ttrain's ndcg@1: 0.787077\tvalid's ndcg@1: 0.579916\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 21 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_kurtosis', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor1', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-08-26\n",
"最大日期: 2024-09-18\n",
"最小日期: 2024-09-19\n",
"最大日期: 2024-12-18\n",
"原始训练集大小: 379192\n",
"划分后的训练集大小: 285332, 验证集大小: 93860\n",
"检测到 20 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'vol_spike', 'atr_14', 'atr_6', 'obv', 'maobv_6', 'rsi_3', 'act_factor3', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'turnover_std', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-09-02\n",
"最大日期: 2024-09-25\n",
"最小日期: 2024-09-26\n",
"最大日期: 2024-12-25\n",
"原始训练集大小: 378304\n",
"划分后的训练集大小: 284976, 验证集大小: 93328\n",
"Training until validation scores don't improve for 50 rounds\n",
"[100]\ttrain's ndcg@1: 0.840198\tvalid's ndcg@1: 0.629064\n",
"Early stopping, best iteration is:\n",
"[10]\ttrain's ndcg@1: 0.629259\tvalid's ndcg@1: 0.646921\n",
"[98]\ttrain's ndcg@1: 0.836808\tvalid's ndcg@1: 0.644301\n",
"Evaluated only: ndcg@1\n",
"去极值\n",
"去极值\n",
"检测到 18 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_skew', 'return_kurtosis', 'obv', 'maobv_6', 'rsi_3', 'act_factor1', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-11-28\n",
"最大日期: 2024-12-18\n",
"最小日期: 2024-12-19\n",
"最大日期: 2025-03-19\n",
"原始训练集大小: 371676\n",
"划分后的训练集大小: 289037, 验证集大小: 82639\n",
"检测到 17 个可能漂移的特征: ['vol', 'pct_chg', 'turnover_rate', 'return_skew', 'obv', 'maobv_6', 'rsi_3', 'act_factor1', 'log(circ_mv)', 'cov', 'delta_cov', 'alpha_22_improved', 'resonance_factor', 'log_close', 'obv-maobv_6', 'mv_adjusted_volume', 'nonlinear_mv_volume']\n",
"最小日期: 2022-12-05\n",
"最大日期: 2024-12-25\n",
"最小日期: 2024-12-26\n",
"最大日期: 2025-03-26\n",
"原始训练集大小: 371843\n",
"划分后的训练集大小: 289769, 验证集大小: 82074\n",
"Training until validation scores don't improve for 50 rounds\n",
"[100]\ttrain's ndcg@1: 0.844744\tvalid's ndcg@1: 0.580519\n",
"Early stopping, best iteration is:\n",
"[54]\ttrain's ndcg@1: 0.792906\tvalid's ndcg@1: 0.599084\n",
"[2]\ttrain's ndcg@1: 0.592644\tvalid's ndcg@1: 0.596307\n",
"Evaluated only: ndcg@1\n"
]
}
],
"execution_count": 17
"execution_count": 23
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-29T17:52:27.393208Z",
"start_time": "2025-03-29T17:52:27.388403Z"
"end_time": "2025-03-31T14:45:27.345941Z",
"start_time": "2025-03-31T14:45:27.342565Z"
}
},
"cell_type": "code",
@@ -1609,208 +1606,17 @@
]
}
],
"execution_count": 18
},
{
"cell_type": "code",
"id": "d86af99d15cb3bdd",
"metadata": {
"scrolled": true
},
"source": [
"import pandas as pd\n",
"\n",
"gc.collect()\n",
"def rolling_train_predict(df, train_days, test_days, industry_df, index_df, days=5, use_pca=False, validation_days=60):\n",
"\n",
" # 1. 按照交易日期排序\n",
" unique_dates = df[df['trade_date'] >= '2020-01-01']['trade_date'].unique().tolist()\n",
" unique_dates = sorted(unique_dates)\n",
" n = len(unique_dates)\n",
" \n",
" # 2. 计算需要跳过的天数,使后续窗口对齐\n",
" extra_days = (n - train_days) % test_days \n",
" start_index = extra_days # 从此索引开始滚动\n",
" \n",
" predictions_list = []\n",
"\n",
"\n",
" for start in range(start_index, n - train_days - test_days + 1, test_days):\n",
" gc.collect()\n",
"\n",
" train_dates = unique_dates[start : start + train_days]\n",
" test_dates = unique_dates[start + train_days : start + train_days + test_days]\n",
"\n",
" # 根据日期筛选数据\n",
" train_data = df[df['trade_date'].isin(train_dates)]\n",
" test_data = df[df['trade_date'].isin(test_dates)]\n",
"\n",
" train_data = train_data.sort_values('trade_date')\n",
" test_data = test_data.sort_values('trade_date')\n",
"\n",
" \n",
" def select_pre_zt_stocks_dynamic(\n",
" stock_df,\n",
" vol_spike_multiplier=1.5,\n",
" min_return=0.03, # 最小累计涨幅(例如 3%\n",
" min_main_net_inflow=1e6, # 最小主力资金净流入(例如 100 万元)\n",
" window=30, # 计算历史均值的窗口大小\n",
" signal_days=1 # 异动信号需要连续出现的天数\n",
" ):\n",
" \n",
" # 排序数据\n",
" stock_df = stock_df.sort_values(by=['trade_date', 'ts_code'])\n",
" \n",
" # stock_df = stock_df[\n",
" # (stock_df['vol'] > vol_spike_multiplier * stock_df['avg_vol_20'])\n",
" # ]\n",
" cd1 = stock_df[\"close\"] > stock_df[\"close\"].shift(1)\n",
"\n",
" cd2 = stock_df[\"close\"] > stock_df[\"close\"].rolling(window=10).mean()\n",
"\n",
" cd3 = (stock_df[\"vol\"] > stock_df[\"vol\"].shift(1)) & (stock_df[\"vol\"] < 10 * stock_df[\"vol\"].shift(1))\n",
"\n",
" stock_df = stock_df[cd1 & cd2 & cd3]\n",
" stock_df = stock_df.groupby('trade_date', group_keys=False).apply(\n",
" lambda x: x.nlargest(1000, 'return_20')\n",
" )\n",
" \n",
" return stock_df\n",
" \n",
" train_data = select_pre_zt_stocks_dynamic(train_data)\n",
" test_data = select_pre_zt_stocks_dynamic(test_data)\n",
"\n",
" \n",
" # train_data, _ = get_simple_factor(train_data)\n",
" # test_data, _ = get_simple_factor(test_data)\n",
"\n",
" # df['future_return'] = (df.groupby('ts_code')['close'].shift(-days) - df.groupby('ts_code')['open'].shift(-1)) / \\\n",
" # df.groupby('ts_code')['open'].shift(-1)\n",
" \n",
" def symmetric_log_transform(values):\n",
" return np.sign(values) * np.log1p(np.abs(values))\n",
"\n",
" train_data['future_return'] = train_data.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
" train_data['future_score'] = calculate_score(train_data, days=days, lambda_param=0.3)\n",
" # train_data['future_score'] = symmetric_log_transform(train_data['future_score'])\n",
"\n",
" test_data['future_return'] = test_data.groupby('ts_code', group_keys=False)['close'].apply(lambda x: x.shift(-days) / x - 1)\n",
" test_data['future_score'] = calculate_score(test_data, days=days, lambda_param=0.3)\n",
" # test_data['future_score'] = symmetric_log_transform(test_data['future_score'])\n",
" \n",
" train_data['label'] = train_data.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
" lambda x: pd.qcut(x, q=10, labels=False, duplicates='drop')\n",
" )\n",
" test_data['label'] = test_data.groupby('trade_date', group_keys=False)['future_score'].transform(\n",
" lambda x: pd.qcut(x, q=10, labels=False, duplicates='drop')\n",
" )\n",
" \n",
" industry_df = industry_df.sort_values(by=['trade_date'])\n",
" index_df = index_df.sort_values(by=['trade_date'])\n",
" \n",
" train_data = train_data.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
" # train_data = train_data.merge(index_df, on='trade_date', how='left')\n",
" test_data = test_data.merge(industry_df, on=['cat_l2_code', 'trade_date'], how='left')\n",
" # test_data = test_data.merge(index_df, on='trade_date', how='left')\n",
" \n",
" train_data, test_data = train_data.replace([np.inf, -np.inf], np.nan), test_data.replace([np.inf, -np.inf], np.nan)\n",
" \n",
" feature_columns = [col for col in train_data.columns if col not in ['trade_date',\n",
" 'ts_code',\n",
" 'label']]\n",
" feature_columns = [col for col in feature_columns if 'future' not in col]\n",
" feature_columns = [col for col in feature_columns if 'score' not in col]\n",
" feature_columns = [col for col in feature_columns if col not in origin_columns]\n",
" feature_columns = [col for col in feature_columns if not col.startswith('_')]\n",
" # print(feature_columns)\n",
"\n",
" feature_columns_o = feature_columns[:]\n",
" train_data, feature_columns = create_deviation_within_dates(train_data, feature_columns_o)\n",
" test_data, _ = create_deviation_within_dates(test_data, feature_columns_o)\n",
" print(f'feature_columns size: {len(feature_columns)}')\n",
" \n",
" train_data = train_data.dropna(subset=feature_columns)\n",
" train_data = train_data.dropna(subset=['label'])\n",
" train_data = train_data.reset_index(drop=True)\n",
" \n",
" # print(test_data.tail())\n",
" # test_data = test_data.dropna(subset=feature_columns_new)\n",
" # test_data = test_data.dropna(subset=['label'])\n",
" test_data = test_data.reset_index(drop=True)\n",
" \n",
" # print(len(train_data))\n",
" print(f\"最小日期: {train_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n",
" print(f\"最大日期: {train_data['trade_date'].max().strftime('%Y-%m-%d')}\")\n",
" # print(len(test_data))\n",
" print(f\"最小日期: {test_data['trade_date'].min().strftime('%Y-%m-%d')}\")\n",
" print(f\"最大日期: {test_data['trade_date'].max().strftime('%Y-%m-%d')}\")\n",
" \n",
" cat_columns = [col for col in df.columns if col.startswith('cat')]\n",
" for col in cat_columns:\n",
" train_data[col] = train_data[col].astype('category')\n",
" test_data[col] = test_data[col].astype('category')\n",
"\n",
"\n",
" feature_columns = remove_highly_correlated_features(train_data[train_data['label'] == 9], feature_columns)\n",
" feature_columns, _ = remove_shifted_features(train_data[train_data['label'] == 9], test_data[test_data['label'] == 9], feature_columns)\n",
" keep_columns = [col for col in train_data.columns if\n",
" col in feature_columns or col in ['ts_code', 'trade_date', 'label', 'future_return', 'future_score']]\n",
" train_data = train_data[keep_columns]\n",
"\n",
" label_gain = list(range(len(train_data['label'].unique())))\n",
" label_gain = [gain * 2 for gain in label_gain]\n",
" light_params['label_gain'] = label_gain\n",
" \n",
" ud = train_data[\"trade_date\"].unique()\n",
" date_weights = {date: weight for date, weight in zip(ud, np.linspace(1, 2, len(unique_dates)))}\n",
" light_params['weight'] = train_data[\"trade_date\"].map(date_weights).tolist()\n",
"\n",
" print(f'feature_columns: {feature_columns}')\n",
" model, scaler, pca = train_light_model(train_data.dropna(subset=['label']),\n",
" light_params, feature_columns,\n",
" [lgb.log_evaluation(period=100),\n",
" lgb.callback.record_evaluation(evals),\n",
" lgb.early_stopping(50, first_metric_only=True)\n",
" ], evals,\n",
" num_boost_round=3000, validation_days=validation_days,\n",
" print_feature_importance=False, use_pca=False)\n",
"\n",
" score_df = test_data.copy()\n",
" numeric_columns = score_df.select_dtypes(include=['float64', 'int64']).columns\n",
" numeric_columns = [col for col in numeric_columns if col in feature_columns]\n",
" score_df.loc[:, numeric_columns] = scaler.transform(score_df[numeric_columns])\n",
" if use_pca and pca is not None:\n",
" score_df.loc[:, numeric_columns] = pca.transform(score_df[numeric_columns])\n",
" score_df['score'] = model.predict(score_df[feature_columns])\n",
" # train_data['score'] = catboost_model.predict(train_data[feature_columns_new])\n",
" score_df = score_df.loc[score_df.groupby('trade_date')['score'].idxmax()]\n",
" # score_df = score_df[score_df['score'] > 0]\n",
" score_df = score_df[['trade_date', 'score', 'ts_code']]\n",
" predictions_list.append(score_df)\n",
" final_predictions = pd.concat(predictions_list, ignore_index=True)\n",
" return final_predictions\n",
"\n",
"\n",
"final_predictions = rolling_train_predict(df.sort_values(['trade_date'], ascending=[True]), 500, 60, industry_df, index_df, days=5, validation_days=100)\n",
"final_predictions.to_csv('predictions_test.tsv', index=False)\n"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"id": "7ed645f2-7755-496e-8a6d-c64adc9080ac",
"metadata": {},
"source": [
"print('finish')"
],
"outputs": [],
"execution_count": null
"execution_count": 24
},
{
"cell_type": "code",
"id": "0dc75517-c857-4f1d-8815-e807400a6d33",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-31T14:45:27.395705Z",
"start_time": "2025-03-31T14:45:27.393319Z"
}
},
"source": [],
"outputs": [],
"execution_count": null

1393
code/train/UpdateSGD.ipynb Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -1,21 +1,8 @@
import numpy as np
import pandas as pd
def read_and_merge_h5_data(h5_filename, key, columns, df=None):
"""
读取 HDF5 文件中的数据,根据指定的 columns 筛选数据,
如果传入 df 参数,则将其与读取的数据根据 ts_code 和 trade_date 合并。
参数:
- h5_filename: HDF5 文件名
- key: 数据存储在 HDF5 文件中的 key
- columns: 要读取的列名列表
- df: 需要合并的 DataFrame如果为空则不进行合并
返回:
- 合并后的 DataFrame
"""
# 处理 _ 开头的列名
def read_and_merge_h5_data(h5_filename, key, columns, df=None, join='left', on=['ts_code', 'trade_date'], prefix=None):
processed_columns = []
for col in columns:
if col.startswith('_'):
@@ -32,14 +19,22 @@ def read_and_merge_h5_data(h5_filename, key, columns, df=None):
new_col = f'_{col}'
data.rename(columns={col: new_col}, inplace=True)
# Optional namespacing step: when a prefix is supplied, rename the columns of
# `data` to '<prefix>_<col>'.
if prefix is not None:
for col in data.columns:
if col not in ['ts_code', 'trade_date']: # ts_code / trade_date (the default `on` keys) are left unprefixed
new_col = f'{prefix}_{col}'
data.rename(columns={col: new_col}, inplace=True)
# Merge only when the caller passed a df that is neither None nor empty.
if df is not None and not df.empty:
# Parse trade_date in both frames as datetime using the '%Y%m%d' format.
df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
data['trade_date'] = pd.to_datetime(data['trade_date'], format='%Y%m%d')
print(f'{join} merge on {on}')
if 'trade_date' in on:
# Parse trade_date in both frames as datetime using the '%Y%m%d' format
# (presumably so the key dtypes agree before merging -- confirm).
# NOTE: the assignment writes the converted column back into the caller's df.
df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
data['trade_date'] = pd.to_datetime(data['trade_date'], format='%Y%m%d')
# Merge df with data on the key columns.
merged_df = pd.merge(df, data, on=['ts_code', 'trade_date'], how='left')
merged_df = pd.merge(df, data, on=on, how=join)
else:
# df is None or empty: no merge is performed; the result is `data` itself.
merged_df = data
@@ -84,4 +79,42 @@ def calculate_risk_adjusted_return(df, days=1, method='ratio', lambda_=0.5, eps=
else:
raise ValueError("Invalid method. Use 'ratio' or 'difference'.")
return df
return df
# import polars as pl
#
# def read_and_merge_h5_data_polars(h5_filename, key, columns, df=None, join='left', on=['ts_code', 'trade_date']):
# processed_columns = []
# for col in columns:
# if col.startswith('_'):
# processed_columns.append(col[1:]) # 去掉下划线
# else:
# processed_columns.append(col)
#
# # 从 HDF5 文件读取数据,选择需要的列
# pd_df = pd.read_hdf(h5_filename, key=key, columns=processed_columns)
#
# # 将 Pandas DataFrame 转换为 Polars DataFrame
# data = pl.from_pandas(pd_df)
#
# # 修改列名,如果列名以前有 _加上 _
# data = data.rename({col: f'_{col}' for col in data.columns if col not in columns})
#
# # 如果传入的 df 不为空,则进行合并
# if df is not None and not df.is_empty():
# print(f'{join} merge on {on}')
#
# # 确保两个 DataFrame 都有 ts_code 和 trade_date 列
# # df = df.with_columns(pl.col('trade_date').str.strptime(pl.Datetime, format='%Y%m%d'))
# # data = data.with_columns(pl.col('trade_date').str.strptime(pl.Datetime, format='%Y%m%d'))
#
# # 根据 ts_code 和 trade_date 合并
# merged_df = df.join(data, on=on, how=join)
# else:
# # 如果 df 为空,则直接返回读取的数据
# merged_df = data
#
# return merged_df