(data leak)RollingRank-7.0,赚钱
This commit is contained in:
@@ -2,55 +2,30 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:48.836794Z",
|
||||
"start_time": "2025-04-06T15:33:48.098706Z"
|
||||
"end_time": "2025-04-08T13:37:05.760051Z",
|
||||
"start_time": "2025-04-08T13:37:04.957429Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:55.800360Z",
|
||||
"start_time": "2025-04-06T15:33:49.011404Z"
|
||||
"end_time": "2025-04-08T13:37:11.825892Z",
|
||||
"start_time": "2025-04-08T13:37:05.764874Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 000001.SZ 20250312\n",
|
||||
"1 000002.SZ 20250312\n",
|
||||
"2 000004.SZ 20250312\n",
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"21567 920108.BJ 20250401\n",
|
||||
"21568 920111.BJ 20250401\n",
|
||||
"21569 920116.BJ 20250401\n",
|
||||
"21570 920118.BJ 20250401\n",
|
||||
"21571 920128.BJ 20250401\n",
|
||||
"\n",
|
||||
"[7551938 rows x 2 columns]\n",
|
||||
"20250403\n",
|
||||
"start_date: 20250407\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
@@ -69,36 +44,42 @@
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.293636Z",
|
||||
"start_time": "2025-04-06T15:33:55.806283Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n"
|
||||
" ts_code trade_date\n",
|
||||
"0 000001.SZ 20250312\n",
|
||||
"1 000002.SZ 20250312\n",
|
||||
"2 000004.SZ 20250312\n",
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"5386 920108.BJ 20250407\n",
|
||||
"5387 920111.BJ 20250407\n",
|
||||
"5388 920116.BJ 20250407\n",
|
||||
"5389 920118.BJ 20250407\n",
|
||||
"5390 920128.BJ 20250407\n",
|
||||
"\n",
|
||||
"[7557329 rows x 2 columns]\n",
|
||||
"20250407\n",
|
||||
"start_date: 20250408\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:13.619472Z",
|
||||
"start_time": "2025-04-08T13:37:12.062668Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
@@ -128,27 +109,35 @@
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.874278Z",
|
||||
"start_time": "2025-04-06T15:33:57.304371Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250408 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:15.761420Z",
|
||||
"start_time": "2025-04-08T13:37:13.644113Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
@@ -158,7 +147,17 @@
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -2,55 +2,30 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:36.424263Z",
|
||||
"start_time": "2025-04-06T15:33:36.048334Z"
|
||||
"end_time": "2025-04-08T13:37:06.782320Z",
|
||||
"start_time": "2025-04-08T13:37:06.021497Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:37.871314Z",
|
||||
"start_time": "2025-04-06T15:33:36.424263Z"
|
||||
"end_time": "2025-04-08T13:37:08.283747Z",
|
||||
"start_time": "2025-04-08T13:37:06.788327Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
"... ... ...\n",
|
||||
"1751 859811.SI 20250401\n",
|
||||
"1752 859821.SI 20250401\n",
|
||||
"1753 859822.SI 20250401\n",
|
||||
"1754 859852.SI 20250401\n",
|
||||
"1755 859951.SI 20250401\n",
|
||||
"\n",
|
||||
"[1057124 rows x 2 columns]\n",
|
||||
"20250403\n",
|
||||
"start_date: 20250407\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
@@ -69,36 +44,42 @@
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:40.123037Z",
|
||||
"start_time": "2025-04-06T15:33:37.881433Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n"
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
".. ... ...\n",
|
||||
"434 859811.SI 20250407\n",
|
||||
"435 859821.SI 20250407\n",
|
||||
"436 859822.SI 20250407\n",
|
||||
"437 859852.SI 20250407\n",
|
||||
"438 859951.SI 20250407\n",
|
||||
"\n",
|
||||
"[1057563 rows x 2 columns]\n",
|
||||
"20250407\n",
|
||||
"start_date: 20250408\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:09.658356Z",
|
||||
"start_time": "2025-04-08T13:37:08.473145Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
@@ -128,27 +109,35 @@
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:40.286614Z",
|
||||
"start_time": "2025-04-06T15:33:40.138227Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:09.826834Z",
|
||||
"start_time": "2025-04-08T13:37:09.671846Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
@@ -158,7 +147,17 @@
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -2,32 +2,30 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:43.537483Z",
|
||||
"start_time": "2025-04-06T15:33:42.844004Z"
|
||||
"end_time": "2025-04-08T13:37:08.050676Z",
|
||||
"start_time": "2025-04-08T13:37:07.328483Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "14671a7f72de2564",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:45.387772Z",
|
||||
"start_time": "2025-04-06T15:33:43.537483Z"
|
||||
"end_time": "2025-04-08T13:37:10.251715Z",
|
||||
"start_time": "2025-04-08T13:37:08.055681Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
@@ -75,38 +73,19 @@
|
||||
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
|
||||
" if not st_data.empty:\n",
|
||||
" name_change_dict[ts_code] = filter_rows(st_data)"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e7f8cce2f80e2f20",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:54.089114Z",
|
||||
"start_time": "2025-04-06T15:33:45.576286Z"
|
||||
"end_time": "2025-04-08T13:37:37.727419Z",
|
||||
"start_time": "2025-04-08T13:37:10.461897Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8502128 entries, 0 to 21571\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 194.6+ MB\n",
|
||||
"None\n",
|
||||
"20250403\n",
|
||||
"20250407\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
@@ -125,37 +104,39 @@
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(start_date)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.041254Z",
|
||||
"start_time": "2025-04-06T15:33:54.103322Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n"
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8507519 entries, 0 to 5390\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 194.7+ MB\n",
|
||||
"None\n",
|
||||
"20250407\n",
|
||||
"20250408\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:39.056144Z",
|
||||
"start_time": "2025-04-08T13:37:37.770718Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -205,169 +186,192 @@
|
||||
" # 重置批次起始时间\n",
|
||||
" batch_start_time = time.time()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "919023c693d7a47a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.072796Z",
|
||||
"start_time": "2025-04-06T15:33:57.061670Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"0 000059.SZ 20250407 4.54 1.8414 3.4767 \n",
|
||||
"1 600830.SH 20250407 8.33 2.5217 3.6802 \n",
|
||||
"2 688061.SH 20250407 24.45 3.1011 3.1011 \n",
|
||||
"3 600868.SH 20250407 2.79 3.8477 4.1435 \n",
|
||||
"4 605168.SH 20250407 25.98 1.3857 2.8470 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5386 688259.SH 20250407 34.99 5.9799 11.4393 \n",
|
||||
"5387 301316.SZ 20250407 19.20 7.2272 7.9512 \n",
|
||||
"5388 601116.SH 20250407 10.37 2.3317 7.1579 \n",
|
||||
"5389 605016.SH 20250407 17.20 1.4773 3.9134 \n",
|
||||
"5390 600148.SH 20250407 16.07 2.0776 4.5745 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"0 0.84 103.2927 NaN 0.5851 0.1574 0.1928 0.3084 \n",
|
||||
"1 0.69 71.1750 71.1750 1.7467 11.2902 11.2902 0.1801 \n",
|
||||
"2 2.31 292.8121 NaN 1.1504 6.1795 4.9755 NaN \n",
|
||||
"3 1.16 NaN NaN 2.3425 16.8832 16.0274 0.0000 \n",
|
||||
"4 1.56 10.3735 14.0394 1.9988 1.0366 1.2218 4.5870 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5386 1.10 66.8795 64.8845 2.6173 5.9119 6.5930 NaN \n",
|
||||
"5387 1.30 94.0750 110.9182 7.1350 5.7094 4.8530 0.4126 \n",
|
||||
"5388 1.78 41.2451 36.3656 1.7811 1.4576 1.4350 1.9286 \n",
|
||||
"5389 1.05 28.7938 22.2858 3.3051 6.4003 4.8254 1.3640 \n",
|
||||
"5390 2.12 3441.4901 274.8323 4.8916 3.2666 3.3043 0.1867 \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv circ_mv \\\n",
|
||||
"0 0.3084 159944.2537 159944.2537 84712.3362 726146.9118 726146.9118 \n",
|
||||
"1 0.1801 45432.2747 45432.2747 31131.0133 378450.8483 378450.8483 \n",
|
||||
"2 NaN 11488.9391 4329.7770 4329.7770 280904.5610 105863.0477 \n",
|
||||
"3 NaN 189814.8679 189814.8679 176264.8506 529583.4814 529583.4814 \n",
|
||||
"4 4.5870 21081.6986 21081.6986 10260.7016 547702.5296 547702.5296 \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"5386 NaN 11170.0000 11170.0000 5839.1660 390838.3000 390838.3000 \n",
|
||||
"5387 0.4126 40400.0000 24282.6503 22071.3403 775680.0000 466226.8858 \n",
|
||||
"5388 1.9286 54767.8400 54767.8400 17840.9208 567942.5008 567942.5008 \n",
|
||||
"5389 1.3640 32308.6400 32308.6400 12196.5716 555708.6080 555708.6080 \n",
|
||||
"5390 0.1867 14151.6450 14151.6450 6427.3300 227416.9352 227416.9352 \n",
|
||||
"\n",
|
||||
" is_st \n",
|
||||
"0 False \n",
|
||||
"1 False \n",
|
||||
"2 False \n",
|
||||
"3 False \n",
|
||||
"4 False \n",
|
||||
"... ... \n",
|
||||
"5386 False \n",
|
||||
"5387 False \n",
|
||||
"5388 False \n",
|
||||
"5389 False \n",
|
||||
"5390 False \n",
|
||||
"\n",
|
||||
"[5391 rows x 19 columns]\n"
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"print(all_daily_data_df)"
|
||||
]
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "28cb78d032671b20",
|
||||
"id": "919023c693d7a47a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.104132Z",
|
||||
"start_time": "2025-04-06T15:33:57.095010Z"
|
||||
"end_time": "2025-04-08T13:37:39.072117Z",
|
||||
"start_time": "2025-04-08T13:37:39.062189Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"print(all_daily_data_df)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"16 000656.SZ 20250407 1.28 0.9982 1.1644 \n",
|
||||
"62 002748.SZ 20250407 7.32 0.5503 1.1888 \n",
|
||||
"114 002490.SZ 20250407 3.49 0.7559 1.3380 \n",
|
||||
"128 300165.SZ 20250407 2.78 4.0431 4.7932 \n",
|
||||
"278 600303.SH 20250407 3.22 1.1873 1.4918 \n",
|
||||
"0 300504.SZ 20250408 12.65 2.5494 4.8465 \n",
|
||||
"1 002223.SZ 20250408 34.24 0.9832 1.6194 \n",
|
||||
"2 002036.SZ 20250408 9.13 7.4710 8.1827 \n",
|
||||
"3 688207.SH 20250408 12.29 4.6144 4.6144 \n",
|
||||
"4 002401.SZ 20250408 13.88 4.9037 9.6159 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5263 002217.SZ 20250407 2.07 0.1251 0.1569 \n",
|
||||
"5267 002808.SZ 20250407 2.99 4.0901 4.7924 \n",
|
||||
"5290 002602.SZ 20250407 6.44 0.2276 0.2634 \n",
|
||||
"5315 002501.SZ 20250407 1.92 1.5653 2.0207 \n",
|
||||
"5375 300376.SZ 20250407 2.96 1.4873 3.4865 \n",
|
||||
"5387 600610.SH 20250408 7.56 18.8004 29.6937 \n",
|
||||
"5388 002215.SZ 20250408 8.84 5.7658 6.7838 \n",
|
||||
"5389 600694.SH 20250408 25.00 3.3101 5.4481 \n",
|
||||
"5390 600121.SH 20250408 3.66 3.0305 6.3012 \n",
|
||||
"5391 873167.BJ 20250408 21.56 7.8805 14.2434 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"16 0.44 NaN NaN NaN 0.1081 0.1637 0.0000 \n",
|
||||
"62 0.61 96.0467 49.7297 1.3328 0.8402 0.8839 1.3661 \n",
|
||||
"114 0.19 NaN NaN 5.6564 2.0529 2.0529 0.0000 \n",
|
||||
"128 2.22 NaN NaN 0.9988 1.3542 1.4288 0.0000 \n",
|
||||
"278 0.77 NaN NaN 1.4997 1.6142 1.6353 0.0000 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5263 0.23 NaN NaN NaN 3.3436 10.3100 0.0000 \n",
|
||||
"5267 0.79 NaN NaN 2.5039 5.2047 4.8881 0.6689 \n",
|
||||
"5290 0.20 91.5846 53.4453 1.8455 3.6128 2.5226 0.0000 \n",
|
||||
"5315 0.58 NaN NaN 7.1559 14.2934 20.0240 0.0000 \n",
|
||||
"5375 4.52 12.2436 36.2242 0.9837 1.4380 2.0320 1.6554 \n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"0 1.56 34.0479 220.6414 1.5349 1.3422 1.7126 1.5892 \n",
|
||||
"1 1.07 14.3268 19.7636 2.8291 4.3058 4.6786 3.5030 \n",
|
||||
"2 2.45 NaN NaN 3.6899 0.9822 0.9210 0.0000 \n",
|
||||
"3 1.61 NaN NaN 1.5605 12.1348 26.4230 NaN \n",
|
||||
"4 1.44 40.4258 40.4258 3.0931 2.8715 2.8715 1.2977 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5387 1.18 NaN NaN 122.1550 7.3648 7.3648 0.0000 \n",
|
||||
"5388 2.49 37.7118 20.0533 2.2997 2.1570 1.7934 1.7092 \n",
|
||||
"5389 3.51 15.4938 13.3524 0.9057 1.0676 1.1271 3.6364 \n",
|
||||
"5390 1.13 15.7764 15.7764 2.3738 1.0605 1.0605 0.0000 \n",
|
||||
"5391 0.79 33.5290 65.6770 3.2183 7.0572 9.9201 NaN \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"16 NaN 533971.5816 531174.3236 455354.2392 6.834836e+05 \n",
|
||||
"62 1.3661 24000.0000 24000.0000 11108.5000 1.756800e+05 \n",
|
||||
"114 NaN 79784.8400 54161.3625 30599.6625 2.784491e+05 \n",
|
||||
"128 NaN 49551.1725 42053.2110 35472.8422 1.377523e+05 \n",
|
||||
"278 NaN 68360.4211 67560.4211 53770.9211 2.201206e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5263 NaN 747939.8928 568036.4278 453036.0995 1.548236e+06 \n",
|
||||
"5267 0.6689 26880.0000 18638.3713 15907.0731 8.037120e+04 \n",
|
||||
"5290 NaN 745255.6968 687870.8273 594244.1179 4.799447e+06 \n",
|
||||
"5315 NaN 355000.0000 354999.9006 274999.9006 6.816000e+05 \n",
|
||||
"5375 1.6554 232824.0476 232743.4901 99284.6609 6.891592e+05 \n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"0 1.5892 27102.4580 21826.2631 11481.0786 3.428461e+05 \n",
|
||||
"1 3.5030 100247.6929 93867.3649 56990.4202 3.432481e+06 \n",
|
||||
"2 NaN 105938.4915 105290.9483 96132.5171 9.672184e+05 \n",
|
||||
"3 NaN 25897.3147 18867.6306 18867.6306 3.182780e+05 \n",
|
||||
"4 1.2977 37166.8440 37136.3940 18937.9540 5.158758e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5387 NaN 107127.4605 70872.6705 44872.6705 8.098836e+05 \n",
|
||||
"5388 1.7092 100519.1310 79400.9515 67486.1454 8.885891e+05 \n",
|
||||
"5389 3.6364 31305.2571 31305.2571 19020.4513 7.826314e+05 \n",
|
||||
"5390 NaN 121841.2038 121841.2038 58597.2758 4.459388e+05 \n",
|
||||
"5391 NaN 7086.1250 4178.1867 2311.6822 1.527769e+05 \n",
|
||||
"\n",
|
||||
" circ_mv is_st \n",
|
||||
"16 6.799031e+05 True \n",
|
||||
"62 1.756800e+05 True \n",
|
||||
"114 1.890232e+05 True \n",
|
||||
"128 1.169079e+05 True \n",
|
||||
"278 2.175446e+05 True \n",
|
||||
"0 2.761022e+05 False \n",
|
||||
"1 3.214019e+06 False \n",
|
||||
"2 9.613064e+05 False \n",
|
||||
"3 2.318832e+05 False \n",
|
||||
"4 5.154531e+05 False \n",
|
||||
"... ... ... \n",
|
||||
"5263 1.175835e+06 True \n",
|
||||
"5267 5.572873e+04 True \n",
|
||||
"5290 4.429888e+06 True \n",
|
||||
"5315 6.815998e+05 True \n",
|
||||
"5375 6.889207e+05 True \n",
|
||||
"5387 5.357974e+05 False \n",
|
||||
"5388 7.019044e+05 False \n",
|
||||
"5389 7.826314e+05 False \n",
|
||||
"5390 4.459388e+05 False \n",
|
||||
"5391 9.008171e+04 False \n",
|
||||
"\n",
|
||||
"[5392 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "28cb78d032671b20",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:39.103515Z",
|
||||
"start_time": "2025-04-08T13:37:39.093908Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"print(all_daily_data_df[all_daily_data_df['is_st']])"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"20 000488.SZ 20250408 1.74 2.5808 3.5449 \n",
|
||||
"21 603608.SH 20250408 4.20 0.2313 0.3624 \n",
|
||||
"88 603363.SH 20250408 3.35 1.2763 1.4156 \n",
|
||||
"124 000989.SZ 20250408 7.60 2.5216 3.5863 \n",
|
||||
"136 300965.SZ 20250408 36.20 1.9389 2.6640 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5261 603879.SH 20250408 4.13 4.3647 6.8212 \n",
|
||||
"5273 002024.SZ 20250408 1.76 0.5005 1.3623 \n",
|
||||
"5298 603828.SH 20250408 4.43 1.3711 2.7554 \n",
|
||||
"5337 600234.SH 20250408 5.53 0.5518 1.0422 \n",
|
||||
"5370 300536.SZ 20250408 7.99 2.2037 2.7214 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"20 0.69 NaN NaN 0.5590 0.2252 0.2252 0.0000 \n",
|
||||
"21 0.35 NaN NaN 1.5767 1.3841 1.5604 0.0000 \n",
|
||||
"88 2.09 NaN NaN NaN 0.4481 0.7781 0.0000 \n",
|
||||
"124 1.71 30.0883 30.0883 1.7332 2.7432 2.7432 5.2053 \n",
|
||||
"136 1.27 NaN NaN 1.7736 NaN NaN 0.0829 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5261 1.67 NaN NaN 5.6207 4.0072 4.0072 0.0000 \n",
|
||||
"5273 1.06 26.7044 26.7044 1.3118 0.2871 0.2871 0.0000 \n",
|
||||
"5298 0.38 NaN NaN 3.5130 1.0396 1.0348 0.0000 \n",
|
||||
"5337 2.28 NaN NaN 3.2963 20.7089 9.4391 0.0000 \n",
|
||||
"5370 0.86 NaN NaN 4.2696 32.8078 24.2873 0.0000 \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"20 NaN 294145.6200 167582.4530 122004.3211 5.118134e+05 \n",
|
||||
"21 NaN 41971.5446 41971.5446 26785.1109 1.762805e+05 \n",
|
||||
"88 NaN 260296.1826 146776.2912 132325.9245 8.719922e+05 \n",
|
||||
"124 5.2053 85594.2012 69415.3353 48807.3173 6.505159e+05 \n",
|
||||
"136 0.0829 6000.0000 2060.9250 1500.0000 2.172000e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5261 NaN 35934.4440 35934.4440 22993.7696 1.484093e+05 \n",
|
||||
"5273 NaN 926476.7618 925444.1318 340007.5385 1.630599e+06 \n",
|
||||
"5298 NaN 59596.0158 59593.9625 29654.2988 2.640103e+05 \n",
|
||||
"5337 NaN 26252.0973 26252.0973 13899.8888 1.451741e+05 \n",
|
||||
"5370 NaN 29328.8133 29325.3240 23747.3240 2.343372e+05 \n",
|
||||
"\n",
|
||||
" circ_mv is_st \n",
|
||||
"20 2.915935e+05 True \n",
|
||||
"21 1.762805e+05 True \n",
|
||||
"88 4.917006e+05 True \n",
|
||||
"124 5.275565e+05 True \n",
|
||||
"136 7.460549e+04 True \n",
|
||||
"... ... ... \n",
|
||||
"5261 1.484093e+05 True \n",
|
||||
"5273 1.628782e+06 True \n",
|
||||
"5298 2.640013e+05 True \n",
|
||||
"5337 1.451741e+05 True \n",
|
||||
"5370 2.343093e+05 True \n",
|
||||
"\n",
|
||||
"[106 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(all_daily_data_df[all_daily_data_df['is_st']])"
|
||||
]
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "692b58674b7462c9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:33:57.927188Z",
|
||||
"start_time": "2025-04-06T15:33:57.127166Z"
|
||||
"end_time": "2025-04-08T13:37:39.921445Z",
|
||||
"start_time": "2025-04-08T13:37:39.128232Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -377,30 +381,29 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
||||
]
|
||||
"execution_count": 7
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d7a773fc20293477",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:06.721517Z",
|
||||
"start_time": "2025-04-06T15:33:57.951119Z"
|
||||
"end_time": "2025-04-08T13:37:46.393814Z",
|
||||
"start_time": "2025-04-08T13:37:39.941474Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
|
||||
" print(df.info())"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8507519 entries, 0 to 5390\n",
|
||||
"Index: 8512911 entries, 0 to 5391\n",
|
||||
"Data columns (total 3 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
@@ -408,16 +411,12 @@
|
||||
" 1 trade_date object\n",
|
||||
" 2 is_st bool \n",
|
||||
"dtypes: bool(1), object(2)\n",
|
||||
"memory usage: 202.8+ MB\n",
|
||||
"memory usage: 203.0+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
|
||||
" print(df.info())"
|
||||
]
|
||||
"execution_count": 8
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,52 +2,31 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:19.686298Z",
|
||||
"start_time": "2025-04-06T15:34:19.679462Z"
|
||||
"end_time": "2025-04-08T13:37:11.623192Z",
|
||||
"start_time": "2025-04-08T13:37:10.611486Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "742c29d453b9bb38",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:29.569406Z",
|
||||
"start_time": "2025-04-06T15:34:19.711970Z"
|
||||
"end_time": "2025-04-08T13:37:32.754262Z",
|
||||
"start_time": "2025-04-08T13:37:11.629198Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8343458 entries, 0 to 20511\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 191.0+ MB\n",
|
||||
"None\n",
|
||||
"20250403\n",
|
||||
"start_date: 20250407\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
@@ -66,37 +45,39 @@
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:32.842166Z",
|
||||
"start_time": "2025-04-06T15:34:29.601368Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n"
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8348584 entries, 0 to 5125\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 191.1+ MB\n",
|
||||
"None\n",
|
||||
"20250407\n",
|
||||
"start_date: 20250408\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:34.659267Z",
|
||||
"start_time": "2025-04-08T13:37:33.094502Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
@@ -126,33 +107,59 @@
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9af80516849d4e80",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:32.851075Z",
|
||||
"start_time": "2025-04-06T15:34:32.844866Z"
|
||||
"end_time": "2025-04-08T13:37:34.678164Z",
|
||||
"start_time": "2025-04-08T13:37:34.674804Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:35.261741Z",
|
||||
"start_time": "2025-04-06T15:34:32.864789Z"
|
||||
"end_time": "2025-04-08T13:37:37.285649Z",
|
||||
"start_time": "2025-04-08T13:37:34.694595Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -162,15 +169,7 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
"execution_count": 5
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -2,58 +2,31 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:34.020485Z",
|
||||
"start_time": "2025-04-06T15:34:33.497731Z"
|
||||
"end_time": "2025-04-08T13:37:12.814092Z",
|
||||
"start_time": "2025-04-08T13:37:11.953133Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5a84bc9da6d54868",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:46.227924Z",
|
||||
"start_time": "2025-04-06T15:34:34.042810Z"
|
||||
"end_time": "2025-04-08T13:37:35.724923Z",
|
||||
"start_time": "2025-04-08T13:37:12.820096Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"4717 600285.SH 20250403\n",
|
||||
"4718 600287.SH 20250403\n",
|
||||
"4719 600288.SH 20250403\n",
|
||||
"4708 600273.SH 20250403\n",
|
||||
"5309 601121.SH 20250403\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 10301468 entries, 0 to 28272\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 235.8+ MB\n",
|
||||
"None\n",
|
||||
"20250403\n",
|
||||
"20250407\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
@@ -73,37 +46,45 @@
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(start_date)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:48.652346Z",
|
||||
"start_time": "2025-04-06T15:34:46.236695Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250407 完成\n"
|
||||
" ts_code trade_date\n",
|
||||
"4721 600284.SH 20250408\n",
|
||||
"4722 600285.SH 20250408\n",
|
||||
"4723 600287.SH 20250408\n",
|
||||
"4712 600272.SH 20250408\n",
|
||||
"5 000008.SZ 20250408\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 10315620 entries, 0 to 14151\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 236.1+ MB\n",
|
||||
"None\n",
|
||||
"20250408\n",
|
||||
"20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:36.896959Z",
|
||||
"start_time": "2025-04-08T13:37:35.931558Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
@@ -134,58 +115,63 @@
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "96a81aa5890ea3c3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:48.680504Z",
|
||||
"start_time": "2025-04-06T15:34:48.665530Z"
|
||||
}
|
||||
},
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[ trade_date ts_code up_limit down_limit\n",
|
||||
"0 20250408 000001.SZ 11.77 9.63\n",
|
||||
"1 20250408 000002.SZ 7.26 5.94\n",
|
||||
"2 20250408 000004.SZ 9.72 7.96\n",
|
||||
"3 20250408 000006.SZ 6.90 5.64\n",
|
||||
"4 20250408 000007.SZ 6.14 5.02\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7072 20250408 920108.BJ 25.33 13.65\n",
|
||||
"7073 20250408 920111.BJ 29.38 15.82\n",
|
||||
"7074 20250408 920116.BJ 96.40 51.92\n",
|
||||
"7075 20250408 920118.BJ 30.16 16.24\n",
|
||||
"7076 20250408 920128.BJ 34.15 18.39\n",
|
||||
"\n",
|
||||
"[7077 rows x 4 columns], trade_date ts_code up_limit down_limit\n",
|
||||
"0 20250407 000001.SZ 12.47 10.21\n",
|
||||
"1 20250407 000002.SZ 7.85 6.43\n",
|
||||
"2 20250407 000004.SZ 10.80 8.84\n",
|
||||
"3 20250407 000006.SZ 7.67 6.27\n",
|
||||
"4 20250407 000007.SZ 6.82 5.58\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7070 20250407 920108.BJ 31.72 17.08\n",
|
||||
"7071 20250407 920111.BJ 36.85 19.85\n",
|
||||
"7072 20250407 920116.BJ 116.05 62.49\n",
|
||||
"7073 20250407 920118.BJ 38.07 20.51\n",
|
||||
"7074 20250407 920128.BJ 43.38 23.36\n",
|
||||
"\n",
|
||||
"[7075 rows x 4 columns]]\n"
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250410 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "96a81aa5890ea3c3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-08T13:37:37.699901Z",
|
||||
"start_time": "2025-04-08T13:37:36.909744Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"print(all_daily_data)\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "No objects to concatenate",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[1;32mIn[4], line 3\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28mprint\u001B[39m(all_daily_data)\n\u001B[0;32m 2\u001B[0m \u001B[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001B[39;00m\n\u001B[1;32m----> 3\u001B[0m all_daily_data_df \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mconcat(all_daily_data, ignore_index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n",
|
||||
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:382\u001B[0m, in \u001B[0;36mconcat\u001B[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001B[0m\n\u001B[0;32m 379\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m copy \u001B[38;5;129;01mand\u001B[39;00m using_copy_on_write():\n\u001B[0;32m 380\u001B[0m copy \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[1;32m--> 382\u001B[0m op \u001B[38;5;241m=\u001B[39m _Concatenator(\n\u001B[0;32m 383\u001B[0m objs,\n\u001B[0;32m 384\u001B[0m axis\u001B[38;5;241m=\u001B[39maxis,\n\u001B[0;32m 385\u001B[0m ignore_index\u001B[38;5;241m=\u001B[39mignore_index,\n\u001B[0;32m 386\u001B[0m join\u001B[38;5;241m=\u001B[39mjoin,\n\u001B[0;32m 387\u001B[0m keys\u001B[38;5;241m=\u001B[39mkeys,\n\u001B[0;32m 388\u001B[0m levels\u001B[38;5;241m=\u001B[39mlevels,\n\u001B[0;32m 389\u001B[0m names\u001B[38;5;241m=\u001B[39mnames,\n\u001B[0;32m 390\u001B[0m verify_integrity\u001B[38;5;241m=\u001B[39mverify_integrity,\n\u001B[0;32m 391\u001B[0m copy\u001B[38;5;241m=\u001B[39mcopy,\n\u001B[0;32m 392\u001B[0m sort\u001B[38;5;241m=\u001B[39msort,\n\u001B[0;32m 393\u001B[0m )\n\u001B[0;32m 395\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result()\n",
|
||||
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:445\u001B[0m, in \u001B[0;36m_Concatenator.__init__\u001B[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001B[0m\n\u001B[0;32m 442\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mverify_integrity \u001B[38;5;241m=\u001B[39m verify_integrity\n\u001B[0;32m 443\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcopy \u001B[38;5;241m=\u001B[39m copy\n\u001B[1;32m--> 445\u001B[0m objs, keys \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_clean_keys_and_objs(objs, keys)\n\u001B[0;32m 447\u001B[0m \u001B[38;5;66;03m# figure out what our result ndim is going to be\u001B[39;00m\n\u001B[0;32m 448\u001B[0m ndims \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_ndims(objs)\n",
|
||||
"File \u001B[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:507\u001B[0m, in \u001B[0;36m_Concatenator._clean_keys_and_objs\u001B[1;34m(self, objs, keys)\u001B[0m\n\u001B[0;32m 504\u001B[0m objs_list \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(objs)\n\u001B[0;32m 506\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(objs_list) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m--> 507\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo objects to concatenate\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 509\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m keys \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 510\u001B[0m objs_list \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m(com\u001B[38;5;241m.\u001B[39mnot_none(\u001B[38;5;241m*\u001B[39mobjs_list))\n",
|
||||
"\u001B[1;31mValueError\u001B[0m: No objects to concatenate"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -193,7 +179,7 @@
|
||||
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:48.966102Z",
|
||||
"end_time": "2025-04-08T13:37:37.748574900Z",
|
||||
"start_time": "2025-04-06T15:34:48.693158Z"
|
||||
}
|
||||
},
|
||||
@@ -221,7 +207,7 @@
|
||||
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-06T15:34:48.980659Z",
|
||||
"end_time": "2025-04-08T13:37:37.762102Z",
|
||||
"start_time": "2025-04-06T15:34:48.977771Z"
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user