Classify2

This commit is contained in:
liaozhaorun
2025-05-06 23:42:40 +08:00
parent 721e72c599
commit b783a6f968
19 changed files with 9390 additions and 2774 deletions

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
@@ -9,17 +10,17 @@
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the environment instead of hard-coding it,\n",
"# so the credential is never committed together with the notebook.\n",
"token = os.environ.get('TUSHARE_TOKEN')\n",
"if not token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"\n",
"ts.set_token(token)\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
@@ -27,6 +28,32 @@
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4745 600276.SH 20250506\n",
"4746 600278.SH 20250506\n",
"4747 600279.SH 20250506\n",
"4736 600262.SH 20250506\n",
"281 000791.SZ 20250506\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10436295 entries, 0 to 113592\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 238.9+ MB\n",
"None\n",
"20250506\n",
"20250507\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -41,50 +68,84 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250720')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4721 600284.SH 20250408\n",
"4722 600285.SH 20250408\n",
"4723 600287.SH 20250408\n",
"4712 600272.SH 20250408\n",
"5 000008.SZ 20250408\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10315620 entries, 0 to 14151\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 236.1+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
"任务 20250718 完成\n",
"任务 20250717 完成\n",
"任务 20250715 完成\n",
"任务 20250716 完成\n",
"任务 20250714 完成\n",
"任务 20250711 完成\n",
"任务 20250709 完成\n",
"任务 20250710 完成\n",
"任务 20250708 完成\n",
"任务 20250707 完成\n",
"任务 20250703 完成\n",
"任务 20250704 完成\n",
"任务 20250701 完成\n",
"任务 20250702 完成\n",
"任务 20250630 完成\n",
"任务 20250627 完成\n",
"任务 20250626 完成\n",
"任务 20250625 完成\n",
"任务 20250624 完成\n",
"任务 20250623 完成\n",
"任务 20250620 完成\n",
"任务 20250619 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250616 完成\n",
"任务 20250613 完成\n",
"任务 20250612 完成\n",
"任务 20250611 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250530 完成\n",
"任务 20250529 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250520 完成\n",
"任务 20250519 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -115,27 +176,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250411 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
@@ -143,37 +188,38 @@
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250409 000001.SZ 11.90 9.74\n",
"1 20250409 000002.SZ 7.48 6.12\n",
"2 20250409 000004.SZ 9.53 7.79\n",
"3 20250409 000006.SZ 6.28 5.14\n",
"4 20250409 000007.SZ 5.91 4.83\n",
"... ... ... ... ...\n",
"7077 20250409 920108.BJ 26.55 14.31\n",
"7078 20250409 920111.BJ 30.84 16.62\n",
"7079 20250409 920116.BJ 100.29 54.01\n",
"7080 20250409 920118.BJ 31.62 17.04\n",
"7081 20250409 920128.BJ 35.26 19.00\n",
"\n",
"[7082 rows x 4 columns]]\n"
"[]\n"
]
},
{
"ename": "ValueError",
"evalue": "No objects to concatenate",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(all_daily_data)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# 将所有数据合并为一个 DataFrame\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m all_daily_data_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(all_daily_data, ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[1;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[0;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[0;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m _Concatenator(\n\u001b[0;32m 383\u001b[0m objs,\n\u001b[0;32m 384\u001b[0m axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[0;32m 385\u001b[0m ignore_index\u001b[38;5;241m=\u001b[39mignore_index,\n\u001b[0;32m 386\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m 387\u001b[0m keys\u001b[38;5;241m=\u001b[39mkeys,\n\u001b[0;32m 388\u001b[0m levels\u001b[38;5;241m=\u001b[39mlevels,\n\u001b[0;32m 389\u001b[0m names\u001b[38;5;241m=\u001b[39mnames,\n\u001b[0;32m 390\u001b[0m verify_integrity\u001b[38;5;241m=\u001b[39mverify_integrity,\n\u001b[0;32m 391\u001b[0m copy\u001b[38;5;241m=\u001b[39mcopy,\n\u001b[0;32m 392\u001b[0m sort\u001b[38;5;241m=\u001b[39msort,\n\u001b[0;32m 393\u001b[0m )\n\u001b[0;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[1;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[0;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[1;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_clean_keys_and_objs(objs, keys)\n\u001b[0;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[0;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n",
"File \u001b[1;32me:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\pandas\\core\\reshape\\concat.py:507\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[1;34m(self, objs, keys)\u001b[0m\n\u001b[0;32m 504\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(objs)\n\u001b[0;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 507\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo objects to concatenate\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 510\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(com\u001b[38;5;241m.\u001b[39mnot_none(\u001b[38;5;241m*\u001b[39mobjs_list))\n",
"\u001b[1;31mValueError\u001b[0m: No objects to concatenate"
]
}
],
"execution_count": 4
"source": [
"print(all_daily_data)\n",
"# Merge all fetched per-day frames into a single DataFrame.\n",
"# pd.concat raises ValueError ('No objects to concatenate') on an empty\n",
"# list, so fall back to an empty DataFrame when nothing was fetched.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"else:\n",
"    all_daily_data_df = pd.DataFrame()\n",
"    print(\"No new daily data was fetched; nothing to merge.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
@@ -181,14 +227,6 @@
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -198,10 +236,18 @@
]
}
],
"execution_count": 5
"source": [
"# Append the merged frame to the HDF5 store under the 'stk_limit' key,\n",
"# using the table format with data_columns=True.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='stk_limit',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
@@ -209,14 +255,13 @@
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},