Classify2

This commit is contained in:
liaozhaorun
2025-05-06 23:42:40 +08:00
parent 721e72c599
commit b783a6f968
19 changed files with 9390 additions and 2774 deletions

View File

@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
@@ -9,16 +10,16 @@
"start_time": "2025-04-09T14:57:33.903794Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('<YOUR_TUSHARE_TOKEN>')  # SECURITY: a real API token was committed here — revoke it and load from an environment variable instead\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
@@ -26,6 +27,30 @@
"start_time": "2025-04-09T14:57:34.666469Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"43070 920108.BJ 20250421\n",
"43071 920111.BJ 20250421\n",
"43072 920116.BJ 20250421\n",
"43073 920118.BJ 20250421\n",
"43074 920128.BJ 20250421\n",
"\n",
"[7648931 rows x 2 columns]\n",
"20250430\n",
"start_date: 20250506\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
@@ -39,40 +64,16 @@
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250620')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"5387 920108.BJ 20250408\n",
"5388 920111.BJ 20250408\n",
"5389 920116.BJ 20250408\n",
"5390 920118.BJ 20250408\n",
"5391 920128.BJ 20250408\n",
"\n",
"[7562721 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
@@ -80,6 +81,47 @@
"start_time": "2025-04-09T14:57:42.232250Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250619 完成\n",
"任务 20250620 完成\n",
"任务 20250618 完成\n",
"任务 20250617 完成\n",
"任务 20250613 完成\n",
"任务 20250616 完成\n",
"任务 20250611 完成\n",
"任务 20250612 完成\n",
"任务 20250610 完成\n",
"任务 20250609 完成\n",
"任务 20250606 完成\n",
"任务 20250605 完成\n",
"任务 20250604 完成\n",
"任务 20250603 完成\n",
"任务 20250529 完成\n",
"任务 20250530 完成\n",
"任务 20250528 完成\n",
"任务 20250527 完成\n",
"任务 20250526 完成\n",
"任务 20250523 完成\n",
"任务 20250522 完成\n",
"任务 20250521 完成\n",
"任务 20250519 完成\n",
"任务 20250520 完成\n",
"任务 20250516 完成\n",
"任务 20250515 完成\n",
"任务 20250514 完成\n",
"任务 20250513 完成\n",
"任务 20250512 完成\n",
"任务 20250509 完成\n",
"任务 20250508 完成\n",
"任务 20250507 完成\n",
"任务 20250506 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
@@ -109,27 +151,11 @@
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
@@ -137,16 +163,6 @@
"start_time": "2025-04-09T14:57:45.698824Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件（table 格式）\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
@@ -156,12 +172,21 @@
]
}
],
"execution_count": 4
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件（table 格式）\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "new_trader",
"language": "python",
"name": "python3"
},