RollingRank赚钱 - Sharpe 1.43

This commit is contained in:
liaozhaorun
2025-04-28 11:02:52 +08:00
parent 94cd9aa6c8
commit 9e598d4ed0
93 changed files with 18134 additions and 4342 deletions

441
main/data/cyq_perf.ipynb Normal file
View File

@@ -0,0 +1,441 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:31:25.004019Z",
"start_time": "2025-03-12T15:31:24.322440Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"\n",
"# SECURITY: the Tushare API token used to be hardcoded below; it is now a\n",
"# leaked credential and should be revoked/rotated. Prefer the TUSHARE_TOKEN\n",
"# environment variable; the old literal remains only as a fallback so that\n",
"# existing setups keep working. (Also removed the accidental, unused\n",
"# 'from operator import index' auto-import.)\n",
"ts.set_token(os.getenv('TUSHARE_TOKEN', '3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f'))\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "972a5ac9f79fe373",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:31:40.917015Z",
"start_time": "2025-03-12T15:31:35.958771Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date his_low his_high cost_5pct cost_15pct \\\n",
"0 000001.SZ 20180104 0.2 12.7 7.2 7.9 \n",
"1 000002.SZ 20180104 0.3 31.8 14.1 15.6 \n",
"2 000004.SZ 20180104 0.8 53.2 21.6 22.0 \n",
"3 000008.SZ 20180104 0.1 13.9 7.2 7.8 \n",
"4 000009.SZ 20180104 0.3 15.0 5.9 5.9 \n",
"... ... ... ... ... ... ... \n",
"3091 603991.SH 20180104 12.0 67.8 26.4 27.0 \n",
"3092 603993.SH 20180104 1.5 8.1 5.6 5.8 \n",
"3093 603997.SH 20180104 5.4 31.5 9.9 10.2 \n",
"3094 603998.SH 20180104 3.9 18.9 9.8 10.1 \n",
"3095 603999.SH 20180104 5.4 30.9 6.9 7.2 \n",
"\n",
" cost_50pct cost_85pct cost_95pct weight_avg winner_rate \n",
"0 10.6 11.3 11.9 9.93 71.97 \n",
"1 20.1 23.1 24.3 19.62 99.34 \n",
"2 23.6 27.6 29.6 24.71 45.41 \n",
"3 8.6 9.2 10.5 8.64 47.04 \n",
"4 6.6 7.6 7.9 6.76 38.14 \n",
"... ... ... ... ... ... \n",
"3091 27.6 30.6 34.2 28.54 57.36 \n",
"3092 6.3 7.1 7.6 6.34 73.50 \n",
"3093 10.5 11.7 11.7 10.84 11.28 \n",
"3094 11.9 13.5 15.7 12.13 17.93 \n",
"3095 7.8 9.6 9.9 8.17 21.83 \n",
"\n",
"[3096 rows x 11 columns]\n"
]
}
],
"source": [
"\n",
"df = pro.cyq_perf(trade_date='20180104')\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1b5a82fbf4e380de",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:30:20.421604Z",
"start_time": "2025-03-12T15:30:20.224851Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"# Build the list of trading days to download (20170101 - 20250420).\n",
"# NOTE: removed the unused, misleading leftover\n",
"# h5_filename = '../../../data/sw_daily.h5' — this notebook actually saves to\n",
"# ../../data/cyq_perf.h5 (see the save cell below).\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal['cal_date'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f448da220816bf98",
"metadata": {
"ExecuteTime": {
"start_time": "2025-03-12T15:30:20.436796Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250407 完成\n",
"任务 20250403 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n",
"任务 20250321 完成\n",
"任务 20250320 完成\n",
"任务 20250319 完成\n",
"任务 20250318 完成\n",
"任务 20250317 完成\n",
"任务 20250314 完成\n",
"任务 20250313 完成\n",
"任务 20250312 完成\n",
"任务 20250311 完成\n",
"任务 20250310 完成\n",
"任务 20250307 完成\n",
"任务 20250306 完成\n",
"任务 20250305 完成\n",
"任务 20250304 完成\n",
"任务 20250303 完成\n",
"任务 20250228 完成\n",
"任务 20250227 完成\n",
"任务 20250226 完成\n",
"任务 20250225 完成\n",
"任务 20250224 完成\n",
"任务 20250221 完成\n",
"任务 20250220 完成\n",
"任务 20250219 完成\n",
"任务 20250218 完成\n",
"任务 20250217 完成\n",
"任务 20250214 完成\n",
"任务 20250213 完成\n",
"任务 20250212 完成\n",
"任务 20250211 完成\n",
"任务 20250210 完成\n",
"任务 20250207 完成\n",
"任务 20250206 完成\n",
"任务 20250205 完成\n",
"任务 20250127 完成\n",
"任务 20250124 完成\n",
"任务 20250123 完成\n",
"任务 20250122 完成\n",
"任务 20250121 完成\n",
"任务 20250120 完成\n",
"任务 20250117 完成\n",
"任务 20250116 完成\n",
"任务 20250115 完成\n",
"任务 20250114 完成\n",
"任务 20250113 完成\n",
"任务 20250110 完成\n",
"任务 20250109 完成\n",
"任务 20250108 完成\n",
"任务 20250107 完成\n",
"任务 20250106 完成\n",
"任务 20250103 完成\n",
"任务 20250102 完成\n",
"任务 20241231 完成\n",
"任务 20241230 完成\n",
"任务 20241227 完成\n",
"任务 20241226 完成\n",
"任务 20241225 完成\n",
"任务 20241224 完成\n",
"任务 20241223 完成\n",
"任务 20241220 完成\n",
"任务 20241219 完成\n",
"任务 20241218 完成\n",
"任务 20241217 完成\n",
"任务 20241216 完成\n",
"任务 20241213 完成\n",
"任务 20241212 完成\n",
"任务 20241211 完成\n",
"任务 20241210 完成\n",
"任务 20241209 完成\n",
"任务 20241206 完成\n",
"任务 20241205 完成\n",
"任务 20241204 完成\n",
"任务 20241203 完成\n",
"任务 20241202 完成\n",
"任务 20241129 完成\n",
"任务 20241128 完成\n",
"任务 20241127 完成\n",
"任务 20241126 完成\n",
"任务 20241125 完成\n",
"任务 20241122 完成\n",
"任务 20241121 完成\n",
"任务 20241120 完成\n",
"任务 20241119 完成\n",
"任务 20241118 完成\n",
"任务 20241115 完成\n",
"任务 20241114 完成\n",
"任务 20241113 完成\n",
"任务 20241112 完成\n",
"任务 20241111 完成\n",
"任务 20241108 完成\n",
"任务 20241107 完成\n",
"任务 20241106 完成\n",
"任务 20241105 完成\n",
"任务 20241104 完成\n",
"任务 20241101 完成\n",
"任务 20241031 完成\n",
"任务 20241030 完成\n",
"任务 20241029 完成\n",
"任务 20241028 完成\n",
"任务 20241025 完成\n",
"任务 20241024 完成\n",
"任务 20241022 完成\n",
"任务 20241023 完成\n",
"任务 20241021 完成\n",
"任务 20241018 完成\n",
"任务 20241017 完成\n",
"任务 20241016 完成\n",
"任务 20241015 完成\n",
"任务 20241014 完成\n",
"任务 20241010 完成\n",
"任务 20241011 完成\n",
"任务 20241009 完成\n",
"任务 20241008 完成\n",
"任务 20240930 完成\n",
"任务 20240927 完成\n",
"任务 20240926 完成\n",
"任务 20240925 完成\n",
"任务 20240924 完成\n",
"任务 20240923 完成\n",
"任务 20240919 完成\n",
"任务 20240920 完成\n",
"任务 20240913 完成\n",
"任务 20240918 完成\n",
"任务 20240911 完成\n",
"任务 20240912 完成\n",
"任务 20240910 完成\n",
"任务 20240909 完成\n",
"任务 20240905 完成\n",
"任务 20240906 完成\n",
"任务 20240904 完成\n",
"任务 20240903 完成\n",
"任务 20240902 完成\n",
"任务 20240830 完成\n",
"任务 20240829 完成\n",
"任务 20240828 完成\n",
"任务 20240827 完成\n",
"任务 20240826 完成\n",
"任务 20240823 完成\n",
"任务 20240822 完成\n",
"任务 20240821 完成\n",
"任务 20240820 完成\n",
"任务 20240819 完成\n",
"任务 20240816 完成\n",
"任务 20240815 完成\n",
"任务 20240814 完成\n",
"任务 20240813 完成\n",
"任务 20240812 完成\n",
"任务 20240809 完成\n",
"任务 20240808 完成\n",
"任务 20240807 完成\n",
"任务 20240806 完成\n",
"任务 20240805 完成\n",
"任务 20240802 完成\n",
"任务 20240801 完成\n",
"任务 20240731 完成\n",
"任务 20240730 完成\n",
"任务 20240729 完成\n",
"任务 20240726 完成\n",
"任务 20240725 完成\n",
"任务 20240724 完成\n",
"任务 20240723 完成\n",
"任务 20240722 完成\n",
"任务 20240719 完成\n",
"任务 20240718 完成\n",
"任务 20240717 完成\n",
"任务 20240716 完成\n",
"任务 20240715 完成\n",
"任务 20240712 完成\n",
"任务 20240711 完成\n",
"任务 20240710 完成\n",
"任务 20240709 完成\n",
"任务 20240708 完成\n",
"任务 20240705 完成\n",
"任务 20240704 完成\n",
"任务 20240703 完成\n",
"任务 20240702 完成\n",
"任务 20240701 完成\n",
"任务 20240628 完成\n",
"任务 20240627 完成\n",
"任务 20240626 完成\n",
"任务 20240625 完成\n",
"任务 20240624 完成\n",
"任务 20240621 完成\n",
"任务 20240620 完成\n",
"任务 20240619 完成\n",
"任务 20240618 完成\n",
"任务 20240617 完成\n",
"任务 20240614 完成\n",
"任务 20240613 完成\n",
"任务 20240612 完成\n",
"任务 20240611 完成\n",
"任务 20240607 完成\n",
"任务 20240606 完成\n",
"任务 20240605 完成\n",
"任务 20240604 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch chip-distribution (cyq_perf) data for one trade date.\n",
"\n",
"    Returns the DataFrame, or None when the API returns nothing for the date.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # crude client-side rate limiting for the Tushare API\n",
"    data = pro.cyq_perf(trade_date=trade_date)\n",
"    if data is not None and not data.empty:\n",
"        return data\n",
"    return None\n",
"\n",
"\n",
"# NOTE: removed the unused api_call_count / batch_start_time bookkeeping\n",
"# variables (nothing in this notebook read them).\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date belonging to this future\n",
"        try:\n",
"            result = future.result()\n",
"            # BUG FIX: None results (empty dates) used to be appended as-is,\n",
"            # which makes pd.concat(all_daily_data) in the next cell raise\n",
"            # TypeError. Only keep real DataFrames.\n",
"            if result is not None:\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "907f732d3c397bf",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:31:10.381348500Z",
"start_time": "2025-03-12T15:23:41.345460Z"
}
},
"outputs": [],
"source": [
"# Merge all per-date frames into one DataFrame.\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# Save as HDF5 in 'table' format (queryable; data_columns enables per-column\n",
"# selection). The original comment here had a garbled, unmatched parenthesis.\n",
"all_daily_data_df.to_hdf('../../data/cyq_perf.h5', key='cyq_perf', mode='w', format='table', data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "73e829ac-ff3d-408e-beb3-0b87f5b00b19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"7465732 603991.SH 20180102\n",
"7465733 603993.SH 20180102\n",
"7465734 603997.SH 20180102\n",
"7465735 603998.SH 20180102\n",
"7465736 603999.SH 20180102\n",
"\n",
"[7465737 rows x 2 columns]\n"
]
}
],
"source": [
"h5_filename = '../../data/cyq_perf.h5'\n",
"key = '/cyq_perf'\n",
"# NOTE: .min() yields the EARLIEST trade_date present in the store. The\n",
"# variable was previously (misleadingly) named max_date; nothing else in this\n",
"# notebook reads it, so it is renamed to match what it actually holds.\n",
"min_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df)\n",
"    min_date = df['trade_date'].min()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

2297
main/data/daily_basic.ipynb Normal file

File diff suppressed because it is too large Load Diff

5556
main/data/daily_data.ipynb Normal file

File diff suppressed because it is too large Load Diff

11
main/data/daily_data.py Normal file
View File

@@ -0,0 +1,11 @@
import os

import tushare as ts
import pandas as pd
import time

# SECURITY: the Tushare token was hardcoded here; it is now a leaked credential
# and should be rotated. Read it from the environment, falling back to the old
# literal so existing setups keep working.
ts.set_token(os.getenv('TUSHARE_TOKEN', '3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f'))
pro = ts.pro_api()

# Fetch post-adjusted (hfq) daily bars for 000001.SZ since 2018-01-01 and show
# them. (The previous comment about reading a local stocks_data.csv was stale
# and had an unclosed parenthesis — this script never reads a CSV.)
df = ts.pro_bar(ts_code='000001.SZ', adj='hfq', start_date='20180101')
print(df)

View File

@@ -0,0 +1,148 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:27.092313Z",
"start_time": "2025-04-09T14:57:26.124592Z"
}
},
"source": [
"import os\n",
"import time\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"\n",
"# SECURITY: the Tushare token was hardcoded (now a leaked credential; rotate\n",
"# it). Use the TUSHARE_TOKEN environment variable when set; the old literal\n",
"# is kept only as a backward-compatible fallback.\n",
"ts.set_token(os.getenv('TUSHARE_TOKEN', '3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f'))\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "f448da220816bf98",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:37.680808Z",
"start_time": "2025-04-09T14:57:27.392846Z"
}
},
"source": [
"# The four benchmark indexes: CSI 300, CSI 500, CSI 1000, ChiNext.\n",
"# BUG FIX: CSI 300 is '000300.SH' (SSE) / '399300.SZ' (SZSE); the previous\n",
"# code '399300.SH' does not exist, so index_daily returned an empty frame —\n",
"# the evident source of the all-NA FutureWarning emitted by pd.concat below.\n",
"index_list = ['000300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
"\n",
"# Fetch and collect daily bars for each index.\n",
"all_data = []\n",
"\n",
"for ts_code in index_list:\n",
"    df = pro.index_daily(ts_code=ts_code)  # a date range can be added if needed\n",
"    df['ts_code'] = ts_code  # tag rows so the indexes can be told apart\n",
"    all_data.append(df)\n",
"\n",
"# Merge everything into a single frame.\n",
"final_df = pd.concat(all_data, ignore_index=True)\n",
"\n",
"# Persist to HDF5.\n",
"final_df.to_hdf('../../data/index_data.h5', key='index_data', mode='w')\n",
"\n",
"print(\"数据已经成功存储到index_data.h5文件中\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据已经成功存储到index_data.h5文件中\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liaozhaorun\\AppData\\Local\\Temp\\ipykernel_15500\\3209233630.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
" final_df = pd.concat(all_data, ignore_index=True)\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "907f732d3c397bf",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:37.730922Z",
"start_time": "2025-04-09T14:57:37.695917Z"
}
},
"source": [
"h5_filename = '../../data/index_data.h5'\n",
"key = '/index_data'\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key]\n",
" print(df)\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close open high low \\\n",
"0 000905.SH 20250409 5439.7716 5249.6841 5465.1449 5135.9655 \n",
"1 000905.SH 20250408 5326.9140 5279.7566 5371.1834 5249.2318 \n",
"2 000905.SH 20250407 5287.0333 5523.9636 5587.8502 5212.6773 \n",
"3 000905.SH 20250403 5845.5045 5842.6167 5906.7057 5817.9662 \n",
"4 000905.SH 20250402 5899.0865 5884.8925 5936.6467 5884.1126 \n",
"... ... ... ... ... ... ... \n",
"13444 399006.SZ 20100607 1069.4680 1005.0280 1075.2250 1001.7020 \n",
"13445 399006.SZ 20100604 1027.6810 989.6810 1027.6810 986.5040 \n",
"13446 399006.SZ 20100603 998.3940 1002.3550 1026.7020 997.7750 \n",
"13447 399006.SZ 20100602 997.1190 967.6090 997.1190 952.6110 \n",
"13448 399006.SZ 20100601 973.2330 986.0150 994.7930 948.1180 \n",
"\n",
" pre_close change pct_chg vol amount \n",
"0 5326.9140 112.8576 2.1186 2.451180e+08 2.882574e+08 \n",
"1 5287.0333 39.8807 0.7543 2.238407e+08 2.618753e+08 \n",
"2 5845.5045 -558.4712 -9.5539 2.365227e+08 2.673974e+08 \n",
"3 5899.0865 -53.5820 -0.9083 1.349386e+08 1.736621e+08 \n",
"4 5892.8502 6.2363 0.1058 1.121600e+08 1.406421e+08 \n",
"... ... ... ... ... ... \n",
"13444 1027.6810 41.7870 4.0661 2.655275e+06 9.106095e+06 \n",
"13445 998.3940 29.2870 2.9334 1.500295e+06 5.269441e+06 \n",
"13446 997.1190 1.2750 0.1279 1.616805e+06 6.240835e+06 \n",
"13447 973.2330 23.8860 2.4543 1.074628e+06 4.001206e+06 \n",
"13448 1000.0000 -26.7670 -2.6767 1.356285e+06 4.924177e+06 \n",
"\n",
"[13449 rows x 11 columns]\n"
]
}
],
"execution_count": 3
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

190
main/data/is_st.ipynb Normal file
View File

@@ -0,0 +1,190 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:19.819017Z",
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import tushare as ts"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "48ae71ed02d61819",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"outputs": [],
"source": [
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic')\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e6606a96e5728b8",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"outputs": [],
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n"
]
}
],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Return True if stock_code was flagged ST on target_date ('YYYYMMDD').\n",
"\n",
"    name_change_dict maps ts_code -> DataFrame of ST name-change records with\n",
"    datetime columns 'start_date' / 'end_date' (see the cell that built it);\n",
"    a NaT/None end_date means the ST status is still in effect.\n",
"    \"\"\"\n",
"    target_date = datetime.strptime(target_date, '%Y%m%d')\n",
"    if stock_code not in name_change_dict:\n",
"        return False\n",
"    df = name_change_dict[stock_code]\n",
"    for i in range(len(df)):\n",
"        # ROBUSTNESS: use the named columns instead of the fragile positional\n",
"        # df.iloc[i, 2] / df.iloc[i, 3] used before (column order can change).\n",
"        sds = df['start_date'].iloc[i]\n",
"        eds = df['end_date'].iloc[i]\n",
"        if eds is None or eds is pd.NaT:\n",
"            eds = datetime.now()  # open-ended period: treat as still ST today\n",
"        # In range (inclusive on both ends) => the stock was ST on that date.\n",
"        if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
"            return True\n",
"    return False\n",
"\n",
"\n",
"print('is st...')\n",
"# Flag every daily_basic row whose stock was ST on that trade date.\n",
"daily_basic['is_st'] = daily_basic.apply(\n",
"    lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
")\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c74bc633-fc73-48c2-bb44-0a798d2cf070",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Empty DataFrame\n",
"Columns: [ts_code, trade_date, close, turnover_rate, turnover_rate_f, volume_ratio, pe, pe_ttm, pb, ps, ps_ttm, dv_ratio, dv_ttm, total_share, float_share, free_share, total_mv, circ_mv, is_st]\n",
"Index: []\n"
]
}
],
"source": [
"print(daily_basic[(daily_basic['is_st'] != True) & (daily_basic['is_st'] != False)])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0464ce15-320c-40d4-b499-2e18bac5910f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date is_st\n",
"0 002512.SZ 20250211 False\n",
"1 600966.SH 20250211 False\n",
"2 600358.SH 20250211 True\n",
"3 002893.SZ 20250211 False\n",
"4 300648.SZ 20250211 False\n"
]
}
],
"source": [
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic.h5', key='daily_basic', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30c882de-3a89-4056-900d-459a3a012af9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

273
main/data/kpl_concept.ipynb Normal file
View File

@@ -0,0 +1,273 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:28:49.275220Z",
"start_time": "2025-03-12T15:28:48.624632Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"\n",
"# SECURITY: the Tushare token was hardcoded (now a leaked credential; rotate\n",
"# it). Use the TUSHARE_TOKEN environment variable when set; the old literal\n",
"# is kept only as a backward-compatible fallback.\n",
"ts.set_token(os.getenv('TUSHARE_TOKEN', '3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f'))\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "972a5ac9f79fe373",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:28:49.280632Z",
"start_time": "2025-03-12T15:28:49.275220Z"
}
},
"outputs": [],
"source": [
"\n",
"# df = pro.cyq_perf(start_date='20220101', end_date='20220429')\n",
"# print(df)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f448da220816bf98",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:39:50.128089Z",
"start_time": "2025-03-12T15:28:49.437760Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功获取并保存 20250228 的每日基础数据\n",
"成功获取并保存 20250227 的每日基础数据\n",
"成功获取并保存 20250226 的每日基础数据\n",
"成功获取并保存 20250225 的每日基础数据\n",
"成功获取并保存 20250224 的每日基础数据\n",
"成功获取并保存 20250221 的每日基础数据\n",
"成功获取并保存 20250220 的每日基础数据\n",
"成功获取并保存 20250219 的每日基础数据\n",
"成功获取并保存 20250218 的每日基础数据\n",
"成功获取并保存 20250217 的每日基础数据\n",
"成功获取并保存 20250214 的每日基础数据\n",
"成功获取并保存 20250213 的每日基础数据\n",
"成功获取并保存 20250212 的每日基础数据\n",
"成功获取并保存 20250211 的每日基础数据\n",
"成功获取并保存 20250210 的每日基础数据\n",
"成功获取并保存 20250207 的每日基础数据\n",
"成功获取并保存 20250206 的每日基础数据\n",
"成功获取并保存 20250205 的每日基础数据\n",
"成功获取并保存 20250127 的每日基础数据\n",
"成功获取并保存 20250124 的每日基础数据\n",
"成功获取并保存 20250123 的每日基础数据\n",
"成功获取并保存 20250122 的每日基础数据\n",
"成功获取并保存 20250121 的每日基础数据\n",
"成功获取并保存 20250120 的每日基础数据\n",
"成功获取并保存 20250117 的每日基础数据\n",
"成功获取并保存 20250116 的每日基础数据\n",
"成功获取并保存 20250115 的每日基础数据\n",
"成功获取并保存 20250114 的每日基础数据\n",
"成功获取并保存 20250113 的每日基础数据\n",
"成功获取并保存 20250110 的每日基础数据\n",
"成功获取并保存 20250109 的每日基础数据\n",
"成功获取并保存 20250108 的每日基础数据\n",
"成功获取并保存 20250107 的每日基础数据\n",
"成功获取并保存 20250106 的每日基础数据\n",
"成功获取并保存 20250103 的每日基础数据\n",
"成功获取并保存 20250102 的每日基础数据\n",
"成功获取并保存 20241231 的每日基础数据\n",
"成功获取并保存 20241230 的每日基础数据\n",
"成功获取并保存 20241227 的每日基础数据\n",
"成功获取并保存 20241226 的每日基础数据\n",
"成功获取并保存 20241225 的每日基础数据\n",
"成功获取并保存 20241224 的每日基础数据\n",
"成功获取并保存 20241223 的每日基础数据\n",
"成功获取并保存 20241220 的每日基础数据\n",
"成功获取并保存 20241219 的每日基础数据\n",
"成功获取并保存 20241218 的每日基础数据\n",
"成功获取并保存 20241217 的每日基础数据\n",
"成功获取并保存 20241216 的每日基础数据\n",
"成功获取并保存 20241213 的每日基础数据\n",
"成功获取并保存 20241212 的每日基础数据\n",
"成功获取并保存 20241211 的每日基础数据\n",
"成功获取并保存 20241210 的每日基础数据\n",
"成功获取并保存 20241209 的每日基础数据\n",
"成功获取并保存 20241206 的每日基础数据\n",
"成功获取并保存 20241205 的每日基础数据\n",
"成功获取并保存 20241204 的每日基础数据\n",
"成功获取并保存 20241203 的每日基础数据\n",
"成功获取并保存 20241202 的每日基础数据\n",
"成功获取并保存 20241129 的每日基础数据\n",
"成功获取并保存 20241128 的每日基础数据\n",
"成功获取并保存 20241127 的每日基础数据\n",
"成功获取并保存 20241126 的每日基础数据\n",
"成功获取并保存 20241125 的每日基础数据\n",
"成功获取并保存 20241122 的每日基础数据\n",
"成功获取并保存 20241121 的每日基础数据\n",
"成功获取并保存 20241120 的每日基础数据\n",
"成功获取并保存 20241119 的每日基础数据\n",
"成功获取并保存 20241118 的每日基础数据\n",
"成功获取并保存 20241115 的每日基础数据\n",
"成功获取并保存 20241114 的每日基础数据\n",
"成功获取并保存 20241113 的每日基础数据\n",
"成功获取并保存 20241112 的每日基础数据\n",
"成功获取并保存 20241111 的每日基础数据\n",
"成功获取并保存 20241108 的每日基础数据\n",
"成功获取并保存 20241107 的每日基础数据\n",
"成功获取并保存 20241106 的每日基础数据\n",
"成功获取并保存 20241105 的每日基础数据\n",
"成功获取并保存 20241104 的每日基础数据\n",
"成功获取并保存 20241101 的每日基础数据\n",
"成功获取并保存 20241031 的每日基础数据\n",
"成功获取并保存 20241030 的每日基础数据\n",
"成功获取并保存 20241029 的每日基础数据\n",
"成功获取并保存 20241028 的每日基础数据\n",
"成功获取并保存 20241025 的每日基础数据\n",
"成功获取并保存 20241024 的每日基础数据\n",
"成功获取并保存 20241023 的每日基础数据\n",
"成功获取并保存 20241022 的每日基础数据\n",
"成功获取并保存 20241021 的每日基础数据\n",
"成功获取并保存 20241014 的每日基础数据\n",
"150 1741835004.3988936 1741834982.2357981\n",
"已调用 150 次 API等待 37.84 秒以满足速率限制...\n",
"300 1741835064.0700593 1741835042.2372077\n",
"已调用 150 次 API等待 38.17 秒以满足速率限制...\n",
"450 1741835124.4976892 1741835102.2381623\n",
"已调用 150 次 API等待 37.74 秒以满足速率限制...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 22\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m trade_date \u001b[38;5;129;01min\u001b[39;00m trade_dates:\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m# 获取每日基础数据\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m kpl_concept \u001b[38;5;241m=\u001b[39m pro\u001b[38;5;241m.\u001b[39mkpl_concept(trade_date\u001b[38;5;241m=\u001b[39mtrade_date)\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kpl_concept \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kpl_concept\u001b[38;5;241m.\u001b[39mempty:\n\u001b[0;32m 24\u001b[0m all_daily_data\u001b[38;5;241m.\u001b[39mappend(kpl_concept)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\tushare\\pro\\client.py:41\u001b[0m, in \u001b[0;36mDataApi.query\u001b[1;34m(self, api_name, fields, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mquery\u001b[39m(\u001b[38;5;28mself\u001b[39m, api_name, fields\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 34\u001b[0m req_params \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 35\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mapi_name\u001b[39m\u001b[38;5;124m'\u001b[39m: api_name,\n\u001b[0;32m 36\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtoken\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__token,\n\u001b[0;32m 37\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparams\u001b[39m\u001b[38;5;124m'\u001b[39m: kwargs,\n\u001b[0;32m 38\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfields\u001b[39m\u001b[38;5;124m'\u001b[39m: fields\n\u001b[0;32m 39\u001b[0m }\n\u001b[1;32m---> 41\u001b[0m res \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__http_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mapi_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, json\u001b[38;5;241m=\u001b[39mreq_params, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__timeout)\n\u001b[0;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m res:\n\u001b[0;32m 43\u001b[0m result \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mloads(res\u001b[38;5;241m.\u001b[39mtext)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[1;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[0;32m 105\u001b[0m \n\u001b[0;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m request(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url, data\u001b[38;5;241m=\u001b[39mdata, json\u001b[38;5;241m=\u001b[39mjson, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[0;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m session\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[0;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[0;32m 587\u001b[0m }\n\u001b[0;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[1;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(prep, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msend_kwargs)\n\u001b[0;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:724\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 721\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_redirects:\n\u001b[0;32m 722\u001b[0m \u001b[38;5;66;03m# Redirect resolving generator.\u001b[39;00m\n\u001b[0;32m 723\u001b[0m gen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresolve_redirects(r, request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 724\u001b[0m history \u001b[38;5;241m=\u001b[39m [resp \u001b[38;5;28;01mfor\u001b[39;00m resp \u001b[38;5;129;01min\u001b[39;00m gen]\n\u001b[0;32m 725\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 726\u001b[0m history \u001b[38;5;241m=\u001b[39m []\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:724\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 721\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_redirects:\n\u001b[0;32m 722\u001b[0m \u001b[38;5;66;03m# Redirect resolving generator.\u001b[39;00m\n\u001b[0;32m 723\u001b[0m gen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresolve_redirects(r, request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 724\u001b[0m history \u001b[38;5;241m=\u001b[39m [resp \u001b[38;5;28;01mfor\u001b[39;00m resp \u001b[38;5;129;01min\u001b[39;00m gen]\n\u001b[0;32m 725\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 726\u001b[0m history \u001b[38;5;241m=\u001b[39m []\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:265\u001b[0m, in \u001b[0;36mSessionRedirectMixin.resolve_redirects\u001b[1;34m(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)\u001b[0m\n\u001b[0;32m 263\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m req\n\u001b[0;32m 264\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 265\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msend(\n\u001b[0;32m 266\u001b[0m req,\n\u001b[0;32m 267\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream,\n\u001b[0;32m 268\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 269\u001b[0m verify\u001b[38;5;241m=\u001b[39mverify,\n\u001b[0;32m 270\u001b[0m cert\u001b[38;5;241m=\u001b[39mcert,\n\u001b[0;32m 271\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 272\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 273\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39madapter_kwargs,\n\u001b[0;32m 274\u001b[0m )\n\u001b[0;32m 276\u001b[0m extract_cookies_to_jar(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcookies, prepared_request, resp\u001b[38;5;241m.\u001b[39mraw)\n\u001b[0;32m 278\u001b[0m \u001b[38;5;66;03m# extract redirect url, if any, for the next loop\u001b[39;00m\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[0;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[1;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m adapter\u001b[38;5;241m.\u001b[39msend(request, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[0;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\requests\\adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[0;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39murlopen(\n\u001b[0;32m 668\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m 669\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 670\u001b[0m body\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mbody,\n\u001b[0;32m 671\u001b[0m headers\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[0;32m 672\u001b[0m redirect\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 673\u001b[0m assert_same_host\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 674\u001b[0m preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 675\u001b[0m decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 676\u001b[0m retries\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_retries,\n\u001b[0;32m 677\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 678\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 679\u001b[0m )\n\u001b[0;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connectionpool.py:787\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[0;32m 784\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 786\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[1;32m--> 787\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(\n\u001b[0;32m 788\u001b[0m conn,\n\u001b[0;32m 789\u001b[0m method,\n\u001b[0;32m 790\u001b[0m url,\n\u001b[0;32m 791\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout_obj,\n\u001b[0;32m 792\u001b[0m body\u001b[38;5;241m=\u001b[39mbody,\n\u001b[0;32m 793\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 794\u001b[0m chunked\u001b[38;5;241m=\u001b[39mchunked,\n\u001b[0;32m 795\u001b[0m retries\u001b[38;5;241m=\u001b[39mretries,\n\u001b[0;32m 796\u001b[0m response_conn\u001b[38;5;241m=\u001b[39mresponse_conn,\n\u001b[0;32m 797\u001b[0m preload_content\u001b[38;5;241m=\u001b[39mpreload_content,\n\u001b[0;32m 798\u001b[0m decode_content\u001b[38;5;241m=\u001b[39mdecode_content,\n\u001b[0;32m 799\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mresponse_kw,\n\u001b[0;32m 800\u001b[0m )\n\u001b[0;32m 802\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[0;32m 803\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connectionpool.py:534\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[1;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[0;32m 532\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[0;32m 533\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 534\u001b[0m response \u001b[38;5;241m=\u001b[39m conn\u001b[38;5;241m.\u001b[39mgetresponse()\n\u001b[0;32m 535\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 536\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\site-packages\\urllib3\\connection.py:516\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 513\u001b[0m _shutdown \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msock, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshutdown\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 515\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[1;32m--> 516\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mgetresponse()\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 519\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1393\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1395\u001b[0m response\u001b[38;5;241m.\u001b[39mbegin()\n\u001b[0;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[0;32m 1397\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 323\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[0;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_read_status()\n\u001b[0;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[0;32m 327\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\http\\client.py:286\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 285\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m--> 286\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfp\u001b[38;5;241m.\u001b[39mreadline(_MAXLINE \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[0;32m 288\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[1;32mE:\\Python\\anaconda\\envs\\new_trader\\Lib\\socket.py:718\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[1;34m(self, b)\u001b[0m\n\u001b[0;32m 716\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 717\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sock\u001b[38;5;241m.\u001b[39mrecv_into(b)\n\u001b[0;32m 719\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[0;32m 720\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"import tushare as ts\n",
"import pandas as pd\n",
"import time\n",
"\n",
"\n",
"# Fetch the trading calendar and keep open trading days only\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250301')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]\n",
"trade_dates = trade_cal['cal_date'].tolist()\n",
"\n",
"# Collect each day's frame; merged and persisted in the next cell\n",
"all_daily_data = []\n",
"\n",
"# API-call counter and the start time of the current rate-limit window\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"# Fetch data for every trading day\n",
"for trade_date in trade_dates:\n",
"    try:\n",
"        kpl_concept = pro.kpl_concept(trade_date=trade_date)\n",
"        if kpl_concept is not None and not kpl_concept.empty:\n",
"            all_daily_data.append(kpl_concept)\n",
"            print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
"\n",
"        # Count this API call\n",
"        api_call_count += 1\n",
"\n",
"        # Rate limit: every 150 calls (the old comment said 300, which did\n",
"        # not match the code), sleep out the remainder of the minute.\n",
"        if api_call_count % 150 == 0:\n",
"            elapsed = time.time() - batch_start_time\n",
"            if elapsed < 60:\n",
"                sleep_time = 60 - elapsed\n",
"                print(f\"已调用 150 次 API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
"                time.sleep(sleep_time)\n",
"            # Start a new rate-limit window\n",
"            batch_start_time = time.time()\n",
"\n",
"    except Exception as e:\n",
"        print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "907f732d3c397bf",
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-12T15:39:50.141920800Z",
"start_time": "2025-03-12T15:23:41.345460Z"
}
},
"outputs": [],
"source": [
"# Merge every per-day frame into one DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# Persist as an HDF5 table (queryable columns), overwriting any old file\n",
"all_daily_data_df.to_hdf('../../data/kpl_concept.h5', key='kpl_concept', mode='w', format='table', data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

2087
main/data/money_flow.ipynb Normal file

File diff suppressed because it is too large Load Diff

5906
main/data/name_change.ipynb Normal file

File diff suppressed because it is too large Load Diff

2109
main/data/stk_limit.ipynb Normal file

File diff suppressed because it is too large Load Diff

1926
main/data/ths_index.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:34.662465Z",
"start_time": "2025-04-09T14:57:33.903794Z"
}
},
"source": [
"import tushare as ts\n",
"# SECURITY: hard-coded API token — load it from an environment variable instead,\n",
"# e.g. ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.818953Z",
"start_time": "2025-04-09T14:57:34.666469Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/cyq_perf.h5'\n",
"key = '/cyq_perf'\n",
"max_date = None\n",
"# Read only the two key columns to find the latest stored trade date\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df)\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# Guard: min() on an empty list raises ValueError when the store is already up to date\n",
"assert trade_dates, f'no new trading days after {max_date}'\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"5387 920108.BJ 20250408\n",
"5388 920111.BJ 20250408\n",
"5389 920116.BJ 20250408\n",
"5390 920118.BJ 20250408\n",
"5391 920128.BJ 20250408\n",
"\n",
"[7562721 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:45.660215Z",
"start_time": "2025-04-09T14:57:42.232250Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch one day's chip-distribution data; return None when empty.\"\"\"\n",
"    time.sleep(0.1)  # crude client-side rate limiting\n",
"    data = pro.cyq_perf(trade_date=trade_date)\n",
"    if data is not None and not data.empty:\n",
"        return data\n",
"    return None\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date belonging to this future\n",
"        try:\n",
"            result = future.result()\n",
"            # Skip empty days: appending None would make the later pd.concat\n",
"            # raise ValueError if every day were empty.\n",
"            if result is not None:\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:48.970445Z",
"start_time": "2025-04-09T14:57:45.698824Z"
}
},
"source": [
"# Merge all per-day frames and append them to the existing HDF5 table.\n",
"# Guard against an empty list: pd.concat([]) raises ValueError.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"    print(\"所有每日基础数据获取并保存完毕!\")\n",
"else:\n",
"    print(\"没有新数据需要保存\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,194 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {},
"outputs": [],
"source": [
"import tushare as ts\n",
"# SECURITY: hard-coded API token — load it from an environment variable instead,\n",
"# e.g. ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"1044388 857344.SI 20170103\n",
"1044389 857411.SI 20170103\n",
"1044390 857421.SI 20170103\n",
"1044391 857431.SI 20170103\n",
"1044392 858811.SI 20170103\n",
"\n",
"[1044393 rows x 2 columns]\n",
"20250221\n",
"start_date: 20250224\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"max_date = None\n",
"# Read only the two key columns to find the latest stored trade date\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df)\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# Guard: min() on an empty list raises ValueError when the store is already up to date\n",
"assert trade_dates, f'no new trading days after {max_date}'\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250403 完成\n",
"任务 20250407 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n",
"任务 20250321 完成\n",
"任务 20250320 完成\n",
"任务 20250319 完成\n",
"任务 20250317 完成\n",
"任务 20250314 完成\n",
"任务 20250318 完成\n",
"任务 20250313 完成\n",
"任务 20250312 完成\n",
"任务 20250311 完成\n",
"任务 20250310 完成\n",
"任务 20250307 完成\n",
"任务 20250306 完成\n",
"任务 20250305 完成\n",
"任务 20250304 完成\n",
"任务 20250303 完成\n",
"任务 20250228 完成\n",
"任务 20250227 完成\n",
"任务 20250226 完成\n",
"任务 20250225 完成\n",
"任务 20250224 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch one day's Shenwan industry daily data; return None when empty.\"\"\"\n",
"    time.sleep(0.1)  # crude client-side rate limiting\n",
"    data = pro.sw_daily(trade_date=trade_date)\n",
"    if data is not None and not data.empty:\n",
"        return data\n",
"    return None\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date belonging to this future\n",
"        try:\n",
"            result = future.result()\n",
"            if result is not None:  # skip empty days so pd.concat cannot fail\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"# Merge all per-day frames and append them to the existing HDF5 table.\n",
"# Guard against an empty list: pd.concat([]) raises ValueError.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"    print(\"所有每日基础数据获取并保存完毕!\")\n",
"else:\n",
"    print(\"没有新数据需要保存\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:35.618124Z",
"start_time": "2025-04-09T14:57:34.837095Z"
}
},
"source": [
"import tushare as ts\n",
"# SECURITY: hard-coded API token — load it from an environment variable instead,\n",
"# e.g. ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:38.089531Z",
"start_time": "2025-04-09T14:57:35.854308Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"max_date = None\n",
"# Read only the two key columns to find the latest stored trade date\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df)\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# Guard: min() on an empty list raises ValueError when the store is already up to date\n",
"assert trade_dates, f'no new trading days after {max_date}'\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
".. ... ...\n",
"434 859811.SI 20250408\n",
"435 859821.SI 20250408\n",
"436 859822.SI 20250408\n",
"437 859852.SI 20250408\n",
"438 859951.SI 20250408\n",
"\n",
"[1058002 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.754159Z",
"start_time": "2025-04-09T14:57:38.104541Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch one day's Shenwan industry daily data; return None when empty.\"\"\"\n",
"    time.sleep(0.1)  # crude client-side rate limiting\n",
"    data = pro.sw_daily(trade_date=trade_date)\n",
"    if data is not None and not data.empty:\n",
"        return data\n",
"    return None\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date belonging to this future\n",
"        try:\n",
"            result = future.result()\n",
"            if result is not None:  # skip empty days so pd.concat cannot fail\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.994975Z",
"start_time": "2025-04-09T14:57:40.773783Z"
}
},
"source": [
"# Merge all per-day frames and append them to the existing HDF5 table.\n",
"# Guard against an empty list: pd.concat([]) raises ValueError.\n",
"if all_daily_data:\n",
"    all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"    all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"    print(\"所有每日基础数据获取并保存完毕!\")\n",
"else:\n",
"    print(\"没有新数据需要保存\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,442 @@
{
"cells": [
{
"cell_type": "code",
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:36.913044Z",
"start_time": "2025-04-09T14:57:36.159612Z"
}
},
"source": [
"import tushare as ts\n",
"# SECURITY: hard-coded API token — load it from an environment variable instead,\n",
"# e.g. ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:39.128278Z",
"start_time": "2025-04-09T14:57:36.918051Z"
}
},
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def filter_rows(df):\n",
"    \"\"\"Within each (name, start_date) group keep one row, preferring a non-null end_date.\"\"\"\n",
"    def select_row(group):\n",
"        # Prefer rows whose end_date is known; otherwise fall back to the first row\n",
"        valid_rows = group[group['end_date'].notna()]\n",
"        if not valid_rows.empty:\n",
"            return valid_rows.iloc[0]\n",
"        return group.iloc[0]\n",
"\n",
"    filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
"    return filtered_df.reset_index(drop=True)\n",
"\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Return True if stock_code carried an ST/*ST flag on target_date ('YYYYMMDD').\"\"\"\n",
"    target_date = datetime.strptime(target_date, '%Y%m%d')\n",
"    if stock_code not in name_change_dict:\n",
"        return False\n",
"    df = name_change_dict[stock_code]\n",
"    for _, row in df.iterrows():\n",
"        # Named-column access instead of positional iloc[i, 2]/[i, 3] —\n",
"        # survives any column reordering of the name_change frame\n",
"        sds = row['start_date']\n",
"        eds = row['end_date']\n",
"        if eds is None or eds is pd.NaT:\n",
"            eds = datetime.now()  # flag still in effect: treat the window as open-ended\n",
"        if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
"            return True\n",
"    return False\n",
"\n",
"\n",
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# Normalise the date columns (end_date may be missing -> NaT)\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
"    # Keep only 'ST' and '*ST' change records for each stock\n",
"    st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
"    if not st_data.empty:\n",
"        name_change_dict[ts_code] = filter_rows(st_data)"
],
"outputs": [],
"execution_count": 2
},
{
"cell_type": "code",
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.296046Z",
"start_time": "2025-04-09T14:57:39.339423Z"
}
},
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"h5_filename = '../../../data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"# Read only the two key columns to find the latest stored trade date\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.info())\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# Guard: min() on an empty list raises ValueError when the store is already up to date\n",
"assert trade_dates, f'no new trading days after {max_date}'\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8512911 entries, 0 to 5391\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.8+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.817010Z",
"start_time": "2025-04-09T14:58:09.326485Z"
}
},
"source": [
"# Collect each day's frame here\n",
"all_daily_data = []\n",
"\n",
"# API-call counter and the start time of the current rate-limit window\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch one day's daily-basic data and tag ST status; return None when empty.\"\"\"\n",
"    daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
"    if daily_basic_data is not None and not daily_basic_data.empty:\n",
"        daily_basic_data['trade_date'] = trade_date\n",
"        daily_basic_data['is_st'] = daily_basic_data.apply(\n",
"            lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
"        )\n",
"        time.sleep(0.2)  # crude client-side rate limiting\n",
"        return daily_basic_data\n",
"    return None\n",
"\n",
"\n",
"# Fetch every trading day's data with a small thread pool\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date belonging to this future\n",
"        try:\n",
"            result = future.result()\n",
"            if result is not None:  # skip empty days so pd.concat cannot fail\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"        # Count this API call\n",
"        api_call_count += 1\n",
"\n",
"        # Rate limit: every 150 calls (the old comment said 300, which did\n",
"        # not match the code), sleep out the remainder of the minute.\n",
"        if api_call_count % 150 == 0:\n",
"            elapsed = time.time() - batch_start_time\n",
"            if elapsed < 60:\n",
"                sleep_time = 60 - elapsed\n",
"                print(f\"已调用 150 次 API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
"                time.sleep(sleep_time)\n",
"            # Start a new rate-limit window\n",
"            batch_start_time = time.time()\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 4
},
{
"cell_type": "code",
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.864178Z",
"start_time": "2025-04-09T14:58:16.855084Z"
}
},
"source": [
"# Merge every per-day frame into one DataFrame and inspect it\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 300285.SZ 20250409 16.61 2.1086 2.2506 \n",
"1 300458.SZ 20250409 44.48 9.9286 11.7046 \n",
"2 605090.SH 20250409 23.81 0.6834 1.1888 \n",
"3 688686.SH 20250409 69.52 1.6005 5.7492 \n",
"4 002057.SZ 20250409 7.18 4.7461 7.1088 \n",
"... ... ... ... ... ... \n",
"5390 301511.SZ 20250409 12.23 3.4040 4.6900 \n",
"5391 688355.SH 20250409 15.84 1.4154 4.4898 \n",
"5392 600019.SH 20250409 6.83 0.4729 1.2898 \n",
"5393 603507.SH 20250409 22.00 30.8936 42.4775 \n",
"5394 600886.SH 20250409 14.58 0.7795 2.4989 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.11 29.0985 27.1266 2.5144 4.2913 4.1010 0.6020 \n",
"1 1.54 168.9309 168.9309 9.3966 12.3119 12.3119 0.3364 \n",
"2 1.00 11.8377 9.0427 1.7135 0.5819 0.6421 3.2226 \n",
"3 1.18 43.8690 61.1222 2.9105 9.0031 9.2377 NaN \n",
"4 1.35 19.8304 29.3370 1.7625 1.9656 2.0487 3.2191 \n",
"... ... ... ... ... ... ... ... \n",
"5390 1.36 58.1209 NaN 1.9116 1.1803 1.1129 0.3212 \n",
"5391 1.31 133.9017 29.7427 1.8103 3.6805 3.1067 NaN \n",
"5392 1.28 12.5281 15.7915 0.7518 0.4344 0.4503 4.4796 \n",
"5393 2.89 22.7537 22.7537 1.6401 1.0276 1.0276 1.3553 \n",
"5394 1.04 17.4059 16.1402 1.8424 2.0579 1.9930 3.1604 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.6020 9.970483e+04 8.039498e+04 75323.2612 1.656097e+06 \n",
"1 0.3364 6.332851e+04 5.179696e+04 43937.3622 2.816852e+06 \n",
"2 3.2226 6.492580e+04 6.426965e+04 36946.4646 1.545883e+06 \n",
"3 NaN 1.222355e+04 1.222355e+04 3402.7889 8.497809e+05 \n",
"4 3.2191 7.584828e+04 7.501396e+04 50081.8345 5.445906e+05 \n",
"... ... ... ... ... ... \n",
"5390 0.3212 6.303220e+04 3.736720e+04 27120.6014 7.708838e+05 \n",
"5391 NaN 1.239561e+04 1.239561e+04 3907.6756 1.963464e+05 \n",
"5392 4.4796 2.190864e+06 2.178208e+06 798651.6922 1.496360e+07 \n",
"5393 1.3553 1.843013e+04 1.843013e+04 13404.1045 4.054629e+05 \n",
"5394 3.1604 8.004494e+05 7.454180e+05 232532.2636 1.167055e+07 \n",
"\n",
" circ_mv is_st \n",
"0 1.335361e+06 False \n",
"1 2.303929e+06 False \n",
"2 1.530260e+06 False \n",
"3 8.497809e+05 False \n",
"4 5.386002e+05 False \n",
"... ... ... \n",
"5390 4.570009e+05 False \n",
"5391 1.963464e+05 False \n",
"5392 1.487716e+07 False \n",
"5393 4.054629e+05 False \n",
"5394 1.086819e+07 False \n",
"\n",
"[5395 rows x 19 columns]\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.881685Z",
"start_time": "2025-04-09T14:58:16.871184Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"85 002822.SZ 20250409 3.11 1.8467 1.9219 \n",
"123 603959.SH 20250409 3.27 1.7568 2.2420 \n",
"181 688282.SH 20250409 42.59 2.5546 3.0570 \n",
"259 600777.SH 20250409 2.66 1.9331 2.4597 \n",
"283 002052.SZ 20250409 6.15 1.5326 2.5481 \n",
"... ... ... ... ... ... \n",
"5286 002602.SZ 20250409 5.93 3.0376 3.5162 \n",
"5345 002501.SZ 20250409 1.89 4.3252 5.5834 \n",
"5364 600387.SH 20250409 2.34 0.0904 0.1163 \n",
"5366 002656.SZ 20250409 1.95 2.7047 3.0210 \n",
"5378 300013.SZ 20250409 3.57 2.8370 3.1107 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"85 2.59 NaN NaN 1.2023 0.5923 0.7314 0.0 \n",
"123 2.22 NaN NaN 4.3282 0.7749 1.1811 0.0 \n",
"181 1.07 NaN NaN 2.9277 172.3150 21.9335 NaN \n",
"259 0.96 6.9694 7.6204 0.8381 2.0443 2.0567 0.0 \n",
"283 0.74 NaN NaN NaN 19.5551 17.1988 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5286 3.30 84.3318 49.2129 1.6993 3.3267 2.3228 0.0 \n",
"5345 1.75 NaN NaN 7.0441 14.0701 19.7111 0.0 \n",
"5364 1.33 NaN NaN 0.3818 0.5148 0.8454 0.0 \n",
"5366 1.75 NaN NaN 3.8456 4.7986 5.9354 0.0 \n",
"5378 0.90 NaN NaN 8.2438 4.8281 4.2666 0.0 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"85 NaN 73467.1821 56245.3696 54046.3738 2.284829e+05 \n",
"123 NaN 49029.8992 49029.8992 38419.3842 1.603278e+05 \n",
"181 NaN 8800.0000 3652.0000 3051.8414 3.747920e+05 \n",
"259 NaN 680049.5825 636615.2391 500325.8436 1.808932e+06 \n",
"283 NaN 74595.9694 74595.5944 44867.2806 4.587652e+05 \n",
"... ... ... ... ... ... \n",
"5286 NaN 745255.6968 687870.8273 594244.1179 4.419366e+06 \n",
"5345 NaN 355000.0000 354999.9006 274999.9006 6.709500e+05 \n",
"5364 NaN 46814.4464 40404.8492 31411.4405 1.095458e+05 \n",
"5366 NaN 71251.9844 60945.7555 54564.8212 1.389414e+05 \n",
"5378 NaN 55835.8894 44606.0865 40680.8215 1.993341e+05 \n",
"\n",
" circ_mv is_st \n",
"85 1.749231e+05 True \n",
"123 1.603278e+05 True \n",
"181 1.555387e+05 True \n",
"259 1.693397e+06 True \n",
"283 4.587629e+05 True \n",
"... ... ... \n",
"5286 4.079074e+06 True \n",
"5345 6.709498e+05 True \n",
"5364 9.454735e+04 True \n",
"5366 1.188442e+05 True \n",
"5378 1.592437e+05 True \n",
"\n",
"[106 rows x 19 columns]\n"
]
}
],
"execution_count": 6
},
{
"cell_type": "code",
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.773453Z",
"start_time": "2025-04-09T14:58:16.903459Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 7
},
{
"cell_type": "code",
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:24.305403Z",
"start_time": "2025-04-09T14:58:17.816332Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8518306 entries, 0 to 5394\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 203.1+ MB\n",
"None\n"
]
}
],
"execution_count": 8
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:19.819017Z",
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"source": [
"import pandas as pd\n",
"import tushare as ts"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"cell_type": "code",
"source": [
    "daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date'])\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
],
"id": "48ae71ed02d61819",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"cell_type": "code",
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "e6606a96e5728b8",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"# 判断股票是否为 ST 的函数\n",
"#stock_code = 'xxxxxx.SH'\n",
"#target_date = '20200830'\n",
"#若为ST返回True否则返回False\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
    "        # end_date is NaT (from pd.to_datetime(..., errors='coerce')) for stocks\n",
    "        # that are still ST; treat that as an open-ended period. The old check\n",
    "        # `eds == None` never matched NaT, and the fallback assigned an unused\n",
    "        # `ed` variable, so currently-ST stocks were reported as not ST.\n",
    "        if pd.isna(eds):\n",
    "            eds = datetime.now()\n",
    "        if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"\n",
"print('is st...')\n",
"# 创建一个新的列 is_st判断每只股票是否是 ST\n",
"daily_basic['is_st'] = daily_basic.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
")\n",
"\n",
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603429.SH 20250127 False\n",
"1 300917.SZ 20250127 False\n",
"2 301266.SZ 20250127 False\n",
"3 688399.SH 20250127 False\n",
"4 603737.SH 20250127 False\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,195 @@
{
"cells": [
{
"cell_type": "code",
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.184418Z",
"start_time": "2025-04-09T14:57:39.137312Z"
}
},
"source": [
"import tushare as ts\n",
"\n",
    "# SECURITY NOTE(review): this API token is committed in plain text; rotate it\n",
    "# and load it from an environment variable (e.g. os.environ['TUSHARE_TOKEN']).\n",
    "ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:10.515830Z",
"start_time": "2025-04-09T14:57:40.190466Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/money_flow.h5'\n",
"key = '/money_flow'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8353711 entries, 0 to 5126\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 191.2+ MB\n",
"None\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
" if money_flow_data is not None and not money_flow_data.empty:\n",
" return money_flow_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
    "            if result is not None:  # get_data returns None on empty days; None would break pd.concat\n",
    "                all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.214168Z",
"start_time": "2025-04-09T14:58:17.210734Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
],
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:19.633456Z",
"start_time": "2025-04-09T14:58:17.229837Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 5
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,238 @@
{
"cells": [
{
"cell_type": "code",
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"source": [
"import tushare as ts\n",
"\n",
    "# SECURITY NOTE(review): this API token is committed in plain text; rotate it\n",
    "# and load it from an environment variable (e.g. os.environ['TUSHARE_TOKEN']).\n",
    "ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4721 600284.SH 20250408\n",
"4722 600285.SH 20250408\n",
"4723 600287.SH 20250408\n",
"4712 600272.SH 20250408\n",
"5 000008.SZ 20250408\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10315620 entries, 0 to 14151\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 236.1+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
" return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" if result is not None:\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250411 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"source": [
"print(all_daily_data)\n",
"# 将所有数据合并为一个 DataFrame\n",
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250409 000001.SZ 11.90 9.74\n",
"1 20250409 000002.SZ 7.48 6.12\n",
"2 20250409 000004.SZ 9.53 7.79\n",
"3 20250409 000006.SZ 6.28 5.14\n",
"4 20250409 000007.SZ 5.91 4.83\n",
"... ... ... ... ...\n",
"7077 20250409 920108.BJ 26.55 14.31\n",
"7078 20250409 920111.BJ 30.84 16.62\n",
"7079 20250409 920116.BJ 100.29 54.01\n",
"7080 20250409 920118.BJ 31.62 17.04\n",
"7081 20250409 920128.BJ 35.26 19.00\n",
"\n",
"[7082 rows x 4 columns]]\n"
]
}
],
"execution_count": 4
},
{
"cell_type": "code",
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"source": [
"\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}