RollingRank赚钱- Sharp-1.43
This commit is contained in:
183
main/data/update/cyq-perf.ipynb
Normal file
183
main/data/update/cyq-perf.ipynb
Normal file
@@ -0,0 +1,183 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:34.662465Z",
|
||||
"start_time": "2025-04-09T14:57:33.903794Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:41.818953Z",
|
||||
"start_time": "2025-04-09T14:57:34.666469Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/cyq_perf.h5'\n",
|
||||
"key = '/cyq_perf'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 000001.SZ 20250312\n",
|
||||
"1 000002.SZ 20250312\n",
|
||||
"2 000004.SZ 20250312\n",
|
||||
"3 000006.SZ 20250312\n",
|
||||
"4 000007.SZ 20250312\n",
|
||||
"... ... ...\n",
|
||||
"5387 920108.BJ 20250408\n",
|
||||
"5388 920111.BJ 20250408\n",
|
||||
"5389 920116.BJ 20250408\n",
|
||||
"5390 920118.BJ 20250408\n",
|
||||
"5391 920128.BJ 20250408\n",
|
||||
"\n",
|
||||
"[7562721 rows x 2 columns]\n",
|
||||
"20250408\n",
|
||||
"start_date: 20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:45.660215Z",
|
||||
"start_time": "2025-04-09T14:57:42.232250Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.cyq_perf(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:48.970445Z",
|
||||
"start_time": "2025-04-09T14:57:45.698824Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
194
main/data/update/index_data.ipynb
Normal file
194
main/data/update/index_data.ipynb
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
"... ... ...\n",
|
||||
"1044388 857344.SI 20170103\n",
|
||||
"1044389 857411.SI 20170103\n",
|
||||
"1044390 857421.SI 20170103\n",
|
||||
"1044391 857431.SI 20170103\n",
|
||||
"1044392 858811.SI 20170103\n",
|
||||
"\n",
|
||||
"[1044393 rows x 2 columns]\n",
|
||||
"20250221\n",
|
||||
"start_date: 20250224\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/sw_daily.h5'\n",
|
||||
"key = '/sw_daily'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250408 完成\n",
|
||||
"任务 20250403 完成\n",
|
||||
"任务 20250407 完成\n",
|
||||
"任务 20250402 完成\n",
|
||||
"任务 20250401 完成\n",
|
||||
"任务 20250331 完成\n",
|
||||
"任务 20250328 完成\n",
|
||||
"任务 20250327 完成\n",
|
||||
"任务 20250326 完成\n",
|
||||
"任务 20250325 完成\n",
|
||||
"任务 20250324 完成\n",
|
||||
"任务 20250321 完成\n",
|
||||
"任务 20250320 完成\n",
|
||||
"任务 20250319 完成\n",
|
||||
"任务 20250317 完成\n",
|
||||
"任务 20250314 完成\n",
|
||||
"任务 20250318 完成\n",
|
||||
"任务 20250313 完成\n",
|
||||
"任务 20250312 完成\n",
|
||||
"任务 20250311 完成\n",
|
||||
"任务 20250310 完成\n",
|
||||
"任务 20250307 完成\n",
|
||||
"任务 20250306 完成\n",
|
||||
"任务 20250305 完成\n",
|
||||
"任务 20250304 完成\n",
|
||||
"任务 20250303 完成\n",
|
||||
"任务 20250228 完成\n",
|
||||
"任务 20250227 完成\n",
|
||||
"任务 20250226 完成\n",
|
||||
"任务 20250225 完成\n",
|
||||
"任务 20250224 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.sw_daily(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
183
main/data/update/sw_daily.ipynb
Normal file
183
main/data/update/sw_daily.ipynb
Normal file
@@ -0,0 +1,183 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:35.618124Z",
|
||||
"start_time": "2025-04-09T14:57:34.837095Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:38.089531Z",
|
||||
"start_time": "2025-04-09T14:57:35.854308Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/sw_daily.h5'\n",
|
||||
"key = '/sw_daily'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df)\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"0 801001.SI 20250221\n",
|
||||
"1 801002.SI 20250221\n",
|
||||
"2 801003.SI 20250221\n",
|
||||
"3 801005.SI 20250221\n",
|
||||
"4 801010.SI 20250221\n",
|
||||
".. ... ...\n",
|
||||
"434 859811.SI 20250408\n",
|
||||
"435 859821.SI 20250408\n",
|
||||
"436 859822.SI 20250408\n",
|
||||
"437 859852.SI 20250408\n",
|
||||
"438 859951.SI 20250408\n",
|
||||
"\n",
|
||||
"[1058002 rows x 2 columns]\n",
|
||||
"20250408\n",
|
||||
"start_date: 20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:40.754159Z",
|
||||
"start_time": "2025-04-09T14:57:38.104541Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" data = pro.sw_daily(trade_date=trade_date)\n",
|
||||
" if data is not None and not data.empty:\n",
|
||||
" return data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "c6765638-481f-40d8-a259-2e7b25362618",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:40.994975Z",
|
||||
"start_time": "2025-04-09T14:57:40.773783Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
442
main/data/update/update_daily_basic.ipynb
Normal file
442
main/data/update/update_daily_basic.ipynb
Normal file
@@ -0,0 +1,442 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:36.913044Z",
|
||||
"start_time": "2025-04-09T14:57:36.159612Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "14671a7f72de2564",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:39.128278Z",
|
||||
"start_time": "2025-04-09T14:57:36.918051Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings(\"ignore\")\n",
|
||||
"def filter_rows(df):\n",
|
||||
" # 按照 name 和 start_date 分组\n",
|
||||
" def select_row(group):\n",
|
||||
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
|
||||
" valid_rows = group[group['end_date'].notna()]\n",
|
||||
" if not valid_rows.empty:\n",
|
||||
" return valid_rows.iloc[0] # 返回第一个有效行\n",
|
||||
" else:\n",
|
||||
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
|
||||
"\n",
|
||||
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
|
||||
" filtered_df = filtered_df.reset_index(drop=True)\n",
|
||||
" return filtered_df\n",
|
||||
"\n",
|
||||
"def is_st(name_change_dict, stock_code, target_date):\n",
|
||||
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
|
||||
" if stock_code not in name_change_dict.keys():\n",
|
||||
" return False\n",
|
||||
" df = name_change_dict[stock_code]\n",
|
||||
" for i in range(len(df)):\n",
|
||||
" sds = df.iloc[i, 2]\n",
|
||||
" eds = df.iloc[i, 3]\n",
|
||||
" if eds is None or eds is pd.NaT:\n",
|
||||
" eds = datetime.now()\n",
|
||||
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
|
||||
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
|
||||
"\n",
|
||||
"# 确保 name_change_df 的日期格式正确\n",
|
||||
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
|
||||
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
|
||||
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
|
||||
"name_change_dict = {}\n",
|
||||
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
|
||||
" # 只保留 'ST' 和 '*ST' 的记录\n",
|
||||
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
|
||||
" if not st_data.empty:\n",
|
||||
" name_change_dict[ts_code] = filter_rows(st_data)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "e7f8cce2f80e2f20",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:09.296046Z",
|
||||
"start_time": "2025-04-09T14:57:39.339423Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import time\n",
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/daily_basic.h5'\n",
|
||||
"key = '/daily_basic'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df.info())\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(start_date)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8512911 entries, 0 to 5391\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 194.8+ MB\n",
|
||||
"None\n",
|
||||
"20250408\n",
|
||||
"20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:16.817010Z",
|
||||
"start_time": "2025-04-09T14:58:09.326485Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"# 使用 HDFStore 存储数据\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
|
||||
" if daily_basic_data is not None and not daily_basic_data.empty:\n",
|
||||
" # 添加交易日期列标识\n",
|
||||
" daily_basic_data['trade_date'] = trade_date\n",
|
||||
" daily_basic_data['is_st'] = daily_basic_data.apply(\n",
|
||||
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
|
||||
" )\n",
|
||||
" time.sleep(0.2)\n",
|
||||
" # print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
|
||||
" return daily_basic_data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 遍历每个交易日期并获取数据\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
" # 计数一次 API 调用\n",
|
||||
" api_call_count += 1\n",
|
||||
"\n",
|
||||
" # 每调用 150 次,检查时间是否少于 1 分钟,如果少于则等待剩余时间\n",
|
||||
" if api_call_count % 150 == 0:\n",
|
||||
" elapsed = time.time() - batch_start_time\n",
|
||||
" if elapsed < 60:\n",
|
||||
" sleep_time = 60 - elapsed\n",
|
||||
" print(f\"已调用 150 次 API,等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
|
||||
" time.sleep(sleep_time)\n",
|
||||
" # 重置批次起始时间\n",
|
||||
" batch_start_time = time.time()\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "919023c693d7a47a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:16.864178Z",
|
||||
"start_time": "2025-04-09T14:58:16.855084Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
|
||||
"print(all_daily_data_df)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"0 300285.SZ 20250409 16.61 2.1086 2.2506 \n",
|
||||
"1 300458.SZ 20250409 44.48 9.9286 11.7046 \n",
|
||||
"2 605090.SH 20250409 23.81 0.6834 1.1888 \n",
|
||||
"3 688686.SH 20250409 69.52 1.6005 5.7492 \n",
|
||||
"4 002057.SZ 20250409 7.18 4.7461 7.1088 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5390 301511.SZ 20250409 12.23 3.4040 4.6900 \n",
|
||||
"5391 688355.SH 20250409 15.84 1.4154 4.4898 \n",
|
||||
"5392 600019.SH 20250409 6.83 0.4729 1.2898 \n",
|
||||
"5393 603507.SH 20250409 22.00 30.8936 42.4775 \n",
|
||||
"5394 600886.SH 20250409 14.58 0.7795 2.4989 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"0 1.11 29.0985 27.1266 2.5144 4.2913 4.1010 0.6020 \n",
|
||||
"1 1.54 168.9309 168.9309 9.3966 12.3119 12.3119 0.3364 \n",
|
||||
"2 1.00 11.8377 9.0427 1.7135 0.5819 0.6421 3.2226 \n",
|
||||
"3 1.18 43.8690 61.1222 2.9105 9.0031 9.2377 NaN \n",
|
||||
"4 1.35 19.8304 29.3370 1.7625 1.9656 2.0487 3.2191 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5390 1.36 58.1209 NaN 1.9116 1.1803 1.1129 0.3212 \n",
|
||||
"5391 1.31 133.9017 29.7427 1.8103 3.6805 3.1067 NaN \n",
|
||||
"5392 1.28 12.5281 15.7915 0.7518 0.4344 0.4503 4.4796 \n",
|
||||
"5393 2.89 22.7537 22.7537 1.6401 1.0276 1.0276 1.3553 \n",
|
||||
"5394 1.04 17.4059 16.1402 1.8424 2.0579 1.9930 3.1604 \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"0 0.6020 9.970483e+04 8.039498e+04 75323.2612 1.656097e+06 \n",
|
||||
"1 0.3364 6.332851e+04 5.179696e+04 43937.3622 2.816852e+06 \n",
|
||||
"2 3.2226 6.492580e+04 6.426965e+04 36946.4646 1.545883e+06 \n",
|
||||
"3 NaN 1.222355e+04 1.222355e+04 3402.7889 8.497809e+05 \n",
|
||||
"4 3.2191 7.584828e+04 7.501396e+04 50081.8345 5.445906e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5390 0.3212 6.303220e+04 3.736720e+04 27120.6014 7.708838e+05 \n",
|
||||
"5391 NaN 1.239561e+04 1.239561e+04 3907.6756 1.963464e+05 \n",
|
||||
"5392 4.4796 2.190864e+06 2.178208e+06 798651.6922 1.496360e+07 \n",
|
||||
"5393 1.3553 1.843013e+04 1.843013e+04 13404.1045 4.054629e+05 \n",
|
||||
"5394 3.1604 8.004494e+05 7.454180e+05 232532.2636 1.167055e+07 \n",
|
||||
"\n",
|
||||
" circ_mv is_st \n",
|
||||
"0 1.335361e+06 False \n",
|
||||
"1 2.303929e+06 False \n",
|
||||
"2 1.530260e+06 False \n",
|
||||
"3 8.497809e+05 False \n",
|
||||
"4 5.386002e+05 False \n",
|
||||
"... ... ... \n",
|
||||
"5390 4.570009e+05 False \n",
|
||||
"5391 1.963464e+05 False \n",
|
||||
"5392 1.487716e+07 False \n",
|
||||
"5393 4.054629e+05 False \n",
|
||||
"5394 1.086819e+07 False \n",
|
||||
"\n",
|
||||
"[5395 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "28cb78d032671b20",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:16.881685Z",
|
||||
"start_time": "2025-04-09T14:58:16.871184Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"print(all_daily_data_df[all_daily_data_df['is_st']])"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||||
"85 002822.SZ 20250409 3.11 1.8467 1.9219 \n",
|
||||
"123 603959.SH 20250409 3.27 1.7568 2.2420 \n",
|
||||
"181 688282.SH 20250409 42.59 2.5546 3.0570 \n",
|
||||
"259 600777.SH 20250409 2.66 1.9331 2.4597 \n",
|
||||
"283 002052.SZ 20250409 6.15 1.5326 2.5481 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5286 002602.SZ 20250409 5.93 3.0376 3.5162 \n",
|
||||
"5345 002501.SZ 20250409 1.89 4.3252 5.5834 \n",
|
||||
"5364 600387.SH 20250409 2.34 0.0904 0.1163 \n",
|
||||
"5366 002656.SZ 20250409 1.95 2.7047 3.0210 \n",
|
||||
"5378 300013.SZ 20250409 3.57 2.8370 3.1107 \n",
|
||||
"\n",
|
||||
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
||||
"85 2.59 NaN NaN 1.2023 0.5923 0.7314 0.0 \n",
|
||||
"123 2.22 NaN NaN 4.3282 0.7749 1.1811 0.0 \n",
|
||||
"181 1.07 NaN NaN 2.9277 172.3150 21.9335 NaN \n",
|
||||
"259 0.96 6.9694 7.6204 0.8381 2.0443 2.0567 0.0 \n",
|
||||
"283 0.74 NaN NaN NaN 19.5551 17.1988 0.0 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"5286 3.30 84.3318 49.2129 1.6993 3.3267 2.3228 0.0 \n",
|
||||
"5345 1.75 NaN NaN 7.0441 14.0701 19.7111 0.0 \n",
|
||||
"5364 1.33 NaN NaN 0.3818 0.5148 0.8454 0.0 \n",
|
||||
"5366 1.75 NaN NaN 3.8456 4.7986 5.9354 0.0 \n",
|
||||
"5378 0.90 NaN NaN 8.2438 4.8281 4.2666 0.0 \n",
|
||||
"\n",
|
||||
" dv_ttm total_share float_share free_share total_mv \\\n",
|
||||
"85 NaN 73467.1821 56245.3696 54046.3738 2.284829e+05 \n",
|
||||
"123 NaN 49029.8992 49029.8992 38419.3842 1.603278e+05 \n",
|
||||
"181 NaN 8800.0000 3652.0000 3051.8414 3.747920e+05 \n",
|
||||
"259 NaN 680049.5825 636615.2391 500325.8436 1.808932e+06 \n",
|
||||
"283 NaN 74595.9694 74595.5944 44867.2806 4.587652e+05 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"5286 NaN 745255.6968 687870.8273 594244.1179 4.419366e+06 \n",
|
||||
"5345 NaN 355000.0000 354999.9006 274999.9006 6.709500e+05 \n",
|
||||
"5364 NaN 46814.4464 40404.8492 31411.4405 1.095458e+05 \n",
|
||||
"5366 NaN 71251.9844 60945.7555 54564.8212 1.389414e+05 \n",
|
||||
"5378 NaN 55835.8894 44606.0865 40680.8215 1.993341e+05 \n",
|
||||
"\n",
|
||||
" circ_mv is_st \n",
|
||||
"85 1.749231e+05 True \n",
|
||||
"123 1.603278e+05 True \n",
|
||||
"181 1.555387e+05 True \n",
|
||||
"259 1.693397e+06 True \n",
|
||||
"283 4.587629e+05 True \n",
|
||||
"... ... ... \n",
|
||||
"5286 4.079074e+06 True \n",
|
||||
"5345 6.709498e+05 True \n",
|
||||
"5364 9.454735e+04 True \n",
|
||||
"5366 1.188442e+05 True \n",
|
||||
"5378 1.592437e+05 True \n",
|
||||
"\n",
|
||||
"[106 rows x 19 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "692b58674b7462c9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:17.773453Z",
|
||||
"start_time": "2025-04-09T14:58:16.903459Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 7
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "d7a773fc20293477",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:24.305403Z",
|
||||
"start_time": "2025-04-09T14:58:17.816332Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
|
||||
" print(df.info())"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8518306 entries, 0 to 5394\n",
|
||||
"Data columns (total 3 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
" 2 is_st bool \n",
|
||||
"dtypes: bool(1), object(2)\n",
|
||||
"memory usage: 203.1+ MB\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 8
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
5925
main/data/update/update_daily_data.ipynb
Normal file
5925
main/data/update/update_daily_data.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
149
main/data/update/update_is_st.ipynb
Normal file
149
main/data/update/update_is_st.ipynb
Normal file
@@ -0,0 +1,149 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "17cc645336d4eb18",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-08T16:55:19.819017Z",
|
||||
"start_time": "2025-02-08T16:55:18.958639Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import tushare as ts"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-08T16:55:27.578361Z",
|
||||
"start_time": "2025-02-08T16:55:19.882313Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date'])\n",
|
||||
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
|
||||
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
|
||||
"\n",
|
||||
"# 确保 name_change_df 的日期格式正确\n",
|
||||
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
|
||||
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
|
||||
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
|
||||
],
|
||||
"id": "48ae71ed02d61819",
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-08T16:55:27.938078Z",
|
||||
"start_time": "2025-02-08T16:55:27.584226Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"name_change_dict = {}\n",
|
||||
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
|
||||
" # 只保留 'ST' 和 '*ST' 的记录\n",
|
||||
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
|
||||
" if not st_data.empty:\n",
|
||||
" name_change_dict[ts_code] = st_data"
|
||||
],
|
||||
"id": "e6606a96e5728b8",
|
||||
"outputs": [],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-02-08T16:59:20.537632Z",
|
||||
"start_time": "2025-02-08T16:55:27.971219Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 判断股票是否为 ST 的函数\n",
|
||||
"#stock_code = 'xxxxxx.SH'\n",
|
||||
"#target_date = '20200830'\n",
|
||||
"#若为ST,返回True;否则返回False\n",
|
||||
"def is_st(name_change_dict, stock_code, target_date):\n",
|
||||
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
|
||||
" if stock_code not in name_change_dict.keys():\n",
|
||||
" return False\n",
|
||||
" df = name_change_dict[stock_code]\n",
|
||||
" for i in range(len(df)):\n",
|
||||
" sds = df.iloc[i, 2]\n",
|
||||
" eds = df.iloc[i, 3]\n",
|
||||
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
|
||||
" if eds == None:\n",
|
||||
" ed = datetime.now()\n",
|
||||
" # else:\n",
|
||||
" # ed = datetime.strptime(eds, '%Y%m%d')\n",
|
||||
"        if (target_date - sds).days >= 0 and (pd.isna(eds) or (target_date - eds).days <= 0):\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print('is st...')\n",
|
||||
"# 创建一个新的列 is_st,判断每只股票是否是 ST\n",
|
||||
"daily_basic['is_st'] = daily_basic.apply(\n",
|
||||
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# 保存结果到新的 HDF5 文件\n",
|
||||
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
|
||||
"\n",
|
||||
"# 输出部分结果\n",
|
||||
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
|
||||
],
|
||||
"id": "initial_id",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"is st...\n",
|
||||
" ts_code trade_date is_st\n",
|
||||
"0 603429.SH 20250127 False\n",
|
||||
"1 300917.SZ 20250127 False\n",
|
||||
"2 301266.SZ 20250127 False\n",
|
||||
"3 688399.SH 20250127 False\n",
|
||||
"4 603737.SH 20250127 False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
195
main/data/update/update_money_flow.ipynb
Normal file
195
main/data/update/update_money_flow.ipynb
Normal file
@@ -0,0 +1,195 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:40.184418Z",
|
||||
"start_time": "2025-04-09T14:57:39.137312Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "742c29d453b9bb38",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:10.515830Z",
|
||||
"start_time": "2025-04-09T14:57:40.190466Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/money_flow.h5'\n",
|
||||
"key = '/money_flow'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df.info())\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(f'start_date: {start_date}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 8353711 entries, 0 to 5126\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 191.2+ MB\n",
|
||||
"None\n",
|
||||
"20250408\n",
|
||||
"start_date: 20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:17.197319Z",
|
||||
"start_time": "2025-04-09T14:58:10.724923Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
|
||||
" if money_flow_data is not None and not money_flow_data.empty:\n",
|
||||
" return money_flow_data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250411 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "9af80516849d4e80",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:17.214168Z",
|
||||
"start_time": "2025-04-09T14:58:17.210734Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:19.633456Z",
|
||||
"start_time": "2025-04-09T14:58:17.229837Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
5902
main/data/update/update_name_change.ipynb
Normal file
5902
main/data/update/update_name_change.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
238
main/data/update/update_stk_limit.ipynb
Normal file
238
main/data/update/update_stk_limit.ipynb
Normal file
@@ -0,0 +1,238 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:57:41.532210Z",
|
||||
"start_time": "2025-04-09T14:57:40.584930Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"\n",
|
||||
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
|
||||
"pro = ts.pro_api()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "5a84bc9da6d54868",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:04.911924Z",
|
||||
"start_time": "2025-04-09T14:57:41.540345Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"h5_filename = '../../../data/stk_limit.h5'\n",
|
||||
"key = '/stk_limit'\n",
|
||||
"max_date = None\n",
|
||||
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
|
||||
" df = store[key][['ts_code', 'trade_date']]\n",
|
||||
" print(df.sort_values(by='trade_date', ascending=True).tail())\n",
|
||||
" print(df.info())\n",
|
||||
" max_date = df['trade_date'].max()\n",
|
||||
"\n",
|
||||
"print(max_date)\n",
|
||||
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
|
||||
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
|
||||
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
|
||||
"start_date = min(trade_dates)\n",
|
||||
"print(start_date)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ts_code trade_date\n",
|
||||
"4721 600284.SH 20250408\n",
|
||||
"4722 600285.SH 20250408\n",
|
||||
"4723 600287.SH 20250408\n",
|
||||
"4712 600272.SH 20250408\n",
|
||||
"5 000008.SZ 20250408\n",
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"Index: 10315620 entries, 0 to 14151\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Dtype \n",
|
||||
"--- ------ ----- \n",
|
||||
" 0 ts_code object\n",
|
||||
" 1 trade_date object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 236.1+ MB\n",
|
||||
"None\n",
|
||||
"20250408\n",
|
||||
"20250409\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:09.342522Z",
|
||||
"start_time": "2025-04-09T14:58:05.259974Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
||||
"\n",
|
||||
"all_daily_data = []\n",
|
||||
"\n",
|
||||
"# API 调用计数和时间控制变量\n",
|
||||
"api_call_count = 0\n",
|
||||
"batch_start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(trade_date):\n",
|
||||
" time.sleep(0.1)\n",
|
||||
" stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
|
||||
" if stk_limit_data is not None and not stk_limit_data.empty:\n",
|
||||
" return stk_limit_data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
|
||||
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
|
||||
"\n",
|
||||
" for future in as_completed(future_to_date):\n",
|
||||
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
|
||||
" try:\n",
|
||||
" result = future.result() # 获取任务执行的结果\n",
|
||||
" if result is not None:\n",
|
||||
" all_daily_data.append(result)\n",
|
||||
" print(f\"任务 {trade_date} 完成\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
|
||||
"\n"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"任务 20250417 完成\n",
|
||||
"任务 20250418 完成\n",
|
||||
"任务 20250416 完成\n",
|
||||
"任务 20250415 完成\n",
|
||||
"任务 20250414 完成\n",
|
||||
"任务 20250410 完成\n",
|
||||
"任务 20250409 完成\n",
|
||||
"任务 20250411 完成\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "96a81aa5890ea3c3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:09.353560Z",
|
||||
"start_time": "2025-04-09T14:58:09.346528Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"print(all_daily_data)\n",
|
||||
"# 将所有数据合并为一个 DataFrame\n",
|
||||
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[ trade_date ts_code up_limit down_limit\n",
|
||||
"0 20250409 000001.SZ 11.90 9.74\n",
|
||||
"1 20250409 000002.SZ 7.48 6.12\n",
|
||||
"2 20250409 000004.SZ 9.53 7.79\n",
|
||||
"3 20250409 000006.SZ 6.28 5.14\n",
|
||||
"4 20250409 000007.SZ 5.91 4.83\n",
|
||||
"... ... ... ... ...\n",
|
||||
"7077 20250409 920108.BJ 26.55 14.31\n",
|
||||
"7078 20250409 920111.BJ 30.84 16.62\n",
|
||||
"7079 20250409 920116.BJ 100.29 54.01\n",
|
||||
"7080 20250409 920118.BJ 31.62 17.04\n",
|
||||
"7081 20250409 920128.BJ 35.26 19.00\n",
|
||||
"\n",
|
||||
"[7082 rows x 4 columns]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:09.674078Z",
|
||||
"start_time": "2025-04-09T14:58:09.366441Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"# 将数据保存为 HDF5 文件(table 格式)\n",
|
||||
"all_daily_data_df.to_hdf(h5_filename, key='stk_limit', mode='a', format='table', append=True, data_columns=True)\n",
|
||||
"\n",
|
||||
"print(\"所有每日基础数据获取并保存完毕!\")"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"所有每日基础数据获取并保存完毕!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-09T14:58:09.689422Z",
|
||||
"start_time": "2025-04-09T14:58:09.686524Z"
|
||||
}
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user