RollingRank赚钱- Sharp-1.43

This commit is contained in:
liaozhaorun
2025-04-28 11:02:52 +08:00
parent 94cd9aa6c8
commit 9e598d4ed0
93 changed files with 18134 additions and 4342 deletions

View File

@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:34.662465Z",
"start_time": "2025-04-09T14:57:33.903794Z"
}
},
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.818953Z",
"start_time": "2025-04-09T14:57:34.666469Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/cyq_perf.h5'\n",
"key = '/cyq_perf'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df)\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 000001.SZ 20250312\n",
"1 000002.SZ 20250312\n",
"2 000004.SZ 20250312\n",
"3 000006.SZ 20250312\n",
"4 000007.SZ 20250312\n",
"... ... ...\n",
"5387 920108.BJ 20250408\n",
"5388 920111.BJ 20250408\n",
"5389 920116.BJ 20250408\n",
"5390 920118.BJ 20250408\n",
"5391 920128.BJ 20250408\n",
"\n",
"[7562721 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:45.660215Z",
"start_time": "2025-04-09T14:57:42.232250Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" data = pro.cyq_perf(trade_date=trade_date)\n",
" if data is not None and not data.empty:\n",
" return data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:48.970445Z",
"start_time": "2025-04-09T14:57:45.698824Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,194 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {},
"outputs": [],
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
"... ... ...\n",
"1044388 857344.SI 20170103\n",
"1044389 857411.SI 20170103\n",
"1044390 857421.SI 20170103\n",
"1044391 857431.SI 20170103\n",
"1044392 858811.SI 20170103\n",
"\n",
"[1044393 rows x 2 columns]\n",
"20250221\n",
"start_date: 20250224\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df)\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250408 完成\n",
"任务 20250403 完成\n",
"任务 20250407 完成\n",
"任务 20250402 完成\n",
"任务 20250401 完成\n",
"任务 20250331 完成\n",
"任务 20250328 完成\n",
"任务 20250327 完成\n",
"任务 20250326 完成\n",
"任务 20250325 完成\n",
"任务 20250324 完成\n",
"任务 20250321 完成\n",
"任务 20250320 完成\n",
"任务 20250319 完成\n",
"任务 20250317 完成\n",
"任务 20250314 完成\n",
"任务 20250318 完成\n",
"任务 20250313 完成\n",
"任务 20250312 完成\n",
"任务 20250311 完成\n",
"任务 20250310 完成\n",
"任务 20250307 完成\n",
"任务 20250306 完成\n",
"任务 20250305 完成\n",
"任务 20250304 完成\n",
"任务 20250303 完成\n",
"任务 20250228 完成\n",
"任务 20250227 完成\n",
"任务 20250226 完成\n",
"任务 20250225 完成\n",
"任务 20250224 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"index_list = ['399300.SH', '000905.SH', '000852.SH', '399006.SZ']\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" data = pro.sw_daily(trade_date=trade_date)\n",
" if data is not None and not data.empty:\n",
" return data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"id": "f74ce078-f7e8-4733-a14c-14d8815a3626",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:35.618124Z",
"start_time": "2025-04-09T14:57:34.837095Z"
}
},
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "44dd8d87-e60b-49e5-aed9-efaa7f92d4fe",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:38.089531Z",
"start_time": "2025-04-09T14:57:35.854308Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/sw_daily.h5'\n",
"key = '/sw_daily'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df)\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"0 801001.SI 20250221\n",
"1 801002.SI 20250221\n",
"2 801003.SI 20250221\n",
"3 801005.SI 20250221\n",
"4 801010.SI 20250221\n",
".. ... ...\n",
"434 859811.SI 20250408\n",
"435 859821.SI 20250408\n",
"436 859822.SI 20250408\n",
"437 859852.SI 20250408\n",
"438 859951.SI 20250408\n",
"\n",
"[1058002 rows x 2 columns]\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "747acc47-0884-4f76-90fb-276f6494e31d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.754159Z",
"start_time": "2025-04-09T14:57:38.104541Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" data = pro.sw_daily(trade_date=trade_date)\n",
" if data is not None and not data.empty:\n",
" return data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250415 完成\n",
"任务 20250416 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "c6765638-481f-40d8-a259-2e7b25362618",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.994975Z",
"start_time": "2025-04-09T14:57:40.773783Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key=key, mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,442 @@
{
"cells": [
{
"cell_type": "code",
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:36.913044Z",
"start_time": "2025-04-09T14:57:36.159612Z"
}
},
"source": [
"import tushare as ts\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "14671a7f72de2564",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:39.128278Z",
"start_time": "2025-04-09T14:57:36.918051Z"
}
},
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" if eds is None or eds is pd.NaT:\n",
" eds = datetime.now()\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = filter_rows(st_data)"
],
"outputs": [],
"execution_count": 2
},
{
"cell_type": "code",
"id": "e7f8cce2f80e2f20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.296046Z",
"start_time": "2025-04-09T14:57:39.339423Z"
}
},
"source": [
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"h5_filename = '../../../data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8512911 entries, 0 to 5391\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 194.8+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.817010Z",
"start_time": "2025-04-09T14:58:09.326485Z"
}
},
"source": [
"\n",
"\n",
"# 使用 HDFStore 存储数据\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
" if daily_basic_data is not None and not daily_basic_data.empty:\n",
" # 添加交易日期列标识\n",
" daily_basic_data['trade_date'] = trade_date\n",
" daily_basic_data['is_st'] = daily_basic_data.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
" )\n",
" time.sleep(0.2)\n",
" # print(f\"成功获取并保存 {trade_date} 的每日基础数据\")\n",
" return daily_basic_data\n",
"\n",
"\n",
"# 遍历每个交易日期并获取数据\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
" # 计数一次 API 调用\n",
" api_call_count += 1\n",
"\n",
" # 每调用 300 次,检查时间是否少于 1 分钟,如果少于则等待剩余时间\n",
" if api_call_count % 150 == 0:\n",
" elapsed = time.time() - batch_start_time\n",
" if elapsed < 60:\n",
" sleep_time = 60 - elapsed\n",
" print(f\"已调用 150 次 API等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
" time.sleep(sleep_time)\n",
" # 重置批次起始时间\n",
" batch_start_time = time.time()\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250418 完成\n",
"任务 20250417 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250411 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 4
},
{
"cell_type": "code",
"id": "919023c693d7a47a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.864178Z",
"start_time": "2025-04-09T14:58:16.855084Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"0 300285.SZ 20250409 16.61 2.1086 2.2506 \n",
"1 300458.SZ 20250409 44.48 9.9286 11.7046 \n",
"2 605090.SH 20250409 23.81 0.6834 1.1888 \n",
"3 688686.SH 20250409 69.52 1.6005 5.7492 \n",
"4 002057.SZ 20250409 7.18 4.7461 7.1088 \n",
"... ... ... ... ... ... \n",
"5390 301511.SZ 20250409 12.23 3.4040 4.6900 \n",
"5391 688355.SH 20250409 15.84 1.4154 4.4898 \n",
"5392 600019.SH 20250409 6.83 0.4729 1.2898 \n",
"5393 603507.SH 20250409 22.00 30.8936 42.4775 \n",
"5394 600886.SH 20250409 14.58 0.7795 2.4989 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"0 1.11 29.0985 27.1266 2.5144 4.2913 4.1010 0.6020 \n",
"1 1.54 168.9309 168.9309 9.3966 12.3119 12.3119 0.3364 \n",
"2 1.00 11.8377 9.0427 1.7135 0.5819 0.6421 3.2226 \n",
"3 1.18 43.8690 61.1222 2.9105 9.0031 9.2377 NaN \n",
"4 1.35 19.8304 29.3370 1.7625 1.9656 2.0487 3.2191 \n",
"... ... ... ... ... ... ... ... \n",
"5390 1.36 58.1209 NaN 1.9116 1.1803 1.1129 0.3212 \n",
"5391 1.31 133.9017 29.7427 1.8103 3.6805 3.1067 NaN \n",
"5392 1.28 12.5281 15.7915 0.7518 0.4344 0.4503 4.4796 \n",
"5393 2.89 22.7537 22.7537 1.6401 1.0276 1.0276 1.3553 \n",
"5394 1.04 17.4059 16.1402 1.8424 2.0579 1.9930 3.1604 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"0 0.6020 9.970483e+04 8.039498e+04 75323.2612 1.656097e+06 \n",
"1 0.3364 6.332851e+04 5.179696e+04 43937.3622 2.816852e+06 \n",
"2 3.2226 6.492580e+04 6.426965e+04 36946.4646 1.545883e+06 \n",
"3 NaN 1.222355e+04 1.222355e+04 3402.7889 8.497809e+05 \n",
"4 3.2191 7.584828e+04 7.501396e+04 50081.8345 5.445906e+05 \n",
"... ... ... ... ... ... \n",
"5390 0.3212 6.303220e+04 3.736720e+04 27120.6014 7.708838e+05 \n",
"5391 NaN 1.239561e+04 1.239561e+04 3907.6756 1.963464e+05 \n",
"5392 4.4796 2.190864e+06 2.178208e+06 798651.6922 1.496360e+07 \n",
"5393 1.3553 1.843013e+04 1.843013e+04 13404.1045 4.054629e+05 \n",
"5394 3.1604 8.004494e+05 7.454180e+05 232532.2636 1.167055e+07 \n",
"\n",
" circ_mv is_st \n",
"0 1.335361e+06 False \n",
"1 2.303929e+06 False \n",
"2 1.530260e+06 False \n",
"3 8.497809e+05 False \n",
"4 5.386002e+05 False \n",
"... ... ... \n",
"5390 4.570009e+05 False \n",
"5391 1.963464e+05 False \n",
"5392 1.487716e+07 False \n",
"5393 4.054629e+05 False \n",
"5394 1.086819e+07 False \n",
"\n",
"[5395 rows x 19 columns]\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "28cb78d032671b20",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:16.881685Z",
"start_time": "2025-04-09T14:58:16.871184Z"
}
},
"source": [
"print(all_daily_data_df[all_daily_data_df['is_st']])"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
"85 002822.SZ 20250409 3.11 1.8467 1.9219 \n",
"123 603959.SH 20250409 3.27 1.7568 2.2420 \n",
"181 688282.SH 20250409 42.59 2.5546 3.0570 \n",
"259 600777.SH 20250409 2.66 1.9331 2.4597 \n",
"283 002052.SZ 20250409 6.15 1.5326 2.5481 \n",
"... ... ... ... ... ... \n",
"5286 002602.SZ 20250409 5.93 3.0376 3.5162 \n",
"5345 002501.SZ 20250409 1.89 4.3252 5.5834 \n",
"5364 600387.SH 20250409 2.34 0.0904 0.1163 \n",
"5366 002656.SZ 20250409 1.95 2.7047 3.0210 \n",
"5378 300013.SZ 20250409 3.57 2.8370 3.1107 \n",
"\n",
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
"85 2.59 NaN NaN 1.2023 0.5923 0.7314 0.0 \n",
"123 2.22 NaN NaN 4.3282 0.7749 1.1811 0.0 \n",
"181 1.07 NaN NaN 2.9277 172.3150 21.9335 NaN \n",
"259 0.96 6.9694 7.6204 0.8381 2.0443 2.0567 0.0 \n",
"283 0.74 NaN NaN NaN 19.5551 17.1988 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"5286 3.30 84.3318 49.2129 1.6993 3.3267 2.3228 0.0 \n",
"5345 1.75 NaN NaN 7.0441 14.0701 19.7111 0.0 \n",
"5364 1.33 NaN NaN 0.3818 0.5148 0.8454 0.0 \n",
"5366 1.75 NaN NaN 3.8456 4.7986 5.9354 0.0 \n",
"5378 0.90 NaN NaN 8.2438 4.8281 4.2666 0.0 \n",
"\n",
" dv_ttm total_share float_share free_share total_mv \\\n",
"85 NaN 73467.1821 56245.3696 54046.3738 2.284829e+05 \n",
"123 NaN 49029.8992 49029.8992 38419.3842 1.603278e+05 \n",
"181 NaN 8800.0000 3652.0000 3051.8414 3.747920e+05 \n",
"259 NaN 680049.5825 636615.2391 500325.8436 1.808932e+06 \n",
"283 NaN 74595.9694 74595.5944 44867.2806 4.587652e+05 \n",
"... ... ... ... ... ... \n",
"5286 NaN 745255.6968 687870.8273 594244.1179 4.419366e+06 \n",
"5345 NaN 355000.0000 354999.9006 274999.9006 6.709500e+05 \n",
"5364 NaN 46814.4464 40404.8492 31411.4405 1.095458e+05 \n",
"5366 NaN 71251.9844 60945.7555 54564.8212 1.389414e+05 \n",
"5378 NaN 55835.8894 44606.0865 40680.8215 1.993341e+05 \n",
"\n",
" circ_mv is_st \n",
"85 1.749231e+05 True \n",
"123 1.603278e+05 True \n",
"181 1.555387e+05 True \n",
"259 1.693397e+06 True \n",
"283 4.587629e+05 True \n",
"... ... ... \n",
"5286 4.079074e+06 True \n",
"5345 6.709498e+05 True \n",
"5364 9.454735e+04 True \n",
"5366 1.188442e+05 True \n",
"5378 1.592437e+05 True \n",
"\n",
"[106 rows x 19 columns]\n"
]
}
],
"execution_count": 6
},
{
"cell_type": "code",
"id": "692b58674b7462c9",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.773453Z",
"start_time": "2025-04-09T14:58:16.903459Z"
}
},
"source": [
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='daily_basic', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 7
},
{
"cell_type": "code",
"id": "d7a773fc20293477",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:24.305403Z",
"start_time": "2025-04-09T14:58:17.816332Z"
}
},
"source": [
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
" print(df.info())"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8518306 entries, 0 to 5394\n",
"Data columns (total 3 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
" 2 is_st bool \n",
"dtypes: bool(1), object(2)\n",
"memory usage: 203.1+ MB\n",
"None\n"
]
}
],
"execution_count": 8
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:19.819017Z",
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"source": [
"import pandas as pd\n",
"import tushare as ts"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"cell_type": "code",
"source": [
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date '])\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
],
"id": "48ae71ed02d61819",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"cell_type": "code",
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "e6606a96e5728b8",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"# 判断股票是否为 ST 的函数\n",
"#stock_code = 'xxxxxx.SH'\n",
"#target_date = '20200830'\n",
"#若为ST返回True否则返回False\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
" if eds == None:\n",
" ed = datetime.now()\n",
" # else:\n",
" # ed = datetime.strptime(eds, '%Y%m%d')\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"\n",
"print('is st...')\n",
"# 创建一个新的列 is_st判断每只股票是否是 ST\n",
"daily_basic['is_st'] = daily_basic.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
")\n",
"\n",
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603429.SH 20250127 False\n",
"1 300917.SZ 20250127 False\n",
"2 301266.SZ 20250127 False\n",
"3 688399.SH 20250127 False\n",
"4 603737.SH 20250127 False\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,195 @@
{
"cells": [
{
"cell_type": "code",
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.184418Z",
"start_time": "2025-04-09T14:57:39.137312Z"
}
},
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:10.515830Z",
"start_time": "2025-04-09T14:57:40.190466Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/money_flow.h5'\n",
"key = '/money_flow'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20250420')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8353711 entries, 0 to 5126\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 191.2+ MB\n",
"None\n",
"20250408\n",
"start_date: 20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
" if money_flow_data is not None and not money_flow_data.empty:\n",
" return money_flow_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250411 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.214168Z",
"start_time": "2025-04-09T14:58:17.210734Z"
}
},
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
],
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:19.633456Z",
"start_time": "2025-04-09T14:58:17.229837Z"
}
},
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 5
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,238 @@
{
"cells": [
{
"cell_type": "code",
"id": "500802dc-7a20-48b7-a470-a4bae3ec534b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:41.532210Z",
"start_time": "2025-04-09T14:57:40.584930Z"
}
},
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the environment instead of hardcoding it in the\n",
"# notebook, so the credential is not stored in version control.\n",
"# NOTE(review): the token that was previously committed here should be revoked and reissued.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('TUSHARE_TOKEN environment variable is not set')\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "5a84bc9da6d54868",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:04.911924Z",
"start_time": "2025-04-09T14:57:41.540345Z"
}
},
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '../../../data/stk_limit.h5'\n",
"key = '/stk_limit'\n",
"max_date = None\n",
"# Open the existing store read-only and find the latest trade_date already saved.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    print(df.sort_values(by='trade_date', ascending=True).tail())\n",
"    df.info()  # info() prints its own report and returns None, so it is not wrapped in print()\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# Query the calendar up to the current local date rather than a fixed end date, so the\n",
"# incremental update keeps working after that fixed date has passed.\n",
"end_date = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=end_date)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# min() raises ValueError on an empty list, so handle the already-up-to-date case explicitly.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(start_date)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date\n",
"4721 600284.SH 20250408\n",
"4722 600285.SH 20250408\n",
"4723 600287.SH 20250408\n",
"4712 600272.SH 20250408\n",
"5 000008.SZ 20250408\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10315620 entries, 0 to 14151\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 236.1+ MB\n",
"None\n",
"20250408\n",
"20250409\n"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "bb3191de-27a2-4c89-a3b5-32a0d7b9496f",
"metadata": {
"scrolled": true,
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.342522Z",
"start_time": "2025-04-09T14:58:05.259974Z"
}
},
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []  # one DataFrame per trading day that returned rows\n",
"failed_dates = []  # trading days whose request raised an exception\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the stk_limit rows for a single trading day.\n",
"\n",
"    Args:\n",
"        trade_date: value passed through as the trade_date argument of pro.stk_limit.\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by pro.stk_limit, or None when the API returned\n",
"        None or an empty frame.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before each call; presumably for API rate limiting\n",
"    stk_limit_data = pro.stk_limit(trade_date=trade_date)\n",
"    if stk_limit_data is None or stk_limit_data.empty:\n",
"        return None\n",
"    return stk_limit_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    # Map each future back to the trading day it was submitted for.\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]\n",
"        try:\n",
"            result = future.result()  # re-raises any exception raised inside the worker\n",
"            if result is not None:\n",
"                all_daily_data.append(result)\n",
"            print(f\"任务 {trade_date} 完成\")\n",
"        except Exception as e:\n",
"            # Record the failure: if later dates are saved, the max-date lookup in the\n",
"            # loading cell will skip past this day on the next run.\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n",
"if failed_dates:\n",
"    print(f\"以下交易日获取失败,请在保存前重试: {sorted(failed_dates)}\")\n",
"\n"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20250417 完成\n",
"任务 20250418 完成\n",
"任务 20250416 完成\n",
"任务 20250415 完成\n",
"任务 20250414 完成\n",
"任务 20250410 完成\n",
"任务 20250409 完成\n",
"任务 20250411 完成\n"
]
}
],
"execution_count": 3
},
{
"cell_type": "code",
"id": "96a81aa5890ea3c3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.353560Z",
"start_time": "2025-04-09T14:58:09.346528Z"
}
},
"source": [
"# Merge the per-day frames into one DataFrame. Missing (None) results are skipped and an\n",
"# empty frame is used when nothing was fetched, because pd.concat raises on an empty list.\n",
"daily_frames = [frame for frame in all_daily_data if frame is not None]\n",
"# Print a short summary instead of dumping every fetched frame into the cell output.\n",
"print(f\"共获取 {len(daily_frames)} 个交易日的数据\")\n",
"all_daily_data_df = pd.concat(daily_frames, ignore_index=True) if daily_frames else pd.DataFrame()"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ trade_date ts_code up_limit down_limit\n",
"0 20250409 000001.SZ 11.90 9.74\n",
"1 20250409 000002.SZ 7.48 6.12\n",
"2 20250409 000004.SZ 9.53 7.79\n",
"3 20250409 000006.SZ 6.28 5.14\n",
"4 20250409 000007.SZ 5.91 4.83\n",
"... ... ... ... ...\n",
"7077 20250409 920108.BJ 26.55 14.31\n",
"7078 20250409 920111.BJ 30.84 16.62\n",
"7079 20250409 920116.BJ 100.29 54.01\n",
"7080 20250409 920118.BJ 31.62 17.04\n",
"7081 20250409 920128.BJ 35.26 19.00\n",
"\n",
"[7082 rows x 4 columns]]\n"
]
}
],
"execution_count": 4
},
{
"cell_type": "code",
"id": "ad9733a1-2f42-43ee-a98c-0bf699304c21",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.674078Z",
"start_time": "2025-04-09T14:58:09.366441Z"
}
},
"source": [
"# Append the merged rows to the HDF5 file, stored in table format.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='stk_limit',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"execution_count": 5
},
{
"cell_type": "code",
"id": "7e777f1f-4d54-4a74-b916-691ede6af055",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:09.689422Z",
"start_time": "2025-04-09T14:58:09.686524Z"
}
},
"source": [],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}