2025-02-12 00:21:33 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:08.050676Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:07.328483Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# The Tushare token is read from the environment. Never paste the literal here:\n",
"# notebooks are committed and shared, and so is everything typed into them.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
|
2025-04-09 22:57:01 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"execution_count": 1
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "14671a7f72de2564",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:10.251715Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:08.055681Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"from datetime import datetime\n",
|
|
|
|
|
|
"import pandas as pd\n",
|
2025-03-31 23:08:03 +08:00
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
|
|
|
"def filter_rows(df):\n",
"    \"\"\"Keep one row per (name, start_date) pair, preferring a row with a known end_date.\n",
"\n",
"    Args:\n",
"        df: DataFrame with at least the columns 'name', 'start_date' and 'end_date'.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with the same columns and dtypes as ``df``, ordered by\n",
"        (name, start_date) and re-indexed from 0. For each pair the first row whose\n",
"        end_date is not missing is kept; if every end_date is missing, the first row\n",
"        is kept. Rows whose name or start_date is missing are dropped.\n",
"    \"\"\"\n",
"    key_cols = ['name', 'start_date']\n",
"    flag_col = '_end_date_missing'\n",
"\n",
"    # Done without groupby().apply(): a row-returning apply rebuilds the frame row by row\n",
"    # (column dtypes are lost) and newer pandas no longer hands it the grouping columns.\n",
"    # Rows with a missing key never formed a group under groupby(); keep dropping them.\n",
"    candidates = df.dropna(subset=key_cols)\n",
"\n",
"    # Stable sort on a boolean flag: rows that have an end_date move ahead of rows that\n",
"    # do not, and the original order is preserved inside each of the two sets.\n",
"    ranked = candidates.assign(**{flag_col: candidates['end_date'].isna()})\n",
"    ranked = ranked.sort_values(flag_col, kind='stable')\n",
"\n",
"    # The first occurrence of each key is therefore the preferred row.\n",
"    filtered_df = ranked.drop_duplicates(subset=key_cols, keep='first').drop(columns=flag_col)\n",
"    return filtered_df.sort_values(key_cols).reset_index(drop=True)\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
|
|
|
|
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Tell whether a stock was under ST / *ST treatment on a given day.\n",
"\n",
"    Args:\n",
"        name_change_dict: mapping of ts_code to a DataFrame of ST periods that has\n",
"            'start_date' and 'end_date' columns holding datetime-like values.\n",
"        stock_code: ts_code to look up.\n",
"        target_date: day to test, as a 'YYYYMMDD' string.\n",
"\n",
"    Returns:\n",
"        True if target_date lies inside any recorded period (start and end day both\n",
"        count), otherwise False. A code that is absent from the mapping is not ST.\n",
"\n",
"    Raises:\n",
"        ValueError: if target_date is not a 'YYYYMMDD' string.\n",
"    \"\"\"\n",
"    target_date = datetime.strptime(target_date, '%Y%m%d')\n",
"    if stock_code not in name_change_dict:\n",
"        return False\n",
"\n",
"    periods = name_change_dict[stock_code]\n",
"    # Columns are addressed by name so that a change in column order cannot silently\n",
"    # make the comparison use the wrong fields.\n",
"    for period_start, period_end in zip(periods['start_date'], periods['end_date']):\n",
"        # pd.isna covers None, NaT and NaN alike; a missing end date means the period\n",
"        # is still open, so it is taken to run until now.\n",
"        if pd.isna(period_end):\n",
"            period_end = datetime.now()\n",
"        if (target_date - period_start).days >= 0 and (target_date - period_end).days <= 0:\n",
"            return True\n",
"    return False\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# Parse the date columns. An end_date that cannot be parsed becomes NaT, which is_st\n",
"# treats as a period that is still open.\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"\n",
"# Keep the records whose name contains 'ST'. na=False makes a missing name count as\n",
"# \"no match\" instead of putting NaN into the mask, which boolean indexing rejects.\n",
"name_change_df = name_change_df[name_change_df['name'].str.contains('ST', regex=False, na=False)]\n",
"\n",
"# Of those, only the records whose change_reason is 'ST' or '*ST' describe an ST period.\n",
"st_records = name_change_df[name_change_df['change_reason'].isin(['ST', '*ST'])]\n",
"\n",
"# One de-duplicated table of ST periods per stock code.\n",
"name_change_dict = {}\n",
"for ts_code, group in st_records.groupby('ts_code'):\n",
"    name_change_dict[ts_code] = filter_rows(group)"
|
2025-04-09 22:57:01 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"execution_count": 2
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "e7f8cce2f80e2f20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:37.727419Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:10.461897Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import time\n",
|
|
|
|
|
|
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"from datetime import datetime\n",
"\n",
"h5_filename = '../../../data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    # DataFrame.info() prints its own report and returns None, so it is not wrapped in print().\n",
"    df.info()\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"\n",
"# Ask for the calendar up to today rather than up to a fixed date: a fixed end date\n",
"# requests days that have not happened yet and stops yielding new days once it has passed.\n",
"today = datetime.now().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # trading days only\n",
"\n",
"# Trading days that are newer than the last day already stored ('YYYYMMDD' strings compare in date order).\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"\n",
"# min() raises on an empty list; None means the store is already up to date.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(start_date)"
|
2025-04-09 22:57:01 +08:00
|
|
|
|
],
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
|
"Index: 8507519 entries, 0 to 5390\n",
|
|
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
"dtypes: object(2)\n",
|
|
|
|
|
|
"memory usage: 194.7+ MB\n",
|
|
|
|
|
|
"None\n",
|
|
|
|
|
|
"20250407\n",
|
|
|
|
|
|
"20250408\n"
|
2025-02-15 23:33:34 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"execution_count": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"scrolled": true,
|
|
|
|
|
|
"ExecuteTime": {
|
|
|
|
|
|
"end_time": "2025-04-08T13:37:39.056144Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:37.770718Z"
|
|
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"# Frames returned by get_data are collected here and concatenated in a later cell.\n",
"all_daily_data = []\n",
"\n",
"# Request counter and window start time used by the rate-limit check of the fetch loop.\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"def get_data(trade_date):\n",
"    \"\"\"Download the daily_basic table for one trading day and add an is_st flag.\n",
"\n",
"    Args:\n",
"        trade_date: trading day as a 'YYYYMMDD' string.\n",
"\n",
"    Returns:\n",
"        Whatever pro.daily_basic returned. When it has rows, its 'trade_date' column is\n",
"        set to trade_date and a boolean 'is_st' column is added; a None or empty\n",
"        response is returned unchanged.\n",
"    \"\"\"\n",
"    daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
"    # Pause after every request, including the ones that return nothing, so that empty\n",
"    # days cannot be requested back to back without any throttling.\n",
"    time.sleep(0.2)\n",
"    if daily_basic_data is not None and not daily_basic_data.empty:\n",
"        daily_basic_data['trade_date'] = trade_date\n",
"        # One lookup per ts_code; the date is the same for every row of this frame.\n",
"        daily_basic_data['is_st'] = [\n",
"            is_st(name_change_dict, ts_code, trade_date) for ts_code in daily_basic_data['ts_code']\n",
"        ]\n",
"    return daily_basic_data\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fetch every pending trading day, at most 150 requests per 60-second window.\n",
"# Requests are submitted in chunks so that the pause really holds back new API calls;\n",
"# sleeping after all futures have been queued would not slow the workers down.\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    for chunk_begin in range(0, len(trade_dates), 150):\n",
"        chunk = trade_dates[chunk_begin:chunk_begin + 150]\n",
"        batch_start_time = time.time()\n",
"        future_to_date = {executor.submit(get_data, td): td for td in chunk}\n",
"\n",
"        for future in as_completed(future_to_date):\n",
"            trade_date = future_to_date[future]  # the day this future was submitted for\n",
"            try:\n",
"                result = future.result()  # re-raises whatever get_data raised\n",
"                all_daily_data.append(result)\n",
"                print(f\"任务 {trade_date} 完成\")\n",
"            except Exception as e:\n",
"                # A failed day is reported and skipped; the remaining days still run.\n",
"                print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            # Count the request whether it succeeded or failed.\n",
"            api_call_count += 1\n",
"\n",
"        # Wait out the rest of the 60-second window only when another chunk follows.\n",
"        more_to_fetch = chunk_begin + 150 < len(trade_dates)\n",
"        elapsed = time.time() - batch_start_time\n",
"        if more_to_fetch and elapsed < 60:\n",
"            sleep_time = 60 - elapsed\n",
"            print(f\"已调用 150 次 API,等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
"            time.sleep(sleep_time)\n",
|
|
|
|
|
|
"\n"
|
2025-04-09 22:57:01 +08:00
|
|
|
|
],
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"任务 20250417 完成\n",
|
|
|
|
|
|
"任务 20250418 完成\n",
|
|
|
|
|
|
"任务 20250416 完成\n",
|
|
|
|
|
|
"任务 20250415 完成\n",
|
|
|
|
|
|
"任务 20250411 完成\n",
|
|
|
|
|
|
"任务 20250414 完成\n",
|
|
|
|
|
|
"任务 20250410 完成\n",
|
|
|
|
|
|
"任务 20250409 完成\n",
|
|
|
|
|
|
"任务 20250408 完成\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"execution_count": 4
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "919023c693d7a47a",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:39.072117Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:39.062189Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Leave out days for which nothing came back (None or an empty frame): they carry no\n",
"# rows and, lacking the is_st column, may affect the dtypes of the concatenated result.\n",
"fetched_frames = [frame for frame in all_daily_data if frame is not None and not frame.empty]\n",
"if not fetched_frames:\n",
"    raise ValueError('No new daily_basic rows were fetched; there is nothing to append.')\n",
"\n",
"all_daily_data_df = pd.concat(fetched_frames, ignore_index=True)\n",
"print(all_daily_data_df)"
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-04-08 20:32:51 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"0 300504.SZ 20250408 12.65 2.5494 4.8465 \n",
|
|
|
|
|
|
"1 002223.SZ 20250408 34.24 0.9832 1.6194 \n",
|
|
|
|
|
|
"2 002036.SZ 20250408 9.13 7.4710 8.1827 \n",
|
|
|
|
|
|
"3 688207.SH 20250408 12.29 4.6144 4.6144 \n",
|
|
|
|
|
|
"4 002401.SZ 20250408 13.88 4.9037 9.6159 \n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"... ... ... ... ... ... \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"5387 600610.SH 20250408 7.56 18.8004 29.6937 \n",
|
|
|
|
|
|
"5388 002215.SZ 20250408 8.84 5.7658 6.7838 \n",
|
|
|
|
|
|
"5389 600694.SH 20250408 25.00 3.3101 5.4481 \n",
|
|
|
|
|
|
"5390 600121.SH 20250408 3.66 3.0305 6.3012 \n",
|
|
|
|
|
|
"5391 873167.BJ 20250408 21.56 7.8805 14.2434 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
|
|
|
|
|
"0 1.56 34.0479 220.6414 1.5349 1.3422 1.7126 1.5892 \n",
|
|
|
|
|
|
"1 1.07 14.3268 19.7636 2.8291 4.3058 4.6786 3.5030 \n",
|
|
|
|
|
|
"2 2.45 NaN NaN 3.6899 0.9822 0.9210 0.0000 \n",
|
|
|
|
|
|
"3 1.61 NaN NaN 1.5605 12.1348 26.4230 NaN \n",
|
|
|
|
|
|
"4 1.44 40.4258 40.4258 3.0931 2.8715 2.8715 1.2977 \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"5387 1.18 NaN NaN 122.1550 7.3648 7.3648 0.0000 \n",
|
|
|
|
|
|
"5388 2.49 37.7118 20.0533 2.2997 2.1570 1.7934 1.7092 \n",
|
|
|
|
|
|
"5389 3.51 15.4938 13.3524 0.9057 1.0676 1.1271 3.6364 \n",
|
|
|
|
|
|
"5390 1.13 15.7764 15.7764 2.3738 1.0605 1.0605 0.0000 \n",
|
|
|
|
|
|
"5391 0.79 33.5290 65.6770 3.2183 7.0572 9.9201 NaN \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
" dv_ttm total_share float_share free_share total_mv \\\n",
|
|
|
|
|
|
"0 1.5892 27102.4580 21826.2631 11481.0786 3.428461e+05 \n",
|
|
|
|
|
|
"1 3.5030 100247.6929 93867.3649 56990.4202 3.432481e+06 \n",
|
|
|
|
|
|
"2 NaN 105938.4915 105290.9483 96132.5171 9.672184e+05 \n",
|
|
|
|
|
|
"3 NaN 25897.3147 18867.6306 18867.6306 3.182780e+05 \n",
|
|
|
|
|
|
"4 1.2977 37166.8440 37136.3940 18937.9540 5.158758e+05 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"5387 NaN 107127.4605 70872.6705 44872.6705 8.098836e+05 \n",
|
|
|
|
|
|
"5388 1.7092 100519.1310 79400.9515 67486.1454 8.885891e+05 \n",
|
|
|
|
|
|
"5389 3.6364 31305.2571 31305.2571 19020.4513 7.826314e+05 \n",
|
|
|
|
|
|
"5390 NaN 121841.2038 121841.2038 58597.2758 4.459388e+05 \n",
|
|
|
|
|
|
"5391 NaN 7086.1250 4178.1867 2311.6822 1.527769e+05 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
" circ_mv is_st \n",
|
|
|
|
|
|
"0 2.761022e+05 False \n",
|
|
|
|
|
|
"1 3.214019e+06 False \n",
|
|
|
|
|
|
"2 9.613064e+05 False \n",
|
|
|
|
|
|
"3 2.318832e+05 False \n",
|
|
|
|
|
|
"4 5.154531e+05 False \n",
|
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
|
"5387 5.357974e+05 False \n",
|
|
|
|
|
|
"5388 7.019044e+05 False \n",
|
|
|
|
|
|
"5389 7.826314e+05 False \n",
|
|
|
|
|
|
"5390 4.459388e+05 False \n",
|
|
|
|
|
|
"5391 9.008171e+04 False \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"[5392 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"execution_count": 5
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "28cb78d032671b20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:39.103515Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:39.093908Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Boolean-mask selection: print only the rows that are flagged as ST.\n",
"print(all_daily_data_df.loc[all_daily_data_df['is_st']])"
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-04-08 20:32:51 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"20 000488.SZ 20250408 1.74 2.5808 3.5449 \n",
|
|
|
|
|
|
"21 603608.SH 20250408 4.20 0.2313 0.3624 \n",
|
|
|
|
|
|
"88 603363.SH 20250408 3.35 1.2763 1.4156 \n",
|
|
|
|
|
|
"124 000989.SZ 20250408 7.60 2.5216 3.5863 \n",
|
|
|
|
|
|
"136 300965.SZ 20250408 36.20 1.9389 2.6640 \n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"... ... ... ... ... ... \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"5261 603879.SH 20250408 4.13 4.3647 6.8212 \n",
|
|
|
|
|
|
"5273 002024.SZ 20250408 1.76 0.5005 1.3623 \n",
|
|
|
|
|
|
"5298 603828.SH 20250408 4.43 1.3711 2.7554 \n",
|
|
|
|
|
|
"5337 600234.SH 20250408 5.53 0.5518 1.0422 \n",
|
|
|
|
|
|
"5370 300536.SZ 20250408 7.99 2.2037 2.7214 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"20 0.69 NaN NaN 0.5590 0.2252 0.2252 0.0000 \n",
|
|
|
|
|
|
"21 0.35 NaN NaN 1.5767 1.3841 1.5604 0.0000 \n",
|
|
|
|
|
|
"88 2.09 NaN NaN NaN 0.4481 0.7781 0.0000 \n",
|
|
|
|
|
|
"124 1.71 30.0883 30.0883 1.7332 2.7432 2.7432 5.2053 \n",
|
|
|
|
|
|
"136 1.27 NaN NaN 1.7736 NaN NaN 0.0829 \n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"... ... ... ... ... ... ... ... \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"5261 1.67 NaN NaN 5.6207 4.0072 4.0072 0.0000 \n",
|
|
|
|
|
|
"5273 1.06 26.7044 26.7044 1.3118 0.2871 0.2871 0.0000 \n",
|
|
|
|
|
|
"5298 0.38 NaN NaN 3.5130 1.0396 1.0348 0.0000 \n",
|
|
|
|
|
|
"5337 2.28 NaN NaN 3.2963 20.7089 9.4391 0.0000 \n",
|
|
|
|
|
|
"5370 0.86 NaN NaN 4.2696 32.8078 24.2873 0.0000 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
" dv_ttm total_share float_share free_share total_mv \\\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"20 NaN 294145.6200 167582.4530 122004.3211 5.118134e+05 \n",
|
|
|
|
|
|
"21 NaN 41971.5446 41971.5446 26785.1109 1.762805e+05 \n",
|
|
|
|
|
|
"88 NaN 260296.1826 146776.2912 132325.9245 8.719922e+05 \n",
|
|
|
|
|
|
"124 5.2053 85594.2012 69415.3353 48807.3173 6.505159e+05 \n",
|
|
|
|
|
|
"136 0.0829 6000.0000 2060.9250 1500.0000 2.172000e+05 \n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"... ... ... ... ... ... \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"5261 NaN 35934.4440 35934.4440 22993.7696 1.484093e+05 \n",
|
|
|
|
|
|
"5273 NaN 926476.7618 925444.1318 340007.5385 1.630599e+06 \n",
|
|
|
|
|
|
"5298 NaN 59596.0158 59593.9625 29654.2988 2.640103e+05 \n",
|
|
|
|
|
|
"5337 NaN 26252.0973 26252.0973 13899.8888 1.451741e+05 \n",
|
|
|
|
|
|
"5370 NaN 29328.8133 29325.3240 23747.3240 2.343372e+05 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
" circ_mv is_st \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"20 2.915935e+05 True \n",
|
|
|
|
|
|
"21 1.762805e+05 True \n",
|
|
|
|
|
|
"88 4.917006e+05 True \n",
|
|
|
|
|
|
"124 5.275565e+05 True \n",
|
|
|
|
|
|
"136 7.460549e+04 True \n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"... ... ... \n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"5261 1.484093e+05 True \n",
|
|
|
|
|
|
"5273 1.628782e+06 True \n",
|
|
|
|
|
|
"5298 2.640013e+05 True \n",
|
|
|
|
|
|
"5337 1.451741e+05 True \n",
|
|
|
|
|
|
"5370 2.343093e+05 True \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-04-08 20:32:51 +08:00
|
|
|
|
"[106 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"execution_count": 6
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "692b58674b7462c9",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:39.921445Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:39.128232Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Append the new rows to the 'daily_basic' table of the HDF5 store (table format,\n",
"# every column stored as a data column).\n",
"# NOTE: appending is not idempotent - running this cell again stores the same rows again.\n",
"with pd.HDFStore(h5_filename, mode='a') as store:\n",
"    store.append('daily_basic', all_daily_data_df, format='table', data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"所有每日基础数据获取并保存完毕!\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"execution_count": 7
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"id": "d7a773fc20293477",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"end_time": "2025-04-08T13:37:46.393814Z",
|
|
|
|
|
|
"start_time": "2025-04-08T13:37:39.941474Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Re-open the store read-only and summarise the table after the append.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date', 'is_st']]\n",
"\n",
"# DataFrame.info() prints its own report and returns None, so it is not wrapped in print().\n",
"df.info()"
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"Index: 8512911 entries, 0 to 5391\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"Data columns (total 3 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
" 2 is_st bool \n",
|
|
|
|
|
|
"dtypes: bool(1), object(2)\n",
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"memory usage: 203.0+ MB\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"None\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"execution_count": 8
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2025-03-31 23:08:03 +08:00
|
|
|
|
"version": "3.11.11"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|