2025-02-12 00:21:33 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"execution_count": 1,
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:57:36.913044Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:36.159612Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the TUSHARE_TOKEN environment variable so the\n",
"# credential is never stored in the notebook. The token that used to be\n",
"# hard-coded here is part of the file history and should be revoked.\n",
"ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"pro = ts.pro_api()"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 2,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "14671a7f72de2564",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:57:39.128278Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:36.918051Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"from datetime import datetime\n",
|
|
|
|
|
|
"import pandas as pd\n",
|
2025-03-31 23:08:03 +08:00
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
|
|
|
"def filter_rows(df):\n",
"    \"\"\"Keep one row per (name, start_date) pair, preferring rows with an end_date.\n",
"\n",
"    Within each group the first row whose ``end_date`` is not missing is kept;\n",
"    if every ``end_date`` in the group is missing, the group's first row is kept.\n",
"    The result is returned with a fresh 0..n-1 index.\n",
"    \"\"\"\n",
"\n",
"    def select_row(group):\n",
"        # Rows that carry an end_date take priority over open-ended rows.\n",
"        has_end = group['end_date'].notna()\n",
"        candidates = group[has_end] if has_end.any() else group\n",
"        return candidates.iloc[0]\n",
"\n",
"    per_pair = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
"    return per_pair.reset_index(drop=True)\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
|
|
|
|
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Return True if ``target_date`` falls inside any recorded period for ``stock_code``.\n",
"\n",
"    Args:\n",
"        name_change_dict: mapping of stock code to a DataFrame with datetime\n",
"            ``start_date`` and ``end_date`` columns (one row per period).\n",
"        stock_code: key to look up in ``name_change_dict``.\n",
"        target_date: date string in ``%Y%m%d`` format.\n",
"\n",
"    Returns:\n",
"        bool: False when the code has no entry or no period covers the date.\n",
"    \"\"\"\n",
"    target_date = datetime.strptime(target_date, '%Y%m%d')\n",
"    periods = name_change_dict.get(stock_code)\n",
"    if periods is None:\n",
"        return False\n",
"    # Address the columns by name rather than by position so a reordered or\n",
"    # extended frame cannot silently swap the dates.\n",
"    for sds, eds in zip(periods['start_date'], periods['end_date']):\n",
"        # A missing end date (None, NaN or NaT) means the period is still open.\n",
"        if pd.isna(eds):\n",
"            eds = datetime.now()\n",
"        if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
"            return True\n",
"    return False\n",
|
|
|
|
|
|
"\n",
|
2025-06-02 22:23:44 +08:00
|
|
|
"# Load the name-change history, drop exact duplicate rows and parse both date\n",
"# columns (an end_date that cannot be parsed becomes NaT).\n",
"name_change_df = (\n",
"    pd.read_hdf('/mnt/d/PyProject/NewStock/data/name_change.h5', key='name_change')\n",
"    .drop_duplicates(keep='first')\n",
"    .assign(\n",
"        start_date=lambda frame: pd.to_datetime(frame['start_date'], format='%Y%m%d'),\n",
"        end_date=lambda frame: pd.to_datetime(frame['end_date'], format='%Y%m%d', errors='coerce'),\n",
"    )\n",
")\n",
"\n",
"# Per stock code, keep only the rows whose name contains 'ST' or '退' and\n",
"# collapse them with filter_rows; codes without such rows get no entry.\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
"    st_data = group.loc[group['name'].str.contains('ST') | group['name'].str.contains('退')]\n",
"    if st_data.empty:\n",
"        continue\n",
"    name_change_dict[ts_code] = filter_rows(st_data)"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 3,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "e7f8cce2f80e2f20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:09.296046Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:39.339423Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"Index: 9629640 entries, 0 to 27329\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
"dtypes: object(2)\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"memory usage: 220.4+ MB\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"None\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"20260206\n",
|
|
|
|
|
|
"20260209\n"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"import time\n",
|
|
|
|
|
|
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
|
|
|
|
|
|
"\n",
|
2025-06-02 22:23:44 +08:00
|
|
|
"h5_filename = '/mnt/d/PyProject/NewStock/data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    # DataFrame.info() prints its report itself and returns None, so it is not\n",
"    # wrapped in print().\n",
"    df.info()\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# Ask for the calendar up to today instead of a hard-coded end date, so the\n",
"# update keeps working as time passes and never requests future sessions.\n",
"today = time.strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"if not trade_dates:\n",
"    # min() on an empty list would fail with an uninformative message.\n",
"    raise ValueError(f'daily_basic is already up to date (latest stored trade_date: {max_date})')\n",
"start_date = min(trade_dates)\n",
"print(start_date)"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-04-09 22:57:01 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 4,
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.817010Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:09.326485Z"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
},
|
|
|
|
|
|
"scrolled": true
|
2025-04-09 22:57:01 +08:00
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2026-02-09 22:12:14 +08:00
|
|
|
|
"任务 20260309 完成\n",
|
|
|
|
|
|
"任务 20260310 完成\n",
|
|
|
|
|
|
"任务 20260306 完成\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"任务 20260305 完成\n",
|
2026-02-09 22:12:14 +08:00
|
|
|
|
"任务 20260303 完成\n",
|
|
|
|
|
|
"任务 20260304 完成\n",
|
|
|
|
|
|
"任务 20260302 完成\n",
|
|
|
|
|
|
"任务 20260227 完成\n",
|
|
|
|
|
|
"任务 20260226 完成\n",
|
|
|
|
|
|
"任务 20260225 完成\n",
|
|
|
|
|
|
"任务 20260224 完成\n",
|
|
|
|
|
|
"任务 20260213 完成\n",
|
|
|
|
|
|
"任务 20260212 完成\n",
|
|
|
|
|
|
"任务 20260211 完成\n",
|
|
|
|
|
|
"任务 20260210 完成\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"任务 20260209 完成\n"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"# Collects the per-date results produced by the fetch loop in this cell.\n",
"all_daily_data = list()\n",
"\n",
"# Rate-limit bookkeeping: number of finished API calls and the start time of\n",
"# the current batch.\n",
"api_call_count, batch_start_time = 0, time.time()\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"def get_data(trade_date):\n",
"    \"\"\"Fetch the daily_basic table for one trade date and tag each row with ``is_st``.\n",
"\n",
"    Args:\n",
"        trade_date: trade date string passed to ``pro.daily_basic``.\n",
"\n",
"    Returns:\n",
"        Whatever ``pro.daily_basic`` returned; when that is a non-empty frame it\n",
"        gains an ``is_st`` column and ``trade_date`` is overwritten with the\n",
"        requested date.\n",
"    \"\"\"\n",
"    daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
"    if daily_basic_data is None or daily_basic_data.empty:\n",
"        return daily_basic_data\n",
"\n",
"    # Stamp the requested date on every row, then flag each stock via is_st.\n",
"    daily_basic_data['trade_date'] = trade_date\n",
"    daily_basic_data['is_st'] = [\n",
"        is_st(name_change_dict, code, trade_date)\n",
"        for code in daily_basic_data['ts_code']\n",
"    ]\n",
"    # Short pause after a successful fetch.\n",
"    time.sleep(0.2)\n",
"    return daily_basic_data\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fetch every pending trade date in batches so the request rate stays within\n",
"# BATCH_SIZE calls per BATCH_WINDOW_SECONDS.\n",
"BATCH_SIZE = 150\n",
"BATCH_WINDOW_SECONDS = 60\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    for batch_begin in range(0, len(trade_dates), BATCH_SIZE):\n",
"        batch = trade_dates[batch_begin:batch_begin + BATCH_SIZE]\n",
"        # Submit one batch at a time: submitting everything up front would let\n",
"        # the workers keep calling the API while this thread is sleeping.\n",
"        future_to_date = {executor.submit(get_data, td): td for td in batch}\n",
"\n",
"        for future in as_completed(future_to_date):\n",
"            trade_date = future_to_date[future]  # date this future was submitted for\n",
"            try:\n",
"                result = future.result()\n",
"            except Exception as e:\n",
"                print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            else:\n",
"                # Skip dates for which the API returned nothing.\n",
"                if result is not None and not result.empty:\n",
"                    all_daily_data.append(result)\n",
"                print(f\"任务 {trade_date} 完成\")\n",
"            # Count one API call, successful or not.\n",
"            api_call_count += 1\n",
"\n",
"        # Before starting another batch, wait out the rest of the window.\n",
"        if batch_begin + BATCH_SIZE < len(trade_dates):\n",
"            elapsed = time.time() - batch_start_time\n",
"            if elapsed < BATCH_WINDOW_SECONDS:\n",
"                sleep_time = BATCH_WINDOW_SECONDS - elapsed\n",
"                print(f\"已调用 150 次 API,等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
"                time.sleep(sleep_time)\n",
"            # Restart the clock for the next batch.\n",
"            batch_start_time = time.time()"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 5,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "919023c693d7a47a",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.864178Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.855084Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2026-02-24 13:06:14 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
|
|
|
|
|
"0 001301.SZ 20260213 78.24 1.0939 1.3757 \n",
|
|
|
|
|
|
"1 301050.SZ 20260213 53.61 2.8931 3.6211 \n",
|
|
|
|
|
|
"2 000829.SZ 20260213 10.58 2.2336 3.5271 \n",
|
|
|
|
|
|
"3 688498.SH 20260213 746.98 3.4302 4.7903 \n",
|
|
|
|
|
|
"4 920510.BJ 20260213 18.30 2.8519 4.1749 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"27350 000065.SZ 20260209 12.07 1.7167 3.2552 \n",
|
|
|
|
|
|
"27351 920249.BJ 20260209 13.86 3.2235 3.3218 \n",
|
|
|
|
|
|
"27352 300824.SZ 20260209 11.36 1.2161 3.0644 \n",
|
|
|
|
|
|
"27353 000766.SZ 20260209 23.65 1.8680 2.3253 \n",
|
|
|
|
|
|
"27354 000591.SZ 20260209 5.72 5.9341 9.0727 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm \\\n",
|
|
|
|
|
|
"0 0.51 24.3404 20.9997 3.0175 3.9021 2.8680 \n",
|
|
|
|
|
|
"1 0.80 38.6753 105.5089 4.2033 11.2338 18.9830 \n",
|
|
|
|
|
|
"2 0.65 347.7936 NaN 3.7189 0.1291 0.1352 \n",
|
|
|
|
|
|
"3 0.66 NaN 640.0343 29.5658 254.5923 140.4090 \n",
|
|
|
|
|
|
"4 1.41 160.0964 NaN 8.4111 14.5063 16.3558 \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"27350 0.73 13.3573 18.0503 1.2589 0.7347 0.9425 \n",
|
|
|
|
|
|
"27351 0.85 NaN NaN 8.0159 2.9840 2.5496 \n",
|
|
|
|
|
|
"27352 0.87 53.3349 33.5338 4.9536 4.9192 3.9029 \n",
|
|
|
|
|
|
"27353 0.75 405.0086 397.6822 9.9329 17.2829 18.3415 \n",
|
|
|
|
|
|
"27354 1.27 18.3189 21.1797 0.9314 3.7171 4.1825 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
" dv_ratio dv_ttm total_share float_share free_share total_mv \\\n",
|
|
|
|
|
|
"0 1.0183 1.0183 26080.2350 18605.2851 14794.9501 2.040518e+06 \n",
|
|
|
|
|
|
"1 0.6398 0.6398 24721.2171 21036.5666 16807.4482 1.325304e+06 \n",
|
|
|
|
|
|
"2 0.0945 0.0945 102510.0438 102412.9669 64854.1551 1.084556e+06 \n",
|
|
|
|
|
|
"3 0.0665 0.0533 8594.7726 8400.0000 6014.9271 6.420123e+06 \n",
|
|
|
|
|
|
"4 NaN NaN 18421.3929 9389.9078 6414.2078 3.371115e+05 \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"27350 1.1303 1.1303 116144.2159 97496.2288 51416.3105 1.401861e+06 \n",
|
|
|
|
|
|
"27351 NaN NaN 42163.0000 12466.9576 12097.9576 5.843792e+05 \n",
|
|
|
|
|
|
"27352 1.7937 1.7937 32634.1682 31649.5307 12560.3926 3.707242e+05 \n",
|
|
|
|
|
|
"27353 NaN NaN 96649.4707 96600.7681 77600.7681 2.285760e+06 \n",
|
|
|
|
|
|
"27354 1.9711 2.0779 392444.2493 392354.1671 256620.6136 2.244781e+06 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2026-01-27 00:52:35 +08:00
|
|
|
|
" circ_mv is_st \n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"0 1.455678e+06 False \n",
|
|
|
|
|
|
"1 1.127770e+06 False \n",
|
|
|
|
|
|
"2 1.083529e+06 False \n",
|
|
|
|
|
|
"3 6.274632e+06 False \n",
|
|
|
|
|
|
"4 1.718353e+05 False \n",
|
2026-01-27 00:52:35 +08:00
|
|
|
|
"... ... ... \n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"27350 1.176779e+06 False \n",
|
|
|
|
|
|
"27351 1.727920e+05 False \n",
|
|
|
|
|
|
"27352 3.595387e+05 False \n",
|
|
|
|
|
|
"27353 2.284608e+06 False \n",
|
|
|
|
|
|
"27354 2.244266e+06 False \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"[27355 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Stack the per-date frames into one table with a fresh integer index.\n",
"all_daily_data_df = pd.concat(objs=all_daily_data, ignore_index=True)\n",
"print(all_daily_data_df)"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 6,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "28cb78d032671b20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.881685Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.871184Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2026-01-27 00:52:35 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"5 603261.SH 20260213 28.95 0.4664 1.3719 \n",
|
|
|
|
|
|
"14 002700.SZ 20260213 7.28 1.5973 1.6181 \n",
|
|
|
|
|
|
"34 300344.SZ 20260213 1.87 14.3407 25.9753 \n",
|
|
|
|
|
|
"67 000430.SZ 20260213 7.26 0.8541 1.3705 \n",
|
|
|
|
|
|
"81 000752.SZ 20260213 10.86 1.0429 1.2849 \n",
|
2026-01-27 00:52:35 +08:00
|
|
|
|
"... ... ... ... ... ... \n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"27165 300301.SZ 20260209 2.49 1.9012 2.0554 \n",
|
|
|
|
|
|
"27244 002822.SZ 20260209 3.53 1.0762 1.0762 \n",
|
|
|
|
|
|
"27270 300147.SZ 20260209 9.44 1.7053 2.2069 \n",
|
|
|
|
|
|
"27281 002501.SZ 20260209 2.16 3.6044 4.6543 \n",
|
|
|
|
|
|
"27297 002620.SZ 20260209 5.98 2.2406 3.1755 \n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
|
|
|
|
|
"5 0.49 NaN NaN 3.3744 7.7410 7.8754 NaN \n",
|
|
|
|
|
|
"14 0.77 32.3153 40.4077 2.2014 4.2912 4.3540 0.9066 \n",
|
|
|
|
|
|
"34 0.60 NaN NaN 5.3118 4.6416 4.9881 NaN \n",
|
|
|
|
|
|
"67 0.49 NaN NaN 21.9522 13.6188 12.8340 NaN \n",
|
|
|
|
|
|
"81 1.21 109.3444 19.4711 4.6597 6.7963 6.4062 NaN \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"27165 0.97 NaN NaN 12.5588 4.0907 4.7109 NaN \n",
|
|
|
|
|
|
"27244 0.95 NaN NaN 4.9828 2.9693 6.5620 NaN \n",
|
|
|
|
|
|
"27270 1.21 NaN NaN 9.0126 3.3586 4.2436 NaN \n",
|
|
|
|
|
|
"27281 0.87 NaN NaN 29.0833 22.9884 26.1834 NaN \n",
|
|
|
|
|
|
"27297 1.30 NaN NaN NaN 2.8840 4.2972 NaN \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
" dv_ttm total_share float_share free_share total_mv \\\n",
|
|
|
|
|
|
"5 NaN 7750.5022 7750.5022 2634.9858 224377.0387 \n",
|
|
|
|
|
|
"14 0.9066 41362.8185 26346.4874 26007.0037 301121.3187 \n",
|
|
|
|
|
|
"34 NaN 64170.6416 63999.4166 35333.5001 119999.0998 \n",
|
|
|
|
|
|
"67 NaN 80963.5372 37055.6486 23092.8156 587795.2801 \n",
|
|
|
|
|
|
"81 NaN 26375.8491 26375.8491 21407.3042 286441.7212 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"27165 NaN 82986.8769 78987.6719 73061.8561 206637.3235 \n",
|
|
|
|
|
|
"27244 NaN 195094.2200 107059.0368 107059.0368 688682.5966 \n",
|
|
|
|
|
|
"27270 NaN 66127.9045 65739.8353 50798.8432 624247.4185 \n",
|
|
|
|
|
|
"27281 NaN 355000.0000 354646.9206 274646.9206 766800.0000 \n",
|
|
|
|
|
|
"27297 NaN 37749.4000 31536.0303 22251.3747 225741.4120 \n",
|
2025-06-02 22:23:44 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
" circ_mv is_st \n",
|
|
|
|
|
|
"5 224377.0387 True \n",
|
|
|
|
|
|
"14 191802.4283 True \n",
|
|
|
|
|
|
"34 119678.9090 True \n",
|
|
|
|
|
|
"67 269024.0088 True \n",
|
|
|
|
|
|
"81 286441.7212 True \n",
|
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
|
"27165 196679.3030 True \n",
|
|
|
|
|
|
"27244 377918.3999 True \n",
|
|
|
|
|
|
"27270 620584.0452 True \n",
|
|
|
|
|
|
"27281 766037.3485 True \n",
|
|
|
|
|
|
"27297 188585.4612 True \n",
|
2025-11-29 00:23:12 +08:00
|
|
|
|
"\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"[870 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Show only the rows whose is_st flag is True.\n",
"print(all_daily_data_df.loc[all_daily_data_df['is_st']])"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 7,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "692b58674b7462c9",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:17.773453Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.903459Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"所有每日基础数据获取并保存完毕!\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Append the newly fetched rows to the table-format dataset in the HDF5 file.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='daily_basic',\n",
"    mode='a',\n",
"    append=True,\n",
"    format='table',\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 8,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "d7a773fc20293477",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:24.305403Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:17.816332Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"Index: 9656995 entries, 0 to 27354\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"Data columns (total 3 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
" 2 is_st bool \n",
|
|
|
|
|
|
"dtypes: bool(1), object(2)\n",
|
2026-02-24 13:06:14 +08:00
|
|
|
|
"memory usage: 230.2+ MB\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"None\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Re-open the store read-only and summarise the three key columns.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store.get(key).loc[:, ['ts_code', 'trade_date', 'is_st']]\n",
"    print(df.info())"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
2025-06-02 22:23:44 +08:00
|
|
|
|
"display_name": "stock",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2025-11-29 00:23:12 +08:00
|
|
|
|
"version": "3.12.11"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|