2025-02-12 00:21:33 +08:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"execution_count": 1,
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"id": "18d1d622-b083-4cc4-a6f8-7c1ed2d0edd2",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:57:36.913044Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:36.159612Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Credentials must not live in the notebook: read the Tushare token from the\n",
"# TUSHARE_TOKEN environment variable. Any token that was ever committed with\n",
"# this file should be treated as leaked and revoked.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"ts.set_token(tushare_token)\n",
"\n",
"# Shared Tushare Pro client used by the cells below\n",
"pro = ts.pro_api()"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 2,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "14671a7f72de2564",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:57:39.128278Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:36.918051Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"from datetime import datetime\n",
|
|
|
|
|
|
"import pandas as pd\n",
|
2025-03-31 23:08:03 +08:00
|
|
|
|
"import warnings\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
|
|
|
"def filter_rows(df):\n",
"    \"\"\"Keep one row per (name, start_date) pair, preferring rows with an end_date.\n",
"\n",
"    Args:\n",
"        df: DataFrame with at least 'name', 'start_date' and 'end_date' columns.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with the same columns, ordered by name and start_date,\n",
"        with a fresh 0..n-1 index. For each pair the first row whose end_date\n",
"        is present is kept; if no row has one, the first row of the pair is kept.\n",
"    \"\"\"\n",
"    keys = ['name', 'start_date']\n",
"    # A groupby on these keys would drop rows whose key is missing; keep that contract.\n",
"    candidates = df.dropna(subset=keys)\n",
"    # Sort rows that have an end_date ahead of those that do not, and break\n",
"    # ties by original position, so keep='first' selects deterministically\n",
"    # without relying on groupby.apply touching the grouping columns\n",
"    # (deprecated since pandas 2.2).\n",
"    ordered = candidates.assign(\n",
"        _has_end_date=candidates['end_date'].notna(),\n",
"        _position=list(range(len(candidates))),\n",
"    ).sort_values(\n",
"        keys + ['_has_end_date', '_position'],\n",
"        ascending=[True, True, False, True],\n",
"    )\n",
"    filtered_df = (\n",
"        ordered.drop_duplicates(subset=keys, keep='first')\n",
"        .drop(columns=['_has_end_date', '_position'])\n",
"        .reset_index(drop=True)\n",
"    )\n",
"    return filtered_df\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
|
|
|
|
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Tell whether any stored name record of a stock covers the given date.\n",
"\n",
"    Args:\n",
"        name_change_dict: Mapping from stock code to a DataFrame of name\n",
"            records with 'start_date' and 'end_date' columns.\n",
"        stock_code: Key to look up in name_change_dict.\n",
"        target_date: Date string in '%Y%m%d' form, e.g. '20250509'.\n",
"\n",
"    Returns:\n",
"        True if some record has start_date <= target_date and either no\n",
"        end_date or an end_date on/after target_date; False otherwise,\n",
"        including when stock_code is not in the mapping.\n",
"\n",
"    Raises:\n",
"        ValueError: If target_date does not match '%Y%m%d'.\n",
"    \"\"\"\n",
"    # Parse first so a malformed date is reported even for unknown codes.\n",
"    target = pd.Timestamp(datetime.strptime(target_date, '%Y%m%d'))\n",
"    records = name_change_dict.get(stock_code)\n",
"    if records is None:\n",
"        return False\n",
"\n",
"    one_day = pd.Timedelta(days=1)\n",
"    # Columns are addressed by name so a change in column order cannot\n",
"    # silently select the wrong fields.\n",
"    for start, end in zip(records['start_date'], records['end_date']):\n",
"        if pd.isna(start) or target < start:\n",
"            continue\n",
"        # pd.isna covers None, NaT and NaN alike. A missing end date means the\n",
"        # record is still open, so it has no upper bound (the result no longer\n",
"        # depends on the wall clock).\n",
"        if pd.isna(end) or (target - end) < one_day:\n",
"            return True\n",
"    return False\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# Parse the YYYYMMDD strings into timestamps. An end_date that cannot be\n",
"# parsed becomes NaT (errors='coerce'); an unparseable start_date raises.\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"\n",
"# Keep records whose name contains 'ST' (this also matches '*ST') or '退'\n",
"# (presumably delisting-period names; confirm against the data source).\n",
"# na=False treats a missing name as a non-match instead of leaving NaN in\n",
"# the boolean mask.\n",
"st_mask = name_change_df['name'].str.contains('ST|退', na=False)\n",
"\n",
"# Map each ts_code to its retained records, one row per (name, start_date).\n",
"# Codes without any matching record get no entry.\n",
"name_change_dict = {\n",
"    ts_code: filter_rows(group)\n",
"    for ts_code, group in name_change_df[st_mask].groupby('ts_code')\n",
"}"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 3,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "e7f8cce2f80e2f20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:09.296046Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:57:39.339423Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"Index: 8615301 entries, 0 to 5388\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
"dtypes: object(2)\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"memory usage: 197.2+ MB\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"None\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"20250508\n",
|
|
|
|
|
|
"20250509\n"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"import time\n",
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"h5_filename = '../../../data/daily_basic.h5'\n",
"key = '/daily_basic'\n",
"\n",
"# Find the latest trade date already stored; only the trade_date column is needed.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    stored_dates = store.select(key, columns=['trade_date'])['trade_date']\n",
"max_date = stored_dates.max()\n",
"print(max_date)\n",
"\n",
"# Request the calendar up to today only: a fixed end date goes stale and, until\n",
"# it does, makes the download step request dates that have not happened yet.\n",
"today = pd.Timestamp.today().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # open trading days only\n",
"\n",
"# Dates are YYYYMMDD strings, so string comparison orders them chronologically.\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"\n",
"# min() raises on an empty list; an up-to-date store simply has nothing to fetch.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(start_date)"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-04-09 22:57:01 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 4,
|
2025-04-09 22:57:01 +08:00
|
|
|
|
"id": "553cfb36-f560-4cc4-b2bc-68323ccc5072",
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.817010Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:09.326485Z"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
},
|
|
|
|
|
|
"scrolled": true
|
2025-04-09 22:57:01 +08:00
|
|
|
|
},
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"任务 20250718 完成\n",
|
|
|
|
|
|
"任务 20250717 完成\n",
|
|
|
|
|
|
"任务 20250716 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250715 完成\n",
|
|
|
|
|
|
"任务 20250711 完成\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"任务 20250714 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250710 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250709 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250708 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250707 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250704 完成\n",
|
|
|
|
|
|
"任务 20250703 完成\n",
|
|
|
|
|
|
"任务 20250702 完成\n",
|
|
|
|
|
|
"任务 20250701 完成\n",
|
|
|
|
|
|
"任务 20250630 完成\n",
|
|
|
|
|
|
"任务 20250627 完成\n",
|
|
|
|
|
|
"任务 20250626 完成\n",
|
|
|
|
|
|
"任务 20250625 完成\n",
|
|
|
|
|
|
"任务 20250624 完成\n",
|
|
|
|
|
|
"任务 20250623 完成\n",
|
|
|
|
|
|
"任务 20250620 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250619 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250618 完成\n",
|
|
|
|
|
|
"任务 20250617 完成\n",
|
|
|
|
|
|
"任务 20250616 完成\n",
|
|
|
|
|
|
"任务 20250613 完成\n",
|
|
|
|
|
|
"任务 20250612 完成\n",
|
|
|
|
|
|
"任务 20250611 完成\n",
|
|
|
|
|
|
"任务 20250610 完成\n",
|
|
|
|
|
|
"任务 20250609 完成\n",
|
|
|
|
|
|
"任务 20250606 完成\n",
|
|
|
|
|
|
"任务 20250605 完成\n",
|
|
|
|
|
|
"任务 20250604 完成\n",
|
|
|
|
|
|
"任务 20250603 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250530 完成\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"任务 20250529 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250528 完成\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"任务 20250527 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250526 完成\n",
|
|
|
|
|
|
"任务 20250523 完成\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"任务 20250522 完成\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"任务 20250521 完成\n",
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"任务 20250520 完成\n",
|
|
|
|
|
|
"任务 20250519 完成\n",
|
|
|
|
|
|
"任务 20250516 完成\n",
|
|
|
|
|
|
"任务 20250515 完成\n",
|
|
|
|
|
|
"任务 20250514 完成\n",
|
|
|
|
|
|
"任务 20250513 完成\n",
|
|
|
|
|
|
"任务 20250512 完成\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"任务 20250509 完成\n"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"source": [
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"# Per-day result frames collected from the download tasks\n",
"all_daily_data = []\n",
"\n",
"# Rate-limit bookkeeping: when the current batch began and calls counted so far\n",
"batch_start_time = time.time()\n",
"api_call_count = 0\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
"def get_data(trade_date):\n",
"    \"\"\"Fetch the daily_basic rows for one trade date and add an is_st flag.\n",
"\n",
"    Args:\n",
"        trade_date: Trade date string in the form is_st() parses ('%Y%m%d').\n",
"\n",
"    Returns:\n",
"        Whatever pro.daily_basic returned. When that is a non-empty DataFrame,\n",
"        its trade_date column is overwritten with trade_date and an is_st\n",
"        column is added; None or an empty frame is returned unchanged.\n",
"    \"\"\"\n",
"    daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
"    if daily_basic_data is not None and not daily_basic_data.empty:\n",
"        # Stamp every row with the requested trade date\n",
"        daily_basic_data['trade_date'] = trade_date\n",
"        # All rows share trade_date, so only ts_code varies; iterating the\n",
"        # column avoids building a Series per row as apply(axis=1) would.\n",
"        daily_basic_data['is_st'] = [\n",
"            is_st(name_change_dict, ts_code, trade_date)\n",
"            for ts_code in daily_basic_data['ts_code']\n",
"        ]\n",
"    # Short pause after every request, whether or not it returned rows\n",
"    time.sleep(0.2)\n",
"    return daily_basic_data\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"# 遍历每个交易日期并获取数据\n",
|
|
|
|
|
"# Submit the requests in batches of 150 per minute. The pause has to happen\n",
"# while submitting: sleeping while results are collected would not slow the\n",
"# workers down, because every task would already be queued.\n",
"failed_dates = []\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {}\n",
"    for td in trade_dates:\n",
"        future_to_date[executor.submit(get_data, td)] = td\n",
"        # One submitted task is one API call\n",
"        api_call_count += 1\n",
"\n",
"        # After every 150 submissions, wait out the remainder of the minute\n",
"        if api_call_count % 150 == 0:\n",
"            elapsed = time.time() - batch_start_time\n",
"            if elapsed < 60:\n",
"                sleep_time = 60 - elapsed\n",
"                print(f\"已调用 150 次 API,等待 {sleep_time:.2f} 秒以满足速率限制...\")\n",
"                time.sleep(sleep_time)\n",
"            # Start timing the next batch\n",
"            batch_start_time = time.time()\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # date this task was submitted for\n",
"        try:\n",
"            result = future.result()\n",
"        except Exception as e:\n",
"            # Remember the failure: later dates may still succeed, which would\n",
"            # otherwise leave an unnoticed gap in the stored data.\n",
"            failed_dates.append(trade_date)\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"        # Dates without data come back as None or an empty frame; keep them\n",
"        # out of the list that is concatenated afterwards.\n",
"        if result is not None and not result.empty:\n",
"            all_daily_data.append(result)\n",
"        print(f\"任务 {trade_date} 完成\")\n",
"\n",
"if failed_dates:\n",
"    print(f\"以下交易日获取失败,数据缺失: {sorted(failed_dates)}\")\n",
|
|
|
|
|
|
"\n"
|
2025-05-06 23:42:40 +08:00
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 5,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "919023c693d7a47a",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.864178Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.855084Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
|
|
|
|
|
"0 300575.SZ 20250509 6.05 1.9284 2.1880 \n",
|
|
|
|
|
|
"1 300247.SZ 20250509 3.77 2.1735 2.5437 \n",
|
|
|
|
|
|
"2 603038.SH 20250509 15.80 17.5702 32.3972 \n",
|
|
|
|
|
|
"3 002030.SZ 20250509 5.82 0.8252 1.2070 \n",
|
|
|
|
|
|
"4 600157.SH 20250509 1.36 0.8369 1.0222 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"5384 600841.SH 20250509 5.57 1.0271 3.2670 \n",
|
|
|
|
|
|
"5385 300968.SZ 20250509 14.76 1.2857 2.7636 \n",
|
|
|
|
|
|
"5386 300634.SZ 20250509 25.79 5.2551 9.4581 \n",
|
|
|
|
|
|
"5387 300295.SZ 20250509 15.73 3.0347 3.2458 \n",
|
|
|
|
|
|
"5388 688370.SH 20250509 19.15 1.2008 1.2008 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
|
|
|
|
|
"0 0.71 239.8914 NaN 1.3451 1.1608 1.1259 1.9835 \n",
|
|
|
|
|
|
"1 0.96 64.6952 53.1680 2.7649 4.4008 3.9673 0.0000 \n",
|
|
|
|
|
|
"2 4.47 183.7603 154.4297 3.1047 4.0259 3.7692 0.2434 \n",
|
|
|
|
|
|
"3 0.62 NaN NaN 1.0296 9.5754 9.9145 0.2577 \n",
|
|
|
|
|
|
"4 0.55 19.3625 26.3896 0.6394 1.0656 1.1327 0.4044 \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"5384 0.77 NaN NaN 2.3362 1.1952 1.2860 0.0000 \n",
|
|
|
|
|
|
"5385 0.71 115.0812 181.8721 3.2254 4.9990 5.1146 0.3388 \n",
|
|
|
|
|
|
"5386 1.01 50.5639 52.9222 4.1166 7.0433 6.7806 0.8063 \n",
|
|
|
|
|
|
"5387 0.65 NaN NaN 2.6398 24.2982 28.1758 0.0000 \n",
|
|
|
|
|
|
"5388 1.25 29.1668 36.1111 0.9812 4.4106 4.4983 NaN \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" dv_ttm total_share float_share free_share total_mv \\\n",
|
|
|
|
|
|
"0 1.9835 4.647564e+04 3.427082e+04 3.020469e+04 2.811776e+05 \n",
|
|
|
|
|
|
"1 NaN 8.040403e+04 8.032753e+04 6.863630e+04 3.031232e+05 \n",
|
|
|
|
|
|
"2 0.2434 2.686771e+04 2.686771e+04 1.457134e+04 4.245098e+05 \n",
|
|
|
|
|
|
"3 0.2577 1.403446e+05 1.403446e+05 9.595371e+04 8.168056e+05 \n",
|
|
|
|
|
|
"4 0.4044 2.221776e+06 2.221776e+06 1.819047e+06 3.021616e+06 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"5384 NaN 1.387822e+05 1.043024e+05 3.279094e+04 7.730167e+05 \n",
|
|
|
|
|
|
"5385 0.3388 4.133800e+04 4.133800e+04 1.923185e+04 6.101489e+05 \n",
|
|
|
|
|
|
"5386 0.8063 4.512109e+04 4.346809e+04 2.415175e+04 1.163673e+06 \n",
|
|
|
|
|
|
"5387 NaN 1.896137e+04 1.675486e+04 1.566518e+04 2.982624e+05 \n",
|
|
|
|
|
|
"5388 NaN 1.371079e+04 4.374912e+03 4.374912e+03 2.625616e+05 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" circ_mv is_st \n",
|
|
|
|
|
|
"0 2.073385e+05 False \n",
|
|
|
|
|
|
"1 3.028348e+05 False \n",
|
|
|
|
|
|
"2 4.245098e+05 False \n",
|
|
|
|
|
|
"3 8.168056e+05 False \n",
|
|
|
|
|
|
"4 3.021616e+06 False \n",
|
|
|
|
|
|
"... ... ... \n",
|
|
|
|
|
|
"5384 5.809646e+05 False \n",
|
|
|
|
|
|
"5385 6.101489e+05 False \n",
|
|
|
|
|
|
"5386 1.121042e+06 False \n",
|
|
|
|
|
|
"5387 2.635540e+05 False \n",
|
|
|
|
|
|
"5388 8.377956e+04 False \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"[5389 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Stack the per-day frames row-wise into one table with a fresh 0..n-1 index\n",
"all_daily_data_df = pd.concat(all_daily_data, axis=0, ignore_index=True)\n",
"print(all_daily_data_df)"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 6,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "28cb78d032671b20",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:16.881685Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.871184Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
|
|
|
|
|
"54 002496.SZ 20250509 1.43 3.1262 3.2341 \n",
|
|
|
|
|
|
"148 603828.SH 20250509 5.04 3.5674 7.1692 \n",
|
|
|
|
|
|
"166 600599.SH 20250509 7.70 10.8623 27.2882 \n",
|
|
|
|
|
|
"193 000820.SZ 20250509 2.16 5.5698 5.7239 \n",
|
|
|
|
|
|
"203 300506.SZ 20250509 3.28 0.6710 0.9449 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"5204 002602.SZ 20250509 8.00 1.3867 1.7044 \n",
|
|
|
|
|
|
"5253 300147.SZ 20250509 7.37 7.2159 9.3379 \n",
|
|
|
|
|
|
"5264 002501.SZ 20250509 2.08 2.4301 3.1371 \n",
|
|
|
|
|
|
"5317 600421.SH 20250509 5.27 2.7391 5.8971 \n",
|
|
|
|
|
|
"5345 600289.SH 20250509 5.78 1.3847 2.0115 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" volume_ratio pe pe_ttm pb ps ps_ttm dv_ratio \\\n",
|
|
|
|
|
|
"54 0.73 NaN NaN 1.6044 7.6992 7.2633 0.0 \n",
|
|
|
|
|
|
"148 1.65 349.9490 1691.0271 3.9734 1.2211 1.3170 0.0 \n",
|
|
|
|
|
|
"166 4.51 NaN NaN 11.5933 3.9468 4.0472 0.0 \n",
|
|
|
|
|
|
"193 1.00 NaN NaN 9.5443 11.2714 14.3393 0.0 \n",
|
|
|
|
|
|
"203 0.87 NaN NaN 28.5909 19.5183 19.3088 0.0 \n",
|
|
|
|
|
|
"... ... ... ... ... ... ... ... \n",
|
|
|
|
|
|
"5204 0.78 49.1432 31.1887 2.2169 2.6358 2.2496 0.0 \n",
|
|
|
|
|
|
"5253 1.74 NaN NaN 5.0393 2.6221 2.8487 0.0 \n",
|
|
|
|
|
|
"5264 0.87 NaN NaN 22.5816 22.1370 26.0255 0.0 \n",
|
|
|
|
|
|
"5317 0.74 NaN NaN 143.1934 8.7976 8.9449 0.0 \n",
|
|
|
|
|
|
"5345 0.55 NaN NaN 2.9752 11.3890 11.6628 0.0 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
" dv_ttm total_share float_share free_share total_mv \\\n",
|
|
|
|
|
|
"54 NaN 150758.9677 118138.6559 114196.4999 2.155853e+05 \n",
|
|
|
|
|
|
"148 NaN 59596.0158 59593.9625 29654.2988 3.003639e+05 \n",
|
|
|
|
|
|
"166 NaN 16600.0000 16600.0000 6607.7948 1.278200e+05 \n",
|
|
|
|
|
|
"193 NaN 64362.0201 29403.1899 28611.4718 1.390220e+05 \n",
|
|
|
|
|
|
"203 NaN 69559.6569 57572.5450 40880.9749 2.281557e+05 \n",
|
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
|
"5204 NaN 745255.6968 687870.8273 559649.7754 5.962046e+06 \n",
|
|
|
|
|
|
"5253 NaN 66127.9045 65745.9042 50804.9121 4.873627e+05 \n",
|
|
|
|
|
|
"5264 NaN 355000.0000 354999.9006 274999.9006 7.384000e+05 \n",
|
|
|
|
|
|
"5317 NaN 19560.0000 19560.0000 9085.2748 1.030812e+05 \n",
|
|
|
|
|
|
"5345 NaN 63105.2069 56592.2684 38956.2787 3.647481e+05 \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
" circ_mv is_st \n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"54 1.689383e+05 True \n",
|
|
|
|
|
|
"148 3.003536e+05 True \n",
|
|
|
|
|
|
"166 1.278200e+05 True \n",
|
|
|
|
|
|
"193 6.351089e+04 True \n",
|
|
|
|
|
|
"203 1.888379e+05 True \n",
|
2025-05-08 15:42:17 +08:00
|
|
|
|
"... ... ... \n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"5204 5.502967e+06 True \n",
|
|
|
|
|
|
"5253 4.845473e+05 True \n",
|
|
|
|
|
|
"5264 7.383998e+05 True \n",
|
|
|
|
|
|
"5317 1.030812e+05 True \n",
|
|
|
|
|
|
"5345 3.271033e+05 True \n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"[197 rows x 19 columns]\n"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Show only the rows whose is_st flag is set\n",
"print(all_daily_data_df.loc[all_daily_data_df['is_st']])"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 7,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "692b58674b7462c9",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:17.773453Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:16.903459Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"所有每日基础数据获取并保存完毕!\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Append the new rows to the table-format 'daily_basic' dataset in the HDF5 file.\n",
"# NOTE(review): append=True adds the rows on every execution, so running this\n",
"# cell twice stores the same rows twice.\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='daily_basic',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")\n"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
},
|
|
|
|
|
|
{
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"cell_type": "code",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"execution_count": 8,
|
2025-02-15 23:33:34 +08:00
|
|
|
|
"id": "d7a773fc20293477",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"metadata": {
|
|
|
|
|
|
"ExecuteTime": {
|
2025-04-10 23:17:22 +08:00
|
|
|
|
"end_time": "2025-04-09T14:58:24.305403Z",
|
|
|
|
|
|
"start_time": "2025-04-09T14:58:17.816332Z"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"Index: 8620690 entries, 0 to 5388\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"Data columns (total 3 columns):\n",
|
|
|
|
|
|
" # Column Dtype \n",
|
|
|
|
|
|
"--- ------ ----- \n",
|
|
|
|
|
|
" 0 ts_code object\n",
|
|
|
|
|
|
" 1 trade_date object\n",
|
|
|
|
|
|
" 2 is_st bool \n",
|
|
|
|
|
|
"dtypes: bool(1), object(2)\n",
|
2025-05-13 15:30:06 +08:00
|
|
|
|
"memory usage: 205.5+ MB\n",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"None\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"source": [
|
|
|
|
|
"# Read back only the columns needed to sanity-check the stored table\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store.select(key, columns=['ts_code', 'trade_date', 'is_st'])\n",
"\n",
"# info() prints its report itself and returns None, so it is not wrapped in print()\n",
"df.info()"
|
|
|
|
|
|
]
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
2025-05-06 23:42:40 +08:00
|
|
|
|
"display_name": "new_trader",
|
2025-02-12 00:21:33 +08:00
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2025-03-31 23:08:03 +08:00
|
|
|
|
"version": "3.11.11"
|
2025-02-12 00:21:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
|
}
|