232 lines
8.8 KiB
Plaintext
232 lines
8.8 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "17cc645336d4eb18",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:19.819017Z",
|
||
"start_time": "2025-02-08T16:55:18.958639Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import tushare as ts"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "48ae71ed02d61819",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:27.578361Z",
|
||
"start_time": "2025-02-08T16:55:19.882313Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Load the per-stock, per-trade-date table stored under the 'daily_basic' key.\n",
"daily_basic = pd.read_hdf('../../../data/daily_basic.h5', key='daily_basic')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "e6606a96e5728b8",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:27.938078Z",
|
||
"start_time": "2025-02-08T16:55:27.584226Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from datetime import datetime\n",
|
||
"import warnings\n",
|
||
"\n",
|
||
"# Silence every warning category from this point on.\n",
"# NOTE(review): this also hides pandas deprecation notices - consider narrowing it.\n",
"warnings.filterwarnings(\"ignore\")\n",
|
||
"def filter_rows(df):\n",
"    \"\"\"Collapse duplicate (name, start_date) records to a single row each.\n",
"\n",
"    Within every (name, start_date) group the first row whose end_date is\n",
"    set is kept; if no row in the group has an end_date, the group's first\n",
"    row is kept instead.\n",
"\n",
"    Implemented with boolean masks and drop_duplicates rather than\n",
"    groupby(...).apply(...): applying a function to groups that still carry\n",
"    their grouping columns is deprecated in pandas 2.2+, and losing those\n",
"    columns would change the column layout of the result.\n",
"\n",
"    Args:\n",
"        df: DataFrame with at least 'name', 'start_date' and 'end_date' columns.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with the same columns as df, one row per\n",
"        (name, start_date) pair, ordered by those keys, with a fresh 0..n-1 index.\n",
"    \"\"\"\n",
"    keys = ['name', 'start_date']\n",
"    # groupby skips rows with a missing key, so drop them here as well.\n",
"    candidates = df.dropna(subset=keys)\n",
"    has_end = candidates['end_date'].notna()\n",
"    # Rows with an end_date go first so that keep='first' prefers them.\n",
"    prioritized = pd.concat([candidates[has_end], candidates[~has_end]])\n",
"    filtered_df = prioritized.drop_duplicates(subset=keys, keep='first')\n",
"    # No duplicate keys remain, so sorting by key is deterministic and matches\n",
"    # the key-sorted order that groupby produced.\n",
"    return filtered_df.sort_values(keys).reset_index(drop=True)\n",
|
||
"\n",
|
||
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Return True if target_date falls inside one of stock_code's recorded periods.\n",
"\n",
"    Args:\n",
"        name_change_dict: Mapping of stock code to a DataFrame of periods with\n",
"            datetime 'start_date' and 'end_date' columns.\n",
"        stock_code: Code to look up in name_change_dict.\n",
"        target_date: Date string in '%Y%m%d' form, e.g. '20250430'.\n",
"\n",
"    Returns:\n",
"        True when target_date lies within any period (both ends inclusive);\n",
"        False otherwise, or when stock_code has no entry.\n",
"\n",
"    Raises:\n",
"        ValueError: If target_date does not match '%Y%m%d'.\n",
"    \"\"\"\n",
"    target = datetime.strptime(target_date, '%Y%m%d')\n",
"    periods = name_change_dict.get(stock_code)\n",
"    if periods is None:\n",
"        return False\n",
"    # A missing end_date is treated as a period that runs up to the present.\n",
"    now = datetime.now()\n",
"    # Columns are read by name so the result does not depend on column order.\n",
"    for start, end in zip(periods['start_date'], periods['end_date']):\n",
"        # pd.isna covers None, NaT and NaN alike.\n",
"        if pd.isna(end):\n",
"            end = now\n",
"        # Direct comparison avoids the whole-day truncation of timedelta.days.\n",
"        if start <= target <= end:\n",
"            return True\n",
"    return False\n",
|
||
"\n",
|
||
"# Load the name-change history and drop exact duplicate records.\n",
"name_change_df = pd.read_hdf('../../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# Parse the date columns; an end_date that cannot be parsed becomes NaT.\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"\n",
"# Keep only records whose name contains 'ST' (this also matches '*ST').\n",
"# na=False treats a missing name as a non-match instead of producing an NA\n",
"# mask that boolean indexing would reject.\n",
"name_change_df = name_change_df[name_change_df['name'].str.contains('ST', na=False)]\n",
"\n",
"# One de-duplicated frame of periods per stock code. groupby only yields\n",
"# non-empty groups, so no second filter or emptiness check is needed.\n",
"name_change_dict = {\n",
"    ts_code: filter_rows(group)\n",
"    for ts_code, group in name_change_df.groupby('ts_code')\n",
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "41bc125d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" ts_code trade_date close turnover_rate turnover_rate_f \\\n",
|
||
"0 603848.SH 20250430 14.36 0.5401 4.6897 \n",
|
||
"1 300290.SZ 20250430 16.30 2.8540 3.5686 \n",
|
||
"2 603877.SH 20250430 15.90 0.3794 1.2707 \n",
|
||
"3 834639.BJ 20250430 8.37 6.1158 7.8866 \n",
|
||
"4 000909.SZ 20250430 5.72 0.6104 1.0424 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"8594006 600708.SH 20170103 9.03 0.7694 1.0169 \n",
|
||
"8594007 600712.SH 20170103 10.29 0.5859 0.8028 \n",
|
||
"8594008 001872.SZ 20170103 19.33 1.0970 5.4258 \n",
|
||
"8594009 001914.SZ 20170103 12.37 3.2627 6.6991 \n",
|
||
"8594010 302132.SZ 20170103 23.28 0.4912 1.5149 \n",
|
||
"\n",
|
||
" volume_ratio pe pe_ttm pb ps ps_ttm \\\n",
|
||
"0 1.31 23.3421 25.6176 2.3433 3.7254 3.8065 \n",
|
||
"1 1.00 NaN NaN 13.1076 13.5867 13.5756 \n",
|
||
"2 0.98 29.1494 33.6975 1.6522 1.1075 1.1304 \n",
|
||
"3 0.87 70.0984 215.1863 2.0171 0.8405 0.8329 \n",
|
||
"4 0.55 NaN NaN 2.3539 7.7727 8.2925 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"8594006 0.85 23.3367 22.2458 1.4847 0.9613 0.9248 \n",
|
||
"8594007 0.67 202.4855 287.1454 5.1852 2.3682 2.5386 \n",
|
||
"8594008 0.77 23.6158 23.1883 2.7052 6.6556 6.5584 \n",
|
||
"8594009 1.02 20.5631 15.1595 2.1186 1.4950 1.2600 \n",
|
||
"8594010 0.74 91.3908 84.6980 6.9391 8.9531 8.8570 \n",
|
||
"\n",
|
||
" dv_ratio dv_ttm total_share float_share free_share total_mv \\\n",
|
||
"0 2.0904 2.0904 40391.1511 40240.6511 4634.6511 5.800169e+05 \n",
|
||
"1 0.0000 NaN 63973.2569 63922.1969 51122.1969 1.042764e+06 \n",
|
||
"2 3.7471 3.7471 47382.5333 46932.3226 14014.3219 7.533823e+05 \n",
|
||
"3 NaN NaN 20160.0000 11721.5883 9089.7537 1.687392e+05 \n",
|
||
"4 0.0000 NaN 43771.4245 43771.0570 25634.2299 2.503725e+05 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"8594006 1.1074 1.1074 131871.9966 75088.9215 56812.2811 1.190804e+06 \n",
|
||
"8594007 0.1555 0.1555 54465.5360 53795.9475 39266.3119 5.604504e+05 \n",
|
||
"8594008 2.1211 2.1211 64476.3730 46486.6050 9398.8050 1.246328e+06 \n",
|
||
"8594009 0.4042 0.4042 66696.1416 66678.0666 32475.1786 8.250313e+05 \n",
|
||
"8594010 0.2291 0.2291 39384.0333 30419.3588 9862.3809 9.168603e+05 \n",
|
||
"\n",
|
||
" circ_mv is_st \n",
|
||
"0 5.778557e+05 False \n",
|
||
"1 1.041932e+06 False \n",
|
||
"2 7.462239e+05 False \n",
|
||
"3 9.810969e+04 False \n",
|
||
"4 2.503704e+05 True \n",
|
||
"... ... ... \n",
|
||
"8594006 6.780530e+05 False \n",
|
||
"8594007 5.535603e+05 False \n",
|
||
"8594008 8.985861e+05 False \n",
|
||
"8594009 8.248077e+05 False \n",
|
||
"8594010 7.081627e+05 False \n",
|
||
"\n",
|
||
"[8594011 rows x 19 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Print the frame; pandas abbreviates the repr to the first and last rows plus the shape.\n",
"print(daily_basic)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:59:20.537632Z",
|
||
"start_time": "2025-02-08T16:55:27.971219Z"
|
||
},
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"is st...\n",
|
||
" ts_code trade_date is_st\n",
|
||
"0 603848.SH 20250430 False\n",
|
||
"1 300290.SZ 20250430 False\n",
|
||
"2 603877.SH 20250430 False\n",
|
||
"3 834639.BJ 20250430 False\n",
|
||
"4 000909.SZ 20250430 True\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from datetime import datetime\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"print('is st...')\n",
"# Add an is_st column: for each row, whether ts_code is inside one of its\n",
"# recorded ST periods on trade_date. Only the two needed columns are iterated;\n",
"# DataFrame.apply with axis=1 would build a full Series for every row.\n",
"daily_basic['is_st'] = [\n",
"    is_st(name_change_dict, ts_code, trade_date)\n",
"    for ts_code, trade_date in zip(daily_basic['ts_code'], daily_basic['trade_date'])\n",
"]\n",
"\n",
"# Write the result back to the HDF5 file it was loaded from. NOTE: mode='w'\n",
"# recreates the file, so any other key stored in it is discarded.\n",
"daily_basic.to_hdf('../../../data/daily_basic.h5', key='daily_basic', mode='w', format='table')\n",
"\n",
"# Show a small sample of the result.\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "new_trader",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|