This commit is contained in:
liaozhaorun
2025-02-12 00:21:33 +08:00
commit 71c9496df8
24 changed files with 34783 additions and 0 deletions

View File

@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"id": "17cc645336d4eb18",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:19.819017Z",
"start_time": "2025-02-08T16:55:18.958639Z"
}
},
"source": [
"import pandas as pd\n",
"import tushare as ts"
],
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.578361Z",
"start_time": "2025-02-08T16:55:19.882313Z"
}
},
"cell_type": "code",
"source": [
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic', columns=['ts_code', 'trade_date '])\n",
"name_change_df = pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n"
],
"id": "48ae71ed02d61819",
"outputs": [],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-08T16:55:27.938078Z",
"start_time": "2025-02-08T16:55:27.584226Z"
}
},
"cell_type": "code",
"source": [
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[(group['change_reason'] == 'ST') | (group['change_reason'] == '*ST')]\n",
" if not st_data.empty:\n",
" name_change_dict[ts_code] = st_data"
],
"id": "e6606a96e5728b8",
"outputs": [],
"execution_count": 3
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-02-08T16:59:20.537632Z",
"start_time": "2025-02-08T16:55:27.971219Z"
}
},
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"\n",
"\n",
"# 判断股票是否为 ST 的函数\n",
"#stock_code = 'xxxxxx.SH'\n",
"#target_date = '20200830'\n",
"#若为ST返回True否则返回False\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" # sd = datetime.strptime(sds, '%Y%m%d')\n",
" if eds == None:\n",
" ed = datetime.now()\n",
" # else:\n",
" # ed = datetime.strptime(eds, '%Y%m%d')\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
"\n",
"print('is st...')\n",
"# 创建一个新的列 is_st判断每只股票是否是 ST\n",
"daily_basic['is_st'] = daily_basic.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
")\n",
"\n",
"# 保存结果到新的 HDF5 文件\n",
"daily_basic.to_hdf('../../data/daily_basic_with_st.h5', key='daily_basic_with_st', mode='w', format='table')\n",
"\n",
"# 输出部分结果\n",
"print(daily_basic[['ts_code', 'trade_date', 'is_st']].head())\n"
],
"id": "initial_id",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"is st...\n",
" ts_code trade_date is_st\n",
"0 603429.SH 20250127 False\n",
"1 300917.SZ 20250127 False\n",
"2 301266.SZ 20250127 False\n",
"3 688399.SH 20250127 False\n",
"4 603737.SH 20250127 False\n"
]
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}