191 lines
5.2 KiB
Plaintext
191 lines
5.2 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "17cc645336d4eb18",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:19.819017Z",
|
||
"start_time": "2025-02-08T16:55:18.958639Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import tushare as ts"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "48ae71ed02d61819",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:27.578361Z",
|
||
"start_time": "2025-02-08T16:55:19.882313Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
"daily_basic = pd.read_hdf('../../data/daily_basic.h5', key='daily_basic')\n",
"\n",
"# Load the name-change history, drop exact duplicate rows, and parse both date\n",
"# columns; an end_date that does not parse becomes NaT (errors='coerce').\n",
"name_change_df = (\n",
"    pd.read_hdf('../../data/name_change.h5', key='name_change')\n",
"    .drop_duplicates(keep='first')\n",
"    .assign(\n",
"        start_date=lambda frame: pd.to_datetime(frame['start_date'], format='%Y%m%d'),\n",
"        end_date=lambda frame: pd.to_datetime(frame['end_date'], format='%Y%m%d', errors='coerce'),\n",
"    )\n",
")\n",
"\n",
"# Keep only the rows whose stock name contains 'ST'.\n",
"has_st_name = name_change_df['name'].str.contains('ST')\n",
"name_change_df = name_change_df[has_st_name]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "e6606a96e5728b8",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:55:27.938078Z",
|
||
"start_time": "2025-02-08T16:55:27.584226Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
"# Map each ts_code to its name-change rows whose change_reason is 'ST' or '*ST';\n",
"# a stock with no such rows gets no entry.\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
"    st_data = group[group['change_reason'].isin(['ST', '*ST'])]\n",
"    if st_data.empty:\n",
"        continue\n",
"    name_change_dict[ts_code] = st_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-08T16:59:20.537632Z",
|
||
"start_time": "2025-02-08T16:55:27.971219Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"is st...\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from datetime import datetime\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"\n",
|
"def is_st(name_change_dict, stock_code, target_date):\n",
"    \"\"\"Return True if stock_code is inside one of its ST periods on target_date.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    name_change_dict : dict\n",
"        Maps a stock code (e.g. 'xxxxxx.SH') to a DataFrame of its ST periods\n",
"        with datetime columns 'start_date' and 'end_date'.\n",
"    stock_code : str\n",
"        Code to look up.\n",
"    target_date : str or datetime\n",
"        Date to test; a string must be in '%Y%m%d' form, e.g. '20200830'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    bool\n",
"        True if some period covers the date; False if none does or the stock\n",
"        has no entry in name_change_dict.\n",
"    \"\"\"\n",
"    if isinstance(target_date, str):\n",
"        target_date = datetime.strptime(target_date, '%Y%m%d')\n",
"    periods = name_change_dict.get(stock_code)\n",
"    if periods is None:\n",
"        return False\n",
"    # Read the dates by column name, not by position, so a different column\n",
"    # order in the source frame cannot silently swap them.\n",
"    for start, end in zip(periods['start_date'], periods['end_date']):\n",
"        # A missing end date (None / NaN / NaT) means the period is still open;\n",
"        # treat it as running up to the current moment.\n",
"        if pd.isna(end):\n",
"            end = datetime.now()\n",
"        # .days floors to whole days, so the end day itself still counts.\n",
"        if (target_date - start).days >= 0 and (target_date - end).days <= 0:\n",
"            return True\n",
"    return False\n",
|
||
"\n",
|
||
"\n",
|
"print('is st...')\n",
"# Add an is_st column: for every row, whether that stock is ST on that trade date.\n",
"# Iterating only the two needed columns avoids DataFrame.apply(axis=1), which\n",
"# builds a Series for every row, and also works when the frame is empty.\n",
"daily_basic['is_st'] = [\n",
"    is_st(name_change_dict, ts_code, trade_date)\n",
"    for ts_code, trade_date in zip(daily_basic['ts_code'], daily_basic['trade_date'])\n",
"]\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "c74bc633-fc73-48c2-bb44-0a798d2cf070",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [ts_code, trade_date, close, turnover_rate, turnover_rate_f, volume_ratio, pe, pe_ttm, pb, ps, ps_ttm, dv_ratio, dv_ttm, total_share, float_share, free_share, total_mv, circ_mv, is_st]\n",
|
||
"Index: []\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Sanity check: print any rows whose is_st is neither True nor False.\n",
"is_true = daily_basic['is_st'] == True\n",
"is_false = daily_basic['is_st'] == False\n",
"print(daily_basic[~(is_true | is_false)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "0464ce15-320c-40d4-b499-2e18bac5910f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" ts_code trade_date is_st\n",
|
||
"0 002512.SZ 20250211 False\n",
|
||
"1 600966.SH 20250211 False\n",
|
||
"2 600358.SH 20250211 True\n",
|
||
"3 002893.SZ 20250211 False\n",
|
||
"4 300648.SZ 20250211 False\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Write the frame (now including is_st) back to the same HDF5 path it was read from.\n",
"# mode='w' recreates the file, so only the 'daily_basic' key is stored in it.\n",
"daily_basic.to_hdf('../../data/daily_basic.h5', key='daily_basic', mode='w', format='table')\n",
"\n",
"# Show a small sample of the result.\n",
"preview_cols = ['ts_code', 'trade_date', 'is_st']\n",
"print(daily_basic[preview_cols].head())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "30c882de-3a89-4056-900d-459a3a012af9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.19"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|