classify2赚钱

This commit is contained in:
liaozhaorun
2025-05-04 22:00:05 +08:00
parent 9e598d4ed0
commit aff5e32bb4
19 changed files with 19755 additions and 21963 deletions

1903
main/train/Classify2.ipynb Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

200
main/train/test.ipynb Normal file
View File

@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "1c4126c0",
"metadata": {},
"outputs": [],
"source": [
"from operator import index\n",
"\n",
"import tushare as ts\n",
"import pandas as pd\n",
"import time\n",
"import akshare as ak\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()\n",
"\n",
"from datetime import datetime\n",
"import pandas as pd\n",
"import warnings\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "dc90836a",
"metadata": {},
"outputs": [],
"source": [
"def filter_rows(df):\n",
" # 按照 name 和 start_date 分组\n",
" def select_row(group):\n",
" # 如果有 end_date 不为 NaT 的行,优先保留这些行\n",
" valid_rows = group[group['end_date'].notna()]\n",
" if not valid_rows.empty:\n",
" return valid_rows.iloc[0] # 返回第一个有效行\n",
" else:\n",
" return group.iloc[0] # 如果没有有效行,返回第一行\n",
"\n",
" filtered_df = df.groupby(['name', 'start_date'], group_keys=False).apply(select_row)\n",
" filtered_df = filtered_df.reset_index(drop=True)\n",
" return filtered_df\n",
"\n",
"name_change_df = pd.read_hdf('E:/PyProject/NewStock/data/name_change.h5', key='name_change')\n",
"name_change_df = name_change_df.drop_duplicates(keep='first')\n",
"\n",
"# 确保 name_change_df 的日期格式正确\n",
"name_change_df['start_date'] = pd.to_datetime(name_change_df['start_date'], format='%Y%m%d')\n",
"name_change_df['end_date'] = pd.to_datetime(name_change_df['end_date'], format='%Y%m%d', errors='coerce')\n",
"name_change_df = name_change_df[name_change_df.name.str.contains('ST')]\n",
"name_change_dict = {}\n",
"for ts_code, group in name_change_df.groupby('ts_code'):\n",
" # 只保留 'ST' 和 '*ST' 的记录\n",
" st_data = group[group['name'].str.contains('ST')]\n",
" if not st_data.empty and ts_code == '002569.SZ':\n",
" name_change_dict[ts_code] = filter_rows(st_data)\n",
" # name_change_dict[ts_code] = st_data"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "f0ed36d1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code name start_date end_date change_reason\n",
"7990 002569.SZ ST步森 2020-06-09 NaT 摘星\n",
"7992 002569.SZ *ST步森 2019-04-30 NaT *ST\n",
"7993 002569.SZ *ST步森 2019-04-30 2020-06-08 *ST\n",
" ts_code name start_date end_date change_reason\n",
"0 002569.SZ *ST步森 2019-04-30 2020-06-08 *ST\n",
"1 002569.SZ ST步森 2020-06-09 NaT 摘星\n"
]
}
],
"source": [
"print(name_change_df[name_change_df['ts_code'] == '002569.SZ'])\n",
"print(filter_rows(name_change_df[name_change_df['ts_code'] == '002569.SZ']))\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "3c526622",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code name start_date end_date change_reason\n",
"0 002569.SZ *ST步森 2019-04-30 2020-06-08 *ST\n",
"1 002569.SZ ST步森 2020-06-09 NaT 摘星\n",
" ts_code trade_date is_st\n",
"3753 002569.SZ 20230120 True\n"
]
}
],
"source": [
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def is_st(name_change_dict, stock_code, target_date):\n",
" target_date = datetime.strptime(target_date, '%Y%m%d')\n",
" if stock_code not in name_change_dict.keys():\n",
" return False\n",
" df = name_change_dict[stock_code]\n",
" print(df)\n",
" for i in range(len(df)):\n",
" sds = df.iloc[i, 2]\n",
" eds = df.iloc[i, 3]\n",
" if eds is None or eds is pd.NaT:\n",
" eds = datetime.now()\n",
" if (target_date - sds).days >= 0 and (target_date - eds).days <= 0:\n",
" return True\n",
" return False\n",
"\n",
" \n",
"trade_date = '20230120'\n",
"daily_basic_data = pro.daily_basic(ts_code='', trade_date=trade_date)\n",
"daily_basic_data = daily_basic_data[daily_basic_data['ts_code'] == '002569.SZ']\n",
"if daily_basic_data is not None and not daily_basic_data.empty:\n",
" # 添加交易日期列标识\n",
" daily_basic_data['trade_date'] = trade_date\n",
" daily_basic_data['is_st'] = daily_basic_data.apply(\n",
" lambda row: is_st(name_change_dict, row['ts_code'], row['trade_date']), axis=1\n",
" )\n",
" \n",
"print(daily_basic_data[daily_basic_data['ts_code'] == '002569.SZ'][['ts_code', 'trade_date', 'is_st']])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf4c9fd5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 45,
"id": "4a3638e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code name start_date end_date change_reason\n",
"0 002569.SZ *ST步森 20250428 None *ST\n",
"1 002569.SZ ST步森 20200609 None 摘星\n",
"2 002569.SZ ST步森 20200609 None 摘星\n",
"3 002569.SZ ST步森 20200609 20250427 摘星\n",
"4 002569.SZ *ST步森 20190430 None *ST\n",
"5 002569.SZ *ST步森 20190430 20200608 *ST\n",
"6 002569.SZ *ST步森 20190430 20200608 *ST\n",
"7 002569.SZ 步森股份 20110412 20190429 其他\n",
"8 002569.SZ 步森股份 20110412 20190429 其他\n"
]
}
],
"source": [
"df = pro.namechange(ts_code='002569.SZ', fields='ts_code,name,start_date,end_date,change_reason')\n",
"print(df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "new_trader",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -3,12 +3,13 @@ from operator import index
import tushare as ts
import pandas as pd
import time
import akshare as ak
from main.factor.factor import calculate_arbr
# NOTE(review): the Tushare API token is hard-coded in a committed file; it
# should be rotated and loaded from an environment variable or local config.
ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')
pro = ts.pro_api()

# Result of index_member_all for a single stock code.
member_df = pro.index_member_all(ts_code='603579.SH')
print(member_df)

# Keep the balance sheet in its own variable. It used to be assigned to `df`
# and then immediately overwritten by the sw_daily result below, so the final
# `total_liab` lookup ran against a frame that was requested without that
# column and the balance-sheet query was never actually used.
balance_df = pro.balancesheet(ts_code='600000.SH', start_date='20180101', end_date='20180730')

# `df` is still bound to the sw_daily frame at the end of this section, so any
# later code that reads `df` sees the same object as before.
df = pro.sw_daily(trade_date='20250305', fields='ts_code,name,open,close,vol,pe,pb')
print(df[df['ts_code'] == '851171.SI'])
print(balance_df['total_liab'])