Files
NewStock/main/data/update/update_money_flow.ipynb
liaozhaorun dc29f153ca 1、load model
2、修改update data相关函数
2025-10-13 15:04:48 +08:00

281 lines
10 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.184418Z",
"start_time": "2025-04-09T14:57:39.137312Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Prefer a token supplied through the TUSHARE_TOKEN environment variable.\n",
"# SECURITY: the literal fallback below is a credential committed to the repo;\n",
"# rotate it and delete the fallback once the environment variable is in use.\n",
"ts.set_token(os.environ.get('TUSHARE_TOKEN', '3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f'))\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:10.515830Z",
"start_time": "2025-04-09T14:57:40.190466Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8964780 entries, 0 to 25739\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 205.2+ MB\n",
"None\n",
"20250926\n",
"start_date: 20250929\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/money_flow.h5'\n",
"key = '/money_flow'\n",
"\n",
"# Find the most recent trade date already present in the local store.\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    df = store[key][['ts_code', 'trade_date']]\n",
"    df.info()  # info() prints its own report and returns None\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"# Ask for the calendar up to today instead of a hand-edited end date.\n",
"end_date = pd.Timestamp.today().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=end_date)\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep open trading days only\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"# trade_dates is empty when the store is already current; min() would raise.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20251020 完成\n",
"任务 20251017 完成\n",
"任务 20251016 完成\n",
"任务 20251015 完成\n",
"任务 20251014 完成\n",
"任务 20251013 完成\n",
"任务 20251009 完成\n",
"任务 20251010 完成\n",
"任务 20250929 完成\n",
"任务 20250930 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"# Per-day money-flow DataFrames collected by the download loop below.\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Fetch the money-flow table for one trade date.\n",
"\n",
"    Sleeps 0.1 s before each request, then calls ``pro.moneyflow``.\n",
"    Returns the resulting DataFrame, or None when the API returned\n",
"    nothing or an empty frame.\n",
"    \"\"\"\n",
"    time.sleep(0.1)\n",
"    frame = pro.moneyflow(trade_date=trade_date)\n",
"    if frame is None or frame.empty:\n",
"        return None\n",
"    return frame\n",
"\n",
"\n",
"# Download each missing trade date on a small thread pool.\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trade date this future was submitted for\n",
"        try:\n",
"            result = future.result()  # re-raises any exception from the worker\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"        # get_data returns None when the API has no rows for that date;\n",
"        # do not store it or report the date as completed.\n",
"        if result is None:\n",
"            print(f\"任务 {trade_date} 无数据,已跳过\")\n",
"            continue\n",
"        all_daily_data.append(result)\n",
"        print(f\"任务 {trade_date} 完成\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.214168Z",
"start_time": "2025-04-09T14:58:17.210734Z"
}
},
"outputs": [],
"source": [
"# get_data returns None for dates without rows; keep only real frames.\n",
"frames = [frame for frame in all_daily_data if frame is not None]\n",
"if not frames:\n",
"    # Fail with a clear message instead of pandas' generic concat error.\n",
"    raise ValueError('没有可合并的新资金流向数据')\n",
"all_daily_data_df = pd.concat(frames, ignore_index=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:19.633456Z",
"start_time": "2025-04-09T14:58:17.229837Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"# Append the newly downloaded rows to the existing table-format dataset,\n",
"# storing every column as a data column (data_columns=True).\n",
"with pd.HDFStore(h5_filename, mode='a') as store:\n",
"    store.append('money_flow', all_daily_data_df, format='table', data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6f2a2fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n",
"0 603290.SH 20251009 45532 52028.67 42778 \n",
"1 600936.SH 20251009 42537 1545.21 42382 \n",
"2 300429.SZ 20251009 81914 11768.07 64063 \n",
"3 300879.SZ 20251009 15330 5366.90 11651 \n",
"4 300031.SZ 20251009 51381 12650.70 43869 \n",
"... ... ... ... ... ... \n",
"20574 688083.SH 20250930 13247 10094.95 11236 \n",
"20575 002939.SZ 20250930 372609 43083.12 232240 \n",
"20576 688303.SH 20250930 62478 18094.19 55086 \n",
"20577 300146.SZ 20250930 50078 5792.85 35214 \n",
"20578 688351.SH 20250930 15096 3333.84 14017 \n",
"\n",
" sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n",
"0 48942.98 53824 61495.85 54076 61851.39 \n",
"1 1538.97 24175 878.06 31948 1160.07 \n",
"2 9211.49 88583 12730.36 88244 12682.05 \n",
"3 4089.33 15591 5464.12 17057 5976.94 \n",
"4 10822.65 56173 13836.60 49423 12190.63 \n",
"... ... ... ... ... ... \n",
"20574 8561.02 10482 7994.12 9858 7514.37 \n",
"20575 26867.01 279904 32371.96 324997 37595.57 \n",
"20576 15952.67 55867 16177.83 53776 15573.61 \n",
"20577 4076.10 46159 5337.00 39420 4560.91 \n",
"20578 3095.89 6482 1430.69 6675 1474.59 \n",
"\n",
" buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n",
"0 36150 41253.53 36789 41932.43 10514 \n",
"1 11158 405.04 9212 334.60 5672 \n",
"2 64282 9239.06 72904 10475.38 8221 \n",
"3 10167 3562.24 12327 4313.59 3221 \n",
"4 40306 9938.01 41035 10103.23 6112 \n",
"... ... ... ... ... ... \n",
"20574 6674 5082.80 8224 6273.43 3329 \n",
"20575 204229 23631.31 285167 32986.98 132696 \n",
"20576 33304 9638.04 34809 10074.64 5032 \n",
"20577 47161 5454.07 36321 4202.88 8662 \n",
"20578 2513 555.48 3398 749.54 0 \n",
"\n",
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol \\\n",
"0 12073.88 12377 14125.13 20027 \n",
"1 205.33 0 0.00 -21182 \n",
"2 1183.11 17790 2551.67 -840 \n",
"3 1133.90 3275 1147.29 -4996 \n",
"4 1507.28 19645 4816.08 1531 \n",
"... ... ... ... ... \n",
"20574 2538.01 4413 3361.05 7612 \n",
"20575 15366.29 147033 17003.12 84949 \n",
"20576 1459.24 13010 3768.39 15188 \n",
"20577 1000.95 41105 4744.98 -16754 \n",
"20578 0.00 0 0.00 3406 \n",
"\n",
" net_mf_amount \n",
"0 22734.35 \n",
"1 -766.75 \n",
"2 -90.83 \n",
"3 -1741.72 \n",
"4 385.00 \n",
"... ... \n",
"20574 5816.07 \n",
"20575 9927.60 \n",
"20576 4417.72 \n",
"20577 -1928.39 \n",
"20578 752.20 \n",
"\n",
"[20579 rows x 20 columns]\n"
]
}
],
"source": [
"# Display the combined frame as a quick visual check of the downloaded rows.\n",
"print(all_daily_data_df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}