Files
NewStock/main/data/update/update_money_flow.ipynb

279 lines
10 KiB
Plaintext
Raw Normal View History

2025-02-12 00:21:33 +08:00
{
"cells": [
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 1,
2025-02-12 00:21:33 +08:00
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:57:40.184418Z",
"start_time": "2025-04-09T14:57:39.137312Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [],
2025-02-12 00:21:33 +08:00
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# The Tushare token is a credential: read it from the environment instead of\n",
"# storing it in the notebook. Any token that was committed earlier should be revoked.\n",
"tushare_token = os.environ.get('TUSHARE_TOKEN')\n",
"if not tushare_token:\n",
"    raise RuntimeError('Set the TUSHARE_TOKEN environment variable before running this notebook.')\n",
"\n",
"ts.set_token(tushare_token)\n",
"pro = ts.pro_api()"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
2025-02-15 23:33:34 +08:00
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 2,
2025-02-15 23:33:34 +08:00
"id": "742c29d453b9bb38",
2025-02-12 00:21:33 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:10.515830Z",
"start_time": "2025-04-09T14:57:40.190466Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
2025-11-29 00:23:12 +08:00
"Index: 9134824 entries, 0 to 20632\n",
2025-05-06 23:42:40 +08:00
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
2025-11-29 00:23:12 +08:00
"memory usage: 209.1+ MB\n",
2025-05-06 23:42:40 +08:00
"None\n",
2025-11-29 00:23:12 +08:00
"20251120\n",
"start_date: 20251121\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-15 23:33:34 +08:00
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
2025-06-02 22:23:44 +08:00
"h5_filename = '/mnt/d/PyProject/NewStock/data/money_flow.h5'\n",
2025-02-15 23:33:34 +08:00
"key = '/money_flow'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
"    # Read only the two columns that are needed instead of loading the whole table first.\n",
"    df = store.select(key, columns=['ts_code', 'trade_date'])\n",
"    df.info()  # info() prints its report itself and returns None, so do not wrap it in print()\n",
"    # Newest trade date already present in the store.\n",
"    max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
2025-11-29 00:23:12 +08:00
"# Query the calendar up to today instead of a hard-coded end date: a fixed date goes\n",
"# stale, and a date in the future makes the download step request days with no data yet.\n",
"today = pd.Timestamp.today().strftime('%Y%m%d')\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date=today)\n",
2025-02-15 23:33:34 +08:00
"trade_cal = trade_cal[trade_cal['is_open'] == 1]  # keep trading days only\n",
"# Trading days strictly after the newest date already stored.\n",
"trade_dates = trade_cal.loc[trade_cal['cal_date'] > max_date, 'cal_date'].tolist()\n",
"# min() raises ValueError on an empty list, so handle the already-up-to-date case explicitly.\n",
"start_date = min(trade_dates) if trade_dates else None\n",
"if start_date is None:\n",
"    print(f'no new trading days after {max_date}; nothing to download')\n",
"else:\n",
"    print(f'start_date: {start_date}')"
2025-05-06 23:42:40 +08:00
]
2025-04-09 22:57:01 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 3,
2025-04-09 22:57:01 +08:00
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
2025-05-06 23:42:40 +08:00
},
"scrolled": true
2025-04-09 22:57:01 +08:00
},
2025-05-06 23:42:40 +08:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-11-29 00:23:12 +08:00
"任务 20251218 完成\n",
"任务 20251219 完成\n",
"任务 20251217 完成\n",
"任务 20251216 完成\n",
"任务 20251215 完成\n",
"任务 20251212 完成\n",
"任务 20251211 完成\n",
"任务 20251210 完成\n",
"任务 20251209 完成\n",
"任务 20251208 完成\n",
"任务 20251205 完成\n",
"任务 20251204 完成\n",
"任务 20251203 完成\n",
"任务 20251202 完成\n",
"任务 20251201 完成\n",
"任务 20251128 完成\n",
"任务 20251127 完成\n",
"任务 20251126 完成\n",
"任务 20251125 完成\n",
"任务 20251124 完成\n",
"任务 20251121 完成\n"
2025-05-06 23:42:40 +08:00
]
}
],
2025-02-12 00:21:33 +08:00
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"# Frames that were actually downloaded, one per trading day (completion order, not date order).\n",
"all_daily_data = []\n",
"\n",
"\n",
"def get_data(trade_date):\n",
"    \"\"\"Download the money-flow table for one trading day.\n",
"\n",
"    Returns the DataFrame produced by ``pro.moneyflow``, or ``None`` when the\n",
"    call yields nothing or an empty frame for that day.\n",
"    \"\"\"\n",
"    time.sleep(0.1)  # short pause before every request\n",
"    money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
"    if money_flow_data is None or money_flow_data.empty:\n",
"        return None\n",
"    return money_flow_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
"    future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
"    for future in as_completed(future_to_date):\n",
"        trade_date = future_to_date[future]  # trading day this future was submitted for\n",
"        try:\n",
"            result = future.result()\n",
"        except Exception as e:\n",
"            print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"            continue\n",
"        if result is None:\n",
"            # An empty day is neither stored nor reported as completed.\n",
"            print(f\"任务 {trade_date} 无数据\")\n",
"            continue\n",
"        all_daily_data.append(result)\n",
"        print(f\"任务 {trade_date} 完成\")\n"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
2025-02-15 23:33:34 +08:00
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 4,
2025-02-15 23:33:34 +08:00
"id": "9af80516849d4e80",
2025-02-12 00:21:33 +08:00
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:17.214168Z",
"start_time": "2025-04-09T14:58:17.210734Z"
2025-02-12 00:21:33 +08:00
}
},
2025-05-06 23:42:40 +08:00
"outputs": [],
2025-02-15 23:33:34 +08:00
"source": [
"# Drop None placeholders, and avoid the ValueError pd.concat raises when there is nothing to concatenate.\n",
"downloaded_frames = [frame for frame in all_daily_data if frame is not None]\n",
"if downloaded_frames:\n",
"    all_daily_data_df = pd.concat(downloaded_frames, ignore_index=True)\n",
"else:\n",
"    all_daily_data_df = pd.DataFrame()\n"
2025-05-06 23:42:40 +08:00
]
2025-02-12 00:21:33 +08:00
},
{
"cell_type": "code",
2025-05-06 23:42:40 +08:00
"execution_count": 5,
2025-02-12 00:21:33 +08:00
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
2025-04-10 23:17:22 +08:00
"end_time": "2025-04-09T14:58:19.633456Z",
"start_time": "2025-04-09T14:58:17.229837Z"
2025-02-12 00:21:33 +08:00
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
2025-05-06 23:42:40 +08:00
"source": [
"\n",
"# Append the newly downloaded rows to the existing HDF5 store (table format).\n",
"all_daily_data_df.to_hdf(\n",
"    h5_filename,\n",
"    key='money_flow',\n",
"    mode='a',\n",
"    format='table',\n",
"    append=True,\n",
"    data_columns=True,\n",
")\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6f2a2fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-11-29 00:23:12 +08:00
" ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n",
"0 002593.SZ 20251121 369428 21109.32 239444 \n",
"1 300405.SZ 20251121 173424 11775.01 115988 \n",
"2 001336.SZ 20251121 11378 2729.92 10423 \n",
"3 002403.SZ 20251121 24219 3104.96 19841 \n",
"4 688268.SH 20251121 12369 7423.62 12330 \n",
"... ... ... ... ... ... \n",
"5156 000881.SZ 20251121 146959 11936.56 155068 \n",
"5157 300676.SZ 20251121 21428 9913.61 15092 \n",
"5158 603138.SH 20251121 31243 4558.85 30559 \n",
"5159 301526.SZ 20251121 172815 9552.38 105860 \n",
"5160 300903.SZ 20251121 124772 20586.88 96098 \n",
"\n",
2025-11-29 00:23:12 +08:00
" sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n",
"0 13673.67 256325 14655.03 298786 17088.39 \n",
"1 7859.14 154296 10473.88 176589 11973.97 \n",
"2 2498.94 5274 1266.93 5893 1415.57 \n",
"3 2546.44 17292 2218.64 18180 2333.03 \n",
"4 7430.97 16104 9682.18 16670 10042.76 \n",
"... ... ... ... ... ... \n",
"5156 12623.78 107103 8717.66 97089 7896.18 \n",
"5157 6975.73 17857 8249.34 16607 7679.15 \n",
"5158 4458.47 15126 2208.57 11879 1733.73 \n",
"5159 5855.69 155749 8607.76 160962 8892.48 \n",
"5160 15867.99 92082 15223.39 105748 17449.56 \n",
"\n",
2025-11-29 00:23:12 +08:00
" buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n",
"0 125303 7153.65 190306 10868.03 13733 \n",
"1 68396 4621.42 100633 6820.12 12166 \n",
"2 326 77.32 662 159.66 0 \n",
"3 7131 916.27 8891 1137.58 0 \n",
"4 9155 5523.81 9780 5877.77 2793 \n",
"... ... ... ... ... ... \n",
"5156 63727 5186.84 54928 4460.74 8415 \n",
"5157 12528 5781.44 16425 7596.83 3906 \n",
"5158 5884 857.88 8048 1175.32 0 \n",
"5159 63089 3481.66 115498 6376.52 13568 \n",
"5160 58186 9624.92 77536 12811.46 25445 \n",
"\n",
2025-11-29 00:23:12 +08:00
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol net_mf_amount \n",
"0 781.20 36253 2069.12 -103672 -5866.51 \n",
"1 813.01 15071 1030.08 -34131 -2297.62 \n",
"2 0.00 0 0.00 -1180 -271.00 \n",
"3 0.00 1730 222.81 194 30.22 \n",
"4 1708.30 1640 986.41 476 282.30 \n",
"... ... ... ... ... ... \n",
"5156 686.43 19119 1546.77 -50922 -4113.23 \n",
"5157 1805.21 7595 3497.90 -4085 -1873.36 \n",
"5158 0.00 1768 257.78 713 110.42 \n",
"5159 744.87 22900 1261.99 -64224 -3539.76 \n",
"5160 4179.40 21103 3485.60 -29335 -4855.38 \n",
"\n",
2025-11-29 00:23:12 +08:00
"[5161 rows x 20 columns]\n"
]
}
],
"source": [
"# Show the downloaded rows through the notebook's rich display (bare last expression, not print).\n",
"all_daily_data_df"
]
2025-02-12 00:21:33 +08:00
}
],
"metadata": {
"kernelspec": {
2025-06-02 22:23:44 +08:00
"display_name": "stock",
2025-02-12 00:21:33 +08:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2025-11-29 00:23:12 +08:00
"version": "3.12.11"
2025-02-12 00:21:33 +08:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}